{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999834735328629, "eval_steps": 500, "global_step": 45381, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.2035289516160132e-05, "grad_norm": 14.863543510437012, "learning_rate": 2.2026431718061672e-08, "loss": 2.1663, "step": 1 }, { "epoch": 4.4070579032320264e-05, "grad_norm": 18.296010971069336, "learning_rate": 4.4052863436123345e-08, "loss": 2.283, "step": 2 }, { "epoch": 6.610586854848039e-05, "grad_norm": 19.33452796936035, "learning_rate": 6.607929515418503e-08, "loss": 2.2913, "step": 3 }, { "epoch": 8.814115806464053e-05, "grad_norm": 14.173552513122559, "learning_rate": 8.810572687224669e-08, "loss": 2.1681, "step": 4 }, { "epoch": 0.00011017644758080065, "grad_norm": 15.100581169128418, "learning_rate": 1.1013215859030837e-07, "loss": 2.254, "step": 5 }, { "epoch": 0.00013221173709696078, "grad_norm": 16.82364845275879, "learning_rate": 1.3215859030837005e-07, "loss": 2.288, "step": 6 }, { "epoch": 0.0001542470266131209, "grad_norm": 16.434555053710938, "learning_rate": 1.5418502202643172e-07, "loss": 2.3168, "step": 7 }, { "epoch": 0.00017628231612928105, "grad_norm": 15.644309043884277, "learning_rate": 1.7621145374449338e-07, "loss": 2.2009, "step": 8 }, { "epoch": 0.00019831760564544118, "grad_norm": 13.759249687194824, "learning_rate": 1.9823788546255507e-07, "loss": 2.1977, "step": 9 }, { "epoch": 0.0002203528951616013, "grad_norm": 16.03558921813965, "learning_rate": 2.2026431718061673e-07, "loss": 2.3812, "step": 10 }, { "epoch": 0.00024238818467776143, "grad_norm": 13.386174201965332, "learning_rate": 2.4229074889867845e-07, "loss": 2.2921, "step": 11 }, { "epoch": 0.00026442347419392155, "grad_norm": 13.813769340515137, "learning_rate": 2.643171806167401e-07, "loss": 2.1639, "step": 12 }, { "epoch": 0.0002864587637100817, "grad_norm": 14.936403274536133, "learning_rate": 2.8634361233480177e-07, "loss": 2.1774, "step": 13 }, { "epoch": 0.0003084940532262418, "grad_norm": 13.39548397064209, "learning_rate": 3.0837004405286343e-07, "loss": 2.1612, "step": 14 }, { "epoch": 0.00033052934274240193, "grad_norm": 16.714370727539062, "learning_rate": 3.3039647577092515e-07, "loss": 2.2065, "step": 15 }, { "epoch": 0.0003525646322585621, "grad_norm": 15.17988109588623, "learning_rate": 3.5242290748898676e-07, "loss": 2.1256, "step": 16 }, { "epoch": 0.00037459992177472223, "grad_norm": 14.345056533813477, "learning_rate": 3.744493392070485e-07, "loss": 1.9858, "step": 17 }, { "epoch": 0.00039663521129088236, "grad_norm": 15.949020385742188, "learning_rate": 3.9647577092511014e-07, "loss": 2.1791, "step": 18 }, { "epoch": 0.0004186705008070425, "grad_norm": 13.975539207458496, "learning_rate": 4.1850220264317185e-07, "loss": 2.1126, "step": 19 }, { "epoch": 0.0004407057903232026, "grad_norm": 13.359574317932129, "learning_rate": 4.4052863436123346e-07, "loss": 2.114, "step": 20 }, { "epoch": 0.00046274107983936273, "grad_norm": 14.914043426513672, "learning_rate": 4.625550660792952e-07, "loss": 1.9799, "step": 21 }, { "epoch": 0.00048477636935552286, "grad_norm": 12.221274375915527, "learning_rate": 4.845814977973569e-07, "loss": 1.9707, "step": 22 }, { "epoch": 0.000506811658871683, "grad_norm": 12.325453758239746, "learning_rate": 5.066079295154185e-07, "loss": 1.9363, "step": 23 }, { "epoch": 0.0005288469483878431, "grad_norm": 12.193845748901367, "learning_rate": 5.286343612334802e-07, "loss": 2.0412, "step": 24 }, { "epoch": 0.0005508822379040033, "grad_norm": 13.141278266906738, "learning_rate": 5.506607929515418e-07, "loss": 2.087, "step": 25 }, { "epoch": 0.0005729175274201634, "grad_norm": 12.32343578338623, "learning_rate": 5.726872246696035e-07, "loss": 2.023, "step": 26 }, { "epoch": 0.0005949528169363235, "grad_norm": 12.555145263671875, "learning_rate": 5.947136563876653e-07, "loss": 1.9484, "step": 27 }, { "epoch": 0.0006169881064524836, "grad_norm": 13.836896896362305, "learning_rate": 6.167400881057269e-07, "loss": 2.0215, "step": 28 }, { "epoch": 0.0006390233959686438, "grad_norm": 12.038517951965332, "learning_rate": 6.387665198237886e-07, "loss": 2.0544, "step": 29 }, { "epoch": 0.0006610586854848039, "grad_norm": 11.800670623779297, "learning_rate": 6.607929515418503e-07, "loss": 1.8815, "step": 30 }, { "epoch": 0.000683093975000964, "grad_norm": 11.072229385375977, "learning_rate": 6.828193832599119e-07, "loss": 1.909, "step": 31 }, { "epoch": 0.0007051292645171242, "grad_norm": 11.484781265258789, "learning_rate": 7.048458149779735e-07, "loss": 1.766, "step": 32 }, { "epoch": 0.0007271645540332843, "grad_norm": 10.837560653686523, "learning_rate": 7.268722466960353e-07, "loss": 1.8193, "step": 33 }, { "epoch": 0.0007491998435494445, "grad_norm": 9.166078567504883, "learning_rate": 7.48898678414097e-07, "loss": 1.8072, "step": 34 }, { "epoch": 0.0007712351330656045, "grad_norm": 10.550054550170898, "learning_rate": 7.709251101321586e-07, "loss": 1.7323, "step": 35 }, { "epoch": 0.0007932704225817647, "grad_norm": 9.746813774108887, "learning_rate": 7.929515418502203e-07, "loss": 1.699, "step": 36 }, { "epoch": 0.0008153057120979248, "grad_norm": 9.526512145996094, "learning_rate": 8.14977973568282e-07, "loss": 1.7924, "step": 37 }, { "epoch": 0.000837341001614085, "grad_norm": 8.774542808532715, "learning_rate": 8.370044052863437e-07, "loss": 1.6568, "step": 38 }, { "epoch": 0.000859376291130245, "grad_norm": 8.937015533447266, "learning_rate": 8.590308370044053e-07, "loss": 1.7986, "step": 39 }, { "epoch": 0.0008814115806464052, "grad_norm": 9.050973892211914, "learning_rate": 8.810572687224669e-07, "loss": 1.643, "step": 40 }, { "epoch": 0.0009034468701625654, "grad_norm": 8.598289489746094, "learning_rate": 9.030837004405287e-07, "loss": 1.6564, "step": 41 }, { "epoch": 0.0009254821596787255, "grad_norm": 8.702563285827637, "learning_rate": 9.251101321585904e-07, "loss": 1.7131, "step": 42 }, { "epoch": 0.0009475174491948856, "grad_norm": 7.124616622924805, "learning_rate": 9.471365638766521e-07, "loss": 1.5912, "step": 43 }, { "epoch": 0.0009695527387110457, "grad_norm": 8.025264739990234, "learning_rate": 9.691629955947138e-07, "loss": 1.6517, "step": 44 }, { "epoch": 0.0009915880282272058, "grad_norm": 7.635185718536377, "learning_rate": 9.911894273127754e-07, "loss": 1.566, "step": 45 }, { "epoch": 0.001013623317743366, "grad_norm": 8.089704513549805, "learning_rate": 1.013215859030837e-06, "loss": 1.5235, "step": 46 }, { "epoch": 0.0010356586072595261, "grad_norm": 7.060845375061035, "learning_rate": 1.0352422907488986e-06, "loss": 1.5987, "step": 47 }, { "epoch": 0.0010576938967756862, "grad_norm": 6.379401206970215, "learning_rate": 1.0572687224669604e-06, "loss": 1.3538, "step": 48 }, { "epoch": 0.0010797291862918463, "grad_norm": 6.153185844421387, "learning_rate": 1.079295154185022e-06, "loss": 1.4913, "step": 49 }, { "epoch": 0.0011017644758080066, "grad_norm": 8.285276412963867, "learning_rate": 1.1013215859030837e-06, "loss": 1.3672, "step": 50 }, { "epoch": 0.0011237997653241666, "grad_norm": 7.05129861831665, "learning_rate": 1.1233480176211455e-06, "loss": 1.4228, "step": 51 }, { "epoch": 0.0011458350548403267, "grad_norm": 6.545504093170166, "learning_rate": 1.145374449339207e-06, "loss": 1.4002, "step": 52 }, { "epoch": 0.001167870344356487, "grad_norm": 5.7380146980285645, "learning_rate": 1.1674008810572687e-06, "loss": 1.3835, "step": 53 }, { "epoch": 0.001189905633872647, "grad_norm": 6.724987030029297, "learning_rate": 1.1894273127753305e-06, "loss": 1.3719, "step": 54 }, { "epoch": 0.0012119409233888071, "grad_norm": 6.8235554695129395, "learning_rate": 1.2114537444933921e-06, "loss": 1.3714, "step": 55 }, { "epoch": 0.0012339762129049672, "grad_norm": 6.33079719543457, "learning_rate": 1.2334801762114537e-06, "loss": 1.3641, "step": 56 }, { "epoch": 0.0012560115024211275, "grad_norm": 6.587262153625488, "learning_rate": 1.2555066079295153e-06, "loss": 1.3383, "step": 57 }, { "epoch": 0.0012780467919372876, "grad_norm": 5.708672523498535, "learning_rate": 1.2775330396475772e-06, "loss": 1.2506, "step": 58 }, { "epoch": 0.0013000820814534476, "grad_norm": 7.027276039123535, "learning_rate": 1.2995594713656388e-06, "loss": 1.33, "step": 59 }, { "epoch": 0.0013221173709696077, "grad_norm": 5.209397792816162, "learning_rate": 1.3215859030837006e-06, "loss": 1.2168, "step": 60 }, { "epoch": 0.001344152660485768, "grad_norm": 5.050267219543457, "learning_rate": 1.3436123348017622e-06, "loss": 1.1266, "step": 61 }, { "epoch": 0.001366187950001928, "grad_norm": 6.557770729064941, "learning_rate": 1.3656387665198238e-06, "loss": 1.2883, "step": 62 }, { "epoch": 0.0013882232395180881, "grad_norm": 5.636152744293213, "learning_rate": 1.3876651982378854e-06, "loss": 1.2514, "step": 63 }, { "epoch": 0.0014102585290342484, "grad_norm": 6.484176158905029, "learning_rate": 1.409691629955947e-06, "loss": 1.2286, "step": 64 }, { "epoch": 0.0014322938185504085, "grad_norm": 5.509383201599121, "learning_rate": 1.4317180616740089e-06, "loss": 1.1334, "step": 65 }, { "epoch": 0.0014543291080665686, "grad_norm": 5.853956699371338, "learning_rate": 1.4537444933920707e-06, "loss": 1.1577, "step": 66 }, { "epoch": 0.0014763643975827287, "grad_norm": 5.367517471313477, "learning_rate": 1.4757709251101323e-06, "loss": 1.2193, "step": 67 }, { "epoch": 0.001498399687098889, "grad_norm": 5.6789422035217285, "learning_rate": 1.497797356828194e-06, "loss": 1.1853, "step": 68 }, { "epoch": 0.001520434976615049, "grad_norm": 5.6490936279296875, "learning_rate": 1.5198237885462555e-06, "loss": 1.1538, "step": 69 }, { "epoch": 0.001542470266131209, "grad_norm": 4.788982391357422, "learning_rate": 1.5418502202643171e-06, "loss": 1.0885, "step": 70 }, { "epoch": 0.0015645055556473694, "grad_norm": 5.7118706703186035, "learning_rate": 1.563876651982379e-06, "loss": 1.0812, "step": 71 }, { "epoch": 0.0015865408451635294, "grad_norm": 5.752577781677246, "learning_rate": 1.5859030837004405e-06, "loss": 1.0506, "step": 72 }, { "epoch": 0.0016085761346796895, "grad_norm": 6.064701557159424, "learning_rate": 1.6079295154185022e-06, "loss": 1.0754, "step": 73 }, { "epoch": 0.0016306114241958496, "grad_norm": 4.86065149307251, "learning_rate": 1.629955947136564e-06, "loss": 1.0674, "step": 74 }, { "epoch": 0.0016526467137120099, "grad_norm": 5.31305456161499, "learning_rate": 1.6519823788546256e-06, "loss": 1.0641, "step": 75 }, { "epoch": 0.00167468200322817, "grad_norm": 5.636301517486572, "learning_rate": 1.6740088105726874e-06, "loss": 1.0866, "step": 76 }, { "epoch": 0.00169671729274433, "grad_norm": 5.361306190490723, "learning_rate": 1.696035242290749e-06, "loss": 1.0404, "step": 77 }, { "epoch": 0.00171875258226049, "grad_norm": 5.966645240783691, "learning_rate": 1.7180616740088106e-06, "loss": 1.0589, "step": 78 }, { "epoch": 0.0017407878717766504, "grad_norm": 6.485328197479248, "learning_rate": 1.7400881057268722e-06, "loss": 1.0912, "step": 79 }, { "epoch": 0.0017628231612928104, "grad_norm": 5.117496490478516, "learning_rate": 1.7621145374449338e-06, "loss": 1.0363, "step": 80 }, { "epoch": 0.0017848584508089705, "grad_norm": 4.624040126800537, "learning_rate": 1.7841409691629955e-06, "loss": 0.9802, "step": 81 }, { "epoch": 0.0018068937403251308, "grad_norm": 5.33875846862793, "learning_rate": 1.8061674008810575e-06, "loss": 0.9963, "step": 82 }, { "epoch": 0.0018289290298412909, "grad_norm": 4.680130481719971, "learning_rate": 1.828193832599119e-06, "loss": 0.983, "step": 83 }, { "epoch": 0.001850964319357451, "grad_norm": 5.091893196105957, "learning_rate": 1.8502202643171807e-06, "loss": 0.897, "step": 84 }, { "epoch": 0.001872999608873611, "grad_norm": 4.357666015625, "learning_rate": 1.8722466960352423e-06, "loss": 0.8853, "step": 85 }, { "epoch": 0.0018950348983897713, "grad_norm": 5.904634952545166, "learning_rate": 1.8942731277533041e-06, "loss": 0.9399, "step": 86 }, { "epoch": 0.0019170701879059314, "grad_norm": 4.072411060333252, "learning_rate": 1.9162995594713658e-06, "loss": 0.8381, "step": 87 }, { "epoch": 0.0019391054774220914, "grad_norm": 4.493630886077881, "learning_rate": 1.9383259911894276e-06, "loss": 0.9415, "step": 88 }, { "epoch": 0.0019611407669382517, "grad_norm": 4.716130256652832, "learning_rate": 1.960352422907489e-06, "loss": 0.9, "step": 89 }, { "epoch": 0.0019831760564544116, "grad_norm": 4.769815444946289, "learning_rate": 1.982378854625551e-06, "loss": 0.9354, "step": 90 }, { "epoch": 0.002005211345970572, "grad_norm": 4.9310622215271, "learning_rate": 2.0044052863436126e-06, "loss": 0.9737, "step": 91 }, { "epoch": 0.002027246635486732, "grad_norm": 4.5948004722595215, "learning_rate": 2.026431718061674e-06, "loss": 0.8849, "step": 92 }, { "epoch": 0.002049281925002892, "grad_norm": 4.968603134155273, "learning_rate": 2.048458149779736e-06, "loss": 0.9006, "step": 93 }, { "epoch": 0.0020713172145190523, "grad_norm": 4.842495441436768, "learning_rate": 2.0704845814977972e-06, "loss": 0.8795, "step": 94 }, { "epoch": 0.0020933525040352126, "grad_norm": 4.8845977783203125, "learning_rate": 2.092511013215859e-06, "loss": 0.8112, "step": 95 }, { "epoch": 0.0021153877935513724, "grad_norm": 4.181149005889893, "learning_rate": 2.114537444933921e-06, "loss": 0.8316, "step": 96 }, { "epoch": 0.0021374230830675327, "grad_norm": 5.00706672668457, "learning_rate": 2.1365638766519823e-06, "loss": 0.858, "step": 97 }, { "epoch": 0.0021594583725836926, "grad_norm": 4.331758975982666, "learning_rate": 2.158590308370044e-06, "loss": 0.9236, "step": 98 }, { "epoch": 0.002181493662099853, "grad_norm": 4.829426288604736, "learning_rate": 2.1806167400881055e-06, "loss": 0.8832, "step": 99 }, { "epoch": 0.002203528951616013, "grad_norm": 4.341812610626221, "learning_rate": 2.2026431718061673e-06, "loss": 0.8096, "step": 100 }, { "epoch": 0.002225564241132173, "grad_norm": 5.245118141174316, "learning_rate": 2.224669603524229e-06, "loss": 0.8943, "step": 101 }, { "epoch": 0.0022475995306483333, "grad_norm": 4.710810661315918, "learning_rate": 2.246696035242291e-06, "loss": 0.8705, "step": 102 }, { "epoch": 0.0022696348201644936, "grad_norm": 4.328925132751465, "learning_rate": 2.2687224669603528e-06, "loss": 0.7967, "step": 103 }, { "epoch": 0.0022916701096806534, "grad_norm": 3.9094460010528564, "learning_rate": 2.290748898678414e-06, "loss": 0.7484, "step": 104 }, { "epoch": 0.0023137053991968137, "grad_norm": 4.497801780700684, "learning_rate": 2.312775330396476e-06, "loss": 0.7881, "step": 105 }, { "epoch": 0.002335740688712974, "grad_norm": 3.949643135070801, "learning_rate": 2.3348017621145374e-06, "loss": 0.7927, "step": 106 }, { "epoch": 0.002357775978229134, "grad_norm": 4.332653522491455, "learning_rate": 2.3568281938325992e-06, "loss": 0.7473, "step": 107 }, { "epoch": 0.002379811267745294, "grad_norm": 5.033111095428467, "learning_rate": 2.378854625550661e-06, "loss": 0.7527, "step": 108 }, { "epoch": 0.002401846557261454, "grad_norm": 4.430014133453369, "learning_rate": 2.4008810572687224e-06, "loss": 0.8056, "step": 109 }, { "epoch": 0.0024238818467776143, "grad_norm": 4.626926898956299, "learning_rate": 2.4229074889867843e-06, "loss": 0.774, "step": 110 }, { "epoch": 0.0024459171362937746, "grad_norm": 4.512505054473877, "learning_rate": 2.4449339207048457e-06, "loss": 0.6828, "step": 111 }, { "epoch": 0.0024679524258099344, "grad_norm": 3.9183568954467773, "learning_rate": 2.4669603524229075e-06, "loss": 0.7165, "step": 112 }, { "epoch": 0.0024899877153260947, "grad_norm": 4.644755840301514, "learning_rate": 2.4889867841409693e-06, "loss": 0.758, "step": 113 }, { "epoch": 0.002512023004842255, "grad_norm": 3.956103801727295, "learning_rate": 2.5110132158590307e-06, "loss": 0.7366, "step": 114 }, { "epoch": 0.002534058294358415, "grad_norm": 4.6232686042785645, "learning_rate": 2.5330396475770925e-06, "loss": 0.7871, "step": 115 }, { "epoch": 0.002556093583874575, "grad_norm": 4.515181064605713, "learning_rate": 2.5550660792951543e-06, "loss": 0.7268, "step": 116 }, { "epoch": 0.0025781288733907354, "grad_norm": 4.467825412750244, "learning_rate": 2.577092511013216e-06, "loss": 0.7789, "step": 117 }, { "epoch": 0.0026001641629068953, "grad_norm": 4.605722904205322, "learning_rate": 2.5991189427312776e-06, "loss": 0.7256, "step": 118 }, { "epoch": 0.0026221994524230556, "grad_norm": 4.519166946411133, "learning_rate": 2.6211453744493394e-06, "loss": 0.7484, "step": 119 }, { "epoch": 0.0026442347419392154, "grad_norm": 4.480443477630615, "learning_rate": 2.643171806167401e-06, "loss": 0.682, "step": 120 }, { "epoch": 0.0026662700314553757, "grad_norm": 4.626105308532715, "learning_rate": 2.6651982378854626e-06, "loss": 0.73, "step": 121 }, { "epoch": 0.002688305320971536, "grad_norm": 4.555341720581055, "learning_rate": 2.6872246696035244e-06, "loss": 0.7574, "step": 122 }, { "epoch": 0.002710340610487696, "grad_norm": 4.622945308685303, "learning_rate": 2.709251101321586e-06, "loss": 0.6749, "step": 123 }, { "epoch": 0.002732375900003856, "grad_norm": 4.150022506713867, "learning_rate": 2.7312775330396476e-06, "loss": 0.7442, "step": 124 }, { "epoch": 0.0027544111895200164, "grad_norm": 4.175771713256836, "learning_rate": 2.7533039647577095e-06, "loss": 0.7562, "step": 125 }, { "epoch": 0.0027764464790361763, "grad_norm": 5.478354454040527, "learning_rate": 2.775330396475771e-06, "loss": 0.6433, "step": 126 }, { "epoch": 0.0027984817685523366, "grad_norm": 4.2222676277160645, "learning_rate": 2.7973568281938327e-06, "loss": 0.6835, "step": 127 }, { "epoch": 0.002820517058068497, "grad_norm": 4.664309501647949, "learning_rate": 2.819383259911894e-06, "loss": 0.6785, "step": 128 }, { "epoch": 0.0028425523475846567, "grad_norm": 4.1581315994262695, "learning_rate": 2.841409691629956e-06, "loss": 0.733, "step": 129 }, { "epoch": 0.002864587637100817, "grad_norm": 5.323365688323975, "learning_rate": 2.8634361233480177e-06, "loss": 0.6478, "step": 130 }, { "epoch": 0.0028866229266169773, "grad_norm": 4.387621879577637, "learning_rate": 2.885462555066079e-06, "loss": 0.6193, "step": 131 }, { "epoch": 0.002908658216133137, "grad_norm": 4.904480934143066, "learning_rate": 2.9074889867841414e-06, "loss": 0.6632, "step": 132 }, { "epoch": 0.0029306935056492974, "grad_norm": 4.969379901885986, "learning_rate": 2.9295154185022028e-06, "loss": 0.6984, "step": 133 }, { "epoch": 0.0029527287951654573, "grad_norm": 4.7311320304870605, "learning_rate": 2.9515418502202646e-06, "loss": 0.6734, "step": 134 }, { "epoch": 0.0029747640846816176, "grad_norm": 5.518086910247803, "learning_rate": 2.973568281938326e-06, "loss": 0.6668, "step": 135 }, { "epoch": 0.002996799374197778, "grad_norm": 4.98081111907959, "learning_rate": 2.995594713656388e-06, "loss": 0.6586, "step": 136 }, { "epoch": 0.0030188346637139377, "grad_norm": 3.8441755771636963, "learning_rate": 3.0176211453744496e-06, "loss": 0.635, "step": 137 }, { "epoch": 0.003040869953230098, "grad_norm": 4.4399638175964355, "learning_rate": 3.039647577092511e-06, "loss": 0.6028, "step": 138 }, { "epoch": 0.0030629052427462583, "grad_norm": 3.612694025039673, "learning_rate": 3.061674008810573e-06, "loss": 0.5698, "step": 139 }, { "epoch": 0.003084940532262418, "grad_norm": 5.392683982849121, "learning_rate": 3.0837004405286342e-06, "loss": 0.7578, "step": 140 }, { "epoch": 0.0031069758217785784, "grad_norm": 4.722553730010986, "learning_rate": 3.105726872246696e-06, "loss": 0.6811, "step": 141 }, { "epoch": 0.0031290111112947387, "grad_norm": 3.5989248752593994, "learning_rate": 3.127753303964758e-06, "loss": 0.57, "step": 142 }, { "epoch": 0.0031510464008108986, "grad_norm": 4.5936455726623535, "learning_rate": 3.1497797356828193e-06, "loss": 0.6061, "step": 143 }, { "epoch": 0.003173081690327059, "grad_norm": 4.4781107902526855, "learning_rate": 3.171806167400881e-06, "loss": 0.6728, "step": 144 }, { "epoch": 0.0031951169798432187, "grad_norm": 4.586116790771484, "learning_rate": 3.1938325991189425e-06, "loss": 0.6431, "step": 145 }, { "epoch": 0.003217152269359379, "grad_norm": 4.255934238433838, "learning_rate": 3.2158590308370043e-06, "loss": 0.5724, "step": 146 }, { "epoch": 0.0032391875588755393, "grad_norm": 4.9351630210876465, "learning_rate": 3.237885462555066e-06, "loss": 0.6172, "step": 147 }, { "epoch": 0.003261222848391699, "grad_norm": 4.485715866088867, "learning_rate": 3.259911894273128e-06, "loss": 0.513, "step": 148 }, { "epoch": 0.0032832581379078594, "grad_norm": 4.589945316314697, "learning_rate": 3.2819383259911898e-06, "loss": 0.5829, "step": 149 }, { "epoch": 0.0033052934274240197, "grad_norm": 4.480189800262451, "learning_rate": 3.303964757709251e-06, "loss": 0.6337, "step": 150 }, { "epoch": 0.0033273287169401796, "grad_norm": 4.141391277313232, "learning_rate": 3.325991189427313e-06, "loss": 0.5778, "step": 151 }, { "epoch": 0.00334936400645634, "grad_norm": 4.622832298278809, "learning_rate": 3.348017621145375e-06, "loss": 0.6726, "step": 152 }, { "epoch": 0.0033713992959725, "grad_norm": 3.984546422958374, "learning_rate": 3.3700440528634362e-06, "loss": 0.6045, "step": 153 }, { "epoch": 0.00339343458548866, "grad_norm": 3.8441805839538574, "learning_rate": 3.392070484581498e-06, "loss": 0.6001, "step": 154 }, { "epoch": 0.0034154698750048203, "grad_norm": 4.488755226135254, "learning_rate": 3.4140969162995594e-06, "loss": 0.6086, "step": 155 }, { "epoch": 0.00343750516452098, "grad_norm": 4.372840404510498, "learning_rate": 3.4361233480176213e-06, "loss": 0.6359, "step": 156 }, { "epoch": 0.0034595404540371404, "grad_norm": 4.692843437194824, "learning_rate": 3.4581497797356827e-06, "loss": 0.6428, "step": 157 }, { "epoch": 0.0034815757435533007, "grad_norm": 4.342028617858887, "learning_rate": 3.4801762114537445e-06, "loss": 0.6161, "step": 158 }, { "epoch": 0.0035036110330694606, "grad_norm": 4.23424768447876, "learning_rate": 3.5022026431718063e-06, "loss": 0.5713, "step": 159 }, { "epoch": 0.003525646322585621, "grad_norm": 4.328470706939697, "learning_rate": 3.5242290748898677e-06, "loss": 0.5496, "step": 160 }, { "epoch": 0.003547681612101781, "grad_norm": 4.369101047515869, "learning_rate": 3.5462555066079295e-06, "loss": 0.6083, "step": 161 }, { "epoch": 0.003569716901617941, "grad_norm": 4.731849193572998, "learning_rate": 3.568281938325991e-06, "loss": 0.5993, "step": 162 }, { "epoch": 0.0035917521911341013, "grad_norm": 3.969937324523926, "learning_rate": 3.590308370044053e-06, "loss": 0.534, "step": 163 }, { "epoch": 0.0036137874806502616, "grad_norm": 4.261894226074219, "learning_rate": 3.612334801762115e-06, "loss": 0.5684, "step": 164 }, { "epoch": 0.0036358227701664214, "grad_norm": 4.561306476593018, "learning_rate": 3.6343612334801764e-06, "loss": 0.595, "step": 165 }, { "epoch": 0.0036578580596825817, "grad_norm": 4.508062362670898, "learning_rate": 3.656387665198238e-06, "loss": 0.5866, "step": 166 }, { "epoch": 0.0036798933491987416, "grad_norm": 4.511661052703857, "learning_rate": 3.6784140969162996e-06, "loss": 0.5354, "step": 167 }, { "epoch": 0.003701928638714902, "grad_norm": 4.4026923179626465, "learning_rate": 3.7004405286343614e-06, "loss": 0.5806, "step": 168 }, { "epoch": 0.003723963928231062, "grad_norm": 4.105160713195801, "learning_rate": 3.7224669603524232e-06, "loss": 0.571, "step": 169 }, { "epoch": 0.003745999217747222, "grad_norm": 4.638918399810791, "learning_rate": 3.7444933920704846e-06, "loss": 0.5633, "step": 170 }, { "epoch": 0.0037680345072633823, "grad_norm": 4.557953834533691, "learning_rate": 3.7665198237885465e-06, "loss": 0.5516, "step": 171 }, { "epoch": 0.0037900697967795426, "grad_norm": 4.504685401916504, "learning_rate": 3.7885462555066083e-06, "loss": 0.558, "step": 172 }, { "epoch": 0.0038121050862957024, "grad_norm": 4.116576194763184, "learning_rate": 3.8105726872246693e-06, "loss": 0.638, "step": 173 }, { "epoch": 0.0038341403758118627, "grad_norm": 4.4539899826049805, "learning_rate": 3.8325991189427315e-06, "loss": 0.5526, "step": 174 }, { "epoch": 0.003856175665328023, "grad_norm": 3.802306652069092, "learning_rate": 3.854625550660793e-06, "loss": 0.5132, "step": 175 }, { "epoch": 0.003878210954844183, "grad_norm": 4.620787620544434, "learning_rate": 3.876651982378855e-06, "loss": 0.5418, "step": 176 }, { "epoch": 0.003900246244360343, "grad_norm": 4.530858039855957, "learning_rate": 3.898678414096917e-06, "loss": 0.5437, "step": 177 }, { "epoch": 0.0039222815338765035, "grad_norm": 5.1028618812561035, "learning_rate": 3.920704845814978e-06, "loss": 0.6031, "step": 178 }, { "epoch": 0.003944316823392664, "grad_norm": 4.622397422790527, "learning_rate": 3.94273127753304e-06, "loss": 0.465, "step": 179 }, { "epoch": 0.003966352112908823, "grad_norm": 3.723165988922119, "learning_rate": 3.964757709251102e-06, "loss": 0.5524, "step": 180 }, { "epoch": 0.0039883874024249834, "grad_norm": 4.541408538818359, "learning_rate": 3.986784140969163e-06, "loss": 0.5352, "step": 181 }, { "epoch": 0.004010422691941144, "grad_norm": 4.119327545166016, "learning_rate": 4.008810572687225e-06, "loss": 0.5424, "step": 182 }, { "epoch": 0.004032457981457304, "grad_norm": 4.565866947174072, "learning_rate": 4.030837004405286e-06, "loss": 0.5362, "step": 183 }, { "epoch": 0.004054493270973464, "grad_norm": 3.8576784133911133, "learning_rate": 4.052863436123348e-06, "loss": 0.5253, "step": 184 }, { "epoch": 0.004076528560489624, "grad_norm": 4.053845405578613, "learning_rate": 4.07488986784141e-06, "loss": 0.518, "step": 185 }, { "epoch": 0.004098563850005784, "grad_norm": 3.7366268634796143, "learning_rate": 4.096916299559472e-06, "loss": 0.4213, "step": 186 }, { "epoch": 0.004120599139521944, "grad_norm": 4.67158842086792, "learning_rate": 4.1189427312775335e-06, "loss": 0.5494, "step": 187 }, { "epoch": 0.004142634429038105, "grad_norm": 3.816983938217163, "learning_rate": 4.1409691629955945e-06, "loss": 0.5093, "step": 188 }, { "epoch": 0.004164669718554265, "grad_norm": 4.126262187957764, "learning_rate": 4.162995594713656e-06, "loss": 0.456, "step": 189 }, { "epoch": 0.004186705008070425, "grad_norm": 4.438512325286865, "learning_rate": 4.185022026431718e-06, "loss": 0.5346, "step": 190 }, { "epoch": 0.004208740297586585, "grad_norm": 4.335185527801514, "learning_rate": 4.20704845814978e-06, "loss": 0.5123, "step": 191 }, { "epoch": 0.004230775587102745, "grad_norm": 4.388969421386719, "learning_rate": 4.229074889867842e-06, "loss": 0.4479, "step": 192 }, { "epoch": 0.004252810876618905, "grad_norm": 4.707867622375488, "learning_rate": 4.251101321585903e-06, "loss": 0.525, "step": 193 }, { "epoch": 0.0042748461661350655, "grad_norm": 3.9091434478759766, "learning_rate": 4.2731277533039645e-06, "loss": 0.4818, "step": 194 }, { "epoch": 0.004296881455651226, "grad_norm": 4.97855281829834, "learning_rate": 4.295154185022026e-06, "loss": 0.5295, "step": 195 }, { "epoch": 0.004318916745167385, "grad_norm": 4.0719170570373535, "learning_rate": 4.317180616740088e-06, "loss": 0.4405, "step": 196 }, { "epoch": 0.0043409520346835455, "grad_norm": 3.4649999141693115, "learning_rate": 4.33920704845815e-06, "loss": 0.478, "step": 197 }, { "epoch": 0.004362987324199706, "grad_norm": 4.488457202911377, "learning_rate": 4.361233480176211e-06, "loss": 0.4861, "step": 198 }, { "epoch": 0.004385022613715866, "grad_norm": 4.489469051361084, "learning_rate": 4.383259911894273e-06, "loss": 0.4779, "step": 199 }, { "epoch": 0.004407057903232026, "grad_norm": 3.825566053390503, "learning_rate": 4.405286343612335e-06, "loss": 0.4672, "step": 200 }, { "epoch": 0.004429093192748187, "grad_norm": 6.108468532562256, "learning_rate": 4.4273127753303964e-06, "loss": 0.5131, "step": 201 }, { "epoch": 0.004451128482264346, "grad_norm": 3.8309035301208496, "learning_rate": 4.449339207048458e-06, "loss": 0.4626, "step": 202 }, { "epoch": 0.004473163771780506, "grad_norm": 3.8892319202423096, "learning_rate": 4.47136563876652e-06, "loss": 0.6039, "step": 203 }, { "epoch": 0.004495199061296667, "grad_norm": 3.976562738418579, "learning_rate": 4.493392070484582e-06, "loss": 0.4909, "step": 204 }, { "epoch": 0.004517234350812827, "grad_norm": 3.386951446533203, "learning_rate": 4.515418502202644e-06, "loss": 0.4263, "step": 205 }, { "epoch": 0.004539269640328987, "grad_norm": 4.194526672363281, "learning_rate": 4.5374449339207055e-06, "loss": 0.4092, "step": 206 }, { "epoch": 0.004561304929845147, "grad_norm": 3.7113382816314697, "learning_rate": 4.5594713656387665e-06, "loss": 0.4995, "step": 207 }, { "epoch": 0.004583340219361307, "grad_norm": 4.010459899902344, "learning_rate": 4.581497797356828e-06, "loss": 0.5355, "step": 208 }, { "epoch": 0.004605375508877467, "grad_norm": 4.476009845733643, "learning_rate": 4.60352422907489e-06, "loss": 0.4802, "step": 209 }, { "epoch": 0.0046274107983936275, "grad_norm": 4.6383137702941895, "learning_rate": 4.625550660792952e-06, "loss": 0.4839, "step": 210 }, { "epoch": 0.004649446087909788, "grad_norm": 4.935291290283203, "learning_rate": 4.647577092511014e-06, "loss": 0.4851, "step": 211 }, { "epoch": 0.004671481377425948, "grad_norm": 4.0498528480529785, "learning_rate": 4.669603524229075e-06, "loss": 0.4779, "step": 212 }, { "epoch": 0.0046935166669421075, "grad_norm": 3.4303297996520996, "learning_rate": 4.691629955947137e-06, "loss": 0.5107, "step": 213 }, { "epoch": 0.004715551956458268, "grad_norm": 4.609081745147705, "learning_rate": 4.7136563876651984e-06, "loss": 0.4378, "step": 214 }, { "epoch": 0.004737587245974428, "grad_norm": 5.003839492797852, "learning_rate": 4.73568281938326e-06, "loss": 0.5046, "step": 215 }, { "epoch": 0.004759622535490588, "grad_norm": 5.200089454650879, "learning_rate": 4.757709251101322e-06, "loss": 0.4767, "step": 216 }, { "epoch": 0.004781657825006749, "grad_norm": 5.4806060791015625, "learning_rate": 4.779735682819383e-06, "loss": 0.4709, "step": 217 }, { "epoch": 0.004803693114522908, "grad_norm": 3.817233085632324, "learning_rate": 4.801762114537445e-06, "loss": 0.4754, "step": 218 }, { "epoch": 0.004825728404039068, "grad_norm": 4.592571258544922, "learning_rate": 4.823788546255507e-06, "loss": 0.5482, "step": 219 }, { "epoch": 0.004847763693555229, "grad_norm": 3.6904609203338623, "learning_rate": 4.8458149779735685e-06, "loss": 0.4083, "step": 220 }, { "epoch": 0.004869798983071389, "grad_norm": 3.656541347503662, "learning_rate": 4.86784140969163e-06, "loss": 0.495, "step": 221 }, { "epoch": 0.004891834272587549, "grad_norm": 3.9072365760803223, "learning_rate": 4.889867841409691e-06, "loss": 0.4905, "step": 222 }, { "epoch": 0.0049138695621037095, "grad_norm": 3.8386282920837402, "learning_rate": 4.911894273127753e-06, "loss": 0.5396, "step": 223 }, { "epoch": 0.004935904851619869, "grad_norm": 3.445505380630493, "learning_rate": 4.933920704845815e-06, "loss": 0.5057, "step": 224 }, { "epoch": 0.004957940141136029, "grad_norm": 4.076171398162842, "learning_rate": 4.955947136563877e-06, "loss": 0.4511, "step": 225 }, { "epoch": 0.0049799754306521895, "grad_norm": 4.79937219619751, "learning_rate": 4.977973568281939e-06, "loss": 0.4884, "step": 226 }, { "epoch": 0.00500201072016835, "grad_norm": 3.633678913116455, "learning_rate": 4.9999999999999996e-06, "loss": 0.4521, "step": 227 }, { "epoch": 0.00502404600968451, "grad_norm": 5.197061538696289, "learning_rate": 5.022026431718061e-06, "loss": 0.5572, "step": 228 }, { "epoch": 0.0050460812992006695, "grad_norm": 3.981276750564575, "learning_rate": 5.044052863436123e-06, "loss": 0.4613, "step": 229 }, { "epoch": 0.00506811658871683, "grad_norm": 4.301450252532959, "learning_rate": 5.066079295154185e-06, "loss": 0.4111, "step": 230 }, { "epoch": 0.00509015187823299, "grad_norm": 3.4767324924468994, "learning_rate": 5.088105726872247e-06, "loss": 0.4199, "step": 231 }, { "epoch": 0.00511218716774915, "grad_norm": 4.206533432006836, "learning_rate": 5.110132158590309e-06, "loss": 0.393, "step": 232 }, { "epoch": 0.005134222457265311, "grad_norm": 4.781305313110352, "learning_rate": 5.1321585903083705e-06, "loss": 0.3909, "step": 233 }, { "epoch": 0.005156257746781471, "grad_norm": 3.7678451538085938, "learning_rate": 5.154185022026432e-06, "loss": 0.4788, "step": 234 }, { "epoch": 0.00517829303629763, "grad_norm": 3.949051856994629, "learning_rate": 5.176211453744494e-06, "loss": 0.4748, "step": 235 }, { "epoch": 0.005200328325813791, "grad_norm": 4.247105598449707, "learning_rate": 5.198237885462555e-06, "loss": 0.484, "step": 236 }, { "epoch": 0.005222363615329951, "grad_norm": 4.526872634887695, "learning_rate": 5.220264317180617e-06, "loss": 0.4023, "step": 237 }, { "epoch": 0.005244398904846111, "grad_norm": 3.8285973072052, "learning_rate": 5.242290748898679e-06, "loss": 0.3949, "step": 238 }, { "epoch": 0.0052664341943622715, "grad_norm": 3.487353563308716, "learning_rate": 5.2643171806167406e-06, "loss": 0.4385, "step": 239 }, { "epoch": 0.005288469483878431, "grad_norm": 4.374270439147949, "learning_rate": 5.286343612334802e-06, "loss": 0.4201, "step": 240 }, { "epoch": 0.005310504773394591, "grad_norm": 4.428084850311279, "learning_rate": 5.308370044052863e-06, "loss": 0.4864, "step": 241 }, { "epoch": 0.0053325400629107515, "grad_norm": 5.060960292816162, "learning_rate": 5.330396475770925e-06, "loss": 0.4636, "step": 242 }, { "epoch": 0.005354575352426912, "grad_norm": 5.051955699920654, "learning_rate": 5.352422907488987e-06, "loss": 0.424, "step": 243 }, { "epoch": 0.005376610641943072, "grad_norm": 3.9420509338378906, "learning_rate": 5.374449339207049e-06, "loss": 0.4045, "step": 244 }, { "epoch": 0.005398645931459232, "grad_norm": 4.416740417480469, "learning_rate": 5.396475770925111e-06, "loss": 0.4547, "step": 245 }, { "epoch": 0.005420681220975392, "grad_norm": 4.701189041137695, "learning_rate": 5.418502202643172e-06, "loss": 0.4624, "step": 246 }, { "epoch": 0.005442716510491552, "grad_norm": 3.592484474182129, "learning_rate": 5.4405286343612334e-06, "loss": 0.3721, "step": 247 }, { "epoch": 0.005464751800007712, "grad_norm": 4.037052154541016, "learning_rate": 5.462555066079295e-06, "loss": 0.477, "step": 248 }, { "epoch": 0.005486787089523873, "grad_norm": 4.505414009094238, "learning_rate": 5.484581497797357e-06, "loss": 0.4393, "step": 249 }, { "epoch": 0.005508822379040033, "grad_norm": 4.363866329193115, "learning_rate": 5.506607929515419e-06, "loss": 0.4158, "step": 250 }, { "epoch": 0.005530857668556192, "grad_norm": 3.4859724044799805, "learning_rate": 5.52863436123348e-06, "loss": 0.3847, "step": 251 }, { "epoch": 0.005552892958072353, "grad_norm": 3.9141509532928467, "learning_rate": 5.550660792951542e-06, "loss": 0.4153, "step": 252 }, { "epoch": 0.005574928247588513, "grad_norm": 4.2472825050354, "learning_rate": 5.5726872246696035e-06, "loss": 0.4311, "step": 253 }, { "epoch": 0.005596963537104673, "grad_norm": 3.5479774475097656, "learning_rate": 5.594713656387665e-06, "loss": 0.439, "step": 254 }, { "epoch": 0.0056189988266208335, "grad_norm": 3.833350419998169, "learning_rate": 5.616740088105727e-06, "loss": 0.3964, "step": 255 }, { "epoch": 0.005641034116136994, "grad_norm": 3.6379685401916504, "learning_rate": 5.638766519823788e-06, "loss": 0.4267, "step": 256 }, { "epoch": 0.005663069405653153, "grad_norm": 5.40219783782959, "learning_rate": 5.66079295154185e-06, "loss": 0.4134, "step": 257 }, { "epoch": 0.0056851046951693135, "grad_norm": 3.949547529220581, "learning_rate": 5.682819383259912e-06, "loss": 0.4589, "step": 258 }, { "epoch": 0.005707139984685474, "grad_norm": 5.185919284820557, "learning_rate": 5.704845814977974e-06, "loss": 0.4138, "step": 259 }, { "epoch": 0.005729175274201634, "grad_norm": 5.4088897705078125, "learning_rate": 5.7268722466960354e-06, "loss": 0.4396, "step": 260 }, { "epoch": 0.005751210563717794, "grad_norm": 3.3954687118530273, "learning_rate": 5.748898678414096e-06, "loss": 0.3853, "step": 261 }, { "epoch": 0.005773245853233955, "grad_norm": 4.073044776916504, "learning_rate": 5.770925110132158e-06, "loss": 0.3979, "step": 262 }, { "epoch": 0.005795281142750114, "grad_norm": 3.4753637313842773, "learning_rate": 5.79295154185022e-06, "loss": 0.4053, "step": 263 }, { "epoch": 0.005817316432266274, "grad_norm": 4.085331916809082, "learning_rate": 5.814977973568283e-06, "loss": 0.3965, "step": 264 }, { "epoch": 0.005839351721782435, "grad_norm": 4.184905529022217, "learning_rate": 5.8370044052863445e-06, "loss": 0.4533, "step": 265 }, { "epoch": 0.005861387011298595, "grad_norm": 3.8815622329711914, "learning_rate": 5.8590308370044055e-06, "loss": 0.3604, "step": 266 }, { "epoch": 0.005883422300814755, "grad_norm": 3.6461002826690674, "learning_rate": 5.881057268722467e-06, "loss": 0.3768, "step": 267 }, { "epoch": 0.005905457590330915, "grad_norm": 5.01765251159668, "learning_rate": 5.903083700440529e-06, "loss": 0.3671, "step": 268 }, { "epoch": 0.005927492879847075, "grad_norm": 3.8605992794036865, "learning_rate": 5.925110132158591e-06, "loss": 0.4082, "step": 269 }, { "epoch": 0.005949528169363235, "grad_norm": 3.454718589782715, "learning_rate": 5.947136563876652e-06, "loss": 0.4055, "step": 270 }, { "epoch": 0.0059715634588793955, "grad_norm": 4.760176658630371, "learning_rate": 5.969162995594714e-06, "loss": 0.4561, "step": 271 }, { "epoch": 0.005993598748395556, "grad_norm": 3.5970613956451416, "learning_rate": 5.991189427312776e-06, "loss": 0.4224, "step": 272 }, { "epoch": 0.006015634037911716, "grad_norm": 4.418846130371094, "learning_rate": 6.013215859030837e-06, "loss": 0.4091, "step": 273 }, { "epoch": 0.0060376693274278755, "grad_norm": 4.187165260314941, "learning_rate": 6.035242290748899e-06, "loss": 0.4427, "step": 274 }, { "epoch": 0.006059704616944036, "grad_norm": 5.082152843475342, "learning_rate": 6.05726872246696e-06, "loss": 0.4314, "step": 275 }, { "epoch": 0.006081739906460196, "grad_norm": 3.5163066387176514, "learning_rate": 6.079295154185022e-06, "loss": 0.363, "step": 276 }, { "epoch": 0.006103775195976356, "grad_norm": 5.062868118286133, "learning_rate": 6.101321585903084e-06, "loss": 0.4333, "step": 277 }, { "epoch": 0.006125810485492517, "grad_norm": 3.5544567108154297, "learning_rate": 6.123348017621146e-06, "loss": 0.4052, "step": 278 }, { "epoch": 0.006147845775008676, "grad_norm": 4.34557580947876, "learning_rate": 6.1453744493392075e-06, "loss": 0.3539, "step": 279 }, { "epoch": 0.006169881064524836, "grad_norm": 4.244579315185547, "learning_rate": 6.1674008810572685e-06, "loss": 0.4213, "step": 280 }, { "epoch": 0.006191916354040997, "grad_norm": 4.048991680145264, "learning_rate": 6.18942731277533e-06, "loss": 0.4492, "step": 281 }, { "epoch": 0.006213951643557157, "grad_norm": 3.2557361125946045, "learning_rate": 6.211453744493392e-06, "loss": 0.3035, "step": 282 }, { "epoch": 0.006235986933073317, "grad_norm": 4.054141521453857, "learning_rate": 6.233480176211454e-06, "loss": 0.4228, "step": 283 }, { "epoch": 0.0062580222225894775, "grad_norm": 3.9855895042419434, "learning_rate": 6.255506607929516e-06, "loss": 0.5035, "step": 284 }, { "epoch": 0.006280057512105637, "grad_norm": 4.252941608428955, "learning_rate": 6.277533039647577e-06, "loss": 0.4568, "step": 285 }, { "epoch": 0.006302092801621797, "grad_norm": 4.743152618408203, "learning_rate": 6.2995594713656386e-06, "loss": 0.4554, "step": 286 }, { "epoch": 0.0063241280911379575, "grad_norm": 3.492908000946045, "learning_rate": 6.3215859030837e-06, "loss": 0.3361, "step": 287 }, { "epoch": 0.006346163380654118, "grad_norm": 3.8294730186462402, "learning_rate": 6.343612334801762e-06, "loss": 0.3513, "step": 288 }, { "epoch": 0.006368198670170278, "grad_norm": 4.074195861816406, "learning_rate": 6.365638766519824e-06, "loss": 0.3966, "step": 289 }, { "epoch": 0.0063902339596864375, "grad_norm": 4.61902379989624, "learning_rate": 6.387665198237885e-06, "loss": 0.4561, "step": 290 }, { "epoch": 0.006412269249202598, "grad_norm": 3.8646469116210938, "learning_rate": 6.409691629955947e-06, "loss": 0.4339, "step": 291 }, { "epoch": 0.006434304538718758, "grad_norm": 4.366521835327148, "learning_rate": 6.431718061674009e-06, "loss": 0.4236, "step": 292 }, { "epoch": 0.006456339828234918, "grad_norm": 3.8180365562438965, "learning_rate": 6.4537444933920705e-06, "loss": 0.4126, "step": 293 }, { "epoch": 0.006478375117751079, "grad_norm": 3.7844278812408447, "learning_rate": 6.475770925110132e-06, "loss": 0.4122, "step": 294 }, { "epoch": 0.006500410407267239, "grad_norm": 5.262153148651123, "learning_rate": 6.497797356828194e-06, "loss": 0.4838, "step": 295 }, { "epoch": 0.006522445696783398, "grad_norm": 3.5902860164642334, "learning_rate": 6.519823788546256e-06, "loss": 0.4041, "step": 296 }, { "epoch": 0.006544480986299559, "grad_norm": 3.889249563217163, "learning_rate": 6.541850220264318e-06, "loss": 0.3635, "step": 297 }, { "epoch": 0.006566516275815719, "grad_norm": 3.9349656105041504, "learning_rate": 6.5638766519823796e-06, "loss": 0.3736, "step": 298 }, { "epoch": 0.006588551565331879, "grad_norm": 4.753224849700928, "learning_rate": 6.585903083700441e-06, "loss": 0.4097, "step": 299 }, { "epoch": 0.0066105868548480395, "grad_norm": 4.52436637878418, "learning_rate": 6.607929515418502e-06, "loss": 0.3667, "step": 300 }, { "epoch": 0.006632622144364199, "grad_norm": 3.862224578857422, "learning_rate": 6.629955947136564e-06, "loss": 0.3592, "step": 301 }, { "epoch": 0.006654657433880359, "grad_norm": 4.129425525665283, "learning_rate": 6.651982378854626e-06, "loss": 0.4008, "step": 302 }, { "epoch": 0.0066766927233965195, "grad_norm": 3.5636045932769775, "learning_rate": 6.674008810572688e-06, "loss": 0.4092, "step": 303 }, { "epoch": 0.00669872801291268, "grad_norm": 3.8120152950286865, "learning_rate": 6.69603524229075e-06, "loss": 0.4079, "step": 304 }, { "epoch": 0.00672076330242884, "grad_norm": 3.6423521041870117, "learning_rate": 6.718061674008811e-06, "loss": 0.3741, "step": 305 }, { "epoch": 0.006742798591945, "grad_norm": 2.913937568664551, "learning_rate": 6.7400881057268724e-06, "loss": 0.3709, "step": 306 }, { "epoch": 0.00676483388146116, "grad_norm": 3.806185007095337, "learning_rate": 6.762114537444934e-06, "loss": 0.3637, "step": 307 }, { "epoch": 0.00678686917097732, "grad_norm": 4.274352550506592, "learning_rate": 6.784140969162996e-06, "loss": 0.408, "step": 308 }, { "epoch": 0.00680890446049348, "grad_norm": 3.804652690887451, "learning_rate": 6.806167400881057e-06, "loss": 0.3261, "step": 309 }, { "epoch": 0.006830939750009641, "grad_norm": 3.9296226501464844, "learning_rate": 6.828193832599119e-06, "loss": 0.3854, "step": 310 }, { "epoch": 0.006852975039525801, "grad_norm": 3.5890614986419678, "learning_rate": 6.850220264317181e-06, "loss": 0.4018, "step": 311 }, { "epoch": 0.00687501032904196, "grad_norm": 3.756777763366699, "learning_rate": 6.8722466960352425e-06, "loss": 0.3694, "step": 312 }, { "epoch": 0.006897045618558121, "grad_norm": 3.66719651222229, "learning_rate": 6.894273127753304e-06, "loss": 0.3578, "step": 313 }, { "epoch": 0.006919080908074281, "grad_norm": 4.1880621910095215, "learning_rate": 6.916299559471365e-06, "loss": 0.3706, "step": 314 }, { "epoch": 0.006941116197590441, "grad_norm": 4.492776870727539, "learning_rate": 6.938325991189427e-06, "loss": 0.3106, "step": 315 }, { "epoch": 0.0069631514871066015, "grad_norm": 4.331742286682129, "learning_rate": 6.960352422907489e-06, "loss": 0.3679, "step": 316 }, { "epoch": 0.006985186776622762, "grad_norm": 3.6891157627105713, "learning_rate": 6.982378854625551e-06, "loss": 0.3453, "step": 317 }, { "epoch": 0.007007222066138921, "grad_norm": 3.7290730476379395, "learning_rate": 7.004405286343613e-06, "loss": 0.32, "step": 318 }, { "epoch": 0.0070292573556550815, "grad_norm": 3.4580330848693848, "learning_rate": 7.0264317180616736e-06, "loss": 0.3359, "step": 319 }, { "epoch": 0.007051292645171242, "grad_norm": 4.30208158493042, "learning_rate": 7.048458149779735e-06, "loss": 0.3573, "step": 320 }, { "epoch": 0.007073327934687402, "grad_norm": 4.812725067138672, "learning_rate": 7.070484581497797e-06, "loss": 0.3522, "step": 321 }, { "epoch": 0.007095363224203562, "grad_norm": 4.041245460510254, "learning_rate": 7.092511013215859e-06, "loss": 0.349, "step": 322 }, { "epoch": 0.007117398513719722, "grad_norm": 4.966184616088867, "learning_rate": 7.114537444933921e-06, "loss": 0.3979, "step": 323 }, { "epoch": 0.007139433803235882, "grad_norm": 3.5850229263305664, "learning_rate": 7.136563876651982e-06, "loss": 0.3678, "step": 324 }, { "epoch": 0.007161469092752042, "grad_norm": 4.093824863433838, "learning_rate": 7.1585903083700445e-06, "loss": 0.3812, "step": 325 }, { "epoch": 0.007183504382268203, "grad_norm": 3.6403210163116455, "learning_rate": 7.180616740088106e-06, "loss": 0.3766, "step": 326 }, { "epoch": 0.007205539671784363, "grad_norm": 3.560739278793335, "learning_rate": 7.202643171806168e-06, "loss": 0.3412, "step": 327 }, { "epoch": 0.007227574961300523, "grad_norm": 4.048598289489746, "learning_rate": 7.22466960352423e-06, "loss": 0.3868, "step": 328 }, { "epoch": 0.007249610250816683, "grad_norm": 3.9898486137390137, "learning_rate": 7.246696035242291e-06, "loss": 0.3616, "step": 329 }, { "epoch": 0.007271645540332843, "grad_norm": 4.040022373199463, "learning_rate": 7.268722466960353e-06, "loss": 0.3603, "step": 330 }, { "epoch": 0.007293680829849003, "grad_norm": 4.751725196838379, "learning_rate": 7.290748898678415e-06, "loss": 0.3658, "step": 331 }, { "epoch": 0.0073157161193651635, "grad_norm": 3.520599603652954, "learning_rate": 7.312775330396476e-06, "loss": 0.3106, "step": 332 }, { "epoch": 0.007337751408881324, "grad_norm": 3.768846273422241, "learning_rate": 7.334801762114538e-06, "loss": 0.2995, "step": 333 }, { "epoch": 0.007359786698397483, "grad_norm": 5.417898654937744, "learning_rate": 7.356828193832599e-06, "loss": 0.3695, "step": 334 }, { "epoch": 0.0073818219879136435, "grad_norm": 3.4114747047424316, "learning_rate": 7.378854625550661e-06, "loss": 0.3172, "step": 335 }, { "epoch": 0.007403857277429804, "grad_norm": 4.177585601806641, "learning_rate": 7.400881057268723e-06, "loss": 0.3821, "step": 336 }, { "epoch": 0.007425892566945964, "grad_norm": 3.640482187271118, "learning_rate": 7.422907488986785e-06, "loss": 0.3968, "step": 337 }, { "epoch": 0.007447927856462124, "grad_norm": 4.0594401359558105, "learning_rate": 7.4449339207048465e-06, "loss": 0.3857, "step": 338 }, { "epoch": 0.007469963145978285, "grad_norm": 4.5109381675720215, "learning_rate": 7.4669603524229075e-06, "loss": 0.3792, "step": 339 }, { "epoch": 0.007491998435494444, "grad_norm": 4.187204360961914, "learning_rate": 7.488986784140969e-06, "loss": 0.3073, "step": 340 }, { "epoch": 0.007514033725010604, "grad_norm": 3.258667230606079, "learning_rate": 7.511013215859031e-06, "loss": 0.319, "step": 341 }, { "epoch": 0.007536069014526765, "grad_norm": 3.2926366329193115, "learning_rate": 7.533039647577093e-06, "loss": 0.4106, "step": 342 }, { "epoch": 0.007558104304042925, "grad_norm": 2.90930438041687, "learning_rate": 7.555066079295155e-06, "loss": 0.3474, "step": 343 }, { "epoch": 0.007580139593559085, "grad_norm": 4.358021259307861, "learning_rate": 7.5770925110132166e-06, "loss": 0.3029, "step": 344 }, { "epoch": 0.007602174883075245, "grad_norm": 3.783026695251465, "learning_rate": 7.599118942731278e-06, "loss": 0.2927, "step": 345 }, { "epoch": 0.007624210172591405, "grad_norm": 3.868164539337158, "learning_rate": 7.6211453744493385e-06, "loss": 0.2555, "step": 346 }, { "epoch": 0.007646245462107565, "grad_norm": 3.586362838745117, "learning_rate": 7.643171806167401e-06, "loss": 0.3967, "step": 347 }, { "epoch": 0.0076682807516237255, "grad_norm": 3.8789901733398438, "learning_rate": 7.665198237885463e-06, "loss": 0.2884, "step": 348 }, { "epoch": 0.007690316041139886, "grad_norm": 3.2446577548980713, "learning_rate": 7.687224669603525e-06, "loss": 0.351, "step": 349 }, { "epoch": 0.007712351330656046, "grad_norm": 3.016533374786377, "learning_rate": 7.709251101321587e-06, "loss": 0.2998, "step": 350 }, { "epoch": 0.0077343866201722055, "grad_norm": 3.669875383377075, "learning_rate": 7.731277533039648e-06, "loss": 0.3297, "step": 351 }, { "epoch": 0.007756421909688366, "grad_norm": 5.233467102050781, "learning_rate": 7.75330396475771e-06, "loss": 0.4109, "step": 352 }, { "epoch": 0.007778457199204526, "grad_norm": 4.5610833168029785, "learning_rate": 7.775330396475772e-06, "loss": 0.335, "step": 353 }, { "epoch": 0.007800492488720686, "grad_norm": 3.439732551574707, "learning_rate": 7.797356828193834e-06, "loss": 0.3317, "step": 354 }, { "epoch": 0.007822527778236846, "grad_norm": 3.3706507682800293, "learning_rate": 7.819383259911896e-06, "loss": 0.3211, "step": 355 }, { "epoch": 0.007844563067753007, "grad_norm": 4.486478328704834, "learning_rate": 7.841409691629956e-06, "loss": 0.339, "step": 356 }, { "epoch": 0.007866598357269166, "grad_norm": 5.038827419281006, "learning_rate": 7.863436123348018e-06, "loss": 0.3826, "step": 357 }, { "epoch": 0.007888633646785327, "grad_norm": 3.6392154693603516, "learning_rate": 7.88546255506608e-06, "loss": 0.3767, "step": 358 }, { "epoch": 0.007910668936301487, "grad_norm": 4.827049732208252, "learning_rate": 7.907488986784141e-06, "loss": 0.4282, "step": 359 }, { "epoch": 0.007932704225817646, "grad_norm": 3.4365665912628174, "learning_rate": 7.929515418502203e-06, "loss": 0.3352, "step": 360 }, { "epoch": 0.007954739515333807, "grad_norm": 3.3142614364624023, "learning_rate": 7.951541850220265e-06, "loss": 0.3247, "step": 361 }, { "epoch": 0.007976774804849967, "grad_norm": 4.206857681274414, "learning_rate": 7.973568281938327e-06, "loss": 0.3331, "step": 362 }, { "epoch": 0.007998810094366128, "grad_norm": 3.3698906898498535, "learning_rate": 7.995594713656389e-06, "loss": 0.3207, "step": 363 }, { "epoch": 0.008020845383882287, "grad_norm": 3.917440414428711, "learning_rate": 8.01762114537445e-06, "loss": 0.311, "step": 364 }, { "epoch": 0.008042880673398447, "grad_norm": 4.325022220611572, "learning_rate": 8.03964757709251e-06, "loss": 0.365, "step": 365 }, { "epoch": 0.008064915962914608, "grad_norm": 4.2359466552734375, "learning_rate": 8.061674008810572e-06, "loss": 0.3689, "step": 366 }, { "epoch": 0.008086951252430767, "grad_norm": 4.011540412902832, "learning_rate": 8.083700440528634e-06, "loss": 0.3511, "step": 367 }, { "epoch": 0.008108986541946929, "grad_norm": 4.187591552734375, "learning_rate": 8.105726872246696e-06, "loss": 0.3499, "step": 368 }, { "epoch": 0.008131021831463088, "grad_norm": 3.5033881664276123, "learning_rate": 8.127753303964758e-06, "loss": 0.3025, "step": 369 }, { "epoch": 0.008153057120979247, "grad_norm": 4.572875022888184, "learning_rate": 8.14977973568282e-06, "loss": 0.4066, "step": 370 }, { "epoch": 0.008175092410495409, "grad_norm": 5.513815402984619, "learning_rate": 8.171806167400882e-06, "loss": 0.3415, "step": 371 }, { "epoch": 0.008197127700011568, "grad_norm": 4.214174270629883, "learning_rate": 8.193832599118943e-06, "loss": 0.3564, "step": 372 }, { "epoch": 0.00821916298952773, "grad_norm": 3.9623289108276367, "learning_rate": 8.215859030837005e-06, "loss": 0.2992, "step": 373 }, { "epoch": 0.008241198279043889, "grad_norm": 4.9878973960876465, "learning_rate": 8.237885462555067e-06, "loss": 0.3548, "step": 374 }, { "epoch": 0.00826323356856005, "grad_norm": 5.080916881561279, "learning_rate": 8.259911894273127e-06, "loss": 0.316, "step": 375 }, { "epoch": 0.00828526885807621, "grad_norm": 3.9703171253204346, "learning_rate": 8.281938325991189e-06, "loss": 0.353, "step": 376 }, { "epoch": 0.008307304147592369, "grad_norm": 4.666238784790039, "learning_rate": 8.30396475770925e-06, "loss": 0.3081, "step": 377 }, { "epoch": 0.00832933943710853, "grad_norm": 2.885751724243164, "learning_rate": 8.325991189427313e-06, "loss": 0.3297, "step": 378 }, { "epoch": 0.00835137472662469, "grad_norm": 4.075888156890869, "learning_rate": 8.348017621145374e-06, "loss": 0.3639, "step": 379 }, { "epoch": 0.00837341001614085, "grad_norm": 3.787062168121338, "learning_rate": 8.370044052863436e-06, "loss": 0.3399, "step": 380 }, { "epoch": 0.00839544530565701, "grad_norm": 3.6515281200408936, "learning_rate": 8.392070484581498e-06, "loss": 0.2931, "step": 381 }, { "epoch": 0.00841748059517317, "grad_norm": 4.038602352142334, "learning_rate": 8.41409691629956e-06, "loss": 0.353, "step": 382 }, { "epoch": 0.00843951588468933, "grad_norm": 3.5081043243408203, "learning_rate": 8.436123348017622e-06, "loss": 0.2865, "step": 383 }, { "epoch": 0.00846155117420549, "grad_norm": 3.3260464668273926, "learning_rate": 8.458149779735683e-06, "loss": 0.2677, "step": 384 }, { "epoch": 0.008483586463721651, "grad_norm": 3.903970956802368, "learning_rate": 8.480176211453744e-06, "loss": 0.3342, "step": 385 }, { "epoch": 0.00850562175323781, "grad_norm": 3.8243584632873535, "learning_rate": 8.502202643171805e-06, "loss": 0.3502, "step": 386 }, { "epoch": 0.00852765704275397, "grad_norm": 4.62246036529541, "learning_rate": 8.524229074889867e-06, "loss": 0.3241, "step": 387 }, { "epoch": 0.008549692332270131, "grad_norm": 3.59289813041687, "learning_rate": 8.546255506607929e-06, "loss": 0.2647, "step": 388 }, { "epoch": 0.00857172762178629, "grad_norm": 3.3789732456207275, "learning_rate": 8.568281938325991e-06, "loss": 0.3008, "step": 389 }, { "epoch": 0.008593762911302451, "grad_norm": 3.229379415512085, "learning_rate": 8.590308370044053e-06, "loss": 0.3369, "step": 390 }, { "epoch": 0.008615798200818611, "grad_norm": 4.09065055847168, "learning_rate": 8.612334801762115e-06, "loss": 0.3044, "step": 391 }, { "epoch": 0.00863783349033477, "grad_norm": 3.645310640335083, "learning_rate": 8.634361233480176e-06, "loss": 0.317, "step": 392 }, { "epoch": 0.008659868779850931, "grad_norm": 4.544421195983887, "learning_rate": 8.656387665198238e-06, "loss": 0.3163, "step": 393 }, { "epoch": 0.008681904069367091, "grad_norm": 3.7679965496063232, "learning_rate": 8.6784140969163e-06, "loss": 0.337, "step": 394 }, { "epoch": 0.008703939358883252, "grad_norm": 3.83672833442688, "learning_rate": 8.70044052863436e-06, "loss": 0.3239, "step": 395 }, { "epoch": 0.008725974648399411, "grad_norm": 3.6614277362823486, "learning_rate": 8.722466960352422e-06, "loss": 0.3159, "step": 396 }, { "epoch": 0.008748009937915573, "grad_norm": 4.136526107788086, "learning_rate": 8.744493392070484e-06, "loss": 0.3084, "step": 397 }, { "epoch": 0.008770045227431732, "grad_norm": 3.6752233505249023, "learning_rate": 8.766519823788546e-06, "loss": 0.3337, "step": 398 }, { "epoch": 0.008792080516947891, "grad_norm": 3.8122642040252686, "learning_rate": 8.788546255506607e-06, "loss": 0.3558, "step": 399 }, { "epoch": 0.008814115806464053, "grad_norm": 4.550193786621094, "learning_rate": 8.81057268722467e-06, "loss": 0.3147, "step": 400 }, { "epoch": 0.008836151095980212, "grad_norm": 3.8499679565429688, "learning_rate": 8.832599118942731e-06, "loss": 0.3333, "step": 401 }, { "epoch": 0.008858186385496373, "grad_norm": 3.359562397003174, "learning_rate": 8.854625550660793e-06, "loss": 0.3506, "step": 402 }, { "epoch": 0.008880221675012533, "grad_norm": 3.6550238132476807, "learning_rate": 8.876651982378856e-06, "loss": 0.3219, "step": 403 }, { "epoch": 0.008902256964528692, "grad_norm": 2.951533794403076, "learning_rate": 8.898678414096917e-06, "loss": 0.2399, "step": 404 }, { "epoch": 0.008924292254044853, "grad_norm": 5.023627758026123, "learning_rate": 8.920704845814978e-06, "loss": 0.3011, "step": 405 }, { "epoch": 0.008946327543561013, "grad_norm": 3.5515973567962646, "learning_rate": 8.94273127753304e-06, "loss": 0.3272, "step": 406 }, { "epoch": 0.008968362833077174, "grad_norm": 4.449505805969238, "learning_rate": 8.964757709251102e-06, "loss": 0.2598, "step": 407 }, { "epoch": 0.008990398122593333, "grad_norm": 5.151919841766357, "learning_rate": 8.986784140969164e-06, "loss": 0.3786, "step": 408 }, { "epoch": 0.009012433412109493, "grad_norm": 4.0129547119140625, "learning_rate": 9.008810572687226e-06, "loss": 0.3136, "step": 409 }, { "epoch": 0.009034468701625654, "grad_norm": 4.307564735412598, "learning_rate": 9.030837004405287e-06, "loss": 0.2571, "step": 410 }, { "epoch": 0.009056503991141813, "grad_norm": 2.9005680084228516, "learning_rate": 9.05286343612335e-06, "loss": 0.2769, "step": 411 }, { "epoch": 0.009078539280657974, "grad_norm": 3.493131637573242, "learning_rate": 9.074889867841411e-06, "loss": 0.3816, "step": 412 }, { "epoch": 0.009100574570174134, "grad_norm": 3.6033523082733154, "learning_rate": 9.096916299559473e-06, "loss": 0.2839, "step": 413 }, { "epoch": 0.009122609859690293, "grad_norm": 3.457846164703369, "learning_rate": 9.118942731277533e-06, "loss": 0.2587, "step": 414 }, { "epoch": 0.009144645149206454, "grad_norm": 3.2585484981536865, "learning_rate": 9.140969162995595e-06, "loss": 0.2761, "step": 415 }, { "epoch": 0.009166680438722614, "grad_norm": 5.218542575836182, "learning_rate": 9.162995594713657e-06, "loss": 0.3234, "step": 416 }, { "epoch": 0.009188715728238775, "grad_norm": 3.207927942276001, "learning_rate": 9.185022026431719e-06, "loss": 0.2996, "step": 417 }, { "epoch": 0.009210751017754934, "grad_norm": 4.046978950500488, "learning_rate": 9.20704845814978e-06, "loss": 0.2847, "step": 418 }, { "epoch": 0.009232786307271095, "grad_norm": 3.4882795810699463, "learning_rate": 9.229074889867842e-06, "loss": 0.3245, "step": 419 }, { "epoch": 0.009254821596787255, "grad_norm": 3.1434166431427, "learning_rate": 9.251101321585904e-06, "loss": 0.3171, "step": 420 }, { "epoch": 0.009276856886303414, "grad_norm": 4.818523406982422, "learning_rate": 9.273127753303966e-06, "loss": 0.3197, "step": 421 }, { "epoch": 0.009298892175819575, "grad_norm": 3.5332932472229004, "learning_rate": 9.295154185022028e-06, "loss": 0.2962, "step": 422 }, { "epoch": 0.009320927465335735, "grad_norm": 3.066016912460327, "learning_rate": 9.31718061674009e-06, "loss": 0.2432, "step": 423 }, { "epoch": 0.009342962754851896, "grad_norm": 2.84258770942688, "learning_rate": 9.33920704845815e-06, "loss": 0.2515, "step": 424 }, { "epoch": 0.009364998044368055, "grad_norm": 6.000263690948486, "learning_rate": 9.361233480176211e-06, "loss": 0.2939, "step": 425 }, { "epoch": 0.009387033333884215, "grad_norm": 3.625215768814087, "learning_rate": 9.383259911894273e-06, "loss": 0.2791, "step": 426 }, { "epoch": 0.009409068623400376, "grad_norm": 4.2451982498168945, "learning_rate": 9.405286343612335e-06, "loss": 0.2825, "step": 427 }, { "epoch": 0.009431103912916535, "grad_norm": 3.273071050643921, "learning_rate": 9.427312775330397e-06, "loss": 0.3081, "step": 428 }, { "epoch": 0.009453139202432697, "grad_norm": 3.8822126388549805, "learning_rate": 9.449339207048459e-06, "loss": 0.2885, "step": 429 }, { "epoch": 0.009475174491948856, "grad_norm": 4.231334209442139, "learning_rate": 9.47136563876652e-06, "loss": 0.3644, "step": 430 }, { "epoch": 0.009497209781465015, "grad_norm": 3.9243662357330322, "learning_rate": 9.493392070484582e-06, "loss": 0.3027, "step": 431 }, { "epoch": 0.009519245070981177, "grad_norm": 3.1854639053344727, "learning_rate": 9.515418502202644e-06, "loss": 0.3157, "step": 432 }, { "epoch": 0.009541280360497336, "grad_norm": 3.258707046508789, "learning_rate": 9.537444933920704e-06, "loss": 0.3365, "step": 433 }, { "epoch": 0.009563315650013497, "grad_norm": 3.192932367324829, "learning_rate": 9.559471365638766e-06, "loss": 0.3017, "step": 434 }, { "epoch": 0.009585350939529657, "grad_norm": 2.8377208709716797, "learning_rate": 9.581497797356828e-06, "loss": 0.2644, "step": 435 }, { "epoch": 0.009607386229045816, "grad_norm": 3.0926074981689453, "learning_rate": 9.60352422907489e-06, "loss": 0.3025, "step": 436 }, { "epoch": 0.009629421518561977, "grad_norm": 3.61940598487854, "learning_rate": 9.625550660792952e-06, "loss": 0.3004, "step": 437 }, { "epoch": 0.009651456808078137, "grad_norm": 2.7397496700286865, "learning_rate": 9.647577092511013e-06, "loss": 0.2288, "step": 438 }, { "epoch": 0.009673492097594298, "grad_norm": 2.9807233810424805, "learning_rate": 9.669603524229075e-06, "loss": 0.2916, "step": 439 }, { "epoch": 0.009695527387110457, "grad_norm": 3.265972137451172, "learning_rate": 9.691629955947137e-06, "loss": 0.2905, "step": 440 }, { "epoch": 0.009717562676626618, "grad_norm": 3.2442474365234375, "learning_rate": 9.713656387665199e-06, "loss": 0.2631, "step": 441 }, { "epoch": 0.009739597966142778, "grad_norm": 4.202004432678223, "learning_rate": 9.73568281938326e-06, "loss": 0.348, "step": 442 }, { "epoch": 0.009761633255658937, "grad_norm": 3.372788667678833, "learning_rate": 9.75770925110132e-06, "loss": 0.3247, "step": 443 }, { "epoch": 0.009783668545175098, "grad_norm": 3.0059688091278076, "learning_rate": 9.779735682819383e-06, "loss": 0.2923, "step": 444 }, { "epoch": 0.009805703834691258, "grad_norm": 3.6981401443481445, "learning_rate": 9.801762114537444e-06, "loss": 0.2898, "step": 445 }, { "epoch": 0.009827739124207419, "grad_norm": 2.6911263465881348, "learning_rate": 9.823788546255506e-06, "loss": 0.2583, "step": 446 }, { "epoch": 0.009849774413723578, "grad_norm": 3.2781028747558594, "learning_rate": 9.845814977973568e-06, "loss": 0.3033, "step": 447 }, { "epoch": 0.009871809703239738, "grad_norm": 4.080313205718994, "learning_rate": 9.86784140969163e-06, "loss": 0.3648, "step": 448 }, { "epoch": 0.009893844992755899, "grad_norm": 4.8163042068481445, "learning_rate": 9.889867841409692e-06, "loss": 0.3158, "step": 449 }, { "epoch": 0.009915880282272058, "grad_norm": 2.589672803878784, "learning_rate": 9.911894273127754e-06, "loss": 0.2999, "step": 450 }, { "epoch": 0.00993791557178822, "grad_norm": 3.0838136672973633, "learning_rate": 9.933920704845815e-06, "loss": 0.2858, "step": 451 }, { "epoch": 0.009959950861304379, "grad_norm": 4.384760856628418, "learning_rate": 9.955947136563877e-06, "loss": 0.307, "step": 452 }, { "epoch": 0.009981986150820538, "grad_norm": 3.5309877395629883, "learning_rate": 9.977973568281937e-06, "loss": 0.3024, "step": 453 }, { "epoch": 0.0100040214403367, "grad_norm": 4.358819484710693, "learning_rate": 9.999999999999999e-06, "loss": 0.2655, "step": 454 }, { "epoch": 0.010026056729852859, "grad_norm": 2.7143523693084717, "learning_rate": 1.0022026431718061e-05, "loss": 0.2647, "step": 455 }, { "epoch": 0.01004809201936902, "grad_norm": 3.321615219116211, "learning_rate": 1.0044052863436123e-05, "loss": 0.3124, "step": 456 }, { "epoch": 0.01007012730888518, "grad_norm": 3.5308358669281006, "learning_rate": 1.0066079295154185e-05, "loss": 0.3599, "step": 457 }, { "epoch": 0.010092162598401339, "grad_norm": 3.0546867847442627, "learning_rate": 1.0088105726872246e-05, "loss": 0.2552, "step": 458 }, { "epoch": 0.0101141978879175, "grad_norm": 4.755788803100586, "learning_rate": 1.0110132158590308e-05, "loss": 0.2699, "step": 459 }, { "epoch": 0.01013623317743366, "grad_norm": 4.163509368896484, "learning_rate": 1.013215859030837e-05, "loss": 0.2953, "step": 460 }, { "epoch": 0.01015826846694982, "grad_norm": 6.214312553405762, "learning_rate": 1.0154185022026432e-05, "loss": 0.3889, "step": 461 }, { "epoch": 0.01018030375646598, "grad_norm": 4.6382575035095215, "learning_rate": 1.0176211453744494e-05, "loss": 0.3051, "step": 462 }, { "epoch": 0.010202339045982141, "grad_norm": 3.558485984802246, "learning_rate": 1.0198237885462554e-05, "loss": 0.3743, "step": 463 }, { "epoch": 0.0102243743354983, "grad_norm": 3.2987890243530273, "learning_rate": 1.0220264317180617e-05, "loss": 0.2856, "step": 464 }, { "epoch": 0.01024640962501446, "grad_norm": 3.767589807510376, "learning_rate": 1.024229074889868e-05, "loss": 0.3332, "step": 465 }, { "epoch": 0.010268444914530621, "grad_norm": 4.662984848022461, "learning_rate": 1.0264317180616741e-05, "loss": 0.3158, "step": 466 }, { "epoch": 0.01029048020404678, "grad_norm": 3.699167251586914, "learning_rate": 1.0286343612334803e-05, "loss": 0.276, "step": 467 }, { "epoch": 0.010312515493562942, "grad_norm": 4.53949499130249, "learning_rate": 1.0308370044052865e-05, "loss": 0.3533, "step": 468 }, { "epoch": 0.010334550783079101, "grad_norm": 3.235049247741699, "learning_rate": 1.0330396475770926e-05, "loss": 0.3412, "step": 469 }, { "epoch": 0.01035658607259526, "grad_norm": 3.875438690185547, "learning_rate": 1.0352422907488988e-05, "loss": 0.3257, "step": 470 }, { "epoch": 0.010378621362111422, "grad_norm": 3.44260835647583, "learning_rate": 1.037444933920705e-05, "loss": 0.2856, "step": 471 }, { "epoch": 0.010400656651627581, "grad_norm": 3.60825514793396, "learning_rate": 1.039647577092511e-05, "loss": 0.2983, "step": 472 }, { "epoch": 0.010422691941143742, "grad_norm": 3.441405773162842, "learning_rate": 1.0418502202643172e-05, "loss": 0.3209, "step": 473 }, { "epoch": 0.010444727230659902, "grad_norm": 3.6408324241638184, "learning_rate": 1.0440528634361234e-05, "loss": 0.3353, "step": 474 }, { "epoch": 0.010466762520176061, "grad_norm": 3.7070565223693848, "learning_rate": 1.0462555066079296e-05, "loss": 0.2592, "step": 475 }, { "epoch": 0.010488797809692222, "grad_norm": 5.189818382263184, "learning_rate": 1.0484581497797357e-05, "loss": 0.3419, "step": 476 }, { "epoch": 0.010510833099208382, "grad_norm": 2.896646499633789, "learning_rate": 1.050660792951542e-05, "loss": 0.3071, "step": 477 }, { "epoch": 0.010532868388724543, "grad_norm": 3.840245008468628, "learning_rate": 1.0528634361233481e-05, "loss": 0.3241, "step": 478 }, { "epoch": 0.010554903678240702, "grad_norm": 3.8423831462860107, "learning_rate": 1.0550660792951543e-05, "loss": 0.2788, "step": 479 }, { "epoch": 0.010576938967756862, "grad_norm": 6.013378143310547, "learning_rate": 1.0572687224669605e-05, "loss": 0.282, "step": 480 }, { "epoch": 0.010598974257273023, "grad_norm": 3.4571728706359863, "learning_rate": 1.0594713656387667e-05, "loss": 0.2552, "step": 481 }, { "epoch": 0.010621009546789182, "grad_norm": 4.503485202789307, "learning_rate": 1.0616740088105727e-05, "loss": 0.3429, "step": 482 }, { "epoch": 0.010643044836305343, "grad_norm": 3.513561725616455, "learning_rate": 1.0638766519823789e-05, "loss": 0.2936, "step": 483 }, { "epoch": 0.010665080125821503, "grad_norm": 4.348287105560303, "learning_rate": 1.066079295154185e-05, "loss": 0.3296, "step": 484 }, { "epoch": 0.010687115415337664, "grad_norm": 3.8069064617156982, "learning_rate": 1.0682819383259912e-05, "loss": 0.2986, "step": 485 }, { "epoch": 0.010709150704853823, "grad_norm": 3.7113208770751953, "learning_rate": 1.0704845814977974e-05, "loss": 0.3264, "step": 486 }, { "epoch": 0.010731185994369983, "grad_norm": 3.8479883670806885, "learning_rate": 1.0726872246696036e-05, "loss": 0.3255, "step": 487 }, { "epoch": 0.010753221283886144, "grad_norm": 3.300534725189209, "learning_rate": 1.0748898678414098e-05, "loss": 0.3403, "step": 488 }, { "epoch": 0.010775256573402303, "grad_norm": 3.987475872039795, "learning_rate": 1.077092511013216e-05, "loss": 0.3147, "step": 489 }, { "epoch": 0.010797291862918465, "grad_norm": 3.7962632179260254, "learning_rate": 1.0792951541850221e-05, "loss": 0.3046, "step": 490 }, { "epoch": 0.010819327152434624, "grad_norm": 3.2389297485351562, "learning_rate": 1.0814977973568283e-05, "loss": 0.2428, "step": 491 }, { "epoch": 0.010841362441950783, "grad_norm": 3.619792938232422, "learning_rate": 1.0837004405286343e-05, "loss": 0.2681, "step": 492 }, { "epoch": 0.010863397731466945, "grad_norm": 3.5591962337493896, "learning_rate": 1.0859030837004405e-05, "loss": 0.3292, "step": 493 }, { "epoch": 0.010885433020983104, "grad_norm": 3.831942081451416, "learning_rate": 1.0881057268722467e-05, "loss": 0.2912, "step": 494 }, { "epoch": 0.010907468310499265, "grad_norm": 3.4058752059936523, "learning_rate": 1.0903083700440529e-05, "loss": 0.2963, "step": 495 }, { "epoch": 0.010929503600015425, "grad_norm": 3.247645854949951, "learning_rate": 1.092511013215859e-05, "loss": 0.2992, "step": 496 }, { "epoch": 0.010951538889531584, "grad_norm": 4.016951084136963, "learning_rate": 1.0947136563876652e-05, "loss": 0.2945, "step": 497 }, { "epoch": 0.010973574179047745, "grad_norm": 3.6864078044891357, "learning_rate": 1.0969162995594714e-05, "loss": 0.2898, "step": 498 }, { "epoch": 0.010995609468563905, "grad_norm": 4.157149791717529, "learning_rate": 1.0991189427312776e-05, "loss": 0.3433, "step": 499 }, { "epoch": 0.011017644758080066, "grad_norm": 4.512271881103516, "learning_rate": 1.1013215859030838e-05, "loss": 0.3013, "step": 500 }, { "epoch": 0.011039680047596225, "grad_norm": 3.0304386615753174, "learning_rate": 1.10352422907489e-05, "loss": 0.2705, "step": 501 }, { "epoch": 0.011061715337112385, "grad_norm": 3.945819139480591, "learning_rate": 1.105726872246696e-05, "loss": 0.2608, "step": 502 }, { "epoch": 0.011083750626628546, "grad_norm": 3.829049587249756, "learning_rate": 1.1079295154185022e-05, "loss": 0.2896, "step": 503 }, { "epoch": 0.011105785916144705, "grad_norm": 4.285023212432861, "learning_rate": 1.1101321585903083e-05, "loss": 0.2204, "step": 504 }, { "epoch": 0.011127821205660866, "grad_norm": 3.2274909019470215, "learning_rate": 1.1123348017621145e-05, "loss": 0.2402, "step": 505 }, { "epoch": 0.011149856495177026, "grad_norm": 3.6628222465515137, "learning_rate": 1.1145374449339207e-05, "loss": 0.2985, "step": 506 }, { "epoch": 0.011171891784693187, "grad_norm": 4.468387603759766, "learning_rate": 1.1167400881057269e-05, "loss": 0.3048, "step": 507 }, { "epoch": 0.011193927074209346, "grad_norm": 3.3970515727996826, "learning_rate": 1.118942731277533e-05, "loss": 0.2749, "step": 508 }, { "epoch": 0.011215962363725506, "grad_norm": 3.4051146507263184, "learning_rate": 1.1211453744493393e-05, "loss": 0.309, "step": 509 }, { "epoch": 0.011237997653241667, "grad_norm": 2.2320733070373535, "learning_rate": 1.1233480176211454e-05, "loss": 0.2557, "step": 510 }, { "epoch": 0.011260032942757826, "grad_norm": 3.5512518882751465, "learning_rate": 1.1255506607929514e-05, "loss": 0.3181, "step": 511 }, { "epoch": 0.011282068232273988, "grad_norm": 3.2860591411590576, "learning_rate": 1.1277533039647576e-05, "loss": 0.2729, "step": 512 }, { "epoch": 0.011304103521790147, "grad_norm": 3.5674383640289307, "learning_rate": 1.1299559471365638e-05, "loss": 0.2652, "step": 513 }, { "epoch": 0.011326138811306306, "grad_norm": 3.404066801071167, "learning_rate": 1.13215859030837e-05, "loss": 0.299, "step": 514 }, { "epoch": 0.011348174100822467, "grad_norm": 4.4304680824279785, "learning_rate": 1.1343612334801762e-05, "loss": 0.3431, "step": 515 }, { "epoch": 0.011370209390338627, "grad_norm": 2.4595513343811035, "learning_rate": 1.1365638766519824e-05, "loss": 0.2615, "step": 516 }, { "epoch": 0.011392244679854788, "grad_norm": 2.817800998687744, "learning_rate": 1.1387665198237885e-05, "loss": 0.2737, "step": 517 }, { "epoch": 0.011414279969370947, "grad_norm": 3.0745491981506348, "learning_rate": 1.1409691629955947e-05, "loss": 0.3338, "step": 518 }, { "epoch": 0.011436315258887107, "grad_norm": 3.077319860458374, "learning_rate": 1.1431718061674009e-05, "loss": 0.289, "step": 519 }, { "epoch": 0.011458350548403268, "grad_norm": 3.4089603424072266, "learning_rate": 1.1453744493392071e-05, "loss": 0.311, "step": 520 }, { "epoch": 0.011480385837919427, "grad_norm": 2.5657594203948975, "learning_rate": 1.1475770925110131e-05, "loss": 0.2437, "step": 521 }, { "epoch": 0.011502421127435589, "grad_norm": 3.073305130004883, "learning_rate": 1.1497797356828193e-05, "loss": 0.2886, "step": 522 }, { "epoch": 0.011524456416951748, "grad_norm": 2.3724708557128906, "learning_rate": 1.1519823788546255e-05, "loss": 0.2831, "step": 523 }, { "epoch": 0.01154649170646791, "grad_norm": 3.4780874252319336, "learning_rate": 1.1541850220264316e-05, "loss": 0.2534, "step": 524 }, { "epoch": 0.011568526995984069, "grad_norm": 3.8551855087280273, "learning_rate": 1.1563876651982378e-05, "loss": 0.3351, "step": 525 }, { "epoch": 0.011590562285500228, "grad_norm": 3.968693256378174, "learning_rate": 1.158590308370044e-05, "loss": 0.3276, "step": 526 }, { "epoch": 0.01161259757501639, "grad_norm": 4.355198383331299, "learning_rate": 1.1607929515418504e-05, "loss": 0.3279, "step": 527 }, { "epoch": 0.011634632864532549, "grad_norm": 4.234310150146484, "learning_rate": 1.1629955947136565e-05, "loss": 0.2761, "step": 528 }, { "epoch": 0.01165666815404871, "grad_norm": 3.938307762145996, "learning_rate": 1.1651982378854627e-05, "loss": 0.2486, "step": 529 }, { "epoch": 0.01167870344356487, "grad_norm": 5.876544952392578, "learning_rate": 1.1674008810572689e-05, "loss": 0.291, "step": 530 }, { "epoch": 0.011700738733081029, "grad_norm": 3.708709478378296, "learning_rate": 1.169603524229075e-05, "loss": 0.3295, "step": 531 }, { "epoch": 0.01172277402259719, "grad_norm": 2.9964535236358643, "learning_rate": 1.1718061674008811e-05, "loss": 0.2558, "step": 532 }, { "epoch": 0.01174480931211335, "grad_norm": 4.2269606590271, "learning_rate": 1.1740088105726873e-05, "loss": 0.3148, "step": 533 }, { "epoch": 0.01176684460162951, "grad_norm": 5.228407859802246, "learning_rate": 1.1762114537444935e-05, "loss": 0.33, "step": 534 }, { "epoch": 0.01178887989114567, "grad_norm": 3.345733165740967, "learning_rate": 1.1784140969162996e-05, "loss": 0.2563, "step": 535 }, { "epoch": 0.01181091518066183, "grad_norm": 3.2070069313049316, "learning_rate": 1.1806167400881058e-05, "loss": 0.2422, "step": 536 }, { "epoch": 0.01183295047017799, "grad_norm": 3.0753121376037598, "learning_rate": 1.182819383259912e-05, "loss": 0.2962, "step": 537 }, { "epoch": 0.01185498575969415, "grad_norm": 3.884464979171753, "learning_rate": 1.1850220264317182e-05, "loss": 0.2397, "step": 538 }, { "epoch": 0.011877021049210311, "grad_norm": 3.412348508834839, "learning_rate": 1.1872246696035244e-05, "loss": 0.3106, "step": 539 }, { "epoch": 0.01189905633872647, "grad_norm": 3.7950894832611084, "learning_rate": 1.1894273127753304e-05, "loss": 0.2987, "step": 540 }, { "epoch": 0.01192109162824263, "grad_norm": 2.976278305053711, "learning_rate": 1.1916299559471366e-05, "loss": 0.2437, "step": 541 }, { "epoch": 0.011943126917758791, "grad_norm": 4.292806148529053, "learning_rate": 1.1938325991189428e-05, "loss": 0.275, "step": 542 }, { "epoch": 0.01196516220727495, "grad_norm": 2.868410587310791, "learning_rate": 1.196035242290749e-05, "loss": 0.2759, "step": 543 }, { "epoch": 0.011987197496791112, "grad_norm": 2.78783917427063, "learning_rate": 1.1982378854625551e-05, "loss": 0.3018, "step": 544 }, { "epoch": 0.012009232786307271, "grad_norm": 2.74430251121521, "learning_rate": 1.2004405286343613e-05, "loss": 0.2748, "step": 545 }, { "epoch": 0.012031268075823432, "grad_norm": 4.014166355133057, "learning_rate": 1.2026431718061675e-05, "loss": 0.3005, "step": 546 }, { "epoch": 0.012053303365339591, "grad_norm": 3.5601816177368164, "learning_rate": 1.2048458149779737e-05, "loss": 0.2475, "step": 547 }, { "epoch": 0.012075338654855751, "grad_norm": 3.006810426712036, "learning_rate": 1.2070484581497798e-05, "loss": 0.2662, "step": 548 }, { "epoch": 0.012097373944371912, "grad_norm": 2.8806982040405273, "learning_rate": 1.209251101321586e-05, "loss": 0.2983, "step": 549 }, { "epoch": 0.012119409233888071, "grad_norm": 3.4792206287384033, "learning_rate": 1.211453744493392e-05, "loss": 0.2373, "step": 550 }, { "epoch": 0.012141444523404233, "grad_norm": 3.011566162109375, "learning_rate": 1.2136563876651982e-05, "loss": 0.2704, "step": 551 }, { "epoch": 0.012163479812920392, "grad_norm": 2.7110798358917236, "learning_rate": 1.2158590308370044e-05, "loss": 0.2278, "step": 552 }, { "epoch": 0.012185515102436551, "grad_norm": 3.061516761779785, "learning_rate": 1.2180616740088106e-05, "loss": 0.2467, "step": 553 }, { "epoch": 0.012207550391952713, "grad_norm": 3.1916468143463135, "learning_rate": 1.2202643171806168e-05, "loss": 0.3235, "step": 554 }, { "epoch": 0.012229585681468872, "grad_norm": 3.330008029937744, "learning_rate": 1.222466960352423e-05, "loss": 0.2062, "step": 555 }, { "epoch": 0.012251620970985033, "grad_norm": 3.5831353664398193, "learning_rate": 1.2246696035242291e-05, "loss": 0.2669, "step": 556 }, { "epoch": 0.012273656260501193, "grad_norm": 3.41119122505188, "learning_rate": 1.2268722466960353e-05, "loss": 0.3305, "step": 557 }, { "epoch": 0.012295691550017352, "grad_norm": 3.916719913482666, "learning_rate": 1.2290748898678415e-05, "loss": 0.2729, "step": 558 }, { "epoch": 0.012317726839533513, "grad_norm": 3.8243422508239746, "learning_rate": 1.2312775330396477e-05, "loss": 0.2571, "step": 559 }, { "epoch": 0.012339762129049673, "grad_norm": 2.788290023803711, "learning_rate": 1.2334801762114537e-05, "loss": 0.2748, "step": 560 }, { "epoch": 0.012361797418565834, "grad_norm": 3.5593814849853516, "learning_rate": 1.2356828193832599e-05, "loss": 0.1834, "step": 561 }, { "epoch": 0.012383832708081993, "grad_norm": 3.176518201828003, "learning_rate": 1.237885462555066e-05, "loss": 0.2897, "step": 562 }, { "epoch": 0.012405867997598153, "grad_norm": 2.819260597229004, "learning_rate": 1.2400881057268722e-05, "loss": 0.2754, "step": 563 }, { "epoch": 0.012427903287114314, "grad_norm": 3.1279115676879883, "learning_rate": 1.2422907488986784e-05, "loss": 0.3269, "step": 564 }, { "epoch": 0.012449938576630473, "grad_norm": 2.989398717880249, "learning_rate": 1.2444933920704846e-05, "loss": 0.3039, "step": 565 }, { "epoch": 0.012471973866146634, "grad_norm": 3.3936879634857178, "learning_rate": 1.2466960352422908e-05, "loss": 0.242, "step": 566 }, { "epoch": 0.012494009155662794, "grad_norm": 4.197988033294678, "learning_rate": 1.248898678414097e-05, "loss": 0.2869, "step": 567 }, { "epoch": 0.012516044445178955, "grad_norm": 2.392108201980591, "learning_rate": 1.2511013215859032e-05, "loss": 0.3292, "step": 568 }, { "epoch": 0.012538079734695114, "grad_norm": 3.700887680053711, "learning_rate": 1.2533039647577093e-05, "loss": 0.2684, "step": 569 }, { "epoch": 0.012560115024211274, "grad_norm": 3.5635900497436523, "learning_rate": 1.2555066079295153e-05, "loss": 0.2698, "step": 570 }, { "epoch": 0.012582150313727435, "grad_norm": 2.412540912628174, "learning_rate": 1.2577092511013215e-05, "loss": 0.2606, "step": 571 }, { "epoch": 0.012604185603243594, "grad_norm": 2.8183987140655518, "learning_rate": 1.2599118942731277e-05, "loss": 0.2258, "step": 572 }, { "epoch": 0.012626220892759756, "grad_norm": 4.028066158294678, "learning_rate": 1.2621145374449339e-05, "loss": 0.3246, "step": 573 }, { "epoch": 0.012648256182275915, "grad_norm": 2.241328239440918, "learning_rate": 1.26431718061674e-05, "loss": 0.2095, "step": 574 }, { "epoch": 0.012670291471792074, "grad_norm": 2.9961490631103516, "learning_rate": 1.2665198237885463e-05, "loss": 0.2811, "step": 575 }, { "epoch": 0.012692326761308236, "grad_norm": 3.1044139862060547, "learning_rate": 1.2687224669603524e-05, "loss": 0.2982, "step": 576 }, { "epoch": 0.012714362050824395, "grad_norm": 2.3109943866729736, "learning_rate": 1.2709251101321586e-05, "loss": 0.2613, "step": 577 }, { "epoch": 0.012736397340340556, "grad_norm": 3.729283332824707, "learning_rate": 1.2731277533039648e-05, "loss": 0.2449, "step": 578 }, { "epoch": 0.012758432629856716, "grad_norm": 3.4748752117156982, "learning_rate": 1.2753303964757708e-05, "loss": 0.291, "step": 579 }, { "epoch": 0.012780467919372875, "grad_norm": 3.1852059364318848, "learning_rate": 1.277533039647577e-05, "loss": 0.2241, "step": 580 }, { "epoch": 0.012802503208889036, "grad_norm": 3.9816322326660156, "learning_rate": 1.2797356828193832e-05, "loss": 0.3048, "step": 581 }, { "epoch": 0.012824538498405195, "grad_norm": 3.324131965637207, "learning_rate": 1.2819383259911894e-05, "loss": 0.221, "step": 582 }, { "epoch": 0.012846573787921357, "grad_norm": 2.9271767139434814, "learning_rate": 1.2841409691629955e-05, "loss": 0.3343, "step": 583 }, { "epoch": 0.012868609077437516, "grad_norm": 3.6790807247161865, "learning_rate": 1.2863436123348017e-05, "loss": 0.2635, "step": 584 }, { "epoch": 0.012890644366953675, "grad_norm": 3.8432304859161377, "learning_rate": 1.2885462555066079e-05, "loss": 0.2866, "step": 585 }, { "epoch": 0.012912679656469837, "grad_norm": 3.474083423614502, "learning_rate": 1.2907488986784141e-05, "loss": 0.3403, "step": 586 }, { "epoch": 0.012934714945985996, "grad_norm": 3.3400471210479736, "learning_rate": 1.2929515418502203e-05, "loss": 0.2423, "step": 587 }, { "epoch": 0.012956750235502157, "grad_norm": 3.847761869430542, "learning_rate": 1.2951541850220265e-05, "loss": 0.2882, "step": 588 }, { "epoch": 0.012978785525018317, "grad_norm": 2.9960811138153076, "learning_rate": 1.2973568281938326e-05, "loss": 0.3107, "step": 589 }, { "epoch": 0.013000820814534478, "grad_norm": 4.321402072906494, "learning_rate": 1.2995594713656388e-05, "loss": 0.2277, "step": 590 }, { "epoch": 0.013022856104050637, "grad_norm": 3.4483871459960938, "learning_rate": 1.301762114537445e-05, "loss": 0.2107, "step": 591 }, { "epoch": 0.013044891393566797, "grad_norm": 3.770524024963379, "learning_rate": 1.3039647577092512e-05, "loss": 0.2356, "step": 592 }, { "epoch": 0.013066926683082958, "grad_norm": 2.9880709648132324, "learning_rate": 1.3061674008810574e-05, "loss": 0.2157, "step": 593 }, { "epoch": 0.013088961972599117, "grad_norm": 3.361750364303589, "learning_rate": 1.3083700440528635e-05, "loss": 0.2966, "step": 594 }, { "epoch": 0.013110997262115278, "grad_norm": 4.228800296783447, "learning_rate": 1.3105726872246697e-05, "loss": 0.2745, "step": 595 }, { "epoch": 0.013133032551631438, "grad_norm": 3.179659843444824, "learning_rate": 1.3127753303964759e-05, "loss": 0.2953, "step": 596 }, { "epoch": 0.013155067841147597, "grad_norm": 4.04373025894165, "learning_rate": 1.3149779735682821e-05, "loss": 0.2822, "step": 597 }, { "epoch": 0.013177103130663758, "grad_norm": 3.3423328399658203, "learning_rate": 1.3171806167400883e-05, "loss": 0.2384, "step": 598 }, { "epoch": 0.013199138420179918, "grad_norm": 3.480769157409668, "learning_rate": 1.3193832599118943e-05, "loss": 0.2578, "step": 599 }, { "epoch": 0.013221173709696079, "grad_norm": 3.527440309524536, "learning_rate": 1.3215859030837005e-05, "loss": 0.2451, "step": 600 }, { "epoch": 0.013243208999212238, "grad_norm": 3.377185344696045, "learning_rate": 1.3237885462555067e-05, "loss": 0.2263, "step": 601 }, { "epoch": 0.013265244288728398, "grad_norm": 3.5735037326812744, "learning_rate": 1.3259911894273128e-05, "loss": 0.2499, "step": 602 }, { "epoch": 0.013287279578244559, "grad_norm": 3.3165130615234375, "learning_rate": 1.328193832599119e-05, "loss": 0.3244, "step": 603 }, { "epoch": 0.013309314867760718, "grad_norm": 3.730644464492798, "learning_rate": 1.3303964757709252e-05, "loss": 0.2439, "step": 604 }, { "epoch": 0.01333135015727688, "grad_norm": 3.437988042831421, "learning_rate": 1.3325991189427314e-05, "loss": 0.2362, "step": 605 }, { "epoch": 0.013353385446793039, "grad_norm": 3.5792315006256104, "learning_rate": 1.3348017621145376e-05, "loss": 0.3212, "step": 606 }, { "epoch": 0.013375420736309198, "grad_norm": 3.2218728065490723, "learning_rate": 1.3370044052863437e-05, "loss": 0.2805, "step": 607 }, { "epoch": 0.01339745602582536, "grad_norm": 3.0021443367004395, "learning_rate": 1.33920704845815e-05, "loss": 0.2341, "step": 608 }, { "epoch": 0.013419491315341519, "grad_norm": 3.887422800064087, "learning_rate": 1.341409691629956e-05, "loss": 0.2716, "step": 609 }, { "epoch": 0.01344152660485768, "grad_norm": 3.1428420543670654, "learning_rate": 1.3436123348017621e-05, "loss": 0.2788, "step": 610 }, { "epoch": 0.01346356189437384, "grad_norm": 2.9674289226531982, "learning_rate": 1.3458149779735683e-05, "loss": 0.2081, "step": 611 }, { "epoch": 0.01348559718389, "grad_norm": 2.9460418224334717, "learning_rate": 1.3480176211453745e-05, "loss": 0.2862, "step": 612 }, { "epoch": 0.01350763247340616, "grad_norm": 3.436612129211426, "learning_rate": 1.3502202643171807e-05, "loss": 0.3013, "step": 613 }, { "epoch": 0.01352966776292232, "grad_norm": 2.8693554401397705, "learning_rate": 1.3524229074889869e-05, "loss": 0.212, "step": 614 }, { "epoch": 0.01355170305243848, "grad_norm": 3.0468533039093018, "learning_rate": 1.354625550660793e-05, "loss": 0.2549, "step": 615 }, { "epoch": 0.01357373834195464, "grad_norm": 4.365853309631348, "learning_rate": 1.3568281938325992e-05, "loss": 0.267, "step": 616 }, { "epoch": 0.013595773631470801, "grad_norm": 3.4414868354797363, "learning_rate": 1.3590308370044054e-05, "loss": 0.2493, "step": 617 }, { "epoch": 0.01361780892098696, "grad_norm": 3.2758848667144775, "learning_rate": 1.3612334801762114e-05, "loss": 0.3478, "step": 618 }, { "epoch": 0.01363984421050312, "grad_norm": 4.718617916107178, "learning_rate": 1.3634361233480176e-05, "loss": 0.2851, "step": 619 }, { "epoch": 0.013661879500019281, "grad_norm": 3.0670948028564453, "learning_rate": 1.3656387665198238e-05, "loss": 0.289, "step": 620 }, { "epoch": 0.01368391478953544, "grad_norm": 4.9432573318481445, "learning_rate": 1.36784140969163e-05, "loss": 0.2684, "step": 621 }, { "epoch": 0.013705950079051602, "grad_norm": 4.502549171447754, "learning_rate": 1.3700440528634361e-05, "loss": 0.2495, "step": 622 }, { "epoch": 0.013727985368567761, "grad_norm": 2.682070732116699, "learning_rate": 1.3722466960352423e-05, "loss": 0.2741, "step": 623 }, { "epoch": 0.01375002065808392, "grad_norm": 3.344390392303467, "learning_rate": 1.3744493392070485e-05, "loss": 0.2976, "step": 624 }, { "epoch": 0.013772055947600082, "grad_norm": 3.031468391418457, "learning_rate": 1.3766519823788547e-05, "loss": 0.2584, "step": 625 }, { "epoch": 0.013794091237116241, "grad_norm": 3.0368239879608154, "learning_rate": 1.3788546255506609e-05, "loss": 0.2609, "step": 626 }, { "epoch": 0.013816126526632402, "grad_norm": 3.601219415664673, "learning_rate": 1.381057268722467e-05, "loss": 0.3406, "step": 627 }, { "epoch": 0.013838161816148562, "grad_norm": 3.18799090385437, "learning_rate": 1.383259911894273e-05, "loss": 0.2701, "step": 628 }, { "epoch": 0.013860197105664721, "grad_norm": 3.6806116104125977, "learning_rate": 1.3854625550660792e-05, "loss": 0.378, "step": 629 }, { "epoch": 0.013882232395180882, "grad_norm": 3.964245319366455, "learning_rate": 1.3876651982378854e-05, "loss": 0.3158, "step": 630 }, { "epoch": 0.013904267684697042, "grad_norm": 2.540928602218628, "learning_rate": 1.3898678414096916e-05, "loss": 0.1955, "step": 631 }, { "epoch": 0.013926302974213203, "grad_norm": 3.711000919342041, "learning_rate": 1.3920704845814978e-05, "loss": 0.2682, "step": 632 }, { "epoch": 0.013948338263729362, "grad_norm": 3.8765196800231934, "learning_rate": 1.394273127753304e-05, "loss": 0.2891, "step": 633 }, { "epoch": 0.013970373553245524, "grad_norm": 3.2492353916168213, "learning_rate": 1.3964757709251102e-05, "loss": 0.2372, "step": 634 }, { "epoch": 0.013992408842761683, "grad_norm": 2.9936647415161133, "learning_rate": 1.3986784140969163e-05, "loss": 0.2626, "step": 635 }, { "epoch": 0.014014444132277842, "grad_norm": 3.091855049133301, "learning_rate": 1.4008810572687225e-05, "loss": 0.2817, "step": 636 }, { "epoch": 0.014036479421794004, "grad_norm": 2.9523725509643555, "learning_rate": 1.4030837004405287e-05, "loss": 0.2883, "step": 637 }, { "epoch": 0.014058514711310163, "grad_norm": 2.682565927505493, "learning_rate": 1.4052863436123347e-05, "loss": 0.2532, "step": 638 }, { "epoch": 0.014080550000826324, "grad_norm": 3.1344423294067383, "learning_rate": 1.4074889867841409e-05, "loss": 0.2617, "step": 639 }, { "epoch": 0.014102585290342484, "grad_norm": 3.877883195877075, "learning_rate": 1.409691629955947e-05, "loss": 0.2465, "step": 640 }, { "epoch": 0.014124620579858643, "grad_norm": 2.9209017753601074, "learning_rate": 1.4118942731277533e-05, "loss": 0.299, "step": 641 }, { "epoch": 0.014146655869374804, "grad_norm": 2.7576797008514404, "learning_rate": 1.4140969162995594e-05, "loss": 0.2521, "step": 642 }, { "epoch": 0.014168691158890964, "grad_norm": 3.671383857727051, "learning_rate": 1.4162995594713656e-05, "loss": 0.3343, "step": 643 }, { "epoch": 0.014190726448407125, "grad_norm": 2.8038809299468994, "learning_rate": 1.4185022026431718e-05, "loss": 0.2643, "step": 644 }, { "epoch": 0.014212761737923284, "grad_norm": 3.3057174682617188, "learning_rate": 1.420704845814978e-05, "loss": 0.2752, "step": 645 }, { "epoch": 0.014234797027439444, "grad_norm": 2.9133989810943604, "learning_rate": 1.4229074889867842e-05, "loss": 0.2047, "step": 646 }, { "epoch": 0.014256832316955605, "grad_norm": 3.3634188175201416, "learning_rate": 1.4251101321585902e-05, "loss": 0.3239, "step": 647 }, { "epoch": 0.014278867606471764, "grad_norm": 2.6048011779785156, "learning_rate": 1.4273127753303964e-05, "loss": 0.3129, "step": 648 }, { "epoch": 0.014300902895987925, "grad_norm": 4.200425148010254, "learning_rate": 1.4295154185022025e-05, "loss": 0.3856, "step": 649 }, { "epoch": 0.014322938185504085, "grad_norm": 3.1988730430603027, "learning_rate": 1.4317180616740089e-05, "loss": 0.3095, "step": 650 }, { "epoch": 0.014344973475020244, "grad_norm": 3.09702205657959, "learning_rate": 1.433920704845815e-05, "loss": 0.2484, "step": 651 }, { "epoch": 0.014367008764536405, "grad_norm": 2.312436580657959, "learning_rate": 1.4361233480176213e-05, "loss": 0.246, "step": 652 }, { "epoch": 0.014389044054052565, "grad_norm": 2.7483956813812256, "learning_rate": 1.4383259911894274e-05, "loss": 0.2565, "step": 653 }, { "epoch": 0.014411079343568726, "grad_norm": 2.9288907051086426, "learning_rate": 1.4405286343612336e-05, "loss": 0.2777, "step": 654 }, { "epoch": 0.014433114633084885, "grad_norm": 2.7977070808410645, "learning_rate": 1.4427312775330398e-05, "loss": 0.267, "step": 655 }, { "epoch": 0.014455149922601046, "grad_norm": 3.3667519092559814, "learning_rate": 1.444933920704846e-05, "loss": 0.3005, "step": 656 }, { "epoch": 0.014477185212117206, "grad_norm": 3.126516103744507, "learning_rate": 1.447136563876652e-05, "loss": 0.2234, "step": 657 }, { "epoch": 0.014499220501633365, "grad_norm": 3.7249069213867188, "learning_rate": 1.4493392070484582e-05, "loss": 0.2634, "step": 658 }, { "epoch": 0.014521255791149526, "grad_norm": 3.6569631099700928, "learning_rate": 1.4515418502202644e-05, "loss": 0.2729, "step": 659 }, { "epoch": 0.014543291080665686, "grad_norm": 3.5444133281707764, "learning_rate": 1.4537444933920706e-05, "loss": 0.2292, "step": 660 }, { "epoch": 0.014565326370181847, "grad_norm": 3.120774030685425, "learning_rate": 1.4559471365638767e-05, "loss": 0.2285, "step": 661 }, { "epoch": 0.014587361659698006, "grad_norm": 2.3384389877319336, "learning_rate": 1.458149779735683e-05, "loss": 0.2445, "step": 662 }, { "epoch": 0.014609396949214166, "grad_norm": 3.2924089431762695, "learning_rate": 1.4603524229074891e-05, "loss": 0.2576, "step": 663 }, { "epoch": 0.014631432238730327, "grad_norm": 3.032785415649414, "learning_rate": 1.4625550660792953e-05, "loss": 0.2906, "step": 664 }, { "epoch": 0.014653467528246486, "grad_norm": 3.686978340148926, "learning_rate": 1.4647577092511015e-05, "loss": 0.2627, "step": 665 }, { "epoch": 0.014675502817762648, "grad_norm": 3.478410005569458, "learning_rate": 1.4669603524229076e-05, "loss": 0.2795, "step": 666 }, { "epoch": 0.014697538107278807, "grad_norm": 3.4265048503875732, "learning_rate": 1.4691629955947137e-05, "loss": 0.2883, "step": 667 }, { "epoch": 0.014719573396794966, "grad_norm": 3.6512176990509033, "learning_rate": 1.4713656387665198e-05, "loss": 0.3048, "step": 668 }, { "epoch": 0.014741608686311128, "grad_norm": 3.277768135070801, "learning_rate": 1.473568281938326e-05, "loss": 0.2775, "step": 669 }, { "epoch": 0.014763643975827287, "grad_norm": 4.000757694244385, "learning_rate": 1.4757709251101322e-05, "loss": 0.3131, "step": 670 }, { "epoch": 0.014785679265343448, "grad_norm": 3.491603374481201, "learning_rate": 1.4779735682819384e-05, "loss": 0.2677, "step": 671 }, { "epoch": 0.014807714554859608, "grad_norm": 3.1292874813079834, "learning_rate": 1.4801762114537446e-05, "loss": 0.219, "step": 672 }, { "epoch": 0.014829749844375767, "grad_norm": 2.769434690475464, "learning_rate": 1.4823788546255508e-05, "loss": 0.2512, "step": 673 }, { "epoch": 0.014851785133891928, "grad_norm": 4.8682780265808105, "learning_rate": 1.484581497797357e-05, "loss": 0.2897, "step": 674 }, { "epoch": 0.014873820423408088, "grad_norm": 3.29706072807312, "learning_rate": 1.4867841409691631e-05, "loss": 0.2402, "step": 675 }, { "epoch": 0.014895855712924249, "grad_norm": 3.1270580291748047, "learning_rate": 1.4889867841409693e-05, "loss": 0.2827, "step": 676 }, { "epoch": 0.014917891002440408, "grad_norm": 3.340996265411377, "learning_rate": 1.4911894273127753e-05, "loss": 0.271, "step": 677 }, { "epoch": 0.01493992629195657, "grad_norm": 3.661348581314087, "learning_rate": 1.4933920704845815e-05, "loss": 0.2181, "step": 678 }, { "epoch": 0.014961961581472729, "grad_norm": 2.8524646759033203, "learning_rate": 1.4955947136563877e-05, "loss": 0.2876, "step": 679 }, { "epoch": 0.014983996870988888, "grad_norm": 3.3869717121124268, "learning_rate": 1.4977973568281939e-05, "loss": 0.247, "step": 680 }, { "epoch": 0.01500603216050505, "grad_norm": 3.753751039505005, "learning_rate": 1.5e-05, "loss": 0.309, "step": 681 }, { "epoch": 0.015028067450021209, "grad_norm": 2.621032238006592, "learning_rate": 1.5022026431718062e-05, "loss": 0.22, "step": 682 }, { "epoch": 0.01505010273953737, "grad_norm": 3.076338529586792, "learning_rate": 1.5044052863436124e-05, "loss": 0.2806, "step": 683 }, { "epoch": 0.01507213802905353, "grad_norm": 3.1805579662323, "learning_rate": 1.5066079295154186e-05, "loss": 0.2501, "step": 684 }, { "epoch": 0.015094173318569689, "grad_norm": 3.258394479751587, "learning_rate": 1.5088105726872248e-05, "loss": 0.2832, "step": 685 }, { "epoch": 0.01511620860808585, "grad_norm": 3.179863452911377, "learning_rate": 1.511013215859031e-05, "loss": 0.2291, "step": 686 }, { "epoch": 0.01513824389760201, "grad_norm": 3.982550859451294, "learning_rate": 1.5132158590308371e-05, "loss": 0.2575, "step": 687 }, { "epoch": 0.01516027918711817, "grad_norm": 2.409248113632202, "learning_rate": 1.5154185022026433e-05, "loss": 0.2817, "step": 688 }, { "epoch": 0.01518231447663433, "grad_norm": 3.389535903930664, "learning_rate": 1.5176211453744495e-05, "loss": 0.3056, "step": 689 }, { "epoch": 0.01520434976615049, "grad_norm": 2.3245112895965576, "learning_rate": 1.5198237885462557e-05, "loss": 0.2168, "step": 690 }, { "epoch": 0.01522638505566665, "grad_norm": 3.0717363357543945, "learning_rate": 1.5220264317180615e-05, "loss": 0.2769, "step": 691 }, { "epoch": 0.01524842034518281, "grad_norm": 3.3378145694732666, "learning_rate": 1.5242290748898677e-05, "loss": 0.2467, "step": 692 }, { "epoch": 0.015270455634698971, "grad_norm": 3.053020477294922, "learning_rate": 1.526431718061674e-05, "loss": 0.2654, "step": 693 }, { "epoch": 0.01529249092421513, "grad_norm": 3.0528368949890137, "learning_rate": 1.5286343612334802e-05, "loss": 0.2357, "step": 694 }, { "epoch": 0.015314526213731292, "grad_norm": 2.5507349967956543, "learning_rate": 1.5308370044052864e-05, "loss": 0.2351, "step": 695 }, { "epoch": 0.015336561503247451, "grad_norm": 2.970811367034912, "learning_rate": 1.5330396475770926e-05, "loss": 0.2773, "step": 696 }, { "epoch": 0.01535859679276361, "grad_norm": 2.8753581047058105, "learning_rate": 1.5352422907488988e-05, "loss": 0.2768, "step": 697 }, { "epoch": 0.015380632082279772, "grad_norm": 3.3525846004486084, "learning_rate": 1.537444933920705e-05, "loss": 0.3018, "step": 698 }, { "epoch": 0.015402667371795931, "grad_norm": 3.1620471477508545, "learning_rate": 1.539647577092511e-05, "loss": 0.2779, "step": 699 }, { "epoch": 0.015424702661312092, "grad_norm": 4.194980621337891, "learning_rate": 1.5418502202643173e-05, "loss": 0.2556, "step": 700 }, { "epoch": 0.015446737950828252, "grad_norm": 2.7073280811309814, "learning_rate": 1.5440528634361235e-05, "loss": 0.2213, "step": 701 }, { "epoch": 0.015468773240344411, "grad_norm": 3.565321683883667, "learning_rate": 1.5462555066079297e-05, "loss": 0.2531, "step": 702 }, { "epoch": 0.015490808529860572, "grad_norm": 3.3526394367218018, "learning_rate": 1.548458149779736e-05, "loss": 0.2782, "step": 703 }, { "epoch": 0.015512843819376732, "grad_norm": 3.0924110412597656, "learning_rate": 1.550660792951542e-05, "loss": 0.2154, "step": 704 }, { "epoch": 0.015534879108892893, "grad_norm": 2.990306854248047, "learning_rate": 1.5528634361233482e-05, "loss": 0.2477, "step": 705 }, { "epoch": 0.015556914398409052, "grad_norm": 1.9051871299743652, "learning_rate": 1.5550660792951544e-05, "loss": 0.2516, "step": 706 }, { "epoch": 0.015578949687925212, "grad_norm": 2.829636573791504, "learning_rate": 1.5572687224669606e-05, "loss": 0.263, "step": 707 }, { "epoch": 0.015600984977441373, "grad_norm": 2.981165885925293, "learning_rate": 1.5594713656387668e-05, "loss": 0.28, "step": 708 }, { "epoch": 0.015623020266957532, "grad_norm": 3.0005381107330322, "learning_rate": 1.561674008810573e-05, "loss": 0.2628, "step": 709 }, { "epoch": 0.01564505555647369, "grad_norm": 3.2182958126068115, "learning_rate": 1.563876651982379e-05, "loss": 0.2173, "step": 710 }, { "epoch": 0.015667090845989853, "grad_norm": 2.5173871517181396, "learning_rate": 1.566079295154185e-05, "loss": 0.2089, "step": 711 }, { "epoch": 0.015689126135506014, "grad_norm": 3.0725698471069336, "learning_rate": 1.5682819383259912e-05, "loss": 0.3413, "step": 712 }, { "epoch": 0.01571116142502217, "grad_norm": 2.9203574657440186, "learning_rate": 1.5704845814977974e-05, "loss": 0.2149, "step": 713 }, { "epoch": 0.015733196714538333, "grad_norm": 3.0472099781036377, "learning_rate": 1.5726872246696035e-05, "loss": 0.2702, "step": 714 }, { "epoch": 0.015755232004054494, "grad_norm": 2.9679603576660156, "learning_rate": 1.5748898678414097e-05, "loss": 0.2584, "step": 715 }, { "epoch": 0.015777267293570655, "grad_norm": 3.669816493988037, "learning_rate": 1.577092511013216e-05, "loss": 0.2871, "step": 716 }, { "epoch": 0.015799302583086813, "grad_norm": 3.1498477458953857, "learning_rate": 1.579295154185022e-05, "loss": 0.3049, "step": 717 }, { "epoch": 0.015821337872602974, "grad_norm": 2.579390287399292, "learning_rate": 1.5814977973568283e-05, "loss": 0.2807, "step": 718 }, { "epoch": 0.015843373162119135, "grad_norm": 4.312929153442383, "learning_rate": 1.5837004405286345e-05, "loss": 0.3049, "step": 719 }, { "epoch": 0.015865408451635293, "grad_norm": 2.409344434738159, "learning_rate": 1.5859030837004406e-05, "loss": 0.2596, "step": 720 }, { "epoch": 0.015887443741151454, "grad_norm": 2.8105015754699707, "learning_rate": 1.5881057268722468e-05, "loss": 0.2815, "step": 721 }, { "epoch": 0.015909479030667615, "grad_norm": 2.675820827484131, "learning_rate": 1.590308370044053e-05, "loss": 0.2565, "step": 722 }, { "epoch": 0.015931514320183773, "grad_norm": 2.7303826808929443, "learning_rate": 1.5925110132158592e-05, "loss": 0.2825, "step": 723 }, { "epoch": 0.015953549609699934, "grad_norm": 2.592402219772339, "learning_rate": 1.5947136563876654e-05, "loss": 0.2258, "step": 724 }, { "epoch": 0.015975584899216095, "grad_norm": 2.9546992778778076, "learning_rate": 1.5969162995594715e-05, "loss": 0.2591, "step": 725 }, { "epoch": 0.015997620188732256, "grad_norm": 2.62752103805542, "learning_rate": 1.5991189427312777e-05, "loss": 0.2839, "step": 726 }, { "epoch": 0.016019655478248414, "grad_norm": 2.3762857913970947, "learning_rate": 1.601321585903084e-05, "loss": 0.2512, "step": 727 }, { "epoch": 0.016041690767764575, "grad_norm": 4.4768757820129395, "learning_rate": 1.60352422907489e-05, "loss": 0.2374, "step": 728 }, { "epoch": 0.016063726057280736, "grad_norm": 3.6655328273773193, "learning_rate": 1.6057268722466963e-05, "loss": 0.2366, "step": 729 }, { "epoch": 0.016085761346796894, "grad_norm": 2.821748733520508, "learning_rate": 1.607929515418502e-05, "loss": 0.2041, "step": 730 }, { "epoch": 0.016107796636313055, "grad_norm": 2.9633092880249023, "learning_rate": 1.6101321585903083e-05, "loss": 0.2166, "step": 731 }, { "epoch": 0.016129831925829216, "grad_norm": 3.4112563133239746, "learning_rate": 1.6123348017621145e-05, "loss": 0.1967, "step": 732 }, { "epoch": 0.016151867215345377, "grad_norm": 3.605498790740967, "learning_rate": 1.6145374449339207e-05, "loss": 0.2653, "step": 733 }, { "epoch": 0.016173902504861535, "grad_norm": 2.805403232574463, "learning_rate": 1.616740088105727e-05, "loss": 0.27, "step": 734 }, { "epoch": 0.016195937794377696, "grad_norm": 2.21580171585083, "learning_rate": 1.618942731277533e-05, "loss": 0.2007, "step": 735 }, { "epoch": 0.016217973083893857, "grad_norm": 2.838106632232666, "learning_rate": 1.6211453744493392e-05, "loss": 0.22, "step": 736 }, { "epoch": 0.016240008373410015, "grad_norm": 2.782536268234253, "learning_rate": 1.6233480176211454e-05, "loss": 0.2061, "step": 737 }, { "epoch": 0.016262043662926176, "grad_norm": 3.449523448944092, "learning_rate": 1.6255506607929516e-05, "loss": 0.2387, "step": 738 }, { "epoch": 0.016284078952442337, "grad_norm": 3.2599971294403076, "learning_rate": 1.6277533039647578e-05, "loss": 0.2618, "step": 739 }, { "epoch": 0.016306114241958495, "grad_norm": 2.4966881275177, "learning_rate": 1.629955947136564e-05, "loss": 0.2044, "step": 740 }, { "epoch": 0.016328149531474656, "grad_norm": 4.530304431915283, "learning_rate": 1.63215859030837e-05, "loss": 0.2864, "step": 741 }, { "epoch": 0.016350184820990817, "grad_norm": 3.1732301712036133, "learning_rate": 1.6343612334801763e-05, "loss": 0.285, "step": 742 }, { "epoch": 0.01637222011050698, "grad_norm": 3.562178373336792, "learning_rate": 1.6365638766519825e-05, "loss": 0.2621, "step": 743 }, { "epoch": 0.016394255400023136, "grad_norm": 2.7901124954223633, "learning_rate": 1.6387665198237887e-05, "loss": 0.204, "step": 744 }, { "epoch": 0.016416290689539297, "grad_norm": 2.6813488006591797, "learning_rate": 1.640969162995595e-05, "loss": 0.247, "step": 745 }, { "epoch": 0.01643832597905546, "grad_norm": 3.193466901779175, "learning_rate": 1.643171806167401e-05, "loss": 0.2047, "step": 746 }, { "epoch": 0.016460361268571616, "grad_norm": 3.1388256549835205, "learning_rate": 1.6453744493392072e-05, "loss": 0.2808, "step": 747 }, { "epoch": 0.016482396558087777, "grad_norm": 2.5120205879211426, "learning_rate": 1.6475770925110134e-05, "loss": 0.2766, "step": 748 }, { "epoch": 0.01650443184760394, "grad_norm": 4.301722526550293, "learning_rate": 1.6497797356828196e-05, "loss": 0.2686, "step": 749 }, { "epoch": 0.0165264671371201, "grad_norm": 3.3354082107543945, "learning_rate": 1.6519823788546254e-05, "loss": 0.2262, "step": 750 }, { "epoch": 0.016548502426636257, "grad_norm": 2.73612117767334, "learning_rate": 1.6541850220264316e-05, "loss": 0.2528, "step": 751 }, { "epoch": 0.01657053771615242, "grad_norm": 3.0998759269714355, "learning_rate": 1.6563876651982378e-05, "loss": 0.2014, "step": 752 }, { "epoch": 0.01659257300566858, "grad_norm": 2.5849547386169434, "learning_rate": 1.658590308370044e-05, "loss": 0.2399, "step": 753 }, { "epoch": 0.016614608295184737, "grad_norm": 3.290086030960083, "learning_rate": 1.66079295154185e-05, "loss": 0.2769, "step": 754 }, { "epoch": 0.0166366435847009, "grad_norm": 3.4915640354156494, "learning_rate": 1.6629955947136563e-05, "loss": 0.3771, "step": 755 }, { "epoch": 0.01665867887421706, "grad_norm": 3.659616231918335, "learning_rate": 1.6651982378854625e-05, "loss": 0.2735, "step": 756 }, { "epoch": 0.016680714163733217, "grad_norm": 2.6602344512939453, "learning_rate": 1.6674008810572687e-05, "loss": 0.2313, "step": 757 }, { "epoch": 0.01670274945324938, "grad_norm": 4.04259729385376, "learning_rate": 1.669603524229075e-05, "loss": 0.272, "step": 758 }, { "epoch": 0.01672478474276554, "grad_norm": 3.138597011566162, "learning_rate": 1.671806167400881e-05, "loss": 0.216, "step": 759 }, { "epoch": 0.0167468200322817, "grad_norm": 2.7375917434692383, "learning_rate": 1.6740088105726872e-05, "loss": 0.2427, "step": 760 }, { "epoch": 0.01676885532179786, "grad_norm": 3.152761697769165, "learning_rate": 1.6762114537444934e-05, "loss": 0.2572, "step": 761 }, { "epoch": 0.01679089061131402, "grad_norm": 3.712825059890747, "learning_rate": 1.6784140969162996e-05, "loss": 0.2519, "step": 762 }, { "epoch": 0.01681292590083018, "grad_norm": 3.0967516899108887, "learning_rate": 1.6806167400881058e-05, "loss": 0.2174, "step": 763 }, { "epoch": 0.01683496119034634, "grad_norm": 3.541537284851074, "learning_rate": 1.682819383259912e-05, "loss": 0.2459, "step": 764 }, { "epoch": 0.0168569964798625, "grad_norm": 3.213573455810547, "learning_rate": 1.685022026431718e-05, "loss": 0.2487, "step": 765 }, { "epoch": 0.01687903176937866, "grad_norm": 3.1161603927612305, "learning_rate": 1.6872246696035243e-05, "loss": 0.2585, "step": 766 }, { "epoch": 0.01690106705889482, "grad_norm": 3.4217021465301514, "learning_rate": 1.6894273127753305e-05, "loss": 0.2325, "step": 767 }, { "epoch": 0.01692310234841098, "grad_norm": 2.636427640914917, "learning_rate": 1.6916299559471367e-05, "loss": 0.1949, "step": 768 }, { "epoch": 0.01694513763792714, "grad_norm": 3.1827144622802734, "learning_rate": 1.6938325991189425e-05, "loss": 0.2478, "step": 769 }, { "epoch": 0.016967172927443302, "grad_norm": 4.419973850250244, "learning_rate": 1.6960352422907487e-05, "loss": 0.2444, "step": 770 }, { "epoch": 0.01698920821695946, "grad_norm": 4.020773887634277, "learning_rate": 1.698237885462555e-05, "loss": 0.2287, "step": 771 }, { "epoch": 0.01701124350647562, "grad_norm": 3.8528237342834473, "learning_rate": 1.700440528634361e-05, "loss": 0.3149, "step": 772 }, { "epoch": 0.017033278795991782, "grad_norm": 3.884190797805786, "learning_rate": 1.7026431718061673e-05, "loss": 0.2685, "step": 773 }, { "epoch": 0.01705531408550794, "grad_norm": 3.388733386993408, "learning_rate": 1.7048458149779735e-05, "loss": 0.301, "step": 774 }, { "epoch": 0.0170773493750241, "grad_norm": 3.7530906200408936, "learning_rate": 1.7070484581497796e-05, "loss": 0.2333, "step": 775 }, { "epoch": 0.017099384664540262, "grad_norm": 3.3276636600494385, "learning_rate": 1.7092511013215858e-05, "loss": 0.2157, "step": 776 }, { "epoch": 0.017121419954056423, "grad_norm": 3.90177845954895, "learning_rate": 1.711453744493392e-05, "loss": 0.2285, "step": 777 }, { "epoch": 0.01714345524357258, "grad_norm": 3.55320143699646, "learning_rate": 1.7136563876651982e-05, "loss": 0.2171, "step": 778 }, { "epoch": 0.017165490533088742, "grad_norm": 2.6671977043151855, "learning_rate": 1.7158590308370044e-05, "loss": 0.2145, "step": 779 }, { "epoch": 0.017187525822604903, "grad_norm": 2.918696880340576, "learning_rate": 1.7180616740088105e-05, "loss": 0.1953, "step": 780 }, { "epoch": 0.01720956111212106, "grad_norm": 4.406886577606201, "learning_rate": 1.7202643171806167e-05, "loss": 0.2281, "step": 781 }, { "epoch": 0.017231596401637222, "grad_norm": 5.5160346031188965, "learning_rate": 1.722466960352423e-05, "loss": 0.2405, "step": 782 }, { "epoch": 0.017253631691153383, "grad_norm": 2.7891178131103516, "learning_rate": 1.724669603524229e-05, "loss": 0.1729, "step": 783 }, { "epoch": 0.01727566698066954, "grad_norm": 4.96388053894043, "learning_rate": 1.7268722466960353e-05, "loss": 0.3042, "step": 784 }, { "epoch": 0.017297702270185702, "grad_norm": 2.737529993057251, "learning_rate": 1.7290748898678415e-05, "loss": 0.2271, "step": 785 }, { "epoch": 0.017319737559701863, "grad_norm": 3.1218416690826416, "learning_rate": 1.7312775330396476e-05, "loss": 0.2401, "step": 786 }, { "epoch": 0.017341772849218024, "grad_norm": 3.060581684112549, "learning_rate": 1.7334801762114538e-05, "loss": 0.2731, "step": 787 }, { "epoch": 0.017363808138734182, "grad_norm": 3.9927990436553955, "learning_rate": 1.73568281938326e-05, "loss": 0.2543, "step": 788 }, { "epoch": 0.017385843428250343, "grad_norm": 2.9511067867279053, "learning_rate": 1.737885462555066e-05, "loss": 0.2502, "step": 789 }, { "epoch": 0.017407878717766504, "grad_norm": 2.5914807319641113, "learning_rate": 1.740088105726872e-05, "loss": 0.1894, "step": 790 }, { "epoch": 0.017429914007282662, "grad_norm": 3.473292827606201, "learning_rate": 1.7422907488986782e-05, "loss": 0.2568, "step": 791 }, { "epoch": 0.017451949296798823, "grad_norm": 2.4426746368408203, "learning_rate": 1.7444933920704844e-05, "loss": 0.2437, "step": 792 }, { "epoch": 0.017473984586314984, "grad_norm": 2.7043399810791016, "learning_rate": 1.7466960352422906e-05, "loss": 0.2453, "step": 793 }, { "epoch": 0.017496019875831145, "grad_norm": 3.6561594009399414, "learning_rate": 1.7488986784140968e-05, "loss": 0.2304, "step": 794 }, { "epoch": 0.017518055165347303, "grad_norm": 2.6228909492492676, "learning_rate": 1.751101321585903e-05, "loss": 0.2357, "step": 795 }, { "epoch": 0.017540090454863464, "grad_norm": 3.3055827617645264, "learning_rate": 1.753303964757709e-05, "loss": 0.2783, "step": 796 }, { "epoch": 0.017562125744379625, "grad_norm": 2.977837562561035, "learning_rate": 1.7555066079295153e-05, "loss": 0.2144, "step": 797 }, { "epoch": 0.017584161033895783, "grad_norm": 2.7927684783935547, "learning_rate": 1.7577092511013215e-05, "loss": 0.2045, "step": 798 }, { "epoch": 0.017606196323411944, "grad_norm": 3.795210838317871, "learning_rate": 1.7599118942731277e-05, "loss": 0.2371, "step": 799 }, { "epoch": 0.017628231612928105, "grad_norm": 3.465338945388794, "learning_rate": 1.762114537444934e-05, "loss": 0.259, "step": 800 }, { "epoch": 0.017650266902444263, "grad_norm": 2.9999213218688965, "learning_rate": 1.76431718061674e-05, "loss": 0.2327, "step": 801 }, { "epoch": 0.017672302191960424, "grad_norm": 2.624293565750122, "learning_rate": 1.7665198237885462e-05, "loss": 0.1951, "step": 802 }, { "epoch": 0.017694337481476585, "grad_norm": 2.951467990875244, "learning_rate": 1.7687224669603524e-05, "loss": 0.236, "step": 803 }, { "epoch": 0.017716372770992746, "grad_norm": 2.610347270965576, "learning_rate": 1.7709251101321586e-05, "loss": 0.2221, "step": 804 }, { "epoch": 0.017738408060508904, "grad_norm": 3.6178627014160156, "learning_rate": 1.7731277533039648e-05, "loss": 0.2709, "step": 805 }, { "epoch": 0.017760443350025065, "grad_norm": 3.130147695541382, "learning_rate": 1.7753303964757713e-05, "loss": 0.2011, "step": 806 }, { "epoch": 0.017782478639541226, "grad_norm": 3.5167150497436523, "learning_rate": 1.7775330396475775e-05, "loss": 0.23, "step": 807 }, { "epoch": 0.017804513929057384, "grad_norm": 2.7741963863372803, "learning_rate": 1.7797356828193833e-05, "loss": 0.2594, "step": 808 }, { "epoch": 0.017826549218573545, "grad_norm": 3.209543228149414, "learning_rate": 1.7819383259911895e-05, "loss": 0.2715, "step": 809 }, { "epoch": 0.017848584508089706, "grad_norm": 3.2282564640045166, "learning_rate": 1.7841409691629957e-05, "loss": 0.2574, "step": 810 }, { "epoch": 0.017870619797605864, "grad_norm": 2.3441758155822754, "learning_rate": 1.786343612334802e-05, "loss": 0.2401, "step": 811 }, { "epoch": 0.017892655087122025, "grad_norm": 3.5681958198547363, "learning_rate": 1.788546255506608e-05, "loss": 0.2637, "step": 812 }, { "epoch": 0.017914690376638186, "grad_norm": 2.893138885498047, "learning_rate": 1.7907488986784142e-05, "loss": 0.259, "step": 813 }, { "epoch": 0.017936725666154348, "grad_norm": 3.073061227798462, "learning_rate": 1.7929515418502204e-05, "loss": 0.231, "step": 814 }, { "epoch": 0.017958760955670505, "grad_norm": 2.606715202331543, "learning_rate": 1.7951541850220266e-05, "loss": 0.255, "step": 815 }, { "epoch": 0.017980796245186666, "grad_norm": 2.4430861473083496, "learning_rate": 1.7973568281938328e-05, "loss": 0.1873, "step": 816 }, { "epoch": 0.018002831534702828, "grad_norm": 2.480198621749878, "learning_rate": 1.799559471365639e-05, "loss": 0.1926, "step": 817 }, { "epoch": 0.018024866824218985, "grad_norm": 2.6944899559020996, "learning_rate": 1.801762114537445e-05, "loss": 0.2403, "step": 818 }, { "epoch": 0.018046902113735146, "grad_norm": 2.734078884124756, "learning_rate": 1.8039647577092513e-05, "loss": 0.2623, "step": 819 }, { "epoch": 0.018068937403251308, "grad_norm": 2.969634771347046, "learning_rate": 1.8061674008810575e-05, "loss": 0.1919, "step": 820 }, { "epoch": 0.01809097269276747, "grad_norm": 3.901761770248413, "learning_rate": 1.8083700440528637e-05, "loss": 0.2772, "step": 821 }, { "epoch": 0.018113007982283626, "grad_norm": 2.9780895709991455, "learning_rate": 1.81057268722467e-05, "loss": 0.2802, "step": 822 }, { "epoch": 0.018135043271799788, "grad_norm": 2.9315974712371826, "learning_rate": 1.812775330396476e-05, "loss": 0.2574, "step": 823 }, { "epoch": 0.01815707856131595, "grad_norm": 3.4662208557128906, "learning_rate": 1.8149779735682822e-05, "loss": 0.188, "step": 824 }, { "epoch": 0.018179113850832106, "grad_norm": 2.613100051879883, "learning_rate": 1.8171806167400884e-05, "loss": 0.2451, "step": 825 }, { "epoch": 0.018201149140348268, "grad_norm": 3.6031198501586914, "learning_rate": 1.8193832599118946e-05, "loss": 0.1831, "step": 826 }, { "epoch": 0.01822318442986443, "grad_norm": 4.330585956573486, "learning_rate": 1.8215859030837004e-05, "loss": 0.2333, "step": 827 }, { "epoch": 0.018245219719380586, "grad_norm": 3.3529999256134033, "learning_rate": 1.8237885462555066e-05, "loss": 0.2138, "step": 828 }, { "epoch": 0.018267255008896748, "grad_norm": 3.1574339866638184, "learning_rate": 1.8259911894273128e-05, "loss": 0.2383, "step": 829 }, { "epoch": 0.01828929029841291, "grad_norm": 2.9194419384002686, "learning_rate": 1.828193832599119e-05, "loss": 0.2427, "step": 830 }, { "epoch": 0.01831132558792907, "grad_norm": 2.7497711181640625, "learning_rate": 1.830396475770925e-05, "loss": 0.1796, "step": 831 }, { "epoch": 0.018333360877445228, "grad_norm": 3.467034101486206, "learning_rate": 1.8325991189427313e-05, "loss": 0.227, "step": 832 }, { "epoch": 0.01835539616696139, "grad_norm": 3.4126665592193604, "learning_rate": 1.8348017621145375e-05, "loss": 0.2042, "step": 833 }, { "epoch": 0.01837743145647755, "grad_norm": 2.2044620513916016, "learning_rate": 1.8370044052863437e-05, "loss": 0.2189, "step": 834 }, { "epoch": 0.018399466745993708, "grad_norm": 3.540421485900879, "learning_rate": 1.83920704845815e-05, "loss": 0.2321, "step": 835 }, { "epoch": 0.01842150203550987, "grad_norm": 2.9164092540740967, "learning_rate": 1.841409691629956e-05, "loss": 0.2685, "step": 836 }, { "epoch": 0.01844353732502603, "grad_norm": 3.2613728046417236, "learning_rate": 1.8436123348017622e-05, "loss": 0.2523, "step": 837 }, { "epoch": 0.01846557261454219, "grad_norm": 2.8051044940948486, "learning_rate": 1.8458149779735684e-05, "loss": 0.2406, "step": 838 }, { "epoch": 0.01848760790405835, "grad_norm": 3.3317055702209473, "learning_rate": 1.8480176211453746e-05, "loss": 0.2705, "step": 839 }, { "epoch": 0.01850964319357451, "grad_norm": 2.2795329093933105, "learning_rate": 1.8502202643171808e-05, "loss": 0.2293, "step": 840 }, { "epoch": 0.01853167848309067, "grad_norm": 2.523857831954956, "learning_rate": 1.852422907488987e-05, "loss": 0.185, "step": 841 }, { "epoch": 0.01855371377260683, "grad_norm": 3.358626365661621, "learning_rate": 1.854625550660793e-05, "loss": 0.2972, "step": 842 }, { "epoch": 0.01857574906212299, "grad_norm": 4.353667259216309, "learning_rate": 1.8568281938325993e-05, "loss": 0.2193, "step": 843 }, { "epoch": 0.01859778435163915, "grad_norm": 3.040576457977295, "learning_rate": 1.8590308370044055e-05, "loss": 0.1852, "step": 844 }, { "epoch": 0.01861981964115531, "grad_norm": 2.766453981399536, "learning_rate": 1.8612334801762117e-05, "loss": 0.211, "step": 845 }, { "epoch": 0.01864185493067147, "grad_norm": 3.704435110092163, "learning_rate": 1.863436123348018e-05, "loss": 0.2463, "step": 846 }, { "epoch": 0.01866389022018763, "grad_norm": 3.5523531436920166, "learning_rate": 1.8656387665198237e-05, "loss": 0.2377, "step": 847 }, { "epoch": 0.018685925509703792, "grad_norm": 3.1293466091156006, "learning_rate": 1.86784140969163e-05, "loss": 0.2958, "step": 848 }, { "epoch": 0.01870796079921995, "grad_norm": 4.101553916931152, "learning_rate": 1.870044052863436e-05, "loss": 0.2174, "step": 849 }, { "epoch": 0.01872999608873611, "grad_norm": 2.7324140071868896, "learning_rate": 1.8722466960352423e-05, "loss": 0.2179, "step": 850 }, { "epoch": 0.018752031378252272, "grad_norm": 4.178292274475098, "learning_rate": 1.8744493392070485e-05, "loss": 0.2684, "step": 851 }, { "epoch": 0.01877406666776843, "grad_norm": 3.384700059890747, "learning_rate": 1.8766519823788546e-05, "loss": 0.2585, "step": 852 }, { "epoch": 0.01879610195728459, "grad_norm": 2.9928810596466064, "learning_rate": 1.8788546255506608e-05, "loss": 0.2329, "step": 853 }, { "epoch": 0.018818137246800752, "grad_norm": 3.0036590099334717, "learning_rate": 1.881057268722467e-05, "loss": 0.2581, "step": 854 }, { "epoch": 0.01884017253631691, "grad_norm": 3.266324043273926, "learning_rate": 1.8832599118942732e-05, "loss": 0.2278, "step": 855 }, { "epoch": 0.01886220782583307, "grad_norm": 2.637821674346924, "learning_rate": 1.8854625550660794e-05, "loss": 0.2174, "step": 856 }, { "epoch": 0.018884243115349232, "grad_norm": 2.713192939758301, "learning_rate": 1.8876651982378856e-05, "loss": 0.2052, "step": 857 }, { "epoch": 0.018906278404865393, "grad_norm": 2.5285897254943848, "learning_rate": 1.8898678414096917e-05, "loss": 0.2432, "step": 858 }, { "epoch": 0.01892831369438155, "grad_norm": 2.6517751216888428, "learning_rate": 1.892070484581498e-05, "loss": 0.2236, "step": 859 }, { "epoch": 0.018950348983897712, "grad_norm": 2.5006730556488037, "learning_rate": 1.894273127753304e-05, "loss": 0.2042, "step": 860 }, { "epoch": 0.018972384273413873, "grad_norm": 2.8537795543670654, "learning_rate": 1.8964757709251103e-05, "loss": 0.1947, "step": 861 }, { "epoch": 0.01899441956293003, "grad_norm": 3.3328354358673096, "learning_rate": 1.8986784140969165e-05, "loss": 0.2156, "step": 862 }, { "epoch": 0.019016454852446192, "grad_norm": 3.113740921020508, "learning_rate": 1.9008810572687226e-05, "loss": 0.1843, "step": 863 }, { "epoch": 0.019038490141962353, "grad_norm": 3.0143492221832275, "learning_rate": 1.9030837004405288e-05, "loss": 0.2512, "step": 864 }, { "epoch": 0.019060525431478514, "grad_norm": 2.2945756912231445, "learning_rate": 1.905286343612335e-05, "loss": 0.2415, "step": 865 }, { "epoch": 0.019082560720994672, "grad_norm": 2.8330492973327637, "learning_rate": 1.907488986784141e-05, "loss": 0.221, "step": 866 }, { "epoch": 0.019104596010510833, "grad_norm": 2.2182693481445312, "learning_rate": 1.909691629955947e-05, "loss": 0.2555, "step": 867 }, { "epoch": 0.019126631300026994, "grad_norm": 4.607930660247803, "learning_rate": 1.9118942731277532e-05, "loss": 0.2099, "step": 868 }, { "epoch": 0.019148666589543152, "grad_norm": 5.553369045257568, "learning_rate": 1.9140969162995594e-05, "loss": 0.2322, "step": 869 }, { "epoch": 0.019170701879059313, "grad_norm": 4.1968607902526855, "learning_rate": 1.9162995594713656e-05, "loss": 0.232, "step": 870 }, { "epoch": 0.019192737168575474, "grad_norm": 2.2902345657348633, "learning_rate": 1.9185022026431718e-05, "loss": 0.2411, "step": 871 }, { "epoch": 0.019214772458091632, "grad_norm": 2.448507070541382, "learning_rate": 1.920704845814978e-05, "loss": 0.1682, "step": 872 }, { "epoch": 0.019236807747607793, "grad_norm": 2.6388397216796875, "learning_rate": 1.922907488986784e-05, "loss": 0.2461, "step": 873 }, { "epoch": 0.019258843037123954, "grad_norm": 3.0698139667510986, "learning_rate": 1.9251101321585903e-05, "loss": 0.2708, "step": 874 }, { "epoch": 0.019280878326640116, "grad_norm": 3.5321433544158936, "learning_rate": 1.9273127753303965e-05, "loss": 0.2604, "step": 875 }, { "epoch": 0.019302913616156273, "grad_norm": 2.7359678745269775, "learning_rate": 1.9295154185022027e-05, "loss": 0.2617, "step": 876 }, { "epoch": 0.019324948905672434, "grad_norm": 4.362700462341309, "learning_rate": 1.931718061674009e-05, "loss": 0.2494, "step": 877 }, { "epoch": 0.019346984195188596, "grad_norm": 2.6761374473571777, "learning_rate": 1.933920704845815e-05, "loss": 0.2117, "step": 878 }, { "epoch": 0.019369019484704753, "grad_norm": 3.4107210636138916, "learning_rate": 1.9361233480176212e-05, "loss": 0.2166, "step": 879 }, { "epoch": 0.019391054774220914, "grad_norm": 3.714329957962036, "learning_rate": 1.9383259911894274e-05, "loss": 0.2544, "step": 880 }, { "epoch": 0.019413090063737076, "grad_norm": 2.7199289798736572, "learning_rate": 1.9405286343612336e-05, "loss": 0.2157, "step": 881 }, { "epoch": 0.019435125353253237, "grad_norm": 2.6487784385681152, "learning_rate": 1.9427312775330398e-05, "loss": 0.2975, "step": 882 }, { "epoch": 0.019457160642769394, "grad_norm": 2.8102338314056396, "learning_rate": 1.944933920704846e-05, "loss": 0.2343, "step": 883 }, { "epoch": 0.019479195932285556, "grad_norm": 2.477489948272705, "learning_rate": 1.947136563876652e-05, "loss": 0.2086, "step": 884 }, { "epoch": 0.019501231221801717, "grad_norm": 2.901937484741211, "learning_rate": 1.9493392070484583e-05, "loss": 0.1954, "step": 885 }, { "epoch": 0.019523266511317874, "grad_norm": 3.1609461307525635, "learning_rate": 1.951541850220264e-05, "loss": 0.2875, "step": 886 }, { "epoch": 0.019545301800834036, "grad_norm": 2.9539146423339844, "learning_rate": 1.9537444933920703e-05, "loss": 0.2222, "step": 887 }, { "epoch": 0.019567337090350197, "grad_norm": 3.112253189086914, "learning_rate": 1.9559471365638765e-05, "loss": 0.242, "step": 888 }, { "epoch": 0.019589372379866354, "grad_norm": 2.7534337043762207, "learning_rate": 1.9581497797356827e-05, "loss": 0.1938, "step": 889 }, { "epoch": 0.019611407669382516, "grad_norm": 2.3321142196655273, "learning_rate": 1.960352422907489e-05, "loss": 0.1727, "step": 890 }, { "epoch": 0.019633442958898677, "grad_norm": 2.241750955581665, "learning_rate": 1.962555066079295e-05, "loss": 0.1985, "step": 891 }, { "epoch": 0.019655478248414838, "grad_norm": 2.1884801387786865, "learning_rate": 1.9647577092511012e-05, "loss": 0.2096, "step": 892 }, { "epoch": 0.019677513537930996, "grad_norm": 2.5192484855651855, "learning_rate": 1.9669603524229074e-05, "loss": 0.2131, "step": 893 }, { "epoch": 0.019699548827447157, "grad_norm": 3.496293067932129, "learning_rate": 1.9691629955947136e-05, "loss": 0.3285, "step": 894 }, { "epoch": 0.019721584116963318, "grad_norm": 3.4263057708740234, "learning_rate": 1.9713656387665198e-05, "loss": 0.1916, "step": 895 }, { "epoch": 0.019743619406479476, "grad_norm": 3.3952362537384033, "learning_rate": 1.973568281938326e-05, "loss": 0.2727, "step": 896 }, { "epoch": 0.019765654695995637, "grad_norm": 2.773470401763916, "learning_rate": 1.975770925110132e-05, "loss": 0.2419, "step": 897 }, { "epoch": 0.019787689985511798, "grad_norm": 3.384795665740967, "learning_rate": 1.9779735682819383e-05, "loss": 0.2298, "step": 898 }, { "epoch": 0.01980972527502796, "grad_norm": 3.242594003677368, "learning_rate": 1.9801762114537445e-05, "loss": 0.2135, "step": 899 }, { "epoch": 0.019831760564544117, "grad_norm": 2.6999948024749756, "learning_rate": 1.9823788546255507e-05, "loss": 0.2128, "step": 900 }, { "epoch": 0.019853795854060278, "grad_norm": 3.5444581508636475, "learning_rate": 1.984581497797357e-05, "loss": 0.2396, "step": 901 }, { "epoch": 0.01987583114357644, "grad_norm": 4.034584045410156, "learning_rate": 1.986784140969163e-05, "loss": 0.2111, "step": 902 }, { "epoch": 0.019897866433092597, "grad_norm": 2.4550416469573975, "learning_rate": 1.9889867841409693e-05, "loss": 0.2074, "step": 903 }, { "epoch": 0.019919901722608758, "grad_norm": 3.3446667194366455, "learning_rate": 1.9911894273127754e-05, "loss": 0.2332, "step": 904 }, { "epoch": 0.01994193701212492, "grad_norm": 2.915567398071289, "learning_rate": 1.9933920704845813e-05, "loss": 0.2278, "step": 905 }, { "epoch": 0.019963972301641077, "grad_norm": 2.6929163932800293, "learning_rate": 1.9955947136563875e-05, "loss": 0.2601, "step": 906 }, { "epoch": 0.019986007591157238, "grad_norm": 2.0702455043792725, "learning_rate": 1.9977973568281936e-05, "loss": 0.2359, "step": 907 }, { "epoch": 0.0200080428806734, "grad_norm": 3.798839569091797, "learning_rate": 1.9999999999999998e-05, "loss": 0.2575, "step": 908 }, { "epoch": 0.02003007817018956, "grad_norm": 3.728821277618408, "learning_rate": 2.002202643171806e-05, "loss": 0.2227, "step": 909 }, { "epoch": 0.020052113459705718, "grad_norm": 3.156275510787964, "learning_rate": 2.0044052863436122e-05, "loss": 0.2228, "step": 910 }, { "epoch": 0.02007414874922188, "grad_norm": 2.5573556423187256, "learning_rate": 2.0066079295154184e-05, "loss": 0.2051, "step": 911 }, { "epoch": 0.02009618403873804, "grad_norm": 2.507664680480957, "learning_rate": 2.0088105726872246e-05, "loss": 0.2433, "step": 912 }, { "epoch": 0.020118219328254198, "grad_norm": 2.73882794380188, "learning_rate": 2.0110132158590307e-05, "loss": 0.2453, "step": 913 }, { "epoch": 0.02014025461777036, "grad_norm": 2.656216621398926, "learning_rate": 2.013215859030837e-05, "loss": 0.1842, "step": 914 }, { "epoch": 0.02016228990728652, "grad_norm": 3.2782726287841797, "learning_rate": 2.015418502202643e-05, "loss": 0.2486, "step": 915 }, { "epoch": 0.020184325196802678, "grad_norm": 2.522024631500244, "learning_rate": 2.0176211453744493e-05, "loss": 0.3021, "step": 916 }, { "epoch": 0.02020636048631884, "grad_norm": 2.59272837638855, "learning_rate": 2.0198237885462555e-05, "loss": 0.2774, "step": 917 }, { "epoch": 0.020228395775835, "grad_norm": 4.039885520935059, "learning_rate": 2.0220264317180616e-05, "loss": 0.2436, "step": 918 }, { "epoch": 0.02025043106535116, "grad_norm": 2.8094913959503174, "learning_rate": 2.0242290748898678e-05, "loss": 0.2252, "step": 919 }, { "epoch": 0.02027246635486732, "grad_norm": 2.1966869831085205, "learning_rate": 2.026431718061674e-05, "loss": 0.193, "step": 920 }, { "epoch": 0.02029450164438348, "grad_norm": 2.3626959323883057, "learning_rate": 2.0286343612334802e-05, "loss": 0.2462, "step": 921 }, { "epoch": 0.02031653693389964, "grad_norm": 2.4800870418548584, "learning_rate": 2.0308370044052864e-05, "loss": 0.1871, "step": 922 }, { "epoch": 0.0203385722234158, "grad_norm": 2.527250051498413, "learning_rate": 2.0330396475770926e-05, "loss": 0.2294, "step": 923 }, { "epoch": 0.02036060751293196, "grad_norm": 2.671823263168335, "learning_rate": 2.0352422907488987e-05, "loss": 0.2207, "step": 924 }, { "epoch": 0.02038264280244812, "grad_norm": 2.248128890991211, "learning_rate": 2.0374449339207046e-05, "loss": 0.1982, "step": 925 }, { "epoch": 0.020404678091964282, "grad_norm": 2.4219303131103516, "learning_rate": 2.0396475770925108e-05, "loss": 0.2034, "step": 926 }, { "epoch": 0.02042671338148044, "grad_norm": 3.508439302444458, "learning_rate": 2.041850220264317e-05, "loss": 0.2796, "step": 927 }, { "epoch": 0.0204487486709966, "grad_norm": 2.524834394454956, "learning_rate": 2.0440528634361235e-05, "loss": 0.1704, "step": 928 }, { "epoch": 0.020470783960512762, "grad_norm": 4.063796520233154, "learning_rate": 2.0462555066079296e-05, "loss": 0.2361, "step": 929 }, { "epoch": 0.02049281925002892, "grad_norm": 3.143101692199707, "learning_rate": 2.048458149779736e-05, "loss": 0.2517, "step": 930 }, { "epoch": 0.02051485453954508, "grad_norm": 2.34199595451355, "learning_rate": 2.050660792951542e-05, "loss": 0.2544, "step": 931 }, { "epoch": 0.020536889829061242, "grad_norm": 2.9705049991607666, "learning_rate": 2.0528634361233482e-05, "loss": 0.2684, "step": 932 }, { "epoch": 0.0205589251185774, "grad_norm": 4.427044868469238, "learning_rate": 2.0550660792951544e-05, "loss": 0.2765, "step": 933 }, { "epoch": 0.02058096040809356, "grad_norm": 3.3710763454437256, "learning_rate": 2.0572687224669606e-05, "loss": 0.2441, "step": 934 }, { "epoch": 0.020602995697609722, "grad_norm": 2.1745314598083496, "learning_rate": 2.0594713656387667e-05, "loss": 0.1798, "step": 935 }, { "epoch": 0.020625030987125884, "grad_norm": 3.5220043659210205, "learning_rate": 2.061674008810573e-05, "loss": 0.2572, "step": 936 }, { "epoch": 0.02064706627664204, "grad_norm": 2.539228677749634, "learning_rate": 2.063876651982379e-05, "loss": 0.2529, "step": 937 }, { "epoch": 0.020669101566158202, "grad_norm": 2.7548413276672363, "learning_rate": 2.0660792951541853e-05, "loss": 0.1975, "step": 938 }, { "epoch": 0.020691136855674364, "grad_norm": 2.7643163204193115, "learning_rate": 2.0682819383259915e-05, "loss": 0.2307, "step": 939 }, { "epoch": 0.02071317214519052, "grad_norm": 3.131291151046753, "learning_rate": 2.0704845814977977e-05, "loss": 0.2718, "step": 940 }, { "epoch": 0.020735207434706682, "grad_norm": 2.884193181991577, "learning_rate": 2.072687224669604e-05, "loss": 0.2518, "step": 941 }, { "epoch": 0.020757242724222844, "grad_norm": 2.299976110458374, "learning_rate": 2.07488986784141e-05, "loss": 0.2146, "step": 942 }, { "epoch": 0.020779278013739005, "grad_norm": 2.588331460952759, "learning_rate": 2.0770925110132162e-05, "loss": 0.2088, "step": 943 }, { "epoch": 0.020801313303255162, "grad_norm": 2.0428125858306885, "learning_rate": 2.079295154185022e-05, "loss": 0.2001, "step": 944 }, { "epoch": 0.020823348592771324, "grad_norm": 3.7521555423736572, "learning_rate": 2.0814977973568282e-05, "loss": 0.1706, "step": 945 }, { "epoch": 0.020845383882287485, "grad_norm": 3.2505600452423096, "learning_rate": 2.0837004405286344e-05, "loss": 0.2998, "step": 946 }, { "epoch": 0.020867419171803642, "grad_norm": 2.4625930786132812, "learning_rate": 2.0859030837004406e-05, "loss": 0.2195, "step": 947 }, { "epoch": 0.020889454461319804, "grad_norm": 2.612722873687744, "learning_rate": 2.0881057268722468e-05, "loss": 0.2096, "step": 948 }, { "epoch": 0.020911489750835965, "grad_norm": 1.7008068561553955, "learning_rate": 2.090308370044053e-05, "loss": 0.2017, "step": 949 }, { "epoch": 0.020933525040352122, "grad_norm": 3.8386895656585693, "learning_rate": 2.092511013215859e-05, "loss": 0.2697, "step": 950 }, { "epoch": 0.020955560329868284, "grad_norm": 3.237934112548828, "learning_rate": 2.0947136563876653e-05, "loss": 0.2376, "step": 951 }, { "epoch": 0.020977595619384445, "grad_norm": 2.6358377933502197, "learning_rate": 2.0969162995594715e-05, "loss": 0.1919, "step": 952 }, { "epoch": 0.020999630908900606, "grad_norm": 2.4000444412231445, "learning_rate": 2.0991189427312777e-05, "loss": 0.2017, "step": 953 }, { "epoch": 0.021021666198416764, "grad_norm": 2.5809340476989746, "learning_rate": 2.101321585903084e-05, "loss": 0.2361, "step": 954 }, { "epoch": 0.021043701487932925, "grad_norm": 2.305525302886963, "learning_rate": 2.10352422907489e-05, "loss": 0.225, "step": 955 }, { "epoch": 0.021065736777449086, "grad_norm": 2.8679988384246826, "learning_rate": 2.1057268722466962e-05, "loss": 0.2978, "step": 956 }, { "epoch": 0.021087772066965244, "grad_norm": 3.7640838623046875, "learning_rate": 2.1079295154185024e-05, "loss": 0.269, "step": 957 }, { "epoch": 0.021109807356481405, "grad_norm": 2.5378193855285645, "learning_rate": 2.1101321585903086e-05, "loss": 0.2889, "step": 958 }, { "epoch": 0.021131842645997566, "grad_norm": 2.3179636001586914, "learning_rate": 2.1123348017621148e-05, "loss": 0.227, "step": 959 }, { "epoch": 0.021153877935513724, "grad_norm": 3.856510639190674, "learning_rate": 2.114537444933921e-05, "loss": 0.2414, "step": 960 }, { "epoch": 0.021175913225029885, "grad_norm": 3.7173209190368652, "learning_rate": 2.116740088105727e-05, "loss": 0.2027, "step": 961 }, { "epoch": 0.021197948514546046, "grad_norm": 2.3273439407348633, "learning_rate": 2.1189427312775333e-05, "loss": 0.2212, "step": 962 }, { "epoch": 0.021219983804062207, "grad_norm": 3.0760610103607178, "learning_rate": 2.1211453744493395e-05, "loss": 0.1943, "step": 963 }, { "epoch": 0.021242019093578365, "grad_norm": 2.7380073070526123, "learning_rate": 2.1233480176211453e-05, "loss": 0.2329, "step": 964 }, { "epoch": 0.021264054383094526, "grad_norm": 2.3225295543670654, "learning_rate": 2.1255506607929515e-05, "loss": 0.2454, "step": 965 }, { "epoch": 0.021286089672610687, "grad_norm": 3.0268285274505615, "learning_rate": 2.1277533039647577e-05, "loss": 0.2745, "step": 966 }, { "epoch": 0.021308124962126845, "grad_norm": 2.8153347969055176, "learning_rate": 2.129955947136564e-05, "loss": 0.2395, "step": 967 }, { "epoch": 0.021330160251643006, "grad_norm": 3.908576250076294, "learning_rate": 2.13215859030837e-05, "loss": 0.314, "step": 968 }, { "epoch": 0.021352195541159167, "grad_norm": 3.0274407863616943, "learning_rate": 2.1343612334801763e-05, "loss": 0.2191, "step": 969 }, { "epoch": 0.021374230830675328, "grad_norm": 2.902554512023926, "learning_rate": 2.1365638766519824e-05, "loss": 0.2099, "step": 970 }, { "epoch": 0.021396266120191486, "grad_norm": 3.0461056232452393, "learning_rate": 2.1387665198237886e-05, "loss": 0.2377, "step": 971 }, { "epoch": 0.021418301409707647, "grad_norm": 2.5733633041381836, "learning_rate": 2.1409691629955948e-05, "loss": 0.2189, "step": 972 }, { "epoch": 0.021440336699223808, "grad_norm": 3.3688113689422607, "learning_rate": 2.143171806167401e-05, "loss": 0.2082, "step": 973 }, { "epoch": 0.021462371988739966, "grad_norm": 3.436519145965576, "learning_rate": 2.145374449339207e-05, "loss": 0.2446, "step": 974 }, { "epoch": 0.021484407278256127, "grad_norm": 3.505758762359619, "learning_rate": 2.1475770925110134e-05, "loss": 0.1872, "step": 975 }, { "epoch": 0.021506442567772288, "grad_norm": 2.6359009742736816, "learning_rate": 2.1497797356828195e-05, "loss": 0.2754, "step": 976 }, { "epoch": 0.021528477857288446, "grad_norm": 3.3337392807006836, "learning_rate": 2.1519823788546257e-05, "loss": 0.2591, "step": 977 }, { "epoch": 0.021550513146804607, "grad_norm": 3.5614187717437744, "learning_rate": 2.154185022026432e-05, "loss": 0.2771, "step": 978 }, { "epoch": 0.021572548436320768, "grad_norm": 2.2906479835510254, "learning_rate": 2.156387665198238e-05, "loss": 0.2098, "step": 979 }, { "epoch": 0.02159458372583693, "grad_norm": 3.474865198135376, "learning_rate": 2.1585903083700443e-05, "loss": 0.2285, "step": 980 }, { "epoch": 0.021616619015353087, "grad_norm": 2.6036014556884766, "learning_rate": 2.1607929515418504e-05, "loss": 0.2317, "step": 981 }, { "epoch": 0.021638654304869248, "grad_norm": 2.7366766929626465, "learning_rate": 2.1629955947136566e-05, "loss": 0.2559, "step": 982 }, { "epoch": 0.02166068959438541, "grad_norm": 3.0299649238586426, "learning_rate": 2.1651982378854625e-05, "loss": 0.2568, "step": 983 }, { "epoch": 0.021682724883901567, "grad_norm": 2.8388187885284424, "learning_rate": 2.1674008810572687e-05, "loss": 0.2641, "step": 984 }, { "epoch": 0.021704760173417728, "grad_norm": 3.1076552867889404, "learning_rate": 2.169603524229075e-05, "loss": 0.2376, "step": 985 }, { "epoch": 0.02172679546293389, "grad_norm": 4.013922214508057, "learning_rate": 2.171806167400881e-05, "loss": 0.2879, "step": 986 }, { "epoch": 0.02174883075245005, "grad_norm": 2.8130359649658203, "learning_rate": 2.1740088105726872e-05, "loss": 0.2983, "step": 987 }, { "epoch": 0.021770866041966208, "grad_norm": 2.8264002799987793, "learning_rate": 2.1762114537444934e-05, "loss": 0.1978, "step": 988 }, { "epoch": 0.02179290133148237, "grad_norm": 2.8150582313537598, "learning_rate": 2.1784140969162996e-05, "loss": 0.2439, "step": 989 }, { "epoch": 0.02181493662099853, "grad_norm": 3.4945638179779053, "learning_rate": 2.1806167400881057e-05, "loss": 0.2248, "step": 990 }, { "epoch": 0.021836971910514688, "grad_norm": 2.7892649173736572, "learning_rate": 2.182819383259912e-05, "loss": 0.2307, "step": 991 }, { "epoch": 0.02185900720003085, "grad_norm": 2.0955910682678223, "learning_rate": 2.185022026431718e-05, "loss": 0.2145, "step": 992 }, { "epoch": 0.02188104248954701, "grad_norm": 2.545419692993164, "learning_rate": 2.1872246696035243e-05, "loss": 0.2547, "step": 993 }, { "epoch": 0.021903077779063168, "grad_norm": 4.151726722717285, "learning_rate": 2.1894273127753305e-05, "loss": 0.3438, "step": 994 }, { "epoch": 0.02192511306857933, "grad_norm": 2.9764435291290283, "learning_rate": 2.1916299559471367e-05, "loss": 0.2281, "step": 995 }, { "epoch": 0.02194714835809549, "grad_norm": 4.260838508605957, "learning_rate": 2.193832599118943e-05, "loss": 0.2476, "step": 996 }, { "epoch": 0.02196918364761165, "grad_norm": 3.9723458290100098, "learning_rate": 2.196035242290749e-05, "loss": 0.2884, "step": 997 }, { "epoch": 0.02199121893712781, "grad_norm": 2.781456232070923, "learning_rate": 2.1982378854625552e-05, "loss": 0.1987, "step": 998 }, { "epoch": 0.02201325422664397, "grad_norm": 3.1566519737243652, "learning_rate": 2.2004405286343614e-05, "loss": 0.1824, "step": 999 }, { "epoch": 0.02203528951616013, "grad_norm": 3.1467881202697754, "learning_rate": 2.2026431718061676e-05, "loss": 0.24, "step": 1000 }, { "epoch": 0.02205732480567629, "grad_norm": 3.0318734645843506, "learning_rate": 2.2048458149779737e-05, "loss": 0.2283, "step": 1001 }, { "epoch": 0.02207936009519245, "grad_norm": 2.271181344985962, "learning_rate": 2.20704845814978e-05, "loss": 0.208, "step": 1002 }, { "epoch": 0.02210139538470861, "grad_norm": 2.4553873538970947, "learning_rate": 2.2092511013215858e-05, "loss": 0.2328, "step": 1003 }, { "epoch": 0.02212343067422477, "grad_norm": 3.206530809402466, "learning_rate": 2.211453744493392e-05, "loss": 0.2928, "step": 1004 }, { "epoch": 0.02214546596374093, "grad_norm": 2.852266550064087, "learning_rate": 2.213656387665198e-05, "loss": 0.2041, "step": 1005 }, { "epoch": 0.02216750125325709, "grad_norm": 2.9436593055725098, "learning_rate": 2.2158590308370043e-05, "loss": 0.1889, "step": 1006 }, { "epoch": 0.022189536542773253, "grad_norm": 3.9904513359069824, "learning_rate": 2.2180616740088105e-05, "loss": 0.2388, "step": 1007 }, { "epoch": 0.02221157183228941, "grad_norm": 3.5129315853118896, "learning_rate": 2.2202643171806167e-05, "loss": 0.1967, "step": 1008 }, { "epoch": 0.02223360712180557, "grad_norm": 2.5423545837402344, "learning_rate": 2.222466960352423e-05, "loss": 0.1721, "step": 1009 }, { "epoch": 0.022255642411321733, "grad_norm": 3.103986978530884, "learning_rate": 2.224669603524229e-05, "loss": 0.2528, "step": 1010 }, { "epoch": 0.02227767770083789, "grad_norm": 2.3288581371307373, "learning_rate": 2.2268722466960352e-05, "loss": 0.1892, "step": 1011 }, { "epoch": 0.02229971299035405, "grad_norm": 2.8395862579345703, "learning_rate": 2.2290748898678414e-05, "loss": 0.2218, "step": 1012 }, { "epoch": 0.022321748279870213, "grad_norm": 2.9643731117248535, "learning_rate": 2.2312775330396476e-05, "loss": 0.2581, "step": 1013 }, { "epoch": 0.022343783569386374, "grad_norm": 2.998253345489502, "learning_rate": 2.2334801762114538e-05, "loss": 0.2153, "step": 1014 }, { "epoch": 0.02236581885890253, "grad_norm": 3.064769744873047, "learning_rate": 2.23568281938326e-05, "loss": 0.2218, "step": 1015 }, { "epoch": 0.022387854148418693, "grad_norm": 2.7853212356567383, "learning_rate": 2.237885462555066e-05, "loss": 0.2455, "step": 1016 }, { "epoch": 0.022409889437934854, "grad_norm": 3.1158549785614014, "learning_rate": 2.2400881057268723e-05, "loss": 0.2594, "step": 1017 }, { "epoch": 0.02243192472745101, "grad_norm": 2.3947651386260986, "learning_rate": 2.2422907488986785e-05, "loss": 0.2206, "step": 1018 }, { "epoch": 0.022453960016967173, "grad_norm": 3.1902923583984375, "learning_rate": 2.2444933920704847e-05, "loss": 0.2197, "step": 1019 }, { "epoch": 0.022475995306483334, "grad_norm": 2.840759754180908, "learning_rate": 2.246696035242291e-05, "loss": 0.1646, "step": 1020 }, { "epoch": 0.02249803059599949, "grad_norm": 2.5555834770202637, "learning_rate": 2.248898678414097e-05, "loss": 0.2497, "step": 1021 }, { "epoch": 0.022520065885515653, "grad_norm": 2.3340020179748535, "learning_rate": 2.251101321585903e-05, "loss": 0.2375, "step": 1022 }, { "epoch": 0.022542101175031814, "grad_norm": 2.571176052093506, "learning_rate": 2.253303964757709e-05, "loss": 0.2026, "step": 1023 }, { "epoch": 0.022564136464547975, "grad_norm": 2.341569423675537, "learning_rate": 2.2555066079295153e-05, "loss": 0.179, "step": 1024 }, { "epoch": 0.022586171754064133, "grad_norm": 2.986682176589966, "learning_rate": 2.2577092511013214e-05, "loss": 0.2155, "step": 1025 }, { "epoch": 0.022608207043580294, "grad_norm": 2.715974807739258, "learning_rate": 2.2599118942731276e-05, "loss": 0.2206, "step": 1026 }, { "epoch": 0.022630242333096455, "grad_norm": 3.5182301998138428, "learning_rate": 2.2621145374449338e-05, "loss": 0.2907, "step": 1027 }, { "epoch": 0.022652277622612613, "grad_norm": 3.162926197052002, "learning_rate": 2.26431718061674e-05, "loss": 0.2215, "step": 1028 }, { "epoch": 0.022674312912128774, "grad_norm": 5.444604396820068, "learning_rate": 2.266519823788546e-05, "loss": 0.2119, "step": 1029 }, { "epoch": 0.022696348201644935, "grad_norm": 4.352402210235596, "learning_rate": 2.2687224669603524e-05, "loss": 0.224, "step": 1030 }, { "epoch": 0.022718383491161096, "grad_norm": 3.47255539894104, "learning_rate": 2.2709251101321585e-05, "loss": 0.2283, "step": 1031 }, { "epoch": 0.022740418780677254, "grad_norm": 3.13531494140625, "learning_rate": 2.2731277533039647e-05, "loss": 0.2558, "step": 1032 }, { "epoch": 0.022762454070193415, "grad_norm": 2.2991867065429688, "learning_rate": 2.275330396475771e-05, "loss": 0.2335, "step": 1033 }, { "epoch": 0.022784489359709576, "grad_norm": 2.649061918258667, "learning_rate": 2.277533039647577e-05, "loss": 0.2117, "step": 1034 }, { "epoch": 0.022806524649225734, "grad_norm": 2.8488810062408447, "learning_rate": 2.2797356828193833e-05, "loss": 0.3079, "step": 1035 }, { "epoch": 0.022828559938741895, "grad_norm": 3.4667739868164062, "learning_rate": 2.2819383259911894e-05, "loss": 0.2355, "step": 1036 }, { "epoch": 0.022850595228258056, "grad_norm": 2.690838098526001, "learning_rate": 2.2841409691629956e-05, "loss": 0.1934, "step": 1037 }, { "epoch": 0.022872630517774214, "grad_norm": 1.9690778255462646, "learning_rate": 2.2863436123348018e-05, "loss": 0.1785, "step": 1038 }, { "epoch": 0.022894665807290375, "grad_norm": 2.268190383911133, "learning_rate": 2.288546255506608e-05, "loss": 0.1773, "step": 1039 }, { "epoch": 0.022916701096806536, "grad_norm": 2.215599536895752, "learning_rate": 2.2907488986784142e-05, "loss": 0.22, "step": 1040 }, { "epoch": 0.022938736386322697, "grad_norm": 2.3450582027435303, "learning_rate": 2.29295154185022e-05, "loss": 0.2235, "step": 1041 }, { "epoch": 0.022960771675838855, "grad_norm": 3.160391092300415, "learning_rate": 2.2951541850220262e-05, "loss": 0.1754, "step": 1042 }, { "epoch": 0.022982806965355016, "grad_norm": 2.7827019691467285, "learning_rate": 2.2973568281938324e-05, "loss": 0.1827, "step": 1043 }, { "epoch": 0.023004842254871177, "grad_norm": 2.593463182449341, "learning_rate": 2.2995594713656386e-05, "loss": 0.211, "step": 1044 }, { "epoch": 0.023026877544387335, "grad_norm": 2.863063335418701, "learning_rate": 2.3017621145374447e-05, "loss": 0.1553, "step": 1045 }, { "epoch": 0.023048912833903496, "grad_norm": 1.9642045497894287, "learning_rate": 2.303964757709251e-05, "loss": 0.229, "step": 1046 }, { "epoch": 0.023070948123419657, "grad_norm": 5.985294342041016, "learning_rate": 2.306167400881057e-05, "loss": 0.2357, "step": 1047 }, { "epoch": 0.02309298341293582, "grad_norm": 3.4084737300872803, "learning_rate": 2.3083700440528633e-05, "loss": 0.2421, "step": 1048 }, { "epoch": 0.023115018702451976, "grad_norm": 5.342086315155029, "learning_rate": 2.3105726872246695e-05, "loss": 0.244, "step": 1049 }, { "epoch": 0.023137053991968137, "grad_norm": 2.847729206085205, "learning_rate": 2.3127753303964757e-05, "loss": 0.2125, "step": 1050 }, { "epoch": 0.0231590892814843, "grad_norm": 2.5514798164367676, "learning_rate": 2.314977973568282e-05, "loss": 0.208, "step": 1051 }, { "epoch": 0.023181124571000456, "grad_norm": 6.006385803222656, "learning_rate": 2.317180616740088e-05, "loss": 0.3026, "step": 1052 }, { "epoch": 0.023203159860516617, "grad_norm": 5.574637413024902, "learning_rate": 2.3193832599118945e-05, "loss": 0.2941, "step": 1053 }, { "epoch": 0.02322519515003278, "grad_norm": 3.595933675765991, "learning_rate": 2.3215859030837007e-05, "loss": 0.2341, "step": 1054 }, { "epoch": 0.023247230439548936, "grad_norm": 2.2158286571502686, "learning_rate": 2.323788546255507e-05, "loss": 0.2626, "step": 1055 }, { "epoch": 0.023269265729065097, "grad_norm": 3.6864328384399414, "learning_rate": 2.325991189427313e-05, "loss": 0.2244, "step": 1056 }, { "epoch": 0.02329130101858126, "grad_norm": 3.575397253036499, "learning_rate": 2.3281938325991193e-05, "loss": 0.336, "step": 1057 }, { "epoch": 0.02331333630809742, "grad_norm": 3.2165253162384033, "learning_rate": 2.3303964757709255e-05, "loss": 0.2669, "step": 1058 }, { "epoch": 0.023335371597613577, "grad_norm": 2.723158836364746, "learning_rate": 2.3325991189427316e-05, "loss": 0.2358, "step": 1059 }, { "epoch": 0.02335740688712974, "grad_norm": 2.6584436893463135, "learning_rate": 2.3348017621145378e-05, "loss": 0.2658, "step": 1060 }, { "epoch": 0.0233794421766459, "grad_norm": 1.791660189628601, "learning_rate": 2.3370044052863437e-05, "loss": 0.168, "step": 1061 }, { "epoch": 0.023401477466162057, "grad_norm": 2.6552512645721436, "learning_rate": 2.33920704845815e-05, "loss": 0.1967, "step": 1062 }, { "epoch": 0.02342351275567822, "grad_norm": 2.6771485805511475, "learning_rate": 2.341409691629956e-05, "loss": 0.2694, "step": 1063 }, { "epoch": 0.02344554804519438, "grad_norm": 3.3310773372650146, "learning_rate": 2.3436123348017622e-05, "loss": 0.2065, "step": 1064 }, { "epoch": 0.023467583334710537, "grad_norm": 2.008955240249634, "learning_rate": 2.3458149779735684e-05, "loss": 0.1884, "step": 1065 }, { "epoch": 0.0234896186242267, "grad_norm": 3.3062477111816406, "learning_rate": 2.3480176211453746e-05, "loss": 0.2304, "step": 1066 }, { "epoch": 0.02351165391374286, "grad_norm": 3.3372411727905273, "learning_rate": 2.3502202643171808e-05, "loss": 0.2584, "step": 1067 }, { "epoch": 0.02353368920325902, "grad_norm": 2.9459829330444336, "learning_rate": 2.352422907488987e-05, "loss": 0.1809, "step": 1068 }, { "epoch": 0.02355572449277518, "grad_norm": 2.776418447494507, "learning_rate": 2.354625550660793e-05, "loss": 0.2098, "step": 1069 }, { "epoch": 0.02357775978229134, "grad_norm": 3.019237756729126, "learning_rate": 2.3568281938325993e-05, "loss": 0.2815, "step": 1070 }, { "epoch": 0.0235997950718075, "grad_norm": 5.386958122253418, "learning_rate": 2.3590308370044055e-05, "loss": 0.2534, "step": 1071 }, { "epoch": 0.02362183036132366, "grad_norm": 4.305097579956055, "learning_rate": 2.3612334801762117e-05, "loss": 0.2132, "step": 1072 }, { "epoch": 0.02364386565083982, "grad_norm": 5.173962593078613, "learning_rate": 2.363436123348018e-05, "loss": 0.2074, "step": 1073 }, { "epoch": 0.02366590094035598, "grad_norm": 4.286397457122803, "learning_rate": 2.365638766519824e-05, "loss": 0.2547, "step": 1074 }, { "epoch": 0.023687936229872142, "grad_norm": 2.628359079360962, "learning_rate": 2.3678414096916302e-05, "loss": 0.2576, "step": 1075 }, { "epoch": 0.0237099715193883, "grad_norm": 2.969508171081543, "learning_rate": 2.3700440528634364e-05, "loss": 0.2148, "step": 1076 }, { "epoch": 0.02373200680890446, "grad_norm": 1.7720385789871216, "learning_rate": 2.3722466960352426e-05, "loss": 0.1798, "step": 1077 }, { "epoch": 0.023754042098420622, "grad_norm": 3.0701985359191895, "learning_rate": 2.3744493392070488e-05, "loss": 0.254, "step": 1078 }, { "epoch": 0.02377607738793678, "grad_norm": 3.3677303791046143, "learning_rate": 2.376651982378855e-05, "loss": 0.2589, "step": 1079 }, { "epoch": 0.02379811267745294, "grad_norm": 3.115213394165039, "learning_rate": 2.3788546255506608e-05, "loss": 0.2601, "step": 1080 }, { "epoch": 0.023820147966969102, "grad_norm": 2.6163415908813477, "learning_rate": 2.381057268722467e-05, "loss": 0.2148, "step": 1081 }, { "epoch": 0.02384218325648526, "grad_norm": 3.2948851585388184, "learning_rate": 2.383259911894273e-05, "loss": 0.2087, "step": 1082 }, { "epoch": 0.02386421854600142, "grad_norm": 3.706451177597046, "learning_rate": 2.3854625550660793e-05, "loss": 0.2549, "step": 1083 }, { "epoch": 0.023886253835517582, "grad_norm": 3.2411437034606934, "learning_rate": 2.3876651982378855e-05, "loss": 0.2307, "step": 1084 }, { "epoch": 0.023908289125033743, "grad_norm": 3.246323585510254, "learning_rate": 2.3898678414096917e-05, "loss": 0.2146, "step": 1085 }, { "epoch": 0.0239303244145499, "grad_norm": 2.487457752227783, "learning_rate": 2.392070484581498e-05, "loss": 0.1931, "step": 1086 }, { "epoch": 0.023952359704066062, "grad_norm": 2.7757444381713867, "learning_rate": 2.394273127753304e-05, "loss": 0.1912, "step": 1087 }, { "epoch": 0.023974394993582223, "grad_norm": 2.9502272605895996, "learning_rate": 2.3964757709251102e-05, "loss": 0.2269, "step": 1088 }, { "epoch": 0.02399643028309838, "grad_norm": 1.8965600728988647, "learning_rate": 2.3986784140969164e-05, "loss": 0.1965, "step": 1089 }, { "epoch": 0.024018465572614542, "grad_norm": 5.14764928817749, "learning_rate": 2.4008810572687226e-05, "loss": 0.2265, "step": 1090 }, { "epoch": 0.024040500862130703, "grad_norm": 2.5495762825012207, "learning_rate": 2.4030837004405288e-05, "loss": 0.2629, "step": 1091 }, { "epoch": 0.024062536151646864, "grad_norm": 3.3171966075897217, "learning_rate": 2.405286343612335e-05, "loss": 0.2102, "step": 1092 }, { "epoch": 0.024084571441163022, "grad_norm": 2.6007070541381836, "learning_rate": 2.407488986784141e-05, "loss": 0.2137, "step": 1093 }, { "epoch": 0.024106606730679183, "grad_norm": 3.582632541656494, "learning_rate": 2.4096916299559473e-05, "loss": 0.2774, "step": 1094 }, { "epoch": 0.024128642020195344, "grad_norm": 4.134748458862305, "learning_rate": 2.4118942731277535e-05, "loss": 0.2414, "step": 1095 }, { "epoch": 0.024150677309711502, "grad_norm": 3.3397789001464844, "learning_rate": 2.4140969162995597e-05, "loss": 0.1864, "step": 1096 }, { "epoch": 0.024172712599227663, "grad_norm": 3.038652181625366, "learning_rate": 2.416299559471366e-05, "loss": 0.267, "step": 1097 }, { "epoch": 0.024194747888743824, "grad_norm": 2.737476110458374, "learning_rate": 2.418502202643172e-05, "loss": 0.1933, "step": 1098 }, { "epoch": 0.024216783178259982, "grad_norm": 3.0935401916503906, "learning_rate": 2.4207048458149782e-05, "loss": 0.2628, "step": 1099 }, { "epoch": 0.024238818467776143, "grad_norm": 3.2370681762695312, "learning_rate": 2.422907488986784e-05, "loss": 0.2354, "step": 1100 }, { "epoch": 0.024260853757292304, "grad_norm": 2.507180690765381, "learning_rate": 2.4251101321585903e-05, "loss": 0.1931, "step": 1101 }, { "epoch": 0.024282889046808465, "grad_norm": 2.6521198749542236, "learning_rate": 2.4273127753303964e-05, "loss": 0.2319, "step": 1102 }, { "epoch": 0.024304924336324623, "grad_norm": 2.7440059185028076, "learning_rate": 2.4295154185022026e-05, "loss": 0.1953, "step": 1103 }, { "epoch": 0.024326959625840784, "grad_norm": 3.3921003341674805, "learning_rate": 2.4317180616740088e-05, "loss": 0.2565, "step": 1104 }, { "epoch": 0.024348994915356945, "grad_norm": 2.576613664627075, "learning_rate": 2.433920704845815e-05, "loss": 0.1669, "step": 1105 }, { "epoch": 0.024371030204873103, "grad_norm": 2.8194572925567627, "learning_rate": 2.4361233480176212e-05, "loss": 0.2, "step": 1106 }, { "epoch": 0.024393065494389264, "grad_norm": 2.750958204269409, "learning_rate": 2.4383259911894274e-05, "loss": 0.1974, "step": 1107 }, { "epoch": 0.024415100783905425, "grad_norm": 2.0594985485076904, "learning_rate": 2.4405286343612335e-05, "loss": 0.2294, "step": 1108 }, { "epoch": 0.024437136073421583, "grad_norm": 3.0039222240448, "learning_rate": 2.4427312775330397e-05, "loss": 0.2353, "step": 1109 }, { "epoch": 0.024459171362937744, "grad_norm": 2.928184986114502, "learning_rate": 2.444933920704846e-05, "loss": 0.1906, "step": 1110 }, { "epoch": 0.024481206652453905, "grad_norm": 2.7444005012512207, "learning_rate": 2.447136563876652e-05, "loss": 0.212, "step": 1111 }, { "epoch": 0.024503241941970066, "grad_norm": 2.6003596782684326, "learning_rate": 2.4493392070484583e-05, "loss": 0.2441, "step": 1112 }, { "epoch": 0.024525277231486224, "grad_norm": 2.4697375297546387, "learning_rate": 2.4515418502202645e-05, "loss": 0.183, "step": 1113 }, { "epoch": 0.024547312521002385, "grad_norm": 3.052793025970459, "learning_rate": 2.4537444933920706e-05, "loss": 0.249, "step": 1114 }, { "epoch": 0.024569347810518546, "grad_norm": 3.4790539741516113, "learning_rate": 2.4559471365638768e-05, "loss": 0.1805, "step": 1115 }, { "epoch": 0.024591383100034704, "grad_norm": 3.8166823387145996, "learning_rate": 2.458149779735683e-05, "loss": 0.2121, "step": 1116 }, { "epoch": 0.024613418389550865, "grad_norm": 3.761317253112793, "learning_rate": 2.4603524229074892e-05, "loss": 0.2426, "step": 1117 }, { "epoch": 0.024635453679067026, "grad_norm": 3.657074451446533, "learning_rate": 2.4625550660792954e-05, "loss": 0.2188, "step": 1118 }, { "epoch": 0.024657488968583188, "grad_norm": 2.752566337585449, "learning_rate": 2.4647577092511012e-05, "loss": 0.2614, "step": 1119 }, { "epoch": 0.024679524258099345, "grad_norm": 3.0438613891601562, "learning_rate": 2.4669603524229074e-05, "loss": 0.2658, "step": 1120 }, { "epoch": 0.024701559547615506, "grad_norm": 4.554818153381348, "learning_rate": 2.4691629955947136e-05, "loss": 0.2243, "step": 1121 }, { "epoch": 0.024723594837131668, "grad_norm": 2.038992166519165, "learning_rate": 2.4713656387665198e-05, "loss": 0.2319, "step": 1122 }, { "epoch": 0.024745630126647825, "grad_norm": 2.977513074874878, "learning_rate": 2.473568281938326e-05, "loss": 0.2735, "step": 1123 }, { "epoch": 0.024767665416163986, "grad_norm": 2.36407470703125, "learning_rate": 2.475770925110132e-05, "loss": 0.2521, "step": 1124 }, { "epoch": 0.024789700705680148, "grad_norm": 2.118586301803589, "learning_rate": 2.4779735682819383e-05, "loss": 0.1696, "step": 1125 }, { "epoch": 0.024811735995196305, "grad_norm": 2.8015689849853516, "learning_rate": 2.4801762114537445e-05, "loss": 0.2069, "step": 1126 }, { "epoch": 0.024833771284712466, "grad_norm": 2.6192781925201416, "learning_rate": 2.4823788546255507e-05, "loss": 0.2397, "step": 1127 }, { "epoch": 0.024855806574228628, "grad_norm": 2.737544536590576, "learning_rate": 2.484581497797357e-05, "loss": 0.2456, "step": 1128 }, { "epoch": 0.02487784186374479, "grad_norm": 2.3210153579711914, "learning_rate": 2.486784140969163e-05, "loss": 0.2157, "step": 1129 }, { "epoch": 0.024899877153260946, "grad_norm": 3.0563604831695557, "learning_rate": 2.4889867841409692e-05, "loss": 0.2279, "step": 1130 }, { "epoch": 0.024921912442777108, "grad_norm": 2.205476999282837, "learning_rate": 2.4911894273127754e-05, "loss": 0.2348, "step": 1131 }, { "epoch": 0.02494394773229327, "grad_norm": 2.4890646934509277, "learning_rate": 2.4933920704845816e-05, "loss": 0.229, "step": 1132 }, { "epoch": 0.024965983021809426, "grad_norm": 3.204711437225342, "learning_rate": 2.4955947136563878e-05, "loss": 0.2799, "step": 1133 }, { "epoch": 0.024988018311325588, "grad_norm": 3.05952787399292, "learning_rate": 2.497797356828194e-05, "loss": 0.1898, "step": 1134 }, { "epoch": 0.02501005360084175, "grad_norm": 2.3630573749542236, "learning_rate": 2.5e-05, "loss": 0.1849, "step": 1135 }, { "epoch": 0.02503208889035791, "grad_norm": 4.926763534545898, "learning_rate": 2.5022026431718063e-05, "loss": 0.21, "step": 1136 }, { "epoch": 0.025054124179874068, "grad_norm": 2.7962777614593506, "learning_rate": 2.5044052863436125e-05, "loss": 0.2028, "step": 1137 }, { "epoch": 0.02507615946939023, "grad_norm": 3.1890134811401367, "learning_rate": 2.5066079295154187e-05, "loss": 0.2533, "step": 1138 }, { "epoch": 0.02509819475890639, "grad_norm": 2.547553539276123, "learning_rate": 2.5088105726872245e-05, "loss": 0.2538, "step": 1139 }, { "epoch": 0.025120230048422548, "grad_norm": 2.5645506381988525, "learning_rate": 2.5110132158590307e-05, "loss": 0.2461, "step": 1140 }, { "epoch": 0.02514226533793871, "grad_norm": 2.607694149017334, "learning_rate": 2.513215859030837e-05, "loss": 0.2712, "step": 1141 }, { "epoch": 0.02516430062745487, "grad_norm": 2.626410961151123, "learning_rate": 2.515418502202643e-05, "loss": 0.2306, "step": 1142 }, { "epoch": 0.025186335916971028, "grad_norm": 3.049211025238037, "learning_rate": 2.5176211453744492e-05, "loss": 0.2482, "step": 1143 }, { "epoch": 0.02520837120648719, "grad_norm": 2.4939992427825928, "learning_rate": 2.5198237885462554e-05, "loss": 0.1701, "step": 1144 }, { "epoch": 0.02523040649600335, "grad_norm": 4.849621772766113, "learning_rate": 2.5220264317180616e-05, "loss": 0.2173, "step": 1145 }, { "epoch": 0.02525244178551951, "grad_norm": 3.3685383796691895, "learning_rate": 2.5242290748898678e-05, "loss": 0.2552, "step": 1146 }, { "epoch": 0.02527447707503567, "grad_norm": 2.03926944732666, "learning_rate": 2.526431718061674e-05, "loss": 0.2321, "step": 1147 }, { "epoch": 0.02529651236455183, "grad_norm": 2.5960335731506348, "learning_rate": 2.52863436123348e-05, "loss": 0.2103, "step": 1148 }, { "epoch": 0.02531854765406799, "grad_norm": 2.5136003494262695, "learning_rate": 2.5308370044052863e-05, "loss": 0.2244, "step": 1149 }, { "epoch": 0.02534058294358415, "grad_norm": 2.6268296241760254, "learning_rate": 2.5330396475770925e-05, "loss": 0.19, "step": 1150 }, { "epoch": 0.02536261823310031, "grad_norm": 2.3273768424987793, "learning_rate": 2.5352422907488987e-05, "loss": 0.2341, "step": 1151 }, { "epoch": 0.02538465352261647, "grad_norm": 3.2205474376678467, "learning_rate": 2.537444933920705e-05, "loss": 0.2144, "step": 1152 }, { "epoch": 0.02540668881213263, "grad_norm": 2.726389169692993, "learning_rate": 2.539647577092511e-05, "loss": 0.25, "step": 1153 }, { "epoch": 0.02542872410164879, "grad_norm": 3.7423856258392334, "learning_rate": 2.5418502202643172e-05, "loss": 0.2666, "step": 1154 }, { "epoch": 0.02545075939116495, "grad_norm": 2.613774538040161, "learning_rate": 2.5440528634361234e-05, "loss": 0.1704, "step": 1155 }, { "epoch": 0.025472794680681112, "grad_norm": 1.8858356475830078, "learning_rate": 2.5462555066079296e-05, "loss": 0.2203, "step": 1156 }, { "epoch": 0.02549482997019727, "grad_norm": 2.1175363063812256, "learning_rate": 2.5484581497797358e-05, "loss": 0.222, "step": 1157 }, { "epoch": 0.02551686525971343, "grad_norm": 2.9257397651672363, "learning_rate": 2.5506607929515416e-05, "loss": 0.2487, "step": 1158 }, { "epoch": 0.025538900549229592, "grad_norm": 3.340658664703369, "learning_rate": 2.5528634361233478e-05, "loss": 0.2503, "step": 1159 }, { "epoch": 0.02556093583874575, "grad_norm": 2.242044687271118, "learning_rate": 2.555066079295154e-05, "loss": 0.1827, "step": 1160 }, { "epoch": 0.02558297112826191, "grad_norm": 2.167738199234009, "learning_rate": 2.5572687224669602e-05, "loss": 0.1942, "step": 1161 }, { "epoch": 0.025605006417778072, "grad_norm": 1.920395851135254, "learning_rate": 2.5594713656387664e-05, "loss": 0.2185, "step": 1162 }, { "epoch": 0.025627041707294233, "grad_norm": 1.9800058603286743, "learning_rate": 2.5616740088105725e-05, "loss": 0.1828, "step": 1163 }, { "epoch": 0.02564907699681039, "grad_norm": 2.518352508544922, "learning_rate": 2.5638766519823787e-05, "loss": 0.2005, "step": 1164 }, { "epoch": 0.025671112286326552, "grad_norm": 2.4303927421569824, "learning_rate": 2.566079295154185e-05, "loss": 0.2101, "step": 1165 }, { "epoch": 0.025693147575842713, "grad_norm": 1.932472825050354, "learning_rate": 2.568281938325991e-05, "loss": 0.199, "step": 1166 }, { "epoch": 0.02571518286535887, "grad_norm": 2.3588995933532715, "learning_rate": 2.5704845814977973e-05, "loss": 0.2382, "step": 1167 }, { "epoch": 0.025737218154875032, "grad_norm": 3.2735087871551514, "learning_rate": 2.5726872246696035e-05, "loss": 0.1981, "step": 1168 }, { "epoch": 0.025759253444391193, "grad_norm": 3.0812203884124756, "learning_rate": 2.5748898678414096e-05, "loss": 0.1907, "step": 1169 }, { "epoch": 0.02578128873390735, "grad_norm": 2.4519710540771484, "learning_rate": 2.5770925110132158e-05, "loss": 0.198, "step": 1170 }, { "epoch": 0.025803324023423512, "grad_norm": 2.9401180744171143, "learning_rate": 2.579295154185022e-05, "loss": 0.173, "step": 1171 }, { "epoch": 0.025825359312939673, "grad_norm": 3.0683536529541016, "learning_rate": 2.5814977973568282e-05, "loss": 0.2676, "step": 1172 }, { "epoch": 0.025847394602455834, "grad_norm": 2.6876375675201416, "learning_rate": 2.5837004405286344e-05, "loss": 0.2515, "step": 1173 }, { "epoch": 0.025869429891971992, "grad_norm": 2.2549304962158203, "learning_rate": 2.5859030837004405e-05, "loss": 0.2371, "step": 1174 }, { "epoch": 0.025891465181488153, "grad_norm": 2.7870073318481445, "learning_rate": 2.5881057268722467e-05, "loss": 0.2173, "step": 1175 }, { "epoch": 0.025913500471004314, "grad_norm": 2.8353183269500732, "learning_rate": 2.590308370044053e-05, "loss": 0.2351, "step": 1176 }, { "epoch": 0.025935535760520472, "grad_norm": 2.351041316986084, "learning_rate": 2.592511013215859e-05, "loss": 0.2314, "step": 1177 }, { "epoch": 0.025957571050036633, "grad_norm": 2.508976459503174, "learning_rate": 2.5947136563876653e-05, "loss": 0.2322, "step": 1178 }, { "epoch": 0.025979606339552794, "grad_norm": 2.3990304470062256, "learning_rate": 2.5969162995594715e-05, "loss": 0.2489, "step": 1179 }, { "epoch": 0.026001641629068956, "grad_norm": 2.2170679569244385, "learning_rate": 2.5991189427312776e-05, "loss": 0.1989, "step": 1180 }, { "epoch": 0.026023676918585113, "grad_norm": 3.642301082611084, "learning_rate": 2.6013215859030838e-05, "loss": 0.2351, "step": 1181 }, { "epoch": 0.026045712208101274, "grad_norm": 3.4133291244506836, "learning_rate": 2.60352422907489e-05, "loss": 0.2238, "step": 1182 }, { "epoch": 0.026067747497617436, "grad_norm": 2.9240312576293945, "learning_rate": 2.6057268722466962e-05, "loss": 0.2211, "step": 1183 }, { "epoch": 0.026089782787133593, "grad_norm": 2.3186440467834473, "learning_rate": 2.6079295154185024e-05, "loss": 0.1966, "step": 1184 }, { "epoch": 0.026111818076649754, "grad_norm": 3.1868410110473633, "learning_rate": 2.6101321585903085e-05, "loss": 0.2462, "step": 1185 }, { "epoch": 0.026133853366165916, "grad_norm": 3.135422945022583, "learning_rate": 2.6123348017621147e-05, "loss": 0.2392, "step": 1186 }, { "epoch": 0.026155888655682073, "grad_norm": 2.868077516555786, "learning_rate": 2.614537444933921e-05, "loss": 0.2841, "step": 1187 }, { "epoch": 0.026177923945198234, "grad_norm": 2.9465997219085693, "learning_rate": 2.616740088105727e-05, "loss": 0.2405, "step": 1188 }, { "epoch": 0.026199959234714396, "grad_norm": 2.679192543029785, "learning_rate": 2.6189427312775333e-05, "loss": 0.1807, "step": 1189 }, { "epoch": 0.026221994524230557, "grad_norm": 2.525892734527588, "learning_rate": 2.6211453744493395e-05, "loss": 0.1873, "step": 1190 }, { "epoch": 0.026244029813746714, "grad_norm": 2.3461685180664062, "learning_rate": 2.6233480176211456e-05, "loss": 0.2071, "step": 1191 }, { "epoch": 0.026266065103262876, "grad_norm": 2.4700450897216797, "learning_rate": 2.6255506607929518e-05, "loss": 0.216, "step": 1192 }, { "epoch": 0.026288100392779037, "grad_norm": 2.0616865158081055, "learning_rate": 2.627753303964758e-05, "loss": 0.2056, "step": 1193 }, { "epoch": 0.026310135682295194, "grad_norm": 3.053572177886963, "learning_rate": 2.6299559471365642e-05, "loss": 0.2123, "step": 1194 }, { "epoch": 0.026332170971811356, "grad_norm": 4.22866153717041, "learning_rate": 2.6321585903083704e-05, "loss": 0.156, "step": 1195 }, { "epoch": 0.026354206261327517, "grad_norm": 2.811462163925171, "learning_rate": 2.6343612334801766e-05, "loss": 0.2398, "step": 1196 }, { "epoch": 0.026376241550843674, "grad_norm": 2.0922937393188477, "learning_rate": 2.6365638766519824e-05, "loss": 0.2046, "step": 1197 }, { "epoch": 0.026398276840359836, "grad_norm": 2.2547240257263184, "learning_rate": 2.6387665198237886e-05, "loss": 0.2482, "step": 1198 }, { "epoch": 0.026420312129875997, "grad_norm": 2.7277538776397705, "learning_rate": 2.6409691629955948e-05, "loss": 0.1974, "step": 1199 }, { "epoch": 0.026442347419392158, "grad_norm": 1.8224791288375854, "learning_rate": 2.643171806167401e-05, "loss": 0.1633, "step": 1200 }, { "epoch": 0.026464382708908316, "grad_norm": 2.6782491207122803, "learning_rate": 2.645374449339207e-05, "loss": 0.1934, "step": 1201 }, { "epoch": 0.026486417998424477, "grad_norm": 2.754680871963501, "learning_rate": 2.6475770925110133e-05, "loss": 0.2354, "step": 1202 }, { "epoch": 0.026508453287940638, "grad_norm": 2.6438791751861572, "learning_rate": 2.6497797356828195e-05, "loss": 0.1936, "step": 1203 }, { "epoch": 0.026530488577456796, "grad_norm": 2.755949020385742, "learning_rate": 2.6519823788546257e-05, "loss": 0.1837, "step": 1204 }, { "epoch": 0.026552523866972957, "grad_norm": 2.0142297744750977, "learning_rate": 2.654185022026432e-05, "loss": 0.2037, "step": 1205 }, { "epoch": 0.026574559156489118, "grad_norm": 2.2794363498687744, "learning_rate": 2.656387665198238e-05, "loss": 0.2637, "step": 1206 }, { "epoch": 0.02659659444600528, "grad_norm": 1.8637146949768066, "learning_rate": 2.6585903083700442e-05, "loss": 0.1782, "step": 1207 }, { "epoch": 0.026618629735521437, "grad_norm": 2.5039424896240234, "learning_rate": 2.6607929515418504e-05, "loss": 0.2647, "step": 1208 }, { "epoch": 0.026640665025037598, "grad_norm": 3.214482069015503, "learning_rate": 2.6629955947136566e-05, "loss": 0.2169, "step": 1209 }, { "epoch": 0.02666270031455376, "grad_norm": 2.6779916286468506, "learning_rate": 2.6651982378854628e-05, "loss": 0.1972, "step": 1210 }, { "epoch": 0.026684735604069917, "grad_norm": 2.548062801361084, "learning_rate": 2.667400881057269e-05, "loss": 0.226, "step": 1211 }, { "epoch": 0.026706770893586078, "grad_norm": 2.633751630783081, "learning_rate": 2.669603524229075e-05, "loss": 0.2246, "step": 1212 }, { "epoch": 0.02672880618310224, "grad_norm": 4.26351261138916, "learning_rate": 2.6718061674008813e-05, "loss": 0.2463, "step": 1213 }, { "epoch": 0.026750841472618397, "grad_norm": 3.2607855796813965, "learning_rate": 2.6740088105726875e-05, "loss": 0.2166, "step": 1214 }, { "epoch": 0.026772876762134558, "grad_norm": 4.45855188369751, "learning_rate": 2.6762114537444937e-05, "loss": 0.2635, "step": 1215 }, { "epoch": 0.02679491205165072, "grad_norm": 2.17087721824646, "learning_rate": 2.6784140969163e-05, "loss": 0.2002, "step": 1216 }, { "epoch": 0.02681694734116688, "grad_norm": 1.911519169807434, "learning_rate": 2.6806167400881057e-05, "loss": 0.145, "step": 1217 }, { "epoch": 0.026838982630683038, "grad_norm": 3.9134883880615234, "learning_rate": 2.682819383259912e-05, "loss": 0.2161, "step": 1218 }, { "epoch": 0.0268610179201992, "grad_norm": 2.021348476409912, "learning_rate": 2.685022026431718e-05, "loss": 0.1882, "step": 1219 }, { "epoch": 0.02688305320971536, "grad_norm": 2.3208131790161133, "learning_rate": 2.6872246696035242e-05, "loss": 0.248, "step": 1220 }, { "epoch": 0.026905088499231518, "grad_norm": 3.6399800777435303, "learning_rate": 2.6894273127753304e-05, "loss": 0.2472, "step": 1221 }, { "epoch": 0.02692712378874768, "grad_norm": 2.3067626953125, "learning_rate": 2.6916299559471366e-05, "loss": 0.2103, "step": 1222 }, { "epoch": 0.02694915907826384, "grad_norm": 3.187807083129883, "learning_rate": 2.6938325991189428e-05, "loss": 0.2682, "step": 1223 }, { "epoch": 0.02697119436778, "grad_norm": 3.095724582672119, "learning_rate": 2.696035242290749e-05, "loss": 0.23, "step": 1224 }, { "epoch": 0.02699322965729616, "grad_norm": 2.8156745433807373, "learning_rate": 2.698237885462555e-05, "loss": 0.1627, "step": 1225 }, { "epoch": 0.02701526494681232, "grad_norm": 2.283876657485962, "learning_rate": 2.7004405286343613e-05, "loss": 0.22, "step": 1226 }, { "epoch": 0.02703730023632848, "grad_norm": 2.7022361755371094, "learning_rate": 2.7026431718061675e-05, "loss": 0.2181, "step": 1227 }, { "epoch": 0.02705933552584464, "grad_norm": 2.6883368492126465, "learning_rate": 2.7048458149779737e-05, "loss": 0.2459, "step": 1228 }, { "epoch": 0.0270813708153608, "grad_norm": 2.0383174419403076, "learning_rate": 2.70704845814978e-05, "loss": 0.1844, "step": 1229 }, { "epoch": 0.02710340610487696, "grad_norm": 2.8621456623077393, "learning_rate": 2.709251101321586e-05, "loss": 0.2595, "step": 1230 }, { "epoch": 0.02712544139439312, "grad_norm": 2.4446868896484375, "learning_rate": 2.7114537444933922e-05, "loss": 0.2493, "step": 1231 }, { "epoch": 0.02714747668390928, "grad_norm": 3.178471803665161, "learning_rate": 2.7136563876651984e-05, "loss": 0.2311, "step": 1232 }, { "epoch": 0.02716951197342544, "grad_norm": 3.5877773761749268, "learning_rate": 2.7158590308370046e-05, "loss": 0.2802, "step": 1233 }, { "epoch": 0.027191547262941602, "grad_norm": 2.9589760303497314, "learning_rate": 2.7180616740088108e-05, "loss": 0.2041, "step": 1234 }, { "epoch": 0.02721358255245776, "grad_norm": 1.7532808780670166, "learning_rate": 2.720264317180617e-05, "loss": 0.2165, "step": 1235 }, { "epoch": 0.02723561784197392, "grad_norm": 2.5378479957580566, "learning_rate": 2.7224669603524228e-05, "loss": 0.2222, "step": 1236 }, { "epoch": 0.027257653131490082, "grad_norm": 2.787534236907959, "learning_rate": 2.724669603524229e-05, "loss": 0.1897, "step": 1237 }, { "epoch": 0.02727968842100624, "grad_norm": 2.90470290184021, "learning_rate": 2.7268722466960352e-05, "loss": 0.1871, "step": 1238 }, { "epoch": 0.0273017237105224, "grad_norm": 2.6578996181488037, "learning_rate": 2.7290748898678414e-05, "loss": 0.1873, "step": 1239 }, { "epoch": 0.027323759000038562, "grad_norm": 3.622091054916382, "learning_rate": 2.7312775330396475e-05, "loss": 0.2558, "step": 1240 }, { "epoch": 0.027345794289554724, "grad_norm": 4.806876182556152, "learning_rate": 2.7334801762114537e-05, "loss": 0.185, "step": 1241 }, { "epoch": 0.02736782957907088, "grad_norm": 3.003139019012451, "learning_rate": 2.73568281938326e-05, "loss": 0.158, "step": 1242 }, { "epoch": 0.027389864868587042, "grad_norm": 3.365955352783203, "learning_rate": 2.737885462555066e-05, "loss": 0.2413, "step": 1243 }, { "epoch": 0.027411900158103204, "grad_norm": 3.5340065956115723, "learning_rate": 2.7400881057268723e-05, "loss": 0.2325, "step": 1244 }, { "epoch": 0.02743393544761936, "grad_norm": 2.367172956466675, "learning_rate": 2.7422907488986785e-05, "loss": 0.258, "step": 1245 }, { "epoch": 0.027455970737135522, "grad_norm": 2.2584388256073, "learning_rate": 2.7444933920704846e-05, "loss": 0.1999, "step": 1246 }, { "epoch": 0.027478006026651684, "grad_norm": 2.710632801055908, "learning_rate": 2.7466960352422908e-05, "loss": 0.2063, "step": 1247 }, { "epoch": 0.02750004131616784, "grad_norm": 2.627814531326294, "learning_rate": 2.748898678414097e-05, "loss": 0.2173, "step": 1248 }, { "epoch": 0.027522076605684002, "grad_norm": 1.9246782064437866, "learning_rate": 2.7511013215859032e-05, "loss": 0.2137, "step": 1249 }, { "epoch": 0.027544111895200164, "grad_norm": 3.0471301078796387, "learning_rate": 2.7533039647577094e-05, "loss": 0.1829, "step": 1250 }, { "epoch": 0.027566147184716325, "grad_norm": 3.0395729541778564, "learning_rate": 2.7555066079295156e-05, "loss": 0.2214, "step": 1251 }, { "epoch": 0.027588182474232482, "grad_norm": 2.919290542602539, "learning_rate": 2.7577092511013217e-05, "loss": 0.242, "step": 1252 }, { "epoch": 0.027610217763748644, "grad_norm": 2.3138954639434814, "learning_rate": 2.759911894273128e-05, "loss": 0.19, "step": 1253 }, { "epoch": 0.027632253053264805, "grad_norm": 2.3339877128601074, "learning_rate": 2.762114537444934e-05, "loss": 0.1963, "step": 1254 }, { "epoch": 0.027654288342780962, "grad_norm": 2.443009376525879, "learning_rate": 2.76431718061674e-05, "loss": 0.2313, "step": 1255 }, { "epoch": 0.027676323632297124, "grad_norm": 1.6927855014801025, "learning_rate": 2.766519823788546e-05, "loss": 0.1753, "step": 1256 }, { "epoch": 0.027698358921813285, "grad_norm": 2.14739727973938, "learning_rate": 2.7687224669603523e-05, "loss": 0.2176, "step": 1257 }, { "epoch": 0.027720394211329442, "grad_norm": 2.1699061393737793, "learning_rate": 2.7709251101321585e-05, "loss": 0.2005, "step": 1258 }, { "epoch": 0.027742429500845604, "grad_norm": 2.7483177185058594, "learning_rate": 2.7731277533039647e-05, "loss": 0.1921, "step": 1259 }, { "epoch": 0.027764464790361765, "grad_norm": 2.8058903217315674, "learning_rate": 2.775330396475771e-05, "loss": 0.2352, "step": 1260 }, { "epoch": 0.027786500079877926, "grad_norm": 3.359823703765869, "learning_rate": 2.777533039647577e-05, "loss": 0.2239, "step": 1261 }, { "epoch": 0.027808535369394084, "grad_norm": 2.9268383979797363, "learning_rate": 2.7797356828193832e-05, "loss": 0.1539, "step": 1262 }, { "epoch": 0.027830570658910245, "grad_norm": 2.1297779083251953, "learning_rate": 2.7819383259911894e-05, "loss": 0.2709, "step": 1263 }, { "epoch": 0.027852605948426406, "grad_norm": 3.299407958984375, "learning_rate": 2.7841409691629956e-05, "loss": 0.2421, "step": 1264 }, { "epoch": 0.027874641237942564, "grad_norm": 2.519167423248291, "learning_rate": 2.7863436123348018e-05, "loss": 0.1915, "step": 1265 }, { "epoch": 0.027896676527458725, "grad_norm": 2.2730371952056885, "learning_rate": 2.788546255506608e-05, "loss": 0.2032, "step": 1266 }, { "epoch": 0.027918711816974886, "grad_norm": 2.8252766132354736, "learning_rate": 2.790748898678414e-05, "loss": 0.1917, "step": 1267 }, { "epoch": 0.027940747106491047, "grad_norm": 3.9888436794281006, "learning_rate": 2.7929515418502203e-05, "loss": 0.1645, "step": 1268 }, { "epoch": 0.027962782396007205, "grad_norm": 3.31514573097229, "learning_rate": 2.7951541850220265e-05, "loss": 0.1764, "step": 1269 }, { "epoch": 0.027984817685523366, "grad_norm": 2.481799602508545, "learning_rate": 2.7973568281938327e-05, "loss": 0.2498, "step": 1270 }, { "epoch": 0.028006852975039527, "grad_norm": 4.110326766967773, "learning_rate": 2.799559471365639e-05, "loss": 0.322, "step": 1271 }, { "epoch": 0.028028888264555685, "grad_norm": 2.823557138442993, "learning_rate": 2.801762114537445e-05, "loss": 0.2123, "step": 1272 }, { "epoch": 0.028050923554071846, "grad_norm": 3.1416397094726562, "learning_rate": 2.8039647577092512e-05, "loss": 0.2129, "step": 1273 }, { "epoch": 0.028072958843588007, "grad_norm": 2.528921604156494, "learning_rate": 2.8061674008810574e-05, "loss": 0.2331, "step": 1274 }, { "epoch": 0.028094994133104165, "grad_norm": 2.688331365585327, "learning_rate": 2.8083700440528632e-05, "loss": 0.2167, "step": 1275 }, { "epoch": 0.028117029422620326, "grad_norm": 3.168377637863159, "learning_rate": 2.8105726872246694e-05, "loss": 0.2436, "step": 1276 }, { "epoch": 0.028139064712136487, "grad_norm": 2.785221815109253, "learning_rate": 2.8127753303964756e-05, "loss": 0.2216, "step": 1277 }, { "epoch": 0.028161100001652648, "grad_norm": 3.577209234237671, "learning_rate": 2.8149779735682818e-05, "loss": 0.2931, "step": 1278 }, { "epoch": 0.028183135291168806, "grad_norm": 2.702573776245117, "learning_rate": 2.817180616740088e-05, "loss": 0.2573, "step": 1279 }, { "epoch": 0.028205170580684967, "grad_norm": 2.6510088443756104, "learning_rate": 2.819383259911894e-05, "loss": 0.2449, "step": 1280 }, { "epoch": 0.028227205870201128, "grad_norm": 2.260392904281616, "learning_rate": 2.8215859030837003e-05, "loss": 0.2186, "step": 1281 }, { "epoch": 0.028249241159717286, "grad_norm": 3.326524257659912, "learning_rate": 2.8237885462555065e-05, "loss": 0.2373, "step": 1282 }, { "epoch": 0.028271276449233447, "grad_norm": 2.91377592086792, "learning_rate": 2.8259911894273127e-05, "loss": 0.2241, "step": 1283 }, { "epoch": 0.028293311738749608, "grad_norm": 3.5604045391082764, "learning_rate": 2.828193832599119e-05, "loss": 0.214, "step": 1284 }, { "epoch": 0.02831534702826577, "grad_norm": 2.804419994354248, "learning_rate": 2.830396475770925e-05, "loss": 0.2115, "step": 1285 }, { "epoch": 0.028337382317781927, "grad_norm": 2.397944688796997, "learning_rate": 2.8325991189427313e-05, "loss": 0.2156, "step": 1286 }, { "epoch": 0.028359417607298088, "grad_norm": 3.271928071975708, "learning_rate": 2.8348017621145374e-05, "loss": 0.2231, "step": 1287 }, { "epoch": 0.02838145289681425, "grad_norm": 2.30417537689209, "learning_rate": 2.8370044052863436e-05, "loss": 0.2061, "step": 1288 }, { "epoch": 0.028403488186330407, "grad_norm": 2.3098561763763428, "learning_rate": 2.8392070484581498e-05, "loss": 0.1964, "step": 1289 }, { "epoch": 0.028425523475846568, "grad_norm": 2.620710849761963, "learning_rate": 2.841409691629956e-05, "loss": 0.2377, "step": 1290 }, { "epoch": 0.02844755876536273, "grad_norm": 2.453521728515625, "learning_rate": 2.843612334801762e-05, "loss": 0.2041, "step": 1291 }, { "epoch": 0.028469594054878887, "grad_norm": 2.9171109199523926, "learning_rate": 2.8458149779735683e-05, "loss": 0.243, "step": 1292 }, { "epoch": 0.028491629344395048, "grad_norm": 2.6787326335906982, "learning_rate": 2.8480176211453745e-05, "loss": 0.2125, "step": 1293 }, { "epoch": 0.02851366463391121, "grad_norm": 2.1256463527679443, "learning_rate": 2.8502202643171804e-05, "loss": 0.1535, "step": 1294 }, { "epoch": 0.02853569992342737, "grad_norm": 2.581195116043091, "learning_rate": 2.8524229074889866e-05, "loss": 0.1849, "step": 1295 }, { "epoch": 0.028557735212943528, "grad_norm": 1.800770878791809, "learning_rate": 2.8546255506607927e-05, "loss": 0.1522, "step": 1296 }, { "epoch": 0.02857977050245969, "grad_norm": 2.3405964374542236, "learning_rate": 2.856828193832599e-05, "loss": 0.2136, "step": 1297 }, { "epoch": 0.02860180579197585, "grad_norm": 3.1081485748291016, "learning_rate": 2.859030837004405e-05, "loss": 0.1692, "step": 1298 }, { "epoch": 0.028623841081492008, "grad_norm": 3.6019670963287354, "learning_rate": 2.8612334801762113e-05, "loss": 0.1907, "step": 1299 }, { "epoch": 0.02864587637100817, "grad_norm": 2.2542572021484375, "learning_rate": 2.8634361233480178e-05, "loss": 0.2074, "step": 1300 }, { "epoch": 0.02866791166052433, "grad_norm": 2.99780535697937, "learning_rate": 2.865638766519824e-05, "loss": 0.241, "step": 1301 }, { "epoch": 0.028689946950040488, "grad_norm": 2.9453773498535156, "learning_rate": 2.86784140969163e-05, "loss": 0.2375, "step": 1302 }, { "epoch": 0.02871198223955665, "grad_norm": 2.3272836208343506, "learning_rate": 2.8700440528634363e-05, "loss": 0.2032, "step": 1303 }, { "epoch": 0.02873401752907281, "grad_norm": 2.9406793117523193, "learning_rate": 2.8722466960352425e-05, "loss": 0.2732, "step": 1304 }, { "epoch": 0.02875605281858897, "grad_norm": 2.7597076892852783, "learning_rate": 2.8744493392070487e-05, "loss": 0.2244, "step": 1305 }, { "epoch": 0.02877808810810513, "grad_norm": 3.1059820652008057, "learning_rate": 2.876651982378855e-05, "loss": 0.1929, "step": 1306 }, { "epoch": 0.02880012339762129, "grad_norm": 2.7833313941955566, "learning_rate": 2.878854625550661e-05, "loss": 0.2229, "step": 1307 }, { "epoch": 0.02882215868713745, "grad_norm": 1.7339136600494385, "learning_rate": 2.8810572687224673e-05, "loss": 0.2384, "step": 1308 }, { "epoch": 0.02884419397665361, "grad_norm": 2.474156141281128, "learning_rate": 2.8832599118942734e-05, "loss": 0.1994, "step": 1309 }, { "epoch": 0.02886622926616977, "grad_norm": 2.976231575012207, "learning_rate": 2.8854625550660796e-05, "loss": 0.2278, "step": 1310 }, { "epoch": 0.02888826455568593, "grad_norm": 2.315129041671753, "learning_rate": 2.8876651982378858e-05, "loss": 0.2087, "step": 1311 }, { "epoch": 0.028910299845202093, "grad_norm": 3.113551139831543, "learning_rate": 2.889867841409692e-05, "loss": 0.2868, "step": 1312 }, { "epoch": 0.02893233513471825, "grad_norm": 2.1823244094848633, "learning_rate": 2.892070484581498e-05, "loss": 0.1629, "step": 1313 }, { "epoch": 0.02895437042423441, "grad_norm": 2.6481261253356934, "learning_rate": 2.894273127753304e-05, "loss": 0.1826, "step": 1314 }, { "epoch": 0.028976405713750573, "grad_norm": 2.751340866088867, "learning_rate": 2.8964757709251102e-05, "loss": 0.2033, "step": 1315 }, { "epoch": 0.02899844100326673, "grad_norm": 2.2858221530914307, "learning_rate": 2.8986784140969164e-05, "loss": 0.2361, "step": 1316 }, { "epoch": 0.02902047629278289, "grad_norm": 2.537257194519043, "learning_rate": 2.9008810572687226e-05, "loss": 0.2227, "step": 1317 }, { "epoch": 0.029042511582299053, "grad_norm": 3.286376953125, "learning_rate": 2.9030837004405287e-05, "loss": 0.2385, "step": 1318 }, { "epoch": 0.02906454687181521, "grad_norm": 2.95625376701355, "learning_rate": 2.905286343612335e-05, "loss": 0.2576, "step": 1319 }, { "epoch": 0.02908658216133137, "grad_norm": 2.645599126815796, "learning_rate": 2.907488986784141e-05, "loss": 0.2014, "step": 1320 }, { "epoch": 0.029108617450847533, "grad_norm": 2.762407064437866, "learning_rate": 2.9096916299559473e-05, "loss": 0.2477, "step": 1321 }, { "epoch": 0.029130652740363694, "grad_norm": 2.793487310409546, "learning_rate": 2.9118942731277535e-05, "loss": 0.1936, "step": 1322 }, { "epoch": 0.02915268802987985, "grad_norm": 3.226590633392334, "learning_rate": 2.9140969162995597e-05, "loss": 0.2297, "step": 1323 }, { "epoch": 0.029174723319396013, "grad_norm": 2.579878807067871, "learning_rate": 2.916299559471366e-05, "loss": 0.1952, "step": 1324 }, { "epoch": 0.029196758608912174, "grad_norm": 1.8865463733673096, "learning_rate": 2.918502202643172e-05, "loss": 0.2016, "step": 1325 }, { "epoch": 0.02921879389842833, "grad_norm": 2.2638797760009766, "learning_rate": 2.9207048458149782e-05, "loss": 0.2513, "step": 1326 }, { "epoch": 0.029240829187944493, "grad_norm": 2.5727038383483887, "learning_rate": 2.9229074889867844e-05, "loss": 0.1935, "step": 1327 }, { "epoch": 0.029262864477460654, "grad_norm": 2.8603882789611816, "learning_rate": 2.9251101321585906e-05, "loss": 0.2623, "step": 1328 }, { "epoch": 0.029284899766976815, "grad_norm": 2.1468584537506104, "learning_rate": 2.9273127753303967e-05, "loss": 0.2175, "step": 1329 }, { "epoch": 0.029306935056492973, "grad_norm": 2.703486204147339, "learning_rate": 2.929515418502203e-05, "loss": 0.2351, "step": 1330 }, { "epoch": 0.029328970346009134, "grad_norm": 3.480334997177124, "learning_rate": 2.931718061674009e-05, "loss": 0.2468, "step": 1331 }, { "epoch": 0.029351005635525295, "grad_norm": 2.283900737762451, "learning_rate": 2.9339207048458153e-05, "loss": 0.2032, "step": 1332 }, { "epoch": 0.029373040925041453, "grad_norm": 2.3369431495666504, "learning_rate": 2.936123348017621e-05, "loss": 0.2764, "step": 1333 }, { "epoch": 0.029395076214557614, "grad_norm": 2.4502060413360596, "learning_rate": 2.9383259911894273e-05, "loss": 0.1729, "step": 1334 }, { "epoch": 0.029417111504073775, "grad_norm": 2.6748552322387695, "learning_rate": 2.9405286343612335e-05, "loss": 0.1878, "step": 1335 }, { "epoch": 0.029439146793589933, "grad_norm": 2.18721866607666, "learning_rate": 2.9427312775330397e-05, "loss": 0.1606, "step": 1336 }, { "epoch": 0.029461182083106094, "grad_norm": 3.746263027191162, "learning_rate": 2.944933920704846e-05, "loss": 0.2097, "step": 1337 }, { "epoch": 0.029483217372622255, "grad_norm": 2.1602180004119873, "learning_rate": 2.947136563876652e-05, "loss": 0.1843, "step": 1338 }, { "epoch": 0.029505252662138416, "grad_norm": 2.102477788925171, "learning_rate": 2.9493392070484582e-05, "loss": 0.2432, "step": 1339 }, { "epoch": 0.029527287951654574, "grad_norm": 2.072204351425171, "learning_rate": 2.9515418502202644e-05, "loss": 0.1999, "step": 1340 }, { "epoch": 0.029549323241170735, "grad_norm": 3.587893486022949, "learning_rate": 2.9537444933920706e-05, "loss": 0.2129, "step": 1341 }, { "epoch": 0.029571358530686896, "grad_norm": 1.860559344291687, "learning_rate": 2.9559471365638768e-05, "loss": 0.1775, "step": 1342 }, { "epoch": 0.029593393820203054, "grad_norm": 2.9871408939361572, "learning_rate": 2.958149779735683e-05, "loss": 0.2371, "step": 1343 }, { "epoch": 0.029615429109719215, "grad_norm": 1.8299057483673096, "learning_rate": 2.960352422907489e-05, "loss": 0.1941, "step": 1344 }, { "epoch": 0.029637464399235376, "grad_norm": 2.7704689502716064, "learning_rate": 2.9625550660792953e-05, "loss": 0.2369, "step": 1345 }, { "epoch": 0.029659499688751534, "grad_norm": 1.9206063747406006, "learning_rate": 2.9647577092511015e-05, "loss": 0.2228, "step": 1346 }, { "epoch": 0.029681534978267695, "grad_norm": 2.5762972831726074, "learning_rate": 2.9669603524229077e-05, "loss": 0.2039, "step": 1347 }, { "epoch": 0.029703570267783856, "grad_norm": 3.1190829277038574, "learning_rate": 2.969162995594714e-05, "loss": 0.2359, "step": 1348 }, { "epoch": 0.029725605557300017, "grad_norm": 3.1124038696289062, "learning_rate": 2.97136563876652e-05, "loss": 0.2976, "step": 1349 }, { "epoch": 0.029747640846816175, "grad_norm": 3.0240848064422607, "learning_rate": 2.9735682819383262e-05, "loss": 0.2607, "step": 1350 }, { "epoch": 0.029769676136332336, "grad_norm": 2.480562210083008, "learning_rate": 2.9757709251101324e-05, "loss": 0.1576, "step": 1351 }, { "epoch": 0.029791711425848497, "grad_norm": 3.016371726989746, "learning_rate": 2.9779735682819386e-05, "loss": 0.2091, "step": 1352 }, { "epoch": 0.029813746715364655, "grad_norm": 2.4753239154815674, "learning_rate": 2.9801762114537444e-05, "loss": 0.2412, "step": 1353 }, { "epoch": 0.029835782004880816, "grad_norm": 2.039365291595459, "learning_rate": 2.9823788546255506e-05, "loss": 0.1835, "step": 1354 }, { "epoch": 0.029857817294396977, "grad_norm": 3.701620101928711, "learning_rate": 2.9845814977973568e-05, "loss": 0.2116, "step": 1355 }, { "epoch": 0.02987985258391314, "grad_norm": 2.68599009513855, "learning_rate": 2.986784140969163e-05, "loss": 0.2733, "step": 1356 }, { "epoch": 0.029901887873429296, "grad_norm": 2.502281427383423, "learning_rate": 2.988986784140969e-05, "loss": 0.1938, "step": 1357 }, { "epoch": 0.029923923162945457, "grad_norm": 2.353191375732422, "learning_rate": 2.9911894273127753e-05, "loss": 0.2476, "step": 1358 }, { "epoch": 0.02994595845246162, "grad_norm": 2.8782334327697754, "learning_rate": 2.9933920704845815e-05, "loss": 0.2604, "step": 1359 }, { "epoch": 0.029967993741977776, "grad_norm": 2.389878034591675, "learning_rate": 2.9955947136563877e-05, "loss": 0.1927, "step": 1360 }, { "epoch": 0.029990029031493937, "grad_norm": 2.5744121074676514, "learning_rate": 2.997797356828194e-05, "loss": 0.2294, "step": 1361 }, { "epoch": 0.0300120643210101, "grad_norm": 2.36771297454834, "learning_rate": 3e-05, "loss": 0.2153, "step": 1362 }, { "epoch": 0.030034099610526256, "grad_norm": 2.7398200035095215, "learning_rate": 2.999999996179848e-05, "loss": 0.2761, "step": 1363 }, { "epoch": 0.030056134900042417, "grad_norm": 3.201306104660034, "learning_rate": 2.9999999847193912e-05, "loss": 0.2462, "step": 1364 }, { "epoch": 0.03007817018955858, "grad_norm": 1.9348007440567017, "learning_rate": 2.9999999656186302e-05, "loss": 0.172, "step": 1365 }, { "epoch": 0.03010020547907474, "grad_norm": 3.2571194171905518, "learning_rate": 2.9999999388775656e-05, "loss": 0.2067, "step": 1366 }, { "epoch": 0.030122240768590897, "grad_norm": 2.382093667984009, "learning_rate": 2.9999999044961967e-05, "loss": 0.2142, "step": 1367 }, { "epoch": 0.03014427605810706, "grad_norm": 4.348245143890381, "learning_rate": 2.9999998624745236e-05, "loss": 0.2082, "step": 1368 }, { "epoch": 0.03016631134762322, "grad_norm": 2.878229856491089, "learning_rate": 2.9999998128125472e-05, "loss": 0.2709, "step": 1369 }, { "epoch": 0.030188346637139377, "grad_norm": 3.0409202575683594, "learning_rate": 2.999999755510267e-05, "loss": 0.1781, "step": 1370 }, { "epoch": 0.03021038192665554, "grad_norm": 5.56165075302124, "learning_rate": 2.9999996905676836e-05, "loss": 0.2157, "step": 1371 }, { "epoch": 0.0302324172161717, "grad_norm": 4.9175872802734375, "learning_rate": 2.9999996179847978e-05, "loss": 0.2233, "step": 1372 }, { "epoch": 0.03025445250568786, "grad_norm": 2.7622690200805664, "learning_rate": 2.9999995377616097e-05, "loss": 0.2059, "step": 1373 }, { "epoch": 0.03027648779520402, "grad_norm": 2.9762892723083496, "learning_rate": 2.9999994498981195e-05, "loss": 0.2325, "step": 1374 }, { "epoch": 0.03029852308472018, "grad_norm": 4.730923652648926, "learning_rate": 2.9999993543943273e-05, "loss": 0.1932, "step": 1375 }, { "epoch": 0.03032055837423634, "grad_norm": 5.464694976806641, "learning_rate": 2.9999992512502345e-05, "loss": 0.2917, "step": 1376 }, { "epoch": 0.0303425936637525, "grad_norm": 2.9160892963409424, "learning_rate": 2.999999140465841e-05, "loss": 0.2257, "step": 1377 }, { "epoch": 0.03036462895326866, "grad_norm": 2.924124240875244, "learning_rate": 2.9999990220411475e-05, "loss": 0.2229, "step": 1378 }, { "epoch": 0.03038666424278482, "grad_norm": 3.002365827560425, "learning_rate": 2.9999988959761545e-05, "loss": 0.2051, "step": 1379 }, { "epoch": 0.03040869953230098, "grad_norm": 2.0627121925354004, "learning_rate": 2.999998762270863e-05, "loss": 0.1778, "step": 1380 }, { "epoch": 0.03043073482181714, "grad_norm": 2.5297000408172607, "learning_rate": 2.999998620925273e-05, "loss": 0.2243, "step": 1381 }, { "epoch": 0.0304527701113333, "grad_norm": 2.931899309158325, "learning_rate": 2.999998471939386e-05, "loss": 0.2488, "step": 1382 }, { "epoch": 0.030474805400849462, "grad_norm": 2.7948291301727295, "learning_rate": 2.9999983153132028e-05, "loss": 0.2304, "step": 1383 }, { "epoch": 0.03049684069036562, "grad_norm": 2.4706196784973145, "learning_rate": 2.999998151046723e-05, "loss": 0.2044, "step": 1384 }, { "epoch": 0.03051887597988178, "grad_norm": 3.664155960083008, "learning_rate": 2.9999979791399488e-05, "loss": 0.2077, "step": 1385 }, { "epoch": 0.030540911269397942, "grad_norm": 2.134409189224243, "learning_rate": 2.99999779959288e-05, "loss": 0.2414, "step": 1386 }, { "epoch": 0.0305629465589141, "grad_norm": 2.0099270343780518, "learning_rate": 2.9999976124055187e-05, "loss": 0.1602, "step": 1387 }, { "epoch": 0.03058498184843026, "grad_norm": 2.026399850845337, "learning_rate": 2.999997417577865e-05, "loss": 0.1639, "step": 1388 }, { "epoch": 0.030607017137946422, "grad_norm": 2.944110631942749, "learning_rate": 2.9999972151099203e-05, "loss": 0.2141, "step": 1389 }, { "epoch": 0.030629052427462583, "grad_norm": 2.9902901649475098, "learning_rate": 2.9999970050016847e-05, "loss": 0.2065, "step": 1390 }, { "epoch": 0.03065108771697874, "grad_norm": 3.27616286277771, "learning_rate": 2.9999967872531607e-05, "loss": 0.2001, "step": 1391 }, { "epoch": 0.030673123006494902, "grad_norm": 1.5432050228118896, "learning_rate": 2.9999965618643484e-05, "loss": 0.2645, "step": 1392 }, { "epoch": 0.030695158296011063, "grad_norm": 2.270176649093628, "learning_rate": 2.999996328835249e-05, "loss": 0.2445, "step": 1393 }, { "epoch": 0.03071719358552722, "grad_norm": 2.451404333114624, "learning_rate": 2.9999960881658646e-05, "loss": 0.2034, "step": 1394 }, { "epoch": 0.030739228875043382, "grad_norm": 2.09096097946167, "learning_rate": 2.9999958398561954e-05, "loss": 0.208, "step": 1395 }, { "epoch": 0.030761264164559543, "grad_norm": 2.344715118408203, "learning_rate": 2.9999955839062426e-05, "loss": 0.138, "step": 1396 }, { "epoch": 0.0307832994540757, "grad_norm": 3.4342610836029053, "learning_rate": 2.9999953203160086e-05, "loss": 0.2918, "step": 1397 }, { "epoch": 0.030805334743591862, "grad_norm": 2.8994085788726807, "learning_rate": 2.9999950490854936e-05, "loss": 0.2143, "step": 1398 }, { "epoch": 0.030827370033108023, "grad_norm": 2.632744789123535, "learning_rate": 2.9999947702147e-05, "loss": 0.2153, "step": 1399 }, { "epoch": 0.030849405322624184, "grad_norm": 1.9772915840148926, "learning_rate": 2.999994483703628e-05, "loss": 0.1873, "step": 1400 }, { "epoch": 0.030871440612140342, "grad_norm": 3.605748176574707, "learning_rate": 2.9999941895522805e-05, "loss": 0.2289, "step": 1401 }, { "epoch": 0.030893475901656503, "grad_norm": 2.3456928730010986, "learning_rate": 2.9999938877606576e-05, "loss": 0.2193, "step": 1402 }, { "epoch": 0.030915511191172664, "grad_norm": 2.8290839195251465, "learning_rate": 2.9999935783287616e-05, "loss": 0.1983, "step": 1403 }, { "epoch": 0.030937546480688822, "grad_norm": 2.686522960662842, "learning_rate": 2.999993261256594e-05, "loss": 0.2433, "step": 1404 }, { "epoch": 0.030959581770204983, "grad_norm": 2.500715732574463, "learning_rate": 2.9999929365441564e-05, "loss": 0.2254, "step": 1405 }, { "epoch": 0.030981617059721144, "grad_norm": 2.5738332271575928, "learning_rate": 2.9999926041914506e-05, "loss": 0.1936, "step": 1406 }, { "epoch": 0.031003652349237302, "grad_norm": 2.014705181121826, "learning_rate": 2.9999922641984777e-05, "loss": 0.1902, "step": 1407 }, { "epoch": 0.031025687638753463, "grad_norm": 1.4814574718475342, "learning_rate": 2.99999191656524e-05, "loss": 0.2027, "step": 1408 }, { "epoch": 0.031047722928269624, "grad_norm": 2.0942842960357666, "learning_rate": 2.9999915612917393e-05, "loss": 0.1258, "step": 1409 }, { "epoch": 0.031069758217785785, "grad_norm": 2.077263593673706, "learning_rate": 2.999991198377977e-05, "loss": 0.1816, "step": 1410 }, { "epoch": 0.031091793507301943, "grad_norm": 1.9494309425354004, "learning_rate": 2.999990827823955e-05, "loss": 0.2615, "step": 1411 }, { "epoch": 0.031113828796818104, "grad_norm": 2.8590948581695557, "learning_rate": 2.9999904496296755e-05, "loss": 0.1802, "step": 1412 }, { "epoch": 0.031135864086334265, "grad_norm": 2.1161985397338867, "learning_rate": 2.9999900637951406e-05, "loss": 0.1796, "step": 1413 }, { "epoch": 0.031157899375850423, "grad_norm": 3.4794747829437256, "learning_rate": 2.9999896703203517e-05, "loss": 0.2142, "step": 1414 }, { "epoch": 0.031179934665366584, "grad_norm": 3.7188634872436523, "learning_rate": 2.9999892692053113e-05, "loss": 0.2182, "step": 1415 }, { "epoch": 0.031201969954882745, "grad_norm": 3.6089015007019043, "learning_rate": 2.9999888604500205e-05, "loss": 0.2152, "step": 1416 }, { "epoch": 0.031224005244398906, "grad_norm": 2.5169692039489746, "learning_rate": 2.9999884440544828e-05, "loss": 0.1987, "step": 1417 }, { "epoch": 0.031246040533915064, "grad_norm": 3.125302314758301, "learning_rate": 2.999988020018699e-05, "loss": 0.2352, "step": 1418 }, { "epoch": 0.031268075823431225, "grad_norm": 2.7216687202453613, "learning_rate": 2.9999875883426726e-05, "loss": 0.2434, "step": 1419 }, { "epoch": 0.03129011111294738, "grad_norm": 2.602871894836426, "learning_rate": 2.9999871490264043e-05, "loss": 0.1915, "step": 1420 }, { "epoch": 0.03131214640246355, "grad_norm": 2.138556480407715, "learning_rate": 2.9999867020698978e-05, "loss": 0.2286, "step": 1421 }, { "epoch": 0.031334181691979705, "grad_norm": 3.2792701721191406, "learning_rate": 2.999986247473154e-05, "loss": 0.2118, "step": 1422 }, { "epoch": 0.03135621698149586, "grad_norm": 3.0996525287628174, "learning_rate": 2.9999857852361763e-05, "loss": 0.1886, "step": 1423 }, { "epoch": 0.03137825227101203, "grad_norm": 2.828702688217163, "learning_rate": 2.9999853153589667e-05, "loss": 0.2743, "step": 1424 }, { "epoch": 0.031400287560528185, "grad_norm": 2.8200645446777344, "learning_rate": 2.9999848378415274e-05, "loss": 0.2545, "step": 1425 }, { "epoch": 0.03142232285004434, "grad_norm": 2.780768632888794, "learning_rate": 2.9999843526838608e-05, "loss": 0.2153, "step": 1426 }, { "epoch": 0.03144435813956051, "grad_norm": 2.381836175918579, "learning_rate": 2.9999838598859694e-05, "loss": 0.2069, "step": 1427 }, { "epoch": 0.031466393429076665, "grad_norm": 2.9434878826141357, "learning_rate": 2.9999833594478565e-05, "loss": 0.1656, "step": 1428 }, { "epoch": 0.03148842871859282, "grad_norm": 1.975088357925415, "learning_rate": 2.9999828513695234e-05, "loss": 0.2017, "step": 1429 }, { "epoch": 0.03151046400810899, "grad_norm": 3.1371941566467285, "learning_rate": 2.9999823356509737e-05, "loss": 0.1872, "step": 1430 }, { "epoch": 0.031532499297625145, "grad_norm": 2.518996477127075, "learning_rate": 2.9999818122922092e-05, "loss": 0.1854, "step": 1431 }, { "epoch": 0.03155453458714131, "grad_norm": 3.061795473098755, "learning_rate": 2.9999812812932332e-05, "loss": 0.1954, "step": 1432 }, { "epoch": 0.03157656987665747, "grad_norm": 2.326853036880493, "learning_rate": 2.9999807426540482e-05, "loss": 0.2257, "step": 1433 }, { "epoch": 0.031598605166173625, "grad_norm": 2.298957347869873, "learning_rate": 2.999980196374657e-05, "loss": 0.2018, "step": 1434 }, { "epoch": 0.03162064045568979, "grad_norm": 2.2237815856933594, "learning_rate": 2.999979642455062e-05, "loss": 0.1875, "step": 1435 }, { "epoch": 0.03164267574520595, "grad_norm": 1.899704933166504, "learning_rate": 2.9999790808952663e-05, "loss": 0.2688, "step": 1436 }, { "epoch": 0.031664711034722105, "grad_norm": 2.656571865081787, "learning_rate": 2.9999785116952732e-05, "loss": 0.1914, "step": 1437 }, { "epoch": 0.03168674632423827, "grad_norm": 1.7956653833389282, "learning_rate": 2.999977934855085e-05, "loss": 0.152, "step": 1438 }, { "epoch": 0.03170878161375443, "grad_norm": 1.8585726022720337, "learning_rate": 2.9999773503747048e-05, "loss": 0.206, "step": 1439 }, { "epoch": 0.031730816903270585, "grad_norm": 2.9016623497009277, "learning_rate": 2.9999767582541356e-05, "loss": 0.2426, "step": 1440 }, { "epoch": 0.03175285219278675, "grad_norm": 2.104621648788452, "learning_rate": 2.9999761584933805e-05, "loss": 0.1838, "step": 1441 }, { "epoch": 0.03177488748230291, "grad_norm": 2.59928560256958, "learning_rate": 2.9999755510924423e-05, "loss": 0.2593, "step": 1442 }, { "epoch": 0.031796922771819065, "grad_norm": 2.392934799194336, "learning_rate": 2.9999749360513246e-05, "loss": 0.1761, "step": 1443 }, { "epoch": 0.03181895806133523, "grad_norm": 3.2071659564971924, "learning_rate": 2.9999743133700296e-05, "loss": 0.2041, "step": 1444 }, { "epoch": 0.03184099335085139, "grad_norm": 2.4209229946136475, "learning_rate": 2.9999736830485615e-05, "loss": 0.2228, "step": 1445 }, { "epoch": 0.031863028640367545, "grad_norm": 1.9142885208129883, "learning_rate": 2.9999730450869233e-05, "loss": 0.191, "step": 1446 }, { "epoch": 0.03188506392988371, "grad_norm": 2.0143861770629883, "learning_rate": 2.9999723994851177e-05, "loss": 0.2671, "step": 1447 }, { "epoch": 0.03190709921939987, "grad_norm": 2.161675453186035, "learning_rate": 2.9999717462431485e-05, "loss": 0.2122, "step": 1448 }, { "epoch": 0.03192913450891603, "grad_norm": 2.8800101280212402, "learning_rate": 2.9999710853610186e-05, "loss": 0.1924, "step": 1449 }, { "epoch": 0.03195116979843219, "grad_norm": 2.651376247406006, "learning_rate": 2.999970416838732e-05, "loss": 0.2195, "step": 1450 }, { "epoch": 0.03197320508794835, "grad_norm": 2.2058751583099365, "learning_rate": 2.9999697406762913e-05, "loss": 0.2129, "step": 1451 }, { "epoch": 0.03199524037746451, "grad_norm": 2.383312463760376, "learning_rate": 2.9999690568737006e-05, "loss": 0.2202, "step": 1452 }, { "epoch": 0.03201727566698067, "grad_norm": 2.482146739959717, "learning_rate": 2.999968365430963e-05, "loss": 0.2038, "step": 1453 }, { "epoch": 0.03203931095649683, "grad_norm": 2.412886381149292, "learning_rate": 2.999967666348082e-05, "loss": 0.2135, "step": 1454 }, { "epoch": 0.03206134624601299, "grad_norm": 2.9312868118286133, "learning_rate": 2.9999669596250617e-05, "loss": 0.2137, "step": 1455 }, { "epoch": 0.03208338153552915, "grad_norm": 2.339761734008789, "learning_rate": 2.9999662452619055e-05, "loss": 0.2623, "step": 1456 }, { "epoch": 0.03210541682504531, "grad_norm": 5.413787364959717, "learning_rate": 2.9999655232586164e-05, "loss": 0.1691, "step": 1457 }, { "epoch": 0.03212745211456147, "grad_norm": 4.353178024291992, "learning_rate": 2.9999647936151986e-05, "loss": 0.1913, "step": 1458 }, { "epoch": 0.03214948740407763, "grad_norm": 2.824965476989746, "learning_rate": 2.999964056331656e-05, "loss": 0.2661, "step": 1459 }, { "epoch": 0.03217152269359379, "grad_norm": 2.5731091499328613, "learning_rate": 2.999963311407992e-05, "loss": 0.2138, "step": 1460 }, { "epoch": 0.03219355798310995, "grad_norm": 1.86831533908844, "learning_rate": 2.9999625588442105e-05, "loss": 0.2305, "step": 1461 }, { "epoch": 0.03221559327262611, "grad_norm": 3.061866521835327, "learning_rate": 2.9999617986403152e-05, "loss": 0.2069, "step": 1462 }, { "epoch": 0.03223762856214227, "grad_norm": 3.5707783699035645, "learning_rate": 2.9999610307963105e-05, "loss": 0.2355, "step": 1463 }, { "epoch": 0.03225966385165843, "grad_norm": 2.3200700283050537, "learning_rate": 2.9999602553121997e-05, "loss": 0.1635, "step": 1464 }, { "epoch": 0.03228169914117459, "grad_norm": 2.9810609817504883, "learning_rate": 2.9999594721879872e-05, "loss": 0.2361, "step": 1465 }, { "epoch": 0.032303734430690755, "grad_norm": 2.1035115718841553, "learning_rate": 2.9999586814236765e-05, "loss": 0.2687, "step": 1466 }, { "epoch": 0.03232576972020691, "grad_norm": 2.453146457672119, "learning_rate": 2.999957883019272e-05, "loss": 0.2116, "step": 1467 }, { "epoch": 0.03234780500972307, "grad_norm": 2.7171037197113037, "learning_rate": 2.9999570769747772e-05, "loss": 0.1702, "step": 1468 }, { "epoch": 0.032369840299239235, "grad_norm": 2.4584619998931885, "learning_rate": 2.999956263290197e-05, "loss": 0.2857, "step": 1469 }, { "epoch": 0.03239187558875539, "grad_norm": 1.8372347354888916, "learning_rate": 2.9999554419655353e-05, "loss": 0.1687, "step": 1470 }, { "epoch": 0.03241391087827155, "grad_norm": 2.500131130218506, "learning_rate": 2.9999546130007962e-05, "loss": 0.2251, "step": 1471 }, { "epoch": 0.032435946167787715, "grad_norm": 2.4470086097717285, "learning_rate": 2.9999537763959836e-05, "loss": 0.2212, "step": 1472 }, { "epoch": 0.03245798145730387, "grad_norm": 2.4842886924743652, "learning_rate": 2.9999529321511026e-05, "loss": 0.1925, "step": 1473 }, { "epoch": 0.03248001674682003, "grad_norm": 2.4810872077941895, "learning_rate": 2.9999520802661568e-05, "loss": 0.2119, "step": 1474 }, { "epoch": 0.032502052036336194, "grad_norm": 2.918168306350708, "learning_rate": 2.9999512207411507e-05, "loss": 0.2272, "step": 1475 }, { "epoch": 0.03252408732585235, "grad_norm": 2.0136165618896484, "learning_rate": 2.9999503535760883e-05, "loss": 0.2557, "step": 1476 }, { "epoch": 0.03254612261536851, "grad_norm": 2.2484498023986816, "learning_rate": 2.999949478770975e-05, "loss": 0.2285, "step": 1477 }, { "epoch": 0.032568157904884674, "grad_norm": 2.027024984359741, "learning_rate": 2.999948596325814e-05, "loss": 0.1595, "step": 1478 }, { "epoch": 0.03259019319440083, "grad_norm": 2.947331428527832, "learning_rate": 2.9999477062406115e-05, "loss": 0.239, "step": 1479 }, { "epoch": 0.03261222848391699, "grad_norm": 2.632817506790161, "learning_rate": 2.99994680851537e-05, "loss": 0.19, "step": 1480 }, { "epoch": 0.032634263773433154, "grad_norm": 2.079902172088623, "learning_rate": 2.9999459031500956e-05, "loss": 0.1742, "step": 1481 }, { "epoch": 0.03265629906294931, "grad_norm": 1.9827262163162231, "learning_rate": 2.9999449901447923e-05, "loss": 0.1917, "step": 1482 }, { "epoch": 0.03267833435246548, "grad_norm": 3.7527973651885986, "learning_rate": 2.999944069499465e-05, "loss": 0.19, "step": 1483 }, { "epoch": 0.032700369641981634, "grad_norm": 2.2329232692718506, "learning_rate": 2.9999431412141178e-05, "loss": 0.1844, "step": 1484 }, { "epoch": 0.03272240493149779, "grad_norm": 2.394864797592163, "learning_rate": 2.999942205288756e-05, "loss": 0.2187, "step": 1485 }, { "epoch": 0.03274444022101396, "grad_norm": 2.4008142948150635, "learning_rate": 2.9999412617233843e-05, "loss": 0.2167, "step": 1486 }, { "epoch": 0.032766475510530114, "grad_norm": 1.8207476139068604, "learning_rate": 2.9999403105180073e-05, "loss": 0.1829, "step": 1487 }, { "epoch": 0.03278851080004627, "grad_norm": 2.3769166469573975, "learning_rate": 2.99993935167263e-05, "loss": 0.2313, "step": 1488 }, { "epoch": 0.03281054608956244, "grad_norm": 1.9329476356506348, "learning_rate": 2.9999383851872575e-05, "loss": 0.2226, "step": 1489 }, { "epoch": 0.032832581379078594, "grad_norm": 1.7186075448989868, "learning_rate": 2.999937411061894e-05, "loss": 0.2478, "step": 1490 }, { "epoch": 0.03285461666859475, "grad_norm": 3.4806721210479736, "learning_rate": 2.9999364292965455e-05, "loss": 0.2432, "step": 1491 }, { "epoch": 0.03287665195811092, "grad_norm": 2.5311036109924316, "learning_rate": 2.999935439891216e-05, "loss": 0.2135, "step": 1492 }, { "epoch": 0.032898687247627074, "grad_norm": 2.366364002227783, "learning_rate": 2.9999344428459107e-05, "loss": 0.2133, "step": 1493 }, { "epoch": 0.03292072253714323, "grad_norm": 2.920992374420166, "learning_rate": 2.999933438160636e-05, "loss": 0.2849, "step": 1494 }, { "epoch": 0.0329427578266594, "grad_norm": 3.1043319702148438, "learning_rate": 2.9999324258353948e-05, "loss": 0.1901, "step": 1495 }, { "epoch": 0.032964793116175554, "grad_norm": 3.132878303527832, "learning_rate": 2.9999314058701942e-05, "loss": 0.2469, "step": 1496 }, { "epoch": 0.03298682840569171, "grad_norm": 2.174670457839966, "learning_rate": 2.9999303782650384e-05, "loss": 0.1533, "step": 1497 }, { "epoch": 0.03300886369520788, "grad_norm": 2.416364908218384, "learning_rate": 2.9999293430199327e-05, "loss": 0.1862, "step": 1498 }, { "epoch": 0.033030898984724034, "grad_norm": 2.81280779838562, "learning_rate": 2.9999283001348828e-05, "loss": 0.17, "step": 1499 }, { "epoch": 0.0330529342742402, "grad_norm": 2.6933951377868652, "learning_rate": 2.9999272496098936e-05, "loss": 0.2082, "step": 1500 }, { "epoch": 0.03307496956375636, "grad_norm": 1.8237242698669434, "learning_rate": 2.9999261914449705e-05, "loss": 0.1479, "step": 1501 }, { "epoch": 0.033097004853272514, "grad_norm": 2.870042324066162, "learning_rate": 2.999925125640119e-05, "loss": 0.226, "step": 1502 }, { "epoch": 0.03311904014278868, "grad_norm": 2.047572135925293, "learning_rate": 2.9999240521953447e-05, "loss": 0.2, "step": 1503 }, { "epoch": 0.03314107543230484, "grad_norm": 2.0639894008636475, "learning_rate": 2.999922971110653e-05, "loss": 0.1835, "step": 1504 }, { "epoch": 0.033163110721820994, "grad_norm": 2.115103244781494, "learning_rate": 2.999921882386049e-05, "loss": 0.2129, "step": 1505 }, { "epoch": 0.03318514601133716, "grad_norm": 2.184892416000366, "learning_rate": 2.9999207860215387e-05, "loss": 0.2071, "step": 1506 }, { "epoch": 0.03320718130085332, "grad_norm": 2.704177141189575, "learning_rate": 2.999919682017127e-05, "loss": 0.2451, "step": 1507 }, { "epoch": 0.033229216590369474, "grad_norm": 5.858370780944824, "learning_rate": 2.9999185703728208e-05, "loss": 0.2017, "step": 1508 }, { "epoch": 0.03325125187988564, "grad_norm": 2.556501626968384, "learning_rate": 2.9999174510886246e-05, "loss": 0.2009, "step": 1509 }, { "epoch": 0.0332732871694018, "grad_norm": 2.0876235961914062, "learning_rate": 2.9999163241645447e-05, "loss": 0.1963, "step": 1510 }, { "epoch": 0.033295322458917954, "grad_norm": 2.8241279125213623, "learning_rate": 2.999915189600586e-05, "loss": 0.2241, "step": 1511 }, { "epoch": 0.03331735774843412, "grad_norm": 2.092841386795044, "learning_rate": 2.999914047396756e-05, "loss": 0.1832, "step": 1512 }, { "epoch": 0.03333939303795028, "grad_norm": 1.9143059253692627, "learning_rate": 2.999912897553058e-05, "loss": 0.1326, "step": 1513 }, { "epoch": 0.033361428327466434, "grad_norm": 2.887096405029297, "learning_rate": 2.9999117400695006e-05, "loss": 0.2586, "step": 1514 }, { "epoch": 0.0333834636169826, "grad_norm": 2.3843276500701904, "learning_rate": 2.9999105749460883e-05, "loss": 0.2402, "step": 1515 }, { "epoch": 0.03340549890649876, "grad_norm": 1.6576262712478638, "learning_rate": 2.9999094021828265e-05, "loss": 0.1856, "step": 1516 }, { "epoch": 0.033427534196014914, "grad_norm": 2.479098320007324, "learning_rate": 2.9999082217797222e-05, "loss": 0.2127, "step": 1517 }, { "epoch": 0.03344956948553108, "grad_norm": 2.318106174468994, "learning_rate": 2.999907033736781e-05, "loss": 0.2077, "step": 1518 }, { "epoch": 0.03347160477504724, "grad_norm": 2.4967381954193115, "learning_rate": 2.9999058380540086e-05, "loss": 0.2151, "step": 1519 }, { "epoch": 0.0334936400645634, "grad_norm": 2.3221471309661865, "learning_rate": 2.9999046347314118e-05, "loss": 0.1535, "step": 1520 }, { "epoch": 0.03351567535407956, "grad_norm": 1.9692414999008179, "learning_rate": 2.9999034237689962e-05, "loss": 0.1625, "step": 1521 }, { "epoch": 0.03353771064359572, "grad_norm": 2.1885478496551514, "learning_rate": 2.9999022051667684e-05, "loss": 0.1986, "step": 1522 }, { "epoch": 0.03355974593311188, "grad_norm": 1.7037841081619263, "learning_rate": 2.999900978924734e-05, "loss": 0.1819, "step": 1523 }, { "epoch": 0.03358178122262804, "grad_norm": 2.8930468559265137, "learning_rate": 2.9998997450429e-05, "loss": 0.1589, "step": 1524 }, { "epoch": 0.0336038165121442, "grad_norm": 3.3246591091156006, "learning_rate": 2.999898503521272e-05, "loss": 0.1838, "step": 1525 }, { "epoch": 0.03362585180166036, "grad_norm": 2.571681022644043, "learning_rate": 2.9998972543598565e-05, "loss": 0.1669, "step": 1526 }, { "epoch": 0.03364788709117652, "grad_norm": 2.339069366455078, "learning_rate": 2.9998959975586605e-05, "loss": 0.1568, "step": 1527 }, { "epoch": 0.03366992238069268, "grad_norm": 2.6737945079803467, "learning_rate": 2.9998947331176893e-05, "loss": 0.1861, "step": 1528 }, { "epoch": 0.03369195767020884, "grad_norm": 2.3925397396087646, "learning_rate": 2.99989346103695e-05, "loss": 0.1609, "step": 1529 }, { "epoch": 0.033713992959725, "grad_norm": 2.1816043853759766, "learning_rate": 2.9998921813164493e-05, "loss": 0.1886, "step": 1530 }, { "epoch": 0.03373602824924116, "grad_norm": 2.411606788635254, "learning_rate": 2.9998908939561932e-05, "loss": 0.2038, "step": 1531 }, { "epoch": 0.03375806353875732, "grad_norm": 2.035348892211914, "learning_rate": 2.9998895989561885e-05, "loss": 0.1774, "step": 1532 }, { "epoch": 0.03378009882827348, "grad_norm": 2.665674924850464, "learning_rate": 2.999888296316442e-05, "loss": 0.1923, "step": 1533 }, { "epoch": 0.03380213411778964, "grad_norm": 2.4880332946777344, "learning_rate": 2.99988698603696e-05, "loss": 0.2063, "step": 1534 }, { "epoch": 0.0338241694073058, "grad_norm": 2.979024887084961, "learning_rate": 2.999885668117749e-05, "loss": 0.1738, "step": 1535 }, { "epoch": 0.03384620469682196, "grad_norm": 2.5971665382385254, "learning_rate": 2.9998843425588163e-05, "loss": 0.2113, "step": 1536 }, { "epoch": 0.033868239986338124, "grad_norm": 2.1794075965881348, "learning_rate": 2.9998830093601684e-05, "loss": 0.2547, "step": 1537 }, { "epoch": 0.03389027527585428, "grad_norm": 2.6802239418029785, "learning_rate": 2.9998816685218113e-05, "loss": 0.1941, "step": 1538 }, { "epoch": 0.03391231056537044, "grad_norm": 2.7686820030212402, "learning_rate": 2.9998803200437536e-05, "loss": 0.1642, "step": 1539 }, { "epoch": 0.033934345854886604, "grad_norm": 2.3543784618377686, "learning_rate": 2.9998789639260005e-05, "loss": 0.2091, "step": 1540 }, { "epoch": 0.03395638114440276, "grad_norm": 1.9149447679519653, "learning_rate": 2.9998776001685598e-05, "loss": 0.1759, "step": 1541 }, { "epoch": 0.03397841643391892, "grad_norm": 2.2423651218414307, "learning_rate": 2.999876228771438e-05, "loss": 0.1783, "step": 1542 }, { "epoch": 0.034000451723435084, "grad_norm": 2.167536497116089, "learning_rate": 2.9998748497346426e-05, "loss": 0.2031, "step": 1543 }, { "epoch": 0.03402248701295124, "grad_norm": 6.410635471343994, "learning_rate": 2.9998734630581793e-05, "loss": 0.2582, "step": 1544 }, { "epoch": 0.0340445223024674, "grad_norm": 2.4739060401916504, "learning_rate": 2.9998720687420572e-05, "loss": 0.1445, "step": 1545 }, { "epoch": 0.034066557591983564, "grad_norm": 1.9970877170562744, "learning_rate": 2.999870666786282e-05, "loss": 0.2423, "step": 1546 }, { "epoch": 0.03408859288149972, "grad_norm": 2.997537612915039, "learning_rate": 2.9998692571908612e-05, "loss": 0.2182, "step": 1547 }, { "epoch": 0.03411062817101588, "grad_norm": 3.2771565914154053, "learning_rate": 2.9998678399558022e-05, "loss": 0.197, "step": 1548 }, { "epoch": 0.034132663460532044, "grad_norm": 1.996707797050476, "learning_rate": 2.999866415081112e-05, "loss": 0.2214, "step": 1549 }, { "epoch": 0.0341546987500482, "grad_norm": 2.909700632095337, "learning_rate": 2.9998649825667976e-05, "loss": 0.203, "step": 1550 }, { "epoch": 0.03417673403956436, "grad_norm": 1.9393125772476196, "learning_rate": 2.9998635424128665e-05, "loss": 0.2343, "step": 1551 }, { "epoch": 0.034198769329080524, "grad_norm": 2.551891803741455, "learning_rate": 2.999862094619326e-05, "loss": 0.1898, "step": 1552 }, { "epoch": 0.03422080461859668, "grad_norm": 2.993811845779419, "learning_rate": 2.9998606391861837e-05, "loss": 0.2065, "step": 1553 }, { "epoch": 0.034242839908112846, "grad_norm": 2.132248878479004, "learning_rate": 2.9998591761134472e-05, "loss": 0.2138, "step": 1554 }, { "epoch": 0.034264875197629004, "grad_norm": 2.093893051147461, "learning_rate": 2.9998577054011237e-05, "loss": 0.195, "step": 1555 }, { "epoch": 0.03428691048714516, "grad_norm": 2.457646608352661, "learning_rate": 2.99985622704922e-05, "loss": 0.2049, "step": 1556 }, { "epoch": 0.034308945776661326, "grad_norm": 1.9600709676742554, "learning_rate": 2.999854741057745e-05, "loss": 0.1964, "step": 1557 }, { "epoch": 0.034330981066177484, "grad_norm": 3.1462275981903076, "learning_rate": 2.9998532474267047e-05, "loss": 0.2618, "step": 1558 }, { "epoch": 0.03435301635569364, "grad_norm": 2.676966905593872, "learning_rate": 2.9998517461561078e-05, "loss": 0.214, "step": 1559 }, { "epoch": 0.034375051645209806, "grad_norm": 3.413569211959839, "learning_rate": 2.999850237245962e-05, "loss": 0.1982, "step": 1560 }, { "epoch": 0.034397086934725964, "grad_norm": 2.136289358139038, "learning_rate": 2.999848720696274e-05, "loss": 0.1975, "step": 1561 }, { "epoch": 0.03441912222424212, "grad_norm": 2.1256179809570312, "learning_rate": 2.9998471965070527e-05, "loss": 0.1436, "step": 1562 }, { "epoch": 0.034441157513758286, "grad_norm": 2.206298351287842, "learning_rate": 2.9998456646783056e-05, "loss": 0.1795, "step": 1563 }, { "epoch": 0.034463192803274444, "grad_norm": 2.6175575256347656, "learning_rate": 2.99984412521004e-05, "loss": 0.197, "step": 1564 }, { "epoch": 0.0344852280927906, "grad_norm": 1.9785828590393066, "learning_rate": 2.999842578102264e-05, "loss": 0.1646, "step": 1565 }, { "epoch": 0.034507263382306766, "grad_norm": 2.5134499073028564, "learning_rate": 2.999841023354985e-05, "loss": 0.1792, "step": 1566 }, { "epoch": 0.034529298671822924, "grad_norm": 2.46706485748291, "learning_rate": 2.999839460968212e-05, "loss": 0.2494, "step": 1567 }, { "epoch": 0.03455133396133908, "grad_norm": 2.722097635269165, "learning_rate": 2.999837890941952e-05, "loss": 0.1896, "step": 1568 }, { "epoch": 0.034573369250855246, "grad_norm": 2.525578498840332, "learning_rate": 2.9998363132762134e-05, "loss": 0.1611, "step": 1569 }, { "epoch": 0.034595404540371404, "grad_norm": 2.874803066253662, "learning_rate": 2.9998347279710045e-05, "loss": 0.2145, "step": 1570 }, { "epoch": 0.03461743982988757, "grad_norm": 1.9391862154006958, "learning_rate": 2.9998331350263325e-05, "loss": 0.2249, "step": 1571 }, { "epoch": 0.034639475119403726, "grad_norm": 2.658616781234741, "learning_rate": 2.9998315344422063e-05, "loss": 0.2382, "step": 1572 }, { "epoch": 0.034661510408919884, "grad_norm": 2.6060638427734375, "learning_rate": 2.999829926218634e-05, "loss": 0.2059, "step": 1573 }, { "epoch": 0.03468354569843605, "grad_norm": 4.029916763305664, "learning_rate": 2.9998283103556233e-05, "loss": 0.1714, "step": 1574 }, { "epoch": 0.034705580987952206, "grad_norm": 2.4589786529541016, "learning_rate": 2.9998266868531827e-05, "loss": 0.1478, "step": 1575 }, { "epoch": 0.034727616277468364, "grad_norm": 1.9026607275009155, "learning_rate": 2.999825055711321e-05, "loss": 0.2275, "step": 1576 }, { "epoch": 0.03474965156698453, "grad_norm": 2.1233432292938232, "learning_rate": 2.999823416930046e-05, "loss": 0.218, "step": 1577 }, { "epoch": 0.034771686856500686, "grad_norm": 2.1926474571228027, "learning_rate": 2.9998217705093658e-05, "loss": 0.1844, "step": 1578 }, { "epoch": 0.034793722146016844, "grad_norm": 3.1858415603637695, "learning_rate": 2.999820116449289e-05, "loss": 0.2449, "step": 1579 }, { "epoch": 0.03481575743553301, "grad_norm": 1.6996924877166748, "learning_rate": 2.999818454749824e-05, "loss": 0.2339, "step": 1580 }, { "epoch": 0.034837792725049166, "grad_norm": 1.9185471534729004, "learning_rate": 2.9998167854109796e-05, "loss": 0.2235, "step": 1581 }, { "epoch": 0.034859828014565324, "grad_norm": 1.7647584676742554, "learning_rate": 2.999815108432764e-05, "loss": 0.206, "step": 1582 }, { "epoch": 0.03488186330408149, "grad_norm": 2.1185717582702637, "learning_rate": 2.9998134238151854e-05, "loss": 0.1764, "step": 1583 }, { "epoch": 0.034903898593597646, "grad_norm": 2.1211822032928467, "learning_rate": 2.9998117315582538e-05, "loss": 0.2086, "step": 1584 }, { "epoch": 0.034925933883113804, "grad_norm": 2.537855863571167, "learning_rate": 2.999810031661976e-05, "loss": 0.1883, "step": 1585 }, { "epoch": 0.03494796917262997, "grad_norm": 2.1984035968780518, "learning_rate": 2.9998083241263613e-05, "loss": 0.2098, "step": 1586 }, { "epoch": 0.034970004462146126, "grad_norm": 2.1225147247314453, "learning_rate": 2.9998066089514188e-05, "loss": 0.1888, "step": 1587 }, { "epoch": 0.03499203975166229, "grad_norm": 2.324681043624878, "learning_rate": 2.999804886137157e-05, "loss": 0.1722, "step": 1588 }, { "epoch": 0.03501407504117845, "grad_norm": 2.2844536304473877, "learning_rate": 2.9998031556835846e-05, "loss": 0.2006, "step": 1589 }, { "epoch": 0.035036110330694606, "grad_norm": 2.1110339164733887, "learning_rate": 2.99980141759071e-05, "loss": 0.1619, "step": 1590 }, { "epoch": 0.03505814562021077, "grad_norm": 2.9564430713653564, "learning_rate": 2.9997996718585434e-05, "loss": 0.1935, "step": 1591 }, { "epoch": 0.03508018090972693, "grad_norm": 2.66178822517395, "learning_rate": 2.9997979184870927e-05, "loss": 0.1862, "step": 1592 }, { "epoch": 0.035102216199243086, "grad_norm": 2.3400204181671143, "learning_rate": 2.9997961574763666e-05, "loss": 0.1816, "step": 1593 }, { "epoch": 0.03512425148875925, "grad_norm": 1.7698872089385986, "learning_rate": 2.9997943888263744e-05, "loss": 0.1793, "step": 1594 }, { "epoch": 0.03514628677827541, "grad_norm": 2.676933526992798, "learning_rate": 2.9997926125371255e-05, "loss": 0.2208, "step": 1595 }, { "epoch": 0.035168322067791566, "grad_norm": 4.777138710021973, "learning_rate": 2.999790828608628e-05, "loss": 0.211, "step": 1596 }, { "epoch": 0.03519035735730773, "grad_norm": 2.3743579387664795, "learning_rate": 2.999789037040892e-05, "loss": 0.2162, "step": 1597 }, { "epoch": 0.03521239264682389, "grad_norm": 1.5839550495147705, "learning_rate": 2.999787237833926e-05, "loss": 0.1876, "step": 1598 }, { "epoch": 0.035234427936340046, "grad_norm": 1.645933985710144, "learning_rate": 2.9997854309877395e-05, "loss": 0.1734, "step": 1599 }, { "epoch": 0.03525646322585621, "grad_norm": 2.9945383071899414, "learning_rate": 2.9997836165023412e-05, "loss": 0.154, "step": 1600 }, { "epoch": 0.03527849851537237, "grad_norm": 1.3561333417892456, "learning_rate": 2.999781794377741e-05, "loss": 0.1886, "step": 1601 }, { "epoch": 0.035300533804888526, "grad_norm": 2.0744688510894775, "learning_rate": 2.999779964613948e-05, "loss": 0.2059, "step": 1602 }, { "epoch": 0.03532256909440469, "grad_norm": 2.462268352508545, "learning_rate": 2.9997781272109708e-05, "loss": 0.1538, "step": 1603 }, { "epoch": 0.03534460438392085, "grad_norm": 2.1622564792633057, "learning_rate": 2.9997762821688196e-05, "loss": 0.1999, "step": 1604 }, { "epoch": 0.03536663967343701, "grad_norm": 2.4369328022003174, "learning_rate": 2.999774429487504e-05, "loss": 0.1529, "step": 1605 }, { "epoch": 0.03538867496295317, "grad_norm": 2.557490587234497, "learning_rate": 2.9997725691670328e-05, "loss": 0.2008, "step": 1606 }, { "epoch": 0.03541071025246933, "grad_norm": 2.433790922164917, "learning_rate": 2.9997707012074155e-05, "loss": 0.204, "step": 1607 }, { "epoch": 0.03543274554198549, "grad_norm": 2.989246368408203, "learning_rate": 2.9997688256086618e-05, "loss": 0.2435, "step": 1608 }, { "epoch": 0.03545478083150165, "grad_norm": 1.8058068752288818, "learning_rate": 2.9997669423707813e-05, "loss": 0.2034, "step": 1609 }, { "epoch": 0.03547681612101781, "grad_norm": 2.829294443130493, "learning_rate": 2.9997650514937833e-05, "loss": 0.2285, "step": 1610 }, { "epoch": 0.03549885141053397, "grad_norm": 1.9792448282241821, "learning_rate": 2.999763152977678e-05, "loss": 0.1993, "step": 1611 }, { "epoch": 0.03552088670005013, "grad_norm": 2.641970634460449, "learning_rate": 2.9997612468224745e-05, "loss": 0.243, "step": 1612 }, { "epoch": 0.03554292198956629, "grad_norm": 1.9830713272094727, "learning_rate": 2.999759333028183e-05, "loss": 0.1538, "step": 1613 }, { "epoch": 0.03556495727908245, "grad_norm": 1.7239954471588135, "learning_rate": 2.9997574115948125e-05, "loss": 0.2205, "step": 1614 }, { "epoch": 0.03558699256859861, "grad_norm": 2.4209420680999756, "learning_rate": 2.999755482522374e-05, "loss": 0.2406, "step": 1615 }, { "epoch": 0.03560902785811477, "grad_norm": 2.6431448459625244, "learning_rate": 2.999753545810876e-05, "loss": 0.226, "step": 1616 }, { "epoch": 0.03563106314763093, "grad_norm": 1.8665324449539185, "learning_rate": 2.9997516014603288e-05, "loss": 0.1887, "step": 1617 }, { "epoch": 0.03565309843714709, "grad_norm": 1.517804503440857, "learning_rate": 2.999749649470743e-05, "loss": 0.2234, "step": 1618 }, { "epoch": 0.03567513372666325, "grad_norm": 1.6408882141113281, "learning_rate": 2.9997476898421277e-05, "loss": 0.1388, "step": 1619 }, { "epoch": 0.03569716901617941, "grad_norm": 2.2626736164093018, "learning_rate": 2.9997457225744934e-05, "loss": 0.2228, "step": 1620 }, { "epoch": 0.03571920430569557, "grad_norm": 1.8494712114334106, "learning_rate": 2.9997437476678497e-05, "loss": 0.1627, "step": 1621 }, { "epoch": 0.03574123959521173, "grad_norm": 2.783003807067871, "learning_rate": 2.999741765122207e-05, "loss": 0.2045, "step": 1622 }, { "epoch": 0.03576327488472789, "grad_norm": 2.241934299468994, "learning_rate": 2.999739774937575e-05, "loss": 0.2266, "step": 1623 }, { "epoch": 0.03578531017424405, "grad_norm": 2.355060338973999, "learning_rate": 2.9997377771139644e-05, "loss": 0.1873, "step": 1624 }, { "epoch": 0.035807345463760215, "grad_norm": 2.5657975673675537, "learning_rate": 2.999735771651385e-05, "loss": 0.2006, "step": 1625 }, { "epoch": 0.03582938075327637, "grad_norm": 1.6966972351074219, "learning_rate": 2.999733758549847e-05, "loss": 0.2496, "step": 1626 }, { "epoch": 0.03585141604279253, "grad_norm": 1.9401682615280151, "learning_rate": 2.9997317378093608e-05, "loss": 0.2247, "step": 1627 }, { "epoch": 0.035873451332308695, "grad_norm": 2.5192830562591553, "learning_rate": 2.9997297094299368e-05, "loss": 0.2776, "step": 1628 }, { "epoch": 0.03589548662182485, "grad_norm": 2.354856252670288, "learning_rate": 2.999727673411585e-05, "loss": 0.1746, "step": 1629 }, { "epoch": 0.03591752191134101, "grad_norm": 2.7448816299438477, "learning_rate": 2.999725629754316e-05, "loss": 0.1822, "step": 1630 }, { "epoch": 0.035939557200857175, "grad_norm": 2.396585702896118, "learning_rate": 2.99972357845814e-05, "loss": 0.1771, "step": 1631 }, { "epoch": 0.03596159249037333, "grad_norm": 2.4289021492004395, "learning_rate": 2.9997215195230675e-05, "loss": 0.2052, "step": 1632 }, { "epoch": 0.03598362777988949, "grad_norm": 2.4640285968780518, "learning_rate": 2.9997194529491095e-05, "loss": 0.3123, "step": 1633 }, { "epoch": 0.036005663069405655, "grad_norm": 1.7855608463287354, "learning_rate": 2.999717378736276e-05, "loss": 0.1786, "step": 1634 }, { "epoch": 0.03602769835892181, "grad_norm": 3.734280824661255, "learning_rate": 2.999715296884578e-05, "loss": 0.24, "step": 1635 }, { "epoch": 0.03604973364843797, "grad_norm": 2.9434471130371094, "learning_rate": 2.9997132073940252e-05, "loss": 0.2169, "step": 1636 }, { "epoch": 0.036071768937954135, "grad_norm": 2.010932445526123, "learning_rate": 2.9997111102646286e-05, "loss": 0.2025, "step": 1637 }, { "epoch": 0.03609380422747029, "grad_norm": 1.9630963802337646, "learning_rate": 2.9997090054963996e-05, "loss": 0.1741, "step": 1638 }, { "epoch": 0.03611583951698645, "grad_norm": 2.2611382007598877, "learning_rate": 2.9997068930893485e-05, "loss": 0.2198, "step": 1639 }, { "epoch": 0.036137874806502615, "grad_norm": 1.3546634912490845, "learning_rate": 2.999704773043486e-05, "loss": 0.1788, "step": 1640 }, { "epoch": 0.03615991009601877, "grad_norm": 2.634770631790161, "learning_rate": 2.9997026453588226e-05, "loss": 0.2709, "step": 1641 }, { "epoch": 0.03618194538553494, "grad_norm": 1.6034694910049438, "learning_rate": 2.9997005100353693e-05, "loss": 0.21, "step": 1642 }, { "epoch": 0.036203980675051095, "grad_norm": 3.50986647605896, "learning_rate": 2.999698367073138e-05, "loss": 0.2383, "step": 1643 }, { "epoch": 0.03622601596456725, "grad_norm": 1.7846318483352661, "learning_rate": 2.9996962164721376e-05, "loss": 0.2021, "step": 1644 }, { "epoch": 0.03624805125408342, "grad_norm": 2.564774990081787, "learning_rate": 2.9996940582323805e-05, "loss": 0.235, "step": 1645 }, { "epoch": 0.036270086543599575, "grad_norm": 2.5851361751556396, "learning_rate": 2.9996918923538776e-05, "loss": 0.2094, "step": 1646 }, { "epoch": 0.03629212183311573, "grad_norm": 2.326569080352783, "learning_rate": 2.9996897188366395e-05, "loss": 0.223, "step": 1647 }, { "epoch": 0.0363141571226319, "grad_norm": 2.2413008213043213, "learning_rate": 2.9996875376806776e-05, "loss": 0.1765, "step": 1648 }, { "epoch": 0.036336192412148055, "grad_norm": 2.35068941116333, "learning_rate": 2.9996853488860026e-05, "loss": 0.1864, "step": 1649 }, { "epoch": 0.03635822770166421, "grad_norm": 2.1763100624084473, "learning_rate": 2.9996831524526263e-05, "loss": 0.198, "step": 1650 }, { "epoch": 0.03638026299118038, "grad_norm": 2.7527177333831787, "learning_rate": 2.9996809483805594e-05, "loss": 0.2402, "step": 1651 }, { "epoch": 0.036402298280696535, "grad_norm": 1.9027599096298218, "learning_rate": 2.9996787366698132e-05, "loss": 0.1836, "step": 1652 }, { "epoch": 0.03642433357021269, "grad_norm": 3.304931163787842, "learning_rate": 2.999676517320399e-05, "loss": 0.2372, "step": 1653 }, { "epoch": 0.03644636885972886, "grad_norm": 2.6939873695373535, "learning_rate": 2.999674290332328e-05, "loss": 0.1979, "step": 1654 }, { "epoch": 0.036468404149245015, "grad_norm": 2.0473389625549316, "learning_rate": 2.9996720557056115e-05, "loss": 0.2359, "step": 1655 }, { "epoch": 0.03649043943876117, "grad_norm": 2.1743133068084717, "learning_rate": 2.9996698134402616e-05, "loss": 0.1715, "step": 1656 }, { "epoch": 0.03651247472827734, "grad_norm": 2.1522767543792725, "learning_rate": 2.999667563536289e-05, "loss": 0.1893, "step": 1657 }, { "epoch": 0.036534510017793495, "grad_norm": 1.5079511404037476, "learning_rate": 2.999665305993705e-05, "loss": 0.1925, "step": 1658 }, { "epoch": 0.03655654530730966, "grad_norm": 2.483748435974121, "learning_rate": 2.9996630408125216e-05, "loss": 0.181, "step": 1659 }, { "epoch": 0.03657858059682582, "grad_norm": 2.0336618423461914, "learning_rate": 2.9996607679927498e-05, "loss": 0.1934, "step": 1660 }, { "epoch": 0.036600615886341975, "grad_norm": 2.449723958969116, "learning_rate": 2.999658487534402e-05, "loss": 0.2207, "step": 1661 }, { "epoch": 0.03662265117585814, "grad_norm": 2.361534595489502, "learning_rate": 2.999656199437489e-05, "loss": 0.2197, "step": 1662 }, { "epoch": 0.0366446864653743, "grad_norm": 2.84956693649292, "learning_rate": 2.9996539037020232e-05, "loss": 0.1411, "step": 1663 }, { "epoch": 0.036666721754890455, "grad_norm": 2.493288278579712, "learning_rate": 2.9996516003280156e-05, "loss": 0.1589, "step": 1664 }, { "epoch": 0.03668875704440662, "grad_norm": 1.685049295425415, "learning_rate": 2.9996492893154784e-05, "loss": 0.2013, "step": 1665 }, { "epoch": 0.03671079233392278, "grad_norm": 2.0369791984558105, "learning_rate": 2.9996469706644228e-05, "loss": 0.209, "step": 1666 }, { "epoch": 0.036732827623438935, "grad_norm": 2.961378335952759, "learning_rate": 2.9996446443748613e-05, "loss": 0.1827, "step": 1667 }, { "epoch": 0.0367548629129551, "grad_norm": 1.7055730819702148, "learning_rate": 2.9996423104468056e-05, "loss": 0.1982, "step": 1668 }, { "epoch": 0.03677689820247126, "grad_norm": 2.806504964828491, "learning_rate": 2.9996399688802673e-05, "loss": 0.2454, "step": 1669 }, { "epoch": 0.036798933491987415, "grad_norm": 2.8478572368621826, "learning_rate": 2.9996376196752585e-05, "loss": 0.1896, "step": 1670 }, { "epoch": 0.03682096878150358, "grad_norm": 4.381589889526367, "learning_rate": 2.999635262831791e-05, "loss": 0.234, "step": 1671 }, { "epoch": 0.03684300407101974, "grad_norm": 3.359956741333008, "learning_rate": 2.999632898349877e-05, "loss": 0.255, "step": 1672 }, { "epoch": 0.036865039360535895, "grad_norm": 2.2975027561187744, "learning_rate": 2.9996305262295282e-05, "loss": 0.2349, "step": 1673 }, { "epoch": 0.03688707465005206, "grad_norm": 2.1223015785217285, "learning_rate": 2.9996281464707575e-05, "loss": 0.1905, "step": 1674 }, { "epoch": 0.03690910993956822, "grad_norm": 2.1100730895996094, "learning_rate": 2.999625759073576e-05, "loss": 0.2273, "step": 1675 }, { "epoch": 0.03693114522908438, "grad_norm": 3.024836778640747, "learning_rate": 2.999623364037997e-05, "loss": 0.216, "step": 1676 }, { "epoch": 0.03695318051860054, "grad_norm": 1.9085707664489746, "learning_rate": 2.9996209613640314e-05, "loss": 0.1947, "step": 1677 }, { "epoch": 0.0369752158081167, "grad_norm": 3.273733377456665, "learning_rate": 2.9996185510516923e-05, "loss": 0.2328, "step": 1678 }, { "epoch": 0.03699725109763286, "grad_norm": 1.5207520723342896, "learning_rate": 2.9996161331009918e-05, "loss": 0.1808, "step": 1679 }, { "epoch": 0.03701928638714902, "grad_norm": 2.8331446647644043, "learning_rate": 2.9996137075119422e-05, "loss": 0.1833, "step": 1680 }, { "epoch": 0.03704132167666518, "grad_norm": 2.055053949356079, "learning_rate": 2.9996112742845562e-05, "loss": 0.1375, "step": 1681 }, { "epoch": 0.03706335696618134, "grad_norm": 4.272768974304199, "learning_rate": 2.9996088334188453e-05, "loss": 0.215, "step": 1682 }, { "epoch": 0.0370853922556975, "grad_norm": 1.297680377960205, "learning_rate": 2.9996063849148223e-05, "loss": 0.1671, "step": 1683 }, { "epoch": 0.03710742754521366, "grad_norm": 1.993939757347107, "learning_rate": 2.9996039287725002e-05, "loss": 0.1843, "step": 1684 }, { "epoch": 0.03712946283472982, "grad_norm": 2.785531759262085, "learning_rate": 2.999601464991891e-05, "loss": 0.2057, "step": 1685 }, { "epoch": 0.03715149812424598, "grad_norm": 2.0664141178131104, "learning_rate": 2.9995989935730072e-05, "loss": 0.1818, "step": 1686 }, { "epoch": 0.03717353341376214, "grad_norm": 1.9082000255584717, "learning_rate": 2.999596514515862e-05, "loss": 0.1636, "step": 1687 }, { "epoch": 0.0371955687032783, "grad_norm": 2.914585828781128, "learning_rate": 2.9995940278204673e-05, "loss": 0.1933, "step": 1688 }, { "epoch": 0.03721760399279446, "grad_norm": 1.8582422733306885, "learning_rate": 2.9995915334868363e-05, "loss": 0.2181, "step": 1689 }, { "epoch": 0.03723963928231062, "grad_norm": 3.043696403503418, "learning_rate": 2.999589031514981e-05, "loss": 0.1934, "step": 1690 }, { "epoch": 0.03726167457182678, "grad_norm": 1.6540178060531616, "learning_rate": 2.9995865219049154e-05, "loss": 0.1815, "step": 1691 }, { "epoch": 0.03728370986134294, "grad_norm": 2.3543310165405273, "learning_rate": 2.9995840046566508e-05, "loss": 0.2031, "step": 1692 }, { "epoch": 0.037305745150859104, "grad_norm": 2.078350067138672, "learning_rate": 2.999581479770201e-05, "loss": 0.2398, "step": 1693 }, { "epoch": 0.03732778044037526, "grad_norm": 2.719982385635376, "learning_rate": 2.999578947245579e-05, "loss": 0.2159, "step": 1694 }, { "epoch": 0.03734981572989142, "grad_norm": 1.8577003479003906, "learning_rate": 2.999576407082797e-05, "loss": 0.2019, "step": 1695 }, { "epoch": 0.037371851019407584, "grad_norm": 3.249674081802368, "learning_rate": 2.999573859281868e-05, "loss": 0.2315, "step": 1696 }, { "epoch": 0.03739388630892374, "grad_norm": 1.8523080348968506, "learning_rate": 2.999571303842805e-05, "loss": 0.1919, "step": 1697 }, { "epoch": 0.0374159215984399, "grad_norm": 2.7555742263793945, "learning_rate": 2.9995687407656215e-05, "loss": 0.1998, "step": 1698 }, { "epoch": 0.037437956887956064, "grad_norm": 1.8347655534744263, "learning_rate": 2.9995661700503306e-05, "loss": 0.1919, "step": 1699 }, { "epoch": 0.03745999217747222, "grad_norm": 1.4768139123916626, "learning_rate": 2.9995635916969443e-05, "loss": 0.1749, "step": 1700 }, { "epoch": 0.03748202746698838, "grad_norm": 2.2201437950134277, "learning_rate": 2.9995610057054772e-05, "loss": 0.1978, "step": 1701 }, { "epoch": 0.037504062756504544, "grad_norm": 2.4133975505828857, "learning_rate": 2.9995584120759415e-05, "loss": 0.2302, "step": 1702 }, { "epoch": 0.0375260980460207, "grad_norm": 2.661372423171997, "learning_rate": 2.9995558108083508e-05, "loss": 0.2025, "step": 1703 }, { "epoch": 0.03754813333553686, "grad_norm": 2.1492133140563965, "learning_rate": 2.999553201902718e-05, "loss": 0.1956, "step": 1704 }, { "epoch": 0.037570168625053024, "grad_norm": 2.2307424545288086, "learning_rate": 2.9995505853590566e-05, "loss": 0.2335, "step": 1705 }, { "epoch": 0.03759220391456918, "grad_norm": 1.9147117137908936, "learning_rate": 2.9995479611773805e-05, "loss": 0.21, "step": 1706 }, { "epoch": 0.03761423920408534, "grad_norm": 1.7811315059661865, "learning_rate": 2.9995453293577014e-05, "loss": 0.1756, "step": 1707 }, { "epoch": 0.037636274493601504, "grad_norm": 2.7168402671813965, "learning_rate": 2.999542689900035e-05, "loss": 0.1853, "step": 1708 }, { "epoch": 0.03765830978311766, "grad_norm": 2.5083963871002197, "learning_rate": 2.999540042804393e-05, "loss": 0.2082, "step": 1709 }, { "epoch": 0.03768034507263382, "grad_norm": 2.1590609550476074, "learning_rate": 2.9995373880707894e-05, "loss": 0.235, "step": 1710 }, { "epoch": 0.037702380362149984, "grad_norm": 3.0626611709594727, "learning_rate": 2.999534725699238e-05, "loss": 0.2074, "step": 1711 }, { "epoch": 0.03772441565166614, "grad_norm": 2.7612500190734863, "learning_rate": 2.999532055689752e-05, "loss": 0.1777, "step": 1712 }, { "epoch": 0.03774645094118231, "grad_norm": 2.863628387451172, "learning_rate": 2.999529378042345e-05, "loss": 0.2227, "step": 1713 }, { "epoch": 0.037768486230698464, "grad_norm": 2.389066696166992, "learning_rate": 2.9995266927570308e-05, "loss": 0.2003, "step": 1714 }, { "epoch": 0.03779052152021462, "grad_norm": 3.017984628677368, "learning_rate": 2.9995239998338233e-05, "loss": 0.2368, "step": 1715 }, { "epoch": 0.03781255680973079, "grad_norm": 1.52202570438385, "learning_rate": 2.9995212992727357e-05, "loss": 0.2052, "step": 1716 }, { "epoch": 0.037834592099246944, "grad_norm": 2.4514169692993164, "learning_rate": 2.9995185910737824e-05, "loss": 0.177, "step": 1717 }, { "epoch": 0.0378566273887631, "grad_norm": 2.354659080505371, "learning_rate": 2.9995158752369764e-05, "loss": 0.1903, "step": 1718 }, { "epoch": 0.037878662678279267, "grad_norm": 2.0171995162963867, "learning_rate": 2.999513151762332e-05, "loss": 0.1314, "step": 1719 }, { "epoch": 0.037900697967795424, "grad_norm": 1.8815269470214844, "learning_rate": 2.999510420649863e-05, "loss": 0.2326, "step": 1720 }, { "epoch": 0.03792273325731158, "grad_norm": 1.8228734731674194, "learning_rate": 2.9995076818995836e-05, "loss": 0.2088, "step": 1721 }, { "epoch": 0.037944768546827747, "grad_norm": 1.9612154960632324, "learning_rate": 2.9995049355115074e-05, "loss": 0.2209, "step": 1722 }, { "epoch": 0.037966803836343904, "grad_norm": 1.8684133291244507, "learning_rate": 2.9995021814856484e-05, "loss": 0.204, "step": 1723 }, { "epoch": 0.03798883912586006, "grad_norm": 4.149603366851807, "learning_rate": 2.9994994198220204e-05, "loss": 0.2078, "step": 1724 }, { "epoch": 0.038010874415376227, "grad_norm": 2.686951160430908, "learning_rate": 2.999496650520638e-05, "loss": 0.2592, "step": 1725 }, { "epoch": 0.038032909704892384, "grad_norm": 2.62905216217041, "learning_rate": 2.9994938735815153e-05, "loss": 0.2126, "step": 1726 }, { "epoch": 0.03805494499440854, "grad_norm": 2.552924156188965, "learning_rate": 2.999491089004666e-05, "loss": 0.2035, "step": 1727 }, { "epoch": 0.038076980283924707, "grad_norm": 2.248133420944214, "learning_rate": 2.9994882967901043e-05, "loss": 0.209, "step": 1728 }, { "epoch": 0.038099015573440864, "grad_norm": 2.6405811309814453, "learning_rate": 2.9994854969378448e-05, "loss": 0.1692, "step": 1729 }, { "epoch": 0.03812105086295703, "grad_norm": 2.320180654525757, "learning_rate": 2.9994826894479017e-05, "loss": 0.2428, "step": 1730 }, { "epoch": 0.038143086152473187, "grad_norm": 3.89127254486084, "learning_rate": 2.999479874320289e-05, "loss": 0.2054, "step": 1731 }, { "epoch": 0.038165121441989344, "grad_norm": 2.4916296005249023, "learning_rate": 2.9994770515550213e-05, "loss": 0.1685, "step": 1732 }, { "epoch": 0.03818715673150551, "grad_norm": 1.6276112794876099, "learning_rate": 2.999474221152113e-05, "loss": 0.1957, "step": 1733 }, { "epoch": 0.038209192021021667, "grad_norm": 2.4665777683258057, "learning_rate": 2.999471383111578e-05, "loss": 0.1945, "step": 1734 }, { "epoch": 0.038231227310537824, "grad_norm": 2.3545119762420654, "learning_rate": 2.9994685374334314e-05, "loss": 0.1496, "step": 1735 }, { "epoch": 0.03825326260005399, "grad_norm": 1.9189778566360474, "learning_rate": 2.999465684117688e-05, "loss": 0.2299, "step": 1736 }, { "epoch": 0.038275297889570147, "grad_norm": 2.441385269165039, "learning_rate": 2.9994628231643608e-05, "loss": 0.2003, "step": 1737 }, { "epoch": 0.038297333179086304, "grad_norm": 1.828598141670227, "learning_rate": 2.999459954573466e-05, "loss": 0.1571, "step": 1738 }, { "epoch": 0.03831936846860247, "grad_norm": 2.040421724319458, "learning_rate": 2.9994570783450172e-05, "loss": 0.1793, "step": 1739 }, { "epoch": 0.038341403758118626, "grad_norm": 2.5295662879943848, "learning_rate": 2.9994541944790298e-05, "loss": 0.1929, "step": 1740 }, { "epoch": 0.038363439047634784, "grad_norm": 2.8605520725250244, "learning_rate": 2.999451302975518e-05, "loss": 0.1177, "step": 1741 }, { "epoch": 0.03838547433715095, "grad_norm": 1.4298815727233887, "learning_rate": 2.9994484038344966e-05, "loss": 0.2901, "step": 1742 }, { "epoch": 0.038407509626667106, "grad_norm": 2.6274092197418213, "learning_rate": 2.9994454970559804e-05, "loss": 0.2163, "step": 1743 }, { "epoch": 0.038429544916183264, "grad_norm": 2.449000120162964, "learning_rate": 2.999442582639984e-05, "loss": 0.1868, "step": 1744 }, { "epoch": 0.03845158020569943, "grad_norm": 2.835906982421875, "learning_rate": 2.999439660586523e-05, "loss": 0.244, "step": 1745 }, { "epoch": 0.038473615495215586, "grad_norm": 2.2437548637390137, "learning_rate": 2.9994367308956116e-05, "loss": 0.2131, "step": 1746 }, { "epoch": 0.03849565078473175, "grad_norm": 1.9271056652069092, "learning_rate": 2.9994337935672648e-05, "loss": 0.2049, "step": 1747 }, { "epoch": 0.03851768607424791, "grad_norm": 2.1557977199554443, "learning_rate": 2.9994308486014973e-05, "loss": 0.2475, "step": 1748 }, { "epoch": 0.038539721363764066, "grad_norm": 2.629340171813965, "learning_rate": 2.9994278959983247e-05, "loss": 0.1449, "step": 1749 }, { "epoch": 0.03856175665328023, "grad_norm": 1.8001192808151245, "learning_rate": 2.9994249357577617e-05, "loss": 0.217, "step": 1750 }, { "epoch": 0.03858379194279639, "grad_norm": 2.225473403930664, "learning_rate": 2.9994219678798235e-05, "loss": 0.1355, "step": 1751 }, { "epoch": 0.038605827232312546, "grad_norm": 2.148468017578125, "learning_rate": 2.9994189923645253e-05, "loss": 0.2282, "step": 1752 }, { "epoch": 0.03862786252182871, "grad_norm": 2.0312366485595703, "learning_rate": 2.999416009211882e-05, "loss": 0.2123, "step": 1753 }, { "epoch": 0.03864989781134487, "grad_norm": 1.3451743125915527, "learning_rate": 2.9994130184219085e-05, "loss": 0.1662, "step": 1754 }, { "epoch": 0.038671933100861026, "grad_norm": 2.371762990951538, "learning_rate": 2.999410019994621e-05, "loss": 0.2053, "step": 1755 }, { "epoch": 0.03869396839037719, "grad_norm": 2.2500810623168945, "learning_rate": 2.999407013930034e-05, "loss": 0.166, "step": 1756 }, { "epoch": 0.03871600367989335, "grad_norm": 2.1482996940612793, "learning_rate": 2.9994040002281636e-05, "loss": 0.1732, "step": 1757 }, { "epoch": 0.038738038969409506, "grad_norm": 2.209326982498169, "learning_rate": 2.999400978889024e-05, "loss": 0.1999, "step": 1758 }, { "epoch": 0.03876007425892567, "grad_norm": 1.9356684684753418, "learning_rate": 2.999397949912631e-05, "loss": 0.1931, "step": 1759 }, { "epoch": 0.03878210954844183, "grad_norm": 2.8754091262817383, "learning_rate": 2.9993949132990005e-05, "loss": 0.1808, "step": 1760 }, { "epoch": 0.038804144837957986, "grad_norm": 3.032339096069336, "learning_rate": 2.9993918690481475e-05, "loss": 0.2134, "step": 1761 }, { "epoch": 0.03882618012747415, "grad_norm": 2.3407633304595947, "learning_rate": 2.999388817160088e-05, "loss": 0.1524, "step": 1762 }, { "epoch": 0.03884821541699031, "grad_norm": 3.0427486896514893, "learning_rate": 2.999385757634837e-05, "loss": 0.1669, "step": 1763 }, { "epoch": 0.03887025070650647, "grad_norm": 2.308006763458252, "learning_rate": 2.9993826904724102e-05, "loss": 0.2084, "step": 1764 }, { "epoch": 0.03889228599602263, "grad_norm": 2.134993553161621, "learning_rate": 2.9993796156728234e-05, "loss": 0.1873, "step": 1765 }, { "epoch": 0.03891432128553879, "grad_norm": 2.37923002243042, "learning_rate": 2.999376533236092e-05, "loss": 0.1914, "step": 1766 }, { "epoch": 0.03893635657505495, "grad_norm": 2.051516056060791, "learning_rate": 2.9993734431622325e-05, "loss": 0.1904, "step": 1767 }, { "epoch": 0.03895839186457111, "grad_norm": 2.2236058712005615, "learning_rate": 2.9993703454512596e-05, "loss": 0.1939, "step": 1768 }, { "epoch": 0.03898042715408727, "grad_norm": 1.9697165489196777, "learning_rate": 2.9993672401031897e-05, "loss": 0.2064, "step": 1769 }, { "epoch": 0.03900246244360343, "grad_norm": 2.1196789741516113, "learning_rate": 2.999364127118038e-05, "loss": 0.1414, "step": 1770 }, { "epoch": 0.03902449773311959, "grad_norm": 2.1953213214874268, "learning_rate": 2.9993610064958212e-05, "loss": 0.1611, "step": 1771 }, { "epoch": 0.03904653302263575, "grad_norm": 2.4946165084838867, "learning_rate": 2.9993578782365543e-05, "loss": 0.2271, "step": 1772 }, { "epoch": 0.03906856831215191, "grad_norm": 1.5657597780227661, "learning_rate": 2.9993547423402543e-05, "loss": 0.171, "step": 1773 }, { "epoch": 0.03909060360166807, "grad_norm": 6.7063517570495605, "learning_rate": 2.999351598806936e-05, "loss": 0.1595, "step": 1774 }, { "epoch": 0.03911263889118423, "grad_norm": 1.8813955783843994, "learning_rate": 2.9993484476366166e-05, "loss": 0.1799, "step": 1775 }, { "epoch": 0.03913467418070039, "grad_norm": 2.013237953186035, "learning_rate": 2.999345288829311e-05, "loss": 0.2186, "step": 1776 }, { "epoch": 0.03915670947021655, "grad_norm": 2.041886568069458, "learning_rate": 2.999342122385036e-05, "loss": 0.2078, "step": 1777 }, { "epoch": 0.03917874475973271, "grad_norm": 2.39229416847229, "learning_rate": 2.9993389483038075e-05, "loss": 0.2086, "step": 1778 }, { "epoch": 0.03920078004924887, "grad_norm": 2.2479922771453857, "learning_rate": 2.999335766585642e-05, "loss": 0.195, "step": 1779 }, { "epoch": 0.03922281533876503, "grad_norm": 1.883947491645813, "learning_rate": 2.9993325772305554e-05, "loss": 0.1964, "step": 1780 }, { "epoch": 0.039244850628281196, "grad_norm": 2.080033540725708, "learning_rate": 2.9993293802385638e-05, "loss": 0.223, "step": 1781 }, { "epoch": 0.03926688591779735, "grad_norm": 2.2211360931396484, "learning_rate": 2.9993261756096838e-05, "loss": 0.1828, "step": 1782 }, { "epoch": 0.03928892120731351, "grad_norm": 1.551729440689087, "learning_rate": 2.9993229633439314e-05, "loss": 0.1704, "step": 1783 }, { "epoch": 0.039310956496829676, "grad_norm": 1.864548683166504, "learning_rate": 2.9993197434413232e-05, "loss": 0.192, "step": 1784 }, { "epoch": 0.03933299178634583, "grad_norm": 2.7818260192871094, "learning_rate": 2.9993165159018757e-05, "loss": 0.1622, "step": 1785 }, { "epoch": 0.03935502707586199, "grad_norm": 2.429128646850586, "learning_rate": 2.999313280725605e-05, "loss": 0.2035, "step": 1786 }, { "epoch": 0.039377062365378156, "grad_norm": 2.627441167831421, "learning_rate": 2.999310037912528e-05, "loss": 0.1576, "step": 1787 }, { "epoch": 0.03939909765489431, "grad_norm": 2.053431510925293, "learning_rate": 2.9993067874626606e-05, "loss": 0.2191, "step": 1788 }, { "epoch": 0.03942113294441047, "grad_norm": 1.9389272928237915, "learning_rate": 2.9993035293760203e-05, "loss": 0.1906, "step": 1789 }, { "epoch": 0.039443168233926636, "grad_norm": 3.2079696655273438, "learning_rate": 2.9993002636526226e-05, "loss": 0.2376, "step": 1790 }, { "epoch": 0.03946520352344279, "grad_norm": 3.1001973152160645, "learning_rate": 2.999296990292485e-05, "loss": 0.1882, "step": 1791 }, { "epoch": 0.03948723881295895, "grad_norm": 1.6344751119613647, "learning_rate": 2.9992937092956238e-05, "loss": 0.1576, "step": 1792 }, { "epoch": 0.039509274102475116, "grad_norm": 2.8150551319122314, "learning_rate": 2.9992904206620555e-05, "loss": 0.2537, "step": 1793 }, { "epoch": 0.03953130939199127, "grad_norm": 2.043792963027954, "learning_rate": 2.9992871243917973e-05, "loss": 0.1959, "step": 1794 }, { "epoch": 0.03955334468150743, "grad_norm": 1.6916645765304565, "learning_rate": 2.999283820484866e-05, "loss": 0.2149, "step": 1795 }, { "epoch": 0.039575379971023596, "grad_norm": 2.946621894836426, "learning_rate": 2.9992805089412776e-05, "loss": 0.2146, "step": 1796 }, { "epoch": 0.03959741526053975, "grad_norm": 2.103386640548706, "learning_rate": 2.99927718976105e-05, "loss": 0.186, "step": 1797 }, { "epoch": 0.03961945055005592, "grad_norm": 2.5457987785339355, "learning_rate": 2.9992738629441996e-05, "loss": 0.2164, "step": 1798 }, { "epoch": 0.039641485839572076, "grad_norm": 1.6023178100585938, "learning_rate": 2.999270528490744e-05, "loss": 0.2039, "step": 1799 }, { "epoch": 0.03966352112908823, "grad_norm": 2.4023971557617188, "learning_rate": 2.9992671864006992e-05, "loss": 0.1744, "step": 1800 }, { "epoch": 0.0396855564186044, "grad_norm": 3.538234233856201, "learning_rate": 2.9992638366740823e-05, "loss": 0.2299, "step": 1801 }, { "epoch": 0.039707591708120556, "grad_norm": 1.886046051979065, "learning_rate": 2.9992604793109108e-05, "loss": 0.1576, "step": 1802 }, { "epoch": 0.03972962699763671, "grad_norm": 2.4102838039398193, "learning_rate": 2.999257114311202e-05, "loss": 0.2245, "step": 1803 }, { "epoch": 0.03975166228715288, "grad_norm": 2.996487855911255, "learning_rate": 2.999253741674973e-05, "loss": 0.1823, "step": 1804 }, { "epoch": 0.039773697576669036, "grad_norm": 11.45955753326416, "learning_rate": 2.9992503614022403e-05, "loss": 0.1974, "step": 1805 }, { "epoch": 0.03979573286618519, "grad_norm": 1.7509477138519287, "learning_rate": 2.9992469734930213e-05, "loss": 0.1943, "step": 1806 }, { "epoch": 0.03981776815570136, "grad_norm": 1.435357928276062, "learning_rate": 2.9992435779473342e-05, "loss": 0.1868, "step": 1807 }, { "epoch": 0.039839803445217516, "grad_norm": 1.4266613721847534, "learning_rate": 2.9992401747651954e-05, "loss": 0.1603, "step": 1808 }, { "epoch": 0.03986183873473367, "grad_norm": 1.7197155952453613, "learning_rate": 2.9992367639466223e-05, "loss": 0.1758, "step": 1809 }, { "epoch": 0.03988387402424984, "grad_norm": 2.2705557346343994, "learning_rate": 2.999233345491632e-05, "loss": 0.169, "step": 1810 }, { "epoch": 0.039905909313765996, "grad_norm": 2.0977160930633545, "learning_rate": 2.999229919400243e-05, "loss": 0.16, "step": 1811 }, { "epoch": 0.03992794460328215, "grad_norm": 1.9308127164840698, "learning_rate": 2.9992264856724718e-05, "loss": 0.179, "step": 1812 }, { "epoch": 0.03994997989279832, "grad_norm": 1.3584598302841187, "learning_rate": 2.999223044308336e-05, "loss": 0.1458, "step": 1813 }, { "epoch": 0.039972015182314476, "grad_norm": 2.260929584503174, "learning_rate": 2.9992195953078535e-05, "loss": 0.2216, "step": 1814 }, { "epoch": 0.03999405047183063, "grad_norm": 2.1798229217529297, "learning_rate": 2.9992161386710416e-05, "loss": 0.2435, "step": 1815 }, { "epoch": 0.0400160857613468, "grad_norm": 2.233510971069336, "learning_rate": 2.999212674397918e-05, "loss": 0.2058, "step": 1816 }, { "epoch": 0.040038121050862956, "grad_norm": 1.6087898015975952, "learning_rate": 2.9992092024885e-05, "loss": 0.1943, "step": 1817 }, { "epoch": 0.04006015634037912, "grad_norm": 1.9432777166366577, "learning_rate": 2.999205722942806e-05, "loss": 0.198, "step": 1818 }, { "epoch": 0.04008219162989528, "grad_norm": 2.431713581085205, "learning_rate": 2.9992022357608532e-05, "loss": 0.2421, "step": 1819 }, { "epoch": 0.040104226919411436, "grad_norm": 1.9119641780853271, "learning_rate": 2.9991987409426593e-05, "loss": 0.1768, "step": 1820 }, { "epoch": 0.0401262622089276, "grad_norm": 1.9754780530929565, "learning_rate": 2.9991952384882426e-05, "loss": 0.1844, "step": 1821 }, { "epoch": 0.04014829749844376, "grad_norm": 1.7709630727767944, "learning_rate": 2.9991917283976205e-05, "loss": 0.2065, "step": 1822 }, { "epoch": 0.040170332787959916, "grad_norm": 1.8754762411117554, "learning_rate": 2.9991882106708107e-05, "loss": 0.2103, "step": 1823 }, { "epoch": 0.04019236807747608, "grad_norm": 2.315809726715088, "learning_rate": 2.999184685307832e-05, "loss": 0.1783, "step": 1824 }, { "epoch": 0.04021440336699224, "grad_norm": 1.6651184558868408, "learning_rate": 2.9991811523087012e-05, "loss": 0.189, "step": 1825 }, { "epoch": 0.040236438656508396, "grad_norm": 2.2173476219177246, "learning_rate": 2.9991776116734368e-05, "loss": 0.2087, "step": 1826 }, { "epoch": 0.04025847394602456, "grad_norm": 1.897375464439392, "learning_rate": 2.9991740634020575e-05, "loss": 0.1722, "step": 1827 }, { "epoch": 0.04028050923554072, "grad_norm": 2.2644944190979004, "learning_rate": 2.9991705074945804e-05, "loss": 0.205, "step": 1828 }, { "epoch": 0.040302544525056876, "grad_norm": 1.8010320663452148, "learning_rate": 2.999166943951024e-05, "loss": 0.1684, "step": 1829 }, { "epoch": 0.04032457981457304, "grad_norm": 2.4895517826080322, "learning_rate": 2.999163372771407e-05, "loss": 0.2561, "step": 1830 }, { "epoch": 0.0403466151040892, "grad_norm": 1.7303928136825562, "learning_rate": 2.9991597939557464e-05, "loss": 0.1431, "step": 1831 }, { "epoch": 0.040368650393605356, "grad_norm": 2.2430522441864014, "learning_rate": 2.9991562075040606e-05, "loss": 0.1428, "step": 1832 }, { "epoch": 0.04039068568312152, "grad_norm": 2.8682868480682373, "learning_rate": 2.999152613416369e-05, "loss": 0.1282, "step": 1833 }, { "epoch": 0.04041272097263768, "grad_norm": 2.9087753295898438, "learning_rate": 2.9991490116926893e-05, "loss": 0.2084, "step": 1834 }, { "epoch": 0.04043475626215384, "grad_norm": 2.80407452583313, "learning_rate": 2.9991454023330394e-05, "loss": 0.2223, "step": 1835 }, { "epoch": 0.04045679155167, "grad_norm": 2.167449712753296, "learning_rate": 2.9991417853374383e-05, "loss": 0.1643, "step": 1836 }, { "epoch": 0.04047882684118616, "grad_norm": 1.913635015487671, "learning_rate": 2.999138160705904e-05, "loss": 0.2079, "step": 1837 }, { "epoch": 0.04050086213070232, "grad_norm": 2.3194468021392822, "learning_rate": 2.999134528438455e-05, "loss": 0.1703, "step": 1838 }, { "epoch": 0.04052289742021848, "grad_norm": 2.6797571182250977, "learning_rate": 2.9991308885351106e-05, "loss": 0.1857, "step": 1839 }, { "epoch": 0.04054493270973464, "grad_norm": 3.4717929363250732, "learning_rate": 2.9991272409958887e-05, "loss": 0.1777, "step": 1840 }, { "epoch": 0.0405669679992508, "grad_norm": 1.5110814571380615, "learning_rate": 2.9991235858208074e-05, "loss": 0.1739, "step": 1841 }, { "epoch": 0.04058900328876696, "grad_norm": 4.6051506996154785, "learning_rate": 2.9991199230098857e-05, "loss": 0.2296, "step": 1842 }, { "epoch": 0.04061103857828312, "grad_norm": 3.0679666996002197, "learning_rate": 2.9991162525631426e-05, "loss": 0.2286, "step": 1843 }, { "epoch": 0.04063307386779928, "grad_norm": 4.1947503089904785, "learning_rate": 2.9991125744805964e-05, "loss": 0.2497, "step": 1844 }, { "epoch": 0.04065510915731544, "grad_norm": 2.3384745121002197, "learning_rate": 2.9991088887622656e-05, "loss": 0.2196, "step": 1845 }, { "epoch": 0.0406771444468316, "grad_norm": 1.9872690439224243, "learning_rate": 2.99910519540817e-05, "loss": 0.1897, "step": 1846 }, { "epoch": 0.04069917973634776, "grad_norm": 2.57297682762146, "learning_rate": 2.9991014944183276e-05, "loss": 0.165, "step": 1847 }, { "epoch": 0.04072121502586392, "grad_norm": 2.357224941253662, "learning_rate": 2.999097785792757e-05, "loss": 0.2037, "step": 1848 }, { "epoch": 0.04074325031538008, "grad_norm": 2.9072117805480957, "learning_rate": 2.999094069531478e-05, "loss": 0.1905, "step": 1849 }, { "epoch": 0.04076528560489624, "grad_norm": 2.386625289916992, "learning_rate": 2.999090345634509e-05, "loss": 0.2497, "step": 1850 }, { "epoch": 0.0407873208944124, "grad_norm": 2.179068088531494, "learning_rate": 2.9990866141018688e-05, "loss": 0.1772, "step": 1851 }, { "epoch": 0.040809356183928565, "grad_norm": 1.8046461343765259, "learning_rate": 2.9990828749335765e-05, "loss": 0.1645, "step": 1852 }, { "epoch": 0.04083139147344472, "grad_norm": 2.194554090499878, "learning_rate": 2.9990791281296514e-05, "loss": 0.1399, "step": 1853 }, { "epoch": 0.04085342676296088, "grad_norm": 1.7649967670440674, "learning_rate": 2.9990753736901126e-05, "loss": 0.1911, "step": 1854 }, { "epoch": 0.040875462052477045, "grad_norm": 2.2110722064971924, "learning_rate": 2.9990716116149787e-05, "loss": 0.2016, "step": 1855 }, { "epoch": 0.0408974973419932, "grad_norm": 2.9037346839904785, "learning_rate": 2.9990678419042697e-05, "loss": 0.1888, "step": 1856 }, { "epoch": 0.04091953263150936, "grad_norm": 2.6964361667633057, "learning_rate": 2.9990640645580042e-05, "loss": 0.1727, "step": 1857 }, { "epoch": 0.040941567921025525, "grad_norm": 2.323363780975342, "learning_rate": 2.9990602795762012e-05, "loss": 0.2278, "step": 1858 }, { "epoch": 0.04096360321054168, "grad_norm": 2.6012723445892334, "learning_rate": 2.9990564869588806e-05, "loss": 0.136, "step": 1859 }, { "epoch": 0.04098563850005784, "grad_norm": 1.5461540222167969, "learning_rate": 2.9990526867060612e-05, "loss": 0.14, "step": 1860 }, { "epoch": 0.041007673789574005, "grad_norm": 1.9958794116973877, "learning_rate": 2.999048878817763e-05, "loss": 0.153, "step": 1861 }, { "epoch": 0.04102970907909016, "grad_norm": 3.2530345916748047, "learning_rate": 2.9990450632940044e-05, "loss": 0.1866, "step": 1862 }, { "epoch": 0.04105174436860632, "grad_norm": 2.607499599456787, "learning_rate": 2.999041240134806e-05, "loss": 0.2058, "step": 1863 }, { "epoch": 0.041073779658122485, "grad_norm": 2.1172897815704346, "learning_rate": 2.9990374093401865e-05, "loss": 0.1748, "step": 1864 }, { "epoch": 0.04109581494763864, "grad_norm": 2.381505250930786, "learning_rate": 2.9990335709101654e-05, "loss": 0.1409, "step": 1865 }, { "epoch": 0.0411178502371548, "grad_norm": 1.5671364068984985, "learning_rate": 2.9990297248447626e-05, "loss": 0.1863, "step": 1866 }, { "epoch": 0.041139885526670965, "grad_norm": 2.455610752105713, "learning_rate": 2.999025871143998e-05, "loss": 0.1942, "step": 1867 }, { "epoch": 0.04116192081618712, "grad_norm": 1.7991431951522827, "learning_rate": 2.9990220098078905e-05, "loss": 0.1752, "step": 1868 }, { "epoch": 0.04118395610570329, "grad_norm": 3.45621395111084, "learning_rate": 2.99901814083646e-05, "loss": 0.2316, "step": 1869 }, { "epoch": 0.041205991395219445, "grad_norm": 1.7658312320709229, "learning_rate": 2.999014264229726e-05, "loss": 0.1686, "step": 1870 }, { "epoch": 0.0412280266847356, "grad_norm": 2.00256085395813, "learning_rate": 2.9990103799877093e-05, "loss": 0.1821, "step": 1871 }, { "epoch": 0.04125006197425177, "grad_norm": 2.188093662261963, "learning_rate": 2.9990064881104283e-05, "loss": 0.2156, "step": 1872 }, { "epoch": 0.041272097263767925, "grad_norm": 1.9610097408294678, "learning_rate": 2.9990025885979035e-05, "loss": 0.2161, "step": 1873 }, { "epoch": 0.04129413255328408, "grad_norm": 1.546890377998352, "learning_rate": 2.998998681450155e-05, "loss": 0.2366, "step": 1874 }, { "epoch": 0.04131616784280025, "grad_norm": 2.433121919631958, "learning_rate": 2.9989947666672017e-05, "loss": 0.2193, "step": 1875 }, { "epoch": 0.041338203132316405, "grad_norm": 2.135744571685791, "learning_rate": 2.998990844249065e-05, "loss": 0.1763, "step": 1876 }, { "epoch": 0.04136023842183256, "grad_norm": 1.7406120300292969, "learning_rate": 2.998986914195764e-05, "loss": 0.1847, "step": 1877 }, { "epoch": 0.04138227371134873, "grad_norm": 1.6209641695022583, "learning_rate": 2.9989829765073184e-05, "loss": 0.1445, "step": 1878 }, { "epoch": 0.041404309000864885, "grad_norm": 2.0402677059173584, "learning_rate": 2.9989790311837488e-05, "loss": 0.1893, "step": 1879 }, { "epoch": 0.04142634429038104, "grad_norm": 1.7675797939300537, "learning_rate": 2.998975078225075e-05, "loss": 0.1994, "step": 1880 }, { "epoch": 0.04144837957989721, "grad_norm": 3.790071725845337, "learning_rate": 2.998971117631318e-05, "loss": 0.1779, "step": 1881 }, { "epoch": 0.041470414869413365, "grad_norm": 1.340013861656189, "learning_rate": 2.9989671494024966e-05, "loss": 0.1613, "step": 1882 }, { "epoch": 0.04149245015892952, "grad_norm": 1.8342838287353516, "learning_rate": 2.998963173538632e-05, "loss": 0.2395, "step": 1883 }, { "epoch": 0.04151448544844569, "grad_norm": 2.7320141792297363, "learning_rate": 2.9989591900397443e-05, "loss": 0.1506, "step": 1884 }, { "epoch": 0.041536520737961845, "grad_norm": 2.4068751335144043, "learning_rate": 2.9989551989058534e-05, "loss": 0.2043, "step": 1885 }, { "epoch": 0.04155855602747801, "grad_norm": 2.351077079772949, "learning_rate": 2.99895120013698e-05, "loss": 0.1907, "step": 1886 }, { "epoch": 0.04158059131699417, "grad_norm": 1.7501691579818726, "learning_rate": 2.9989471937331442e-05, "loss": 0.1736, "step": 1887 }, { "epoch": 0.041602626606510325, "grad_norm": 1.593366265296936, "learning_rate": 2.9989431796943667e-05, "loss": 0.1723, "step": 1888 }, { "epoch": 0.04162466189602649, "grad_norm": 1.341302514076233, "learning_rate": 2.9989391580206676e-05, "loss": 0.1589, "step": 1889 }, { "epoch": 0.04164669718554265, "grad_norm": 2.6392099857330322, "learning_rate": 2.9989351287120673e-05, "loss": 0.2278, "step": 1890 }, { "epoch": 0.041668732475058805, "grad_norm": 2.519662380218506, "learning_rate": 2.998931091768587e-05, "loss": 0.1806, "step": 1891 }, { "epoch": 0.04169076776457497, "grad_norm": 2.2605133056640625, "learning_rate": 2.998927047190247e-05, "loss": 0.1539, "step": 1892 }, { "epoch": 0.04171280305409113, "grad_norm": 2.8919737339019775, "learning_rate": 2.9989229949770678e-05, "loss": 0.1826, "step": 1893 }, { "epoch": 0.041734838343607285, "grad_norm": 1.63577401638031, "learning_rate": 2.9989189351290697e-05, "loss": 0.1465, "step": 1894 }, { "epoch": 0.04175687363312345, "grad_norm": 2.0294501781463623, "learning_rate": 2.9989148676462736e-05, "loss": 0.1436, "step": 1895 }, { "epoch": 0.04177890892263961, "grad_norm": 1.6507039070129395, "learning_rate": 2.9989107925287007e-05, "loss": 0.1601, "step": 1896 }, { "epoch": 0.041800944212155765, "grad_norm": 1.9890323877334595, "learning_rate": 2.998906709776371e-05, "loss": 0.1624, "step": 1897 }, { "epoch": 0.04182297950167193, "grad_norm": 1.716225266456604, "learning_rate": 2.9989026193893056e-05, "loss": 0.1189, "step": 1898 }, { "epoch": 0.04184501479118809, "grad_norm": 2.5425400733947754, "learning_rate": 2.9988985213675258e-05, "loss": 0.1934, "step": 1899 }, { "epoch": 0.041867050080704245, "grad_norm": 3.4591119289398193, "learning_rate": 2.9988944157110518e-05, "loss": 0.2047, "step": 1900 }, { "epoch": 0.04188908537022041, "grad_norm": 3.60031795501709, "learning_rate": 2.9988903024199045e-05, "loss": 0.255, "step": 1901 }, { "epoch": 0.04191112065973657, "grad_norm": 3.205812931060791, "learning_rate": 2.9988861814941058e-05, "loss": 0.2603, "step": 1902 }, { "epoch": 0.04193315594925273, "grad_norm": 2.8895223140716553, "learning_rate": 2.9988820529336755e-05, "loss": 0.2285, "step": 1903 }, { "epoch": 0.04195519123876889, "grad_norm": 2.420598030090332, "learning_rate": 2.9988779167386354e-05, "loss": 0.2691, "step": 1904 }, { "epoch": 0.04197722652828505, "grad_norm": 2.8850810527801514, "learning_rate": 2.9988737729090058e-05, "loss": 0.2567, "step": 1905 }, { "epoch": 0.04199926181780121, "grad_norm": 2.638603925704956, "learning_rate": 2.9988696214448087e-05, "loss": 0.1778, "step": 1906 }, { "epoch": 0.04202129710731737, "grad_norm": 2.6518161296844482, "learning_rate": 2.9988654623460648e-05, "loss": 0.2079, "step": 1907 }, { "epoch": 0.04204333239683353, "grad_norm": 2.8829920291900635, "learning_rate": 2.9988612956127957e-05, "loss": 0.2252, "step": 1908 }, { "epoch": 0.04206536768634969, "grad_norm": 1.6674519777297974, "learning_rate": 2.9988571212450215e-05, "loss": 0.1498, "step": 1909 }, { "epoch": 0.04208740297586585, "grad_norm": 2.1418378353118896, "learning_rate": 2.998852939242765e-05, "loss": 0.2176, "step": 1910 }, { "epoch": 0.04210943826538201, "grad_norm": 6.621813774108887, "learning_rate": 2.9988487496060462e-05, "loss": 0.1739, "step": 1911 }, { "epoch": 0.04213147355489817, "grad_norm": 2.4474518299102783, "learning_rate": 2.9988445523348877e-05, "loss": 0.2471, "step": 1912 }, { "epoch": 0.04215350884441433, "grad_norm": 2.2016305923461914, "learning_rate": 2.998840347429309e-05, "loss": 0.1812, "step": 1913 }, { "epoch": 0.04217554413393049, "grad_norm": 2.9256136417388916, "learning_rate": 2.9988361348893338e-05, "loss": 0.2311, "step": 1914 }, { "epoch": 0.04219757942344665, "grad_norm": 1.9729199409484863, "learning_rate": 2.9988319147149817e-05, "loss": 0.1872, "step": 1915 }, { "epoch": 0.04221961471296281, "grad_norm": 1.8508163690567017, "learning_rate": 2.9988276869062753e-05, "loss": 0.1833, "step": 1916 }, { "epoch": 0.04224165000247897, "grad_norm": 2.994065046310425, "learning_rate": 2.9988234514632355e-05, "loss": 0.1654, "step": 1917 }, { "epoch": 0.04226368529199513, "grad_norm": 1.6407265663146973, "learning_rate": 2.998819208385884e-05, "loss": 0.2294, "step": 1918 }, { "epoch": 0.04228572058151129, "grad_norm": 2.5732250213623047, "learning_rate": 2.998814957674243e-05, "loss": 0.2342, "step": 1919 }, { "epoch": 0.04230775587102745, "grad_norm": 1.9157298803329468, "learning_rate": 2.9988106993283335e-05, "loss": 0.184, "step": 1920 }, { "epoch": 0.04232979116054361, "grad_norm": 2.1041252613067627, "learning_rate": 2.998806433348177e-05, "loss": 0.1891, "step": 1921 }, { "epoch": 0.04235182645005977, "grad_norm": 2.280700206756592, "learning_rate": 2.998802159733796e-05, "loss": 0.2606, "step": 1922 }, { "epoch": 0.042373861739575934, "grad_norm": 2.1619699001312256, "learning_rate": 2.9987978784852118e-05, "loss": 0.2021, "step": 1923 }, { "epoch": 0.04239589702909209, "grad_norm": 2.293001174926758, "learning_rate": 2.9987935896024466e-05, "loss": 0.1298, "step": 1924 }, { "epoch": 0.04241793231860825, "grad_norm": 1.9771158695220947, "learning_rate": 2.9987892930855215e-05, "loss": 0.1761, "step": 1925 }, { "epoch": 0.042439967608124414, "grad_norm": 2.3009235858917236, "learning_rate": 2.998784988934459e-05, "loss": 0.1709, "step": 1926 }, { "epoch": 0.04246200289764057, "grad_norm": 1.3215711116790771, "learning_rate": 2.998780677149281e-05, "loss": 0.2351, "step": 1927 }, { "epoch": 0.04248403818715673, "grad_norm": 2.435842990875244, "learning_rate": 2.9987763577300093e-05, "loss": 0.1776, "step": 1928 }, { "epoch": 0.042506073476672894, "grad_norm": 2.1317408084869385, "learning_rate": 2.9987720306766655e-05, "loss": 0.1444, "step": 1929 }, { "epoch": 0.04252810876618905, "grad_norm": 1.725921392440796, "learning_rate": 2.9987676959892717e-05, "loss": 0.1904, "step": 1930 }, { "epoch": 0.04255014405570521, "grad_norm": 1.8475112915039062, "learning_rate": 2.9987633536678514e-05, "loss": 0.1597, "step": 1931 }, { "epoch": 0.042572179345221374, "grad_norm": 2.138302803039551, "learning_rate": 2.9987590037124248e-05, "loss": 0.1359, "step": 1932 }, { "epoch": 0.04259421463473753, "grad_norm": 2.2329814434051514, "learning_rate": 2.9987546461230154e-05, "loss": 0.215, "step": 1933 }, { "epoch": 0.04261624992425369, "grad_norm": 2.6817708015441895, "learning_rate": 2.9987502808996447e-05, "loss": 0.2484, "step": 1934 }, { "epoch": 0.042638285213769854, "grad_norm": 1.609300136566162, "learning_rate": 2.998745908042335e-05, "loss": 0.1339, "step": 1935 }, { "epoch": 0.04266032050328601, "grad_norm": 2.8636996746063232, "learning_rate": 2.9987415275511088e-05, "loss": 0.2186, "step": 1936 }, { "epoch": 0.04268235579280217, "grad_norm": 2.0149965286254883, "learning_rate": 2.9987371394259885e-05, "loss": 0.1905, "step": 1937 }, { "epoch": 0.042704391082318334, "grad_norm": 2.1007797718048096, "learning_rate": 2.998732743666996e-05, "loss": 0.1923, "step": 1938 }, { "epoch": 0.04272642637183449, "grad_norm": 2.045741081237793, "learning_rate": 2.998728340274154e-05, "loss": 0.1531, "step": 1939 }, { "epoch": 0.042748461661350656, "grad_norm": 1.6245951652526855, "learning_rate": 2.9987239292474852e-05, "loss": 0.1546, "step": 1940 }, { "epoch": 0.042770496950866814, "grad_norm": 2.1703100204467773, "learning_rate": 2.9987195105870112e-05, "loss": 0.1692, "step": 1941 }, { "epoch": 0.04279253224038297, "grad_norm": 2.1507840156555176, "learning_rate": 2.9987150842927554e-05, "loss": 0.2127, "step": 1942 }, { "epoch": 0.042814567529899136, "grad_norm": 1.865747094154358, "learning_rate": 2.9987106503647396e-05, "loss": 0.1347, "step": 1943 }, { "epoch": 0.042836602819415294, "grad_norm": 1.5826389789581299, "learning_rate": 2.9987062088029873e-05, "loss": 0.1795, "step": 1944 }, { "epoch": 0.04285863810893145, "grad_norm": 2.274186134338379, "learning_rate": 2.9987017596075207e-05, "loss": 0.1846, "step": 1945 }, { "epoch": 0.042880673398447616, "grad_norm": 1.8068686723709106, "learning_rate": 2.998697302778362e-05, "loss": 0.1959, "step": 1946 }, { "epoch": 0.042902708687963774, "grad_norm": 1.6291686296463013, "learning_rate": 2.9986928383155343e-05, "loss": 0.1832, "step": 1947 }, { "epoch": 0.04292474397747993, "grad_norm": 1.9711172580718994, "learning_rate": 2.99868836621906e-05, "loss": 0.1824, "step": 1948 }, { "epoch": 0.042946779266996096, "grad_norm": 1.8168766498565674, "learning_rate": 2.998683886488963e-05, "loss": 0.1903, "step": 1949 }, { "epoch": 0.042968814556512254, "grad_norm": 2.1462483406066895, "learning_rate": 2.998679399125265e-05, "loss": 0.1729, "step": 1950 }, { "epoch": 0.04299084984602841, "grad_norm": 1.7926838397979736, "learning_rate": 2.998674904127989e-05, "loss": 0.1669, "step": 1951 }, { "epoch": 0.043012885135544576, "grad_norm": 40.3138313293457, "learning_rate": 2.998670401497158e-05, "loss": 0.2433, "step": 1952 }, { "epoch": 0.043034920425060734, "grad_norm": 8.926349639892578, "learning_rate": 2.998665891232795e-05, "loss": 0.1864, "step": 1953 }, { "epoch": 0.04305695571457689, "grad_norm": 8.516855239868164, "learning_rate": 2.998661373334923e-05, "loss": 0.2305, "step": 1954 }, { "epoch": 0.043078991004093056, "grad_norm": 2.3461453914642334, "learning_rate": 2.9986568478035655e-05, "loss": 0.1857, "step": 1955 }, { "epoch": 0.043101026293609214, "grad_norm": 2.8618104457855225, "learning_rate": 2.9986523146387447e-05, "loss": 0.1891, "step": 1956 }, { "epoch": 0.04312306158312538, "grad_norm": 3.110322952270508, "learning_rate": 2.9986477738404836e-05, "loss": 0.1881, "step": 1957 }, { "epoch": 0.043145096872641536, "grad_norm": 3.392820358276367, "learning_rate": 2.998643225408806e-05, "loss": 0.2048, "step": 1958 }, { "epoch": 0.043167132162157694, "grad_norm": 1.830959677696228, "learning_rate": 2.9986386693437346e-05, "loss": 0.1772, "step": 1959 }, { "epoch": 0.04318916745167386, "grad_norm": 1.9078007936477661, "learning_rate": 2.9986341056452937e-05, "loss": 0.1939, "step": 1960 }, { "epoch": 0.043211202741190016, "grad_norm": 2.3595290184020996, "learning_rate": 2.9986295343135047e-05, "loss": 0.1988, "step": 1961 }, { "epoch": 0.043233238030706174, "grad_norm": 3.112215757369995, "learning_rate": 2.998624955348392e-05, "loss": 0.1878, "step": 1962 }, { "epoch": 0.04325527332022234, "grad_norm": 1.7228790521621704, "learning_rate": 2.9986203687499793e-05, "loss": 0.1722, "step": 1963 }, { "epoch": 0.043277308609738496, "grad_norm": 1.54295015335083, "learning_rate": 2.998615774518289e-05, "loss": 0.1618, "step": 1964 }, { "epoch": 0.043299343899254654, "grad_norm": 1.8039367198944092, "learning_rate": 2.998611172653345e-05, "loss": 0.1836, "step": 1965 }, { "epoch": 0.04332137918877082, "grad_norm": 3.177907943725586, "learning_rate": 2.9986065631551708e-05, "loss": 0.2388, "step": 1966 }, { "epoch": 0.043343414478286976, "grad_norm": 1.7669583559036255, "learning_rate": 2.9986019460237896e-05, "loss": 0.1729, "step": 1967 }, { "epoch": 0.043365449767803134, "grad_norm": 1.9863190650939941, "learning_rate": 2.998597321259225e-05, "loss": 0.1835, "step": 1968 }, { "epoch": 0.0433874850573193, "grad_norm": 2.6637306213378906, "learning_rate": 2.998592688861501e-05, "loss": 0.1754, "step": 1969 }, { "epoch": 0.043409520346835456, "grad_norm": 2.6160197257995605, "learning_rate": 2.998588048830641e-05, "loss": 0.2029, "step": 1970 }, { "epoch": 0.043431555636351614, "grad_norm": 1.6451162099838257, "learning_rate": 2.9985834011666676e-05, "loss": 0.1932, "step": 1971 }, { "epoch": 0.04345359092586778, "grad_norm": 2.530601978302002, "learning_rate": 2.998578745869606e-05, "loss": 0.2058, "step": 1972 }, { "epoch": 0.043475626215383936, "grad_norm": 1.3358477354049683, "learning_rate": 2.9985740829394793e-05, "loss": 0.1537, "step": 1973 }, { "epoch": 0.0434976615049001, "grad_norm": 2.5560264587402344, "learning_rate": 2.998569412376311e-05, "loss": 0.1693, "step": 1974 }, { "epoch": 0.04351969679441626, "grad_norm": 1.9453662633895874, "learning_rate": 2.9985647341801245e-05, "loss": 0.183, "step": 1975 }, { "epoch": 0.043541732083932416, "grad_norm": 2.510883092880249, "learning_rate": 2.9985600483509452e-05, "loss": 0.1946, "step": 1976 }, { "epoch": 0.04356376737344858, "grad_norm": 2.0411248207092285, "learning_rate": 2.9985553548887956e-05, "loss": 0.1578, "step": 1977 }, { "epoch": 0.04358580266296474, "grad_norm": 1.7436776161193848, "learning_rate": 2.9985506537936997e-05, "loss": 0.1633, "step": 1978 }, { "epoch": 0.043607837952480896, "grad_norm": 1.7931602001190186, "learning_rate": 2.9985459450656823e-05, "loss": 0.1875, "step": 1979 }, { "epoch": 0.04362987324199706, "grad_norm": 1.9856364727020264, "learning_rate": 2.9985412287047664e-05, "loss": 0.1753, "step": 1980 }, { "epoch": 0.04365190853151322, "grad_norm": 1.9596918821334839, "learning_rate": 2.9985365047109764e-05, "loss": 0.2492, "step": 1981 }, { "epoch": 0.043673943821029376, "grad_norm": 2.6766867637634277, "learning_rate": 2.9985317730843363e-05, "loss": 0.2101, "step": 1982 }, { "epoch": 0.04369597911054554, "grad_norm": 1.9784040451049805, "learning_rate": 2.998527033824871e-05, "loss": 0.2416, "step": 1983 }, { "epoch": 0.0437180144000617, "grad_norm": 2.1010241508483887, "learning_rate": 2.998522286932603e-05, "loss": 0.1713, "step": 1984 }, { "epoch": 0.043740049689577856, "grad_norm": 1.6118135452270508, "learning_rate": 2.998517532407558e-05, "loss": 0.1771, "step": 1985 }, { "epoch": 0.04376208497909402, "grad_norm": 2.226954936981201, "learning_rate": 2.9985127702497596e-05, "loss": 0.1865, "step": 1986 }, { "epoch": 0.04378412026861018, "grad_norm": 1.8932780027389526, "learning_rate": 2.998508000459232e-05, "loss": 0.2249, "step": 1987 }, { "epoch": 0.043806155558126336, "grad_norm": 2.244818687438965, "learning_rate": 2.9985032230359998e-05, "loss": 0.185, "step": 1988 }, { "epoch": 0.0438281908476425, "grad_norm": 1.823850154876709, "learning_rate": 2.9984984379800867e-05, "loss": 0.22, "step": 1989 }, { "epoch": 0.04385022613715866, "grad_norm": 2.767831325531006, "learning_rate": 2.9984936452915178e-05, "loss": 0.1883, "step": 1990 }, { "epoch": 0.04387226142667482, "grad_norm": 2.4869680404663086, "learning_rate": 2.9984888449703173e-05, "loss": 0.1903, "step": 1991 }, { "epoch": 0.04389429671619098, "grad_norm": 2.685718536376953, "learning_rate": 2.998484037016509e-05, "loss": 0.1692, "step": 1992 }, { "epoch": 0.04391633200570714, "grad_norm": 3.7800652980804443, "learning_rate": 2.998479221430119e-05, "loss": 0.2335, "step": 1993 }, { "epoch": 0.0439383672952233, "grad_norm": 1.5911405086517334, "learning_rate": 2.99847439821117e-05, "loss": 0.1749, "step": 1994 }, { "epoch": 0.04396040258473946, "grad_norm": 2.051208257675171, "learning_rate": 2.998469567359687e-05, "loss": 0.1563, "step": 1995 }, { "epoch": 0.04398243787425562, "grad_norm": 1.9707437753677368, "learning_rate": 2.9984647288756957e-05, "loss": 0.2176, "step": 1996 }, { "epoch": 0.04400447316377178, "grad_norm": 2.5912890434265137, "learning_rate": 2.9984598827592195e-05, "loss": 0.2341, "step": 1997 }, { "epoch": 0.04402650845328794, "grad_norm": 2.5069448947906494, "learning_rate": 2.9984550290102842e-05, "loss": 0.1874, "step": 1998 }, { "epoch": 0.0440485437428041, "grad_norm": 1.8805489540100098, "learning_rate": 2.9984501676289133e-05, "loss": 0.1765, "step": 1999 }, { "epoch": 0.04407057903232026, "grad_norm": 2.158423900604248, "learning_rate": 2.9984452986151325e-05, "loss": 0.1753, "step": 2000 }, { "epoch": 0.04409261432183642, "grad_norm": 2.055086374282837, "learning_rate": 2.998440421968966e-05, "loss": 0.1901, "step": 2001 }, { "epoch": 0.04411464961135258, "grad_norm": 2.058084726333618, "learning_rate": 2.9984355376904394e-05, "loss": 0.1965, "step": 2002 }, { "epoch": 0.04413668490086874, "grad_norm": 1.6967847347259521, "learning_rate": 2.998430645779576e-05, "loss": 0.1985, "step": 2003 }, { "epoch": 0.0441587201903849, "grad_norm": 2.002208709716797, "learning_rate": 2.998425746236403e-05, "loss": 0.2309, "step": 2004 }, { "epoch": 0.04418075547990106, "grad_norm": 2.4479575157165527, "learning_rate": 2.998420839060944e-05, "loss": 0.1727, "step": 2005 }, { "epoch": 0.04420279076941722, "grad_norm": 2.3626909255981445, "learning_rate": 2.9984159242532236e-05, "loss": 0.2179, "step": 2006 }, { "epoch": 0.04422482605893338, "grad_norm": 2.0217342376708984, "learning_rate": 2.9984110018132672e-05, "loss": 0.1449, "step": 2007 }, { "epoch": 0.04424686134844954, "grad_norm": 2.126882314682007, "learning_rate": 2.9984060717411008e-05, "loss": 0.2126, "step": 2008 }, { "epoch": 0.0442688966379657, "grad_norm": 2.3693201541900635, "learning_rate": 2.9984011340367484e-05, "loss": 0.1986, "step": 2009 }, { "epoch": 0.04429093192748186, "grad_norm": 3.648294687271118, "learning_rate": 2.9983961887002355e-05, "loss": 0.2352, "step": 2010 }, { "epoch": 0.044312967216998025, "grad_norm": 1.7788747549057007, "learning_rate": 2.9983912357315874e-05, "loss": 0.1854, "step": 2011 }, { "epoch": 0.04433500250651418, "grad_norm": 1.8422585725784302, "learning_rate": 2.9983862751308292e-05, "loss": 0.1523, "step": 2012 }, { "epoch": 0.04435703779603034, "grad_norm": 1.7107486724853516, "learning_rate": 2.998381306897986e-05, "loss": 0.1932, "step": 2013 }, { "epoch": 0.044379073085546505, "grad_norm": 1.7970302104949951, "learning_rate": 2.9983763310330837e-05, "loss": 0.1309, "step": 2014 }, { "epoch": 0.04440110837506266, "grad_norm": 3.379906177520752, "learning_rate": 2.998371347536147e-05, "loss": 0.1826, "step": 2015 }, { "epoch": 0.04442314366457882, "grad_norm": 1.9496876001358032, "learning_rate": 2.9983663564072017e-05, "loss": 0.1802, "step": 2016 }, { "epoch": 0.044445178954094985, "grad_norm": 2.3452889919281006, "learning_rate": 2.9983613576462727e-05, "loss": 0.1721, "step": 2017 }, { "epoch": 0.04446721424361114, "grad_norm": 1.795641303062439, "learning_rate": 2.9983563512533864e-05, "loss": 0.1917, "step": 2018 }, { "epoch": 0.0444892495331273, "grad_norm": 2.069120168685913, "learning_rate": 2.9983513372285672e-05, "loss": 0.1841, "step": 2019 }, { "epoch": 0.044511284822643465, "grad_norm": 2.374434471130371, "learning_rate": 2.9983463155718412e-05, "loss": 0.1972, "step": 2020 }, { "epoch": 0.04453332011215962, "grad_norm": 2.2447638511657715, "learning_rate": 2.9983412862832346e-05, "loss": 0.1812, "step": 2021 }, { "epoch": 0.04455535540167578, "grad_norm": 2.5746121406555176, "learning_rate": 2.9983362493627716e-05, "loss": 0.1635, "step": 2022 }, { "epoch": 0.044577390691191945, "grad_norm": 1.8824840784072876, "learning_rate": 2.9983312048104788e-05, "loss": 0.1772, "step": 2023 }, { "epoch": 0.0445994259807081, "grad_norm": 2.4743993282318115, "learning_rate": 2.9983261526263818e-05, "loss": 0.1805, "step": 2024 }, { "epoch": 0.04462146127022426, "grad_norm": 2.3036551475524902, "learning_rate": 2.998321092810506e-05, "loss": 0.1685, "step": 2025 }, { "epoch": 0.044643496559740425, "grad_norm": 1.833315372467041, "learning_rate": 2.9983160253628778e-05, "loss": 0.1613, "step": 2026 }, { "epoch": 0.04466553184925658, "grad_norm": 2.3824524879455566, "learning_rate": 2.9983109502835223e-05, "loss": 0.2079, "step": 2027 }, { "epoch": 0.04468756713877275, "grad_norm": 2.017996072769165, "learning_rate": 2.998305867572466e-05, "loss": 0.1919, "step": 2028 }, { "epoch": 0.044709602428288905, "grad_norm": 2.349212169647217, "learning_rate": 2.9983007772297343e-05, "loss": 0.1422, "step": 2029 }, { "epoch": 0.04473163771780506, "grad_norm": 2.2135884761810303, "learning_rate": 2.998295679255353e-05, "loss": 0.2375, "step": 2030 }, { "epoch": 0.04475367300732123, "grad_norm": 1.835681676864624, "learning_rate": 2.998290573649349e-05, "loss": 0.1552, "step": 2031 }, { "epoch": 0.044775708296837385, "grad_norm": 2.119410514831543, "learning_rate": 2.998285460411747e-05, "loss": 0.1917, "step": 2032 }, { "epoch": 0.04479774358635354, "grad_norm": 2.5235581398010254, "learning_rate": 2.998280339542574e-05, "loss": 0.1744, "step": 2033 }, { "epoch": 0.04481977887586971, "grad_norm": 1.8905818462371826, "learning_rate": 2.9982752110418557e-05, "loss": 0.2028, "step": 2034 }, { "epoch": 0.044841814165385865, "grad_norm": 1.7235020399093628, "learning_rate": 2.9982700749096186e-05, "loss": 0.2007, "step": 2035 }, { "epoch": 0.04486384945490202, "grad_norm": 2.0233919620513916, "learning_rate": 2.9982649311458884e-05, "loss": 0.1869, "step": 2036 }, { "epoch": 0.04488588474441819, "grad_norm": 1.4767084121704102, "learning_rate": 2.9982597797506912e-05, "loss": 0.2052, "step": 2037 }, { "epoch": 0.044907920033934345, "grad_norm": 1.3286480903625488, "learning_rate": 2.9982546207240538e-05, "loss": 0.144, "step": 2038 }, { "epoch": 0.0449299553234505, "grad_norm": 3.229623317718506, "learning_rate": 2.998249454066002e-05, "loss": 0.2265, "step": 2039 }, { "epoch": 0.04495199061296667, "grad_norm": 1.8907825946807861, "learning_rate": 2.9982442797765622e-05, "loss": 0.1839, "step": 2040 }, { "epoch": 0.044974025902482825, "grad_norm": 2.5286476612091064, "learning_rate": 2.9982390978557613e-05, "loss": 0.1912, "step": 2041 }, { "epoch": 0.04499606119199898, "grad_norm": 1.8644145727157593, "learning_rate": 2.9982339083036252e-05, "loss": 0.2083, "step": 2042 }, { "epoch": 0.04501809648151515, "grad_norm": 1.9441059827804565, "learning_rate": 2.9982287111201803e-05, "loss": 0.2347, "step": 2043 }, { "epoch": 0.045040131771031305, "grad_norm": 2.3488423824310303, "learning_rate": 2.998223506305453e-05, "loss": 0.2025, "step": 2044 }, { "epoch": 0.04506216706054747, "grad_norm": 1.981846809387207, "learning_rate": 2.9982182938594698e-05, "loss": 0.1784, "step": 2045 }, { "epoch": 0.04508420235006363, "grad_norm": 1.4195104837417603, "learning_rate": 2.9982130737822578e-05, "loss": 0.1725, "step": 2046 }, { "epoch": 0.045106237639579785, "grad_norm": 2.0608127117156982, "learning_rate": 2.998207846073843e-05, "loss": 0.129, "step": 2047 }, { "epoch": 0.04512827292909595, "grad_norm": 2.1954734325408936, "learning_rate": 2.9982026107342523e-05, "loss": 0.1622, "step": 2048 }, { "epoch": 0.04515030821861211, "grad_norm": 1.577004313468933, "learning_rate": 2.9981973677635124e-05, "loss": 0.1625, "step": 2049 }, { "epoch": 0.045172343508128265, "grad_norm": 1.99764084815979, "learning_rate": 2.9981921171616498e-05, "loss": 0.1532, "step": 2050 }, { "epoch": 0.04519437879764443, "grad_norm": 1.8350701332092285, "learning_rate": 2.9981868589286914e-05, "loss": 0.1765, "step": 2051 }, { "epoch": 0.04521641408716059, "grad_norm": 1.4683164358139038, "learning_rate": 2.9981815930646638e-05, "loss": 0.1741, "step": 2052 }, { "epoch": 0.045238449376676745, "grad_norm": 2.3614726066589355, "learning_rate": 2.9981763195695945e-05, "loss": 0.1963, "step": 2053 }, { "epoch": 0.04526048466619291, "grad_norm": 2.180438995361328, "learning_rate": 2.9981710384435094e-05, "loss": 0.19, "step": 2054 }, { "epoch": 0.04528251995570907, "grad_norm": 1.6982135772705078, "learning_rate": 2.9981657496864357e-05, "loss": 0.1704, "step": 2055 }, { "epoch": 0.045304555245225225, "grad_norm": 2.0014476776123047, "learning_rate": 2.9981604532984005e-05, "loss": 0.1851, "step": 2056 }, { "epoch": 0.04532659053474139, "grad_norm": 1.6972578763961792, "learning_rate": 2.998155149279431e-05, "loss": 0.1937, "step": 2057 }, { "epoch": 0.04534862582425755, "grad_norm": 1.7539225816726685, "learning_rate": 2.9981498376295534e-05, "loss": 0.2147, "step": 2058 }, { "epoch": 0.045370661113773705, "grad_norm": 2.5733156204223633, "learning_rate": 2.9981445183487957e-05, "loss": 0.1935, "step": 2059 }, { "epoch": 0.04539269640328987, "grad_norm": 1.5451236963272095, "learning_rate": 2.9981391914371846e-05, "loss": 0.1713, "step": 2060 }, { "epoch": 0.04541473169280603, "grad_norm": 1.2458642721176147, "learning_rate": 2.9981338568947475e-05, "loss": 0.1403, "step": 2061 }, { "epoch": 0.04543676698232219, "grad_norm": 1.6351298093795776, "learning_rate": 2.9981285147215108e-05, "loss": 0.171, "step": 2062 }, { "epoch": 0.04545880227183835, "grad_norm": 1.45901358127594, "learning_rate": 2.9981231649175024e-05, "loss": 0.2436, "step": 2063 }, { "epoch": 0.04548083756135451, "grad_norm": 2.118612289428711, "learning_rate": 2.9981178074827494e-05, "loss": 0.1918, "step": 2064 }, { "epoch": 0.04550287285087067, "grad_norm": 1.390524983406067, "learning_rate": 2.998112442417279e-05, "loss": 0.169, "step": 2065 }, { "epoch": 0.04552490814038683, "grad_norm": 3.227787733078003, "learning_rate": 2.9981070697211187e-05, "loss": 0.1524, "step": 2066 }, { "epoch": 0.04554694342990299, "grad_norm": 1.4316824674606323, "learning_rate": 2.9981016893942957e-05, "loss": 0.1095, "step": 2067 }, { "epoch": 0.04556897871941915, "grad_norm": 1.867021918296814, "learning_rate": 2.998096301436837e-05, "loss": 0.2367, "step": 2068 }, { "epoch": 0.04559101400893531, "grad_norm": 2.2060916423797607, "learning_rate": 2.9980909058487714e-05, "loss": 0.1927, "step": 2069 }, { "epoch": 0.04561304929845147, "grad_norm": 2.562835454940796, "learning_rate": 2.998085502630125e-05, "loss": 0.2053, "step": 2070 }, { "epoch": 0.04563508458796763, "grad_norm": 1.825291633605957, "learning_rate": 2.9980800917809258e-05, "loss": 0.1599, "step": 2071 }, { "epoch": 0.04565711987748379, "grad_norm": 2.0226309299468994, "learning_rate": 2.9980746733012014e-05, "loss": 0.1677, "step": 2072 }, { "epoch": 0.04567915516699995, "grad_norm": 1.9573113918304443, "learning_rate": 2.998069247190979e-05, "loss": 0.1508, "step": 2073 }, { "epoch": 0.04570119045651611, "grad_norm": 2.0857598781585693, "learning_rate": 2.998063813450287e-05, "loss": 0.2132, "step": 2074 }, { "epoch": 0.04572322574603227, "grad_norm": 2.936612129211426, "learning_rate": 2.9980583720791525e-05, "loss": 0.2172, "step": 2075 }, { "epoch": 0.04574526103554843, "grad_norm": 2.0482378005981445, "learning_rate": 2.9980529230776032e-05, "loss": 0.1746, "step": 2076 }, { "epoch": 0.04576729632506459, "grad_norm": 1.3025821447372437, "learning_rate": 2.9980474664456677e-05, "loss": 0.1714, "step": 2077 }, { "epoch": 0.04578933161458075, "grad_norm": 2.2196407318115234, "learning_rate": 2.9980420021833728e-05, "loss": 0.1978, "step": 2078 }, { "epoch": 0.045811366904096915, "grad_norm": 1.6754134893417358, "learning_rate": 2.9980365302907465e-05, "loss": 0.1632, "step": 2079 }, { "epoch": 0.04583340219361307, "grad_norm": 1.5505187511444092, "learning_rate": 2.9980310507678168e-05, "loss": 0.1512, "step": 2080 }, { "epoch": 0.04585543748312923, "grad_norm": 1.5833866596221924, "learning_rate": 2.9980255636146116e-05, "loss": 0.179, "step": 2081 }, { "epoch": 0.045877472772645395, "grad_norm": 2.0721256732940674, "learning_rate": 2.9980200688311592e-05, "loss": 0.1836, "step": 2082 }, { "epoch": 0.04589950806216155, "grad_norm": 2.0435962677001953, "learning_rate": 2.9980145664174872e-05, "loss": 0.189, "step": 2083 }, { "epoch": 0.04592154335167771, "grad_norm": 1.8264813423156738, "learning_rate": 2.998009056373624e-05, "loss": 0.1653, "step": 2084 }, { "epoch": 0.045943578641193875, "grad_norm": 1.832947015762329, "learning_rate": 2.998003538699597e-05, "loss": 0.1713, "step": 2085 }, { "epoch": 0.04596561393071003, "grad_norm": 1.870182991027832, "learning_rate": 2.9979980133954347e-05, "loss": 0.1769, "step": 2086 }, { "epoch": 0.04598764922022619, "grad_norm": 1.8951828479766846, "learning_rate": 2.9979924804611653e-05, "loss": 0.2008, "step": 2087 }, { "epoch": 0.046009684509742355, "grad_norm": 1.9100546836853027, "learning_rate": 2.9979869398968167e-05, "loss": 0.195, "step": 2088 }, { "epoch": 0.04603171979925851, "grad_norm": 1.758140206336975, "learning_rate": 2.9979813917024174e-05, "loss": 0.1964, "step": 2089 }, { "epoch": 0.04605375508877467, "grad_norm": 2.196054697036743, "learning_rate": 2.997975835877996e-05, "loss": 0.1894, "step": 2090 }, { "epoch": 0.046075790378290835, "grad_norm": 2.3186323642730713, "learning_rate": 2.9979702724235802e-05, "loss": 0.214, "step": 2091 }, { "epoch": 0.04609782566780699, "grad_norm": 1.809166431427002, "learning_rate": 2.9979647013391983e-05, "loss": 0.2227, "step": 2092 }, { "epoch": 0.04611986095732315, "grad_norm": 1.7103297710418701, "learning_rate": 2.9979591226248787e-05, "loss": 0.1594, "step": 2093 }, { "epoch": 0.046141896246839315, "grad_norm": 1.8289560079574585, "learning_rate": 2.9979535362806508e-05, "loss": 0.2288, "step": 2094 }, { "epoch": 0.04616393153635547, "grad_norm": 1.4369052648544312, "learning_rate": 2.9979479423065417e-05, "loss": 0.1978, "step": 2095 }, { "epoch": 0.04618596682587164, "grad_norm": 2.0039422512054443, "learning_rate": 2.9979423407025805e-05, "loss": 0.1694, "step": 2096 }, { "epoch": 0.046208002115387795, "grad_norm": 1.8716015815734863, "learning_rate": 2.997936731468796e-05, "loss": 0.1163, "step": 2097 }, { "epoch": 0.04623003740490395, "grad_norm": 1.0700079202651978, "learning_rate": 2.997931114605216e-05, "loss": 0.1461, "step": 2098 }, { "epoch": 0.04625207269442012, "grad_norm": 1.3556705713272095, "learning_rate": 2.99792549011187e-05, "loss": 0.1624, "step": 2099 }, { "epoch": 0.046274107983936275, "grad_norm": 2.5020875930786133, "learning_rate": 2.997919857988786e-05, "loss": 0.1812, "step": 2100 }, { "epoch": 0.04629614327345243, "grad_norm": 1.4687423706054688, "learning_rate": 2.997914218235993e-05, "loss": 0.1623, "step": 2101 }, { "epoch": 0.0463181785629686, "grad_norm": 1.9493725299835205, "learning_rate": 2.9979085708535198e-05, "loss": 0.165, "step": 2102 }, { "epoch": 0.046340213852484755, "grad_norm": 2.142267942428589, "learning_rate": 2.9979029158413946e-05, "loss": 0.1917, "step": 2103 }, { "epoch": 0.04636224914200091, "grad_norm": 1.6780908107757568, "learning_rate": 2.997897253199647e-05, "loss": 0.1872, "step": 2104 }, { "epoch": 0.04638428443151708, "grad_norm": 1.6073884963989258, "learning_rate": 2.9978915829283053e-05, "loss": 0.141, "step": 2105 }, { "epoch": 0.046406319721033235, "grad_norm": 1.6187978982925415, "learning_rate": 2.9978859050273987e-05, "loss": 0.1913, "step": 2106 }, { "epoch": 0.04642835501054939, "grad_norm": 1.7268908023834229, "learning_rate": 2.9978802194969553e-05, "loss": 0.2395, "step": 2107 }, { "epoch": 0.04645039030006556, "grad_norm": 1.767960548400879, "learning_rate": 2.9978745263370053e-05, "loss": 0.1739, "step": 2108 }, { "epoch": 0.046472425589581715, "grad_norm": 3.575988292694092, "learning_rate": 2.997868825547577e-05, "loss": 0.1813, "step": 2109 }, { "epoch": 0.04649446087909787, "grad_norm": 2.1649277210235596, "learning_rate": 2.9978631171287e-05, "loss": 0.2, "step": 2110 }, { "epoch": 0.04651649616861404, "grad_norm": 2.246314525604248, "learning_rate": 2.997857401080402e-05, "loss": 0.1493, "step": 2111 }, { "epoch": 0.046538531458130195, "grad_norm": 1.890142798423767, "learning_rate": 2.997851677402714e-05, "loss": 0.1958, "step": 2112 }, { "epoch": 0.04656056674764635, "grad_norm": 1.7056506872177124, "learning_rate": 2.9978459460956633e-05, "loss": 0.1483, "step": 2113 }, { "epoch": 0.04658260203716252, "grad_norm": 1.6703808307647705, "learning_rate": 2.9978402071592807e-05, "loss": 0.1657, "step": 2114 }, { "epoch": 0.046604637326678675, "grad_norm": 2.331068515777588, "learning_rate": 2.9978344605935943e-05, "loss": 0.1638, "step": 2115 }, { "epoch": 0.04662667261619484, "grad_norm": 1.660488247871399, "learning_rate": 2.9978287063986338e-05, "loss": 0.1072, "step": 2116 }, { "epoch": 0.046648707905711, "grad_norm": 1.752854585647583, "learning_rate": 2.9978229445744288e-05, "loss": 0.2202, "step": 2117 }, { "epoch": 0.046670743195227155, "grad_norm": 2.11161732673645, "learning_rate": 2.9978171751210083e-05, "loss": 0.203, "step": 2118 }, { "epoch": 0.04669277848474332, "grad_norm": 2.9149250984191895, "learning_rate": 2.9978113980384013e-05, "loss": 0.2498, "step": 2119 }, { "epoch": 0.04671481377425948, "grad_norm": 3.231614351272583, "learning_rate": 2.997805613326638e-05, "loss": 0.153, "step": 2120 }, { "epoch": 0.046736849063775635, "grad_norm": 2.2344319820404053, "learning_rate": 2.9977998209857476e-05, "loss": 0.1867, "step": 2121 }, { "epoch": 0.0467588843532918, "grad_norm": 1.5826764106750488, "learning_rate": 2.9977940210157594e-05, "loss": 0.1942, "step": 2122 }, { "epoch": 0.04678091964280796, "grad_norm": 1.5414133071899414, "learning_rate": 2.997788213416703e-05, "loss": 0.1787, "step": 2123 }, { "epoch": 0.046802954932324115, "grad_norm": 2.334760904312134, "learning_rate": 2.9977823981886084e-05, "loss": 0.1567, "step": 2124 }, { "epoch": 0.04682499022184028, "grad_norm": 1.7654284238815308, "learning_rate": 2.9977765753315044e-05, "loss": 0.1947, "step": 2125 }, { "epoch": 0.04684702551135644, "grad_norm": 1.7751580476760864, "learning_rate": 2.9977707448454213e-05, "loss": 0.1931, "step": 2126 }, { "epoch": 0.046869060800872595, "grad_norm": 2.2111690044403076, "learning_rate": 2.9977649067303886e-05, "loss": 0.1737, "step": 2127 }, { "epoch": 0.04689109609038876, "grad_norm": 1.628325343132019, "learning_rate": 2.9977590609864364e-05, "loss": 0.1697, "step": 2128 }, { "epoch": 0.04691313137990492, "grad_norm": 1.5285600423812866, "learning_rate": 2.9977532076135936e-05, "loss": 0.2086, "step": 2129 }, { "epoch": 0.046935166669421075, "grad_norm": 2.792679786682129, "learning_rate": 2.997747346611891e-05, "loss": 0.1888, "step": 2130 }, { "epoch": 0.04695720195893724, "grad_norm": 1.3448984622955322, "learning_rate": 2.997741477981358e-05, "loss": 0.1568, "step": 2131 }, { "epoch": 0.0469792372484534, "grad_norm": 1.4624087810516357, "learning_rate": 2.9977356017220242e-05, "loss": 0.1527, "step": 2132 }, { "epoch": 0.04700127253796956, "grad_norm": 2.2697536945343018, "learning_rate": 2.99772971783392e-05, "loss": 0.2086, "step": 2133 }, { "epoch": 0.04702330782748572, "grad_norm": 2.2985169887542725, "learning_rate": 2.997723826317075e-05, "loss": 0.1997, "step": 2134 }, { "epoch": 0.04704534311700188, "grad_norm": 2.2318899631500244, "learning_rate": 2.9977179271715197e-05, "loss": 0.195, "step": 2135 }, { "epoch": 0.04706737840651804, "grad_norm": 2.077385187149048, "learning_rate": 2.9977120203972838e-05, "loss": 0.1796, "step": 2136 }, { "epoch": 0.0470894136960342, "grad_norm": 1.6101807355880737, "learning_rate": 2.997706105994397e-05, "loss": 0.1798, "step": 2137 }, { "epoch": 0.04711144898555036, "grad_norm": 2.0199849605560303, "learning_rate": 2.9977001839628907e-05, "loss": 0.1929, "step": 2138 }, { "epoch": 0.04713348427506652, "grad_norm": 2.4232847690582275, "learning_rate": 2.9976942543027936e-05, "loss": 0.2327, "step": 2139 }, { "epoch": 0.04715551956458268, "grad_norm": 2.2169792652130127, "learning_rate": 2.9976883170141368e-05, "loss": 0.1285, "step": 2140 }, { "epoch": 0.04717755485409884, "grad_norm": 2.1676747798919678, "learning_rate": 2.99768237209695e-05, "loss": 0.1306, "step": 2141 }, { "epoch": 0.047199590143615, "grad_norm": 2.5208210945129395, "learning_rate": 2.9976764195512642e-05, "loss": 0.1567, "step": 2142 }, { "epoch": 0.04722162543313116, "grad_norm": 2.140056848526001, "learning_rate": 2.9976704593771087e-05, "loss": 0.1718, "step": 2143 }, { "epoch": 0.04724366072264732, "grad_norm": 2.305865526199341, "learning_rate": 2.997664491574515e-05, "loss": 0.2032, "step": 2144 }, { "epoch": 0.04726569601216348, "grad_norm": 2.335604429244995, "learning_rate": 2.9976585161435124e-05, "loss": 0.1504, "step": 2145 }, { "epoch": 0.04728773130167964, "grad_norm": 1.9043959379196167, "learning_rate": 2.9976525330841323e-05, "loss": 0.1724, "step": 2146 }, { "epoch": 0.0473097665911958, "grad_norm": 1.5400025844573975, "learning_rate": 2.9976465423964047e-05, "loss": 0.1845, "step": 2147 }, { "epoch": 0.04733180188071196, "grad_norm": 1.8918979167938232, "learning_rate": 2.99764054408036e-05, "loss": 0.1973, "step": 2148 }, { "epoch": 0.04735383717022812, "grad_norm": 1.632961392402649, "learning_rate": 2.9976345381360287e-05, "loss": 0.2033, "step": 2149 }, { "epoch": 0.047375872459744284, "grad_norm": 1.4050004482269287, "learning_rate": 2.9976285245634424e-05, "loss": 0.1897, "step": 2150 }, { "epoch": 0.04739790774926044, "grad_norm": 2.44889760017395, "learning_rate": 2.9976225033626305e-05, "loss": 0.2124, "step": 2151 }, { "epoch": 0.0474199430387766, "grad_norm": 2.469829559326172, "learning_rate": 2.9976164745336238e-05, "loss": 0.1715, "step": 2152 }, { "epoch": 0.047441978328292764, "grad_norm": 3.185238838195801, "learning_rate": 2.9976104380764536e-05, "loss": 0.2077, "step": 2153 }, { "epoch": 0.04746401361780892, "grad_norm": 2.4938673973083496, "learning_rate": 2.99760439399115e-05, "loss": 0.2157, "step": 2154 }, { "epoch": 0.04748604890732508, "grad_norm": 1.8948007822036743, "learning_rate": 2.9975983422777447e-05, "loss": 0.1522, "step": 2155 }, { "epoch": 0.047508084196841244, "grad_norm": 2.061250925064087, "learning_rate": 2.9975922829362677e-05, "loss": 0.1859, "step": 2156 }, { "epoch": 0.0475301194863574, "grad_norm": 1.9795057773590088, "learning_rate": 2.9975862159667498e-05, "loss": 0.1668, "step": 2157 }, { "epoch": 0.04755215477587356, "grad_norm": 2.042806386947632, "learning_rate": 2.9975801413692224e-05, "loss": 0.1561, "step": 2158 }, { "epoch": 0.047574190065389724, "grad_norm": 1.6567738056182861, "learning_rate": 2.9975740591437165e-05, "loss": 0.2256, "step": 2159 }, { "epoch": 0.04759622535490588, "grad_norm": 2.0298869609832764, "learning_rate": 2.9975679692902632e-05, "loss": 0.1473, "step": 2160 }, { "epoch": 0.04761826064442204, "grad_norm": 1.342381238937378, "learning_rate": 2.9975618718088926e-05, "loss": 0.1645, "step": 2161 }, { "epoch": 0.047640295933938204, "grad_norm": 1.9161208868026733, "learning_rate": 2.9975557666996366e-05, "loss": 0.2048, "step": 2162 }, { "epoch": 0.04766233122345436, "grad_norm": 1.570351004600525, "learning_rate": 2.9975496539625257e-05, "loss": 0.1803, "step": 2163 }, { "epoch": 0.04768436651297052, "grad_norm": 1.5947623252868652, "learning_rate": 2.9975435335975913e-05, "loss": 0.117, "step": 2164 }, { "epoch": 0.047706401802486684, "grad_norm": 2.0516631603240967, "learning_rate": 2.997537405604865e-05, "loss": 0.1664, "step": 2165 }, { "epoch": 0.04772843709200284, "grad_norm": 2.230271577835083, "learning_rate": 2.9975312699843775e-05, "loss": 0.2138, "step": 2166 }, { "epoch": 0.047750472381519006, "grad_norm": 2.634995222091675, "learning_rate": 2.9975251267361603e-05, "loss": 0.2003, "step": 2167 }, { "epoch": 0.047772507671035164, "grad_norm": 2.2997145652770996, "learning_rate": 2.9975189758602447e-05, "loss": 0.1633, "step": 2168 }, { "epoch": 0.04779454296055132, "grad_norm": 2.2912380695343018, "learning_rate": 2.9975128173566616e-05, "loss": 0.2029, "step": 2169 }, { "epoch": 0.047816578250067486, "grad_norm": 2.079495429992676, "learning_rate": 2.997506651225443e-05, "loss": 0.183, "step": 2170 }, { "epoch": 0.047838613539583644, "grad_norm": 1.5313202142715454, "learning_rate": 2.99750047746662e-05, "loss": 0.1712, "step": 2171 }, { "epoch": 0.0478606488290998, "grad_norm": 1.2596193552017212, "learning_rate": 2.997494296080224e-05, "loss": 0.138, "step": 2172 }, { "epoch": 0.047882684118615966, "grad_norm": 1.8475587368011475, "learning_rate": 2.9974881070662864e-05, "loss": 0.1644, "step": 2173 }, { "epoch": 0.047904719408132124, "grad_norm": 2.352276563644409, "learning_rate": 2.9974819104248385e-05, "loss": 0.2069, "step": 2174 }, { "epoch": 0.04792675469764828, "grad_norm": 2.115828275680542, "learning_rate": 2.9974757061559128e-05, "loss": 0.1766, "step": 2175 }, { "epoch": 0.047948789987164446, "grad_norm": 1.8605713844299316, "learning_rate": 2.99746949425954e-05, "loss": 0.1967, "step": 2176 }, { "epoch": 0.047970825276680604, "grad_norm": 1.4968047142028809, "learning_rate": 2.9974632747357522e-05, "loss": 0.1765, "step": 2177 }, { "epoch": 0.04799286056619676, "grad_norm": 1.8273663520812988, "learning_rate": 2.9974570475845814e-05, "loss": 0.2007, "step": 2178 }, { "epoch": 0.048014895855712926, "grad_norm": 2.8607571125030518, "learning_rate": 2.997450812806058e-05, "loss": 0.2355, "step": 2179 }, { "epoch": 0.048036931145229084, "grad_norm": 1.9081579446792603, "learning_rate": 2.997444570400215e-05, "loss": 0.1891, "step": 2180 }, { "epoch": 0.04805896643474524, "grad_norm": 1.5396188497543335, "learning_rate": 2.997438320367084e-05, "loss": 0.1836, "step": 2181 }, { "epoch": 0.048081001724261406, "grad_norm": 3.26745343208313, "learning_rate": 2.9974320627066965e-05, "loss": 0.1887, "step": 2182 }, { "epoch": 0.048103037013777564, "grad_norm": 2.093717336654663, "learning_rate": 2.9974257974190842e-05, "loss": 0.1846, "step": 2183 }, { "epoch": 0.04812507230329373, "grad_norm": 1.9059534072875977, "learning_rate": 2.9974195245042798e-05, "loss": 0.1913, "step": 2184 }, { "epoch": 0.048147107592809886, "grad_norm": 1.6843832731246948, "learning_rate": 2.9974132439623146e-05, "loss": 0.1856, "step": 2185 }, { "epoch": 0.048169142882326044, "grad_norm": 2.7338130474090576, "learning_rate": 2.9974069557932203e-05, "loss": 0.172, "step": 2186 }, { "epoch": 0.04819117817184221, "grad_norm": 2.0378901958465576, "learning_rate": 2.99740065999703e-05, "loss": 0.1846, "step": 2187 }, { "epoch": 0.048213213461358366, "grad_norm": 1.9202189445495605, "learning_rate": 2.9973943565737753e-05, "loss": 0.1555, "step": 2188 }, { "epoch": 0.048235248750874524, "grad_norm": 1.0405933856964111, "learning_rate": 2.9973880455234876e-05, "loss": 0.1486, "step": 2189 }, { "epoch": 0.04825728404039069, "grad_norm": 1.5024127960205078, "learning_rate": 2.9973817268462e-05, "loss": 0.1572, "step": 2190 }, { "epoch": 0.048279319329906846, "grad_norm": 1.6337788105010986, "learning_rate": 2.9973754005419444e-05, "loss": 0.1511, "step": 2191 }, { "epoch": 0.048301354619423004, "grad_norm": 1.8653346300125122, "learning_rate": 2.9973690666107526e-05, "loss": 0.1745, "step": 2192 }, { "epoch": 0.04832338990893917, "grad_norm": 1.6159164905548096, "learning_rate": 2.9973627250526577e-05, "loss": 0.2201, "step": 2193 }, { "epoch": 0.048345425198455326, "grad_norm": 2.3006012439727783, "learning_rate": 2.9973563758676907e-05, "loss": 0.185, "step": 2194 }, { "epoch": 0.048367460487971484, "grad_norm": 1.754709005355835, "learning_rate": 2.9973500190558852e-05, "loss": 0.1612, "step": 2195 }, { "epoch": 0.04838949577748765, "grad_norm": 1.9184231758117676, "learning_rate": 2.997343654617273e-05, "loss": 0.1533, "step": 2196 }, { "epoch": 0.048411531067003806, "grad_norm": 1.4372689723968506, "learning_rate": 2.9973372825518866e-05, "loss": 0.1835, "step": 2197 }, { "epoch": 0.048433566356519964, "grad_norm": 2.057863473892212, "learning_rate": 2.997330902859759e-05, "loss": 0.2016, "step": 2198 }, { "epoch": 0.04845560164603613, "grad_norm": 1.913713812828064, "learning_rate": 2.9973245155409217e-05, "loss": 0.1649, "step": 2199 }, { "epoch": 0.048477636935552286, "grad_norm": 2.1378910541534424, "learning_rate": 2.997318120595408e-05, "loss": 0.1861, "step": 2200 }, { "epoch": 0.048499672225068444, "grad_norm": 2.9651265144348145, "learning_rate": 2.9973117180232493e-05, "loss": 0.2128, "step": 2201 }, { "epoch": 0.04852170751458461, "grad_norm": 1.8998123407363892, "learning_rate": 2.99730530782448e-05, "loss": 0.1374, "step": 2202 }, { "epoch": 0.048543742804100766, "grad_norm": 1.512853980064392, "learning_rate": 2.9972988899991317e-05, "loss": 0.1554, "step": 2203 }, { "epoch": 0.04856577809361693, "grad_norm": 1.7645938396453857, "learning_rate": 2.9972924645472372e-05, "loss": 0.1772, "step": 2204 }, { "epoch": 0.04858781338313309, "grad_norm": 1.6421009302139282, "learning_rate": 2.9972860314688294e-05, "loss": 0.1584, "step": 2205 }, { "epoch": 0.048609848672649246, "grad_norm": 1.3395322561264038, "learning_rate": 2.9972795907639407e-05, "loss": 0.1435, "step": 2206 }, { "epoch": 0.04863188396216541, "grad_norm": 1.7451319694519043, "learning_rate": 2.997273142432604e-05, "loss": 0.1669, "step": 2207 }, { "epoch": 0.04865391925168157, "grad_norm": 2.205073118209839, "learning_rate": 2.9972666864748525e-05, "loss": 0.1782, "step": 2208 }, { "epoch": 0.048675954541197726, "grad_norm": 1.8027243614196777, "learning_rate": 2.9972602228907185e-05, "loss": 0.2075, "step": 2209 }, { "epoch": 0.04869798983071389, "grad_norm": 2.5348613262176514, "learning_rate": 2.997253751680236e-05, "loss": 0.198, "step": 2210 }, { "epoch": 0.04872002512023005, "grad_norm": 2.063133955001831, "learning_rate": 2.9972472728434367e-05, "loss": 0.1709, "step": 2211 }, { "epoch": 0.048742060409746206, "grad_norm": 1.363233208656311, "learning_rate": 2.9972407863803546e-05, "loss": 0.168, "step": 2212 }, { "epoch": 0.04876409569926237, "grad_norm": 1.8446871042251587, "learning_rate": 2.997234292291022e-05, "loss": 0.1699, "step": 2213 }, { "epoch": 0.04878613098877853, "grad_norm": 1.6307531595230103, "learning_rate": 2.9972277905754724e-05, "loss": 0.1574, "step": 2214 }, { "epoch": 0.048808166278294686, "grad_norm": 1.9244418144226074, "learning_rate": 2.9972212812337384e-05, "loss": 0.1278, "step": 2215 }, { "epoch": 0.04883020156781085, "grad_norm": 1.0372105836868286, "learning_rate": 2.997214764265854e-05, "loss": 0.1536, "step": 2216 }, { "epoch": 0.04885223685732701, "grad_norm": 1.9560108184814453, "learning_rate": 2.9972082396718518e-05, "loss": 0.1508, "step": 2217 }, { "epoch": 0.048874272146843166, "grad_norm": 2.038074016571045, "learning_rate": 2.997201707451765e-05, "loss": 0.1258, "step": 2218 }, { "epoch": 0.04889630743635933, "grad_norm": 1.8331972360610962, "learning_rate": 2.997195167605627e-05, "loss": 0.1953, "step": 2219 }, { "epoch": 0.04891834272587549, "grad_norm": 1.6507664918899536, "learning_rate": 2.9971886201334712e-05, "loss": 0.1851, "step": 2220 }, { "epoch": 0.04894037801539165, "grad_norm": 1.5801055431365967, "learning_rate": 2.9971820650353314e-05, "loss": 0.1464, "step": 2221 }, { "epoch": 0.04896241330490781, "grad_norm": 1.293305516242981, "learning_rate": 2.9971755023112402e-05, "loss": 0.1707, "step": 2222 }, { "epoch": 0.04898444859442397, "grad_norm": 1.837126612663269, "learning_rate": 2.9971689319612307e-05, "loss": 0.2063, "step": 2223 }, { "epoch": 0.04900648388394013, "grad_norm": 1.8592627048492432, "learning_rate": 2.997162353985338e-05, "loss": 0.1866, "step": 2224 }, { "epoch": 0.04902851917345629, "grad_norm": 1.8133010864257812, "learning_rate": 2.9971557683835938e-05, "loss": 0.2038, "step": 2225 }, { "epoch": 0.04905055446297245, "grad_norm": 2.4563376903533936, "learning_rate": 2.9971491751560328e-05, "loss": 0.1588, "step": 2226 }, { "epoch": 0.04907258975248861, "grad_norm": 1.5896400213241577, "learning_rate": 2.9971425743026884e-05, "loss": 0.2053, "step": 2227 }, { "epoch": 0.04909462504200477, "grad_norm": 3.513277053833008, "learning_rate": 2.9971359658235938e-05, "loss": 0.1902, "step": 2228 }, { "epoch": 0.04911666033152093, "grad_norm": 1.6793426275253296, "learning_rate": 2.9971293497187825e-05, "loss": 0.2233, "step": 2229 }, { "epoch": 0.04913869562103709, "grad_norm": 1.6419342756271362, "learning_rate": 2.997122725988289e-05, "loss": 0.1636, "step": 2230 }, { "epoch": 0.04916073091055325, "grad_norm": 2.1516289710998535, "learning_rate": 2.9971160946321468e-05, "loss": 0.1745, "step": 2231 }, { "epoch": 0.04918276620006941, "grad_norm": 1.9192301034927368, "learning_rate": 2.997109455650389e-05, "loss": 0.2004, "step": 2232 }, { "epoch": 0.04920480148958557, "grad_norm": 2.4643194675445557, "learning_rate": 2.9971028090430506e-05, "loss": 0.2168, "step": 2233 }, { "epoch": 0.04922683677910173, "grad_norm": 1.5616497993469238, "learning_rate": 2.9970961548101645e-05, "loss": 0.1648, "step": 2234 }, { "epoch": 0.04924887206861789, "grad_norm": 1.6614863872528076, "learning_rate": 2.9970894929517647e-05, "loss": 0.1536, "step": 2235 }, { "epoch": 0.04927090735813405, "grad_norm": 1.5426658391952515, "learning_rate": 2.9970828234678855e-05, "loss": 0.183, "step": 2236 }, { "epoch": 0.04929294264765021, "grad_norm": 1.6818604469299316, "learning_rate": 2.9970761463585602e-05, "loss": 0.1708, "step": 2237 }, { "epoch": 0.049314977937166375, "grad_norm": 2.21270489692688, "learning_rate": 2.997069461623824e-05, "loss": 0.1524, "step": 2238 }, { "epoch": 0.04933701322668253, "grad_norm": 1.8897753953933716, "learning_rate": 2.9970627692637097e-05, "loss": 0.1273, "step": 2239 }, { "epoch": 0.04935904851619869, "grad_norm": 2.0411767959594727, "learning_rate": 2.9970560692782523e-05, "loss": 0.1574, "step": 2240 }, { "epoch": 0.049381083805714855, "grad_norm": 2.6011431217193604, "learning_rate": 2.9970493616674854e-05, "loss": 0.203, "step": 2241 }, { "epoch": 0.04940311909523101, "grad_norm": 2.247560739517212, "learning_rate": 2.9970426464314434e-05, "loss": 0.2052, "step": 2242 }, { "epoch": 0.04942515438474717, "grad_norm": 2.0115647315979004, "learning_rate": 2.9970359235701603e-05, "loss": 0.1665, "step": 2243 }, { "epoch": 0.049447189674263335, "grad_norm": 2.792348861694336, "learning_rate": 2.997029193083671e-05, "loss": 0.1761, "step": 2244 }, { "epoch": 0.04946922496377949, "grad_norm": 1.323501467704773, "learning_rate": 2.9970224549720082e-05, "loss": 0.1395, "step": 2245 }, { "epoch": 0.04949126025329565, "grad_norm": 1.419264793395996, "learning_rate": 2.9970157092352082e-05, "loss": 0.1827, "step": 2246 }, { "epoch": 0.049513295542811815, "grad_norm": 1.7326180934906006, "learning_rate": 2.9970089558733042e-05, "loss": 0.1802, "step": 2247 }, { "epoch": 0.04953533083232797, "grad_norm": 1.8773893117904663, "learning_rate": 2.9970021948863306e-05, "loss": 0.1729, "step": 2248 }, { "epoch": 0.04955736612184413, "grad_norm": 1.7239066362380981, "learning_rate": 2.9969954262743224e-05, "loss": 0.1512, "step": 2249 }, { "epoch": 0.049579401411360295, "grad_norm": 1.8641421794891357, "learning_rate": 2.996988650037313e-05, "loss": 0.1509, "step": 2250 }, { "epoch": 0.04960143670087645, "grad_norm": 3.5911476612091064, "learning_rate": 2.9969818661753382e-05, "loss": 0.1715, "step": 2251 }, { "epoch": 0.04962347199039261, "grad_norm": 1.8406556844711304, "learning_rate": 2.996975074688432e-05, "loss": 0.1576, "step": 2252 }, { "epoch": 0.049645507279908775, "grad_norm": 1.8380881547927856, "learning_rate": 2.9969682755766292e-05, "loss": 0.1797, "step": 2253 }, { "epoch": 0.04966754256942493, "grad_norm": 1.6369097232818604, "learning_rate": 2.9969614688399638e-05, "loss": 0.1491, "step": 2254 }, { "epoch": 0.0496895778589411, "grad_norm": 2.0056309700012207, "learning_rate": 2.9969546544784714e-05, "loss": 0.142, "step": 2255 }, { "epoch": 0.049711613148457255, "grad_norm": 1.9256089925765991, "learning_rate": 2.9969478324921854e-05, "loss": 0.2058, "step": 2256 }, { "epoch": 0.04973364843797341, "grad_norm": 1.5185409784317017, "learning_rate": 2.996941002881142e-05, "loss": 0.1803, "step": 2257 }, { "epoch": 0.04975568372748958, "grad_norm": 2.1110129356384277, "learning_rate": 2.9969341656453753e-05, "loss": 0.181, "step": 2258 }, { "epoch": 0.049777719017005735, "grad_norm": 2.3829174041748047, "learning_rate": 2.99692732078492e-05, "loss": 0.1484, "step": 2259 }, { "epoch": 0.04979975430652189, "grad_norm": 2.0686988830566406, "learning_rate": 2.9969204682998114e-05, "loss": 0.1377, "step": 2260 }, { "epoch": 0.04982178959603806, "grad_norm": 2.1694064140319824, "learning_rate": 2.9969136081900838e-05, "loss": 0.2037, "step": 2261 }, { "epoch": 0.049843824885554215, "grad_norm": 1.2348510026931763, "learning_rate": 2.996906740455773e-05, "loss": 0.1896, "step": 2262 }, { "epoch": 0.04986586017507037, "grad_norm": 1.307726502418518, "learning_rate": 2.996899865096913e-05, "loss": 0.1431, "step": 2263 }, { "epoch": 0.04988789546458654, "grad_norm": 2.1983530521392822, "learning_rate": 2.99689298211354e-05, "loss": 0.1484, "step": 2264 }, { "epoch": 0.049909930754102695, "grad_norm": 1.649925947189331, "learning_rate": 2.9968860915056873e-05, "loss": 0.1362, "step": 2265 }, { "epoch": 0.04993196604361885, "grad_norm": 2.516263008117676, "learning_rate": 2.9968791932733913e-05, "loss": 0.1605, "step": 2266 }, { "epoch": 0.04995400133313502, "grad_norm": 1.9826009273529053, "learning_rate": 2.9968722874166875e-05, "loss": 0.2205, "step": 2267 }, { "epoch": 0.049976036622651175, "grad_norm": 1.2019950151443481, "learning_rate": 2.99686537393561e-05, "loss": 0.1708, "step": 2268 }, { "epoch": 0.04999807191216733, "grad_norm": 2.411120653152466, "learning_rate": 2.9968584528301946e-05, "loss": 0.1367, "step": 2269 }, { "epoch": 0.0500201072016835, "grad_norm": 1.8298044204711914, "learning_rate": 2.9968515241004764e-05, "loss": 0.1869, "step": 2270 }, { "epoch": 0.050042142491199655, "grad_norm": 1.602998971939087, "learning_rate": 2.9968445877464905e-05, "loss": 0.1177, "step": 2271 }, { "epoch": 0.05006417778071582, "grad_norm": 1.4052618741989136, "learning_rate": 2.9968376437682727e-05, "loss": 0.1531, "step": 2272 }, { "epoch": 0.05008621307023198, "grad_norm": 1.608267903327942, "learning_rate": 2.9968306921658578e-05, "loss": 0.1317, "step": 2273 }, { "epoch": 0.050108248359748135, "grad_norm": 1.9881094694137573, "learning_rate": 2.9968237329392817e-05, "loss": 0.1951, "step": 2274 }, { "epoch": 0.0501302836492643, "grad_norm": 2.3511645793914795, "learning_rate": 2.99681676608858e-05, "loss": 0.1722, "step": 2275 }, { "epoch": 0.05015231893878046, "grad_norm": 1.406882643699646, "learning_rate": 2.9968097916137877e-05, "loss": 0.1664, "step": 2276 }, { "epoch": 0.050174354228296615, "grad_norm": 1.7595196962356567, "learning_rate": 2.9968028095149398e-05, "loss": 0.1675, "step": 2277 }, { "epoch": 0.05019638951781278, "grad_norm": 1.913394570350647, "learning_rate": 2.9967958197920734e-05, "loss": 0.1626, "step": 2278 }, { "epoch": 0.05021842480732894, "grad_norm": 1.8876636028289795, "learning_rate": 2.9967888224452227e-05, "loss": 0.1505, "step": 2279 }, { "epoch": 0.050240460096845095, "grad_norm": 1.499680519104004, "learning_rate": 2.996781817474424e-05, "loss": 0.1482, "step": 2280 }, { "epoch": 0.05026249538636126, "grad_norm": 1.9890538454055786, "learning_rate": 2.996774804879713e-05, "loss": 0.2006, "step": 2281 }, { "epoch": 0.05028453067587742, "grad_norm": 1.7482661008834839, "learning_rate": 2.9967677846611252e-05, "loss": 0.1491, "step": 2282 }, { "epoch": 0.050306565965393575, "grad_norm": 1.719227910041809, "learning_rate": 2.9967607568186967e-05, "loss": 0.1918, "step": 2283 }, { "epoch": 0.05032860125490974, "grad_norm": 2.525846004486084, "learning_rate": 2.996753721352463e-05, "loss": 0.2142, "step": 2284 }, { "epoch": 0.0503506365444259, "grad_norm": 2.21545672416687, "learning_rate": 2.9967466782624597e-05, "loss": 0.1793, "step": 2285 }, { "epoch": 0.050372671833942055, "grad_norm": 1.5771660804748535, "learning_rate": 2.996739627548723e-05, "loss": 0.1839, "step": 2286 }, { "epoch": 0.05039470712345822, "grad_norm": 1.7074079513549805, "learning_rate": 2.996732569211289e-05, "loss": 0.1499, "step": 2287 }, { "epoch": 0.05041674241297438, "grad_norm": 1.1007485389709473, "learning_rate": 2.996725503250193e-05, "loss": 0.1401, "step": 2288 }, { "epoch": 0.05043877770249054, "grad_norm": 1.680150032043457, "learning_rate": 2.9967184296654716e-05, "loss": 0.2115, "step": 2289 }, { "epoch": 0.0504608129920067, "grad_norm": 1.9771863222122192, "learning_rate": 2.9967113484571607e-05, "loss": 0.1977, "step": 2290 }, { "epoch": 0.05048284828152286, "grad_norm": 2.2414510250091553, "learning_rate": 2.9967042596252964e-05, "loss": 0.1711, "step": 2291 }, { "epoch": 0.05050488357103902, "grad_norm": 1.597259521484375, "learning_rate": 2.9966971631699144e-05, "loss": 0.1952, "step": 2292 }, { "epoch": 0.05052691886055518, "grad_norm": 2.1988255977630615, "learning_rate": 2.9966900590910516e-05, "loss": 0.2212, "step": 2293 }, { "epoch": 0.05054895415007134, "grad_norm": 1.8856205940246582, "learning_rate": 2.9966829473887434e-05, "loss": 0.1647, "step": 2294 }, { "epoch": 0.0505709894395875, "grad_norm": 2.1901533603668213, "learning_rate": 2.9966758280630263e-05, "loss": 0.2457, "step": 2295 }, { "epoch": 0.05059302472910366, "grad_norm": 1.8535206317901611, "learning_rate": 2.9966687011139368e-05, "loss": 0.1801, "step": 2296 }, { "epoch": 0.05061506001861982, "grad_norm": 1.861648678779602, "learning_rate": 2.996661566541511e-05, "loss": 0.1634, "step": 2297 }, { "epoch": 0.05063709530813598, "grad_norm": 1.681947112083435, "learning_rate": 2.996654424345785e-05, "loss": 0.1478, "step": 2298 }, { "epoch": 0.05065913059765214, "grad_norm": 2.097883701324463, "learning_rate": 2.996647274526796e-05, "loss": 0.1863, "step": 2299 }, { "epoch": 0.0506811658871683, "grad_norm": 1.6233421564102173, "learning_rate": 2.9966401170845794e-05, "loss": 0.203, "step": 2300 }, { "epoch": 0.05070320117668446, "grad_norm": 1.6739819049835205, "learning_rate": 2.9966329520191723e-05, "loss": 0.1362, "step": 2301 }, { "epoch": 0.05072523646620062, "grad_norm": 2.4665515422821045, "learning_rate": 2.9966257793306112e-05, "loss": 0.1692, "step": 2302 }, { "epoch": 0.05074727175571678, "grad_norm": 1.9980577230453491, "learning_rate": 2.9966185990189322e-05, "loss": 0.1759, "step": 2303 }, { "epoch": 0.05076930704523294, "grad_norm": 1.5549153089523315, "learning_rate": 2.996611411084172e-05, "loss": 0.2226, "step": 2304 }, { "epoch": 0.0507913423347491, "grad_norm": 1.284590244293213, "learning_rate": 2.996604215526368e-05, "loss": 0.2006, "step": 2305 }, { "epoch": 0.05081337762426526, "grad_norm": 1.3470510244369507, "learning_rate": 2.9965970123455552e-05, "loss": 0.1659, "step": 2306 }, { "epoch": 0.05083541291378142, "grad_norm": 2.1700050830841064, "learning_rate": 2.996589801541772e-05, "loss": 0.1569, "step": 2307 }, { "epoch": 0.05085744820329758, "grad_norm": 1.9806784391403198, "learning_rate": 2.9965825831150543e-05, "loss": 0.1512, "step": 2308 }, { "epoch": 0.050879483492813744, "grad_norm": 1.6687729358673096, "learning_rate": 2.996575357065439e-05, "loss": 0.206, "step": 2309 }, { "epoch": 0.0509015187823299, "grad_norm": 1.853078842163086, "learning_rate": 2.9965681233929624e-05, "loss": 0.1489, "step": 2310 }, { "epoch": 0.05092355407184606, "grad_norm": 2.374864339828491, "learning_rate": 2.9965608820976623e-05, "loss": 0.211, "step": 2311 }, { "epoch": 0.050945589361362224, "grad_norm": 1.5673325061798096, "learning_rate": 2.996553633179575e-05, "loss": 0.1656, "step": 2312 }, { "epoch": 0.05096762465087838, "grad_norm": 1.6197465658187866, "learning_rate": 2.9965463766387376e-05, "loss": 0.1949, "step": 2313 }, { "epoch": 0.05098965994039454, "grad_norm": 1.4291930198669434, "learning_rate": 2.9965391124751868e-05, "loss": 0.1203, "step": 2314 }, { "epoch": 0.051011695229910704, "grad_norm": 2.241382122039795, "learning_rate": 2.9965318406889598e-05, "loss": 0.1735, "step": 2315 }, { "epoch": 0.05103373051942686, "grad_norm": 1.0556001663208008, "learning_rate": 2.9965245612800936e-05, "loss": 0.1157, "step": 2316 }, { "epoch": 0.05105576580894302, "grad_norm": 1.8395787477493286, "learning_rate": 2.9965172742486258e-05, "loss": 0.2162, "step": 2317 }, { "epoch": 0.051077801098459184, "grad_norm": 1.485666036605835, "learning_rate": 2.9965099795945926e-05, "loss": 0.2029, "step": 2318 }, { "epoch": 0.05109983638797534, "grad_norm": 1.5853978395462036, "learning_rate": 2.9965026773180317e-05, "loss": 0.146, "step": 2319 }, { "epoch": 0.0511218716774915, "grad_norm": 1.8763668537139893, "learning_rate": 2.99649536741898e-05, "loss": 0.1841, "step": 2320 }, { "epoch": 0.051143906967007664, "grad_norm": 2.083228588104248, "learning_rate": 2.996488049897475e-05, "loss": 0.1925, "step": 2321 }, { "epoch": 0.05116594225652382, "grad_norm": 2.5835022926330566, "learning_rate": 2.9964807247535537e-05, "loss": 0.1891, "step": 2322 }, { "epoch": 0.05118797754603998, "grad_norm": 1.874070167541504, "learning_rate": 2.996473391987254e-05, "loss": 0.2196, "step": 2323 }, { "epoch": 0.051210012835556144, "grad_norm": 1.7485530376434326, "learning_rate": 2.9964660515986124e-05, "loss": 0.1856, "step": 2324 }, { "epoch": 0.0512320481250723, "grad_norm": 2.1131179332733154, "learning_rate": 2.9964587035876672e-05, "loss": 0.1905, "step": 2325 }, { "epoch": 0.05125408341458847, "grad_norm": 1.9245226383209229, "learning_rate": 2.996451347954455e-05, "loss": 0.1718, "step": 2326 }, { "epoch": 0.051276118704104624, "grad_norm": 1.8882548809051514, "learning_rate": 2.996443984699014e-05, "loss": 0.1901, "step": 2327 }, { "epoch": 0.05129815399362078, "grad_norm": 1.6984179019927979, "learning_rate": 2.996436613821381e-05, "loss": 0.2006, "step": 2328 }, { "epoch": 0.05132018928313695, "grad_norm": 1.555770754814148, "learning_rate": 2.996429235321594e-05, "loss": 0.1648, "step": 2329 }, { "epoch": 0.051342224572653104, "grad_norm": 2.2121121883392334, "learning_rate": 2.99642184919969e-05, "loss": 0.1892, "step": 2330 }, { "epoch": 0.05136425986216926, "grad_norm": 2.475735902786255, "learning_rate": 2.9964144554557075e-05, "loss": 0.2493, "step": 2331 }, { "epoch": 0.05138629515168543, "grad_norm": 1.5675427913665771, "learning_rate": 2.996407054089684e-05, "loss": 0.1783, "step": 2332 }, { "epoch": 0.051408330441201584, "grad_norm": 2.1016790866851807, "learning_rate": 2.9963996451016566e-05, "loss": 0.1697, "step": 2333 }, { "epoch": 0.05143036573071774, "grad_norm": 2.385733127593994, "learning_rate": 2.9963922284916635e-05, "loss": 0.2466, "step": 2334 }, { "epoch": 0.05145240102023391, "grad_norm": 1.86067795753479, "learning_rate": 2.9963848042597423e-05, "loss": 0.1704, "step": 2335 }, { "epoch": 0.051474436309750064, "grad_norm": 1.4547359943389893, "learning_rate": 2.9963773724059307e-05, "loss": 0.1453, "step": 2336 }, { "epoch": 0.05149647159926622, "grad_norm": 1.247819185256958, "learning_rate": 2.9963699329302672e-05, "loss": 0.1608, "step": 2337 }, { "epoch": 0.05151850688878239, "grad_norm": 2.226529359817505, "learning_rate": 2.9963624858327886e-05, "loss": 0.1966, "step": 2338 }, { "epoch": 0.051540542178298544, "grad_norm": 1.4977210760116577, "learning_rate": 2.996355031113534e-05, "loss": 0.1791, "step": 2339 }, { "epoch": 0.0515625774678147, "grad_norm": 4.1906352043151855, "learning_rate": 2.9963475687725403e-05, "loss": 0.2217, "step": 2340 }, { "epoch": 0.05158461275733087, "grad_norm": 1.7369134426116943, "learning_rate": 2.9963400988098463e-05, "loss": 0.2073, "step": 2341 }, { "epoch": 0.051606648046847024, "grad_norm": 1.916811227798462, "learning_rate": 2.99633262122549e-05, "loss": 0.166, "step": 2342 }, { "epoch": 0.05162868333636319, "grad_norm": 1.2058528661727905, "learning_rate": 2.996325136019509e-05, "loss": 0.1684, "step": 2343 }, { "epoch": 0.05165071862587935, "grad_norm": 1.9903408288955688, "learning_rate": 2.9963176431919416e-05, "loss": 0.1794, "step": 2344 }, { "epoch": 0.051672753915395504, "grad_norm": 3.1529972553253174, "learning_rate": 2.9963101427428262e-05, "loss": 0.2287, "step": 2345 }, { "epoch": 0.05169478920491167, "grad_norm": 2.3531546592712402, "learning_rate": 2.996302634672201e-05, "loss": 0.1694, "step": 2346 }, { "epoch": 0.05171682449442783, "grad_norm": 1.983137845993042, "learning_rate": 2.9962951189801036e-05, "loss": 0.116, "step": 2347 }, { "epoch": 0.051738859783943984, "grad_norm": 2.1724536418914795, "learning_rate": 2.9962875956665735e-05, "loss": 0.1657, "step": 2348 }, { "epoch": 0.05176089507346015, "grad_norm": 2.2288386821746826, "learning_rate": 2.9962800647316477e-05, "loss": 0.1575, "step": 2349 }, { "epoch": 0.05178293036297631, "grad_norm": 1.7279876470565796, "learning_rate": 2.996272526175365e-05, "loss": 0.1597, "step": 2350 }, { "epoch": 0.051804965652492464, "grad_norm": 2.040879964828491, "learning_rate": 2.9962649799977645e-05, "loss": 0.2013, "step": 2351 }, { "epoch": 0.05182700094200863, "grad_norm": 1.739982008934021, "learning_rate": 2.996257426198884e-05, "loss": 0.1553, "step": 2352 }, { "epoch": 0.05184903623152479, "grad_norm": 3.0151851177215576, "learning_rate": 2.996249864778762e-05, "loss": 0.1883, "step": 2353 }, { "epoch": 0.051871071521040944, "grad_norm": 1.7834763526916504, "learning_rate": 2.996242295737437e-05, "loss": 0.1768, "step": 2354 }, { "epoch": 0.05189310681055711, "grad_norm": 1.2922735214233398, "learning_rate": 2.996234719074948e-05, "loss": 0.1385, "step": 2355 }, { "epoch": 0.05191514210007327, "grad_norm": 1.7297793626785278, "learning_rate": 2.9962271347913328e-05, "loss": 0.1283, "step": 2356 }, { "epoch": 0.051937177389589424, "grad_norm": 1.430147647857666, "learning_rate": 2.996219542886631e-05, "loss": 0.1628, "step": 2357 }, { "epoch": 0.05195921267910559, "grad_norm": 1.5300363302230835, "learning_rate": 2.99621194336088e-05, "loss": 0.1517, "step": 2358 }, { "epoch": 0.05198124796862175, "grad_norm": 3.179201364517212, "learning_rate": 2.9962043362141194e-05, "loss": 0.1525, "step": 2359 }, { "epoch": 0.05200328325813791, "grad_norm": 1.7714743614196777, "learning_rate": 2.996196721446388e-05, "loss": 0.1355, "step": 2360 }, { "epoch": 0.05202531854765407, "grad_norm": 1.606168270111084, "learning_rate": 2.9961890990577244e-05, "loss": 0.1714, "step": 2361 }, { "epoch": 0.05204735383717023, "grad_norm": 2.087008237838745, "learning_rate": 2.9961814690481675e-05, "loss": 0.1934, "step": 2362 }, { "epoch": 0.05206938912668639, "grad_norm": 2.85833477973938, "learning_rate": 2.9961738314177556e-05, "loss": 0.1787, "step": 2363 }, { "epoch": 0.05209142441620255, "grad_norm": 2.63161301612854, "learning_rate": 2.9961661861665287e-05, "loss": 0.1599, "step": 2364 }, { "epoch": 0.05211345970571871, "grad_norm": 1.811483383178711, "learning_rate": 2.9961585332945247e-05, "loss": 0.1398, "step": 2365 }, { "epoch": 0.05213549499523487, "grad_norm": 1.422831654548645, "learning_rate": 2.996150872801783e-05, "loss": 0.1458, "step": 2366 }, { "epoch": 0.05215753028475103, "grad_norm": 4.539665222167969, "learning_rate": 2.9961432046883428e-05, "loss": 0.1898, "step": 2367 }, { "epoch": 0.05217956557426719, "grad_norm": 1.9100631475448608, "learning_rate": 2.9961355289542425e-05, "loss": 0.1702, "step": 2368 }, { "epoch": 0.05220160086378335, "grad_norm": 1.550703525543213, "learning_rate": 2.996127845599522e-05, "loss": 0.1592, "step": 2369 }, { "epoch": 0.05222363615329951, "grad_norm": 6.652529239654541, "learning_rate": 2.99612015462422e-05, "loss": 0.1445, "step": 2370 }, { "epoch": 0.05224567144281567, "grad_norm": 1.7366546392440796, "learning_rate": 2.996112456028376e-05, "loss": 0.2547, "step": 2371 }, { "epoch": 0.05226770673233183, "grad_norm": 1.93606436252594, "learning_rate": 2.9961047498120288e-05, "loss": 0.1467, "step": 2372 }, { "epoch": 0.05228974202184799, "grad_norm": 2.6857309341430664, "learning_rate": 2.996097035975218e-05, "loss": 0.1805, "step": 2373 }, { "epoch": 0.05231177731136415, "grad_norm": 2.50237774848938, "learning_rate": 2.9960893145179825e-05, "loss": 0.1653, "step": 2374 }, { "epoch": 0.05233381260088031, "grad_norm": 1.8556718826293945, "learning_rate": 2.9960815854403622e-05, "loss": 0.1311, "step": 2375 }, { "epoch": 0.05235584789039647, "grad_norm": 1.334943175315857, "learning_rate": 2.9960738487423957e-05, "loss": 0.1675, "step": 2376 }, { "epoch": 0.052377883179912633, "grad_norm": 2.109074592590332, "learning_rate": 2.996066104424123e-05, "loss": 0.1827, "step": 2377 }, { "epoch": 0.05239991846942879, "grad_norm": 4.577401638031006, "learning_rate": 2.9960583524855833e-05, "loss": 0.1832, "step": 2378 }, { "epoch": 0.05242195375894495, "grad_norm": 3.1972107887268066, "learning_rate": 2.9960505929268163e-05, "loss": 0.1464, "step": 2379 }, { "epoch": 0.052443989048461113, "grad_norm": 1.9826148748397827, "learning_rate": 2.996042825747861e-05, "loss": 0.1646, "step": 2380 }, { "epoch": 0.05246602433797727, "grad_norm": 2.2884349822998047, "learning_rate": 2.996035050948758e-05, "loss": 0.1831, "step": 2381 }, { "epoch": 0.05248805962749343, "grad_norm": 4.186753273010254, "learning_rate": 2.996027268529546e-05, "loss": 0.1969, "step": 2382 }, { "epoch": 0.052510094917009593, "grad_norm": 2.2861855030059814, "learning_rate": 2.9960194784902645e-05, "loss": 0.1556, "step": 2383 }, { "epoch": 0.05253213020652575, "grad_norm": 1.8320813179016113, "learning_rate": 2.996011680830954e-05, "loss": 0.1598, "step": 2384 }, { "epoch": 0.05255416549604191, "grad_norm": 1.828973650932312, "learning_rate": 2.9960038755516538e-05, "loss": 0.204, "step": 2385 }, { "epoch": 0.05257620078555807, "grad_norm": 0.9833694100379944, "learning_rate": 2.9959960626524032e-05, "loss": 0.117, "step": 2386 }, { "epoch": 0.05259823607507423, "grad_norm": 1.391093373298645, "learning_rate": 2.9959882421332428e-05, "loss": 0.1874, "step": 2387 }, { "epoch": 0.05262027136459039, "grad_norm": 1.4684926271438599, "learning_rate": 2.995980413994212e-05, "loss": 0.1712, "step": 2388 }, { "epoch": 0.05264230665410655, "grad_norm": 1.6808874607086182, "learning_rate": 2.995972578235351e-05, "loss": 0.1435, "step": 2389 }, { "epoch": 0.05266434194362271, "grad_norm": 1.983286738395691, "learning_rate": 2.995964734856699e-05, "loss": 0.2056, "step": 2390 }, { "epoch": 0.05268637723313887, "grad_norm": 2.3316380977630615, "learning_rate": 2.995956883858297e-05, "loss": 0.1721, "step": 2391 }, { "epoch": 0.05270841252265503, "grad_norm": 2.208134651184082, "learning_rate": 2.9959490252401838e-05, "loss": 0.1885, "step": 2392 }, { "epoch": 0.05273044781217119, "grad_norm": 3.6348299980163574, "learning_rate": 2.9959411590024e-05, "loss": 0.1345, "step": 2393 }, { "epoch": 0.05275248310168735, "grad_norm": 2.1032626628875732, "learning_rate": 2.9959332851449862e-05, "loss": 0.1945, "step": 2394 }, { "epoch": 0.05277451839120351, "grad_norm": 1.8691258430480957, "learning_rate": 2.9959254036679817e-05, "loss": 0.134, "step": 2395 }, { "epoch": 0.05279655368071967, "grad_norm": 2.234145164489746, "learning_rate": 2.995917514571427e-05, "loss": 0.1695, "step": 2396 }, { "epoch": 0.052818588970235836, "grad_norm": 1.7338114976882935, "learning_rate": 2.995909617855362e-05, "loss": 0.1646, "step": 2397 }, { "epoch": 0.05284062425975199, "grad_norm": 2.083401918411255, "learning_rate": 2.995901713519827e-05, "loss": 0.18, "step": 2398 }, { "epoch": 0.05286265954926815, "grad_norm": 1.5532934665679932, "learning_rate": 2.995893801564863e-05, "loss": 0.2228, "step": 2399 }, { "epoch": 0.052884694838784316, "grad_norm": 1.3203235864639282, "learning_rate": 2.9958858819905095e-05, "loss": 0.1683, "step": 2400 }, { "epoch": 0.05290673012830047, "grad_norm": 1.7819371223449707, "learning_rate": 2.9958779547968068e-05, "loss": 0.1449, "step": 2401 }, { "epoch": 0.05292876541781663, "grad_norm": 1.6615755558013916, "learning_rate": 2.9958700199837957e-05, "loss": 0.1639, "step": 2402 }, { "epoch": 0.052950800707332796, "grad_norm": 1.6108399629592896, "learning_rate": 2.9958620775515163e-05, "loss": 0.178, "step": 2403 }, { "epoch": 0.05297283599684895, "grad_norm": 1.5697166919708252, "learning_rate": 2.9958541275000095e-05, "loss": 0.1862, "step": 2404 }, { "epoch": 0.05299487128636511, "grad_norm": 1.3783488273620605, "learning_rate": 2.9958461698293157e-05, "loss": 0.1346, "step": 2405 }, { "epoch": 0.053016906575881276, "grad_norm": 2.437655448913574, "learning_rate": 2.9958382045394744e-05, "loss": 0.1637, "step": 2406 }, { "epoch": 0.05303894186539743, "grad_norm": 1.5725246667861938, "learning_rate": 2.9958302316305274e-05, "loss": 0.1536, "step": 2407 }, { "epoch": 0.05306097715491359, "grad_norm": 2.3489644527435303, "learning_rate": 2.995822251102515e-05, "loss": 0.1614, "step": 2408 }, { "epoch": 0.053083012444429756, "grad_norm": 1.2600735425949097, "learning_rate": 2.995814262955478e-05, "loss": 0.181, "step": 2409 }, { "epoch": 0.05310504773394591, "grad_norm": 1.623470425605774, "learning_rate": 2.9958062671894567e-05, "loss": 0.1565, "step": 2410 }, { "epoch": 0.05312708302346207, "grad_norm": 1.7725688219070435, "learning_rate": 2.9957982638044916e-05, "loss": 0.1503, "step": 2411 }, { "epoch": 0.053149118312978236, "grad_norm": 2.1920993328094482, "learning_rate": 2.995790252800624e-05, "loss": 0.2267, "step": 2412 }, { "epoch": 0.05317115360249439, "grad_norm": 1.6493747234344482, "learning_rate": 2.9957822341778943e-05, "loss": 0.1402, "step": 2413 }, { "epoch": 0.05319318889201056, "grad_norm": 2.1084632873535156, "learning_rate": 2.995774207936344e-05, "loss": 0.1793, "step": 2414 }, { "epoch": 0.053215224181526716, "grad_norm": 1.741025686264038, "learning_rate": 2.9957661740760134e-05, "loss": 0.1321, "step": 2415 }, { "epoch": 0.05323725947104287, "grad_norm": 1.8068358898162842, "learning_rate": 2.9957581325969433e-05, "loss": 0.1384, "step": 2416 }, { "epoch": 0.05325929476055904, "grad_norm": 1.3552173376083374, "learning_rate": 2.9957500834991753e-05, "loss": 0.1497, "step": 2417 }, { "epoch": 0.053281330050075196, "grad_norm": 1.0747954845428467, "learning_rate": 2.99574202678275e-05, "loss": 0.1492, "step": 2418 }, { "epoch": 0.05330336533959135, "grad_norm": 1.67129647731781, "learning_rate": 2.9957339624477085e-05, "loss": 0.1707, "step": 2419 }, { "epoch": 0.05332540062910752, "grad_norm": 1.9030591249465942, "learning_rate": 2.9957258904940918e-05, "loss": 0.1619, "step": 2420 }, { "epoch": 0.053347435918623676, "grad_norm": 4.496284008026123, "learning_rate": 2.995717810921941e-05, "loss": 0.1993, "step": 2421 }, { "epoch": 0.05336947120813983, "grad_norm": 2.2861757278442383, "learning_rate": 2.9957097237312973e-05, "loss": 0.1541, "step": 2422 }, { "epoch": 0.053391506497656, "grad_norm": 1.404751181602478, "learning_rate": 2.9957016289222024e-05, "loss": 0.1672, "step": 2423 }, { "epoch": 0.053413541787172156, "grad_norm": 1.2201318740844727, "learning_rate": 2.9956935264946966e-05, "loss": 0.1667, "step": 2424 }, { "epoch": 0.05343557707668831, "grad_norm": 2.413719654083252, "learning_rate": 2.9956854164488217e-05, "loss": 0.1518, "step": 2425 }, { "epoch": 0.05345761236620448, "grad_norm": 1.9124215841293335, "learning_rate": 2.9956772987846185e-05, "loss": 0.1709, "step": 2426 }, { "epoch": 0.053479647655720636, "grad_norm": 2.2251362800598145, "learning_rate": 2.9956691735021296e-05, "loss": 0.1984, "step": 2427 }, { "epoch": 0.05350168294523679, "grad_norm": 1.2045329809188843, "learning_rate": 2.995661040601395e-05, "loss": 0.1864, "step": 2428 }, { "epoch": 0.05352371823475296, "grad_norm": 1.6278417110443115, "learning_rate": 2.9956529000824566e-05, "loss": 0.1973, "step": 2429 }, { "epoch": 0.053545753524269116, "grad_norm": 2.372695207595825, "learning_rate": 2.9956447519453565e-05, "loss": 0.176, "step": 2430 }, { "epoch": 0.05356778881378528, "grad_norm": 1.7464354038238525, "learning_rate": 2.995636596190135e-05, "loss": 0.1578, "step": 2431 }, { "epoch": 0.05358982410330144, "grad_norm": 2.0470101833343506, "learning_rate": 2.995628432816835e-05, "loss": 0.1027, "step": 2432 }, { "epoch": 0.053611859392817596, "grad_norm": 2.050614833831787, "learning_rate": 2.9956202618254967e-05, "loss": 0.2237, "step": 2433 }, { "epoch": 0.05363389468233376, "grad_norm": 1.2356072664260864, "learning_rate": 2.9956120832161624e-05, "loss": 0.1648, "step": 2434 }, { "epoch": 0.05365592997184992, "grad_norm": 1.1931252479553223, "learning_rate": 2.995603896988874e-05, "loss": 0.2128, "step": 2435 }, { "epoch": 0.053677965261366076, "grad_norm": 2.1434030532836914, "learning_rate": 2.995595703143673e-05, "loss": 0.1731, "step": 2436 }, { "epoch": 0.05370000055088224, "grad_norm": 1.9418619871139526, "learning_rate": 2.9955875016806012e-05, "loss": 0.1817, "step": 2437 }, { "epoch": 0.0537220358403984, "grad_norm": 1.727112889289856, "learning_rate": 2.9955792925997e-05, "loss": 0.1669, "step": 2438 }, { "epoch": 0.053744071129914556, "grad_norm": 1.9923598766326904, "learning_rate": 2.9955710759010116e-05, "loss": 0.1684, "step": 2439 }, { "epoch": 0.05376610641943072, "grad_norm": 1.4697766304016113, "learning_rate": 2.9955628515845774e-05, "loss": 0.1509, "step": 2440 }, { "epoch": 0.05378814170894688, "grad_norm": 2.0822198390960693, "learning_rate": 2.99555461965044e-05, "loss": 0.178, "step": 2441 }, { "epoch": 0.053810176998463036, "grad_norm": 1.3170231580734253, "learning_rate": 2.995546380098641e-05, "loss": 0.1988, "step": 2442 }, { "epoch": 0.0538322122879792, "grad_norm": 2.34338641166687, "learning_rate": 2.9955381329292223e-05, "loss": 0.1445, "step": 2443 }, { "epoch": 0.05385424757749536, "grad_norm": 1.3941690921783447, "learning_rate": 2.9955298781422255e-05, "loss": 0.1517, "step": 2444 }, { "epoch": 0.053876282867011516, "grad_norm": 1.5950162410736084, "learning_rate": 2.9955216157376934e-05, "loss": 0.1092, "step": 2445 }, { "epoch": 0.05389831815652768, "grad_norm": 2.234492063522339, "learning_rate": 2.995513345715668e-05, "loss": 0.1877, "step": 2446 }, { "epoch": 0.05392035344604384, "grad_norm": 1.4496610164642334, "learning_rate": 2.9955050680761905e-05, "loss": 0.1692, "step": 2447 }, { "epoch": 0.05394238873556, "grad_norm": 1.3335156440734863, "learning_rate": 2.995496782819304e-05, "loss": 0.1662, "step": 2448 }, { "epoch": 0.05396442402507616, "grad_norm": 1.9429728984832764, "learning_rate": 2.9954884899450507e-05, "loss": 0.2146, "step": 2449 }, { "epoch": 0.05398645931459232, "grad_norm": 1.915850043296814, "learning_rate": 2.9954801894534724e-05, "loss": 0.1574, "step": 2450 }, { "epoch": 0.05400849460410848, "grad_norm": 1.693010687828064, "learning_rate": 2.9954718813446115e-05, "loss": 0.1614, "step": 2451 }, { "epoch": 0.05403052989362464, "grad_norm": 2.1012678146362305, "learning_rate": 2.9954635656185103e-05, "loss": 0.1489, "step": 2452 }, { "epoch": 0.0540525651831408, "grad_norm": 2.5079233646392822, "learning_rate": 2.9954552422752116e-05, "loss": 0.2256, "step": 2453 }, { "epoch": 0.05407460047265696, "grad_norm": 1.605141520500183, "learning_rate": 2.995446911314757e-05, "loss": 0.1711, "step": 2454 }, { "epoch": 0.05409663576217312, "grad_norm": 1.5018091201782227, "learning_rate": 2.9954385727371896e-05, "loss": 0.1967, "step": 2455 }, { "epoch": 0.05411867105168928, "grad_norm": 1.8418138027191162, "learning_rate": 2.9954302265425513e-05, "loss": 0.1908, "step": 2456 }, { "epoch": 0.05414070634120544, "grad_norm": 1.5016065835952759, "learning_rate": 2.9954218727308853e-05, "loss": 0.155, "step": 2457 }, { "epoch": 0.0541627416307216, "grad_norm": 1.8416152000427246, "learning_rate": 2.9954135113022338e-05, "loss": 0.179, "step": 2458 }, { "epoch": 0.05418477692023776, "grad_norm": 1.776621699333191, "learning_rate": 2.995405142256639e-05, "loss": 0.1608, "step": 2459 }, { "epoch": 0.05420681220975392, "grad_norm": 2.535048484802246, "learning_rate": 2.9953967655941445e-05, "loss": 0.1266, "step": 2460 }, { "epoch": 0.05422884749927008, "grad_norm": 1.4722340106964111, "learning_rate": 2.995388381314792e-05, "loss": 0.1694, "step": 2461 }, { "epoch": 0.05425088278878624, "grad_norm": 1.6362353563308716, "learning_rate": 2.9953799894186247e-05, "loss": 0.149, "step": 2462 }, { "epoch": 0.0542729180783024, "grad_norm": 2.391136646270752, "learning_rate": 2.9953715899056852e-05, "loss": 0.1849, "step": 2463 }, { "epoch": 0.05429495336781856, "grad_norm": 1.5835890769958496, "learning_rate": 2.9953631827760165e-05, "loss": 0.1468, "step": 2464 }, { "epoch": 0.054316988657334725, "grad_norm": 2.2498912811279297, "learning_rate": 2.9953547680296608e-05, "loss": 0.145, "step": 2465 }, { "epoch": 0.05433902394685088, "grad_norm": 2.6676628589630127, "learning_rate": 2.9953463456666615e-05, "loss": 0.1984, "step": 2466 }, { "epoch": 0.05436105923636704, "grad_norm": 1.3121116161346436, "learning_rate": 2.9953379156870616e-05, "loss": 0.1496, "step": 2467 }, { "epoch": 0.054383094525883205, "grad_norm": 1.7414097785949707, "learning_rate": 2.9953294780909036e-05, "loss": 0.2142, "step": 2468 }, { "epoch": 0.05440512981539936, "grad_norm": 2.9576268196105957, "learning_rate": 2.995321032878231e-05, "loss": 0.1797, "step": 2469 }, { "epoch": 0.05442716510491552, "grad_norm": 1.6761884689331055, "learning_rate": 2.9953125800490863e-05, "loss": 0.1876, "step": 2470 }, { "epoch": 0.054449200394431685, "grad_norm": 1.814668893814087, "learning_rate": 2.9953041196035128e-05, "loss": 0.1713, "step": 2471 }, { "epoch": 0.05447123568394784, "grad_norm": 1.826755404472351, "learning_rate": 2.995295651541554e-05, "loss": 0.1621, "step": 2472 }, { "epoch": 0.054493270973464, "grad_norm": 1.8329919576644897, "learning_rate": 2.995287175863252e-05, "loss": 0.1814, "step": 2473 }, { "epoch": 0.054515306262980165, "grad_norm": 1.3394056558609009, "learning_rate": 2.9952786925686506e-05, "loss": 0.1843, "step": 2474 }, { "epoch": 0.05453734155249632, "grad_norm": 1.9593628644943237, "learning_rate": 2.9952702016577937e-05, "loss": 0.17, "step": 2475 }, { "epoch": 0.05455937684201248, "grad_norm": 9.792741775512695, "learning_rate": 2.9952617031307227e-05, "loss": 0.1747, "step": 2476 }, { "epoch": 0.054581412131528645, "grad_norm": 3.7067792415618896, "learning_rate": 2.995253196987483e-05, "loss": 0.1799, "step": 2477 }, { "epoch": 0.0546034474210448, "grad_norm": 2.379535675048828, "learning_rate": 2.9952446832281164e-05, "loss": 0.1569, "step": 2478 }, { "epoch": 0.05462548271056096, "grad_norm": 1.901483416557312, "learning_rate": 2.995236161852667e-05, "loss": 0.2054, "step": 2479 }, { "epoch": 0.054647518000077125, "grad_norm": 2.364112377166748, "learning_rate": 2.995227632861178e-05, "loss": 0.1571, "step": 2480 }, { "epoch": 0.05466955328959328, "grad_norm": 1.4389675855636597, "learning_rate": 2.995219096253693e-05, "loss": 0.1781, "step": 2481 }, { "epoch": 0.05469158857910945, "grad_norm": 1.7553791999816895, "learning_rate": 2.9952105520302556e-05, "loss": 0.1937, "step": 2482 }, { "epoch": 0.054713623868625605, "grad_norm": 2.437204122543335, "learning_rate": 2.9952020001909085e-05, "loss": 0.1787, "step": 2483 }, { "epoch": 0.05473565915814176, "grad_norm": 1.5172849893569946, "learning_rate": 2.995193440735696e-05, "loss": 0.2284, "step": 2484 }, { "epoch": 0.05475769444765793, "grad_norm": 1.6890572309494019, "learning_rate": 2.9951848736646615e-05, "loss": 0.1629, "step": 2485 }, { "epoch": 0.054779729737174085, "grad_norm": 1.1905877590179443, "learning_rate": 2.9951762989778486e-05, "loss": 0.1333, "step": 2486 }, { "epoch": 0.05480176502669024, "grad_norm": 1.5705231428146362, "learning_rate": 2.9951677166753012e-05, "loss": 0.143, "step": 2487 }, { "epoch": 0.05482380031620641, "grad_norm": 1.3043239116668701, "learning_rate": 2.9951591267570632e-05, "loss": 0.1407, "step": 2488 }, { "epoch": 0.054845835605722565, "grad_norm": 1.2646092176437378, "learning_rate": 2.995150529223177e-05, "loss": 0.1317, "step": 2489 }, { "epoch": 0.05486787089523872, "grad_norm": 1.131646752357483, "learning_rate": 2.9951419240736882e-05, "loss": 0.141, "step": 2490 }, { "epoch": 0.05488990618475489, "grad_norm": 2.0439772605895996, "learning_rate": 2.9951333113086396e-05, "loss": 0.1245, "step": 2491 }, { "epoch": 0.054911941474271045, "grad_norm": 1.7471940517425537, "learning_rate": 2.9951246909280753e-05, "loss": 0.174, "step": 2492 }, { "epoch": 0.0549339767637872, "grad_norm": 1.6595929861068726, "learning_rate": 2.995116062932039e-05, "loss": 0.1341, "step": 2493 }, { "epoch": 0.05495601205330337, "grad_norm": 1.5477288961410522, "learning_rate": 2.995107427320575e-05, "loss": 0.126, "step": 2494 }, { "epoch": 0.054978047342819525, "grad_norm": 2.1769890785217285, "learning_rate": 2.9950987840937274e-05, "loss": 0.2151, "step": 2495 }, { "epoch": 0.05500008263233568, "grad_norm": 1.9794528484344482, "learning_rate": 2.99509013325154e-05, "loss": 0.1706, "step": 2496 }, { "epoch": 0.05502211792185185, "grad_norm": 1.444242238998413, "learning_rate": 2.995081474794056e-05, "loss": 0.1843, "step": 2497 }, { "epoch": 0.055044153211368005, "grad_norm": 1.6283477544784546, "learning_rate": 2.9950728087213214e-05, "loss": 0.1508, "step": 2498 }, { "epoch": 0.05506618850088416, "grad_norm": 1.9731148481369019, "learning_rate": 2.9950641350333785e-05, "loss": 0.2207, "step": 2499 }, { "epoch": 0.05508822379040033, "grad_norm": 1.3805590867996216, "learning_rate": 2.9950554537302724e-05, "loss": 0.1476, "step": 2500 }, { "epoch": 0.055110259079916485, "grad_norm": 1.5866495370864868, "learning_rate": 2.9950467648120474e-05, "loss": 0.1644, "step": 2501 }, { "epoch": 0.05513229436943265, "grad_norm": 1.8321795463562012, "learning_rate": 2.9950380682787474e-05, "loss": 0.1612, "step": 2502 }, { "epoch": 0.05515432965894881, "grad_norm": 2.0607399940490723, "learning_rate": 2.9950293641304166e-05, "loss": 0.1931, "step": 2503 }, { "epoch": 0.055176364948464965, "grad_norm": 1.9631569385528564, "learning_rate": 2.9950206523670998e-05, "loss": 0.1691, "step": 2504 }, { "epoch": 0.05519840023798113, "grad_norm": 1.4307111501693726, "learning_rate": 2.995011932988841e-05, "loss": 0.1592, "step": 2505 }, { "epoch": 0.05522043552749729, "grad_norm": 2.436655282974243, "learning_rate": 2.9950032059956848e-05, "loss": 0.217, "step": 2506 }, { "epoch": 0.055242470817013445, "grad_norm": 1.9021259546279907, "learning_rate": 2.9949944713876752e-05, "loss": 0.1643, "step": 2507 }, { "epoch": 0.05526450610652961, "grad_norm": 1.798663854598999, "learning_rate": 2.9949857291648577e-05, "loss": 0.1299, "step": 2508 }, { "epoch": 0.05528654139604577, "grad_norm": 2.0442140102386475, "learning_rate": 2.994976979327276e-05, "loss": 0.1762, "step": 2509 }, { "epoch": 0.055308576685561925, "grad_norm": 1.9865894317626953, "learning_rate": 2.9949682218749745e-05, "loss": 0.172, "step": 2510 }, { "epoch": 0.05533061197507809, "grad_norm": 1.4277558326721191, "learning_rate": 2.994959456807999e-05, "loss": 0.1917, "step": 2511 }, { "epoch": 0.05535264726459425, "grad_norm": 1.2761399745941162, "learning_rate": 2.9949506841263926e-05, "loss": 0.1598, "step": 2512 }, { "epoch": 0.055374682554110405, "grad_norm": 2.3327085971832275, "learning_rate": 2.994941903830201e-05, "loss": 0.189, "step": 2513 }, { "epoch": 0.05539671784362657, "grad_norm": 1.8167200088500977, "learning_rate": 2.9949331159194685e-05, "loss": 0.1615, "step": 2514 }, { "epoch": 0.05541875313314273, "grad_norm": 1.5002840757369995, "learning_rate": 2.99492432039424e-05, "loss": 0.1534, "step": 2515 }, { "epoch": 0.055440788422658885, "grad_norm": 1.8362456560134888, "learning_rate": 2.9949155172545607e-05, "loss": 0.2068, "step": 2516 }, { "epoch": 0.05546282371217505, "grad_norm": 1.7017686367034912, "learning_rate": 2.994906706500474e-05, "loss": 0.1948, "step": 2517 }, { "epoch": 0.05548485900169121, "grad_norm": 1.4259154796600342, "learning_rate": 2.994897888132027e-05, "loss": 0.1421, "step": 2518 }, { "epoch": 0.05550689429120737, "grad_norm": 1.476458191871643, "learning_rate": 2.9948890621492627e-05, "loss": 0.1751, "step": 2519 }, { "epoch": 0.05552892958072353, "grad_norm": 1.7283824682235718, "learning_rate": 2.994880228552227e-05, "loss": 0.1732, "step": 2520 }, { "epoch": 0.05555096487023969, "grad_norm": 1.6770316362380981, "learning_rate": 2.994871387340965e-05, "loss": 0.1973, "step": 2521 }, { "epoch": 0.05557300015975585, "grad_norm": 2.2909786701202393, "learning_rate": 2.9948625385155207e-05, "loss": 0.1502, "step": 2522 }, { "epoch": 0.05559503544927201, "grad_norm": 0.922211766242981, "learning_rate": 2.9948536820759403e-05, "loss": 0.1801, "step": 2523 }, { "epoch": 0.05561707073878817, "grad_norm": 1.4828505516052246, "learning_rate": 2.9948448180222686e-05, "loss": 0.2237, "step": 2524 }, { "epoch": 0.05563910602830433, "grad_norm": 1.5256437063217163, "learning_rate": 2.9948359463545506e-05, "loss": 0.1508, "step": 2525 }, { "epoch": 0.05566114131782049, "grad_norm": 1.7215654850006104, "learning_rate": 2.994827067072832e-05, "loss": 0.1696, "step": 2526 }, { "epoch": 0.05568317660733665, "grad_norm": 1.96400785446167, "learning_rate": 2.9948181801771563e-05, "loss": 0.1677, "step": 2527 }, { "epoch": 0.05570521189685281, "grad_norm": 1.877888560295105, "learning_rate": 2.994809285667571e-05, "loss": 0.199, "step": 2528 }, { "epoch": 0.05572724718636897, "grad_norm": 2.02632737159729, "learning_rate": 2.99480038354412e-05, "loss": 0.1722, "step": 2529 }, { "epoch": 0.05574928247588513, "grad_norm": 2.096930980682373, "learning_rate": 2.994791473806849e-05, "loss": 0.1507, "step": 2530 }, { "epoch": 0.05577131776540129, "grad_norm": 1.4438577890396118, "learning_rate": 2.9947825564558044e-05, "loss": 0.1497, "step": 2531 }, { "epoch": 0.05579335305491745, "grad_norm": 2.3000080585479736, "learning_rate": 2.9947736314910295e-05, "loss": 0.1671, "step": 2532 }, { "epoch": 0.05581538834443361, "grad_norm": 2.0669212341308594, "learning_rate": 2.9947646989125713e-05, "loss": 0.1478, "step": 2533 }, { "epoch": 0.05583742363394977, "grad_norm": 1.4057976007461548, "learning_rate": 2.994755758720475e-05, "loss": 0.1679, "step": 2534 }, { "epoch": 0.05585945892346593, "grad_norm": 1.4510499238967896, "learning_rate": 2.9947468109147863e-05, "loss": 0.1327, "step": 2535 }, { "epoch": 0.055881494212982094, "grad_norm": 1.8869397640228271, "learning_rate": 2.9947378554955503e-05, "loss": 0.1949, "step": 2536 }, { "epoch": 0.05590352950249825, "grad_norm": 1.8770592212677002, "learning_rate": 2.994728892462813e-05, "loss": 0.1346, "step": 2537 }, { "epoch": 0.05592556479201441, "grad_norm": 1.7444621324539185, "learning_rate": 2.9947199218166198e-05, "loss": 0.1944, "step": 2538 }, { "epoch": 0.055947600081530574, "grad_norm": 2.7877230644226074, "learning_rate": 2.9947109435570167e-05, "loss": 0.1698, "step": 2539 }, { "epoch": 0.05596963537104673, "grad_norm": 2.1384308338165283, "learning_rate": 2.9947019576840488e-05, "loss": 0.1979, "step": 2540 }, { "epoch": 0.05599167066056289, "grad_norm": 2.2328193187713623, "learning_rate": 2.9946929641977627e-05, "loss": 0.1743, "step": 2541 }, { "epoch": 0.056013705950079054, "grad_norm": 1.4868148565292358, "learning_rate": 2.9946839630982035e-05, "loss": 0.1249, "step": 2542 }, { "epoch": 0.05603574123959521, "grad_norm": 2.038062572479248, "learning_rate": 2.9946749543854177e-05, "loss": 0.2009, "step": 2543 }, { "epoch": 0.05605777652911137, "grad_norm": 1.0147933959960938, "learning_rate": 2.9946659380594506e-05, "loss": 0.1842, "step": 2544 }, { "epoch": 0.056079811818627534, "grad_norm": 2.088801383972168, "learning_rate": 2.9946569141203486e-05, "loss": 0.1721, "step": 2545 }, { "epoch": 0.05610184710814369, "grad_norm": 1.4150742292404175, "learning_rate": 2.9946478825681573e-05, "loss": 0.1645, "step": 2546 }, { "epoch": 0.05612388239765985, "grad_norm": 2.387108564376831, "learning_rate": 2.994638843402923e-05, "loss": 0.2224, "step": 2547 }, { "epoch": 0.056145917687176014, "grad_norm": 2.3279855251312256, "learning_rate": 2.994629796624691e-05, "loss": 0.1815, "step": 2548 }, { "epoch": 0.05616795297669217, "grad_norm": 2.030653953552246, "learning_rate": 2.9946207422335083e-05, "loss": 0.1657, "step": 2549 }, { "epoch": 0.05618998826620833, "grad_norm": 1.6080822944641113, "learning_rate": 2.994611680229421e-05, "loss": 0.1904, "step": 2550 }, { "epoch": 0.056212023555724494, "grad_norm": 2.5948941707611084, "learning_rate": 2.994602610612475e-05, "loss": 0.204, "step": 2551 }, { "epoch": 0.05623405884524065, "grad_norm": 1.5690038204193115, "learning_rate": 2.9945935333827157e-05, "loss": 0.1985, "step": 2552 }, { "epoch": 0.056256094134756816, "grad_norm": 1.3794426918029785, "learning_rate": 2.9945844485401908e-05, "loss": 0.1806, "step": 2553 }, { "epoch": 0.056278129424272974, "grad_norm": 1.6184827089309692, "learning_rate": 2.9945753560849454e-05, "loss": 0.1578, "step": 2554 }, { "epoch": 0.05630016471378913, "grad_norm": 3.172218084335327, "learning_rate": 2.994566256017026e-05, "loss": 0.25, "step": 2555 }, { "epoch": 0.056322200003305296, "grad_norm": 2.0232138633728027, "learning_rate": 2.99455714833648e-05, "loss": 0.1888, "step": 2556 }, { "epoch": 0.056344235292821454, "grad_norm": 1.288001537322998, "learning_rate": 2.9945480330433528e-05, "loss": 0.1903, "step": 2557 }, { "epoch": 0.05636627058233761, "grad_norm": 1.1336700916290283, "learning_rate": 2.9945389101376904e-05, "loss": 0.1916, "step": 2558 }, { "epoch": 0.056388305871853776, "grad_norm": 1.1483879089355469, "learning_rate": 2.9945297796195407e-05, "loss": 0.1743, "step": 2559 }, { "epoch": 0.056410341161369934, "grad_norm": 2.330904006958008, "learning_rate": 2.994520641488949e-05, "loss": 0.2167, "step": 2560 }, { "epoch": 0.05643237645088609, "grad_norm": 2.452141046524048, "learning_rate": 2.994511495745962e-05, "loss": 0.1332, "step": 2561 }, { "epoch": 0.056454411740402256, "grad_norm": 1.6071044206619263, "learning_rate": 2.9945023423906273e-05, "loss": 0.1688, "step": 2562 }, { "epoch": 0.056476447029918414, "grad_norm": 2.3824589252471924, "learning_rate": 2.99449318142299e-05, "loss": 0.1355, "step": 2563 }, { "epoch": 0.05649848231943457, "grad_norm": 1.9567972421646118, "learning_rate": 2.9944840128430975e-05, "loss": 0.1816, "step": 2564 }, { "epoch": 0.056520517608950736, "grad_norm": 1.888446569442749, "learning_rate": 2.9944748366509973e-05, "loss": 0.1665, "step": 2565 }, { "epoch": 0.056542552898466894, "grad_norm": 2.637150526046753, "learning_rate": 2.9944656528467343e-05, "loss": 0.2337, "step": 2566 }, { "epoch": 0.05656458818798305, "grad_norm": 2.0736329555511475, "learning_rate": 2.994456461430357e-05, "loss": 0.2018, "step": 2567 }, { "epoch": 0.056586623477499216, "grad_norm": 1.3570266962051392, "learning_rate": 2.9944472624019118e-05, "loss": 0.1785, "step": 2568 }, { "epoch": 0.056608658767015374, "grad_norm": 1.3440256118774414, "learning_rate": 2.9944380557614447e-05, "loss": 0.173, "step": 2569 }, { "epoch": 0.05663069405653154, "grad_norm": 2.188307523727417, "learning_rate": 2.994428841509004e-05, "loss": 0.1622, "step": 2570 }, { "epoch": 0.056652729346047696, "grad_norm": 2.001455783843994, "learning_rate": 2.994419619644635e-05, "loss": 0.257, "step": 2571 }, { "epoch": 0.056674764635563854, "grad_norm": 1.8112295866012573, "learning_rate": 2.9944103901683857e-05, "loss": 0.1329, "step": 2572 }, { "epoch": 0.05669679992508002, "grad_norm": 1.1597812175750732, "learning_rate": 2.9944011530803032e-05, "loss": 0.1445, "step": 2573 }, { "epoch": 0.056718835214596176, "grad_norm": 1.5887929201126099, "learning_rate": 2.994391908380434e-05, "loss": 0.1752, "step": 2574 }, { "epoch": 0.056740870504112334, "grad_norm": 1.8231061697006226, "learning_rate": 2.9943826560688256e-05, "loss": 0.1658, "step": 2575 }, { "epoch": 0.0567629057936285, "grad_norm": 2.6511895656585693, "learning_rate": 2.9943733961455246e-05, "loss": 0.1486, "step": 2576 }, { "epoch": 0.056784941083144656, "grad_norm": 1.6799246072769165, "learning_rate": 2.9943641286105793e-05, "loss": 0.1706, "step": 2577 }, { "epoch": 0.056806976372660814, "grad_norm": 3.1415412425994873, "learning_rate": 2.9943548534640352e-05, "loss": 0.2152, "step": 2578 }, { "epoch": 0.05682901166217698, "grad_norm": 1.6954090595245361, "learning_rate": 2.994345570705941e-05, "loss": 0.1662, "step": 2579 }, { "epoch": 0.056851046951693136, "grad_norm": 1.1742767095565796, "learning_rate": 2.994336280336344e-05, "loss": 0.1578, "step": 2580 }, { "epoch": 0.056873082241209294, "grad_norm": 1.9788527488708496, "learning_rate": 2.9943269823552903e-05, "loss": 0.1659, "step": 2581 }, { "epoch": 0.05689511753072546, "grad_norm": 1.8273531198501587, "learning_rate": 2.9943176767628283e-05, "loss": 0.1386, "step": 2582 }, { "epoch": 0.056917152820241616, "grad_norm": 2.095851182937622, "learning_rate": 2.994308363559005e-05, "loss": 0.2372, "step": 2583 }, { "epoch": 0.056939188109757774, "grad_norm": 1.589810848236084, "learning_rate": 2.9942990427438674e-05, "loss": 0.1104, "step": 2584 }, { "epoch": 0.05696122339927394, "grad_norm": 1.9802030324935913, "learning_rate": 2.9942897143174636e-05, "loss": 0.1707, "step": 2585 }, { "epoch": 0.056983258688790096, "grad_norm": 3.057753562927246, "learning_rate": 2.9942803782798413e-05, "loss": 0.1824, "step": 2586 }, { "epoch": 0.05700529397830626, "grad_norm": 1.4226266145706177, "learning_rate": 2.994271034631048e-05, "loss": 0.1821, "step": 2587 }, { "epoch": 0.05702732926782242, "grad_norm": 2.0229711532592773, "learning_rate": 2.9942616833711305e-05, "loss": 0.1894, "step": 2588 }, { "epoch": 0.057049364557338576, "grad_norm": 1.8330776691436768, "learning_rate": 2.9942523245001373e-05, "loss": 0.1705, "step": 2589 }, { "epoch": 0.05707139984685474, "grad_norm": 1.3216323852539062, "learning_rate": 2.9942429580181156e-05, "loss": 0.1799, "step": 2590 }, { "epoch": 0.0570934351363709, "grad_norm": 1.8880767822265625, "learning_rate": 2.994233583925113e-05, "loss": 0.149, "step": 2591 }, { "epoch": 0.057115470425887056, "grad_norm": 1.9355788230895996, "learning_rate": 2.994224202221178e-05, "loss": 0.1739, "step": 2592 }, { "epoch": 0.05713750571540322, "grad_norm": 1.5527534484863281, "learning_rate": 2.9942148129063576e-05, "loss": 0.1451, "step": 2593 }, { "epoch": 0.05715954100491938, "grad_norm": 1.604543924331665, "learning_rate": 2.9942054159807e-05, "loss": 0.1487, "step": 2594 }, { "epoch": 0.057181576294435536, "grad_norm": 1.496951937675476, "learning_rate": 2.9941960114442523e-05, "loss": 0.1854, "step": 2595 }, { "epoch": 0.0572036115839517, "grad_norm": 1.3756966590881348, "learning_rate": 2.9941865992970642e-05, "loss": 0.1789, "step": 2596 }, { "epoch": 0.05722564687346786, "grad_norm": 1.6290035247802734, "learning_rate": 2.9941771795391818e-05, "loss": 0.175, "step": 2597 }, { "epoch": 0.057247682162984016, "grad_norm": 1.252380609512329, "learning_rate": 2.9941677521706537e-05, "loss": 0.1637, "step": 2598 }, { "epoch": 0.05726971745250018, "grad_norm": 1.597776174545288, "learning_rate": 2.9941583171915282e-05, "loss": 0.1703, "step": 2599 }, { "epoch": 0.05729175274201634, "grad_norm": 2.237159490585327, "learning_rate": 2.994148874601853e-05, "loss": 0.1945, "step": 2600 }, { "epoch": 0.057313788031532496, "grad_norm": 2.0967395305633545, "learning_rate": 2.9941394244016766e-05, "loss": 0.2206, "step": 2601 }, { "epoch": 0.05733582332104866, "grad_norm": 1.4552119970321655, "learning_rate": 2.9941299665910467e-05, "loss": 0.1324, "step": 2602 }, { "epoch": 0.05735785861056482, "grad_norm": 1.8123095035552979, "learning_rate": 2.9941205011700118e-05, "loss": 0.1481, "step": 2603 }, { "epoch": 0.057379893900080976, "grad_norm": 1.7191150188446045, "learning_rate": 2.99411102813862e-05, "loss": 0.1553, "step": 2604 }, { "epoch": 0.05740192918959714, "grad_norm": 1.7671116590499878, "learning_rate": 2.9941015474969193e-05, "loss": 0.1837, "step": 2605 }, { "epoch": 0.0574239644791133, "grad_norm": 1.2886738777160645, "learning_rate": 2.9940920592449587e-05, "loss": 0.1563, "step": 2606 }, { "epoch": 0.05744599976862946, "grad_norm": 2.8106539249420166, "learning_rate": 2.9940825633827858e-05, "loss": 0.2174, "step": 2607 }, { "epoch": 0.05746803505814562, "grad_norm": 1.4211113452911377, "learning_rate": 2.9940730599104487e-05, "loss": 0.2231, "step": 2608 }, { "epoch": 0.05749007034766178, "grad_norm": 3.324939727783203, "learning_rate": 2.9940635488279974e-05, "loss": 0.1907, "step": 2609 }, { "epoch": 0.05751210563717794, "grad_norm": 1.293347954750061, "learning_rate": 2.9940540301354785e-05, "loss": 0.1215, "step": 2610 }, { "epoch": 0.0575341409266941, "grad_norm": 2.347712278366089, "learning_rate": 2.994044503832941e-05, "loss": 0.1849, "step": 2611 }, { "epoch": 0.05755617621621026, "grad_norm": 1.5861622095108032, "learning_rate": 2.9940349699204342e-05, "loss": 0.1738, "step": 2612 }, { "epoch": 0.05757821150572642, "grad_norm": 1.690598964691162, "learning_rate": 2.994025428398006e-05, "loss": 0.1178, "step": 2613 }, { "epoch": 0.05760024679524258, "grad_norm": 1.9226645231246948, "learning_rate": 2.994015879265705e-05, "loss": 0.1849, "step": 2614 }, { "epoch": 0.05762228208475874, "grad_norm": 1.1904118061065674, "learning_rate": 2.9940063225235802e-05, "loss": 0.1765, "step": 2615 }, { "epoch": 0.0576443173742749, "grad_norm": 1.4660824537277222, "learning_rate": 2.9939967581716798e-05, "loss": 0.1518, "step": 2616 }, { "epoch": 0.05766635266379106, "grad_norm": 1.626660943031311, "learning_rate": 2.993987186210053e-05, "loss": 0.1421, "step": 2617 }, { "epoch": 0.05768838795330722, "grad_norm": 1.4703315496444702, "learning_rate": 2.993977606638748e-05, "loss": 0.1924, "step": 2618 }, { "epoch": 0.05771042324282338, "grad_norm": 1.6147178411483765, "learning_rate": 2.993968019457814e-05, "loss": 0.2157, "step": 2619 }, { "epoch": 0.05773245853233954, "grad_norm": 1.9705368280410767, "learning_rate": 2.9939584246673003e-05, "loss": 0.1884, "step": 2620 }, { "epoch": 0.0577544938218557, "grad_norm": 1.5611681938171387, "learning_rate": 2.9939488222672552e-05, "loss": 0.1231, "step": 2621 }, { "epoch": 0.05777652911137186, "grad_norm": 1.3485231399536133, "learning_rate": 2.993939212257727e-05, "loss": 0.1569, "step": 2622 }, { "epoch": 0.05779856440088802, "grad_norm": 1.867701768875122, "learning_rate": 2.9939295946387655e-05, "loss": 0.1958, "step": 2623 }, { "epoch": 0.057820599690404186, "grad_norm": 1.3145182132720947, "learning_rate": 2.9939199694104195e-05, "loss": 0.1681, "step": 2624 }, { "epoch": 0.05784263497992034, "grad_norm": 1.5909085273742676, "learning_rate": 2.9939103365727377e-05, "loss": 0.1369, "step": 2625 }, { "epoch": 0.0578646702694365, "grad_norm": 1.421965479850769, "learning_rate": 2.9939006961257697e-05, "loss": 0.1581, "step": 2626 }, { "epoch": 0.057886705558952666, "grad_norm": 1.8178884983062744, "learning_rate": 2.9938910480695646e-05, "loss": 0.2111, "step": 2627 }, { "epoch": 0.05790874084846882, "grad_norm": 1.202623724937439, "learning_rate": 2.993881392404171e-05, "loss": 0.1389, "step": 2628 }, { "epoch": 0.05793077613798498, "grad_norm": 1.5372650623321533, "learning_rate": 2.9938717291296387e-05, "loss": 0.1445, "step": 2629 }, { "epoch": 0.057952811427501146, "grad_norm": 1.607681393623352, "learning_rate": 2.993862058246016e-05, "loss": 0.1439, "step": 2630 }, { "epoch": 0.0579748467170173, "grad_norm": 2.2076056003570557, "learning_rate": 2.9938523797533535e-05, "loss": 0.1944, "step": 2631 }, { "epoch": 0.05799688200653346, "grad_norm": 1.4591041803359985, "learning_rate": 2.9938426936516993e-05, "loss": 0.1725, "step": 2632 }, { "epoch": 0.058018917296049625, "grad_norm": 1.4598370790481567, "learning_rate": 2.993832999941103e-05, "loss": 0.1421, "step": 2633 }, { "epoch": 0.05804095258556578, "grad_norm": 2.459376573562622, "learning_rate": 2.9938232986216146e-05, "loss": 0.1479, "step": 2634 }, { "epoch": 0.05806298787508194, "grad_norm": 1.7511378526687622, "learning_rate": 2.9938135896932833e-05, "loss": 0.185, "step": 2635 }, { "epoch": 0.058085023164598105, "grad_norm": 1.4429845809936523, "learning_rate": 2.9938038731561578e-05, "loss": 0.1714, "step": 2636 }, { "epoch": 0.05810705845411426, "grad_norm": 2.33949613571167, "learning_rate": 2.9937941490102888e-05, "loss": 0.1953, "step": 2637 }, { "epoch": 0.05812909374363042, "grad_norm": 1.8051875829696655, "learning_rate": 2.9937844172557244e-05, "loss": 0.1831, "step": 2638 }, { "epoch": 0.058151129033146585, "grad_norm": 1.314457654953003, "learning_rate": 2.993774677892515e-05, "loss": 0.1637, "step": 2639 }, { "epoch": 0.05817316432266274, "grad_norm": 1.6085149049758911, "learning_rate": 2.9937649309207108e-05, "loss": 0.1691, "step": 2640 }, { "epoch": 0.05819519961217891, "grad_norm": 1.6806131601333618, "learning_rate": 2.99375517634036e-05, "loss": 0.1519, "step": 2641 }, { "epoch": 0.058217234901695065, "grad_norm": 1.652912974357605, "learning_rate": 2.9937454141515136e-05, "loss": 0.145, "step": 2642 }, { "epoch": 0.05823927019121122, "grad_norm": 1.4673515558242798, "learning_rate": 2.9937356443542208e-05, "loss": 0.1664, "step": 2643 }, { "epoch": 0.05826130548072739, "grad_norm": 1.3717620372772217, "learning_rate": 2.993725866948531e-05, "loss": 0.1785, "step": 2644 }, { "epoch": 0.058283340770243545, "grad_norm": 1.4334101676940918, "learning_rate": 2.9937160819344947e-05, "loss": 0.1574, "step": 2645 }, { "epoch": 0.0583053760597597, "grad_norm": 1.4145933389663696, "learning_rate": 2.993706289312161e-05, "loss": 0.1657, "step": 2646 }, { "epoch": 0.05832741134927587, "grad_norm": 1.8339284658432007, "learning_rate": 2.993696489081581e-05, "loss": 0.164, "step": 2647 }, { "epoch": 0.058349446638792025, "grad_norm": 1.73491370677948, "learning_rate": 2.993686681242803e-05, "loss": 0.2009, "step": 2648 }, { "epoch": 0.05837148192830818, "grad_norm": 1.4847162961959839, "learning_rate": 2.9936768657958778e-05, "loss": 0.1635, "step": 2649 }, { "epoch": 0.05839351721782435, "grad_norm": 1.7255388498306274, "learning_rate": 2.9936670427408558e-05, "loss": 0.1356, "step": 2650 }, { "epoch": 0.058415552507340505, "grad_norm": 1.6925787925720215, "learning_rate": 2.993657212077786e-05, "loss": 0.212, "step": 2651 }, { "epoch": 0.05843758779685666, "grad_norm": 1.2316019535064697, "learning_rate": 2.9936473738067197e-05, "loss": 0.1531, "step": 2652 }, { "epoch": 0.05845962308637283, "grad_norm": 1.5186293125152588, "learning_rate": 2.993637527927706e-05, "loss": 0.1358, "step": 2653 }, { "epoch": 0.058481658375888985, "grad_norm": 1.5146234035491943, "learning_rate": 2.9936276744407954e-05, "loss": 0.1489, "step": 2654 }, { "epoch": 0.05850369366540514, "grad_norm": 1.7576590776443481, "learning_rate": 2.9936178133460384e-05, "loss": 0.1265, "step": 2655 }, { "epoch": 0.05852572895492131, "grad_norm": 2.361778974533081, "learning_rate": 2.9936079446434846e-05, "loss": 0.162, "step": 2656 }, { "epoch": 0.058547764244437465, "grad_norm": 2.030977964401245, "learning_rate": 2.9935980683331848e-05, "loss": 0.1355, "step": 2657 }, { "epoch": 0.05856979953395363, "grad_norm": 1.540978193283081, "learning_rate": 2.993588184415189e-05, "loss": 0.1796, "step": 2658 }, { "epoch": 0.05859183482346979, "grad_norm": 1.4561067819595337, "learning_rate": 2.9935782928895476e-05, "loss": 0.157, "step": 2659 }, { "epoch": 0.058613870112985945, "grad_norm": 1.5937598943710327, "learning_rate": 2.9935683937563113e-05, "loss": 0.2073, "step": 2660 }, { "epoch": 0.05863590540250211, "grad_norm": 2.476640462875366, "learning_rate": 2.9935584870155298e-05, "loss": 0.1826, "step": 2661 }, { "epoch": 0.05865794069201827, "grad_norm": 1.353297472000122, "learning_rate": 2.9935485726672544e-05, "loss": 0.1873, "step": 2662 }, { "epoch": 0.058679975981534425, "grad_norm": 1.931408405303955, "learning_rate": 2.993538650711535e-05, "loss": 0.1952, "step": 2663 }, { "epoch": 0.05870201127105059, "grad_norm": 1.8506447076797485, "learning_rate": 2.9935287211484224e-05, "loss": 0.1543, "step": 2664 }, { "epoch": 0.05872404656056675, "grad_norm": 1.446828007698059, "learning_rate": 2.993518783977967e-05, "loss": 0.1315, "step": 2665 }, { "epoch": 0.058746081850082905, "grad_norm": 2.774026393890381, "learning_rate": 2.9935088392002197e-05, "loss": 0.2246, "step": 2666 }, { "epoch": 0.05876811713959907, "grad_norm": 2.119654655456543, "learning_rate": 2.9934988868152312e-05, "loss": 0.2126, "step": 2667 }, { "epoch": 0.05879015242911523, "grad_norm": 1.4488718509674072, "learning_rate": 2.993488926823052e-05, "loss": 0.1754, "step": 2668 }, { "epoch": 0.058812187718631385, "grad_norm": 2.0156209468841553, "learning_rate": 2.9934789592237326e-05, "loss": 0.159, "step": 2669 }, { "epoch": 0.05883422300814755, "grad_norm": 2.242840051651001, "learning_rate": 2.993468984017324e-05, "loss": 0.1957, "step": 2670 }, { "epoch": 0.05885625829766371, "grad_norm": 2.04524302482605, "learning_rate": 2.993459001203877e-05, "loss": 0.2142, "step": 2671 }, { "epoch": 0.058878293587179865, "grad_norm": 1.8821003437042236, "learning_rate": 2.9934490107834425e-05, "loss": 0.1435, "step": 2672 }, { "epoch": 0.05890032887669603, "grad_norm": 1.5884734392166138, "learning_rate": 2.993439012756071e-05, "loss": 0.1743, "step": 2673 }, { "epoch": 0.05892236416621219, "grad_norm": 1.0495851039886475, "learning_rate": 2.9934290071218142e-05, "loss": 0.1048, "step": 2674 }, { "epoch": 0.05894439945572835, "grad_norm": 1.617471694946289, "learning_rate": 2.9934189938807228e-05, "loss": 0.152, "step": 2675 }, { "epoch": 0.05896643474524451, "grad_norm": 1.3732693195343018, "learning_rate": 2.9934089730328472e-05, "loss": 0.1281, "step": 2676 }, { "epoch": 0.05898847003476067, "grad_norm": 2.04945707321167, "learning_rate": 2.993398944578239e-05, "loss": 0.1554, "step": 2677 }, { "epoch": 0.05901050532427683, "grad_norm": 1.562279224395752, "learning_rate": 2.9933889085169493e-05, "loss": 0.1568, "step": 2678 }, { "epoch": 0.05903254061379299, "grad_norm": 1.5751835107803345, "learning_rate": 2.9933788648490286e-05, "loss": 0.1324, "step": 2679 }, { "epoch": 0.05905457590330915, "grad_norm": 1.5758459568023682, "learning_rate": 2.993368813574529e-05, "loss": 0.1708, "step": 2680 }, { "epoch": 0.05907661119282531, "grad_norm": 1.8540949821472168, "learning_rate": 2.9933587546935014e-05, "loss": 0.1499, "step": 2681 }, { "epoch": 0.05909864648234147, "grad_norm": 1.395466685295105, "learning_rate": 2.9933486882059964e-05, "loss": 0.1752, "step": 2682 }, { "epoch": 0.05912068177185763, "grad_norm": 1.2705633640289307, "learning_rate": 2.993338614112066e-05, "loss": 0.1531, "step": 2683 }, { "epoch": 0.05914271706137379, "grad_norm": 1.5840809345245361, "learning_rate": 2.9933285324117607e-05, "loss": 0.1794, "step": 2684 }, { "epoch": 0.05916475235088995, "grad_norm": 1.7997381687164307, "learning_rate": 2.9933184431051333e-05, "loss": 0.1516, "step": 2685 }, { "epoch": 0.05918678764040611, "grad_norm": 1.465893030166626, "learning_rate": 2.9933083461922333e-05, "loss": 0.1837, "step": 2686 }, { "epoch": 0.05920882292992227, "grad_norm": 2.1719202995300293, "learning_rate": 2.993298241673114e-05, "loss": 0.2216, "step": 2687 }, { "epoch": 0.05923085821943843, "grad_norm": 1.8589484691619873, "learning_rate": 2.9932881295478256e-05, "loss": 0.1545, "step": 2688 }, { "epoch": 0.05925289350895459, "grad_norm": 1.3782005310058594, "learning_rate": 2.99327800981642e-05, "loss": 0.1386, "step": 2689 }, { "epoch": 0.05927492879847075, "grad_norm": 1.7886626720428467, "learning_rate": 2.9932678824789488e-05, "loss": 0.1486, "step": 2690 }, { "epoch": 0.05929696408798691, "grad_norm": 1.4519784450531006, "learning_rate": 2.9932577475354635e-05, "loss": 0.1779, "step": 2691 }, { "epoch": 0.05931899937750307, "grad_norm": 1.2375737428665161, "learning_rate": 2.9932476049860153e-05, "loss": 0.1722, "step": 2692 }, { "epoch": 0.05934103466701923, "grad_norm": 1.9135414361953735, "learning_rate": 2.9932374548306572e-05, "loss": 0.1449, "step": 2693 }, { "epoch": 0.05936306995653539, "grad_norm": 1.7007578611373901, "learning_rate": 2.9932272970694392e-05, "loss": 0.1409, "step": 2694 }, { "epoch": 0.059385105246051555, "grad_norm": 1.9921281337738037, "learning_rate": 2.9932171317024142e-05, "loss": 0.2111, "step": 2695 }, { "epoch": 0.05940714053556771, "grad_norm": 1.4035223722457886, "learning_rate": 2.9932069587296338e-05, "loss": 0.1796, "step": 2696 }, { "epoch": 0.05942917582508387, "grad_norm": 2.653144359588623, "learning_rate": 2.993196778151149e-05, "loss": 0.1873, "step": 2697 }, { "epoch": 0.059451211114600035, "grad_norm": 1.7764962911605835, "learning_rate": 2.9931865899670127e-05, "loss": 0.1549, "step": 2698 }, { "epoch": 0.05947324640411619, "grad_norm": 1.7947074174880981, "learning_rate": 2.9931763941772765e-05, "loss": 0.1521, "step": 2699 }, { "epoch": 0.05949528169363235, "grad_norm": 1.4346344470977783, "learning_rate": 2.993166190781992e-05, "loss": 0.1868, "step": 2700 }, { "epoch": 0.059517316983148515, "grad_norm": 1.893454670906067, "learning_rate": 2.9931559797812118e-05, "loss": 0.1761, "step": 2701 }, { "epoch": 0.05953935227266467, "grad_norm": 2.0859081745147705, "learning_rate": 2.9931457611749873e-05, "loss": 0.2166, "step": 2702 }, { "epoch": 0.05956138756218083, "grad_norm": 2.3591041564941406, "learning_rate": 2.9931355349633706e-05, "loss": 0.1628, "step": 2703 }, { "epoch": 0.059583422851696995, "grad_norm": 1.2016570568084717, "learning_rate": 2.993125301146414e-05, "loss": 0.185, "step": 2704 }, { "epoch": 0.05960545814121315, "grad_norm": 1.5664418935775757, "learning_rate": 2.9931150597241697e-05, "loss": 0.1458, "step": 2705 }, { "epoch": 0.05962749343072931, "grad_norm": 1.2148728370666504, "learning_rate": 2.993104810696689e-05, "loss": 0.1326, "step": 2706 }, { "epoch": 0.059649528720245475, "grad_norm": 1.6204180717468262, "learning_rate": 2.993094554064026e-05, "loss": 0.1264, "step": 2707 }, { "epoch": 0.05967156400976163, "grad_norm": 1.9480856657028198, "learning_rate": 2.993084289826231e-05, "loss": 0.1365, "step": 2708 }, { "epoch": 0.05969359929927779, "grad_norm": 1.6060278415679932, "learning_rate": 2.9930740179833572e-05, "loss": 0.1576, "step": 2709 }, { "epoch": 0.059715634588793955, "grad_norm": 1.3230929374694824, "learning_rate": 2.9930637385354572e-05, "loss": 0.1815, "step": 2710 }, { "epoch": 0.05973766987831011, "grad_norm": 1.1965749263763428, "learning_rate": 2.9930534514825823e-05, "loss": 0.1894, "step": 2711 }, { "epoch": 0.05975970516782628, "grad_norm": 1.4289723634719849, "learning_rate": 2.993043156824786e-05, "loss": 0.1883, "step": 2712 }, { "epoch": 0.059781740457342435, "grad_norm": 1.7419400215148926, "learning_rate": 2.9930328545621196e-05, "loss": 0.1901, "step": 2713 }, { "epoch": 0.05980377574685859, "grad_norm": 1.3611713647842407, "learning_rate": 2.993022544694637e-05, "loss": 0.1707, "step": 2714 }, { "epoch": 0.05982581103637476, "grad_norm": 1.8389160633087158, "learning_rate": 2.9930122272223894e-05, "loss": 0.1792, "step": 2715 }, { "epoch": 0.059847846325890915, "grad_norm": 1.7487516403198242, "learning_rate": 2.9930019021454306e-05, "loss": 0.1496, "step": 2716 }, { "epoch": 0.05986988161540707, "grad_norm": 1.5376334190368652, "learning_rate": 2.9929915694638117e-05, "loss": 0.1317, "step": 2717 }, { "epoch": 0.05989191690492324, "grad_norm": 1.8892722129821777, "learning_rate": 2.9929812291775866e-05, "loss": 0.1509, "step": 2718 }, { "epoch": 0.059913952194439395, "grad_norm": 1.2302181720733643, "learning_rate": 2.9929708812868074e-05, "loss": 0.1446, "step": 2719 }, { "epoch": 0.05993598748395555, "grad_norm": 3.692211627960205, "learning_rate": 2.992960525791527e-05, "loss": 0.1487, "step": 2720 }, { "epoch": 0.05995802277347172, "grad_norm": 1.267075777053833, "learning_rate": 2.992950162691798e-05, "loss": 0.1252, "step": 2721 }, { "epoch": 0.059980058062987875, "grad_norm": 1.8081293106079102, "learning_rate": 2.992939791987673e-05, "loss": 0.1478, "step": 2722 }, { "epoch": 0.06000209335250403, "grad_norm": 1.5171502828598022, "learning_rate": 2.992929413679206e-05, "loss": 0.1815, "step": 2723 }, { "epoch": 0.0600241286420202, "grad_norm": 2.2201595306396484, "learning_rate": 2.992919027766448e-05, "loss": 0.1824, "step": 2724 }, { "epoch": 0.060046163931536355, "grad_norm": 1.656495451927185, "learning_rate": 2.992908634249453e-05, "loss": 0.1861, "step": 2725 }, { "epoch": 0.06006819922105251, "grad_norm": 2.0191667079925537, "learning_rate": 2.9928982331282736e-05, "loss": 0.16, "step": 2726 }, { "epoch": 0.06009023451056868, "grad_norm": 1.4403971433639526, "learning_rate": 2.992887824402963e-05, "loss": 0.1412, "step": 2727 }, { "epoch": 0.060112269800084835, "grad_norm": 1.5162301063537598, "learning_rate": 2.9928774080735744e-05, "loss": 0.1679, "step": 2728 }, { "epoch": 0.060134305089601, "grad_norm": 2.403198003768921, "learning_rate": 2.9928669841401603e-05, "loss": 0.1266, "step": 2729 }, { "epoch": 0.06015634037911716, "grad_norm": 4.021018028259277, "learning_rate": 2.9928565526027746e-05, "loss": 0.1784, "step": 2730 }, { "epoch": 0.060178375668633315, "grad_norm": 2.6485698223114014, "learning_rate": 2.9928461134614694e-05, "loss": 0.1638, "step": 2731 }, { "epoch": 0.06020041095814948, "grad_norm": 2.232983112335205, "learning_rate": 2.992835666716299e-05, "loss": 0.2049, "step": 2732 }, { "epoch": 0.06022244624766564, "grad_norm": 2.6173276901245117, "learning_rate": 2.9928252123673154e-05, "loss": 0.1731, "step": 2733 }, { "epoch": 0.060244481537181795, "grad_norm": 1.5854474306106567, "learning_rate": 2.992814750414573e-05, "loss": 0.1508, "step": 2734 }, { "epoch": 0.06026651682669796, "grad_norm": 1.511539340019226, "learning_rate": 2.9928042808581243e-05, "loss": 0.1959, "step": 2735 }, { "epoch": 0.06028855211621412, "grad_norm": 5.07810115814209, "learning_rate": 2.992793803698023e-05, "loss": 0.1253, "step": 2736 }, { "epoch": 0.060310587405730275, "grad_norm": 2.051058530807495, "learning_rate": 2.992783318934322e-05, "loss": 0.2238, "step": 2737 }, { "epoch": 0.06033262269524644, "grad_norm": 1.730103850364685, "learning_rate": 2.9927728265670757e-05, "loss": 0.1659, "step": 2738 }, { "epoch": 0.0603546579847626, "grad_norm": 1.835688591003418, "learning_rate": 2.9927623265963366e-05, "loss": 0.1759, "step": 2739 }, { "epoch": 0.060376693274278755, "grad_norm": 2.1318509578704834, "learning_rate": 2.9927518190221584e-05, "loss": 0.242, "step": 2740 }, { "epoch": 0.06039872856379492, "grad_norm": 2.0854570865631104, "learning_rate": 2.9927413038445948e-05, "loss": 0.1888, "step": 2741 }, { "epoch": 0.06042076385331108, "grad_norm": 2.033756971359253, "learning_rate": 2.992730781063699e-05, "loss": 0.1879, "step": 2742 }, { "epoch": 0.060442799142827235, "grad_norm": 1.7774685621261597, "learning_rate": 2.9927202506795255e-05, "loss": 0.2361, "step": 2743 }, { "epoch": 0.0604648344323434, "grad_norm": 1.6360559463500977, "learning_rate": 2.992709712692127e-05, "loss": 0.1816, "step": 2744 }, { "epoch": 0.06048686972185956, "grad_norm": 1.564141035079956, "learning_rate": 2.9926991671015573e-05, "loss": 0.1702, "step": 2745 }, { "epoch": 0.06050890501137572, "grad_norm": 1.3170206546783447, "learning_rate": 2.9926886139078705e-05, "loss": 0.1449, "step": 2746 }, { "epoch": 0.06053094030089188, "grad_norm": 1.225872278213501, "learning_rate": 2.9926780531111198e-05, "loss": 0.105, "step": 2747 }, { "epoch": 0.06055297559040804, "grad_norm": 1.3207744359970093, "learning_rate": 2.9926674847113593e-05, "loss": 0.1459, "step": 2748 }, { "epoch": 0.0605750108799242, "grad_norm": 1.8465650081634521, "learning_rate": 2.992656908708643e-05, "loss": 0.1892, "step": 2749 }, { "epoch": 0.06059704616944036, "grad_norm": 2.1571133136749268, "learning_rate": 2.9926463251030253e-05, "loss": 0.1774, "step": 2750 }, { "epoch": 0.06061908145895652, "grad_norm": 3.496051788330078, "learning_rate": 2.9926357338945588e-05, "loss": 0.1729, "step": 2751 }, { "epoch": 0.06064111674847268, "grad_norm": 1.6517486572265625, "learning_rate": 2.9926251350832984e-05, "loss": 0.1743, "step": 2752 }, { "epoch": 0.06066315203798884, "grad_norm": 1.1628104448318481, "learning_rate": 2.9926145286692973e-05, "loss": 0.1501, "step": 2753 }, { "epoch": 0.060685187327505, "grad_norm": 1.7803945541381836, "learning_rate": 2.9926039146526103e-05, "loss": 0.178, "step": 2754 }, { "epoch": 0.06070722261702116, "grad_norm": 1.3735076189041138, "learning_rate": 2.992593293033291e-05, "loss": 0.1512, "step": 2755 }, { "epoch": 0.06072925790653732, "grad_norm": 2.0805299282073975, "learning_rate": 2.992582663811394e-05, "loss": 0.172, "step": 2756 }, { "epoch": 0.06075129319605348, "grad_norm": 2.2379682064056396, "learning_rate": 2.992572026986973e-05, "loss": 0.1456, "step": 2757 }, { "epoch": 0.06077332848556964, "grad_norm": 2.2822916507720947, "learning_rate": 2.992561382560082e-05, "loss": 0.1558, "step": 2758 }, { "epoch": 0.0607953637750858, "grad_norm": 1.7348213195800781, "learning_rate": 2.9925507305307756e-05, "loss": 0.15, "step": 2759 }, { "epoch": 0.06081739906460196, "grad_norm": 1.2566708326339722, "learning_rate": 2.9925400708991083e-05, "loss": 0.1621, "step": 2760 }, { "epoch": 0.06083943435411812, "grad_norm": 1.9724950790405273, "learning_rate": 2.9925294036651336e-05, "loss": 0.2031, "step": 2761 }, { "epoch": 0.06086146964363428, "grad_norm": 2.091853141784668, "learning_rate": 2.9925187288289067e-05, "loss": 0.1712, "step": 2762 }, { "epoch": 0.060883504933150444, "grad_norm": 2.3962063789367676, "learning_rate": 2.9925080463904815e-05, "loss": 0.1884, "step": 2763 }, { "epoch": 0.0609055402226666, "grad_norm": 1.8104946613311768, "learning_rate": 2.992497356349912e-05, "loss": 0.1396, "step": 2764 }, { "epoch": 0.06092757551218276, "grad_norm": 1.7008711099624634, "learning_rate": 2.9924866587072538e-05, "loss": 0.1661, "step": 2765 }, { "epoch": 0.060949610801698924, "grad_norm": 1.4681774377822876, "learning_rate": 2.9924759534625603e-05, "loss": 0.1521, "step": 2766 }, { "epoch": 0.06097164609121508, "grad_norm": 1.247511386871338, "learning_rate": 2.9924652406158865e-05, "loss": 0.1748, "step": 2767 }, { "epoch": 0.06099368138073124, "grad_norm": 2.3490703105926514, "learning_rate": 2.992454520167287e-05, "loss": 0.1671, "step": 2768 }, { "epoch": 0.061015716670247404, "grad_norm": 1.7812596559524536, "learning_rate": 2.9924437921168163e-05, "loss": 0.1881, "step": 2769 }, { "epoch": 0.06103775195976356, "grad_norm": 1.4287097454071045, "learning_rate": 2.9924330564645288e-05, "loss": 0.1373, "step": 2770 }, { "epoch": 0.06105978724927972, "grad_norm": 1.2340171337127686, "learning_rate": 2.9924223132104797e-05, "loss": 0.1123, "step": 2771 }, { "epoch": 0.061081822538795884, "grad_norm": 2.5502853393554688, "learning_rate": 2.9924115623547232e-05, "loss": 0.1232, "step": 2772 }, { "epoch": 0.06110385782831204, "grad_norm": 1.4717286825180054, "learning_rate": 2.992400803897315e-05, "loss": 0.1797, "step": 2773 }, { "epoch": 0.0611258931178282, "grad_norm": 1.4815773963928223, "learning_rate": 2.9923900378383085e-05, "loss": 0.1442, "step": 2774 }, { "epoch": 0.061147928407344364, "grad_norm": 1.5862069129943848, "learning_rate": 2.9923792641777597e-05, "loss": 0.1431, "step": 2775 }, { "epoch": 0.06116996369686052, "grad_norm": 2.1380667686462402, "learning_rate": 2.9923684829157224e-05, "loss": 0.1769, "step": 2776 }, { "epoch": 0.06119199898637668, "grad_norm": 1.7385858297348022, "learning_rate": 2.9923576940522525e-05, "loss": 0.1662, "step": 2777 }, { "epoch": 0.061214034275892844, "grad_norm": 1.9943515062332153, "learning_rate": 2.992346897587405e-05, "loss": 0.1458, "step": 2778 }, { "epoch": 0.061236069565409, "grad_norm": 2.1196653842926025, "learning_rate": 2.992336093521234e-05, "loss": 0.2074, "step": 2779 }, { "epoch": 0.061258104854925166, "grad_norm": 1.8979359865188599, "learning_rate": 2.9923252818537953e-05, "loss": 0.1625, "step": 2780 }, { "epoch": 0.061280140144441324, "grad_norm": 1.3834526538848877, "learning_rate": 2.9923144625851437e-05, "loss": 0.1278, "step": 2781 }, { "epoch": 0.06130217543395748, "grad_norm": 2.8072152137756348, "learning_rate": 2.992303635715334e-05, "loss": 0.1728, "step": 2782 }, { "epoch": 0.061324210723473646, "grad_norm": 1.5427178144454956, "learning_rate": 2.992292801244422e-05, "loss": 0.1765, "step": 2783 }, { "epoch": 0.061346246012989804, "grad_norm": 1.7004168033599854, "learning_rate": 2.9922819591724623e-05, "loss": 0.1607, "step": 2784 }, { "epoch": 0.06136828130250596, "grad_norm": 2.194347858428955, "learning_rate": 2.9922711094995105e-05, "loss": 0.1489, "step": 2785 }, { "epoch": 0.061390316592022126, "grad_norm": 0.934406578540802, "learning_rate": 2.9922602522256215e-05, "loss": 0.1496, "step": 2786 }, { "epoch": 0.061412351881538284, "grad_norm": 3.075256586074829, "learning_rate": 2.9922493873508508e-05, "loss": 0.1619, "step": 2787 }, { "epoch": 0.06143438717105444, "grad_norm": 1.9853085279464722, "learning_rate": 2.992238514875254e-05, "loss": 0.2031, "step": 2788 }, { "epoch": 0.061456422460570606, "grad_norm": 1.396536111831665, "learning_rate": 2.992227634798886e-05, "loss": 0.1657, "step": 2789 }, { "epoch": 0.061478457750086764, "grad_norm": 1.7285454273223877, "learning_rate": 2.9922167471218028e-05, "loss": 0.1942, "step": 2790 }, { "epoch": 0.06150049303960292, "grad_norm": 2.604261875152588, "learning_rate": 2.9922058518440594e-05, "loss": 0.212, "step": 2791 }, { "epoch": 0.061522528329119086, "grad_norm": 1.5080316066741943, "learning_rate": 2.9921949489657115e-05, "loss": 0.1705, "step": 2792 }, { "epoch": 0.061544563618635244, "grad_norm": 1.8139004707336426, "learning_rate": 2.992184038486814e-05, "loss": 0.1608, "step": 2793 }, { "epoch": 0.0615665989081514, "grad_norm": 1.567623496055603, "learning_rate": 2.9921731204074235e-05, "loss": 0.1871, "step": 2794 }, { "epoch": 0.061588634197667566, "grad_norm": 1.7676280736923218, "learning_rate": 2.992162194727595e-05, "loss": 0.1265, "step": 2795 }, { "epoch": 0.061610669487183724, "grad_norm": 1.0793695449829102, "learning_rate": 2.9921512614473845e-05, "loss": 0.158, "step": 2796 }, { "epoch": 0.06163270477669988, "grad_norm": 1.7518149614334106, "learning_rate": 2.992140320566847e-05, "loss": 0.2112, "step": 2797 }, { "epoch": 0.061654740066216046, "grad_norm": 2.2011332511901855, "learning_rate": 2.992129372086039e-05, "loss": 0.1773, "step": 2798 }, { "epoch": 0.061676775355732204, "grad_norm": 1.5113754272460938, "learning_rate": 2.992118416005016e-05, "loss": 0.2069, "step": 2799 }, { "epoch": 0.06169881064524837, "grad_norm": 1.6154820919036865, "learning_rate": 2.9921074523238336e-05, "loss": 0.1786, "step": 2800 }, { "epoch": 0.061720845934764526, "grad_norm": 1.6341689825057983, "learning_rate": 2.9920964810425483e-05, "loss": 0.1558, "step": 2801 }, { "epoch": 0.061742881224280684, "grad_norm": 1.7530335187911987, "learning_rate": 2.9920855021612148e-05, "loss": 0.156, "step": 2802 }, { "epoch": 0.06176491651379685, "grad_norm": 0.8229291439056396, "learning_rate": 2.99207451567989e-05, "loss": 0.1562, "step": 2803 }, { "epoch": 0.061786951803313006, "grad_norm": 1.9488059282302856, "learning_rate": 2.9920635215986302e-05, "loss": 0.1563, "step": 2804 }, { "epoch": 0.061808987092829164, "grad_norm": 1.509627103805542, "learning_rate": 2.99205251991749e-05, "loss": 0.1261, "step": 2805 }, { "epoch": 0.06183102238234533, "grad_norm": 2.1339271068573, "learning_rate": 2.9920415106365265e-05, "loss": 0.1604, "step": 2806 }, { "epoch": 0.061853057671861486, "grad_norm": 1.4909578561782837, "learning_rate": 2.992030493755795e-05, "loss": 0.1373, "step": 2807 }, { "epoch": 0.061875092961377644, "grad_norm": 2.026798725128174, "learning_rate": 2.9920194692753528e-05, "loss": 0.1485, "step": 2808 }, { "epoch": 0.06189712825089381, "grad_norm": 1.3498990535736084, "learning_rate": 2.9920084371952548e-05, "loss": 0.1355, "step": 2809 }, { "epoch": 0.061919163540409966, "grad_norm": 1.6562780141830444, "learning_rate": 2.9919973975155582e-05, "loss": 0.2157, "step": 2810 }, { "epoch": 0.061941198829926124, "grad_norm": 1.5109273195266724, "learning_rate": 2.9919863502363186e-05, "loss": 0.1289, "step": 2811 }, { "epoch": 0.06196323411944229, "grad_norm": 1.9800763130187988, "learning_rate": 2.9919752953575926e-05, "loss": 0.1414, "step": 2812 }, { "epoch": 0.061985269408958446, "grad_norm": 1.3184914588928223, "learning_rate": 2.991964232879436e-05, "loss": 0.1288, "step": 2813 }, { "epoch": 0.062007304698474604, "grad_norm": 1.5229713916778564, "learning_rate": 2.9919531628019055e-05, "loss": 0.1208, "step": 2814 }, { "epoch": 0.06202933998799077, "grad_norm": 1.5239989757537842, "learning_rate": 2.991942085125058e-05, "loss": 0.1358, "step": 2815 }, { "epoch": 0.062051375277506926, "grad_norm": 1.2591214179992676, "learning_rate": 2.991930999848949e-05, "loss": 0.1212, "step": 2816 }, { "epoch": 0.06207341056702309, "grad_norm": 1.732611894607544, "learning_rate": 2.991919906973636e-05, "loss": 0.1343, "step": 2817 }, { "epoch": 0.06209544585653925, "grad_norm": 1.5659713745117188, "learning_rate": 2.991908806499174e-05, "loss": 0.1018, "step": 2818 }, { "epoch": 0.062117481146055406, "grad_norm": 1.840762734413147, "learning_rate": 2.991897698425621e-05, "loss": 0.1136, "step": 2819 }, { "epoch": 0.06213951643557157, "grad_norm": 2.080864191055298, "learning_rate": 2.991886582753033e-05, "loss": 0.1906, "step": 2820 }, { "epoch": 0.06216155172508773, "grad_norm": 1.4707854986190796, "learning_rate": 2.9918754594814664e-05, "loss": 0.2156, "step": 2821 }, { "epoch": 0.062183587014603886, "grad_norm": 1.3991527557373047, "learning_rate": 2.9918643286109775e-05, "loss": 0.156, "step": 2822 }, { "epoch": 0.06220562230412005, "grad_norm": 1.4074407815933228, "learning_rate": 2.9918531901416247e-05, "loss": 0.1867, "step": 2823 }, { "epoch": 0.06222765759363621, "grad_norm": 1.639078140258789, "learning_rate": 2.991842044073463e-05, "loss": 0.1854, "step": 2824 }, { "epoch": 0.062249692883152366, "grad_norm": 1.8749959468841553, "learning_rate": 2.9918308904065498e-05, "loss": 0.1811, "step": 2825 }, { "epoch": 0.06227172817266853, "grad_norm": 1.1808221340179443, "learning_rate": 2.991819729140942e-05, "loss": 0.0977, "step": 2826 }, { "epoch": 0.06229376346218469, "grad_norm": 1.451369285583496, "learning_rate": 2.9918085602766965e-05, "loss": 0.2039, "step": 2827 }, { "epoch": 0.062315798751700846, "grad_norm": 1.3070321083068848, "learning_rate": 2.9917973838138695e-05, "loss": 0.1432, "step": 2828 }, { "epoch": 0.06233783404121701, "grad_norm": 1.6291406154632568, "learning_rate": 2.991786199752519e-05, "loss": 0.1852, "step": 2829 }, { "epoch": 0.06235986933073317, "grad_norm": 1.9998550415039062, "learning_rate": 2.9917750080927012e-05, "loss": 0.1583, "step": 2830 }, { "epoch": 0.062381904620249326, "grad_norm": 1.1984925270080566, "learning_rate": 2.9917638088344734e-05, "loss": 0.1355, "step": 2831 }, { "epoch": 0.06240393990976549, "grad_norm": 1.4915374517440796, "learning_rate": 2.9917526019778925e-05, "loss": 0.1815, "step": 2832 }, { "epoch": 0.06242597519928165, "grad_norm": 1.4429244995117188, "learning_rate": 2.9917413875230157e-05, "loss": 0.1239, "step": 2833 }, { "epoch": 0.06244801048879781, "grad_norm": 1.5120658874511719, "learning_rate": 2.9917301654699004e-05, "loss": 0.1682, "step": 2834 }, { "epoch": 0.06247004577831397, "grad_norm": 1.5446419715881348, "learning_rate": 2.9917189358186027e-05, "loss": 0.156, "step": 2835 }, { "epoch": 0.06249208106783013, "grad_norm": 1.3907856941223145, "learning_rate": 2.991707698569181e-05, "loss": 0.1643, "step": 2836 }, { "epoch": 0.06251411635734629, "grad_norm": 2.133911609649658, "learning_rate": 2.991696453721692e-05, "loss": 0.1641, "step": 2837 }, { "epoch": 0.06253615164686245, "grad_norm": 1.244144082069397, "learning_rate": 2.9916852012761934e-05, "loss": 0.1784, "step": 2838 }, { "epoch": 0.06255818693637862, "grad_norm": 1.7955330610275269, "learning_rate": 2.9916739412327417e-05, "loss": 0.2214, "step": 2839 }, { "epoch": 0.06258022222589477, "grad_norm": 1.479515552520752, "learning_rate": 2.9916626735913946e-05, "loss": 0.147, "step": 2840 }, { "epoch": 0.06260225751541093, "grad_norm": 1.9046554565429688, "learning_rate": 2.99165139835221e-05, "loss": 0.1835, "step": 2841 }, { "epoch": 0.0626242928049271, "grad_norm": 1.7199697494506836, "learning_rate": 2.9916401155152448e-05, "loss": 0.1538, "step": 2842 }, { "epoch": 0.06264632809444325, "grad_norm": 1.455493450164795, "learning_rate": 2.991628825080557e-05, "loss": 0.1602, "step": 2843 }, { "epoch": 0.06266836338395941, "grad_norm": 2.01560378074646, "learning_rate": 2.991617527048203e-05, "loss": 0.1833, "step": 2844 }, { "epoch": 0.06269039867347558, "grad_norm": 1.3062251806259155, "learning_rate": 2.9916062214182417e-05, "loss": 0.1224, "step": 2845 }, { "epoch": 0.06271243396299173, "grad_norm": 1.192774772644043, "learning_rate": 2.9915949081907293e-05, "loss": 0.1574, "step": 2846 }, { "epoch": 0.06273446925250789, "grad_norm": 1.521756649017334, "learning_rate": 2.9915835873657248e-05, "loss": 0.143, "step": 2847 }, { "epoch": 0.06275650454202406, "grad_norm": 1.8338675498962402, "learning_rate": 2.9915722589432853e-05, "loss": 0.1605, "step": 2848 }, { "epoch": 0.0627785398315402, "grad_norm": 1.7561672925949097, "learning_rate": 2.9915609229234678e-05, "loss": 0.1539, "step": 2849 }, { "epoch": 0.06280057512105637, "grad_norm": 1.5834851264953613, "learning_rate": 2.9915495793063313e-05, "loss": 0.134, "step": 2850 }, { "epoch": 0.06282261041057254, "grad_norm": 2.265413522720337, "learning_rate": 2.9915382280919326e-05, "loss": 0.1879, "step": 2851 }, { "epoch": 0.06284464570008869, "grad_norm": 1.9723600149154663, "learning_rate": 2.99152686928033e-05, "loss": 0.1282, "step": 2852 }, { "epoch": 0.06286668098960485, "grad_norm": 1.3034311532974243, "learning_rate": 2.9915155028715814e-05, "loss": 0.1624, "step": 2853 }, { "epoch": 0.06288871627912102, "grad_norm": 1.176019310951233, "learning_rate": 2.9915041288657444e-05, "loss": 0.1641, "step": 2854 }, { "epoch": 0.06291075156863717, "grad_norm": 1.7354710102081299, "learning_rate": 2.991492747262877e-05, "loss": 0.1523, "step": 2855 }, { "epoch": 0.06293278685815333, "grad_norm": 1.167008876800537, "learning_rate": 2.991481358063037e-05, "loss": 0.144, "step": 2856 }, { "epoch": 0.0629548221476695, "grad_norm": 1.6196650266647339, "learning_rate": 2.991469961266283e-05, "loss": 0.1467, "step": 2857 }, { "epoch": 0.06297685743718565, "grad_norm": 1.1299587488174438, "learning_rate": 2.9914585568726732e-05, "loss": 0.2025, "step": 2858 }, { "epoch": 0.06299889272670181, "grad_norm": 1.9097461700439453, "learning_rate": 2.9914471448822644e-05, "loss": 0.21, "step": 2859 }, { "epoch": 0.06302092801621798, "grad_norm": 1.5966689586639404, "learning_rate": 2.991435725295116e-05, "loss": 0.1773, "step": 2860 }, { "epoch": 0.06304296330573414, "grad_norm": 1.1042602062225342, "learning_rate": 2.991424298111285e-05, "loss": 0.1391, "step": 2861 }, { "epoch": 0.06306499859525029, "grad_norm": 1.5400662422180176, "learning_rate": 2.991412863330831e-05, "loss": 0.1586, "step": 2862 }, { "epoch": 0.06308703388476646, "grad_norm": 1.7530781030654907, "learning_rate": 2.991401420953811e-05, "loss": 0.1956, "step": 2863 }, { "epoch": 0.06310906917428262, "grad_norm": 1.8352174758911133, "learning_rate": 2.991389970980284e-05, "loss": 0.1632, "step": 2864 }, { "epoch": 0.06313110446379877, "grad_norm": 1.4428296089172363, "learning_rate": 2.9913785134103083e-05, "loss": 0.1372, "step": 2865 }, { "epoch": 0.06315313975331494, "grad_norm": 1.7394534349441528, "learning_rate": 2.991367048243942e-05, "loss": 0.1429, "step": 2866 }, { "epoch": 0.0631751750428311, "grad_norm": 1.4492026567459106, "learning_rate": 2.9913555754812433e-05, "loss": 0.1879, "step": 2867 }, { "epoch": 0.06319721033234725, "grad_norm": 1.4461615085601807, "learning_rate": 2.9913440951222713e-05, "loss": 0.1703, "step": 2868 }, { "epoch": 0.06321924562186342, "grad_norm": 1.5837053060531616, "learning_rate": 2.991332607167084e-05, "loss": 0.1642, "step": 2869 }, { "epoch": 0.06324128091137958, "grad_norm": 3.013435125350952, "learning_rate": 2.99132111161574e-05, "loss": 0.1545, "step": 2870 }, { "epoch": 0.06326331620089573, "grad_norm": 1.4921940565109253, "learning_rate": 2.9913096084682982e-05, "loss": 0.1966, "step": 2871 }, { "epoch": 0.0632853514904119, "grad_norm": 1.7622193098068237, "learning_rate": 2.9912980977248164e-05, "loss": 0.1734, "step": 2872 }, { "epoch": 0.06330738677992806, "grad_norm": 1.2382389307022095, "learning_rate": 2.9912865793853536e-05, "loss": 0.1668, "step": 2873 }, { "epoch": 0.06332942206944421, "grad_norm": 1.9982657432556152, "learning_rate": 2.9912750534499686e-05, "loss": 0.1699, "step": 2874 }, { "epoch": 0.06335145735896038, "grad_norm": 1.5516598224639893, "learning_rate": 2.9912635199187205e-05, "loss": 0.2081, "step": 2875 }, { "epoch": 0.06337349264847654, "grad_norm": 2.0371360778808594, "learning_rate": 2.9912519787916672e-05, "loss": 0.1941, "step": 2876 }, { "epoch": 0.06339552793799269, "grad_norm": 2.547736167907715, "learning_rate": 2.9912404300688683e-05, "loss": 0.2476, "step": 2877 }, { "epoch": 0.06341756322750886, "grad_norm": 1.9197725057601929, "learning_rate": 2.9912288737503817e-05, "loss": 0.1428, "step": 2878 }, { "epoch": 0.06343959851702502, "grad_norm": 1.1112046241760254, "learning_rate": 2.9912173098362674e-05, "loss": 0.2044, "step": 2879 }, { "epoch": 0.06346163380654117, "grad_norm": 1.6111559867858887, "learning_rate": 2.991205738326583e-05, "loss": 0.1784, "step": 2880 }, { "epoch": 0.06348366909605734, "grad_norm": 1.3199268579483032, "learning_rate": 2.9911941592213886e-05, "loss": 0.1465, "step": 2881 }, { "epoch": 0.0635057043855735, "grad_norm": 1.8369855880737305, "learning_rate": 2.9911825725207426e-05, "loss": 0.1483, "step": 2882 }, { "epoch": 0.06352773967508965, "grad_norm": 1.4511528015136719, "learning_rate": 2.991170978224704e-05, "loss": 0.2129, "step": 2883 }, { "epoch": 0.06354977496460582, "grad_norm": 1.2460554838180542, "learning_rate": 2.9911593763333323e-05, "loss": 0.1281, "step": 2884 }, { "epoch": 0.06357181025412198, "grad_norm": 1.0845766067504883, "learning_rate": 2.991147766846686e-05, "loss": 0.1792, "step": 2885 }, { "epoch": 0.06359384554363813, "grad_norm": 2.485926866531372, "learning_rate": 2.9911361497648248e-05, "loss": 0.1157, "step": 2886 }, { "epoch": 0.0636158808331543, "grad_norm": 2.191838026046753, "learning_rate": 2.991124525087807e-05, "loss": 0.1521, "step": 2887 }, { "epoch": 0.06363791612267046, "grad_norm": 1.5814483165740967, "learning_rate": 2.9911128928156927e-05, "loss": 0.1662, "step": 2888 }, { "epoch": 0.06365995141218661, "grad_norm": 2.315685510635376, "learning_rate": 2.991101252948541e-05, "loss": 0.1848, "step": 2889 }, { "epoch": 0.06368198670170278, "grad_norm": 1.640941858291626, "learning_rate": 2.9910896054864107e-05, "loss": 0.1378, "step": 2890 }, { "epoch": 0.06370402199121894, "grad_norm": 1.2749509811401367, "learning_rate": 2.9910779504293616e-05, "loss": 0.0937, "step": 2891 }, { "epoch": 0.06372605728073509, "grad_norm": 2.3528037071228027, "learning_rate": 2.991066287777453e-05, "loss": 0.1628, "step": 2892 }, { "epoch": 0.06374809257025126, "grad_norm": 1.960600733757019, "learning_rate": 2.9910546175307445e-05, "loss": 0.216, "step": 2893 }, { "epoch": 0.06377012785976742, "grad_norm": 1.3624860048294067, "learning_rate": 2.9910429396892948e-05, "loss": 0.1451, "step": 2894 }, { "epoch": 0.06379216314928358, "grad_norm": 1.5318291187286377, "learning_rate": 2.991031254253164e-05, "loss": 0.1428, "step": 2895 }, { "epoch": 0.06381419843879974, "grad_norm": 1.912523865699768, "learning_rate": 2.9910195612224115e-05, "loss": 0.1572, "step": 2896 }, { "epoch": 0.0638362337283159, "grad_norm": 1.5727477073669434, "learning_rate": 2.991007860597097e-05, "loss": 0.1673, "step": 2897 }, { "epoch": 0.06385826901783206, "grad_norm": 1.5427263975143433, "learning_rate": 2.9909961523772798e-05, "loss": 0.1531, "step": 2898 }, { "epoch": 0.06388030430734822, "grad_norm": 1.4503225088119507, "learning_rate": 2.990984436563019e-05, "loss": 0.1211, "step": 2899 }, { "epoch": 0.06390233959686438, "grad_norm": 2.180650234222412, "learning_rate": 2.990972713154376e-05, "loss": 0.1706, "step": 2900 }, { "epoch": 0.06392437488638054, "grad_norm": 1.796416163444519, "learning_rate": 2.990960982151409e-05, "loss": 0.1858, "step": 2901 }, { "epoch": 0.0639464101758967, "grad_norm": 1.2094484567642212, "learning_rate": 2.990949243554178e-05, "loss": 0.1613, "step": 2902 }, { "epoch": 0.06396844546541286, "grad_norm": 1.2655606269836426, "learning_rate": 2.9909374973627433e-05, "loss": 0.1826, "step": 2903 }, { "epoch": 0.06399048075492902, "grad_norm": 2.486957550048828, "learning_rate": 2.9909257435771645e-05, "loss": 0.1444, "step": 2904 }, { "epoch": 0.06401251604444518, "grad_norm": 1.2973802089691162, "learning_rate": 2.990913982197501e-05, "loss": 0.1489, "step": 2905 }, { "epoch": 0.06403455133396134, "grad_norm": 1.1871122121810913, "learning_rate": 2.9909022132238135e-05, "loss": 0.202, "step": 2906 }, { "epoch": 0.0640565866234775, "grad_norm": 1.8672869205474854, "learning_rate": 2.9908904366561607e-05, "loss": 0.1407, "step": 2907 }, { "epoch": 0.06407862191299366, "grad_norm": 2.7274341583251953, "learning_rate": 2.9908786524946042e-05, "loss": 0.1391, "step": 2908 }, { "epoch": 0.06410065720250982, "grad_norm": 2.2153947353363037, "learning_rate": 2.9908668607392032e-05, "loss": 0.1804, "step": 2909 }, { "epoch": 0.06412269249202598, "grad_norm": 1.408387303352356, "learning_rate": 2.990855061390018e-05, "loss": 0.1503, "step": 2910 }, { "epoch": 0.06414472778154214, "grad_norm": 1.3226358890533447, "learning_rate": 2.990843254447108e-05, "loss": 0.1628, "step": 2911 }, { "epoch": 0.0641667630710583, "grad_norm": 1.5219041109085083, "learning_rate": 2.990831439910534e-05, "loss": 0.1566, "step": 2912 }, { "epoch": 0.06418879836057446, "grad_norm": 1.146316409111023, "learning_rate": 2.990819617780356e-05, "loss": 0.1505, "step": 2913 }, { "epoch": 0.06421083365009062, "grad_norm": 2.338995933532715, "learning_rate": 2.9908077880566348e-05, "loss": 0.1289, "step": 2914 }, { "epoch": 0.06423286893960678, "grad_norm": 2.152070999145508, "learning_rate": 2.9907959507394294e-05, "loss": 0.1557, "step": 2915 }, { "epoch": 0.06425490422912294, "grad_norm": 1.3512217998504639, "learning_rate": 2.990784105828801e-05, "loss": 0.1674, "step": 2916 }, { "epoch": 0.0642769395186391, "grad_norm": 3.265937328338623, "learning_rate": 2.9907722533248096e-05, "loss": 0.1475, "step": 2917 }, { "epoch": 0.06429897480815526, "grad_norm": 1.5457594394683838, "learning_rate": 2.990760393227516e-05, "loss": 0.1637, "step": 2918 }, { "epoch": 0.06432101009767142, "grad_norm": 4.372171401977539, "learning_rate": 2.9907485255369805e-05, "loss": 0.1936, "step": 2919 }, { "epoch": 0.06434304538718758, "grad_norm": 2.0547568798065186, "learning_rate": 2.9907366502532628e-05, "loss": 0.1774, "step": 2920 }, { "epoch": 0.06436508067670374, "grad_norm": 2.2067649364471436, "learning_rate": 2.9907247673764242e-05, "loss": 0.2147, "step": 2921 }, { "epoch": 0.0643871159662199, "grad_norm": 2.7736923694610596, "learning_rate": 2.990712876906525e-05, "loss": 0.1749, "step": 2922 }, { "epoch": 0.06440915125573606, "grad_norm": 2.8670310974121094, "learning_rate": 2.9907009788436256e-05, "loss": 0.2091, "step": 2923 }, { "epoch": 0.06443118654525222, "grad_norm": 1.528747797012329, "learning_rate": 2.990689073187787e-05, "loss": 0.1357, "step": 2924 }, { "epoch": 0.06445322183476838, "grad_norm": 1.6019785404205322, "learning_rate": 2.9906771599390692e-05, "loss": 0.1643, "step": 2925 }, { "epoch": 0.06447525712428454, "grad_norm": 1.44288170337677, "learning_rate": 2.9906652390975333e-05, "loss": 0.1348, "step": 2926 }, { "epoch": 0.0644972924138007, "grad_norm": 1.7807064056396484, "learning_rate": 2.9906533106632404e-05, "loss": 0.1813, "step": 2927 }, { "epoch": 0.06451932770331686, "grad_norm": 1.2585233449935913, "learning_rate": 2.9906413746362503e-05, "loss": 0.1499, "step": 2928 }, { "epoch": 0.06454136299283303, "grad_norm": 1.4282281398773193, "learning_rate": 2.990629431016625e-05, "loss": 0.1456, "step": 2929 }, { "epoch": 0.06456339828234918, "grad_norm": 1.4848231077194214, "learning_rate": 2.9906174798044242e-05, "loss": 0.138, "step": 2930 }, { "epoch": 0.06458543357186534, "grad_norm": 1.4533660411834717, "learning_rate": 2.990605520999709e-05, "loss": 0.1447, "step": 2931 }, { "epoch": 0.06460746886138151, "grad_norm": 1.7495931386947632, "learning_rate": 2.990593554602541e-05, "loss": 0.1338, "step": 2932 }, { "epoch": 0.06462950415089766, "grad_norm": 1.2118852138519287, "learning_rate": 2.990581580612981e-05, "loss": 0.1776, "step": 2933 }, { "epoch": 0.06465153944041382, "grad_norm": 3.6388471126556396, "learning_rate": 2.9905695990310888e-05, "loss": 0.2278, "step": 2934 }, { "epoch": 0.06467357472992999, "grad_norm": 1.4443612098693848, "learning_rate": 2.9905576098569266e-05, "loss": 0.134, "step": 2935 }, { "epoch": 0.06469561001944614, "grad_norm": 1.8368353843688965, "learning_rate": 2.9905456130905557e-05, "loss": 0.1425, "step": 2936 }, { "epoch": 0.0647176453089623, "grad_norm": 2.5908100605010986, "learning_rate": 2.9905336087320364e-05, "loss": 0.2221, "step": 2937 }, { "epoch": 0.06473968059847847, "grad_norm": 1.4501570463180542, "learning_rate": 2.99052159678143e-05, "loss": 0.1539, "step": 2938 }, { "epoch": 0.06476171588799462, "grad_norm": 1.7345529794692993, "learning_rate": 2.9905095772387978e-05, "loss": 0.1587, "step": 2939 }, { "epoch": 0.06478375117751078, "grad_norm": 1.390223741531372, "learning_rate": 2.9904975501042015e-05, "loss": 0.1376, "step": 2940 }, { "epoch": 0.06480578646702695, "grad_norm": 1.853793740272522, "learning_rate": 2.9904855153777015e-05, "loss": 0.1507, "step": 2941 }, { "epoch": 0.0648278217565431, "grad_norm": 1.19053316116333, "learning_rate": 2.9904734730593596e-05, "loss": 0.1427, "step": 2942 }, { "epoch": 0.06484985704605926, "grad_norm": 1.6493276357650757, "learning_rate": 2.9904614231492373e-05, "loss": 0.1607, "step": 2943 }, { "epoch": 0.06487189233557543, "grad_norm": 2.424769878387451, "learning_rate": 2.9904493656473955e-05, "loss": 0.1574, "step": 2944 }, { "epoch": 0.06489392762509158, "grad_norm": 1.6881073713302612, "learning_rate": 2.9904373005538956e-05, "loss": 0.1264, "step": 2945 }, { "epoch": 0.06491596291460774, "grad_norm": 2.180544853210449, "learning_rate": 2.9904252278688e-05, "loss": 0.2108, "step": 2946 }, { "epoch": 0.06493799820412391, "grad_norm": 1.8090614080429077, "learning_rate": 2.990413147592169e-05, "loss": 0.1799, "step": 2947 }, { "epoch": 0.06496003349364006, "grad_norm": 1.2790864706039429, "learning_rate": 2.9904010597240642e-05, "loss": 0.2075, "step": 2948 }, { "epoch": 0.06498206878315622, "grad_norm": 1.644365906715393, "learning_rate": 2.9903889642645483e-05, "loss": 0.1577, "step": 2949 }, { "epoch": 0.06500410407267239, "grad_norm": 1.2187566757202148, "learning_rate": 2.990376861213682e-05, "loss": 0.1985, "step": 2950 }, { "epoch": 0.06502613936218854, "grad_norm": 2.07779860496521, "learning_rate": 2.990364750571527e-05, "loss": 0.1737, "step": 2951 }, { "epoch": 0.0650481746517047, "grad_norm": 1.5804557800292969, "learning_rate": 2.990352632338145e-05, "loss": 0.184, "step": 2952 }, { "epoch": 0.06507020994122087, "grad_norm": 1.2786881923675537, "learning_rate": 2.990340506513598e-05, "loss": 0.1553, "step": 2953 }, { "epoch": 0.06509224523073702, "grad_norm": 1.9189542531967163, "learning_rate": 2.9903283730979475e-05, "loss": 0.1815, "step": 2954 }, { "epoch": 0.06511428052025318, "grad_norm": 2.214138984680176, "learning_rate": 2.9903162320912555e-05, "loss": 0.1872, "step": 2955 }, { "epoch": 0.06513631580976935, "grad_norm": 1.501296877861023, "learning_rate": 2.990304083493584e-05, "loss": 0.1667, "step": 2956 }, { "epoch": 0.0651583510992855, "grad_norm": 1.7433313131332397, "learning_rate": 2.9902919273049947e-05, "loss": 0.2013, "step": 2957 }, { "epoch": 0.06518038638880166, "grad_norm": 2.5128490924835205, "learning_rate": 2.990279763525549e-05, "loss": 0.1427, "step": 2958 }, { "epoch": 0.06520242167831783, "grad_norm": 1.3338673114776611, "learning_rate": 2.9902675921553096e-05, "loss": 0.1306, "step": 2959 }, { "epoch": 0.06522445696783398, "grad_norm": 1.2897775173187256, "learning_rate": 2.9902554131943384e-05, "loss": 0.111, "step": 2960 }, { "epoch": 0.06524649225735014, "grad_norm": 1.8524935245513916, "learning_rate": 2.9902432266426967e-05, "loss": 0.1463, "step": 2961 }, { "epoch": 0.06526852754686631, "grad_norm": 1.4888097047805786, "learning_rate": 2.9902310325004476e-05, "loss": 0.162, "step": 2962 }, { "epoch": 0.06529056283638246, "grad_norm": 1.4158196449279785, "learning_rate": 2.9902188307676522e-05, "loss": 0.1764, "step": 2963 }, { "epoch": 0.06531259812589862, "grad_norm": 1.4083662033081055, "learning_rate": 2.9902066214443738e-05, "loss": 0.1695, "step": 2964 }, { "epoch": 0.06533463341541479, "grad_norm": 1.374375581741333, "learning_rate": 2.9901944045306735e-05, "loss": 0.165, "step": 2965 }, { "epoch": 0.06535666870493095, "grad_norm": 1.098288893699646, "learning_rate": 2.9901821800266144e-05, "loss": 0.1493, "step": 2966 }, { "epoch": 0.0653787039944471, "grad_norm": 1.9859330654144287, "learning_rate": 2.990169947932258e-05, "loss": 0.153, "step": 2967 }, { "epoch": 0.06540073928396327, "grad_norm": 1.335602045059204, "learning_rate": 2.990157708247667e-05, "loss": 0.1275, "step": 2968 }, { "epoch": 0.06542277457347943, "grad_norm": 1.6233576536178589, "learning_rate": 2.990145460972904e-05, "loss": 0.1905, "step": 2969 }, { "epoch": 0.06544480986299558, "grad_norm": 3.3902478218078613, "learning_rate": 2.9901332061080307e-05, "loss": 0.155, "step": 2970 }, { "epoch": 0.06546684515251175, "grad_norm": 1.5009124279022217, "learning_rate": 2.9901209436531103e-05, "loss": 0.1221, "step": 2971 }, { "epoch": 0.06548888044202791, "grad_norm": 1.172652244567871, "learning_rate": 2.9901086736082045e-05, "loss": 0.1767, "step": 2972 }, { "epoch": 0.06551091573154406, "grad_norm": 1.134953498840332, "learning_rate": 2.990096395973376e-05, "loss": 0.1452, "step": 2973 }, { "epoch": 0.06553295102106023, "grad_norm": 2.231576919555664, "learning_rate": 2.990084110748688e-05, "loss": 0.1824, "step": 2974 }, { "epoch": 0.0655549863105764, "grad_norm": 1.4543288946151733, "learning_rate": 2.9900718179342024e-05, "loss": 0.1503, "step": 2975 }, { "epoch": 0.06557702160009254, "grad_norm": 1.8136169910430908, "learning_rate": 2.9900595175299824e-05, "loss": 0.2141, "step": 2976 }, { "epoch": 0.06559905688960871, "grad_norm": 1.335862159729004, "learning_rate": 2.9900472095360897e-05, "loss": 0.1298, "step": 2977 }, { "epoch": 0.06562109217912487, "grad_norm": 1.279468059539795, "learning_rate": 2.9900348939525876e-05, "loss": 0.106, "step": 2978 }, { "epoch": 0.06564312746864102, "grad_norm": 1.527976155281067, "learning_rate": 2.9900225707795385e-05, "loss": 0.1587, "step": 2979 }, { "epoch": 0.06566516275815719, "grad_norm": 1.6721651554107666, "learning_rate": 2.990010240017006e-05, "loss": 0.1247, "step": 2980 }, { "epoch": 0.06568719804767335, "grad_norm": 1.616966962814331, "learning_rate": 2.989997901665052e-05, "loss": 0.1416, "step": 2981 }, { "epoch": 0.0657092333371895, "grad_norm": 1.4780761003494263, "learning_rate": 2.98998555572374e-05, "loss": 0.1092, "step": 2982 }, { "epoch": 0.06573126862670567, "grad_norm": 1.1594455242156982, "learning_rate": 2.989973202193132e-05, "loss": 0.1212, "step": 2983 }, { "epoch": 0.06575330391622183, "grad_norm": 2.170475959777832, "learning_rate": 2.989960841073292e-05, "loss": 0.1583, "step": 2984 }, { "epoch": 0.06577533920573798, "grad_norm": 1.4657068252563477, "learning_rate": 2.9899484723642822e-05, "loss": 0.1414, "step": 2985 }, { "epoch": 0.06579737449525415, "grad_norm": 1.656149983406067, "learning_rate": 2.9899360960661663e-05, "loss": 0.1514, "step": 2986 }, { "epoch": 0.06581940978477031, "grad_norm": 1.5626194477081299, "learning_rate": 2.9899237121790064e-05, "loss": 0.1304, "step": 2987 }, { "epoch": 0.06584144507428646, "grad_norm": 1.3644124269485474, "learning_rate": 2.9899113207028664e-05, "loss": 0.1424, "step": 2988 }, { "epoch": 0.06586348036380263, "grad_norm": 1.4207262992858887, "learning_rate": 2.989898921637809e-05, "loss": 0.1199, "step": 2989 }, { "epoch": 0.0658855156533188, "grad_norm": 2.541625738143921, "learning_rate": 2.9898865149838973e-05, "loss": 0.1737, "step": 2990 }, { "epoch": 0.06590755094283494, "grad_norm": 1.5693457126617432, "learning_rate": 2.9898741007411946e-05, "loss": 0.1704, "step": 2991 }, { "epoch": 0.06592958623235111, "grad_norm": 2.09085750579834, "learning_rate": 2.989861678909764e-05, "loss": 0.1541, "step": 2992 }, { "epoch": 0.06595162152186727, "grad_norm": 1.059754729270935, "learning_rate": 2.9898492494896696e-05, "loss": 0.1771, "step": 2993 }, { "epoch": 0.06597365681138342, "grad_norm": 1.3041106462478638, "learning_rate": 2.9898368124809733e-05, "loss": 0.1741, "step": 2994 }, { "epoch": 0.06599569210089959, "grad_norm": 1.483661413192749, "learning_rate": 2.98982436788374e-05, "loss": 0.1697, "step": 2995 }, { "epoch": 0.06601772739041575, "grad_norm": 1.8174779415130615, "learning_rate": 2.9898119156980318e-05, "loss": 0.1607, "step": 2996 }, { "epoch": 0.0660397626799319, "grad_norm": 1.278983473777771, "learning_rate": 2.9897994559239125e-05, "loss": 0.163, "step": 2997 }, { "epoch": 0.06606179796944807, "grad_norm": 2.0722923278808594, "learning_rate": 2.989786988561446e-05, "loss": 0.1748, "step": 2998 }, { "epoch": 0.06608383325896423, "grad_norm": 1.3122968673706055, "learning_rate": 2.9897745136106956e-05, "loss": 0.1596, "step": 2999 }, { "epoch": 0.0661058685484804, "grad_norm": 2.097005605697632, "learning_rate": 2.9897620310717243e-05, "loss": 0.1562, "step": 3000 }, { "epoch": 0.06612790383799655, "grad_norm": 1.4145175218582153, "learning_rate": 2.989749540944596e-05, "loss": 0.1649, "step": 3001 }, { "epoch": 0.06614993912751271, "grad_norm": 2.262136220932007, "learning_rate": 2.989737043229375e-05, "loss": 0.1253, "step": 3002 }, { "epoch": 0.06617197441702888, "grad_norm": 1.843327522277832, "learning_rate": 2.989724537926124e-05, "loss": 0.1786, "step": 3003 }, { "epoch": 0.06619400970654503, "grad_norm": 1.946624517440796, "learning_rate": 2.989712025034907e-05, "loss": 0.1535, "step": 3004 }, { "epoch": 0.0662160449960612, "grad_norm": 1.9314682483673096, "learning_rate": 2.9896995045557882e-05, "loss": 0.1633, "step": 3005 }, { "epoch": 0.06623808028557736, "grad_norm": 2.3425233364105225, "learning_rate": 2.9896869764888305e-05, "loss": 0.179, "step": 3006 }, { "epoch": 0.06626011557509351, "grad_norm": 1.76068913936615, "learning_rate": 2.9896744408340986e-05, "loss": 0.1428, "step": 3007 }, { "epoch": 0.06628215086460967, "grad_norm": 1.9138177633285522, "learning_rate": 2.9896618975916556e-05, "loss": 0.1506, "step": 3008 }, { "epoch": 0.06630418615412584, "grad_norm": 1.7002615928649902, "learning_rate": 2.9896493467615663e-05, "loss": 0.2154, "step": 3009 }, { "epoch": 0.06632622144364199, "grad_norm": 1.5627914667129517, "learning_rate": 2.9896367883438932e-05, "loss": 0.1907, "step": 3010 }, { "epoch": 0.06634825673315815, "grad_norm": 3.722507953643799, "learning_rate": 2.989624222338702e-05, "loss": 0.2478, "step": 3011 }, { "epoch": 0.06637029202267432, "grad_norm": 3.286135673522949, "learning_rate": 2.989611648746055e-05, "loss": 0.1565, "step": 3012 }, { "epoch": 0.06639232731219047, "grad_norm": 2.742868423461914, "learning_rate": 2.989599067566018e-05, "loss": 0.174, "step": 3013 }, { "epoch": 0.06641436260170663, "grad_norm": 1.5967633724212646, "learning_rate": 2.9895864787986532e-05, "loss": 0.1286, "step": 3014 }, { "epoch": 0.0664363978912228, "grad_norm": 1.8628087043762207, "learning_rate": 2.9895738824440264e-05, "loss": 0.181, "step": 3015 }, { "epoch": 0.06645843318073895, "grad_norm": 1.2856147289276123, "learning_rate": 2.9895612785022006e-05, "loss": 0.159, "step": 3016 }, { "epoch": 0.06648046847025511, "grad_norm": 1.5912848711013794, "learning_rate": 2.9895486669732408e-05, "loss": 0.1826, "step": 3017 }, { "epoch": 0.06650250375977128, "grad_norm": 2.228649854660034, "learning_rate": 2.9895360478572103e-05, "loss": 0.1572, "step": 3018 }, { "epoch": 0.06652453904928743, "grad_norm": 1.8212448358535767, "learning_rate": 2.9895234211541746e-05, "loss": 0.1862, "step": 3019 }, { "epoch": 0.0665465743388036, "grad_norm": 1.398868441581726, "learning_rate": 2.9895107868641974e-05, "loss": 0.1457, "step": 3020 }, { "epoch": 0.06656860962831976, "grad_norm": 1.8603438138961792, "learning_rate": 2.9894981449873427e-05, "loss": 0.1307, "step": 3021 }, { "epoch": 0.06659064491783591, "grad_norm": 2.0235538482666016, "learning_rate": 2.9894854955236753e-05, "loss": 0.1691, "step": 3022 }, { "epoch": 0.06661268020735207, "grad_norm": 1.8748985528945923, "learning_rate": 2.9894728384732594e-05, "loss": 0.2253, "step": 3023 }, { "epoch": 0.06663471549686824, "grad_norm": 1.4090602397918701, "learning_rate": 2.9894601738361598e-05, "loss": 0.1914, "step": 3024 }, { "epoch": 0.06665675078638439, "grad_norm": 2.117579221725464, "learning_rate": 2.989447501612441e-05, "loss": 0.1985, "step": 3025 }, { "epoch": 0.06667878607590055, "grad_norm": 1.3508034944534302, "learning_rate": 2.989434821802167e-05, "loss": 0.1943, "step": 3026 }, { "epoch": 0.06670082136541672, "grad_norm": 1.5825883150100708, "learning_rate": 2.9894221344054033e-05, "loss": 0.1709, "step": 3027 }, { "epoch": 0.06672285665493287, "grad_norm": 2.1386351585388184, "learning_rate": 2.989409439422213e-05, "loss": 0.1488, "step": 3028 }, { "epoch": 0.06674489194444903, "grad_norm": 1.403061032295227, "learning_rate": 2.9893967368526626e-05, "loss": 0.1482, "step": 3029 }, { "epoch": 0.0667669272339652, "grad_norm": 1.2961591482162476, "learning_rate": 2.9893840266968157e-05, "loss": 0.1429, "step": 3030 }, { "epoch": 0.06678896252348135, "grad_norm": 1.759058952331543, "learning_rate": 2.989371308954737e-05, "loss": 0.1828, "step": 3031 }, { "epoch": 0.06681099781299751, "grad_norm": 1.8556193113327026, "learning_rate": 2.989358583626492e-05, "loss": 0.1769, "step": 3032 }, { "epoch": 0.06683303310251368, "grad_norm": 2.4265384674072266, "learning_rate": 2.989345850712145e-05, "loss": 0.202, "step": 3033 }, { "epoch": 0.06685506839202983, "grad_norm": 1.7846285104751587, "learning_rate": 2.9893331102117606e-05, "loss": 0.1256, "step": 3034 }, { "epoch": 0.066877103681546, "grad_norm": 1.2249767780303955, "learning_rate": 2.9893203621254044e-05, "loss": 0.1297, "step": 3035 }, { "epoch": 0.06689913897106216, "grad_norm": 1.1815011501312256, "learning_rate": 2.9893076064531406e-05, "loss": 0.1284, "step": 3036 }, { "epoch": 0.06692117426057832, "grad_norm": 1.6574230194091797, "learning_rate": 2.9892948431950348e-05, "loss": 0.154, "step": 3037 }, { "epoch": 0.06694320955009447, "grad_norm": 1.9290598630905151, "learning_rate": 2.989282072351152e-05, "loss": 0.126, "step": 3038 }, { "epoch": 0.06696524483961064, "grad_norm": 0.8852233290672302, "learning_rate": 2.9892692939215564e-05, "loss": 0.1523, "step": 3039 }, { "epoch": 0.0669872801291268, "grad_norm": 1.1458041667938232, "learning_rate": 2.9892565079063136e-05, "loss": 0.1838, "step": 3040 }, { "epoch": 0.06700931541864295, "grad_norm": 1.4547781944274902, "learning_rate": 2.9892437143054893e-05, "loss": 0.1646, "step": 3041 }, { "epoch": 0.06703135070815912, "grad_norm": 2.009033679962158, "learning_rate": 2.989230913119148e-05, "loss": 0.2031, "step": 3042 }, { "epoch": 0.06705338599767528, "grad_norm": 1.2901915311813354, "learning_rate": 2.9892181043473552e-05, "loss": 0.1628, "step": 3043 }, { "epoch": 0.06707542128719143, "grad_norm": 1.2817975282669067, "learning_rate": 2.9892052879901756e-05, "loss": 0.156, "step": 3044 }, { "epoch": 0.0670974565767076, "grad_norm": 1.3762787580490112, "learning_rate": 2.9891924640476753e-05, "loss": 0.1492, "step": 3045 }, { "epoch": 0.06711949186622376, "grad_norm": 1.8412190675735474, "learning_rate": 2.989179632519919e-05, "loss": 0.1618, "step": 3046 }, { "epoch": 0.06714152715573991, "grad_norm": 1.546494483947754, "learning_rate": 2.9891667934069726e-05, "loss": 0.1579, "step": 3047 }, { "epoch": 0.06716356244525608, "grad_norm": 1.724806547164917, "learning_rate": 2.9891539467089007e-05, "loss": 0.1479, "step": 3048 }, { "epoch": 0.06718559773477224, "grad_norm": 1.219423532485962, "learning_rate": 2.9891410924257694e-05, "loss": 0.1133, "step": 3049 }, { "epoch": 0.0672076330242884, "grad_norm": 1.3022197484970093, "learning_rate": 2.9891282305576443e-05, "loss": 0.1371, "step": 3050 }, { "epoch": 0.06722966831380456, "grad_norm": 1.9057934284210205, "learning_rate": 2.9891153611045903e-05, "loss": 0.1727, "step": 3051 }, { "epoch": 0.06725170360332072, "grad_norm": 1.308097004890442, "learning_rate": 2.989102484066673e-05, "loss": 0.131, "step": 3052 }, { "epoch": 0.06727373889283687, "grad_norm": 1.8157172203063965, "learning_rate": 2.9890895994439586e-05, "loss": 0.1619, "step": 3053 }, { "epoch": 0.06729577418235304, "grad_norm": 2.7205424308776855, "learning_rate": 2.989076707236512e-05, "loss": 0.1704, "step": 3054 }, { "epoch": 0.0673178094718692, "grad_norm": 1.4365657567977905, "learning_rate": 2.9890638074443993e-05, "loss": 0.1594, "step": 3055 }, { "epoch": 0.06733984476138535, "grad_norm": 1.841927409172058, "learning_rate": 2.9890509000676867e-05, "loss": 0.2201, "step": 3056 }, { "epoch": 0.06736188005090152, "grad_norm": 1.469205617904663, "learning_rate": 2.9890379851064387e-05, "loss": 0.1717, "step": 3057 }, { "epoch": 0.06738391534041768, "grad_norm": 1.7826679944992065, "learning_rate": 2.989025062560722e-05, "loss": 0.1408, "step": 3058 }, { "epoch": 0.06740595062993383, "grad_norm": 1.356685996055603, "learning_rate": 2.989012132430602e-05, "loss": 0.1268, "step": 3059 }, { "epoch": 0.06742798591945, "grad_norm": 1.3846080303192139, "learning_rate": 2.988999194716145e-05, "loss": 0.2375, "step": 3060 }, { "epoch": 0.06745002120896616, "grad_norm": 1.6659431457519531, "learning_rate": 2.9889862494174166e-05, "loss": 0.185, "step": 3061 }, { "epoch": 0.06747205649848231, "grad_norm": 1.3837796449661255, "learning_rate": 2.988973296534483e-05, "loss": 0.1596, "step": 3062 }, { "epoch": 0.06749409178799848, "grad_norm": 3.234525442123413, "learning_rate": 2.9889603360674097e-05, "loss": 0.1413, "step": 3063 }, { "epoch": 0.06751612707751464, "grad_norm": 1.9616832733154297, "learning_rate": 2.9889473680162627e-05, "loss": 0.2189, "step": 3064 }, { "epoch": 0.0675381623670308, "grad_norm": 1.8835052251815796, "learning_rate": 2.9889343923811086e-05, "loss": 0.1691, "step": 3065 }, { "epoch": 0.06756019765654696, "grad_norm": 1.6683175563812256, "learning_rate": 2.9889214091620132e-05, "loss": 0.1874, "step": 3066 }, { "epoch": 0.06758223294606312, "grad_norm": 2.416278600692749, "learning_rate": 2.9889084183590425e-05, "loss": 0.1642, "step": 3067 }, { "epoch": 0.06760426823557927, "grad_norm": 0.923233151435852, "learning_rate": 2.988895419972263e-05, "loss": 0.1216, "step": 3068 }, { "epoch": 0.06762630352509544, "grad_norm": 1.0601754188537598, "learning_rate": 2.9888824140017406e-05, "loss": 0.1721, "step": 3069 }, { "epoch": 0.0676483388146116, "grad_norm": 1.3766626119613647, "learning_rate": 2.9888694004475418e-05, "loss": 0.1285, "step": 3070 }, { "epoch": 0.06767037410412777, "grad_norm": 1.712959885597229, "learning_rate": 2.9888563793097327e-05, "loss": 0.181, "step": 3071 }, { "epoch": 0.06769240939364392, "grad_norm": 0.7688530683517456, "learning_rate": 2.9888433505883797e-05, "loss": 0.1471, "step": 3072 }, { "epoch": 0.06771444468316008, "grad_norm": 1.4009026288986206, "learning_rate": 2.9888303142835486e-05, "loss": 0.1891, "step": 3073 }, { "epoch": 0.06773647997267625, "grad_norm": 1.294302225112915, "learning_rate": 2.9888172703953073e-05, "loss": 0.1705, "step": 3074 }, { "epoch": 0.0677585152621924, "grad_norm": 1.2757014036178589, "learning_rate": 2.9888042189237204e-05, "loss": 0.1288, "step": 3075 }, { "epoch": 0.06778055055170856, "grad_norm": 1.5276516675949097, "learning_rate": 2.9887911598688554e-05, "loss": 0.1333, "step": 3076 }, { "epoch": 0.06780258584122473, "grad_norm": 2.5457394123077393, "learning_rate": 2.9887780932307793e-05, "loss": 0.1541, "step": 3077 }, { "epoch": 0.06782462113074088, "grad_norm": 1.4437377452850342, "learning_rate": 2.9887650190095578e-05, "loss": 0.1642, "step": 3078 }, { "epoch": 0.06784665642025704, "grad_norm": 1.4493014812469482, "learning_rate": 2.988751937205257e-05, "loss": 0.1112, "step": 3079 }, { "epoch": 0.06786869170977321, "grad_norm": 2.053537368774414, "learning_rate": 2.9887388478179452e-05, "loss": 0.1952, "step": 3080 }, { "epoch": 0.06789072699928936, "grad_norm": 1.2795296907424927, "learning_rate": 2.9887257508476877e-05, "loss": 0.1523, "step": 3081 }, { "epoch": 0.06791276228880552, "grad_norm": 1.3770240545272827, "learning_rate": 2.9887126462945517e-05, "loss": 0.1581, "step": 3082 }, { "epoch": 0.06793479757832169, "grad_norm": 1.4269922971725464, "learning_rate": 2.9886995341586036e-05, "loss": 0.1914, "step": 3083 }, { "epoch": 0.06795683286783784, "grad_norm": 1.2497179508209229, "learning_rate": 2.9886864144399105e-05, "loss": 0.1231, "step": 3084 }, { "epoch": 0.067978868157354, "grad_norm": 1.8038631677627563, "learning_rate": 2.9886732871385396e-05, "loss": 0.1547, "step": 3085 }, { "epoch": 0.06800090344687017, "grad_norm": 1.8781346082687378, "learning_rate": 2.988660152254557e-05, "loss": 0.1863, "step": 3086 }, { "epoch": 0.06802293873638632, "grad_norm": 1.2882952690124512, "learning_rate": 2.9886470097880302e-05, "loss": 0.1513, "step": 3087 }, { "epoch": 0.06804497402590248, "grad_norm": 1.1505827903747559, "learning_rate": 2.988633859739026e-05, "loss": 0.1099, "step": 3088 }, { "epoch": 0.06806700931541865, "grad_norm": 1.1212162971496582, "learning_rate": 2.988620702107611e-05, "loss": 0.1111, "step": 3089 }, { "epoch": 0.0680890446049348, "grad_norm": 1.5276234149932861, "learning_rate": 2.9886075368938522e-05, "loss": 0.1545, "step": 3090 }, { "epoch": 0.06811107989445096, "grad_norm": 1.3793299198150635, "learning_rate": 2.988594364097818e-05, "loss": 0.1301, "step": 3091 }, { "epoch": 0.06813311518396713, "grad_norm": 1.3254400491714478, "learning_rate": 2.9885811837195735e-05, "loss": 0.1355, "step": 3092 }, { "epoch": 0.06815515047348328, "grad_norm": 1.7200933694839478, "learning_rate": 2.9885679957591872e-05, "loss": 0.1298, "step": 3093 }, { "epoch": 0.06817718576299944, "grad_norm": 1.370326280593872, "learning_rate": 2.9885548002167254e-05, "loss": 0.1985, "step": 3094 }, { "epoch": 0.06819922105251561, "grad_norm": 1.0121675729751587, "learning_rate": 2.988541597092256e-05, "loss": 0.1625, "step": 3095 }, { "epoch": 0.06822125634203176, "grad_norm": 1.6842589378356934, "learning_rate": 2.9885283863858467e-05, "loss": 0.129, "step": 3096 }, { "epoch": 0.06824329163154792, "grad_norm": 1.7971426248550415, "learning_rate": 2.9885151680975634e-05, "loss": 0.1504, "step": 3097 }, { "epoch": 0.06826532692106409, "grad_norm": 1.825276494026184, "learning_rate": 2.988501942227474e-05, "loss": 0.2197, "step": 3098 }, { "epoch": 0.06828736221058024, "grad_norm": 1.8774622678756714, "learning_rate": 2.9884887087756466e-05, "loss": 0.1713, "step": 3099 }, { "epoch": 0.0683093975000964, "grad_norm": 1.5706206560134888, "learning_rate": 2.9884754677421474e-05, "loss": 0.2122, "step": 3100 }, { "epoch": 0.06833143278961257, "grad_norm": 0.8016833066940308, "learning_rate": 2.9884622191270453e-05, "loss": 0.1179, "step": 3101 }, { "epoch": 0.06835346807912872, "grad_norm": 1.823183298110962, "learning_rate": 2.988448962930406e-05, "loss": 0.2325, "step": 3102 }, { "epoch": 0.06837550336864488, "grad_norm": 1.5115132331848145, "learning_rate": 2.9884356991522986e-05, "loss": 0.1855, "step": 3103 }, { "epoch": 0.06839753865816105, "grad_norm": 1.5442759990692139, "learning_rate": 2.9884224277927895e-05, "loss": 0.17, "step": 3104 }, { "epoch": 0.06841957394767721, "grad_norm": 1.5700974464416504, "learning_rate": 2.9884091488519475e-05, "loss": 0.1623, "step": 3105 }, { "epoch": 0.06844160923719336, "grad_norm": 1.5312305688858032, "learning_rate": 2.9883958623298392e-05, "loss": 0.1213, "step": 3106 }, { "epoch": 0.06846364452670953, "grad_norm": 1.8054795265197754, "learning_rate": 2.9883825682265325e-05, "loss": 0.1375, "step": 3107 }, { "epoch": 0.06848567981622569, "grad_norm": 1.68775475025177, "learning_rate": 2.9883692665420953e-05, "loss": 0.1824, "step": 3108 }, { "epoch": 0.06850771510574184, "grad_norm": 2.2054946422576904, "learning_rate": 2.9883559572765957e-05, "loss": 0.2084, "step": 3109 }, { "epoch": 0.06852975039525801, "grad_norm": 1.3288434743881226, "learning_rate": 2.988342640430101e-05, "loss": 0.1589, "step": 3110 }, { "epoch": 0.06855178568477417, "grad_norm": 1.2855861186981201, "learning_rate": 2.9883293160026787e-05, "loss": 0.1862, "step": 3111 }, { "epoch": 0.06857382097429032, "grad_norm": 1.4498380422592163, "learning_rate": 2.9883159839943973e-05, "loss": 0.1269, "step": 3112 }, { "epoch": 0.06859585626380649, "grad_norm": 1.6928950548171997, "learning_rate": 2.9883026444053244e-05, "loss": 0.1247, "step": 3113 }, { "epoch": 0.06861789155332265, "grad_norm": 1.192797064781189, "learning_rate": 2.988289297235528e-05, "loss": 0.1289, "step": 3114 }, { "epoch": 0.0686399268428388, "grad_norm": 1.9911714792251587, "learning_rate": 2.9882759424850766e-05, "loss": 0.1793, "step": 3115 }, { "epoch": 0.06866196213235497, "grad_norm": 1.2297618389129639, "learning_rate": 2.988262580154037e-05, "loss": 0.1669, "step": 3116 }, { "epoch": 0.06868399742187113, "grad_norm": 1.461985468864441, "learning_rate": 2.9882492102424785e-05, "loss": 0.1202, "step": 3117 }, { "epoch": 0.06870603271138728, "grad_norm": 1.7763322591781616, "learning_rate": 2.9882358327504685e-05, "loss": 0.1511, "step": 3118 }, { "epoch": 0.06872806800090345, "grad_norm": 1.5112929344177246, "learning_rate": 2.9882224476780757e-05, "loss": 0.1963, "step": 3119 }, { "epoch": 0.06875010329041961, "grad_norm": 1.2562175989151, "learning_rate": 2.9882090550253676e-05, "loss": 0.1209, "step": 3120 }, { "epoch": 0.06877213857993576, "grad_norm": 2.067446708679199, "learning_rate": 2.988195654792413e-05, "loss": 0.1817, "step": 3121 }, { "epoch": 0.06879417386945193, "grad_norm": 1.7443562746047974, "learning_rate": 2.9881822469792797e-05, "loss": 0.1565, "step": 3122 }, { "epoch": 0.06881620915896809, "grad_norm": 1.8928955793380737, "learning_rate": 2.988168831586036e-05, "loss": 0.2169, "step": 3123 }, { "epoch": 0.06883824444848424, "grad_norm": 2.002276659011841, "learning_rate": 2.9881554086127506e-05, "loss": 0.1647, "step": 3124 }, { "epoch": 0.06886027973800041, "grad_norm": 1.224876046180725, "learning_rate": 2.988141978059492e-05, "loss": 0.1412, "step": 3125 }, { "epoch": 0.06888231502751657, "grad_norm": 1.723605990409851, "learning_rate": 2.9881285399263275e-05, "loss": 0.2055, "step": 3126 }, { "epoch": 0.06890435031703272, "grad_norm": 2.2412397861480713, "learning_rate": 2.9881150942133274e-05, "loss": 0.1629, "step": 3127 }, { "epoch": 0.06892638560654889, "grad_norm": 0.9874823689460754, "learning_rate": 2.9881016409205582e-05, "loss": 0.1243, "step": 3128 }, { "epoch": 0.06894842089606505, "grad_norm": 1.5874379873275757, "learning_rate": 2.9880881800480903e-05, "loss": 0.1848, "step": 3129 }, { "epoch": 0.0689704561855812, "grad_norm": 1.6407359838485718, "learning_rate": 2.9880747115959907e-05, "loss": 0.145, "step": 3130 }, { "epoch": 0.06899249147509737, "grad_norm": 2.0953147411346436, "learning_rate": 2.9880612355643286e-05, "loss": 0.1505, "step": 3131 }, { "epoch": 0.06901452676461353, "grad_norm": 1.4758275747299194, "learning_rate": 2.9880477519531724e-05, "loss": 0.1638, "step": 3132 }, { "epoch": 0.06903656205412968, "grad_norm": 1.8581010103225708, "learning_rate": 2.9880342607625916e-05, "loss": 0.1363, "step": 3133 }, { "epoch": 0.06905859734364585, "grad_norm": 2.070641279220581, "learning_rate": 2.9880207619926543e-05, "loss": 0.1731, "step": 3134 }, { "epoch": 0.06908063263316201, "grad_norm": 1.8522040843963623, "learning_rate": 2.9880072556434292e-05, "loss": 0.1528, "step": 3135 }, { "epoch": 0.06910266792267816, "grad_norm": 1.7118523120880127, "learning_rate": 2.987993741714985e-05, "loss": 0.1207, "step": 3136 }, { "epoch": 0.06912470321219433, "grad_norm": 1.9249632358551025, "learning_rate": 2.987980220207391e-05, "loss": 0.1639, "step": 3137 }, { "epoch": 0.06914673850171049, "grad_norm": 1.385153889656067, "learning_rate": 2.987966691120716e-05, "loss": 0.1987, "step": 3138 }, { "epoch": 0.06916877379122664, "grad_norm": 1.2522029876708984, "learning_rate": 2.987953154455028e-05, "loss": 0.1672, "step": 3139 }, { "epoch": 0.06919080908074281, "grad_norm": 1.1091394424438477, "learning_rate": 2.9879396102103977e-05, "loss": 0.0944, "step": 3140 }, { "epoch": 0.06921284437025897, "grad_norm": 1.2377641201019287, "learning_rate": 2.9879260583868922e-05, "loss": 0.1334, "step": 3141 }, { "epoch": 0.06923487965977514, "grad_norm": 1.6813920736312866, "learning_rate": 2.987912498984582e-05, "loss": 0.164, "step": 3142 }, { "epoch": 0.06925691494929129, "grad_norm": 1.664158821105957, "learning_rate": 2.9878989320035356e-05, "loss": 0.1443, "step": 3143 }, { "epoch": 0.06927895023880745, "grad_norm": 1.1324841976165771, "learning_rate": 2.9878853574438217e-05, "loss": 0.1298, "step": 3144 }, { "epoch": 0.06930098552832362, "grad_norm": 1.2636933326721191, "learning_rate": 2.9878717753055098e-05, "loss": 0.1447, "step": 3145 }, { "epoch": 0.06932302081783977, "grad_norm": 1.3928414583206177, "learning_rate": 2.987858185588669e-05, "loss": 0.2071, "step": 3146 }, { "epoch": 0.06934505610735593, "grad_norm": 1.9360746145248413, "learning_rate": 2.9878445882933688e-05, "loss": 0.1553, "step": 3147 }, { "epoch": 0.0693670913968721, "grad_norm": 1.237013578414917, "learning_rate": 2.9878309834196786e-05, "loss": 0.1597, "step": 3148 }, { "epoch": 0.06938912668638825, "grad_norm": 5.748421669006348, "learning_rate": 2.987817370967667e-05, "loss": 0.1193, "step": 3149 }, { "epoch": 0.06941116197590441, "grad_norm": 1.7408303022384644, "learning_rate": 2.9878037509374036e-05, "loss": 0.2269, "step": 3150 }, { "epoch": 0.06943319726542058, "grad_norm": 2.0690343379974365, "learning_rate": 2.9877901233289577e-05, "loss": 0.1841, "step": 3151 }, { "epoch": 0.06945523255493673, "grad_norm": 1.6173371076583862, "learning_rate": 2.987776488142399e-05, "loss": 0.1484, "step": 3152 }, { "epoch": 0.06947726784445289, "grad_norm": 1.524403691291809, "learning_rate": 2.9877628453777976e-05, "loss": 0.1592, "step": 3153 }, { "epoch": 0.06949930313396906, "grad_norm": 1.9641584157943726, "learning_rate": 2.9877491950352215e-05, "loss": 0.1705, "step": 3154 }, { "epoch": 0.06952133842348521, "grad_norm": 3.1105172634124756, "learning_rate": 2.987735537114741e-05, "loss": 0.2186, "step": 3155 }, { "epoch": 0.06954337371300137, "grad_norm": 1.3001550436019897, "learning_rate": 2.987721871616426e-05, "loss": 0.1736, "step": 3156 }, { "epoch": 0.06956540900251754, "grad_norm": 0.9533627033233643, "learning_rate": 2.9877081985403454e-05, "loss": 0.1284, "step": 3157 }, { "epoch": 0.06958744429203369, "grad_norm": 1.8163353204727173, "learning_rate": 2.987694517886569e-05, "loss": 0.1612, "step": 3158 }, { "epoch": 0.06960947958154985, "grad_norm": 2.6729178428649902, "learning_rate": 2.9876808296551673e-05, "loss": 0.157, "step": 3159 }, { "epoch": 0.06963151487106602, "grad_norm": 1.589005470275879, "learning_rate": 2.9876671338462086e-05, "loss": 0.1845, "step": 3160 }, { "epoch": 0.06965355016058217, "grad_norm": 1.6714800596237183, "learning_rate": 2.987653430459764e-05, "loss": 0.1409, "step": 3161 }, { "epoch": 0.06967558545009833, "grad_norm": 1.7417577505111694, "learning_rate": 2.9876397194959025e-05, "loss": 0.1424, "step": 3162 }, { "epoch": 0.0696976207396145, "grad_norm": 1.744354009628296, "learning_rate": 2.987626000954694e-05, "loss": 0.1677, "step": 3163 }, { "epoch": 0.06971965602913065, "grad_norm": 2.100985288619995, "learning_rate": 2.987612274836209e-05, "loss": 0.1989, "step": 3164 }, { "epoch": 0.06974169131864681, "grad_norm": 1.606472134590149, "learning_rate": 2.9875985411405165e-05, "loss": 0.2351, "step": 3165 }, { "epoch": 0.06976372660816298, "grad_norm": 1.5110465288162231, "learning_rate": 2.9875847998676874e-05, "loss": 0.1393, "step": 3166 }, { "epoch": 0.06978576189767913, "grad_norm": 1.4817004203796387, "learning_rate": 2.987571051017791e-05, "loss": 0.1584, "step": 3167 }, { "epoch": 0.06980779718719529, "grad_norm": 2.454040288925171, "learning_rate": 2.9875572945908973e-05, "loss": 0.2012, "step": 3168 }, { "epoch": 0.06982983247671146, "grad_norm": 1.7901898622512817, "learning_rate": 2.9875435305870764e-05, "loss": 0.1226, "step": 3169 }, { "epoch": 0.06985186776622761, "grad_norm": 1.686184287071228, "learning_rate": 2.987529759006399e-05, "loss": 0.1635, "step": 3170 }, { "epoch": 0.06987390305574377, "grad_norm": 1.0361555814743042, "learning_rate": 2.987515979848935e-05, "loss": 0.1661, "step": 3171 }, { "epoch": 0.06989593834525994, "grad_norm": 1.943747639656067, "learning_rate": 2.9875021931147546e-05, "loss": 0.215, "step": 3172 }, { "epoch": 0.06991797363477609, "grad_norm": 0.9788911938667297, "learning_rate": 2.9874883988039272e-05, "loss": 0.1329, "step": 3173 }, { "epoch": 0.06994000892429225, "grad_norm": 1.3728818893432617, "learning_rate": 2.987474596916524e-05, "loss": 0.1718, "step": 3174 }, { "epoch": 0.06996204421380842, "grad_norm": 1.1086663007736206, "learning_rate": 2.9874607874526155e-05, "loss": 0.1252, "step": 3175 }, { "epoch": 0.06998407950332458, "grad_norm": 1.6523895263671875, "learning_rate": 2.9874469704122712e-05, "loss": 0.1836, "step": 3176 }, { "epoch": 0.07000611479284073, "grad_norm": 1.2583434581756592, "learning_rate": 2.9874331457955618e-05, "loss": 0.1473, "step": 3177 }, { "epoch": 0.0700281500823569, "grad_norm": 1.8560317754745483, "learning_rate": 2.9874193136025577e-05, "loss": 0.146, "step": 3178 }, { "epoch": 0.07005018537187306, "grad_norm": 1.2914094924926758, "learning_rate": 2.9874054738333294e-05, "loss": 0.1731, "step": 3179 }, { "epoch": 0.07007222066138921, "grad_norm": 1.8513919115066528, "learning_rate": 2.9873916264879477e-05, "loss": 0.1642, "step": 3180 }, { "epoch": 0.07009425595090538, "grad_norm": 1.8528647422790527, "learning_rate": 2.987377771566483e-05, "loss": 0.2032, "step": 3181 }, { "epoch": 0.07011629124042154, "grad_norm": 1.580547571182251, "learning_rate": 2.987363909069005e-05, "loss": 0.1658, "step": 3182 }, { "epoch": 0.07013832652993769, "grad_norm": 1.0699416399002075, "learning_rate": 2.9873500389955857e-05, "loss": 0.1238, "step": 3183 }, { "epoch": 0.07016036181945386, "grad_norm": 6.366967678070068, "learning_rate": 2.987336161346295e-05, "loss": 0.142, "step": 3184 }, { "epoch": 0.07018239710897002, "grad_norm": 1.9378585815429688, "learning_rate": 2.987322276121203e-05, "loss": 0.1937, "step": 3185 }, { "epoch": 0.07020443239848617, "grad_norm": 1.7736051082611084, "learning_rate": 2.9873083833203817e-05, "loss": 0.1541, "step": 3186 }, { "epoch": 0.07022646768800234, "grad_norm": 1.690146565437317, "learning_rate": 2.987294482943901e-05, "loss": 0.1506, "step": 3187 }, { "epoch": 0.0702485029775185, "grad_norm": 1.735878348350525, "learning_rate": 2.9872805749918323e-05, "loss": 0.1927, "step": 3188 }, { "epoch": 0.07027053826703465, "grad_norm": 1.9517849683761597, "learning_rate": 2.9872666594642455e-05, "loss": 0.1945, "step": 3189 }, { "epoch": 0.07029257355655082, "grad_norm": 1.4745969772338867, "learning_rate": 2.9872527363612125e-05, "loss": 0.1316, "step": 3190 }, { "epoch": 0.07031460884606698, "grad_norm": 1.2266377210617065, "learning_rate": 2.9872388056828036e-05, "loss": 0.1459, "step": 3191 }, { "epoch": 0.07033664413558313, "grad_norm": 1.5261659622192383, "learning_rate": 2.98722486742909e-05, "loss": 0.1721, "step": 3192 }, { "epoch": 0.0703586794250993, "grad_norm": 2.134145736694336, "learning_rate": 2.9872109216001427e-05, "loss": 0.1567, "step": 3193 }, { "epoch": 0.07038071471461546, "grad_norm": 1.0699775218963623, "learning_rate": 2.9871969681960327e-05, "loss": 0.1563, "step": 3194 }, { "epoch": 0.07040275000413161, "grad_norm": 1.4411932229995728, "learning_rate": 2.9871830072168308e-05, "loss": 0.183, "step": 3195 }, { "epoch": 0.07042478529364778, "grad_norm": 1.5861601829528809, "learning_rate": 2.9871690386626085e-05, "loss": 0.1804, "step": 3196 }, { "epoch": 0.07044682058316394, "grad_norm": 1.731249451637268, "learning_rate": 2.987155062533437e-05, "loss": 0.1388, "step": 3197 }, { "epoch": 0.07046885587268009, "grad_norm": 1.5694669485092163, "learning_rate": 2.987141078829387e-05, "loss": 0.1378, "step": 3198 }, { "epoch": 0.07049089116219626, "grad_norm": 2.160996675491333, "learning_rate": 2.98712708755053e-05, "loss": 0.1541, "step": 3199 }, { "epoch": 0.07051292645171242, "grad_norm": 1.5027124881744385, "learning_rate": 2.9871130886969376e-05, "loss": 0.1956, "step": 3200 }, { "epoch": 0.07053496174122857, "grad_norm": 2.527210235595703, "learning_rate": 2.9870990822686806e-05, "loss": 0.1555, "step": 3201 }, { "epoch": 0.07055699703074474, "grad_norm": 1.7874109745025635, "learning_rate": 2.9870850682658305e-05, "loss": 0.1796, "step": 3202 }, { "epoch": 0.0705790323202609, "grad_norm": 0.8988957405090332, "learning_rate": 2.9870710466884587e-05, "loss": 0.1801, "step": 3203 }, { "epoch": 0.07060106760977705, "grad_norm": 1.9763199090957642, "learning_rate": 2.9870570175366367e-05, "loss": 0.1157, "step": 3204 }, { "epoch": 0.07062310289929322, "grad_norm": 1.28822660446167, "learning_rate": 2.9870429808104357e-05, "loss": 0.1772, "step": 3205 }, { "epoch": 0.07064513818880938, "grad_norm": 2.0076050758361816, "learning_rate": 2.987028936509928e-05, "loss": 0.1776, "step": 3206 }, { "epoch": 0.07066717347832553, "grad_norm": 1.2598259449005127, "learning_rate": 2.987014884635184e-05, "loss": 0.1534, "step": 3207 }, { "epoch": 0.0706892087678417, "grad_norm": 1.3714957237243652, "learning_rate": 2.9870008251862755e-05, "loss": 0.168, "step": 3208 }, { "epoch": 0.07071124405735786, "grad_norm": 1.1969084739685059, "learning_rate": 2.9869867581632747e-05, "loss": 0.129, "step": 3209 }, { "epoch": 0.07073327934687403, "grad_norm": 1.6964261531829834, "learning_rate": 2.9869726835662534e-05, "loss": 0.1412, "step": 3210 }, { "epoch": 0.07075531463639018, "grad_norm": 1.2717299461364746, "learning_rate": 2.986958601395282e-05, "loss": 0.1762, "step": 3211 }, { "epoch": 0.07077734992590634, "grad_norm": 1.2442668676376343, "learning_rate": 2.9869445116504336e-05, "loss": 0.132, "step": 3212 }, { "epoch": 0.0707993852154225, "grad_norm": 1.3332550525665283, "learning_rate": 2.986930414331779e-05, "loss": 0.1333, "step": 3213 }, { "epoch": 0.07082142050493866, "grad_norm": 1.2596030235290527, "learning_rate": 2.986916309439391e-05, "loss": 0.1685, "step": 3214 }, { "epoch": 0.07084345579445482, "grad_norm": 1.4793659448623657, "learning_rate": 2.9869021969733407e-05, "loss": 0.1319, "step": 3215 }, { "epoch": 0.07086549108397099, "grad_norm": 1.1942013502120972, "learning_rate": 2.9868880769337004e-05, "loss": 0.1654, "step": 3216 }, { "epoch": 0.07088752637348714, "grad_norm": 1.2445998191833496, "learning_rate": 2.9868739493205412e-05, "loss": 0.1955, "step": 3217 }, { "epoch": 0.0709095616630033, "grad_norm": 1.545806646347046, "learning_rate": 2.9868598141339362e-05, "loss": 0.1271, "step": 3218 }, { "epoch": 0.07093159695251947, "grad_norm": 1.4506841897964478, "learning_rate": 2.986845671373957e-05, "loss": 0.1308, "step": 3219 }, { "epoch": 0.07095363224203562, "grad_norm": 1.320038080215454, "learning_rate": 2.986831521040675e-05, "loss": 0.1544, "step": 3220 }, { "epoch": 0.07097566753155178, "grad_norm": 1.9470442533493042, "learning_rate": 2.9868173631341627e-05, "loss": 0.1488, "step": 3221 }, { "epoch": 0.07099770282106795, "grad_norm": 1.8544121980667114, "learning_rate": 2.9868031976544926e-05, "loss": 0.1466, "step": 3222 }, { "epoch": 0.0710197381105841, "grad_norm": 1.5067306756973267, "learning_rate": 2.9867890246017364e-05, "loss": 0.1665, "step": 3223 }, { "epoch": 0.07104177340010026, "grad_norm": 1.542778730392456, "learning_rate": 2.9867748439759662e-05, "loss": 0.1359, "step": 3224 }, { "epoch": 0.07106380868961643, "grad_norm": 1.2319589853286743, "learning_rate": 2.986760655777255e-05, "loss": 0.1476, "step": 3225 }, { "epoch": 0.07108584397913258, "grad_norm": 2.3269336223602295, "learning_rate": 2.986746460005674e-05, "loss": 0.1725, "step": 3226 }, { "epoch": 0.07110787926864874, "grad_norm": 1.6626402139663696, "learning_rate": 2.986732256661296e-05, "loss": 0.1486, "step": 3227 }, { "epoch": 0.0711299145581649, "grad_norm": 1.5967917442321777, "learning_rate": 2.986718045744194e-05, "loss": 0.167, "step": 3228 }, { "epoch": 0.07115194984768106, "grad_norm": 1.715651512145996, "learning_rate": 2.986703827254439e-05, "loss": 0.1261, "step": 3229 }, { "epoch": 0.07117398513719722, "grad_norm": 1.0222139358520508, "learning_rate": 2.9866896011921046e-05, "loss": 0.1635, "step": 3230 }, { "epoch": 0.07119602042671339, "grad_norm": 1.3951607942581177, "learning_rate": 2.9866753675572626e-05, "loss": 0.147, "step": 3231 }, { "epoch": 0.07121805571622954, "grad_norm": 1.0285369157791138, "learning_rate": 2.9866611263499857e-05, "loss": 0.1356, "step": 3232 }, { "epoch": 0.0712400910057457, "grad_norm": 2.0223445892333984, "learning_rate": 2.986646877570347e-05, "loss": 0.1922, "step": 3233 }, { "epoch": 0.07126212629526187, "grad_norm": 2.7145636081695557, "learning_rate": 2.986632621218418e-05, "loss": 0.2319, "step": 3234 }, { "epoch": 0.07128416158477802, "grad_norm": 1.297234058380127, "learning_rate": 2.9866183572942724e-05, "loss": 0.1722, "step": 3235 }, { "epoch": 0.07130619687429418, "grad_norm": 1.4513130187988281, "learning_rate": 2.986604085797982e-05, "loss": 0.1609, "step": 3236 }, { "epoch": 0.07132823216381035, "grad_norm": 1.4290145635604858, "learning_rate": 2.9865898067296194e-05, "loss": 0.1091, "step": 3237 }, { "epoch": 0.0713502674533265, "grad_norm": 1.3852730989456177, "learning_rate": 2.9865755200892584e-05, "loss": 0.1228, "step": 3238 }, { "epoch": 0.07137230274284266, "grad_norm": 0.8795749545097351, "learning_rate": 2.9865612258769704e-05, "loss": 0.1208, "step": 3239 }, { "epoch": 0.07139433803235883, "grad_norm": 1.289507269859314, "learning_rate": 2.9865469240928296e-05, "loss": 0.1253, "step": 3240 }, { "epoch": 0.07141637332187498, "grad_norm": 1.533204197883606, "learning_rate": 2.986532614736908e-05, "loss": 0.1544, "step": 3241 }, { "epoch": 0.07143840861139114, "grad_norm": 2.113030433654785, "learning_rate": 2.9865182978092787e-05, "loss": 0.1624, "step": 3242 }, { "epoch": 0.0714604439009073, "grad_norm": 1.5854113101959229, "learning_rate": 2.986503973310014e-05, "loss": 0.1682, "step": 3243 }, { "epoch": 0.07148247919042346, "grad_norm": 1.8599835634231567, "learning_rate": 2.986489641239188e-05, "loss": 0.189, "step": 3244 }, { "epoch": 0.07150451447993962, "grad_norm": 1.2674528360366821, "learning_rate": 2.9864753015968733e-05, "loss": 0.1475, "step": 3245 }, { "epoch": 0.07152654976945579, "grad_norm": 1.8834182024002075, "learning_rate": 2.9864609543831425e-05, "loss": 0.166, "step": 3246 }, { "epoch": 0.07154858505897195, "grad_norm": 1.6750341653823853, "learning_rate": 2.986446599598069e-05, "loss": 0.1697, "step": 3247 }, { "epoch": 0.0715706203484881, "grad_norm": 1.4704740047454834, "learning_rate": 2.9864322372417258e-05, "loss": 0.1532, "step": 3248 }, { "epoch": 0.07159265563800427, "grad_norm": 1.63509202003479, "learning_rate": 2.986417867314186e-05, "loss": 0.1613, "step": 3249 }, { "epoch": 0.07161469092752043, "grad_norm": 1.062508225440979, "learning_rate": 2.986403489815523e-05, "loss": 0.128, "step": 3250 }, { "epoch": 0.07163672621703658, "grad_norm": 2.5451784133911133, "learning_rate": 2.98638910474581e-05, "loss": 0.2207, "step": 3251 }, { "epoch": 0.07165876150655275, "grad_norm": 2.1197173595428467, "learning_rate": 2.9863747121051205e-05, "loss": 0.1606, "step": 3252 }, { "epoch": 0.07168079679606891, "grad_norm": 1.8702377080917358, "learning_rate": 2.986360311893527e-05, "loss": 0.1494, "step": 3253 }, { "epoch": 0.07170283208558506, "grad_norm": 1.218501091003418, "learning_rate": 2.986345904111104e-05, "loss": 0.1511, "step": 3254 }, { "epoch": 0.07172486737510123, "grad_norm": 1.532772421836853, "learning_rate": 2.9863314887579236e-05, "loss": 0.1902, "step": 3255 }, { "epoch": 0.07174690266461739, "grad_norm": 1.792222261428833, "learning_rate": 2.9863170658340604e-05, "loss": 0.1471, "step": 3256 }, { "epoch": 0.07176893795413354, "grad_norm": 1.5429836511611938, "learning_rate": 2.986302635339587e-05, "loss": 0.1713, "step": 3257 }, { "epoch": 0.0717909732436497, "grad_norm": 2.1352033615112305, "learning_rate": 2.986288197274578e-05, "loss": 0.1209, "step": 3258 }, { "epoch": 0.07181300853316587, "grad_norm": 1.4483715295791626, "learning_rate": 2.9862737516391054e-05, "loss": 0.1568, "step": 3259 }, { "epoch": 0.07183504382268202, "grad_norm": 5.926307678222656, "learning_rate": 2.986259298433244e-05, "loss": 0.1383, "step": 3260 }, { "epoch": 0.07185707911219819, "grad_norm": 2.184730291366577, "learning_rate": 2.9862448376570667e-05, "loss": 0.1978, "step": 3261 }, { "epoch": 0.07187911440171435, "grad_norm": 2.345691680908203, "learning_rate": 2.9862303693106476e-05, "loss": 0.1506, "step": 3262 }, { "epoch": 0.0719011496912305, "grad_norm": 1.5815825462341309, "learning_rate": 2.98621589339406e-05, "loss": 0.1996, "step": 3263 }, { "epoch": 0.07192318498074667, "grad_norm": 1.8066651821136475, "learning_rate": 2.9862014099073786e-05, "loss": 0.1722, "step": 3264 }, { "epoch": 0.07194522027026283, "grad_norm": 2.1834936141967773, "learning_rate": 2.9861869188506754e-05, "loss": 0.1645, "step": 3265 }, { "epoch": 0.07196725555977898, "grad_norm": 1.5427495241165161, "learning_rate": 2.9861724202240258e-05, "loss": 0.1507, "step": 3266 }, { "epoch": 0.07198929084929515, "grad_norm": 1.4174931049346924, "learning_rate": 2.9861579140275034e-05, "loss": 0.1853, "step": 3267 }, { "epoch": 0.07201132613881131, "grad_norm": 1.6197237968444824, "learning_rate": 2.9861434002611814e-05, "loss": 0.1702, "step": 3268 }, { "epoch": 0.07203336142832746, "grad_norm": 1.0347882509231567, "learning_rate": 2.9861288789251343e-05, "loss": 0.1117, "step": 3269 }, { "epoch": 0.07205539671784363, "grad_norm": 1.9349427223205566, "learning_rate": 2.986114350019436e-05, "loss": 0.1941, "step": 3270 }, { "epoch": 0.07207743200735979, "grad_norm": 1.3696578741073608, "learning_rate": 2.98609981354416e-05, "loss": 0.1346, "step": 3271 }, { "epoch": 0.07209946729687594, "grad_norm": 1.0753240585327148, "learning_rate": 2.9860852694993812e-05, "loss": 0.1668, "step": 3272 }, { "epoch": 0.0721215025863921, "grad_norm": 1.3237608671188354, "learning_rate": 2.986070717885173e-05, "loss": 0.1227, "step": 3273 }, { "epoch": 0.07214353787590827, "grad_norm": 1.859129786491394, "learning_rate": 2.98605615870161e-05, "loss": 0.1679, "step": 3274 }, { "epoch": 0.07216557316542442, "grad_norm": 1.874139666557312, "learning_rate": 2.986041591948766e-05, "loss": 0.2133, "step": 3275 }, { "epoch": 0.07218760845494059, "grad_norm": 2.0871384143829346, "learning_rate": 2.9860270176267147e-05, "loss": 0.1417, "step": 3276 }, { "epoch": 0.07220964374445675, "grad_norm": 1.2501442432403564, "learning_rate": 2.9860124357355318e-05, "loss": 0.1452, "step": 3277 }, { "epoch": 0.0722316790339729, "grad_norm": 1.81416916847229, "learning_rate": 2.9859978462752902e-05, "loss": 0.2015, "step": 3278 }, { "epoch": 0.07225371432348907, "grad_norm": 1.9708384275436401, "learning_rate": 2.985983249246065e-05, "loss": 0.1651, "step": 3279 }, { "epoch": 0.07227574961300523, "grad_norm": 1.7939088344573975, "learning_rate": 2.9859686446479296e-05, "loss": 0.1936, "step": 3280 }, { "epoch": 0.0722977849025214, "grad_norm": 1.106080174446106, "learning_rate": 2.9859540324809597e-05, "loss": 0.1516, "step": 3281 }, { "epoch": 0.07231982019203755, "grad_norm": 2.009338617324829, "learning_rate": 2.985939412745229e-05, "loss": 0.2028, "step": 3282 }, { "epoch": 0.07234185548155371, "grad_norm": 1.2662841081619263, "learning_rate": 2.985924785440812e-05, "loss": 0.1471, "step": 3283 }, { "epoch": 0.07236389077106987, "grad_norm": 3.157073497772217, "learning_rate": 2.9859101505677832e-05, "loss": 0.1629, "step": 3284 }, { "epoch": 0.07238592606058603, "grad_norm": 1.9469321966171265, "learning_rate": 2.985895508126217e-05, "loss": 0.1344, "step": 3285 }, { "epoch": 0.07240796135010219, "grad_norm": 1.6548688411712646, "learning_rate": 2.9858808581161883e-05, "loss": 0.1784, "step": 3286 }, { "epoch": 0.07242999663961835, "grad_norm": 1.013047695159912, "learning_rate": 2.985866200537772e-05, "loss": 0.1087, "step": 3287 }, { "epoch": 0.0724520319291345, "grad_norm": 1.3311140537261963, "learning_rate": 2.9858515353910416e-05, "loss": 0.1647, "step": 3288 }, { "epoch": 0.07247406721865067, "grad_norm": 1.9844924211502075, "learning_rate": 2.9858368626760733e-05, "loss": 0.1233, "step": 3289 }, { "epoch": 0.07249610250816683, "grad_norm": 4.172825336456299, "learning_rate": 2.9858221823929408e-05, "loss": 0.16, "step": 3290 }, { "epoch": 0.07251813779768299, "grad_norm": 1.8061447143554688, "learning_rate": 2.985807494541719e-05, "loss": 0.1363, "step": 3291 }, { "epoch": 0.07254017308719915, "grad_norm": 1.5992103815078735, "learning_rate": 2.985792799122483e-05, "loss": 0.1764, "step": 3292 }, { "epoch": 0.07256220837671531, "grad_norm": 1.3839699029922485, "learning_rate": 2.9857780961353077e-05, "loss": 0.1403, "step": 3293 }, { "epoch": 0.07258424366623147, "grad_norm": 1.1646084785461426, "learning_rate": 2.9857633855802676e-05, "loss": 0.1822, "step": 3294 }, { "epoch": 0.07260627895574763, "grad_norm": 1.774279236793518, "learning_rate": 2.9857486674574378e-05, "loss": 0.1717, "step": 3295 }, { "epoch": 0.0726283142452638, "grad_norm": 1.7539812326431274, "learning_rate": 2.9857339417668934e-05, "loss": 0.1658, "step": 3296 }, { "epoch": 0.07265034953477995, "grad_norm": 1.7736239433288574, "learning_rate": 2.9857192085087093e-05, "loss": 0.186, "step": 3297 }, { "epoch": 0.07267238482429611, "grad_norm": 1.675864815711975, "learning_rate": 2.9857044676829604e-05, "loss": 0.1958, "step": 3298 }, { "epoch": 0.07269442011381227, "grad_norm": 1.9097576141357422, "learning_rate": 2.9856897192897226e-05, "loss": 0.1835, "step": 3299 }, { "epoch": 0.07271645540332843, "grad_norm": 1.3906757831573486, "learning_rate": 2.9856749633290697e-05, "loss": 0.1947, "step": 3300 }, { "epoch": 0.07273849069284459, "grad_norm": 1.464240312576294, "learning_rate": 2.9856601998010777e-05, "loss": 0.1692, "step": 3301 }, { "epoch": 0.07276052598236075, "grad_norm": 1.7020715475082397, "learning_rate": 2.9856454287058218e-05, "loss": 0.1018, "step": 3302 }, { "epoch": 0.0727825612718769, "grad_norm": 1.1473876237869263, "learning_rate": 2.985630650043377e-05, "loss": 0.118, "step": 3303 }, { "epoch": 0.07280459656139307, "grad_norm": 1.81227707862854, "learning_rate": 2.9856158638138186e-05, "loss": 0.1888, "step": 3304 }, { "epoch": 0.07282663185090923, "grad_norm": 0.9435046911239624, "learning_rate": 2.9856010700172217e-05, "loss": 0.1212, "step": 3305 }, { "epoch": 0.07284866714042539, "grad_norm": 2.319737434387207, "learning_rate": 2.9855862686536622e-05, "loss": 0.1755, "step": 3306 }, { "epoch": 0.07287070242994155, "grad_norm": 2.459463119506836, "learning_rate": 2.985571459723215e-05, "loss": 0.1293, "step": 3307 }, { "epoch": 0.07289273771945771, "grad_norm": 1.735272765159607, "learning_rate": 2.985556643225956e-05, "loss": 0.1924, "step": 3308 }, { "epoch": 0.07291477300897387, "grad_norm": 2.5587148666381836, "learning_rate": 2.9855418191619603e-05, "loss": 0.1474, "step": 3309 }, { "epoch": 0.07293680829849003, "grad_norm": 1.2560793161392212, "learning_rate": 2.9855269875313037e-05, "loss": 0.1443, "step": 3310 }, { "epoch": 0.0729588435880062, "grad_norm": 3.7650163173675537, "learning_rate": 2.985512148334061e-05, "loss": 0.1493, "step": 3311 }, { "epoch": 0.07298087887752235, "grad_norm": 4.344404697418213, "learning_rate": 2.985497301570309e-05, "loss": 0.1786, "step": 3312 }, { "epoch": 0.07300291416703851, "grad_norm": 1.2397181987762451, "learning_rate": 2.985482447240122e-05, "loss": 0.1653, "step": 3313 }, { "epoch": 0.07302494945655467, "grad_norm": 3.709240436553955, "learning_rate": 2.985467585343577e-05, "loss": 0.173, "step": 3314 }, { "epoch": 0.07304698474607084, "grad_norm": 1.7508376836776733, "learning_rate": 2.9854527158807487e-05, "loss": 0.1784, "step": 3315 }, { "epoch": 0.07306902003558699, "grad_norm": 2.3291501998901367, "learning_rate": 2.985437838851713e-05, "loss": 0.1416, "step": 3316 }, { "epoch": 0.07309105532510315, "grad_norm": 1.483102560043335, "learning_rate": 2.985422954256546e-05, "loss": 0.1809, "step": 3317 }, { "epoch": 0.07311309061461932, "grad_norm": 1.552636981010437, "learning_rate": 2.985408062095323e-05, "loss": 0.2011, "step": 3318 }, { "epoch": 0.07313512590413547, "grad_norm": 1.4898314476013184, "learning_rate": 2.9853931623681205e-05, "loss": 0.1439, "step": 3319 }, { "epoch": 0.07315716119365163, "grad_norm": 1.392641305923462, "learning_rate": 2.9853782550750142e-05, "loss": 0.191, "step": 3320 }, { "epoch": 0.0731791964831678, "grad_norm": 2.813627004623413, "learning_rate": 2.98536334021608e-05, "loss": 0.1753, "step": 3321 }, { "epoch": 0.07320123177268395, "grad_norm": 1.8103489875793457, "learning_rate": 2.9853484177913935e-05, "loss": 0.2066, "step": 3322 }, { "epoch": 0.07322326706220011, "grad_norm": 1.6979621648788452, "learning_rate": 2.985333487801031e-05, "loss": 0.1744, "step": 3323 }, { "epoch": 0.07324530235171628, "grad_norm": 1.4524015188217163, "learning_rate": 2.9853185502450688e-05, "loss": 0.1761, "step": 3324 }, { "epoch": 0.07326733764123243, "grad_norm": 1.679787278175354, "learning_rate": 2.9853036051235826e-05, "loss": 0.1454, "step": 3325 }, { "epoch": 0.0732893729307486, "grad_norm": 1.829405426979065, "learning_rate": 2.9852886524366488e-05, "loss": 0.1648, "step": 3326 }, { "epoch": 0.07331140822026476, "grad_norm": 1.7020734548568726, "learning_rate": 2.985273692184343e-05, "loss": 0.1567, "step": 3327 }, { "epoch": 0.07333344350978091, "grad_norm": 1.5860844850540161, "learning_rate": 2.9852587243667422e-05, "loss": 0.1737, "step": 3328 }, { "epoch": 0.07335547879929707, "grad_norm": 1.5758744478225708, "learning_rate": 2.9852437489839218e-05, "loss": 0.1299, "step": 3329 }, { "epoch": 0.07337751408881324, "grad_norm": 1.5402954816818237, "learning_rate": 2.985228766035959e-05, "loss": 0.1595, "step": 3330 }, { "epoch": 0.07339954937832939, "grad_norm": 1.3862390518188477, "learning_rate": 2.9852137755229297e-05, "loss": 0.163, "step": 3331 }, { "epoch": 0.07342158466784555, "grad_norm": 1.4669923782348633, "learning_rate": 2.9851987774449097e-05, "loss": 0.1571, "step": 3332 }, { "epoch": 0.07344361995736172, "grad_norm": 2.8261449337005615, "learning_rate": 2.9851837718019762e-05, "loss": 0.1782, "step": 3333 }, { "epoch": 0.07346565524687787, "grad_norm": 1.4095779657363892, "learning_rate": 2.9851687585942057e-05, "loss": 0.122, "step": 3334 }, { "epoch": 0.07348769053639403, "grad_norm": 1.9940929412841797, "learning_rate": 2.985153737821674e-05, "loss": 0.1686, "step": 3335 }, { "epoch": 0.0735097258259102, "grad_norm": 1.8710848093032837, "learning_rate": 2.9851387094844578e-05, "loss": 0.1345, "step": 3336 }, { "epoch": 0.07353176111542635, "grad_norm": 2.09440279006958, "learning_rate": 2.985123673582634e-05, "loss": 0.1578, "step": 3337 }, { "epoch": 0.07355379640494251, "grad_norm": 1.7632914781570435, "learning_rate": 2.9851086301162786e-05, "loss": 0.1646, "step": 3338 }, { "epoch": 0.07357583169445868, "grad_norm": 1.3822766542434692, "learning_rate": 2.985093579085469e-05, "loss": 0.1706, "step": 3339 }, { "epoch": 0.07359786698397483, "grad_norm": 1.856770634651184, "learning_rate": 2.9850785204902807e-05, "loss": 0.1621, "step": 3340 }, { "epoch": 0.073619902273491, "grad_norm": 1.800539493560791, "learning_rate": 2.985063454330792e-05, "loss": 0.131, "step": 3341 }, { "epoch": 0.07364193756300716, "grad_norm": 1.933201789855957, "learning_rate": 2.9850483806070785e-05, "loss": 0.152, "step": 3342 }, { "epoch": 0.07366397285252331, "grad_norm": 1.7243596315383911, "learning_rate": 2.985033299319217e-05, "loss": 0.154, "step": 3343 }, { "epoch": 0.07368600814203947, "grad_norm": 1.434842824935913, "learning_rate": 2.9850182104672844e-05, "loss": 0.1806, "step": 3344 }, { "epoch": 0.07370804343155564, "grad_norm": 1.3087530136108398, "learning_rate": 2.9850031140513583e-05, "loss": 0.1743, "step": 3345 }, { "epoch": 0.07373007872107179, "grad_norm": 1.4711660146713257, "learning_rate": 2.9849880100715147e-05, "loss": 0.1142, "step": 3346 }, { "epoch": 0.07375211401058795, "grad_norm": 1.4161202907562256, "learning_rate": 2.9849728985278306e-05, "loss": 0.1661, "step": 3347 }, { "epoch": 0.07377414930010412, "grad_norm": 1.4444071054458618, "learning_rate": 2.9849577794203838e-05, "loss": 0.1682, "step": 3348 }, { "epoch": 0.07379618458962027, "grad_norm": 1.3579422235488892, "learning_rate": 2.9849426527492502e-05, "loss": 0.1978, "step": 3349 }, { "epoch": 0.07381821987913643, "grad_norm": 1.8498339653015137, "learning_rate": 2.984927518514508e-05, "loss": 0.1256, "step": 3350 }, { "epoch": 0.0738402551686526, "grad_norm": 1.3892531394958496, "learning_rate": 2.984912376716233e-05, "loss": 0.1583, "step": 3351 }, { "epoch": 0.07386229045816876, "grad_norm": 1.2983022928237915, "learning_rate": 2.9848972273545033e-05, "loss": 0.1437, "step": 3352 }, { "epoch": 0.07388432574768491, "grad_norm": 1.4991966485977173, "learning_rate": 2.984882070429396e-05, "loss": 0.1926, "step": 3353 }, { "epoch": 0.07390636103720108, "grad_norm": 1.7059043645858765, "learning_rate": 2.9848669059409877e-05, "loss": 0.1902, "step": 3354 }, { "epoch": 0.07392839632671724, "grad_norm": 1.1443947553634644, "learning_rate": 2.9848517338893563e-05, "loss": 0.1426, "step": 3355 }, { "epoch": 0.0739504316162334, "grad_norm": 1.602525234222412, "learning_rate": 2.9848365542745785e-05, "loss": 0.1488, "step": 3356 }, { "epoch": 0.07397246690574956, "grad_norm": 1.1817550659179688, "learning_rate": 2.9848213670967316e-05, "loss": 0.1357, "step": 3357 }, { "epoch": 0.07399450219526572, "grad_norm": 1.3327163457870483, "learning_rate": 2.9848061723558938e-05, "loss": 0.1281, "step": 3358 }, { "epoch": 0.07401653748478187, "grad_norm": 1.5563485622406006, "learning_rate": 2.9847909700521415e-05, "loss": 0.1527, "step": 3359 }, { "epoch": 0.07403857277429804, "grad_norm": 1.3701008558273315, "learning_rate": 2.9847757601855532e-05, "loss": 0.1552, "step": 3360 }, { "epoch": 0.0740606080638142, "grad_norm": 1.6377484798431396, "learning_rate": 2.9847605427562054e-05, "loss": 0.1741, "step": 3361 }, { "epoch": 0.07408264335333035, "grad_norm": 1.2727476358413696, "learning_rate": 2.984745317764176e-05, "loss": 0.1365, "step": 3362 }, { "epoch": 0.07410467864284652, "grad_norm": 1.1852190494537354, "learning_rate": 2.9847300852095424e-05, "loss": 0.157, "step": 3363 }, { "epoch": 0.07412671393236268, "grad_norm": 1.3261843919754028, "learning_rate": 2.9847148450923826e-05, "loss": 0.1732, "step": 3364 }, { "epoch": 0.07414874922187883, "grad_norm": 1.0781145095825195, "learning_rate": 2.9846995974127734e-05, "loss": 0.1497, "step": 3365 }, { "epoch": 0.074170784511395, "grad_norm": 1.0347936153411865, "learning_rate": 2.984684342170793e-05, "loss": 0.1093, "step": 3366 }, { "epoch": 0.07419281980091116, "grad_norm": 2.378951072692871, "learning_rate": 2.9846690793665202e-05, "loss": 0.1538, "step": 3367 }, { "epoch": 0.07421485509042731, "grad_norm": 1.637381911277771, "learning_rate": 2.9846538090000304e-05, "loss": 0.1172, "step": 3368 }, { "epoch": 0.07423689037994348, "grad_norm": 1.204814076423645, "learning_rate": 2.984638531071403e-05, "loss": 0.1806, "step": 3369 }, { "epoch": 0.07425892566945964, "grad_norm": 1.5663213729858398, "learning_rate": 2.9846232455807155e-05, "loss": 0.1659, "step": 3370 }, { "epoch": 0.0742809609589758, "grad_norm": 1.4137616157531738, "learning_rate": 2.984607952528046e-05, "loss": 0.1366, "step": 3371 }, { "epoch": 0.07430299624849196, "grad_norm": 1.5874168872833252, "learning_rate": 2.9845926519134716e-05, "loss": 0.1792, "step": 3372 }, { "epoch": 0.07432503153800812, "grad_norm": 0.932112991809845, "learning_rate": 2.9845773437370708e-05, "loss": 0.126, "step": 3373 }, { "epoch": 0.07434706682752427, "grad_norm": 1.133541464805603, "learning_rate": 2.9845620279989216e-05, "loss": 0.1274, "step": 3374 }, { "epoch": 0.07436910211704044, "grad_norm": 1.0289231538772583, "learning_rate": 2.984546704699102e-05, "loss": 0.1604, "step": 3375 }, { "epoch": 0.0743911374065566, "grad_norm": 1.319379210472107, "learning_rate": 2.9845313738376895e-05, "loss": 0.1699, "step": 3376 }, { "epoch": 0.07441317269607275, "grad_norm": 1.451723575592041, "learning_rate": 2.984516035414763e-05, "loss": 0.1638, "step": 3377 }, { "epoch": 0.07443520798558892, "grad_norm": 2.1060242652893066, "learning_rate": 2.9845006894304006e-05, "loss": 0.1686, "step": 3378 }, { "epoch": 0.07445724327510508, "grad_norm": 1.1061238050460815, "learning_rate": 2.9844853358846797e-05, "loss": 0.1404, "step": 3379 }, { "epoch": 0.07447927856462123, "grad_norm": 1.0441690683364868, "learning_rate": 2.9844699747776793e-05, "loss": 0.1266, "step": 3380 }, { "epoch": 0.0745013138541374, "grad_norm": 1.6740634441375732, "learning_rate": 2.9844546061094766e-05, "loss": 0.1665, "step": 3381 }, { "epoch": 0.07452334914365356, "grad_norm": 1.6821675300598145, "learning_rate": 2.984439229880151e-05, "loss": 0.1559, "step": 3382 }, { "epoch": 0.07454538443316971, "grad_norm": 1.3355101346969604, "learning_rate": 2.9844238460897805e-05, "loss": 0.1386, "step": 3383 }, { "epoch": 0.07456741972268588, "grad_norm": 1.3453327417373657, "learning_rate": 2.984408454738443e-05, "loss": 0.1417, "step": 3384 }, { "epoch": 0.07458945501220204, "grad_norm": 1.5106405019760132, "learning_rate": 2.984393055826218e-05, "loss": 0.1514, "step": 3385 }, { "epoch": 0.07461149030171821, "grad_norm": 1.4501127004623413, "learning_rate": 2.9843776493531826e-05, "loss": 0.1227, "step": 3386 }, { "epoch": 0.07463352559123436, "grad_norm": 1.0206222534179688, "learning_rate": 2.984362235319416e-05, "loss": 0.133, "step": 3387 }, { "epoch": 0.07465556088075052, "grad_norm": 1.5742682218551636, "learning_rate": 2.9843468137249962e-05, "loss": 0.1505, "step": 3388 }, { "epoch": 0.07467759617026669, "grad_norm": 1.682254672050476, "learning_rate": 2.984331384570002e-05, "loss": 0.1873, "step": 3389 }, { "epoch": 0.07469963145978284, "grad_norm": 1.0026615858078003, "learning_rate": 2.9843159478545128e-05, "loss": 0.1488, "step": 3390 }, { "epoch": 0.074721666749299, "grad_norm": 1.376360535621643, "learning_rate": 2.984300503578606e-05, "loss": 0.1733, "step": 3391 }, { "epoch": 0.07474370203881517, "grad_norm": 1.15972900390625, "learning_rate": 2.9842850517423606e-05, "loss": 0.1165, "step": 3392 }, { "epoch": 0.07476573732833132, "grad_norm": 1.1188278198242188, "learning_rate": 2.9842695923458563e-05, "loss": 0.1009, "step": 3393 }, { "epoch": 0.07478777261784748, "grad_norm": 1.3811111450195312, "learning_rate": 2.9842541253891705e-05, "loss": 0.1597, "step": 3394 }, { "epoch": 0.07480980790736365, "grad_norm": 1.5872491598129272, "learning_rate": 2.9842386508723823e-05, "loss": 0.1939, "step": 3395 }, { "epoch": 0.0748318431968798, "grad_norm": 1.0623027086257935, "learning_rate": 2.9842231687955713e-05, "loss": 0.1094, "step": 3396 }, { "epoch": 0.07485387848639596, "grad_norm": 1.4707316160202026, "learning_rate": 2.9842076791588154e-05, "loss": 0.2058, "step": 3397 }, { "epoch": 0.07487591377591213, "grad_norm": 1.3312392234802246, "learning_rate": 2.9841921819621942e-05, "loss": 0.1436, "step": 3398 }, { "epoch": 0.07489794906542828, "grad_norm": 0.9528480172157288, "learning_rate": 2.984176677205786e-05, "loss": 0.0928, "step": 3399 }, { "epoch": 0.07491998435494444, "grad_norm": 1.8212255239486694, "learning_rate": 2.9841611648896704e-05, "loss": 0.1806, "step": 3400 }, { "epoch": 0.07494201964446061, "grad_norm": 1.433376669883728, "learning_rate": 2.9841456450139256e-05, "loss": 0.1383, "step": 3401 }, { "epoch": 0.07496405493397676, "grad_norm": 1.5917938947677612, "learning_rate": 2.9841301175786317e-05, "loss": 0.1552, "step": 3402 }, { "epoch": 0.07498609022349292, "grad_norm": 1.3486628532409668, "learning_rate": 2.9841145825838673e-05, "loss": 0.1112, "step": 3403 }, { "epoch": 0.07500812551300909, "grad_norm": 1.4264758825302124, "learning_rate": 2.9840990400297112e-05, "loss": 0.1444, "step": 3404 }, { "epoch": 0.07503016080252524, "grad_norm": 1.9573670625686646, "learning_rate": 2.984083489916243e-05, "loss": 0.1555, "step": 3405 }, { "epoch": 0.0750521960920414, "grad_norm": 1.0346369743347168, "learning_rate": 2.9840679322435417e-05, "loss": 0.1223, "step": 3406 }, { "epoch": 0.07507423138155757, "grad_norm": 2.143392324447632, "learning_rate": 2.9840523670116865e-05, "loss": 0.1647, "step": 3407 }, { "epoch": 0.07509626667107372, "grad_norm": 1.3865644931793213, "learning_rate": 2.9840367942207568e-05, "loss": 0.1553, "step": 3408 }, { "epoch": 0.07511830196058988, "grad_norm": 1.5151093006134033, "learning_rate": 2.9840212138708322e-05, "loss": 0.1252, "step": 3409 }, { "epoch": 0.07514033725010605, "grad_norm": 1.3179889917373657, "learning_rate": 2.9840056259619914e-05, "loss": 0.1349, "step": 3410 }, { "epoch": 0.0751623725396222, "grad_norm": 1.7781610488891602, "learning_rate": 2.9839900304943142e-05, "loss": 0.1587, "step": 3411 }, { "epoch": 0.07518440782913836, "grad_norm": 1.4621224403381348, "learning_rate": 2.9839744274678804e-05, "loss": 0.1879, "step": 3412 }, { "epoch": 0.07520644311865453, "grad_norm": 1.093132734298706, "learning_rate": 2.983958816882769e-05, "loss": 0.1201, "step": 3413 }, { "epoch": 0.07522847840817068, "grad_norm": 1.9078547954559326, "learning_rate": 2.983943198739059e-05, "loss": 0.188, "step": 3414 }, { "epoch": 0.07525051369768684, "grad_norm": 1.5801173448562622, "learning_rate": 2.983927573036831e-05, "loss": 0.1393, "step": 3415 }, { "epoch": 0.07527254898720301, "grad_norm": 2.9709644317626953, "learning_rate": 2.983911939776164e-05, "loss": 0.1623, "step": 3416 }, { "epoch": 0.07529458427671916, "grad_norm": 1.3929098844528198, "learning_rate": 2.9838962989571377e-05, "loss": 0.1228, "step": 3417 }, { "epoch": 0.07531661956623532, "grad_norm": 1.8392047882080078, "learning_rate": 2.9838806505798317e-05, "loss": 0.1691, "step": 3418 }, { "epoch": 0.07533865485575149, "grad_norm": 1.9499279260635376, "learning_rate": 2.9838649946443265e-05, "loss": 0.1915, "step": 3419 }, { "epoch": 0.07536069014526764, "grad_norm": 1.820739984512329, "learning_rate": 2.9838493311507e-05, "loss": 0.1547, "step": 3420 }, { "epoch": 0.0753827254347838, "grad_norm": 1.0835005044937134, "learning_rate": 2.983833660099034e-05, "loss": 0.1532, "step": 3421 }, { "epoch": 0.07540476072429997, "grad_norm": 1.4871702194213867, "learning_rate": 2.9838179814894072e-05, "loss": 0.1601, "step": 3422 }, { "epoch": 0.07542679601381613, "grad_norm": 1.534975528717041, "learning_rate": 2.9838022953219005e-05, "loss": 0.1059, "step": 3423 }, { "epoch": 0.07544883130333228, "grad_norm": 1.4115076065063477, "learning_rate": 2.9837866015965922e-05, "loss": 0.1331, "step": 3424 }, { "epoch": 0.07547086659284845, "grad_norm": 1.766175389289856, "learning_rate": 2.983770900313563e-05, "loss": 0.2067, "step": 3425 }, { "epoch": 0.07549290188236461, "grad_norm": 1.1485482454299927, "learning_rate": 2.983755191472893e-05, "loss": 0.1275, "step": 3426 }, { "epoch": 0.07551493717188076, "grad_norm": 1.2696558237075806, "learning_rate": 2.9837394750746626e-05, "loss": 0.1377, "step": 3427 }, { "epoch": 0.07553697246139693, "grad_norm": 0.8550655245780945, "learning_rate": 2.9837237511189514e-05, "loss": 0.1357, "step": 3428 }, { "epoch": 0.0755590077509131, "grad_norm": 2.0743188858032227, "learning_rate": 2.9837080196058394e-05, "loss": 0.1991, "step": 3429 }, { "epoch": 0.07558104304042924, "grad_norm": 1.5283328294754028, "learning_rate": 2.9836922805354063e-05, "loss": 0.1917, "step": 3430 }, { "epoch": 0.07560307832994541, "grad_norm": 1.3844622373580933, "learning_rate": 2.983676533907733e-05, "loss": 0.1851, "step": 3431 }, { "epoch": 0.07562511361946157, "grad_norm": 1.432653784751892, "learning_rate": 2.9836607797229e-05, "loss": 0.1266, "step": 3432 }, { "epoch": 0.07564714890897772, "grad_norm": 1.4283382892608643, "learning_rate": 2.983645017980987e-05, "loss": 0.17, "step": 3433 }, { "epoch": 0.07566918419849389, "grad_norm": 1.464066505432129, "learning_rate": 2.9836292486820737e-05, "loss": 0.134, "step": 3434 }, { "epoch": 0.07569121948801005, "grad_norm": 1.6448982954025269, "learning_rate": 2.9836134718262415e-05, "loss": 0.1996, "step": 3435 }, { "epoch": 0.0757132547775262, "grad_norm": 1.602181077003479, "learning_rate": 2.9835976874135702e-05, "loss": 0.1941, "step": 3436 }, { "epoch": 0.07573529006704237, "grad_norm": 1.456031322479248, "learning_rate": 2.9835818954441403e-05, "loss": 0.1996, "step": 3437 }, { "epoch": 0.07575732535655853, "grad_norm": 0.8993383646011353, "learning_rate": 2.9835660959180323e-05, "loss": 0.1351, "step": 3438 }, { "epoch": 0.07577936064607468, "grad_norm": 1.0347661972045898, "learning_rate": 2.9835502888353266e-05, "loss": 0.1123, "step": 3439 }, { "epoch": 0.07580139593559085, "grad_norm": 1.115238070487976, "learning_rate": 2.9835344741961037e-05, "loss": 0.1635, "step": 3440 }, { "epoch": 0.07582343122510701, "grad_norm": 1.547398328781128, "learning_rate": 2.983518652000444e-05, "loss": 0.1189, "step": 3441 }, { "epoch": 0.07584546651462316, "grad_norm": 1.92438805103302, "learning_rate": 2.9835028222484288e-05, "loss": 0.1679, "step": 3442 }, { "epoch": 0.07586750180413933, "grad_norm": 1.7119251489639282, "learning_rate": 2.983486984940138e-05, "loss": 0.1387, "step": 3443 }, { "epoch": 0.07588953709365549, "grad_norm": 2.0784599781036377, "learning_rate": 2.9834711400756523e-05, "loss": 0.1964, "step": 3444 }, { "epoch": 0.07591157238317164, "grad_norm": 1.4196940660476685, "learning_rate": 2.9834552876550527e-05, "loss": 0.165, "step": 3445 }, { "epoch": 0.07593360767268781, "grad_norm": 1.8447585105895996, "learning_rate": 2.98343942767842e-05, "loss": 0.1414, "step": 3446 }, { "epoch": 0.07595564296220397, "grad_norm": 1.326520323753357, "learning_rate": 2.9834235601458345e-05, "loss": 0.1741, "step": 3447 }, { "epoch": 0.07597767825172012, "grad_norm": 1.2849946022033691, "learning_rate": 2.9834076850573773e-05, "loss": 0.1465, "step": 3448 }, { "epoch": 0.07599971354123629, "grad_norm": 2.3287761211395264, "learning_rate": 2.9833918024131296e-05, "loss": 0.1566, "step": 3449 }, { "epoch": 0.07602174883075245, "grad_norm": 1.5331827402114868, "learning_rate": 2.983375912213172e-05, "loss": 0.1852, "step": 3450 }, { "epoch": 0.0760437841202686, "grad_norm": 1.1264362335205078, "learning_rate": 2.983360014457585e-05, "loss": 0.1241, "step": 3451 }, { "epoch": 0.07606581940978477, "grad_norm": 1.8006558418273926, "learning_rate": 2.98334410914645e-05, "loss": 0.1968, "step": 3452 }, { "epoch": 0.07608785469930093, "grad_norm": 1.9179927110671997, "learning_rate": 2.9833281962798484e-05, "loss": 0.1463, "step": 3453 }, { "epoch": 0.07610988998881708, "grad_norm": 2.3211476802825928, "learning_rate": 2.983312275857861e-05, "loss": 0.148, "step": 3454 }, { "epoch": 0.07613192527833325, "grad_norm": 1.3673983812332153, "learning_rate": 2.9832963478805685e-05, "loss": 0.1767, "step": 3455 }, { "epoch": 0.07615396056784941, "grad_norm": 1.4690572023391724, "learning_rate": 2.983280412348052e-05, "loss": 0.1355, "step": 3456 }, { "epoch": 0.07617599585736558, "grad_norm": 1.1389762163162231, "learning_rate": 2.9832644692603933e-05, "loss": 0.1344, "step": 3457 }, { "epoch": 0.07619803114688173, "grad_norm": 1.5922341346740723, "learning_rate": 2.9832485186176736e-05, "loss": 0.1084, "step": 3458 }, { "epoch": 0.07622006643639789, "grad_norm": 2.091843605041504, "learning_rate": 2.9832325604199735e-05, "loss": 0.1483, "step": 3459 }, { "epoch": 0.07624210172591406, "grad_norm": 1.5839483737945557, "learning_rate": 2.9832165946673746e-05, "loss": 0.1462, "step": 3460 }, { "epoch": 0.07626413701543021, "grad_norm": 3.5043270587921143, "learning_rate": 2.983200621359958e-05, "loss": 0.2099, "step": 3461 }, { "epoch": 0.07628617230494637, "grad_norm": 1.3660435676574707, "learning_rate": 2.9831846404978057e-05, "loss": 0.1881, "step": 3462 }, { "epoch": 0.07630820759446254, "grad_norm": 1.4706636667251587, "learning_rate": 2.9831686520809984e-05, "loss": 0.1224, "step": 3463 }, { "epoch": 0.07633024288397869, "grad_norm": 2.2093584537506104, "learning_rate": 2.9831526561096178e-05, "loss": 0.2039, "step": 3464 }, { "epoch": 0.07635227817349485, "grad_norm": 1.2984331846237183, "learning_rate": 2.9831366525837455e-05, "loss": 0.1964, "step": 3465 }, { "epoch": 0.07637431346301102, "grad_norm": 1.6869820356369019, "learning_rate": 2.9831206415034628e-05, "loss": 0.1538, "step": 3466 }, { "epoch": 0.07639634875252717, "grad_norm": 1.1270700693130493, "learning_rate": 2.9831046228688517e-05, "loss": 0.1183, "step": 3467 }, { "epoch": 0.07641838404204333, "grad_norm": 1.372898817062378, "learning_rate": 2.983088596679993e-05, "loss": 0.1878, "step": 3468 }, { "epoch": 0.0764404193315595, "grad_norm": 1.2652270793914795, "learning_rate": 2.9830725629369692e-05, "loss": 0.1701, "step": 3469 }, { "epoch": 0.07646245462107565, "grad_norm": 1.828790545463562, "learning_rate": 2.983056521639861e-05, "loss": 0.1358, "step": 3470 }, { "epoch": 0.07648448991059181, "grad_norm": 1.6411752700805664, "learning_rate": 2.983040472788751e-05, "loss": 0.1484, "step": 3471 }, { "epoch": 0.07650652520010798, "grad_norm": 2.2059786319732666, "learning_rate": 2.9830244163837204e-05, "loss": 0.1195, "step": 3472 }, { "epoch": 0.07652856048962413, "grad_norm": 1.5121115446090698, "learning_rate": 2.9830083524248516e-05, "loss": 0.1374, "step": 3473 }, { "epoch": 0.07655059577914029, "grad_norm": 1.4024879932403564, "learning_rate": 2.9829922809122257e-05, "loss": 0.1666, "step": 3474 }, { "epoch": 0.07657263106865646, "grad_norm": 1.0707240104675293, "learning_rate": 2.982976201845925e-05, "loss": 0.1795, "step": 3475 }, { "epoch": 0.07659466635817261, "grad_norm": 0.9103511571884155, "learning_rate": 2.982960115226031e-05, "loss": 0.1094, "step": 3476 }, { "epoch": 0.07661670164768877, "grad_norm": 1.9952512979507446, "learning_rate": 2.982944021052626e-05, "loss": 0.1635, "step": 3477 }, { "epoch": 0.07663873693720494, "grad_norm": 1.8970526456832886, "learning_rate": 2.982927919325792e-05, "loss": 0.1588, "step": 3478 }, { "epoch": 0.07666077222672109, "grad_norm": 1.4837653636932373, "learning_rate": 2.982911810045611e-05, "loss": 0.134, "step": 3479 }, { "epoch": 0.07668280751623725, "grad_norm": 1.4135853052139282, "learning_rate": 2.982895693212165e-05, "loss": 0.1599, "step": 3480 }, { "epoch": 0.07670484280575342, "grad_norm": 1.2711933851242065, "learning_rate": 2.982879568825536e-05, "loss": 0.1341, "step": 3481 }, { "epoch": 0.07672687809526957, "grad_norm": 1.383834719657898, "learning_rate": 2.9828634368858055e-05, "loss": 0.1413, "step": 3482 }, { "epoch": 0.07674891338478573, "grad_norm": 1.2537281513214111, "learning_rate": 2.9828472973930572e-05, "loss": 0.1363, "step": 3483 }, { "epoch": 0.0767709486743019, "grad_norm": 1.103349208831787, "learning_rate": 2.982831150347372e-05, "loss": 0.0963, "step": 3484 }, { "epoch": 0.07679298396381805, "grad_norm": 1.6083780527114868, "learning_rate": 2.982814995748833e-05, "loss": 0.1094, "step": 3485 }, { "epoch": 0.07681501925333421, "grad_norm": 1.4470911026000977, "learning_rate": 2.9827988335975216e-05, "loss": 0.1419, "step": 3486 }, { "epoch": 0.07683705454285038, "grad_norm": 1.6968108415603638, "learning_rate": 2.9827826638935208e-05, "loss": 0.1508, "step": 3487 }, { "epoch": 0.07685908983236653, "grad_norm": 1.0420544147491455, "learning_rate": 2.982766486636913e-05, "loss": 0.1771, "step": 3488 }, { "epoch": 0.07688112512188269, "grad_norm": 2.6120119094848633, "learning_rate": 2.9827503018277802e-05, "loss": 0.2252, "step": 3489 }, { "epoch": 0.07690316041139886, "grad_norm": 1.5684739351272583, "learning_rate": 2.9827341094662048e-05, "loss": 0.1117, "step": 3490 }, { "epoch": 0.07692519570091502, "grad_norm": 1.3780144453048706, "learning_rate": 2.9827179095522698e-05, "loss": 0.184, "step": 3491 }, { "epoch": 0.07694723099043117, "grad_norm": 1.3182148933410645, "learning_rate": 2.9827017020860572e-05, "loss": 0.1397, "step": 3492 }, { "epoch": 0.07696926627994734, "grad_norm": 1.4258983135223389, "learning_rate": 2.9826854870676495e-05, "loss": 0.1489, "step": 3493 }, { "epoch": 0.0769913015694635, "grad_norm": 1.802432894706726, "learning_rate": 2.9826692644971298e-05, "loss": 0.1497, "step": 3494 }, { "epoch": 0.07701333685897965, "grad_norm": 1.060014247894287, "learning_rate": 2.9826530343745806e-05, "loss": 0.154, "step": 3495 }, { "epoch": 0.07703537214849582, "grad_norm": 1.3550927639007568, "learning_rate": 2.9826367967000842e-05, "loss": 0.1601, "step": 3496 }, { "epoch": 0.07705740743801198, "grad_norm": 1.8283414840698242, "learning_rate": 2.9826205514737236e-05, "loss": 0.1811, "step": 3497 }, { "epoch": 0.07707944272752813, "grad_norm": 1.839292287826538, "learning_rate": 2.9826042986955814e-05, "loss": 0.2174, "step": 3498 }, { "epoch": 0.0771014780170443, "grad_norm": 1.4382554292678833, "learning_rate": 2.9825880383657408e-05, "loss": 0.1204, "step": 3499 }, { "epoch": 0.07712351330656046, "grad_norm": 1.6790986061096191, "learning_rate": 2.982571770484284e-05, "loss": 0.1645, "step": 3500 }, { "epoch": 0.07714554859607661, "grad_norm": 1.4357895851135254, "learning_rate": 2.9825554950512942e-05, "loss": 0.1562, "step": 3501 }, { "epoch": 0.07716758388559278, "grad_norm": 1.2819104194641113, "learning_rate": 2.9825392120668543e-05, "loss": 0.1897, "step": 3502 }, { "epoch": 0.07718961917510894, "grad_norm": 2.2484335899353027, "learning_rate": 2.9825229215310472e-05, "loss": 0.2421, "step": 3503 }, { "epoch": 0.07721165446462509, "grad_norm": 1.004550576210022, "learning_rate": 2.982506623443956e-05, "loss": 0.1196, "step": 3504 }, { "epoch": 0.07723368975414126, "grad_norm": 1.3966681957244873, "learning_rate": 2.9824903178056633e-05, "loss": 0.137, "step": 3505 }, { "epoch": 0.07725572504365742, "grad_norm": 0.9923416376113892, "learning_rate": 2.9824740046162525e-05, "loss": 0.1629, "step": 3506 }, { "epoch": 0.07727776033317357, "grad_norm": 1.3411346673965454, "learning_rate": 2.9824576838758066e-05, "loss": 0.1848, "step": 3507 }, { "epoch": 0.07729979562268974, "grad_norm": 2.9402084350585938, "learning_rate": 2.982441355584409e-05, "loss": 0.1235, "step": 3508 }, { "epoch": 0.0773218309122059, "grad_norm": 1.4411842823028564, "learning_rate": 2.9824250197421427e-05, "loss": 0.2085, "step": 3509 }, { "epoch": 0.07734386620172205, "grad_norm": 2.003737449645996, "learning_rate": 2.9824086763490904e-05, "loss": 0.1505, "step": 3510 }, { "epoch": 0.07736590149123822, "grad_norm": 1.3009033203125, "learning_rate": 2.9823923254053364e-05, "loss": 0.1354, "step": 3511 }, { "epoch": 0.07738793678075438, "grad_norm": 1.3903688192367554, "learning_rate": 2.9823759669109626e-05, "loss": 0.1063, "step": 3512 }, { "epoch": 0.07740997207027053, "grad_norm": 1.472434163093567, "learning_rate": 2.9823596008660535e-05, "loss": 0.1211, "step": 3513 }, { "epoch": 0.0774320073597867, "grad_norm": 2.48331618309021, "learning_rate": 2.982343227270692e-05, "loss": 0.1476, "step": 3514 }, { "epoch": 0.07745404264930286, "grad_norm": 1.1531717777252197, "learning_rate": 2.982326846124962e-05, "loss": 0.1679, "step": 3515 }, { "epoch": 0.07747607793881901, "grad_norm": 1.0938936471939087, "learning_rate": 2.9823104574289456e-05, "loss": 0.1221, "step": 3516 }, { "epoch": 0.07749811322833518, "grad_norm": 1.3243950605392456, "learning_rate": 2.9822940611827275e-05, "loss": 0.1786, "step": 3517 }, { "epoch": 0.07752014851785134, "grad_norm": 1.2098290920257568, "learning_rate": 2.982277657386391e-05, "loss": 0.1394, "step": 3518 }, { "epoch": 0.07754218380736749, "grad_norm": 1.4886304140090942, "learning_rate": 2.9822612460400196e-05, "loss": 0.1363, "step": 3519 }, { "epoch": 0.07756421909688366, "grad_norm": 1.864516258239746, "learning_rate": 2.982244827143697e-05, "loss": 0.1601, "step": 3520 }, { "epoch": 0.07758625438639982, "grad_norm": 1.0562318563461304, "learning_rate": 2.9822284006975063e-05, "loss": 0.1032, "step": 3521 }, { "epoch": 0.07760828967591597, "grad_norm": 1.6529942750930786, "learning_rate": 2.9822119667015314e-05, "loss": 0.1553, "step": 3522 }, { "epoch": 0.07763032496543214, "grad_norm": 1.7069637775421143, "learning_rate": 2.982195525155856e-05, "loss": 0.2122, "step": 3523 }, { "epoch": 0.0776523602549483, "grad_norm": 1.0592290163040161, "learning_rate": 2.9821790760605645e-05, "loss": 0.1478, "step": 3524 }, { "epoch": 0.07767439554446445, "grad_norm": 1.5061259269714355, "learning_rate": 2.98216261941574e-05, "loss": 0.1276, "step": 3525 }, { "epoch": 0.07769643083398062, "grad_norm": 2.2190587520599365, "learning_rate": 2.9821461552214665e-05, "loss": 0.1254, "step": 3526 }, { "epoch": 0.07771846612349678, "grad_norm": 1.183891773223877, "learning_rate": 2.982129683477828e-05, "loss": 0.1307, "step": 3527 }, { "epoch": 0.07774050141301295, "grad_norm": 1.3661549091339111, "learning_rate": 2.9821132041849078e-05, "loss": 0.1288, "step": 3528 }, { "epoch": 0.0777625367025291, "grad_norm": 1.2630345821380615, "learning_rate": 2.9820967173427906e-05, "loss": 0.1437, "step": 3529 }, { "epoch": 0.07778457199204526, "grad_norm": 1.1020935773849487, "learning_rate": 2.9820802229515598e-05, "loss": 0.1238, "step": 3530 }, { "epoch": 0.07780660728156143, "grad_norm": 1.7733856439590454, "learning_rate": 2.9820637210113002e-05, "loss": 0.1697, "step": 3531 }, { "epoch": 0.07782864257107758, "grad_norm": 1.2323613166809082, "learning_rate": 2.9820472115220947e-05, "loss": 0.1383, "step": 3532 }, { "epoch": 0.07785067786059374, "grad_norm": 1.6084802150726318, "learning_rate": 2.9820306944840285e-05, "loss": 0.2146, "step": 3533 }, { "epoch": 0.0778727131501099, "grad_norm": 1.6122636795043945, "learning_rate": 2.982014169897185e-05, "loss": 0.1728, "step": 3534 }, { "epoch": 0.07789474843962606, "grad_norm": 1.5480183362960815, "learning_rate": 2.9819976377616483e-05, "loss": 0.1393, "step": 3535 }, { "epoch": 0.07791678372914222, "grad_norm": 1.4748444557189941, "learning_rate": 2.9819810980775034e-05, "loss": 0.1637, "step": 3536 }, { "epoch": 0.07793881901865839, "grad_norm": 1.6238363981246948, "learning_rate": 2.981964550844834e-05, "loss": 0.1495, "step": 3537 }, { "epoch": 0.07796085430817454, "grad_norm": 1.4219831228256226, "learning_rate": 2.981947996063724e-05, "loss": 0.2042, "step": 3538 }, { "epoch": 0.0779828895976907, "grad_norm": 1.6138546466827393, "learning_rate": 2.981931433734259e-05, "loss": 0.2019, "step": 3539 }, { "epoch": 0.07800492488720687, "grad_norm": 1.2415282726287842, "learning_rate": 2.981914863856522e-05, "loss": 0.1731, "step": 3540 }, { "epoch": 0.07802696017672302, "grad_norm": 1.193829894065857, "learning_rate": 2.9818982864305976e-05, "loss": 0.1551, "step": 3541 }, { "epoch": 0.07804899546623918, "grad_norm": 0.7906174659729004, "learning_rate": 2.9818817014565714e-05, "loss": 0.1079, "step": 3542 }, { "epoch": 0.07807103075575535, "grad_norm": 1.4877651929855347, "learning_rate": 2.9818651089345265e-05, "loss": 0.1427, "step": 3543 }, { "epoch": 0.0780930660452715, "grad_norm": 1.3929518461227417, "learning_rate": 2.981848508864548e-05, "loss": 0.1752, "step": 3544 }, { "epoch": 0.07811510133478766, "grad_norm": 1.2358626127243042, "learning_rate": 2.9818319012467204e-05, "loss": 0.2055, "step": 3545 }, { "epoch": 0.07813713662430383, "grad_norm": 1.2230923175811768, "learning_rate": 2.9818152860811287e-05, "loss": 0.1938, "step": 3546 }, { "epoch": 0.07815917191381998, "grad_norm": 1.3451660871505737, "learning_rate": 2.9817986633678568e-05, "loss": 0.2203, "step": 3547 }, { "epoch": 0.07818120720333614, "grad_norm": 1.4525973796844482, "learning_rate": 2.9817820331069898e-05, "loss": 0.1575, "step": 3548 }, { "epoch": 0.0782032424928523, "grad_norm": 1.2960222959518433, "learning_rate": 2.9817653952986122e-05, "loss": 0.1702, "step": 3549 }, { "epoch": 0.07822527778236846, "grad_norm": 1.5977272987365723, "learning_rate": 2.9817487499428086e-05, "loss": 0.1389, "step": 3550 }, { "epoch": 0.07824731307188462, "grad_norm": 1.4555611610412598, "learning_rate": 2.9817320970396646e-05, "loss": 0.1494, "step": 3551 }, { "epoch": 0.07826934836140079, "grad_norm": 2.020268678665161, "learning_rate": 2.9817154365892643e-05, "loss": 0.1752, "step": 3552 }, { "epoch": 0.07829138365091694, "grad_norm": 1.5631242990493774, "learning_rate": 2.9816987685916927e-05, "loss": 0.1699, "step": 3553 }, { "epoch": 0.0783134189404331, "grad_norm": 1.4526729583740234, "learning_rate": 2.9816820930470345e-05, "loss": 0.1198, "step": 3554 }, { "epoch": 0.07833545422994927, "grad_norm": 1.9557536840438843, "learning_rate": 2.9816654099553747e-05, "loss": 0.1584, "step": 3555 }, { "epoch": 0.07835748951946542, "grad_norm": 1.194319725036621, "learning_rate": 2.981648719316799e-05, "loss": 0.1533, "step": 3556 }, { "epoch": 0.07837952480898158, "grad_norm": 1.0682060718536377, "learning_rate": 2.981632021131392e-05, "loss": 0.1661, "step": 3557 }, { "epoch": 0.07840156009849775, "grad_norm": 1.1128132343292236, "learning_rate": 2.9816153153992378e-05, "loss": 0.1344, "step": 3558 }, { "epoch": 0.0784235953880139, "grad_norm": 1.4081395864486694, "learning_rate": 2.9815986021204225e-05, "loss": 0.1669, "step": 3559 }, { "epoch": 0.07844563067753006, "grad_norm": 1.820986032485962, "learning_rate": 2.9815818812950313e-05, "loss": 0.1377, "step": 3560 }, { "epoch": 0.07846766596704623, "grad_norm": 1.3872147798538208, "learning_rate": 2.9815651529231494e-05, "loss": 0.1653, "step": 3561 }, { "epoch": 0.07848970125656239, "grad_norm": 1.023447036743164, "learning_rate": 2.9815484170048612e-05, "loss": 0.1208, "step": 3562 }, { "epoch": 0.07851173654607854, "grad_norm": 1.4417411088943481, "learning_rate": 2.9815316735402522e-05, "loss": 0.1629, "step": 3563 }, { "epoch": 0.0785337718355947, "grad_norm": 1.783839464187622, "learning_rate": 2.9815149225294083e-05, "loss": 0.1577, "step": 3564 }, { "epoch": 0.07855580712511087, "grad_norm": 2.549837112426758, "learning_rate": 2.9814981639724147e-05, "loss": 0.1728, "step": 3565 }, { "epoch": 0.07857784241462702, "grad_norm": 2.0011911392211914, "learning_rate": 2.9814813978693563e-05, "loss": 0.1494, "step": 3566 }, { "epoch": 0.07859987770414319, "grad_norm": 1.317466139793396, "learning_rate": 2.9814646242203186e-05, "loss": 0.1257, "step": 3567 }, { "epoch": 0.07862191299365935, "grad_norm": 1.3390145301818848, "learning_rate": 2.981447843025387e-05, "loss": 0.1727, "step": 3568 }, { "epoch": 0.0786439482831755, "grad_norm": 1.6396212577819824, "learning_rate": 2.9814310542846473e-05, "loss": 0.1171, "step": 3569 }, { "epoch": 0.07866598357269167, "grad_norm": 1.0853012800216675, "learning_rate": 2.9814142579981848e-05, "loss": 0.1658, "step": 3570 }, { "epoch": 0.07868801886220783, "grad_norm": 1.196773886680603, "learning_rate": 2.981397454166085e-05, "loss": 0.1332, "step": 3571 }, { "epoch": 0.07871005415172398, "grad_norm": 1.2715100049972534, "learning_rate": 2.9813806427884337e-05, "loss": 0.2006, "step": 3572 }, { "epoch": 0.07873208944124015, "grad_norm": 1.2979265451431274, "learning_rate": 2.981363823865317e-05, "loss": 0.1377, "step": 3573 }, { "epoch": 0.07875412473075631, "grad_norm": 1.4826536178588867, "learning_rate": 2.981346997396819e-05, "loss": 0.1809, "step": 3574 }, { "epoch": 0.07877616002027246, "grad_norm": 1.4876704216003418, "learning_rate": 2.981330163383027e-05, "loss": 0.1458, "step": 3575 }, { "epoch": 0.07879819530978863, "grad_norm": 1.0409356355667114, "learning_rate": 2.9813133218240256e-05, "loss": 0.1119, "step": 3576 }, { "epoch": 0.07882023059930479, "grad_norm": 1.4736738204956055, "learning_rate": 2.9812964727199014e-05, "loss": 0.1823, "step": 3577 }, { "epoch": 0.07884226588882094, "grad_norm": 1.591476559638977, "learning_rate": 2.9812796160707405e-05, "loss": 0.1611, "step": 3578 }, { "epoch": 0.0788643011783371, "grad_norm": 1.5362952947616577, "learning_rate": 2.981262751876627e-05, "loss": 0.1713, "step": 3579 }, { "epoch": 0.07888633646785327, "grad_norm": 1.8801900148391724, "learning_rate": 2.9812458801376487e-05, "loss": 0.1501, "step": 3580 }, { "epoch": 0.07890837175736942, "grad_norm": 1.311013102531433, "learning_rate": 2.9812290008538907e-05, "loss": 0.1536, "step": 3581 }, { "epoch": 0.07893040704688559, "grad_norm": 1.3895318508148193, "learning_rate": 2.981212114025439e-05, "loss": 0.1686, "step": 3582 }, { "epoch": 0.07895244233640175, "grad_norm": 2.254718780517578, "learning_rate": 2.98119521965238e-05, "loss": 0.2019, "step": 3583 }, { "epoch": 0.0789744776259179, "grad_norm": 1.320438265800476, "learning_rate": 2.9811783177347996e-05, "loss": 0.1615, "step": 3584 }, { "epoch": 0.07899651291543407, "grad_norm": 1.2784150838851929, "learning_rate": 2.9811614082727834e-05, "loss": 0.1715, "step": 3585 }, { "epoch": 0.07901854820495023, "grad_norm": 1.5578523874282837, "learning_rate": 2.9811444912664178e-05, "loss": 0.1649, "step": 3586 }, { "epoch": 0.07904058349446638, "grad_norm": 1.2578701972961426, "learning_rate": 2.9811275667157893e-05, "loss": 0.1528, "step": 3587 }, { "epoch": 0.07906261878398255, "grad_norm": 1.251830816268921, "learning_rate": 2.981110634620984e-05, "loss": 0.1242, "step": 3588 }, { "epoch": 0.07908465407349871, "grad_norm": 1.6642788648605347, "learning_rate": 2.9810936949820878e-05, "loss": 0.1754, "step": 3589 }, { "epoch": 0.07910668936301486, "grad_norm": 1.4900559186935425, "learning_rate": 2.981076747799187e-05, "loss": 0.1985, "step": 3590 }, { "epoch": 0.07912872465253103, "grad_norm": 1.6420010328292847, "learning_rate": 2.9810597930723682e-05, "loss": 0.1623, "step": 3591 }, { "epoch": 0.07915075994204719, "grad_norm": 1.7985576391220093, "learning_rate": 2.9810428308017177e-05, "loss": 0.1737, "step": 3592 }, { "epoch": 0.07917279523156334, "grad_norm": 1.9794745445251465, "learning_rate": 2.981025860987322e-05, "loss": 0.1631, "step": 3593 }, { "epoch": 0.0791948305210795, "grad_norm": 1.2911503314971924, "learning_rate": 2.981008883629267e-05, "loss": 0.1403, "step": 3594 }, { "epoch": 0.07921686581059567, "grad_norm": 1.4486854076385498, "learning_rate": 2.98099189872764e-05, "loss": 0.1598, "step": 3595 }, { "epoch": 0.07923890110011184, "grad_norm": 1.3243621587753296, "learning_rate": 2.9809749062825268e-05, "loss": 0.1307, "step": 3596 }, { "epoch": 0.07926093638962799, "grad_norm": 1.240605115890503, "learning_rate": 2.9809579062940148e-05, "loss": 0.1399, "step": 3597 }, { "epoch": 0.07928297167914415, "grad_norm": 1.2245365381240845, "learning_rate": 2.9809408987621895e-05, "loss": 0.1268, "step": 3598 }, { "epoch": 0.07930500696866032, "grad_norm": 1.642541766166687, "learning_rate": 2.980923883687138e-05, "loss": 0.1481, "step": 3599 }, { "epoch": 0.07932704225817647, "grad_norm": 1.525739073753357, "learning_rate": 2.9809068610689476e-05, "loss": 0.1393, "step": 3600 }, { "epoch": 0.07934907754769263, "grad_norm": 1.5757824182510376, "learning_rate": 2.980889830907704e-05, "loss": 0.1623, "step": 3601 }, { "epoch": 0.0793711128372088, "grad_norm": 1.6570765972137451, "learning_rate": 2.9808727932034945e-05, "loss": 0.1414, "step": 3602 }, { "epoch": 0.07939314812672495, "grad_norm": 1.2067419290542603, "learning_rate": 2.9808557479564054e-05, "loss": 0.162, "step": 3603 }, { "epoch": 0.07941518341624111, "grad_norm": 1.2270283699035645, "learning_rate": 2.9808386951665246e-05, "loss": 0.1844, "step": 3604 }, { "epoch": 0.07943721870575728, "grad_norm": 1.448468565940857, "learning_rate": 2.9808216348339376e-05, "loss": 0.1199, "step": 3605 }, { "epoch": 0.07945925399527343, "grad_norm": 1.2066417932510376, "learning_rate": 2.980804566958732e-05, "loss": 0.1062, "step": 3606 }, { "epoch": 0.07948128928478959, "grad_norm": 1.7133692502975464, "learning_rate": 2.9807874915409948e-05, "loss": 0.1606, "step": 3607 }, { "epoch": 0.07950332457430576, "grad_norm": 1.8759411573410034, "learning_rate": 2.9807704085808127e-05, "loss": 0.1601, "step": 3608 }, { "epoch": 0.0795253598638219, "grad_norm": 1.1476854085922241, "learning_rate": 2.9807533180782732e-05, "loss": 0.1198, "step": 3609 }, { "epoch": 0.07954739515333807, "grad_norm": 1.578167200088501, "learning_rate": 2.9807362200334627e-05, "loss": 0.189, "step": 3610 }, { "epoch": 0.07956943044285424, "grad_norm": 1.3585681915283203, "learning_rate": 2.9807191144464688e-05, "loss": 0.1667, "step": 3611 }, { "epoch": 0.07959146573237039, "grad_norm": 1.099989414215088, "learning_rate": 2.9807020013173784e-05, "loss": 0.1503, "step": 3612 }, { "epoch": 0.07961350102188655, "grad_norm": 0.8208405375480652, "learning_rate": 2.9806848806462785e-05, "loss": 0.1758, "step": 3613 }, { "epoch": 0.07963553631140272, "grad_norm": 1.9006013870239258, "learning_rate": 2.9806677524332565e-05, "loss": 0.1347, "step": 3614 }, { "epoch": 0.07965757160091887, "grad_norm": 1.533085823059082, "learning_rate": 2.9806506166784e-05, "loss": 0.1274, "step": 3615 }, { "epoch": 0.07967960689043503, "grad_norm": 2.2153382301330566, "learning_rate": 2.9806334733817957e-05, "loss": 0.1834, "step": 3616 }, { "epoch": 0.0797016421799512, "grad_norm": 1.24654221534729, "learning_rate": 2.980616322543531e-05, "loss": 0.1261, "step": 3617 }, { "epoch": 0.07972367746946735, "grad_norm": 1.3193159103393555, "learning_rate": 2.9805991641636935e-05, "loss": 0.1314, "step": 3618 }, { "epoch": 0.07974571275898351, "grad_norm": 1.5182358026504517, "learning_rate": 2.9805819982423707e-05, "loss": 0.2089, "step": 3619 }, { "epoch": 0.07976774804849968, "grad_norm": 1.1952013969421387, "learning_rate": 2.9805648247796496e-05, "loss": 0.1454, "step": 3620 }, { "epoch": 0.07978978333801583, "grad_norm": 1.8197096586227417, "learning_rate": 2.980547643775618e-05, "loss": 0.1427, "step": 3621 }, { "epoch": 0.07981181862753199, "grad_norm": 1.5781534910202026, "learning_rate": 2.980530455230363e-05, "loss": 0.1531, "step": 3622 }, { "epoch": 0.07983385391704816, "grad_norm": 2.2464852333068848, "learning_rate": 2.980513259143973e-05, "loss": 0.19, "step": 3623 }, { "epoch": 0.0798558892065643, "grad_norm": 2.360128164291382, "learning_rate": 2.980496055516535e-05, "loss": 0.1719, "step": 3624 }, { "epoch": 0.07987792449608047, "grad_norm": 4.377190113067627, "learning_rate": 2.9804788443481362e-05, "loss": 0.1956, "step": 3625 }, { "epoch": 0.07989995978559664, "grad_norm": 1.5422439575195312, "learning_rate": 2.9804616256388653e-05, "loss": 0.2033, "step": 3626 }, { "epoch": 0.07992199507511279, "grad_norm": 1.7816418409347534, "learning_rate": 2.980444399388809e-05, "loss": 0.1545, "step": 3627 }, { "epoch": 0.07994403036462895, "grad_norm": 1.371712327003479, "learning_rate": 2.980427165598056e-05, "loss": 0.1559, "step": 3628 }, { "epoch": 0.07996606565414512, "grad_norm": 1.126960277557373, "learning_rate": 2.980409924266693e-05, "loss": 0.1389, "step": 3629 }, { "epoch": 0.07998810094366127, "grad_norm": 1.130497694015503, "learning_rate": 2.9803926753948084e-05, "loss": 0.1399, "step": 3630 }, { "epoch": 0.08001013623317743, "grad_norm": 1.5073610544204712, "learning_rate": 2.9803754189824906e-05, "loss": 0.1209, "step": 3631 }, { "epoch": 0.0800321715226936, "grad_norm": 3.1734700202941895, "learning_rate": 2.9803581550298266e-05, "loss": 0.1969, "step": 3632 }, { "epoch": 0.08005420681220976, "grad_norm": 1.302956223487854, "learning_rate": 2.9803408835369048e-05, "loss": 0.1463, "step": 3633 }, { "epoch": 0.08007624210172591, "grad_norm": 1.5002354383468628, "learning_rate": 2.9803236045038132e-05, "loss": 0.181, "step": 3634 }, { "epoch": 0.08009827739124208, "grad_norm": 1.5527724027633667, "learning_rate": 2.9803063179306394e-05, "loss": 0.1554, "step": 3635 }, { "epoch": 0.08012031268075824, "grad_norm": 1.1024229526519775, "learning_rate": 2.9802890238174716e-05, "loss": 0.1789, "step": 3636 }, { "epoch": 0.08014234797027439, "grad_norm": 1.7010550498962402, "learning_rate": 2.9802717221643984e-05, "loss": 0.1606, "step": 3637 }, { "epoch": 0.08016438325979056, "grad_norm": 1.0384521484375, "learning_rate": 2.9802544129715075e-05, "loss": 0.1326, "step": 3638 }, { "epoch": 0.08018641854930672, "grad_norm": 1.3506510257720947, "learning_rate": 2.9802370962388866e-05, "loss": 0.1483, "step": 3639 }, { "epoch": 0.08020845383882287, "grad_norm": 1.3640363216400146, "learning_rate": 2.980219771966625e-05, "loss": 0.1765, "step": 3640 }, { "epoch": 0.08023048912833904, "grad_norm": 1.4964712858200073, "learning_rate": 2.9802024401548097e-05, "loss": 0.1307, "step": 3641 }, { "epoch": 0.0802525244178552, "grad_norm": 1.7355023622512817, "learning_rate": 2.9801851008035303e-05, "loss": 0.154, "step": 3642 }, { "epoch": 0.08027455970737135, "grad_norm": 1.3015531301498413, "learning_rate": 2.9801677539128743e-05, "loss": 0.2021, "step": 3643 }, { "epoch": 0.08029659499688752, "grad_norm": 1.1805437803268433, "learning_rate": 2.98015039948293e-05, "loss": 0.1699, "step": 3644 }, { "epoch": 0.08031863028640368, "grad_norm": 1.231331706047058, "learning_rate": 2.980133037513786e-05, "loss": 0.153, "step": 3645 }, { "epoch": 0.08034066557591983, "grad_norm": 1.576064944267273, "learning_rate": 2.9801156680055304e-05, "loss": 0.1297, "step": 3646 }, { "epoch": 0.080362700865436, "grad_norm": 1.8281248807907104, "learning_rate": 2.9800982909582528e-05, "loss": 0.1608, "step": 3647 }, { "epoch": 0.08038473615495216, "grad_norm": 1.2346562147140503, "learning_rate": 2.9800809063720404e-05, "loss": 0.1098, "step": 3648 }, { "epoch": 0.08040677144446831, "grad_norm": 1.6145538091659546, "learning_rate": 2.9800635142469823e-05, "loss": 0.1561, "step": 3649 }, { "epoch": 0.08042880673398448, "grad_norm": 1.8949456214904785, "learning_rate": 2.980046114583167e-05, "loss": 0.1909, "step": 3650 }, { "epoch": 0.08045084202350064, "grad_norm": 1.4934349060058594, "learning_rate": 2.9800287073806837e-05, "loss": 0.1535, "step": 3651 }, { "epoch": 0.08047287731301679, "grad_norm": 1.461346983909607, "learning_rate": 2.9800112926396196e-05, "loss": 0.1884, "step": 3652 }, { "epoch": 0.08049491260253296, "grad_norm": 1.1091657876968384, "learning_rate": 2.9799938703600653e-05, "loss": 0.1346, "step": 3653 }, { "epoch": 0.08051694789204912, "grad_norm": 1.6407588720321655, "learning_rate": 2.9799764405421082e-05, "loss": 0.1419, "step": 3654 }, { "epoch": 0.08053898318156527, "grad_norm": 1.4218682050704956, "learning_rate": 2.9799590031858374e-05, "loss": 0.1491, "step": 3655 }, { "epoch": 0.08056101847108144, "grad_norm": 1.4764026403427124, "learning_rate": 2.979941558291342e-05, "loss": 0.1137, "step": 3656 }, { "epoch": 0.0805830537605976, "grad_norm": 1.157416820526123, "learning_rate": 2.97992410585871e-05, "loss": 0.1259, "step": 3657 }, { "epoch": 0.08060508905011375, "grad_norm": 1.604703664779663, "learning_rate": 2.9799066458880314e-05, "loss": 0.1777, "step": 3658 }, { "epoch": 0.08062712433962992, "grad_norm": 1.2297924757003784, "learning_rate": 2.9798891783793947e-05, "loss": 0.1053, "step": 3659 }, { "epoch": 0.08064915962914608, "grad_norm": 1.3094375133514404, "learning_rate": 2.979871703332889e-05, "loss": 0.1424, "step": 3660 }, { "epoch": 0.08067119491866223, "grad_norm": 2.2137861251831055, "learning_rate": 2.9798542207486027e-05, "loss": 0.1595, "step": 3661 }, { "epoch": 0.0806932302081784, "grad_norm": 1.193697452545166, "learning_rate": 2.9798367306266252e-05, "loss": 0.1312, "step": 3662 }, { "epoch": 0.08071526549769456, "grad_norm": 1.4320729970932007, "learning_rate": 2.9798192329670458e-05, "loss": 0.1535, "step": 3663 }, { "epoch": 0.08073730078721071, "grad_norm": 1.280672550201416, "learning_rate": 2.979801727769954e-05, "loss": 0.1213, "step": 3664 }, { "epoch": 0.08075933607672688, "grad_norm": 1.074130654335022, "learning_rate": 2.9797842150354382e-05, "loss": 0.1134, "step": 3665 }, { "epoch": 0.08078137136624304, "grad_norm": 1.139928936958313, "learning_rate": 2.979766694763588e-05, "loss": 0.1534, "step": 3666 }, { "epoch": 0.0808034066557592, "grad_norm": 1.6598447561264038, "learning_rate": 2.979749166954492e-05, "loss": 0.1574, "step": 3667 }, { "epoch": 0.08082544194527536, "grad_norm": 1.0150073766708374, "learning_rate": 2.9797316316082403e-05, "loss": 0.1705, "step": 3668 }, { "epoch": 0.08084747723479152, "grad_norm": 2.009519338607788, "learning_rate": 2.979714088724922e-05, "loss": 0.1944, "step": 3669 }, { "epoch": 0.08086951252430769, "grad_norm": 1.1277328729629517, "learning_rate": 2.9796965383046263e-05, "loss": 0.1515, "step": 3670 }, { "epoch": 0.08089154781382384, "grad_norm": 1.223144769668579, "learning_rate": 2.979678980347443e-05, "loss": 0.1247, "step": 3671 }, { "epoch": 0.08091358310334, "grad_norm": 1.1170254945755005, "learning_rate": 2.9796614148534604e-05, "loss": 0.1201, "step": 3672 }, { "epoch": 0.08093561839285617, "grad_norm": 1.249659776687622, "learning_rate": 2.979643841822769e-05, "loss": 0.1624, "step": 3673 }, { "epoch": 0.08095765368237232, "grad_norm": 1.1825538873672485, "learning_rate": 2.9796262612554587e-05, "loss": 0.1644, "step": 3674 }, { "epoch": 0.08097968897188848, "grad_norm": 1.287843108177185, "learning_rate": 2.979608673151618e-05, "loss": 0.1582, "step": 3675 }, { "epoch": 0.08100172426140465, "grad_norm": 1.126434564590454, "learning_rate": 2.9795910775113366e-05, "loss": 0.1527, "step": 3676 }, { "epoch": 0.0810237595509208, "grad_norm": 1.413602352142334, "learning_rate": 2.9795734743347048e-05, "loss": 0.133, "step": 3677 }, { "epoch": 0.08104579484043696, "grad_norm": 8.166459083557129, "learning_rate": 2.9795558636218117e-05, "loss": 0.1551, "step": 3678 }, { "epoch": 0.08106783012995313, "grad_norm": 1.380866289138794, "learning_rate": 2.9795382453727473e-05, "loss": 0.1296, "step": 3679 }, { "epoch": 0.08108986541946928, "grad_norm": 1.6743242740631104, "learning_rate": 2.979520619587601e-05, "loss": 0.1712, "step": 3680 }, { "epoch": 0.08111190070898544, "grad_norm": 3.4570693969726562, "learning_rate": 2.979502986266463e-05, "loss": 0.1779, "step": 3681 }, { "epoch": 0.0811339359985016, "grad_norm": 1.3012171983718872, "learning_rate": 2.979485345409423e-05, "loss": 0.1269, "step": 3682 }, { "epoch": 0.08115597128801776, "grad_norm": 2.517420768737793, "learning_rate": 2.979467697016571e-05, "loss": 0.1492, "step": 3683 }, { "epoch": 0.08117800657753392, "grad_norm": 1.2252609729766846, "learning_rate": 2.979450041087996e-05, "loss": 0.1477, "step": 3684 }, { "epoch": 0.08120004186705009, "grad_norm": 1.744156837463379, "learning_rate": 2.979432377623789e-05, "loss": 0.1559, "step": 3685 }, { "epoch": 0.08122207715656624, "grad_norm": 1.8646531105041504, "learning_rate": 2.9794147066240397e-05, "loss": 0.1541, "step": 3686 }, { "epoch": 0.0812441124460824, "grad_norm": 1.80894935131073, "learning_rate": 2.9793970280888374e-05, "loss": 0.1558, "step": 3687 }, { "epoch": 0.08126614773559857, "grad_norm": 1.701719880104065, "learning_rate": 2.979379342018273e-05, "loss": 0.1766, "step": 3688 }, { "epoch": 0.08128818302511472, "grad_norm": 1.046486735343933, "learning_rate": 2.9793616484124367e-05, "loss": 0.1578, "step": 3689 }, { "epoch": 0.08131021831463088, "grad_norm": 1.509598731994629, "learning_rate": 2.979343947271418e-05, "loss": 0.1568, "step": 3690 }, { "epoch": 0.08133225360414705, "grad_norm": 1.1506160497665405, "learning_rate": 2.9793262385953067e-05, "loss": 0.1569, "step": 3691 }, { "epoch": 0.0813542888936632, "grad_norm": 1.375615119934082, "learning_rate": 2.9793085223841945e-05, "loss": 0.1586, "step": 3692 }, { "epoch": 0.08137632418317936, "grad_norm": 1.3297773599624634, "learning_rate": 2.97929079863817e-05, "loss": 0.1513, "step": 3693 }, { "epoch": 0.08139835947269553, "grad_norm": 1.5348035097122192, "learning_rate": 2.979273067357325e-05, "loss": 0.1611, "step": 3694 }, { "epoch": 0.08142039476221168, "grad_norm": 1.6325260400772095, "learning_rate": 2.9792553285417484e-05, "loss": 0.1636, "step": 3695 }, { "epoch": 0.08144243005172784, "grad_norm": 2.1680705547332764, "learning_rate": 2.979237582191531e-05, "loss": 0.1424, "step": 3696 }, { "epoch": 0.081464465341244, "grad_norm": 1.1905701160430908, "learning_rate": 2.9792198283067637e-05, "loss": 0.1409, "step": 3697 }, { "epoch": 0.08148650063076016, "grad_norm": 2.074068546295166, "learning_rate": 2.979202066887537e-05, "loss": 0.1572, "step": 3698 }, { "epoch": 0.08150853592027632, "grad_norm": 1.1137301921844482, "learning_rate": 2.9791842979339402e-05, "loss": 0.1523, "step": 3699 }, { "epoch": 0.08153057120979249, "grad_norm": 0.9988977909088135, "learning_rate": 2.9791665214460654e-05, "loss": 0.1571, "step": 3700 }, { "epoch": 0.08155260649930865, "grad_norm": 1.389758825302124, "learning_rate": 2.9791487374240016e-05, "loss": 0.1315, "step": 3701 }, { "epoch": 0.0815746417888248, "grad_norm": 1.3642151355743408, "learning_rate": 2.9791309458678402e-05, "loss": 0.1691, "step": 3702 }, { "epoch": 0.08159667707834097, "grad_norm": 1.4399672746658325, "learning_rate": 2.9791131467776718e-05, "loss": 0.159, "step": 3703 }, { "epoch": 0.08161871236785713, "grad_norm": 1.5657405853271484, "learning_rate": 2.979095340153587e-05, "loss": 0.1522, "step": 3704 }, { "epoch": 0.08164074765737328, "grad_norm": 1.2161178588867188, "learning_rate": 2.9790775259956767e-05, "loss": 0.1845, "step": 3705 }, { "epoch": 0.08166278294688945, "grad_norm": 1.4436615705490112, "learning_rate": 2.979059704304031e-05, "loss": 0.0981, "step": 3706 }, { "epoch": 0.08168481823640561, "grad_norm": 1.1389659643173218, "learning_rate": 2.9790418750787417e-05, "loss": 0.1597, "step": 3707 }, { "epoch": 0.08170685352592176, "grad_norm": 1.439871072769165, "learning_rate": 2.9790240383198984e-05, "loss": 0.1561, "step": 3708 }, { "epoch": 0.08172888881543793, "grad_norm": 1.5837923288345337, "learning_rate": 2.9790061940275924e-05, "loss": 0.1664, "step": 3709 }, { "epoch": 0.08175092410495409, "grad_norm": 1.6658506393432617, "learning_rate": 2.9789883422019154e-05, "loss": 0.1532, "step": 3710 }, { "epoch": 0.08177295939447024, "grad_norm": 1.4519520998001099, "learning_rate": 2.9789704828429574e-05, "loss": 0.1721, "step": 3711 }, { "epoch": 0.0817949946839864, "grad_norm": 1.9028972387313843, "learning_rate": 2.9789526159508096e-05, "loss": 0.1457, "step": 3712 }, { "epoch": 0.08181702997350257, "grad_norm": 1.5229545831680298, "learning_rate": 2.978934741525563e-05, "loss": 0.1623, "step": 3713 }, { "epoch": 0.08183906526301872, "grad_norm": 2.7569687366485596, "learning_rate": 2.978916859567309e-05, "loss": 0.1804, "step": 3714 }, { "epoch": 0.08186110055253489, "grad_norm": 1.2425538301467896, "learning_rate": 2.978898970076138e-05, "loss": 0.1478, "step": 3715 }, { "epoch": 0.08188313584205105, "grad_norm": 1.627504587173462, "learning_rate": 2.9788810730521416e-05, "loss": 0.1523, "step": 3716 }, { "epoch": 0.0819051711315672, "grad_norm": 1.5914452075958252, "learning_rate": 2.9788631684954107e-05, "loss": 0.1704, "step": 3717 }, { "epoch": 0.08192720642108337, "grad_norm": 1.5175235271453857, "learning_rate": 2.978845256406037e-05, "loss": 0.1278, "step": 3718 }, { "epoch": 0.08194924171059953, "grad_norm": 1.13139009475708, "learning_rate": 2.9788273367841113e-05, "loss": 0.1077, "step": 3719 }, { "epoch": 0.08197127700011568, "grad_norm": 1.544532060623169, "learning_rate": 2.9788094096297252e-05, "loss": 0.1945, "step": 3720 }, { "epoch": 0.08199331228963185, "grad_norm": 1.4610925912857056, "learning_rate": 2.9787914749429695e-05, "loss": 0.1518, "step": 3721 }, { "epoch": 0.08201534757914801, "grad_norm": 2.1833765506744385, "learning_rate": 2.9787735327239356e-05, "loss": 0.1808, "step": 3722 }, { "epoch": 0.08203738286866416, "grad_norm": 1.4178037643432617, "learning_rate": 2.9787555829727156e-05, "loss": 0.1674, "step": 3723 }, { "epoch": 0.08205941815818033, "grad_norm": 1.3852635622024536, "learning_rate": 2.9787376256894e-05, "loss": 0.1749, "step": 3724 }, { "epoch": 0.08208145344769649, "grad_norm": 1.4651329517364502, "learning_rate": 2.978719660874081e-05, "loss": 0.202, "step": 3725 }, { "epoch": 0.08210348873721264, "grad_norm": 1.731573462486267, "learning_rate": 2.9787016885268497e-05, "loss": 0.1616, "step": 3726 }, { "epoch": 0.0821255240267288, "grad_norm": 3.1613945960998535, "learning_rate": 2.978683708647798e-05, "loss": 0.1752, "step": 3727 }, { "epoch": 0.08214755931624497, "grad_norm": 1.7166757583618164, "learning_rate": 2.978665721237017e-05, "loss": 0.1037, "step": 3728 }, { "epoch": 0.08216959460576112, "grad_norm": 1.1400035619735718, "learning_rate": 2.978647726294598e-05, "loss": 0.1278, "step": 3729 }, { "epoch": 0.08219162989527729, "grad_norm": 1.7633854150772095, "learning_rate": 2.978629723820634e-05, "loss": 0.1718, "step": 3730 }, { "epoch": 0.08221366518479345, "grad_norm": 1.1552895307540894, "learning_rate": 2.9786117138152157e-05, "loss": 0.1761, "step": 3731 }, { "epoch": 0.0822357004743096, "grad_norm": 1.684139609336853, "learning_rate": 2.978593696278435e-05, "loss": 0.141, "step": 3732 }, { "epoch": 0.08225773576382577, "grad_norm": 1.2241828441619873, "learning_rate": 2.978575671210384e-05, "loss": 0.2025, "step": 3733 }, { "epoch": 0.08227977105334193, "grad_norm": 1.319454312324524, "learning_rate": 2.978557638611154e-05, "loss": 0.1702, "step": 3734 }, { "epoch": 0.08230180634285808, "grad_norm": 1.4172338247299194, "learning_rate": 2.978539598480837e-05, "loss": 0.1866, "step": 3735 }, { "epoch": 0.08232384163237425, "grad_norm": 1.5901979207992554, "learning_rate": 2.978521550819525e-05, "loss": 0.159, "step": 3736 }, { "epoch": 0.08234587692189041, "grad_norm": 1.2994614839553833, "learning_rate": 2.97850349562731e-05, "loss": 0.1806, "step": 3737 }, { "epoch": 0.08236791221140657, "grad_norm": 1.8589860200881958, "learning_rate": 2.9784854329042838e-05, "loss": 0.1638, "step": 3738 }, { "epoch": 0.08238994750092273, "grad_norm": 1.4300291538238525, "learning_rate": 2.9784673626505384e-05, "loss": 0.1401, "step": 3739 }, { "epoch": 0.08241198279043889, "grad_norm": 1.6516903638839722, "learning_rate": 2.978449284866166e-05, "loss": 0.1536, "step": 3740 }, { "epoch": 0.08243401807995505, "grad_norm": 1.6891610622406006, "learning_rate": 2.9784311995512584e-05, "loss": 0.1526, "step": 3741 }, { "epoch": 0.0824560533694712, "grad_norm": 1.5143210887908936, "learning_rate": 2.9784131067059083e-05, "loss": 0.1655, "step": 3742 }, { "epoch": 0.08247808865898737, "grad_norm": 1.2309550046920776, "learning_rate": 2.978395006330207e-05, "loss": 0.1778, "step": 3743 }, { "epoch": 0.08250012394850353, "grad_norm": 1.8251270055770874, "learning_rate": 2.9783768984242473e-05, "loss": 0.1559, "step": 3744 }, { "epoch": 0.08252215923801969, "grad_norm": 1.132058024406433, "learning_rate": 2.9783587829881216e-05, "loss": 0.1431, "step": 3745 }, { "epoch": 0.08254419452753585, "grad_norm": 1.062276840209961, "learning_rate": 2.9783406600219215e-05, "loss": 0.1269, "step": 3746 }, { "epoch": 0.08256622981705201, "grad_norm": 1.4523646831512451, "learning_rate": 2.9783225295257398e-05, "loss": 0.1279, "step": 3747 }, { "epoch": 0.08258826510656816, "grad_norm": 1.1794203519821167, "learning_rate": 2.9783043914996686e-05, "loss": 0.1797, "step": 3748 }, { "epoch": 0.08261030039608433, "grad_norm": 2.03406023979187, "learning_rate": 2.9782862459438004e-05, "loss": 0.1958, "step": 3749 }, { "epoch": 0.0826323356856005, "grad_norm": 1.7473907470703125, "learning_rate": 2.9782680928582278e-05, "loss": 0.17, "step": 3750 }, { "epoch": 0.08265437097511664, "grad_norm": 1.2960560321807861, "learning_rate": 2.9782499322430436e-05, "loss": 0.1563, "step": 3751 }, { "epoch": 0.08267640626463281, "grad_norm": 1.1250571012496948, "learning_rate": 2.9782317640983388e-05, "loss": 0.1174, "step": 3752 }, { "epoch": 0.08269844155414897, "grad_norm": 1.0632206201553345, "learning_rate": 2.9782135884242074e-05, "loss": 0.1541, "step": 3753 }, { "epoch": 0.08272047684366512, "grad_norm": 1.6131271123886108, "learning_rate": 2.9781954052207416e-05, "loss": 0.1777, "step": 3754 }, { "epoch": 0.08274251213318129, "grad_norm": 1.1151409149169922, "learning_rate": 2.9781772144880337e-05, "loss": 0.1399, "step": 3755 }, { "epoch": 0.08276454742269745, "grad_norm": 1.7933396100997925, "learning_rate": 2.9781590162261764e-05, "loss": 0.1712, "step": 3756 }, { "epoch": 0.0827865827122136, "grad_norm": 2.3192172050476074, "learning_rate": 2.978140810435263e-05, "loss": 0.1334, "step": 3757 }, { "epoch": 0.08280861800172977, "grad_norm": 2.0077152252197266, "learning_rate": 2.9781225971153856e-05, "loss": 0.1787, "step": 3758 }, { "epoch": 0.08283065329124593, "grad_norm": 1.3040574789047241, "learning_rate": 2.9781043762666374e-05, "loss": 0.1326, "step": 3759 }, { "epoch": 0.08285268858076208, "grad_norm": 1.0957655906677246, "learning_rate": 2.9780861478891108e-05, "loss": 0.1726, "step": 3760 }, { "epoch": 0.08287472387027825, "grad_norm": 2.22282338142395, "learning_rate": 2.9780679119828992e-05, "loss": 0.214, "step": 3761 }, { "epoch": 0.08289675915979441, "grad_norm": 1.312520980834961, "learning_rate": 2.9780496685480945e-05, "loss": 0.1701, "step": 3762 }, { "epoch": 0.08291879444931056, "grad_norm": 0.9316908717155457, "learning_rate": 2.9780314175847908e-05, "loss": 0.1511, "step": 3763 }, { "epoch": 0.08294082973882673, "grad_norm": 1.7560734748840332, "learning_rate": 2.97801315909308e-05, "loss": 0.1676, "step": 3764 }, { "epoch": 0.0829628650283429, "grad_norm": 2.299623727798462, "learning_rate": 2.9779948930730557e-05, "loss": 0.1502, "step": 3765 }, { "epoch": 0.08298490031785904, "grad_norm": 1.6898738145828247, "learning_rate": 2.9779766195248114e-05, "loss": 0.2178, "step": 3766 }, { "epoch": 0.08300693560737521, "grad_norm": 1.6196638345718384, "learning_rate": 2.977958338448439e-05, "loss": 0.2065, "step": 3767 }, { "epoch": 0.08302897089689137, "grad_norm": 1.081045150756836, "learning_rate": 2.9779400498440324e-05, "loss": 0.1351, "step": 3768 }, { "epoch": 0.08305100618640752, "grad_norm": 1.5362943410873413, "learning_rate": 2.9779217537116847e-05, "loss": 0.1237, "step": 3769 }, { "epoch": 0.08307304147592369, "grad_norm": 1.6933883428573608, "learning_rate": 2.977903450051489e-05, "loss": 0.1437, "step": 3770 }, { "epoch": 0.08309507676543985, "grad_norm": 1.4016588926315308, "learning_rate": 2.9778851388635386e-05, "loss": 0.1335, "step": 3771 }, { "epoch": 0.08311711205495602, "grad_norm": 1.7430317401885986, "learning_rate": 2.9778668201479268e-05, "loss": 0.1662, "step": 3772 }, { "epoch": 0.08313914734447217, "grad_norm": 1.5007545948028564, "learning_rate": 2.9778484939047464e-05, "loss": 0.1649, "step": 3773 }, { "epoch": 0.08316118263398833, "grad_norm": 1.8314929008483887, "learning_rate": 2.977830160134091e-05, "loss": 0.185, "step": 3774 }, { "epoch": 0.0831832179235045, "grad_norm": 2.424666404724121, "learning_rate": 2.9778118188360542e-05, "loss": 0.1789, "step": 3775 }, { "epoch": 0.08320525321302065, "grad_norm": 1.1102077960968018, "learning_rate": 2.9777934700107298e-05, "loss": 0.1611, "step": 3776 }, { "epoch": 0.08322728850253681, "grad_norm": 1.665010929107666, "learning_rate": 2.9777751136582105e-05, "loss": 0.2156, "step": 3777 }, { "epoch": 0.08324932379205298, "grad_norm": 1.5602840185165405, "learning_rate": 2.9777567497785902e-05, "loss": 0.1653, "step": 3778 }, { "epoch": 0.08327135908156913, "grad_norm": 1.3345590829849243, "learning_rate": 2.9777383783719624e-05, "loss": 0.1595, "step": 3779 }, { "epoch": 0.0832933943710853, "grad_norm": 1.4514285326004028, "learning_rate": 2.9777199994384202e-05, "loss": 0.1664, "step": 3780 }, { "epoch": 0.08331542966060146, "grad_norm": 1.529732584953308, "learning_rate": 2.977701612978058e-05, "loss": 0.1984, "step": 3781 }, { "epoch": 0.08333746495011761, "grad_norm": 1.8013964891433716, "learning_rate": 2.977683218990969e-05, "loss": 0.167, "step": 3782 }, { "epoch": 0.08335950023963377, "grad_norm": 1.1392018795013428, "learning_rate": 2.977664817477247e-05, "loss": 0.1318, "step": 3783 }, { "epoch": 0.08338153552914994, "grad_norm": 1.24851393699646, "learning_rate": 2.9776464084369855e-05, "loss": 0.1569, "step": 3784 }, { "epoch": 0.08340357081866609, "grad_norm": 1.2008720636367798, "learning_rate": 2.9776279918702788e-05, "loss": 0.1183, "step": 3785 }, { "epoch": 0.08342560610818225, "grad_norm": 1.539087176322937, "learning_rate": 2.97760956777722e-05, "loss": 0.1446, "step": 3786 }, { "epoch": 0.08344764139769842, "grad_norm": 1.0971620082855225, "learning_rate": 2.9775911361579037e-05, "loss": 0.102, "step": 3787 }, { "epoch": 0.08346967668721457, "grad_norm": 1.595312476158142, "learning_rate": 2.977572697012423e-05, "loss": 0.1631, "step": 3788 }, { "epoch": 0.08349171197673073, "grad_norm": 1.5031908750534058, "learning_rate": 2.9775542503408724e-05, "loss": 0.1563, "step": 3789 }, { "epoch": 0.0835137472662469, "grad_norm": 1.4984816312789917, "learning_rate": 2.9775357961433456e-05, "loss": 0.1557, "step": 3790 }, { "epoch": 0.08353578255576305, "grad_norm": 1.8884332180023193, "learning_rate": 2.977517334419937e-05, "loss": 0.1595, "step": 3791 }, { "epoch": 0.08355781784527921, "grad_norm": 1.6790733337402344, "learning_rate": 2.97749886517074e-05, "loss": 0.0897, "step": 3792 }, { "epoch": 0.08357985313479538, "grad_norm": 1.1722772121429443, "learning_rate": 2.977480388395849e-05, "loss": 0.1178, "step": 3793 }, { "epoch": 0.08360188842431153, "grad_norm": 1.4062429666519165, "learning_rate": 2.9774619040953582e-05, "loss": 0.1321, "step": 3794 }, { "epoch": 0.0836239237138277, "grad_norm": 1.4524927139282227, "learning_rate": 2.9774434122693617e-05, "loss": 0.133, "step": 3795 }, { "epoch": 0.08364595900334386, "grad_norm": 1.1788667440414429, "learning_rate": 2.9774249129179536e-05, "loss": 0.1439, "step": 3796 }, { "epoch": 0.08366799429286001, "grad_norm": 1.3895992040634155, "learning_rate": 2.9774064060412282e-05, "loss": 0.1416, "step": 3797 }, { "epoch": 0.08369002958237617, "grad_norm": 1.602611780166626, "learning_rate": 2.9773878916392792e-05, "loss": 0.1572, "step": 3798 }, { "epoch": 0.08371206487189234, "grad_norm": 1.323885202407837, "learning_rate": 2.9773693697122017e-05, "loss": 0.1317, "step": 3799 }, { "epoch": 0.08373410016140849, "grad_norm": 1.4390650987625122, "learning_rate": 2.9773508402600903e-05, "loss": 0.1201, "step": 3800 }, { "epoch": 0.08375613545092465, "grad_norm": 1.2269668579101562, "learning_rate": 2.9773323032830385e-05, "loss": 0.1494, "step": 3801 }, { "epoch": 0.08377817074044082, "grad_norm": 1.4060125350952148, "learning_rate": 2.977313758781141e-05, "loss": 0.148, "step": 3802 }, { "epoch": 0.08380020602995697, "grad_norm": 1.1105129718780518, "learning_rate": 2.977295206754492e-05, "loss": 0.1502, "step": 3803 }, { "epoch": 0.08382224131947313, "grad_norm": 1.6016513109207153, "learning_rate": 2.977276647203187e-05, "loss": 0.1451, "step": 3804 }, { "epoch": 0.0838442766089893, "grad_norm": 1.5972620248794556, "learning_rate": 2.9772580801273194e-05, "loss": 0.142, "step": 3805 }, { "epoch": 0.08386631189850546, "grad_norm": 1.8336368799209595, "learning_rate": 2.9772395055269845e-05, "loss": 0.1739, "step": 3806 }, { "epoch": 0.08388834718802161, "grad_norm": 1.3788447380065918, "learning_rate": 2.9772209234022766e-05, "loss": 0.107, "step": 3807 }, { "epoch": 0.08391038247753778, "grad_norm": 1.666706919670105, "learning_rate": 2.97720233375329e-05, "loss": 0.2153, "step": 3808 }, { "epoch": 0.08393241776705394, "grad_norm": 1.2634135484695435, "learning_rate": 2.9771837365801203e-05, "loss": 0.1658, "step": 3809 }, { "epoch": 0.0839544530565701, "grad_norm": 1.659598469734192, "learning_rate": 2.9771651318828613e-05, "loss": 0.1879, "step": 3810 }, { "epoch": 0.08397648834608626, "grad_norm": 1.0179212093353271, "learning_rate": 2.9771465196616085e-05, "loss": 0.1445, "step": 3811 }, { "epoch": 0.08399852363560242, "grad_norm": 1.5400065183639526, "learning_rate": 2.9771278999164565e-05, "loss": 0.1407, "step": 3812 }, { "epoch": 0.08402055892511857, "grad_norm": 1.6653939485549927, "learning_rate": 2.9771092726474994e-05, "loss": 0.1761, "step": 3813 }, { "epoch": 0.08404259421463474, "grad_norm": 1.0085581541061401, "learning_rate": 2.9770906378548328e-05, "loss": 0.1525, "step": 3814 }, { "epoch": 0.0840646295041509, "grad_norm": 1.0362865924835205, "learning_rate": 2.977071995538552e-05, "loss": 0.1101, "step": 3815 }, { "epoch": 0.08408666479366705, "grad_norm": 1.034735918045044, "learning_rate": 2.977053345698751e-05, "loss": 0.1007, "step": 3816 }, { "epoch": 0.08410870008318322, "grad_norm": 2.3333325386047363, "learning_rate": 2.9770346883355253e-05, "loss": 0.1553, "step": 3817 }, { "epoch": 0.08413073537269938, "grad_norm": 1.5664705038070679, "learning_rate": 2.9770160234489703e-05, "loss": 0.1278, "step": 3818 }, { "epoch": 0.08415277066221553, "grad_norm": 1.3183326721191406, "learning_rate": 2.9769973510391802e-05, "loss": 0.1922, "step": 3819 }, { "epoch": 0.0841748059517317, "grad_norm": 1.6856589317321777, "learning_rate": 2.9769786711062508e-05, "loss": 0.1543, "step": 3820 }, { "epoch": 0.08419684124124786, "grad_norm": 1.8691505193710327, "learning_rate": 2.9769599836502767e-05, "loss": 0.2332, "step": 3821 }, { "epoch": 0.08421887653076401, "grad_norm": 1.5195584297180176, "learning_rate": 2.976941288671354e-05, "loss": 0.1676, "step": 3822 }, { "epoch": 0.08424091182028018, "grad_norm": 1.4195653200149536, "learning_rate": 2.9769225861695766e-05, "loss": 0.0997, "step": 3823 }, { "epoch": 0.08426294710979634, "grad_norm": 1.5313047170639038, "learning_rate": 2.976903876145041e-05, "loss": 0.1076, "step": 3824 }, { "epoch": 0.0842849823993125, "grad_norm": 1.458722710609436, "learning_rate": 2.9768851585978418e-05, "loss": 0.1331, "step": 3825 }, { "epoch": 0.08430701768882866, "grad_norm": 1.753434419631958, "learning_rate": 2.9768664335280747e-05, "loss": 0.1406, "step": 3826 }, { "epoch": 0.08432905297834482, "grad_norm": 1.6398035287857056, "learning_rate": 2.9768477009358347e-05, "loss": 0.1688, "step": 3827 }, { "epoch": 0.08435108826786097, "grad_norm": 1.3682206869125366, "learning_rate": 2.976828960821217e-05, "loss": 0.141, "step": 3828 }, { "epoch": 0.08437312355737714, "grad_norm": 2.249514579772949, "learning_rate": 2.976810213184318e-05, "loss": 0.1418, "step": 3829 }, { "epoch": 0.0843951588468933, "grad_norm": 1.833763599395752, "learning_rate": 2.9767914580252326e-05, "loss": 0.2073, "step": 3830 }, { "epoch": 0.08441719413640945, "grad_norm": 0.9152593612670898, "learning_rate": 2.9767726953440564e-05, "loss": 0.1306, "step": 3831 }, { "epoch": 0.08443922942592562, "grad_norm": 2.0224428176879883, "learning_rate": 2.9767539251408848e-05, "loss": 0.1964, "step": 3832 }, { "epoch": 0.08446126471544178, "grad_norm": 1.435188889503479, "learning_rate": 2.976735147415814e-05, "loss": 0.1625, "step": 3833 }, { "epoch": 0.08448330000495793, "grad_norm": 1.2391259670257568, "learning_rate": 2.9767163621689383e-05, "loss": 0.1379, "step": 3834 }, { "epoch": 0.0845053352944741, "grad_norm": 1.6364551782608032, "learning_rate": 2.976697569400355e-05, "loss": 0.1957, "step": 3835 }, { "epoch": 0.08452737058399026, "grad_norm": 1.2775946855545044, "learning_rate": 2.976678769110159e-05, "loss": 0.0898, "step": 3836 }, { "epoch": 0.08454940587350641, "grad_norm": 1.3639332056045532, "learning_rate": 2.976659961298446e-05, "loss": 0.1378, "step": 3837 }, { "epoch": 0.08457144116302258, "grad_norm": 1.4393291473388672, "learning_rate": 2.9766411459653122e-05, "loss": 0.2198, "step": 3838 }, { "epoch": 0.08459347645253874, "grad_norm": 1.1808191537857056, "learning_rate": 2.976622323110853e-05, "loss": 0.1405, "step": 3839 }, { "epoch": 0.0846155117420549, "grad_norm": 1.4649548530578613, "learning_rate": 2.9766034927351643e-05, "loss": 0.1636, "step": 3840 }, { "epoch": 0.08463754703157106, "grad_norm": 1.210465908050537, "learning_rate": 2.9765846548383426e-05, "loss": 0.1559, "step": 3841 }, { "epoch": 0.08465958232108722, "grad_norm": 0.9852312207221985, "learning_rate": 2.9765658094204836e-05, "loss": 0.1357, "step": 3842 }, { "epoch": 0.08468161761060339, "grad_norm": 1.139630913734436, "learning_rate": 2.976546956481683e-05, "loss": 0.1634, "step": 3843 }, { "epoch": 0.08470365290011954, "grad_norm": 1.5497859716415405, "learning_rate": 2.9765280960220367e-05, "loss": 0.1464, "step": 3844 }, { "epoch": 0.0847256881896357, "grad_norm": 1.2496073246002197, "learning_rate": 2.976509228041641e-05, "loss": 0.1447, "step": 3845 }, { "epoch": 0.08474772347915187, "grad_norm": 0.9847736358642578, "learning_rate": 2.976490352540592e-05, "loss": 0.103, "step": 3846 }, { "epoch": 0.08476975876866802, "grad_norm": 1.7569773197174072, "learning_rate": 2.9764714695189862e-05, "loss": 0.1432, "step": 3847 }, { "epoch": 0.08479179405818418, "grad_norm": 1.392345905303955, "learning_rate": 2.9764525789769194e-05, "loss": 0.1258, "step": 3848 }, { "epoch": 0.08481382934770035, "grad_norm": 1.0169273614883423, "learning_rate": 2.9764336809144876e-05, "loss": 0.1438, "step": 3849 }, { "epoch": 0.0848358646372165, "grad_norm": 1.6104726791381836, "learning_rate": 2.9764147753317876e-05, "loss": 0.1671, "step": 3850 }, { "epoch": 0.08485789992673266, "grad_norm": 1.2148463726043701, "learning_rate": 2.9763958622289157e-05, "loss": 0.0909, "step": 3851 }, { "epoch": 0.08487993521624883, "grad_norm": 1.1119916439056396, "learning_rate": 2.9763769416059673e-05, "loss": 0.1234, "step": 3852 }, { "epoch": 0.08490197050576498, "grad_norm": 1.0799144506454468, "learning_rate": 2.9763580134630398e-05, "loss": 0.1121, "step": 3853 }, { "epoch": 0.08492400579528114, "grad_norm": 1.5639930963516235, "learning_rate": 2.976339077800229e-05, "loss": 0.1658, "step": 3854 }, { "epoch": 0.08494604108479731, "grad_norm": 1.113200068473816, "learning_rate": 2.9763201346176317e-05, "loss": 0.1086, "step": 3855 }, { "epoch": 0.08496807637431346, "grad_norm": 1.5429118871688843, "learning_rate": 2.9763011839153447e-05, "loss": 0.133, "step": 3856 }, { "epoch": 0.08499011166382962, "grad_norm": 1.161955714225769, "learning_rate": 2.976282225693464e-05, "loss": 0.1179, "step": 3857 }, { "epoch": 0.08501214695334579, "grad_norm": 1.6156373023986816, "learning_rate": 2.976263259952086e-05, "loss": 0.1697, "step": 3858 }, { "epoch": 0.08503418224286194, "grad_norm": 1.0500346422195435, "learning_rate": 2.9762442866913075e-05, "loss": 0.1398, "step": 3859 }, { "epoch": 0.0850562175323781, "grad_norm": 1.5031181573867798, "learning_rate": 2.9762253059112257e-05, "loss": 0.1543, "step": 3860 }, { "epoch": 0.08507825282189427, "grad_norm": 1.7657575607299805, "learning_rate": 2.9762063176119362e-05, "loss": 0.1496, "step": 3861 }, { "epoch": 0.08510028811141042, "grad_norm": 1.6537448167800903, "learning_rate": 2.9761873217935367e-05, "loss": 0.1622, "step": 3862 }, { "epoch": 0.08512232340092658, "grad_norm": 0.9599390625953674, "learning_rate": 2.9761683184561236e-05, "loss": 0.1207, "step": 3863 }, { "epoch": 0.08514435869044275, "grad_norm": 1.5175302028656006, "learning_rate": 2.9761493075997938e-05, "loss": 0.1233, "step": 3864 }, { "epoch": 0.0851663939799589, "grad_norm": 1.3358761072158813, "learning_rate": 2.976130289224644e-05, "loss": 0.1897, "step": 3865 }, { "epoch": 0.08518842926947506, "grad_norm": 1.3353630304336548, "learning_rate": 2.9761112633307705e-05, "loss": 0.1256, "step": 3866 }, { "epoch": 0.08521046455899123, "grad_norm": 1.244646430015564, "learning_rate": 2.976092229918271e-05, "loss": 0.158, "step": 3867 }, { "epoch": 0.08523249984850738, "grad_norm": 1.3607194423675537, "learning_rate": 2.9760731889872424e-05, "loss": 0.1716, "step": 3868 }, { "epoch": 0.08525453513802354, "grad_norm": 1.6431573629379272, "learning_rate": 2.9760541405377818e-05, "loss": 0.1608, "step": 3869 }, { "epoch": 0.08527657042753971, "grad_norm": 1.5320638418197632, "learning_rate": 2.9760350845699853e-05, "loss": 0.136, "step": 3870 }, { "epoch": 0.08529860571705586, "grad_norm": 1.1446245908737183, "learning_rate": 2.976016021083951e-05, "loss": 0.1638, "step": 3871 }, { "epoch": 0.08532064100657202, "grad_norm": 0.8866466283798218, "learning_rate": 2.9759969500797756e-05, "loss": 0.1052, "step": 3872 }, { "epoch": 0.08534267629608819, "grad_norm": 1.6609094142913818, "learning_rate": 2.9759778715575564e-05, "loss": 0.1751, "step": 3873 }, { "epoch": 0.08536471158560434, "grad_norm": 0.984656572341919, "learning_rate": 2.97595878551739e-05, "loss": 0.1529, "step": 3874 }, { "epoch": 0.0853867468751205, "grad_norm": 1.2665737867355347, "learning_rate": 2.975939691959374e-05, "loss": 0.1395, "step": 3875 }, { "epoch": 0.08540878216463667, "grad_norm": 0.8077554702758789, "learning_rate": 2.9759205908836057e-05, "loss": 0.1143, "step": 3876 }, { "epoch": 0.08543081745415283, "grad_norm": 0.943924605846405, "learning_rate": 2.9759014822901828e-05, "loss": 0.1182, "step": 3877 }, { "epoch": 0.08545285274366898, "grad_norm": 0.9321938157081604, "learning_rate": 2.975882366179202e-05, "loss": 0.1591, "step": 3878 }, { "epoch": 0.08547488803318515, "grad_norm": 1.7541919946670532, "learning_rate": 2.9758632425507612e-05, "loss": 0.1369, "step": 3879 }, { "epoch": 0.08549692332270131, "grad_norm": 1.1561312675476074, "learning_rate": 2.975844111404957e-05, "loss": 0.0876, "step": 3880 }, { "epoch": 0.08551895861221746, "grad_norm": 1.4000834226608276, "learning_rate": 2.975824972741887e-05, "loss": 0.1576, "step": 3881 }, { "epoch": 0.08554099390173363, "grad_norm": 1.6281415224075317, "learning_rate": 2.97580582656165e-05, "loss": 0.128, "step": 3882 }, { "epoch": 0.08556302919124979, "grad_norm": 0.9254798293113708, "learning_rate": 2.9757866728643415e-05, "loss": 0.131, "step": 3883 }, { "epoch": 0.08558506448076594, "grad_norm": 0.9698487520217896, "learning_rate": 2.975767511650061e-05, "loss": 0.1234, "step": 3884 }, { "epoch": 0.08560709977028211, "grad_norm": 1.6716840267181396, "learning_rate": 2.9757483429189048e-05, "loss": 0.1501, "step": 3885 }, { "epoch": 0.08562913505979827, "grad_norm": 0.9955593347549438, "learning_rate": 2.9757291666709712e-05, "loss": 0.1509, "step": 3886 }, { "epoch": 0.08565117034931442, "grad_norm": 1.7142646312713623, "learning_rate": 2.975709982906357e-05, "loss": 0.195, "step": 3887 }, { "epoch": 0.08567320563883059, "grad_norm": 1.2076972723007202, "learning_rate": 2.9756907916251608e-05, "loss": 0.1251, "step": 3888 }, { "epoch": 0.08569524092834675, "grad_norm": 1.2544745206832886, "learning_rate": 2.9756715928274805e-05, "loss": 0.1361, "step": 3889 }, { "epoch": 0.0857172762178629, "grad_norm": 1.1061272621154785, "learning_rate": 2.975652386513413e-05, "loss": 0.1585, "step": 3890 }, { "epoch": 0.08573931150737907, "grad_norm": 1.3763149976730347, "learning_rate": 2.9756331726830566e-05, "loss": 0.1364, "step": 3891 }, { "epoch": 0.08576134679689523, "grad_norm": 1.1404516696929932, "learning_rate": 2.9756139513365095e-05, "loss": 0.1263, "step": 3892 }, { "epoch": 0.08578338208641138, "grad_norm": 1.7001900672912598, "learning_rate": 2.9755947224738687e-05, "loss": 0.1707, "step": 3893 }, { "epoch": 0.08580541737592755, "grad_norm": 1.47564697265625, "learning_rate": 2.9755754860952333e-05, "loss": 0.1181, "step": 3894 }, { "epoch": 0.08582745266544371, "grad_norm": 1.2575873136520386, "learning_rate": 2.9755562422007e-05, "loss": 0.1463, "step": 3895 }, { "epoch": 0.08584948795495986, "grad_norm": 1.6599761247634888, "learning_rate": 2.975536990790368e-05, "loss": 0.1711, "step": 3896 }, { "epoch": 0.08587152324447603, "grad_norm": 1.2648552656173706, "learning_rate": 2.975517731864335e-05, "loss": 0.1623, "step": 3897 }, { "epoch": 0.08589355853399219, "grad_norm": 1.6090130805969238, "learning_rate": 2.9754984654226986e-05, "loss": 0.1148, "step": 3898 }, { "epoch": 0.08591559382350834, "grad_norm": 1.2989052534103394, "learning_rate": 2.9754791914655572e-05, "loss": 0.1024, "step": 3899 }, { "epoch": 0.08593762911302451, "grad_norm": 1.2298195362091064, "learning_rate": 2.9754599099930093e-05, "loss": 0.1517, "step": 3900 }, { "epoch": 0.08595966440254067, "grad_norm": 1.2894846200942993, "learning_rate": 2.975440621005153e-05, "loss": 0.1371, "step": 3901 }, { "epoch": 0.08598169969205682, "grad_norm": 1.773301124572754, "learning_rate": 2.9754213245020864e-05, "loss": 0.1248, "step": 3902 }, { "epoch": 0.08600373498157299, "grad_norm": 1.7659721374511719, "learning_rate": 2.9754020204839077e-05, "loss": 0.1454, "step": 3903 }, { "epoch": 0.08602577027108915, "grad_norm": 0.9404113292694092, "learning_rate": 2.975382708950715e-05, "loss": 0.149, "step": 3904 }, { "epoch": 0.0860478055606053, "grad_norm": 2.556300163269043, "learning_rate": 2.9753633899026075e-05, "loss": 0.1636, "step": 3905 }, { "epoch": 0.08606984085012147, "grad_norm": 1.3135998249053955, "learning_rate": 2.9753440633396826e-05, "loss": 0.1688, "step": 3906 }, { "epoch": 0.08609187613963763, "grad_norm": 1.0344562530517578, "learning_rate": 2.9753247292620397e-05, "loss": 0.1424, "step": 3907 }, { "epoch": 0.08611391142915378, "grad_norm": 1.9152387380599976, "learning_rate": 2.975305387669777e-05, "loss": 0.1399, "step": 3908 }, { "epoch": 0.08613594671866995, "grad_norm": 1.1622945070266724, "learning_rate": 2.975286038562992e-05, "loss": 0.1681, "step": 3909 }, { "epoch": 0.08615798200818611, "grad_norm": 1.7119511365890503, "learning_rate": 2.975266681941785e-05, "loss": 0.1186, "step": 3910 }, { "epoch": 0.08618001729770226, "grad_norm": 1.3770052194595337, "learning_rate": 2.9752473178062534e-05, "loss": 0.1368, "step": 3911 }, { "epoch": 0.08620205258721843, "grad_norm": 1.8402655124664307, "learning_rate": 2.975227946156496e-05, "loss": 0.2149, "step": 3912 }, { "epoch": 0.08622408787673459, "grad_norm": 1.9188209772109985, "learning_rate": 2.9752085669926115e-05, "loss": 0.1646, "step": 3913 }, { "epoch": 0.08624612316625076, "grad_norm": 1.1267346143722534, "learning_rate": 2.975189180314699e-05, "loss": 0.1419, "step": 3914 }, { "epoch": 0.08626815845576691, "grad_norm": 1.1895774602890015, "learning_rate": 2.9751697861228566e-05, "loss": 0.1383, "step": 3915 }, { "epoch": 0.08629019374528307, "grad_norm": 1.0238608121871948, "learning_rate": 2.9751503844171832e-05, "loss": 0.1165, "step": 3916 }, { "epoch": 0.08631222903479924, "grad_norm": 1.996035099029541, "learning_rate": 2.9751309751977784e-05, "loss": 0.1576, "step": 3917 }, { "epoch": 0.08633426432431539, "grad_norm": 2.4057395458221436, "learning_rate": 2.9751115584647404e-05, "loss": 0.1574, "step": 3918 }, { "epoch": 0.08635629961383155, "grad_norm": 1.4733330011367798, "learning_rate": 2.9750921342181684e-05, "loss": 0.1223, "step": 3919 }, { "epoch": 0.08637833490334772, "grad_norm": 1.5271358489990234, "learning_rate": 2.9750727024581604e-05, "loss": 0.1137, "step": 3920 }, { "epoch": 0.08640037019286387, "grad_norm": 1.421933650970459, "learning_rate": 2.9750532631848167e-05, "loss": 0.1564, "step": 3921 }, { "epoch": 0.08642240548238003, "grad_norm": 1.6943808794021606, "learning_rate": 2.975033816398236e-05, "loss": 0.1327, "step": 3922 }, { "epoch": 0.0864444407718962, "grad_norm": 2.418504238128662, "learning_rate": 2.9750143620985166e-05, "loss": 0.1644, "step": 3923 }, { "epoch": 0.08646647606141235, "grad_norm": 1.5964733362197876, "learning_rate": 2.9749949002857586e-05, "loss": 0.1551, "step": 3924 }, { "epoch": 0.08648851135092851, "grad_norm": 0.8540817499160767, "learning_rate": 2.9749754309600603e-05, "loss": 0.126, "step": 3925 }, { "epoch": 0.08651054664044468, "grad_norm": 1.6944249868392944, "learning_rate": 2.974955954121521e-05, "loss": 0.2123, "step": 3926 }, { "epoch": 0.08653258192996083, "grad_norm": 1.1301647424697876, "learning_rate": 2.9749364697702406e-05, "loss": 0.1357, "step": 3927 }, { "epoch": 0.08655461721947699, "grad_norm": 1.5056897401809692, "learning_rate": 2.9749169779063173e-05, "loss": 0.14, "step": 3928 }, { "epoch": 0.08657665250899316, "grad_norm": 1.120188593864441, "learning_rate": 2.974897478529851e-05, "loss": 0.1458, "step": 3929 }, { "epoch": 0.08659868779850931, "grad_norm": 2.083810329437256, "learning_rate": 2.9748779716409413e-05, "loss": 0.14, "step": 3930 }, { "epoch": 0.08662072308802547, "grad_norm": 1.4069604873657227, "learning_rate": 2.9748584572396868e-05, "loss": 0.1693, "step": 3931 }, { "epoch": 0.08664275837754164, "grad_norm": 0.8429020047187805, "learning_rate": 2.9748389353261875e-05, "loss": 0.1417, "step": 3932 }, { "epoch": 0.08666479366705779, "grad_norm": 1.6251832246780396, "learning_rate": 2.9748194059005428e-05, "loss": 0.1567, "step": 3933 }, { "epoch": 0.08668682895657395, "grad_norm": 1.1641285419464111, "learning_rate": 2.9747998689628518e-05, "loss": 0.167, "step": 3934 }, { "epoch": 0.08670886424609012, "grad_norm": 1.4429103136062622, "learning_rate": 2.974780324513214e-05, "loss": 0.1256, "step": 3935 }, { "epoch": 0.08673089953560627, "grad_norm": 1.7034579515457153, "learning_rate": 2.974760772551729e-05, "loss": 0.1805, "step": 3936 }, { "epoch": 0.08675293482512243, "grad_norm": 1.2646496295928955, "learning_rate": 2.9747412130784976e-05, "loss": 0.1146, "step": 3937 }, { "epoch": 0.0867749701146386, "grad_norm": 1.0820035934448242, "learning_rate": 2.9747216460936173e-05, "loss": 0.1204, "step": 3938 }, { "epoch": 0.08679700540415475, "grad_norm": 0.9515221118927002, "learning_rate": 2.9747020715971895e-05, "loss": 0.1179, "step": 3939 }, { "epoch": 0.08681904069367091, "grad_norm": 1.199346661567688, "learning_rate": 2.9746824895893128e-05, "loss": 0.1011, "step": 3940 }, { "epoch": 0.08684107598318708, "grad_norm": 0.968678891658783, "learning_rate": 2.9746629000700876e-05, "loss": 0.126, "step": 3941 }, { "epoch": 0.08686311127270323, "grad_norm": 1.251054286956787, "learning_rate": 2.9746433030396137e-05, "loss": 0.186, "step": 3942 }, { "epoch": 0.08688514656221939, "grad_norm": 1.6996339559555054, "learning_rate": 2.97462369849799e-05, "loss": 0.1719, "step": 3943 }, { "epoch": 0.08690718185173556, "grad_norm": 1.227756142616272, "learning_rate": 2.974604086445318e-05, "loss": 0.1695, "step": 3944 }, { "epoch": 0.08692921714125171, "grad_norm": 2.0868582725524902, "learning_rate": 2.974584466881696e-05, "loss": 0.1683, "step": 3945 }, { "epoch": 0.08695125243076787, "grad_norm": 0.8707588315010071, "learning_rate": 2.974564839807225e-05, "loss": 0.176, "step": 3946 }, { "epoch": 0.08697328772028404, "grad_norm": 2.4515740871429443, "learning_rate": 2.974545205222004e-05, "loss": 0.1597, "step": 3947 }, { "epoch": 0.0869953230098002, "grad_norm": 0.8675881028175354, "learning_rate": 2.974525563126134e-05, "loss": 0.1038, "step": 3948 }, { "epoch": 0.08701735829931635, "grad_norm": 1.220888614654541, "learning_rate": 2.9745059135197143e-05, "loss": 0.1289, "step": 3949 }, { "epoch": 0.08703939358883252, "grad_norm": 1.5431163311004639, "learning_rate": 2.9744862564028455e-05, "loss": 0.1181, "step": 3950 }, { "epoch": 0.08706142887834868, "grad_norm": 1.046609878540039, "learning_rate": 2.9744665917756272e-05, "loss": 0.1458, "step": 3951 }, { "epoch": 0.08708346416786483, "grad_norm": 1.4372023344039917, "learning_rate": 2.9744469196381602e-05, "loss": 0.1698, "step": 3952 }, { "epoch": 0.087105499457381, "grad_norm": 1.4931533336639404, "learning_rate": 2.9744272399905446e-05, "loss": 0.1179, "step": 3953 }, { "epoch": 0.08712753474689716, "grad_norm": 1.396679162979126, "learning_rate": 2.9744075528328802e-05, "loss": 0.1576, "step": 3954 }, { "epoch": 0.08714957003641331, "grad_norm": 1.5697828531265259, "learning_rate": 2.974387858165267e-05, "loss": 0.1621, "step": 3955 }, { "epoch": 0.08717160532592948, "grad_norm": 1.3554922342300415, "learning_rate": 2.9743681559878064e-05, "loss": 0.1446, "step": 3956 }, { "epoch": 0.08719364061544564, "grad_norm": 1.3562930822372437, "learning_rate": 2.974348446300598e-05, "loss": 0.1897, "step": 3957 }, { "epoch": 0.08721567590496179, "grad_norm": 1.058191180229187, "learning_rate": 2.9743287291037425e-05, "loss": 0.0953, "step": 3958 }, { "epoch": 0.08723771119447796, "grad_norm": 1.8820815086364746, "learning_rate": 2.97430900439734e-05, "loss": 0.1302, "step": 3959 }, { "epoch": 0.08725974648399412, "grad_norm": 1.4487050771713257, "learning_rate": 2.9742892721814912e-05, "loss": 0.1556, "step": 3960 }, { "epoch": 0.08728178177351027, "grad_norm": 1.356484293937683, "learning_rate": 2.9742695324562963e-05, "loss": 0.1563, "step": 3961 }, { "epoch": 0.08730381706302644, "grad_norm": 2.041181802749634, "learning_rate": 2.9742497852218568e-05, "loss": 0.1319, "step": 3962 }, { "epoch": 0.0873258523525426, "grad_norm": 1.5618427991867065, "learning_rate": 2.9742300304782717e-05, "loss": 0.1559, "step": 3963 }, { "epoch": 0.08734788764205875, "grad_norm": 1.2424241304397583, "learning_rate": 2.9742102682256433e-05, "loss": 0.1241, "step": 3964 }, { "epoch": 0.08736992293157492, "grad_norm": 1.6556065082550049, "learning_rate": 2.974190498464071e-05, "loss": 0.1813, "step": 3965 }, { "epoch": 0.08739195822109108, "grad_norm": 1.1826410293579102, "learning_rate": 2.974170721193656e-05, "loss": 0.1154, "step": 3966 }, { "epoch": 0.08741399351060723, "grad_norm": 1.2910255193710327, "learning_rate": 2.974150936414499e-05, "loss": 0.1701, "step": 3967 }, { "epoch": 0.0874360288001234, "grad_norm": 1.3324391841888428, "learning_rate": 2.974131144126701e-05, "loss": 0.1574, "step": 3968 }, { "epoch": 0.08745806408963956, "grad_norm": 0.9946239590644836, "learning_rate": 2.974111344330363e-05, "loss": 0.1265, "step": 3969 }, { "epoch": 0.08748009937915571, "grad_norm": 1.5240585803985596, "learning_rate": 2.9740915370255844e-05, "loss": 0.1491, "step": 3970 }, { "epoch": 0.08750213466867188, "grad_norm": 1.0981067419052124, "learning_rate": 2.9740717222124674e-05, "loss": 0.1279, "step": 3971 }, { "epoch": 0.08752416995818804, "grad_norm": 2.485450267791748, "learning_rate": 2.9740518998911132e-05, "loss": 0.1791, "step": 3972 }, { "epoch": 0.08754620524770419, "grad_norm": 2.5113658905029297, "learning_rate": 2.974032070061622e-05, "loss": 0.2016, "step": 3973 }, { "epoch": 0.08756824053722036, "grad_norm": 1.9198402166366577, "learning_rate": 2.9740122327240947e-05, "loss": 0.1255, "step": 3974 }, { "epoch": 0.08759027582673652, "grad_norm": 1.8307324647903442, "learning_rate": 2.973992387878633e-05, "loss": 0.1751, "step": 3975 }, { "epoch": 0.08761231111625267, "grad_norm": 2.0408875942230225, "learning_rate": 2.9739725355253374e-05, "loss": 0.1509, "step": 3976 }, { "epoch": 0.08763434640576884, "grad_norm": 1.7817416191101074, "learning_rate": 2.9739526756643093e-05, "loss": 0.187, "step": 3977 }, { "epoch": 0.087656381695285, "grad_norm": 1.4932093620300293, "learning_rate": 2.97393280829565e-05, "loss": 0.1606, "step": 3978 }, { "epoch": 0.08767841698480115, "grad_norm": 1.2367194890975952, "learning_rate": 2.9739129334194607e-05, "loss": 0.1337, "step": 3979 }, { "epoch": 0.08770045227431732, "grad_norm": 2.074218988418579, "learning_rate": 2.973893051035842e-05, "loss": 0.1455, "step": 3980 }, { "epoch": 0.08772248756383348, "grad_norm": 1.6109598875045776, "learning_rate": 2.973873161144896e-05, "loss": 0.139, "step": 3981 }, { "epoch": 0.08774452285334965, "grad_norm": 1.0123883485794067, "learning_rate": 2.9738532637467236e-05, "loss": 0.1307, "step": 3982 }, { "epoch": 0.0877665581428658, "grad_norm": 1.5198084115982056, "learning_rate": 2.9738333588414262e-05, "loss": 0.1722, "step": 3983 }, { "epoch": 0.08778859343238196, "grad_norm": 0.8729766011238098, "learning_rate": 2.9738134464291053e-05, "loss": 0.1916, "step": 3984 }, { "epoch": 0.08781062872189813, "grad_norm": 1.267144799232483, "learning_rate": 2.9737935265098616e-05, "loss": 0.1177, "step": 3985 }, { "epoch": 0.08783266401141428, "grad_norm": 1.2230567932128906, "learning_rate": 2.9737735990837973e-05, "loss": 0.1106, "step": 3986 }, { "epoch": 0.08785469930093044, "grad_norm": 1.2798104286193848, "learning_rate": 2.973753664151014e-05, "loss": 0.1344, "step": 3987 }, { "epoch": 0.0878767345904466, "grad_norm": 2.0710036754608154, "learning_rate": 2.9737337217116132e-05, "loss": 0.2352, "step": 3988 }, { "epoch": 0.08789876987996276, "grad_norm": 1.3694396018981934, "learning_rate": 2.973713771765696e-05, "loss": 0.1127, "step": 3989 }, { "epoch": 0.08792080516947892, "grad_norm": 1.2523820400238037, "learning_rate": 2.973693814313364e-05, "loss": 0.1081, "step": 3990 }, { "epoch": 0.08794284045899509, "grad_norm": 1.1567164659500122, "learning_rate": 2.9736738493547196e-05, "loss": 0.1343, "step": 3991 }, { "epoch": 0.08796487574851124, "grad_norm": 1.3499809503555298, "learning_rate": 2.973653876889864e-05, "loss": 0.1432, "step": 3992 }, { "epoch": 0.0879869110380274, "grad_norm": 1.425797939300537, "learning_rate": 2.9736338969188992e-05, "loss": 0.1542, "step": 3993 }, { "epoch": 0.08800894632754357, "grad_norm": 1.3952078819274902, "learning_rate": 2.9736139094419262e-05, "loss": 0.1686, "step": 3994 }, { "epoch": 0.08803098161705972, "grad_norm": 1.3779655694961548, "learning_rate": 2.9735939144590477e-05, "loss": 0.1735, "step": 3995 }, { "epoch": 0.08805301690657588, "grad_norm": 1.1414421796798706, "learning_rate": 2.9735739119703652e-05, "loss": 0.155, "step": 3996 }, { "epoch": 0.08807505219609205, "grad_norm": 1.325247883796692, "learning_rate": 2.97355390197598e-05, "loss": 0.1401, "step": 3997 }, { "epoch": 0.0880970874856082, "grad_norm": 1.2694101333618164, "learning_rate": 2.9735338844759955e-05, "loss": 0.1315, "step": 3998 }, { "epoch": 0.08811912277512436, "grad_norm": 1.724042534828186, "learning_rate": 2.9735138594705122e-05, "loss": 0.1548, "step": 3999 }, { "epoch": 0.08814115806464053, "grad_norm": 1.5493961572647095, "learning_rate": 2.973493826959633e-05, "loss": 0.1618, "step": 4000 }, { "epoch": 0.08816319335415668, "grad_norm": 2.0379183292388916, "learning_rate": 2.973473786943459e-05, "loss": 0.1575, "step": 4001 }, { "epoch": 0.08818522864367284, "grad_norm": 0.9275872111320496, "learning_rate": 2.9734537394220934e-05, "loss": 0.1137, "step": 4002 }, { "epoch": 0.088207263933189, "grad_norm": 1.3722326755523682, "learning_rate": 2.973433684395637e-05, "loss": 0.1432, "step": 4003 }, { "epoch": 0.08822929922270516, "grad_norm": 1.156246542930603, "learning_rate": 2.9734136218641937e-05, "loss": 0.135, "step": 4004 }, { "epoch": 0.08825133451222132, "grad_norm": 0.973895788192749, "learning_rate": 2.973393551827864e-05, "loss": 0.1272, "step": 4005 }, { "epoch": 0.08827336980173749, "grad_norm": 1.1854557991027832, "learning_rate": 2.9733734742867515e-05, "loss": 0.1106, "step": 4006 }, { "epoch": 0.08829540509125364, "grad_norm": 0.9256219267845154, "learning_rate": 2.9733533892409572e-05, "loss": 0.1258, "step": 4007 }, { "epoch": 0.0883174403807698, "grad_norm": 1.4211242198944092, "learning_rate": 2.9733332966905842e-05, "loss": 0.1734, "step": 4008 }, { "epoch": 0.08833947567028597, "grad_norm": 0.9615855813026428, "learning_rate": 2.973313196635735e-05, "loss": 0.1305, "step": 4009 }, { "epoch": 0.08836151095980212, "grad_norm": 1.641076683998108, "learning_rate": 2.9732930890765116e-05, "loss": 0.1226, "step": 4010 }, { "epoch": 0.08838354624931828, "grad_norm": 1.2708677053451538, "learning_rate": 2.973272974013016e-05, "loss": 0.0844, "step": 4011 }, { "epoch": 0.08840558153883445, "grad_norm": 0.8869088888168335, "learning_rate": 2.973252851445351e-05, "loss": 0.0867, "step": 4012 }, { "epoch": 0.0884276168283506, "grad_norm": 1.6113654375076294, "learning_rate": 2.9732327213736198e-05, "loss": 0.1802, "step": 4013 }, { "epoch": 0.08844965211786676, "grad_norm": 1.3031134605407715, "learning_rate": 2.973212583797924e-05, "loss": 0.1434, "step": 4014 }, { "epoch": 0.08847168740738293, "grad_norm": 1.661303997039795, "learning_rate": 2.9731924387183668e-05, "loss": 0.2138, "step": 4015 }, { "epoch": 0.08849372269689908, "grad_norm": 1.0849976539611816, "learning_rate": 2.97317228613505e-05, "loss": 0.1046, "step": 4016 }, { "epoch": 0.08851575798641524, "grad_norm": 1.3030110597610474, "learning_rate": 2.9731521260480774e-05, "loss": 0.1639, "step": 4017 }, { "epoch": 0.0885377932759314, "grad_norm": 1.0221471786499023, "learning_rate": 2.9731319584575506e-05, "loss": 0.1232, "step": 4018 }, { "epoch": 0.08855982856544757, "grad_norm": 1.438350796699524, "learning_rate": 2.9731117833635733e-05, "loss": 0.1504, "step": 4019 }, { "epoch": 0.08858186385496372, "grad_norm": 0.9829767942428589, "learning_rate": 2.973091600766247e-05, "loss": 0.1279, "step": 4020 }, { "epoch": 0.08860389914447989, "grad_norm": 1.2297406196594238, "learning_rate": 2.9730714106656757e-05, "loss": 0.1083, "step": 4021 }, { "epoch": 0.08862593443399605, "grad_norm": 1.24447500705719, "learning_rate": 2.9730512130619615e-05, "loss": 0.1369, "step": 4022 }, { "epoch": 0.0886479697235122, "grad_norm": 1.0718886852264404, "learning_rate": 2.973031007955208e-05, "loss": 0.1245, "step": 4023 }, { "epoch": 0.08867000501302837, "grad_norm": 1.7393550872802734, "learning_rate": 2.973010795345517e-05, "loss": 0.1575, "step": 4024 }, { "epoch": 0.08869204030254453, "grad_norm": 1.2897542715072632, "learning_rate": 2.972990575232993e-05, "loss": 0.1342, "step": 4025 }, { "epoch": 0.08871407559206068, "grad_norm": 1.4826009273529053, "learning_rate": 2.9729703476177375e-05, "loss": 0.1736, "step": 4026 }, { "epoch": 0.08873611088157685, "grad_norm": 1.4149984121322632, "learning_rate": 2.9729501124998546e-05, "loss": 0.1736, "step": 4027 }, { "epoch": 0.08875814617109301, "grad_norm": 1.1893579959869385, "learning_rate": 2.972929869879446e-05, "loss": 0.117, "step": 4028 }, { "epoch": 0.08878018146060916, "grad_norm": 1.2385942935943604, "learning_rate": 2.972909619756617e-05, "loss": 0.1422, "step": 4029 }, { "epoch": 0.08880221675012533, "grad_norm": 1.1141526699066162, "learning_rate": 2.9728893621314686e-05, "loss": 0.1761, "step": 4030 }, { "epoch": 0.08882425203964149, "grad_norm": 1.2720468044281006, "learning_rate": 2.9728690970041052e-05, "loss": 0.1221, "step": 4031 }, { "epoch": 0.08884628732915764, "grad_norm": 1.0573774576187134, "learning_rate": 2.9728488243746296e-05, "loss": 0.1015, "step": 4032 }, { "epoch": 0.0888683226186738, "grad_norm": 1.8310699462890625, "learning_rate": 2.9728285442431448e-05, "loss": 0.178, "step": 4033 }, { "epoch": 0.08889035790818997, "grad_norm": 1.3654351234436035, "learning_rate": 2.9728082566097548e-05, "loss": 0.1475, "step": 4034 }, { "epoch": 0.08891239319770612, "grad_norm": 1.7806789875030518, "learning_rate": 2.9727879614745627e-05, "loss": 0.1461, "step": 4035 }, { "epoch": 0.08893442848722229, "grad_norm": 1.4370856285095215, "learning_rate": 2.9727676588376716e-05, "loss": 0.0896, "step": 4036 }, { "epoch": 0.08895646377673845, "grad_norm": 1.7744667530059814, "learning_rate": 2.9727473486991848e-05, "loss": 0.1223, "step": 4037 }, { "epoch": 0.0889784990662546, "grad_norm": 1.2904584407806396, "learning_rate": 2.972727031059206e-05, "loss": 0.1509, "step": 4038 }, { "epoch": 0.08900053435577077, "grad_norm": 1.229675531387329, "learning_rate": 2.9727067059178392e-05, "loss": 0.1526, "step": 4039 }, { "epoch": 0.08902256964528693, "grad_norm": 1.5009464025497437, "learning_rate": 2.9726863732751872e-05, "loss": 0.0961, "step": 4040 }, { "epoch": 0.08904460493480308, "grad_norm": 1.2354488372802734, "learning_rate": 2.9726660331313537e-05, "loss": 0.2161, "step": 4041 }, { "epoch": 0.08906664022431925, "grad_norm": 1.9102885723114014, "learning_rate": 2.9726456854864425e-05, "loss": 0.1803, "step": 4042 }, { "epoch": 0.08908867551383541, "grad_norm": 1.505644679069519, "learning_rate": 2.9726253303405567e-05, "loss": 0.1292, "step": 4043 }, { "epoch": 0.08911071080335156, "grad_norm": 1.1402136087417603, "learning_rate": 2.972604967693801e-05, "loss": 0.1382, "step": 4044 }, { "epoch": 0.08913274609286773, "grad_norm": 1.5358837842941284, "learning_rate": 2.972584597546278e-05, "loss": 0.1568, "step": 4045 }, { "epoch": 0.08915478138238389, "grad_norm": 0.9923015832901001, "learning_rate": 2.9725642198980925e-05, "loss": 0.114, "step": 4046 }, { "epoch": 0.08917681667190004, "grad_norm": 1.5336521863937378, "learning_rate": 2.9725438347493473e-05, "loss": 0.1922, "step": 4047 }, { "epoch": 0.0891988519614162, "grad_norm": 1.6160478591918945, "learning_rate": 2.9725234421001468e-05, "loss": 0.166, "step": 4048 }, { "epoch": 0.08922088725093237, "grad_norm": 1.7749980688095093, "learning_rate": 2.972503041950595e-05, "loss": 0.1406, "step": 4049 }, { "epoch": 0.08924292254044852, "grad_norm": 1.3685095310211182, "learning_rate": 2.9724826343007952e-05, "loss": 0.1313, "step": 4050 }, { "epoch": 0.08926495782996469, "grad_norm": 1.1687678098678589, "learning_rate": 2.9724622191508522e-05, "loss": 0.1327, "step": 4051 }, { "epoch": 0.08928699311948085, "grad_norm": 2.328831195831299, "learning_rate": 2.9724417965008695e-05, "loss": 0.1398, "step": 4052 }, { "epoch": 0.08930902840899702, "grad_norm": 1.2327041625976562, "learning_rate": 2.9724213663509508e-05, "loss": 0.1397, "step": 4053 }, { "epoch": 0.08933106369851317, "grad_norm": 1.1793075799942017, "learning_rate": 2.9724009287012008e-05, "loss": 0.1512, "step": 4054 }, { "epoch": 0.08935309898802933, "grad_norm": 1.151798129081726, "learning_rate": 2.972380483551723e-05, "loss": 0.1579, "step": 4055 }, { "epoch": 0.0893751342775455, "grad_norm": 1.2170659303665161, "learning_rate": 2.972360030902622e-05, "loss": 0.1305, "step": 4056 }, { "epoch": 0.08939716956706165, "grad_norm": 1.2061400413513184, "learning_rate": 2.972339570754002e-05, "loss": 0.1586, "step": 4057 }, { "epoch": 0.08941920485657781, "grad_norm": 3.656730890274048, "learning_rate": 2.9723191031059667e-05, "loss": 0.1815, "step": 4058 }, { "epoch": 0.08944124014609398, "grad_norm": 2.2755908966064453, "learning_rate": 2.972298627958621e-05, "loss": 0.1367, "step": 4059 }, { "epoch": 0.08946327543561013, "grad_norm": 0.8313856720924377, "learning_rate": 2.972278145312069e-05, "loss": 0.1627, "step": 4060 }, { "epoch": 0.08948531072512629, "grad_norm": 1.2961111068725586, "learning_rate": 2.9722576551664142e-05, "loss": 0.12, "step": 4061 }, { "epoch": 0.08950734601464246, "grad_norm": 1.4680668115615845, "learning_rate": 2.9722371575217627e-05, "loss": 0.1816, "step": 4062 }, { "epoch": 0.0895293813041586, "grad_norm": 1.192241907119751, "learning_rate": 2.9722166523782168e-05, "loss": 0.1463, "step": 4063 }, { "epoch": 0.08955141659367477, "grad_norm": 1.281104326248169, "learning_rate": 2.9721961397358827e-05, "loss": 0.1677, "step": 4064 }, { "epoch": 0.08957345188319094, "grad_norm": 1.228758454322815, "learning_rate": 2.972175619594864e-05, "loss": 0.1185, "step": 4065 }, { "epoch": 0.08959548717270709, "grad_norm": 2.1718788146972656, "learning_rate": 2.9721550919552656e-05, "loss": 0.1365, "step": 4066 }, { "epoch": 0.08961752246222325, "grad_norm": 2.5789146423339844, "learning_rate": 2.9721345568171916e-05, "loss": 0.1516, "step": 4067 }, { "epoch": 0.08963955775173942, "grad_norm": 1.7970231771469116, "learning_rate": 2.972114014180747e-05, "loss": 0.1348, "step": 4068 }, { "epoch": 0.08966159304125557, "grad_norm": 2.0174598693847656, "learning_rate": 2.9720934640460363e-05, "loss": 0.1503, "step": 4069 }, { "epoch": 0.08968362833077173, "grad_norm": 1.589561104774475, "learning_rate": 2.9720729064131644e-05, "loss": 0.1213, "step": 4070 }, { "epoch": 0.0897056636202879, "grad_norm": 1.1702854633331299, "learning_rate": 2.9720523412822355e-05, "loss": 0.1102, "step": 4071 }, { "epoch": 0.08972769890980405, "grad_norm": 1.9111104011535645, "learning_rate": 2.972031768653355e-05, "loss": 0.199, "step": 4072 }, { "epoch": 0.08974973419932021, "grad_norm": 1.49502694606781, "learning_rate": 2.972011188526627e-05, "loss": 0.1454, "step": 4073 }, { "epoch": 0.08977176948883638, "grad_norm": 1.2761517763137817, "learning_rate": 2.971990600902157e-05, "loss": 0.1797, "step": 4074 }, { "epoch": 0.08979380477835253, "grad_norm": 1.4881021976470947, "learning_rate": 2.9719700057800494e-05, "loss": 0.1235, "step": 4075 }, { "epoch": 0.08981584006786869, "grad_norm": 1.6270304918289185, "learning_rate": 2.971949403160409e-05, "loss": 0.1226, "step": 4076 }, { "epoch": 0.08983787535738486, "grad_norm": 1.366849422454834, "learning_rate": 2.971928793043341e-05, "loss": 0.1625, "step": 4077 }, { "epoch": 0.089859910646901, "grad_norm": 1.5549880266189575, "learning_rate": 2.9719081754289503e-05, "loss": 0.1939, "step": 4078 }, { "epoch": 0.08988194593641717, "grad_norm": 1.3116545677185059, "learning_rate": 2.9718875503173423e-05, "loss": 0.1449, "step": 4079 }, { "epoch": 0.08990398122593334, "grad_norm": 1.4656387567520142, "learning_rate": 2.9718669177086216e-05, "loss": 0.1511, "step": 4080 }, { "epoch": 0.08992601651544949, "grad_norm": 1.303857445716858, "learning_rate": 2.9718462776028933e-05, "loss": 0.1777, "step": 4081 }, { "epoch": 0.08994805180496565, "grad_norm": 1.1582741737365723, "learning_rate": 2.9718256300002627e-05, "loss": 0.1545, "step": 4082 }, { "epoch": 0.08997008709448182, "grad_norm": 1.662026286125183, "learning_rate": 2.971804974900835e-05, "loss": 0.0984, "step": 4083 }, { "epoch": 0.08999212238399797, "grad_norm": 1.2092411518096924, "learning_rate": 2.971784312304715e-05, "loss": 0.1483, "step": 4084 }, { "epoch": 0.09001415767351413, "grad_norm": 1.352331519126892, "learning_rate": 2.9717636422120086e-05, "loss": 0.1373, "step": 4085 }, { "epoch": 0.0900361929630303, "grad_norm": 1.1089105606079102, "learning_rate": 2.9717429646228206e-05, "loss": 0.1396, "step": 4086 }, { "epoch": 0.09005822825254646, "grad_norm": 1.7411866188049316, "learning_rate": 2.9717222795372565e-05, "loss": 0.181, "step": 4087 }, { "epoch": 0.09008026354206261, "grad_norm": 1.1709284782409668, "learning_rate": 2.9717015869554216e-05, "loss": 0.1684, "step": 4088 }, { "epoch": 0.09010229883157878, "grad_norm": 1.3925384283065796, "learning_rate": 2.971680886877421e-05, "loss": 0.1696, "step": 4089 }, { "epoch": 0.09012433412109494, "grad_norm": 1.3900431394577026, "learning_rate": 2.971660179303361e-05, "loss": 0.15, "step": 4090 }, { "epoch": 0.09014636941061109, "grad_norm": 1.1701973676681519, "learning_rate": 2.9716394642333465e-05, "loss": 0.1252, "step": 4091 }, { "epoch": 0.09016840470012726, "grad_norm": 1.2992041110992432, "learning_rate": 2.9716187416674832e-05, "loss": 0.1414, "step": 4092 }, { "epoch": 0.09019043998964342, "grad_norm": 1.2133816480636597, "learning_rate": 2.9715980116058757e-05, "loss": 0.1483, "step": 4093 }, { "epoch": 0.09021247527915957, "grad_norm": 1.1148403882980347, "learning_rate": 2.971577274048631e-05, "loss": 0.113, "step": 4094 }, { "epoch": 0.09023451056867574, "grad_norm": 1.2788524627685547, "learning_rate": 2.9715565289958543e-05, "loss": 0.1851, "step": 4095 }, { "epoch": 0.0902565458581919, "grad_norm": 0.9692015051841736, "learning_rate": 2.971535776447651e-05, "loss": 0.1198, "step": 4096 }, { "epoch": 0.09027858114770805, "grad_norm": 1.1791367530822754, "learning_rate": 2.9715150164041266e-05, "loss": 0.1645, "step": 4097 }, { "epoch": 0.09030061643722422, "grad_norm": 2.1200687885284424, "learning_rate": 2.9714942488653874e-05, "loss": 0.1639, "step": 4098 }, { "epoch": 0.09032265172674038, "grad_norm": 2.4611597061157227, "learning_rate": 2.971473473831539e-05, "loss": 0.1421, "step": 4099 }, { "epoch": 0.09034468701625653, "grad_norm": 2.562612295150757, "learning_rate": 2.9714526913026862e-05, "loss": 0.243, "step": 4100 }, { "epoch": 0.0903667223057727, "grad_norm": 1.528943419456482, "learning_rate": 2.9714319012789367e-05, "loss": 0.1558, "step": 4101 }, { "epoch": 0.09038875759528886, "grad_norm": 0.890127956867218, "learning_rate": 2.9714111037603954e-05, "loss": 0.1588, "step": 4102 }, { "epoch": 0.09041079288480501, "grad_norm": 1.3094755411148071, "learning_rate": 2.9713902987471683e-05, "loss": 0.1271, "step": 4103 }, { "epoch": 0.09043282817432118, "grad_norm": 1.1306946277618408, "learning_rate": 2.971369486239361e-05, "loss": 0.116, "step": 4104 }, { "epoch": 0.09045486346383734, "grad_norm": 1.263218879699707, "learning_rate": 2.97134866623708e-05, "loss": 0.1389, "step": 4105 }, { "epoch": 0.09047689875335349, "grad_norm": 1.3124412298202515, "learning_rate": 2.9713278387404314e-05, "loss": 0.1278, "step": 4106 }, { "epoch": 0.09049893404286966, "grad_norm": 1.1496505737304688, "learning_rate": 2.971307003749521e-05, "loss": 0.1289, "step": 4107 }, { "epoch": 0.09052096933238582, "grad_norm": 1.434844732284546, "learning_rate": 2.9712861612644556e-05, "loss": 0.1214, "step": 4108 }, { "epoch": 0.09054300462190197, "grad_norm": 1.5576542615890503, "learning_rate": 2.9712653112853404e-05, "loss": 0.1618, "step": 4109 }, { "epoch": 0.09056503991141814, "grad_norm": 1.3140259981155396, "learning_rate": 2.9712444538122817e-05, "loss": 0.162, "step": 4110 }, { "epoch": 0.0905870752009343, "grad_norm": 1.3884679079055786, "learning_rate": 2.9712235888453867e-05, "loss": 0.1639, "step": 4111 }, { "epoch": 0.09060911049045045, "grad_norm": 1.9972598552703857, "learning_rate": 2.9712027163847604e-05, "loss": 0.1633, "step": 4112 }, { "epoch": 0.09063114577996662, "grad_norm": 1.616312026977539, "learning_rate": 2.97118183643051e-05, "loss": 0.1561, "step": 4113 }, { "epoch": 0.09065318106948278, "grad_norm": 1.4505643844604492, "learning_rate": 2.971160948982742e-05, "loss": 0.1511, "step": 4114 }, { "epoch": 0.09067521635899893, "grad_norm": 1.1788809299468994, "learning_rate": 2.971140054041562e-05, "loss": 0.1258, "step": 4115 }, { "epoch": 0.0906972516485151, "grad_norm": 1.3513659238815308, "learning_rate": 2.9711191516070767e-05, "loss": 0.1308, "step": 4116 }, { "epoch": 0.09071928693803126, "grad_norm": 1.6822997331619263, "learning_rate": 2.9710982416793927e-05, "loss": 0.1781, "step": 4117 }, { "epoch": 0.09074132222754741, "grad_norm": 1.8858987092971802, "learning_rate": 2.9710773242586167e-05, "loss": 0.1339, "step": 4118 }, { "epoch": 0.09076335751706358, "grad_norm": 1.6272329092025757, "learning_rate": 2.971056399344855e-05, "loss": 0.1744, "step": 4119 }, { "epoch": 0.09078539280657974, "grad_norm": 1.2392683029174805, "learning_rate": 2.9710354669382142e-05, "loss": 0.1707, "step": 4120 }, { "epoch": 0.09080742809609589, "grad_norm": 1.1404786109924316, "learning_rate": 2.971014527038801e-05, "loss": 0.1551, "step": 4121 }, { "epoch": 0.09082946338561206, "grad_norm": 1.2958368062973022, "learning_rate": 2.9709935796467216e-05, "loss": 0.1297, "step": 4122 }, { "epoch": 0.09085149867512822, "grad_norm": 1.5113224983215332, "learning_rate": 2.9709726247620835e-05, "loss": 0.1832, "step": 4123 }, { "epoch": 0.09087353396464438, "grad_norm": 1.9335392713546753, "learning_rate": 2.9709516623849928e-05, "loss": 0.15, "step": 4124 }, { "epoch": 0.09089556925416054, "grad_norm": 0.9614183306694031, "learning_rate": 2.970930692515557e-05, "loss": 0.0921, "step": 4125 }, { "epoch": 0.0909176045436767, "grad_norm": 0.7794470191001892, "learning_rate": 2.9709097151538817e-05, "loss": 0.0927, "step": 4126 }, { "epoch": 0.09093963983319286, "grad_norm": 1.526550531387329, "learning_rate": 2.970888730300075e-05, "loss": 0.1629, "step": 4127 }, { "epoch": 0.09096167512270902, "grad_norm": 1.8257253170013428, "learning_rate": 2.9708677379542426e-05, "loss": 0.1763, "step": 4128 }, { "epoch": 0.09098371041222518, "grad_norm": 1.2867496013641357, "learning_rate": 2.9708467381164926e-05, "loss": 0.1732, "step": 4129 }, { "epoch": 0.09100574570174134, "grad_norm": 1.7752816677093506, "learning_rate": 2.9708257307869314e-05, "loss": 0.1844, "step": 4130 }, { "epoch": 0.0910277809912575, "grad_norm": 1.3681825399398804, "learning_rate": 2.970804715965666e-05, "loss": 0.1772, "step": 4131 }, { "epoch": 0.09104981628077366, "grad_norm": 1.6233757734298706, "learning_rate": 2.9707836936528035e-05, "loss": 0.1766, "step": 4132 }, { "epoch": 0.09107185157028982, "grad_norm": 0.9426968693733215, "learning_rate": 2.970762663848451e-05, "loss": 0.1208, "step": 4133 }, { "epoch": 0.09109388685980598, "grad_norm": 1.8668895959854126, "learning_rate": 2.970741626552715e-05, "loss": 0.1342, "step": 4134 }, { "epoch": 0.09111592214932214, "grad_norm": 0.8114851117134094, "learning_rate": 2.9707205817657038e-05, "loss": 0.1254, "step": 4135 }, { "epoch": 0.0911379574388383, "grad_norm": 1.5805296897888184, "learning_rate": 2.9706995294875237e-05, "loss": 0.1477, "step": 4136 }, { "epoch": 0.09115999272835446, "grad_norm": 1.306397557258606, "learning_rate": 2.9706784697182825e-05, "loss": 0.1247, "step": 4137 }, { "epoch": 0.09118202801787062, "grad_norm": 1.708066463470459, "learning_rate": 2.9706574024580872e-05, "loss": 0.1576, "step": 4138 }, { "epoch": 0.09120406330738678, "grad_norm": 1.3391910791397095, "learning_rate": 2.9706363277070453e-05, "loss": 0.1512, "step": 4139 }, { "epoch": 0.09122609859690294, "grad_norm": 1.2479629516601562, "learning_rate": 2.9706152454652635e-05, "loss": 0.1352, "step": 4140 }, { "epoch": 0.0912481338864191, "grad_norm": 0.8948833346366882, "learning_rate": 2.9705941557328498e-05, "loss": 0.1036, "step": 4141 }, { "epoch": 0.09127016917593526, "grad_norm": 1.07700777053833, "learning_rate": 2.970573058509912e-05, "loss": 0.1042, "step": 4142 }, { "epoch": 0.09129220446545142, "grad_norm": 1.6338422298431396, "learning_rate": 2.970551953796556e-05, "loss": 0.1928, "step": 4143 }, { "epoch": 0.09131423975496758, "grad_norm": 1.3691496849060059, "learning_rate": 2.970530841592891e-05, "loss": 0.1945, "step": 4144 }, { "epoch": 0.09133627504448374, "grad_norm": 1.6449785232543945, "learning_rate": 2.9705097218990238e-05, "loss": 0.1423, "step": 4145 }, { "epoch": 0.0913583103339999, "grad_norm": 1.459526538848877, "learning_rate": 2.9704885947150622e-05, "loss": 0.1629, "step": 4146 }, { "epoch": 0.09138034562351606, "grad_norm": 1.3754475116729736, "learning_rate": 2.9704674600411132e-05, "loss": 0.155, "step": 4147 }, { "epoch": 0.09140238091303222, "grad_norm": 1.3962881565093994, "learning_rate": 2.9704463178772848e-05, "loss": 0.1379, "step": 4148 }, { "epoch": 0.09142441620254838, "grad_norm": 1.3305264711380005, "learning_rate": 2.9704251682236855e-05, "loss": 0.1199, "step": 4149 }, { "epoch": 0.09144645149206454, "grad_norm": 1.413077473640442, "learning_rate": 2.9704040110804217e-05, "loss": 0.1096, "step": 4150 }, { "epoch": 0.0914684867815807, "grad_norm": 1.6299028396606445, "learning_rate": 2.970382846447602e-05, "loss": 0.1719, "step": 4151 }, { "epoch": 0.09149052207109686, "grad_norm": 1.309860348701477, "learning_rate": 2.970361674325334e-05, "loss": 0.1353, "step": 4152 }, { "epoch": 0.09151255736061302, "grad_norm": 1.537609338760376, "learning_rate": 2.9703404947137254e-05, "loss": 0.1453, "step": 4153 }, { "epoch": 0.09153459265012918, "grad_norm": 1.4176830053329468, "learning_rate": 2.9703193076128846e-05, "loss": 0.1879, "step": 4154 }, { "epoch": 0.09155662793964534, "grad_norm": 1.7098702192306519, "learning_rate": 2.9702981130229186e-05, "loss": 0.1724, "step": 4155 }, { "epoch": 0.0915786632291615, "grad_norm": 1.649918556213379, "learning_rate": 2.9702769109439362e-05, "loss": 0.1116, "step": 4156 }, { "epoch": 0.09160069851867766, "grad_norm": 1.1246898174285889, "learning_rate": 2.9702557013760448e-05, "loss": 0.1221, "step": 4157 }, { "epoch": 0.09162273380819383, "grad_norm": 1.0720574855804443, "learning_rate": 2.9702344843193536e-05, "loss": 0.1699, "step": 4158 }, { "epoch": 0.09164476909770998, "grad_norm": 1.3850128650665283, "learning_rate": 2.9702132597739688e-05, "loss": 0.1427, "step": 4159 }, { "epoch": 0.09166680438722614, "grad_norm": 1.0804888010025024, "learning_rate": 2.9701920277399996e-05, "loss": 0.192, "step": 4160 }, { "epoch": 0.09168883967674231, "grad_norm": 1.0597010850906372, "learning_rate": 2.9701707882175544e-05, "loss": 0.1216, "step": 4161 }, { "epoch": 0.09171087496625846, "grad_norm": 1.0445525646209717, "learning_rate": 2.9701495412067404e-05, "loss": 0.1111, "step": 4162 }, { "epoch": 0.09173291025577462, "grad_norm": 2.0443832874298096, "learning_rate": 2.9701282867076668e-05, "loss": 0.1729, "step": 4163 }, { "epoch": 0.09175494554529079, "grad_norm": 1.105229377746582, "learning_rate": 2.9701070247204415e-05, "loss": 0.1159, "step": 4164 }, { "epoch": 0.09177698083480694, "grad_norm": 1.6768734455108643, "learning_rate": 2.970085755245173e-05, "loss": 0.1346, "step": 4165 }, { "epoch": 0.0917990161243231, "grad_norm": 1.0825284719467163, "learning_rate": 2.970064478281969e-05, "loss": 0.1755, "step": 4166 }, { "epoch": 0.09182105141383927, "grad_norm": 2.028141498565674, "learning_rate": 2.9700431938309386e-05, "loss": 0.1732, "step": 4167 }, { "epoch": 0.09184308670335542, "grad_norm": 1.1279646158218384, "learning_rate": 2.97002190189219e-05, "loss": 0.1324, "step": 4168 }, { "epoch": 0.09186512199287158, "grad_norm": 1.3609380722045898, "learning_rate": 2.970000602465831e-05, "loss": 0.1451, "step": 4169 }, { "epoch": 0.09188715728238775, "grad_norm": 1.0571491718292236, "learning_rate": 2.9699792955519712e-05, "loss": 0.1627, "step": 4170 }, { "epoch": 0.0919091925719039, "grad_norm": 1.4221582412719727, "learning_rate": 2.9699579811507185e-05, "loss": 0.1788, "step": 4171 }, { "epoch": 0.09193122786142006, "grad_norm": 1.2641557455062866, "learning_rate": 2.9699366592621813e-05, "loss": 0.1215, "step": 4172 }, { "epoch": 0.09195326315093623, "grad_norm": 1.056241750717163, "learning_rate": 2.9699153298864686e-05, "loss": 0.1117, "step": 4173 }, { "epoch": 0.09197529844045238, "grad_norm": 1.4859652519226074, "learning_rate": 2.969893993023689e-05, "loss": 0.1317, "step": 4174 }, { "epoch": 0.09199733372996854, "grad_norm": 1.91135573387146, "learning_rate": 2.9698726486739506e-05, "loss": 0.1037, "step": 4175 }, { "epoch": 0.09201936901948471, "grad_norm": 0.8291864991188049, "learning_rate": 2.9698512968373632e-05, "loss": 0.0993, "step": 4176 }, { "epoch": 0.09204140430900086, "grad_norm": 1.2171710729599, "learning_rate": 2.9698299375140346e-05, "loss": 0.1709, "step": 4177 }, { "epoch": 0.09206343959851702, "grad_norm": 1.1367970705032349, "learning_rate": 2.969808570704074e-05, "loss": 0.1318, "step": 4178 }, { "epoch": 0.09208547488803319, "grad_norm": 1.0211485624313354, "learning_rate": 2.9697871964075905e-05, "loss": 0.1651, "step": 4179 }, { "epoch": 0.09210751017754934, "grad_norm": 1.3890101909637451, "learning_rate": 2.9697658146246924e-05, "loss": 0.1878, "step": 4180 }, { "epoch": 0.0921295454670655, "grad_norm": 1.8527523279190063, "learning_rate": 2.9697444253554886e-05, "loss": 0.1416, "step": 4181 }, { "epoch": 0.09215158075658167, "grad_norm": 2.8979508876800537, "learning_rate": 2.9697230286000888e-05, "loss": 0.1607, "step": 4182 }, { "epoch": 0.09217361604609782, "grad_norm": 1.2263330221176147, "learning_rate": 2.969701624358601e-05, "loss": 0.1304, "step": 4183 }, { "epoch": 0.09219565133561398, "grad_norm": 1.7622133493423462, "learning_rate": 2.969680212631135e-05, "loss": 0.1697, "step": 4184 }, { "epoch": 0.09221768662513015, "grad_norm": 1.2274011373519897, "learning_rate": 2.9696587934177998e-05, "loss": 0.1703, "step": 4185 }, { "epoch": 0.0922397219146463, "grad_norm": 1.2354676723480225, "learning_rate": 2.9696373667187038e-05, "loss": 0.144, "step": 4186 }, { "epoch": 0.09226175720416246, "grad_norm": 1.548570990562439, "learning_rate": 2.9696159325339568e-05, "loss": 0.206, "step": 4187 }, { "epoch": 0.09228379249367863, "grad_norm": 1.173337697982788, "learning_rate": 2.9695944908636676e-05, "loss": 0.1289, "step": 4188 }, { "epoch": 0.09230582778319478, "grad_norm": 1.1509562730789185, "learning_rate": 2.969573041707946e-05, "loss": 0.1343, "step": 4189 }, { "epoch": 0.09232786307271094, "grad_norm": 0.6762080788612366, "learning_rate": 2.9695515850669004e-05, "loss": 0.1074, "step": 4190 }, { "epoch": 0.09234989836222711, "grad_norm": 1.3687902688980103, "learning_rate": 2.969530120940641e-05, "loss": 0.1324, "step": 4191 }, { "epoch": 0.09237193365174327, "grad_norm": 1.436623215675354, "learning_rate": 2.9695086493292762e-05, "loss": 0.1608, "step": 4192 }, { "epoch": 0.09239396894125942, "grad_norm": 1.0778757333755493, "learning_rate": 2.969487170232916e-05, "loss": 0.1161, "step": 4193 }, { "epoch": 0.09241600423077559, "grad_norm": 0.9770464897155762, "learning_rate": 2.96946568365167e-05, "loss": 0.1455, "step": 4194 }, { "epoch": 0.09243803952029175, "grad_norm": 1.4228752851486206, "learning_rate": 2.969444189585647e-05, "loss": 0.1568, "step": 4195 }, { "epoch": 0.0924600748098079, "grad_norm": 1.62016761302948, "learning_rate": 2.9694226880349567e-05, "loss": 0.2176, "step": 4196 }, { "epoch": 0.09248211009932407, "grad_norm": 1.3827300071716309, "learning_rate": 2.9694011789997087e-05, "loss": 0.1283, "step": 4197 }, { "epoch": 0.09250414538884023, "grad_norm": 1.1983085870742798, "learning_rate": 2.9693796624800127e-05, "loss": 0.1741, "step": 4198 }, { "epoch": 0.09252618067835638, "grad_norm": 1.3088997602462769, "learning_rate": 2.969358138475978e-05, "loss": 0.1465, "step": 4199 }, { "epoch": 0.09254821596787255, "grad_norm": 1.5658067464828491, "learning_rate": 2.969336606987714e-05, "loss": 0.1253, "step": 4200 }, { "epoch": 0.09257025125738871, "grad_norm": 1.5483126640319824, "learning_rate": 2.9693150680153317e-05, "loss": 0.1696, "step": 4201 }, { "epoch": 0.09259228654690486, "grad_norm": 1.2151247262954712, "learning_rate": 2.969293521558939e-05, "loss": 0.1487, "step": 4202 }, { "epoch": 0.09261432183642103, "grad_norm": 1.2234059572219849, "learning_rate": 2.9692719676186466e-05, "loss": 0.1265, "step": 4203 }, { "epoch": 0.0926363571259372, "grad_norm": 1.5084664821624756, "learning_rate": 2.9692504061945645e-05, "loss": 0.1886, "step": 4204 }, { "epoch": 0.09265839241545334, "grad_norm": 0.942454993724823, "learning_rate": 2.969228837286802e-05, "loss": 0.1346, "step": 4205 }, { "epoch": 0.09268042770496951, "grad_norm": 1.455119013786316, "learning_rate": 2.9692072608954693e-05, "loss": 0.1479, "step": 4206 }, { "epoch": 0.09270246299448567, "grad_norm": 0.940895140171051, "learning_rate": 2.9691856770206763e-05, "loss": 0.1097, "step": 4207 }, { "epoch": 0.09272449828400182, "grad_norm": 1.4470981359481812, "learning_rate": 2.9691640856625323e-05, "loss": 0.1452, "step": 4208 }, { "epoch": 0.09274653357351799, "grad_norm": 1.7590649127960205, "learning_rate": 2.9691424868211482e-05, "loss": 0.1451, "step": 4209 }, { "epoch": 0.09276856886303415, "grad_norm": 1.0454211235046387, "learning_rate": 2.9691208804966333e-05, "loss": 0.1859, "step": 4210 }, { "epoch": 0.0927906041525503, "grad_norm": 1.233324408531189, "learning_rate": 2.969099266689098e-05, "loss": 0.1268, "step": 4211 }, { "epoch": 0.09281263944206647, "grad_norm": 2.040360689163208, "learning_rate": 2.9690776453986527e-05, "loss": 0.1073, "step": 4212 }, { "epoch": 0.09283467473158263, "grad_norm": 1.6908093690872192, "learning_rate": 2.969056016625407e-05, "loss": 0.1421, "step": 4213 }, { "epoch": 0.09285671002109878, "grad_norm": 2.1368463039398193, "learning_rate": 2.969034380369471e-05, "loss": 0.1773, "step": 4214 }, { "epoch": 0.09287874531061495, "grad_norm": 5.002203464508057, "learning_rate": 2.9690127366309555e-05, "loss": 0.1397, "step": 4215 }, { "epoch": 0.09290078060013111, "grad_norm": 1.2822930812835693, "learning_rate": 2.96899108540997e-05, "loss": 0.115, "step": 4216 }, { "epoch": 0.09292281588964726, "grad_norm": 1.2057596445083618, "learning_rate": 2.9689694267066258e-05, "loss": 0.1243, "step": 4217 }, { "epoch": 0.09294485117916343, "grad_norm": 1.350618600845337, "learning_rate": 2.9689477605210317e-05, "loss": 0.1895, "step": 4218 }, { "epoch": 0.0929668864686796, "grad_norm": 1.9102470874786377, "learning_rate": 2.9689260868532996e-05, "loss": 0.1665, "step": 4219 }, { "epoch": 0.09298892175819574, "grad_norm": 1.1248260736465454, "learning_rate": 2.968904405703539e-05, "loss": 0.1174, "step": 4220 }, { "epoch": 0.09301095704771191, "grad_norm": 1.067699670791626, "learning_rate": 2.968882717071861e-05, "loss": 0.1558, "step": 4221 }, { "epoch": 0.09303299233722807, "grad_norm": 1.6549917459487915, "learning_rate": 2.968861020958375e-05, "loss": 0.1271, "step": 4222 }, { "epoch": 0.09305502762674422, "grad_norm": 1.331272006034851, "learning_rate": 2.9688393173631925e-05, "loss": 0.1299, "step": 4223 }, { "epoch": 0.09307706291626039, "grad_norm": 1.3973597288131714, "learning_rate": 2.9688176062864237e-05, "loss": 0.1181, "step": 4224 }, { "epoch": 0.09309909820577655, "grad_norm": 1.3521831035614014, "learning_rate": 2.968795887728179e-05, "loss": 0.1468, "step": 4225 }, { "epoch": 0.0931211334952927, "grad_norm": 1.2099953889846802, "learning_rate": 2.9687741616885692e-05, "loss": 0.1473, "step": 4226 }, { "epoch": 0.09314316878480887, "grad_norm": 1.9090055227279663, "learning_rate": 2.9687524281677055e-05, "loss": 0.1735, "step": 4227 }, { "epoch": 0.09316520407432503, "grad_norm": 0.9900835156440735, "learning_rate": 2.9687306871656974e-05, "loss": 0.0948, "step": 4228 }, { "epoch": 0.0931872393638412, "grad_norm": 1.4194930791854858, "learning_rate": 2.9687089386826566e-05, "loss": 0.1519, "step": 4229 }, { "epoch": 0.09320927465335735, "grad_norm": 1.4531935453414917, "learning_rate": 2.9686871827186934e-05, "loss": 0.1522, "step": 4230 }, { "epoch": 0.09323130994287351, "grad_norm": 1.3413023948669434, "learning_rate": 2.968665419273919e-05, "loss": 0.1273, "step": 4231 }, { "epoch": 0.09325334523238968, "grad_norm": 1.3741564750671387, "learning_rate": 2.968643648348444e-05, "loss": 0.1212, "step": 4232 }, { "epoch": 0.09327538052190583, "grad_norm": 1.1333428621292114, "learning_rate": 2.9686218699423795e-05, "loss": 0.1555, "step": 4233 }, { "epoch": 0.093297415811422, "grad_norm": 1.399710774421692, "learning_rate": 2.9686000840558363e-05, "loss": 0.1167, "step": 4234 }, { "epoch": 0.09331945110093816, "grad_norm": 1.181453824043274, "learning_rate": 2.9685782906889253e-05, "loss": 0.1368, "step": 4235 }, { "epoch": 0.09334148639045431, "grad_norm": 1.2257519960403442, "learning_rate": 2.968556489841757e-05, "loss": 0.1641, "step": 4236 }, { "epoch": 0.09336352167997047, "grad_norm": 1.1884102821350098, "learning_rate": 2.9685346815144438e-05, "loss": 0.1738, "step": 4237 }, { "epoch": 0.09338555696948664, "grad_norm": 1.1022474765777588, "learning_rate": 2.9685128657070957e-05, "loss": 0.1234, "step": 4238 }, { "epoch": 0.09340759225900279, "grad_norm": 0.9226749539375305, "learning_rate": 2.968491042419824e-05, "loss": 0.1242, "step": 4239 }, { "epoch": 0.09342962754851895, "grad_norm": 0.9423555731773376, "learning_rate": 2.96846921165274e-05, "loss": 0.0989, "step": 4240 }, { "epoch": 0.09345166283803512, "grad_norm": 1.4564566612243652, "learning_rate": 2.9684473734059548e-05, "loss": 0.1139, "step": 4241 }, { "epoch": 0.09347369812755127, "grad_norm": 1.1879379749298096, "learning_rate": 2.9684255276795798e-05, "loss": 0.1225, "step": 4242 }, { "epoch": 0.09349573341706743, "grad_norm": 1.0422930717468262, "learning_rate": 2.968403674473726e-05, "loss": 0.1401, "step": 4243 }, { "epoch": 0.0935177687065836, "grad_norm": 1.1604633331298828, "learning_rate": 2.968381813788505e-05, "loss": 0.1199, "step": 4244 }, { "epoch": 0.09353980399609975, "grad_norm": 1.5321110486984253, "learning_rate": 2.968359945624028e-05, "loss": 0.1519, "step": 4245 }, { "epoch": 0.09356183928561591, "grad_norm": 1.2075045108795166, "learning_rate": 2.9683380699804062e-05, "loss": 0.1181, "step": 4246 }, { "epoch": 0.09358387457513208, "grad_norm": 1.3931934833526611, "learning_rate": 2.9683161868577516e-05, "loss": 0.1173, "step": 4247 }, { "epoch": 0.09360590986464823, "grad_norm": 1.3126318454742432, "learning_rate": 2.968294296256175e-05, "loss": 0.1556, "step": 4248 }, { "epoch": 0.0936279451541644, "grad_norm": 1.6276956796646118, "learning_rate": 2.968272398175788e-05, "loss": 0.2081, "step": 4249 }, { "epoch": 0.09364998044368056, "grad_norm": 1.7913378477096558, "learning_rate": 2.9682504926167028e-05, "loss": 0.1623, "step": 4250 }, { "epoch": 0.09367201573319671, "grad_norm": 1.0910987854003906, "learning_rate": 2.96822857957903e-05, "loss": 0.1401, "step": 4251 }, { "epoch": 0.09369405102271287, "grad_norm": 1.3600879907608032, "learning_rate": 2.968206659062882e-05, "loss": 0.1361, "step": 4252 }, { "epoch": 0.09371608631222904, "grad_norm": 1.4331083297729492, "learning_rate": 2.9681847310683706e-05, "loss": 0.138, "step": 4253 }, { "epoch": 0.09373812160174519, "grad_norm": 1.0621490478515625, "learning_rate": 2.9681627955956063e-05, "loss": 0.1629, "step": 4254 }, { "epoch": 0.09376015689126135, "grad_norm": 1.1682462692260742, "learning_rate": 2.968140852644702e-05, "loss": 0.0802, "step": 4255 }, { "epoch": 0.09378219218077752, "grad_norm": 1.755866527557373, "learning_rate": 2.968118902215769e-05, "loss": 0.1756, "step": 4256 }, { "epoch": 0.09380422747029367, "grad_norm": 1.348648190498352, "learning_rate": 2.9680969443089194e-05, "loss": 0.1167, "step": 4257 }, { "epoch": 0.09382626275980983, "grad_norm": 1.5273597240447998, "learning_rate": 2.9680749789242643e-05, "loss": 0.2006, "step": 4258 }, { "epoch": 0.093848298049326, "grad_norm": 0.9524032473564148, "learning_rate": 2.9680530060619167e-05, "loss": 0.1038, "step": 4259 }, { "epoch": 0.09387033333884215, "grad_norm": 1.2587897777557373, "learning_rate": 2.9680310257219872e-05, "loss": 0.1467, "step": 4260 }, { "epoch": 0.09389236862835831, "grad_norm": 0.938700258731842, "learning_rate": 2.968009037904589e-05, "loss": 0.1358, "step": 4261 }, { "epoch": 0.09391440391787448, "grad_norm": 0.8195635080337524, "learning_rate": 2.9679870426098333e-05, "loss": 0.1094, "step": 4262 }, { "epoch": 0.09393643920739064, "grad_norm": 1.3182910680770874, "learning_rate": 2.9679650398378324e-05, "loss": 0.19, "step": 4263 }, { "epoch": 0.0939584744969068, "grad_norm": 1.2026528120040894, "learning_rate": 2.9679430295886984e-05, "loss": 0.1509, "step": 4264 }, { "epoch": 0.09398050978642296, "grad_norm": 1.2852396965026855, "learning_rate": 2.9679210118625436e-05, "loss": 0.1453, "step": 4265 }, { "epoch": 0.09400254507593912, "grad_norm": 1.1601810455322266, "learning_rate": 2.9678989866594795e-05, "loss": 0.135, "step": 4266 }, { "epoch": 0.09402458036545527, "grad_norm": 1.1295818090438843, "learning_rate": 2.9678769539796187e-05, "loss": 0.1566, "step": 4267 }, { "epoch": 0.09404661565497144, "grad_norm": 1.4905861616134644, "learning_rate": 2.967854913823074e-05, "loss": 0.141, "step": 4268 }, { "epoch": 0.0940686509444876, "grad_norm": 1.625435471534729, "learning_rate": 2.9678328661899568e-05, "loss": 0.1302, "step": 4269 }, { "epoch": 0.09409068623400375, "grad_norm": 1.5991135835647583, "learning_rate": 2.96781081108038e-05, "loss": 0.1352, "step": 4270 }, { "epoch": 0.09411272152351992, "grad_norm": 1.0017988681793213, "learning_rate": 2.967788748494455e-05, "loss": 0.1371, "step": 4271 }, { "epoch": 0.09413475681303608, "grad_norm": 1.4454938173294067, "learning_rate": 2.9677666784322954e-05, "loss": 0.1314, "step": 4272 }, { "epoch": 0.09415679210255223, "grad_norm": 1.1260815858840942, "learning_rate": 2.9677446008940122e-05, "loss": 0.1148, "step": 4273 }, { "epoch": 0.0941788273920684, "grad_norm": 1.6490544080734253, "learning_rate": 2.9677225158797196e-05, "loss": 0.1669, "step": 4274 }, { "epoch": 0.09420086268158456, "grad_norm": 1.2900620698928833, "learning_rate": 2.967700423389529e-05, "loss": 0.1781, "step": 4275 }, { "epoch": 0.09422289797110071, "grad_norm": 0.9841820001602173, "learning_rate": 2.9676783234235524e-05, "loss": 0.1336, "step": 4276 }, { "epoch": 0.09424493326061688, "grad_norm": 1.1415413618087769, "learning_rate": 2.967656215981904e-05, "loss": 0.1641, "step": 4277 }, { "epoch": 0.09426696855013304, "grad_norm": 1.2159348726272583, "learning_rate": 2.967634101064695e-05, "loss": 0.1124, "step": 4278 }, { "epoch": 0.0942890038396492, "grad_norm": 1.125894546508789, "learning_rate": 2.9676119786720387e-05, "loss": 0.1307, "step": 4279 }, { "epoch": 0.09431103912916536, "grad_norm": 1.517073154449463, "learning_rate": 2.967589848804047e-05, "loss": 0.1466, "step": 4280 }, { "epoch": 0.09433307441868152, "grad_norm": 1.368318796157837, "learning_rate": 2.967567711460834e-05, "loss": 0.1251, "step": 4281 }, { "epoch": 0.09435510970819767, "grad_norm": 1.3194609880447388, "learning_rate": 2.967545566642511e-05, "loss": 0.1507, "step": 4282 }, { "epoch": 0.09437714499771384, "grad_norm": 1.0512166023254395, "learning_rate": 2.967523414349192e-05, "loss": 0.1738, "step": 4283 }, { "epoch": 0.09439918028723, "grad_norm": 1.2822973728179932, "learning_rate": 2.9675012545809894e-05, "loss": 0.1574, "step": 4284 }, { "epoch": 0.09442121557674615, "grad_norm": 1.2217833995819092, "learning_rate": 2.9674790873380156e-05, "loss": 0.163, "step": 4285 }, { "epoch": 0.09444325086626232, "grad_norm": 1.0456230640411377, "learning_rate": 2.967456912620384e-05, "loss": 0.1564, "step": 4286 }, { "epoch": 0.09446528615577848, "grad_norm": 1.1174354553222656, "learning_rate": 2.9674347304282075e-05, "loss": 0.1212, "step": 4287 }, { "epoch": 0.09448732144529463, "grad_norm": 1.4295225143432617, "learning_rate": 2.9674125407615987e-05, "loss": 0.1806, "step": 4288 }, { "epoch": 0.0945093567348108, "grad_norm": 1.9378321170806885, "learning_rate": 2.9673903436206713e-05, "loss": 0.1535, "step": 4289 }, { "epoch": 0.09453139202432696, "grad_norm": 0.9119269847869873, "learning_rate": 2.9673681390055377e-05, "loss": 0.1164, "step": 4290 }, { "epoch": 0.09455342731384311, "grad_norm": 1.6130225658416748, "learning_rate": 2.9673459269163117e-05, "loss": 0.1684, "step": 4291 }, { "epoch": 0.09457546260335928, "grad_norm": 0.9043909311294556, "learning_rate": 2.9673237073531056e-05, "loss": 0.1198, "step": 4292 }, { "epoch": 0.09459749789287544, "grad_norm": 1.3361783027648926, "learning_rate": 2.9673014803160334e-05, "loss": 0.1266, "step": 4293 }, { "epoch": 0.0946195331823916, "grad_norm": 1.4566009044647217, "learning_rate": 2.9672792458052077e-05, "loss": 0.1738, "step": 4294 }, { "epoch": 0.09464156847190776, "grad_norm": 0.8446442484855652, "learning_rate": 2.967257003820742e-05, "loss": 0.1422, "step": 4295 }, { "epoch": 0.09466360376142392, "grad_norm": 1.1911393404006958, "learning_rate": 2.9672347543627496e-05, "loss": 0.1175, "step": 4296 }, { "epoch": 0.09468563905094007, "grad_norm": 3.0246665477752686, "learning_rate": 2.967212497431344e-05, "loss": 0.1266, "step": 4297 }, { "epoch": 0.09470767434045624, "grad_norm": 1.166644811630249, "learning_rate": 2.967190233026638e-05, "loss": 0.1059, "step": 4298 }, { "epoch": 0.0947297096299724, "grad_norm": 0.9982654452323914, "learning_rate": 2.9671679611487455e-05, "loss": 0.154, "step": 4299 }, { "epoch": 0.09475174491948857, "grad_norm": 1.4754621982574463, "learning_rate": 2.96714568179778e-05, "loss": 0.1617, "step": 4300 }, { "epoch": 0.09477378020900472, "grad_norm": 1.1115604639053345, "learning_rate": 2.9671233949738547e-05, "loss": 0.1082, "step": 4301 }, { "epoch": 0.09479581549852088, "grad_norm": 1.0483554601669312, "learning_rate": 2.967101100677083e-05, "loss": 0.1182, "step": 4302 }, { "epoch": 0.09481785078803705, "grad_norm": 1.0356664657592773, "learning_rate": 2.967078798907579e-05, "loss": 0.1385, "step": 4303 }, { "epoch": 0.0948398860775532, "grad_norm": 1.29021418094635, "learning_rate": 2.9670564896654558e-05, "loss": 0.1253, "step": 4304 }, { "epoch": 0.09486192136706936, "grad_norm": 1.3825592994689941, "learning_rate": 2.9670341729508276e-05, "loss": 0.1503, "step": 4305 }, { "epoch": 0.09488395665658553, "grad_norm": 1.1735780239105225, "learning_rate": 2.9670118487638072e-05, "loss": 0.1795, "step": 4306 }, { "epoch": 0.09490599194610168, "grad_norm": 1.3710064888000488, "learning_rate": 2.9669895171045088e-05, "loss": 0.0994, "step": 4307 }, { "epoch": 0.09492802723561784, "grad_norm": 1.2948824167251587, "learning_rate": 2.966967177973046e-05, "loss": 0.1856, "step": 4308 }, { "epoch": 0.09495006252513401, "grad_norm": 1.4926972389221191, "learning_rate": 2.966944831369533e-05, "loss": 0.1673, "step": 4309 }, { "epoch": 0.09497209781465016, "grad_norm": 1.6990371942520142, "learning_rate": 2.9669224772940833e-05, "loss": 0.1591, "step": 4310 }, { "epoch": 0.09499413310416632, "grad_norm": 1.3357582092285156, "learning_rate": 2.966900115746811e-05, "loss": 0.1438, "step": 4311 }, { "epoch": 0.09501616839368249, "grad_norm": 1.460811972618103, "learning_rate": 2.9668777467278295e-05, "loss": 0.1305, "step": 4312 }, { "epoch": 0.09503820368319864, "grad_norm": 1.2186665534973145, "learning_rate": 2.966855370237253e-05, "loss": 0.1496, "step": 4313 }, { "epoch": 0.0950602389727148, "grad_norm": 1.557902455329895, "learning_rate": 2.966832986275196e-05, "loss": 0.112, "step": 4314 }, { "epoch": 0.09508227426223097, "grad_norm": 1.8686095476150513, "learning_rate": 2.966810594841771e-05, "loss": 0.152, "step": 4315 }, { "epoch": 0.09510430955174712, "grad_norm": 1.8191701173782349, "learning_rate": 2.9667881959370938e-05, "loss": 0.1386, "step": 4316 }, { "epoch": 0.09512634484126328, "grad_norm": 0.9333981871604919, "learning_rate": 2.966765789561278e-05, "loss": 0.0953, "step": 4317 }, { "epoch": 0.09514838013077945, "grad_norm": 1.7608001232147217, "learning_rate": 2.9667433757144368e-05, "loss": 0.1263, "step": 4318 }, { "epoch": 0.0951704154202956, "grad_norm": 1.216834545135498, "learning_rate": 2.9667209543966857e-05, "loss": 0.1237, "step": 4319 }, { "epoch": 0.09519245070981176, "grad_norm": 1.433741569519043, "learning_rate": 2.966698525608138e-05, "loss": 0.2052, "step": 4320 }, { "epoch": 0.09521448599932793, "grad_norm": 1.5946850776672363, "learning_rate": 2.9666760893489078e-05, "loss": 0.1343, "step": 4321 }, { "epoch": 0.09523652128884408, "grad_norm": 1.1261920928955078, "learning_rate": 2.96665364561911e-05, "loss": 0.0921, "step": 4322 }, { "epoch": 0.09525855657836024, "grad_norm": 0.9143988490104675, "learning_rate": 2.966631194418859e-05, "loss": 0.1304, "step": 4323 }, { "epoch": 0.09528059186787641, "grad_norm": 1.1856927871704102, "learning_rate": 2.9666087357482688e-05, "loss": 0.1087, "step": 4324 }, { "epoch": 0.09530262715739256, "grad_norm": 0.9425997734069824, "learning_rate": 2.9665862696074538e-05, "loss": 0.1692, "step": 4325 }, { "epoch": 0.09532466244690872, "grad_norm": 0.9237701892852783, "learning_rate": 2.9665637959965286e-05, "loss": 0.162, "step": 4326 }, { "epoch": 0.09534669773642489, "grad_norm": 1.574305534362793, "learning_rate": 2.9665413149156073e-05, "loss": 0.1334, "step": 4327 }, { "epoch": 0.09536873302594104, "grad_norm": 1.5510807037353516, "learning_rate": 2.966518826364805e-05, "loss": 0.1561, "step": 4328 }, { "epoch": 0.0953907683154572, "grad_norm": 1.0692670345306396, "learning_rate": 2.966496330344235e-05, "loss": 0.1548, "step": 4329 }, { "epoch": 0.09541280360497337, "grad_norm": 1.3920978307724, "learning_rate": 2.966473826854014e-05, "loss": 0.123, "step": 4330 }, { "epoch": 0.09543483889448952, "grad_norm": 1.2900160551071167, "learning_rate": 2.9664513158942547e-05, "loss": 0.1408, "step": 4331 }, { "epoch": 0.09545687418400568, "grad_norm": 1.244904637336731, "learning_rate": 2.9664287974650727e-05, "loss": 0.1191, "step": 4332 }, { "epoch": 0.09547890947352185, "grad_norm": 0.8732189536094666, "learning_rate": 2.9664062715665824e-05, "loss": 0.1495, "step": 4333 }, { "epoch": 0.09550094476303801, "grad_norm": 1.083109736442566, "learning_rate": 2.9663837381988986e-05, "loss": 0.1884, "step": 4334 }, { "epoch": 0.09552298005255416, "grad_norm": 0.8832723498344421, "learning_rate": 2.9663611973621362e-05, "loss": 0.1018, "step": 4335 }, { "epoch": 0.09554501534207033, "grad_norm": 2.4945473670959473, "learning_rate": 2.9663386490564098e-05, "loss": 0.2027, "step": 4336 }, { "epoch": 0.09556705063158649, "grad_norm": 1.6998729705810547, "learning_rate": 2.966316093281834e-05, "loss": 0.1456, "step": 4337 }, { "epoch": 0.09558908592110264, "grad_norm": 2.533834457397461, "learning_rate": 2.9662935300385245e-05, "loss": 0.1852, "step": 4338 }, { "epoch": 0.09561112121061881, "grad_norm": 1.731974720954895, "learning_rate": 2.9662709593265957e-05, "loss": 0.1332, "step": 4339 }, { "epoch": 0.09563315650013497, "grad_norm": 1.0229668617248535, "learning_rate": 2.9662483811461625e-05, "loss": 0.1326, "step": 4340 }, { "epoch": 0.09565519178965112, "grad_norm": 1.1337999105453491, "learning_rate": 2.9662257954973403e-05, "loss": 0.1604, "step": 4341 }, { "epoch": 0.09567722707916729, "grad_norm": 1.225623607635498, "learning_rate": 2.9662032023802435e-05, "loss": 0.1981, "step": 4342 }, { "epoch": 0.09569926236868345, "grad_norm": 0.9814364314079285, "learning_rate": 2.966180601794988e-05, "loss": 0.1038, "step": 4343 }, { "epoch": 0.0957212976581996, "grad_norm": 1.4049209356307983, "learning_rate": 2.9661579937416882e-05, "loss": 0.1319, "step": 4344 }, { "epoch": 0.09574333294771577, "grad_norm": 1.5497900247573853, "learning_rate": 2.9661353782204598e-05, "loss": 0.1101, "step": 4345 }, { "epoch": 0.09576536823723193, "grad_norm": 1.2670903205871582, "learning_rate": 2.966112755231417e-05, "loss": 0.1119, "step": 4346 }, { "epoch": 0.09578740352674808, "grad_norm": 1.1329851150512695, "learning_rate": 2.9660901247746765e-05, "loss": 0.1613, "step": 4347 }, { "epoch": 0.09580943881626425, "grad_norm": 1.0275830030441284, "learning_rate": 2.9660674868503522e-05, "loss": 0.1262, "step": 4348 }, { "epoch": 0.09583147410578041, "grad_norm": 1.495768427848816, "learning_rate": 2.96604484145856e-05, "loss": 0.1582, "step": 4349 }, { "epoch": 0.09585350939529656, "grad_norm": 1.2110185623168945, "learning_rate": 2.966022188599416e-05, "loss": 0.1537, "step": 4350 }, { "epoch": 0.09587554468481273, "grad_norm": 1.675955057144165, "learning_rate": 2.9659995282730342e-05, "loss": 0.1416, "step": 4351 }, { "epoch": 0.09589757997432889, "grad_norm": 1.011019229888916, "learning_rate": 2.965976860479531e-05, "loss": 0.1136, "step": 4352 }, { "epoch": 0.09591961526384504, "grad_norm": 1.5575001239776611, "learning_rate": 2.9659541852190214e-05, "loss": 0.2237, "step": 4353 }, { "epoch": 0.09594165055336121, "grad_norm": 1.0337474346160889, "learning_rate": 2.9659315024916213e-05, "loss": 0.1329, "step": 4354 }, { "epoch": 0.09596368584287737, "grad_norm": 4.4801764488220215, "learning_rate": 2.9659088122974455e-05, "loss": 0.1479, "step": 4355 }, { "epoch": 0.09598572113239352, "grad_norm": 3.4218461513519287, "learning_rate": 2.9658861146366106e-05, "loss": 0.1936, "step": 4356 }, { "epoch": 0.09600775642190969, "grad_norm": 3.967980146408081, "learning_rate": 2.965863409509231e-05, "loss": 0.1713, "step": 4357 }, { "epoch": 0.09602979171142585, "grad_norm": 1.4628760814666748, "learning_rate": 2.9658406969154234e-05, "loss": 0.1766, "step": 4358 }, { "epoch": 0.096051827000942, "grad_norm": 1.1417440176010132, "learning_rate": 2.965817976855303e-05, "loss": 0.188, "step": 4359 }, { "epoch": 0.09607386229045817, "grad_norm": 15.594602584838867, "learning_rate": 2.9657952493289856e-05, "loss": 0.1697, "step": 4360 }, { "epoch": 0.09609589757997433, "grad_norm": 3.822144031524658, "learning_rate": 2.965772514336587e-05, "loss": 0.1294, "step": 4361 }, { "epoch": 0.09611793286949048, "grad_norm": 1.7516826391220093, "learning_rate": 2.965749771878223e-05, "loss": 0.1761, "step": 4362 }, { "epoch": 0.09613996815900665, "grad_norm": 1.9207724332809448, "learning_rate": 2.9657270219540094e-05, "loss": 0.1441, "step": 4363 }, { "epoch": 0.09616200344852281, "grad_norm": 3.570998430252075, "learning_rate": 2.9657042645640617e-05, "loss": 0.144, "step": 4364 }, { "epoch": 0.09618403873803896, "grad_norm": 1.1877175569534302, "learning_rate": 2.9656814997084968e-05, "loss": 0.1636, "step": 4365 }, { "epoch": 0.09620607402755513, "grad_norm": 1.991635799407959, "learning_rate": 2.9656587273874298e-05, "loss": 0.1773, "step": 4366 }, { "epoch": 0.09622810931707129, "grad_norm": 1.0779283046722412, "learning_rate": 2.9656359476009773e-05, "loss": 0.1299, "step": 4367 }, { "epoch": 0.09625014460658746, "grad_norm": 2.370695114135742, "learning_rate": 2.9656131603492544e-05, "loss": 0.1913, "step": 4368 }, { "epoch": 0.09627217989610361, "grad_norm": 1.7319204807281494, "learning_rate": 2.965590365632378e-05, "loss": 0.1369, "step": 4369 }, { "epoch": 0.09629421518561977, "grad_norm": 1.7489066123962402, "learning_rate": 2.9655675634504642e-05, "loss": 0.1534, "step": 4370 }, { "epoch": 0.09631625047513594, "grad_norm": 0.7599877715110779, "learning_rate": 2.9655447538036283e-05, "loss": 0.1032, "step": 4371 }, { "epoch": 0.09633828576465209, "grad_norm": 1.629650354385376, "learning_rate": 2.9655219366919876e-05, "loss": 0.1339, "step": 4372 }, { "epoch": 0.09636032105416825, "grad_norm": 1.230745553970337, "learning_rate": 2.965499112115658e-05, "loss": 0.1588, "step": 4373 }, { "epoch": 0.09638235634368442, "grad_norm": 1.4680615663528442, "learning_rate": 2.965476280074755e-05, "loss": 0.1351, "step": 4374 }, { "epoch": 0.09640439163320057, "grad_norm": 1.0329725742340088, "learning_rate": 2.9654534405693957e-05, "loss": 0.1494, "step": 4375 }, { "epoch": 0.09642642692271673, "grad_norm": 0.9047031402587891, "learning_rate": 2.9654305935996962e-05, "loss": 0.1271, "step": 4376 }, { "epoch": 0.0964484622122329, "grad_norm": 1.416498064994812, "learning_rate": 2.9654077391657726e-05, "loss": 0.1338, "step": 4377 }, { "epoch": 0.09647049750174905, "grad_norm": 1.6931761503219604, "learning_rate": 2.965384877267742e-05, "loss": 0.1241, "step": 4378 }, { "epoch": 0.09649253279126521, "grad_norm": 2.5604794025421143, "learning_rate": 2.9653620079057198e-05, "loss": 0.171, "step": 4379 }, { "epoch": 0.09651456808078138, "grad_norm": 1.6250576972961426, "learning_rate": 2.9653391310798237e-05, "loss": 0.1423, "step": 4380 }, { "epoch": 0.09653660337029753, "grad_norm": 1.2389519214630127, "learning_rate": 2.9653162467901693e-05, "loss": 0.1381, "step": 4381 }, { "epoch": 0.09655863865981369, "grad_norm": 1.0298978090286255, "learning_rate": 2.9652933550368737e-05, "loss": 0.1227, "step": 4382 }, { "epoch": 0.09658067394932986, "grad_norm": 1.4255914688110352, "learning_rate": 2.9652704558200536e-05, "loss": 0.189, "step": 4383 }, { "epoch": 0.09660270923884601, "grad_norm": 0.967198371887207, "learning_rate": 2.965247549139825e-05, "loss": 0.1513, "step": 4384 }, { "epoch": 0.09662474452836217, "grad_norm": 1.1261470317840576, "learning_rate": 2.9652246349963046e-05, "loss": 0.1625, "step": 4385 }, { "epoch": 0.09664677981787834, "grad_norm": 0.9627465605735779, "learning_rate": 2.9652017133896095e-05, "loss": 0.1505, "step": 4386 }, { "epoch": 0.09666881510739449, "grad_norm": 2.3068645000457764, "learning_rate": 2.9651787843198565e-05, "loss": 0.1803, "step": 4387 }, { "epoch": 0.09669085039691065, "grad_norm": 1.12228524684906, "learning_rate": 2.9651558477871626e-05, "loss": 0.1064, "step": 4388 }, { "epoch": 0.09671288568642682, "grad_norm": 1.5595908164978027, "learning_rate": 2.965132903791644e-05, "loss": 0.175, "step": 4389 }, { "epoch": 0.09673492097594297, "grad_norm": 1.6822786331176758, "learning_rate": 2.9651099523334175e-05, "loss": 0.1512, "step": 4390 }, { "epoch": 0.09675695626545913, "grad_norm": 1.924809455871582, "learning_rate": 2.965086993412601e-05, "loss": 0.1773, "step": 4391 }, { "epoch": 0.0967789915549753, "grad_norm": 0.7420297265052795, "learning_rate": 2.9650640270293102e-05, "loss": 0.1452, "step": 4392 }, { "epoch": 0.09680102684449145, "grad_norm": 1.790756106376648, "learning_rate": 2.965041053183663e-05, "loss": 0.1372, "step": 4393 }, { "epoch": 0.09682306213400761, "grad_norm": 0.9119150638580322, "learning_rate": 2.965018071875776e-05, "loss": 0.1586, "step": 4394 }, { "epoch": 0.09684509742352378, "grad_norm": 1.0082453489303589, "learning_rate": 2.9649950831057668e-05, "loss": 0.1335, "step": 4395 }, { "epoch": 0.09686713271303993, "grad_norm": 1.2498626708984375, "learning_rate": 2.9649720868737516e-05, "loss": 0.1023, "step": 4396 }, { "epoch": 0.09688916800255609, "grad_norm": 1.5294485092163086, "learning_rate": 2.9649490831798478e-05, "loss": 0.1259, "step": 4397 }, { "epoch": 0.09691120329207226, "grad_norm": 1.3243873119354248, "learning_rate": 2.9649260720241732e-05, "loss": 0.1692, "step": 4398 }, { "epoch": 0.09693323858158841, "grad_norm": 1.1755908727645874, "learning_rate": 2.9649030534068445e-05, "loss": 0.1356, "step": 4399 }, { "epoch": 0.09695527387110457, "grad_norm": 1.623102068901062, "learning_rate": 2.9648800273279788e-05, "loss": 0.142, "step": 4400 }, { "epoch": 0.09697730916062074, "grad_norm": 1.7284061908721924, "learning_rate": 2.9648569937876937e-05, "loss": 0.1453, "step": 4401 }, { "epoch": 0.09699934445013689, "grad_norm": 2.167863368988037, "learning_rate": 2.9648339527861062e-05, "loss": 0.1449, "step": 4402 }, { "epoch": 0.09702137973965305, "grad_norm": 1.3906171321868896, "learning_rate": 2.9648109043233344e-05, "loss": 0.1558, "step": 4403 }, { "epoch": 0.09704341502916922, "grad_norm": 1.851029634475708, "learning_rate": 2.9647878483994946e-05, "loss": 0.1618, "step": 4404 }, { "epoch": 0.09706545031868538, "grad_norm": 1.6431761980056763, "learning_rate": 2.964764785014705e-05, "loss": 0.129, "step": 4405 }, { "epoch": 0.09708748560820153, "grad_norm": 1.3770818710327148, "learning_rate": 2.9647417141690828e-05, "loss": 0.1741, "step": 4406 }, { "epoch": 0.0971095208977177, "grad_norm": 1.3700560331344604, "learning_rate": 2.9647186358627456e-05, "loss": 0.1374, "step": 4407 }, { "epoch": 0.09713155618723386, "grad_norm": 1.3701874017715454, "learning_rate": 2.964695550095811e-05, "loss": 0.1457, "step": 4408 }, { "epoch": 0.09715359147675001, "grad_norm": 1.162668228149414, "learning_rate": 2.964672456868397e-05, "loss": 0.1463, "step": 4409 }, { "epoch": 0.09717562676626618, "grad_norm": 2.2665047645568848, "learning_rate": 2.9646493561806202e-05, "loss": 0.1633, "step": 4410 }, { "epoch": 0.09719766205578234, "grad_norm": 1.2998325824737549, "learning_rate": 2.9646262480325988e-05, "loss": 0.1024, "step": 4411 }, { "epoch": 0.09721969734529849, "grad_norm": 1.9078729152679443, "learning_rate": 2.964603132424451e-05, "loss": 0.1468, "step": 4412 }, { "epoch": 0.09724173263481466, "grad_norm": 1.152191162109375, "learning_rate": 2.9645800093562937e-05, "loss": 0.107, "step": 4413 }, { "epoch": 0.09726376792433082, "grad_norm": 1.5358396768569946, "learning_rate": 2.964556878828245e-05, "loss": 0.1837, "step": 4414 }, { "epoch": 0.09728580321384697, "grad_norm": 0.9751743078231812, "learning_rate": 2.964533740840423e-05, "loss": 0.1143, "step": 4415 }, { "epoch": 0.09730783850336314, "grad_norm": 1.2620445489883423, "learning_rate": 2.964510595392945e-05, "loss": 0.1211, "step": 4416 }, { "epoch": 0.0973298737928793, "grad_norm": 1.549464464187622, "learning_rate": 2.9644874424859296e-05, "loss": 0.1602, "step": 4417 }, { "epoch": 0.09735190908239545, "grad_norm": 1.6708498001098633, "learning_rate": 2.964464282119494e-05, "loss": 0.1613, "step": 4418 }, { "epoch": 0.09737394437191162, "grad_norm": 1.1076580286026, "learning_rate": 2.9644411142937562e-05, "loss": 0.1719, "step": 4419 }, { "epoch": 0.09739597966142778, "grad_norm": 1.5158671140670776, "learning_rate": 2.964417939008835e-05, "loss": 0.1122, "step": 4420 }, { "epoch": 0.09741801495094393, "grad_norm": 1.8250137567520142, "learning_rate": 2.964394756264848e-05, "loss": 0.1599, "step": 4421 }, { "epoch": 0.0974400502404601, "grad_norm": 1.3976383209228516, "learning_rate": 2.9643715660619132e-05, "loss": 0.1443, "step": 4422 }, { "epoch": 0.09746208552997626, "grad_norm": 1.6622679233551025, "learning_rate": 2.9643483684001485e-05, "loss": 0.1263, "step": 4423 }, { "epoch": 0.09748412081949241, "grad_norm": 1.041143536567688, "learning_rate": 2.9643251632796726e-05, "loss": 0.1342, "step": 4424 }, { "epoch": 0.09750615610900858, "grad_norm": 1.1477783918380737, "learning_rate": 2.964301950700603e-05, "loss": 0.1308, "step": 4425 }, { "epoch": 0.09752819139852474, "grad_norm": 1.2793519496917725, "learning_rate": 2.964278730663059e-05, "loss": 0.1274, "step": 4426 }, { "epoch": 0.09755022668804089, "grad_norm": 1.6862133741378784, "learning_rate": 2.9642555031671575e-05, "loss": 0.1556, "step": 4427 }, { "epoch": 0.09757226197755706, "grad_norm": 1.7394464015960693, "learning_rate": 2.964232268213018e-05, "loss": 0.1538, "step": 4428 }, { "epoch": 0.09759429726707322, "grad_norm": 0.6582152247428894, "learning_rate": 2.9642090258007582e-05, "loss": 0.1375, "step": 4429 }, { "epoch": 0.09761633255658937, "grad_norm": 1.033286690711975, "learning_rate": 2.9641857759304966e-05, "loss": 0.1207, "step": 4430 }, { "epoch": 0.09763836784610554, "grad_norm": 0.9833794236183167, "learning_rate": 2.964162518602352e-05, "loss": 0.1292, "step": 4431 }, { "epoch": 0.0976604031356217, "grad_norm": 1.0270094871520996, "learning_rate": 2.9641392538164425e-05, "loss": 0.1269, "step": 4432 }, { "epoch": 0.09768243842513785, "grad_norm": 1.4375903606414795, "learning_rate": 2.9641159815728864e-05, "loss": 0.1305, "step": 4433 }, { "epoch": 0.09770447371465402, "grad_norm": 1.2253410816192627, "learning_rate": 2.9640927018718022e-05, "loss": 0.1808, "step": 4434 }, { "epoch": 0.09772650900417018, "grad_norm": 0.8869456648826599, "learning_rate": 2.9640694147133095e-05, "loss": 0.1449, "step": 4435 }, { "epoch": 0.09774854429368633, "grad_norm": 1.0591261386871338, "learning_rate": 2.964046120097526e-05, "loss": 0.1481, "step": 4436 }, { "epoch": 0.0977705795832025, "grad_norm": 1.202844500541687, "learning_rate": 2.96402281802457e-05, "loss": 0.1342, "step": 4437 }, { "epoch": 0.09779261487271866, "grad_norm": 1.437991738319397, "learning_rate": 2.963999508494561e-05, "loss": 0.1503, "step": 4438 }, { "epoch": 0.09781465016223483, "grad_norm": 0.8922904133796692, "learning_rate": 2.9639761915076175e-05, "loss": 0.1124, "step": 4439 }, { "epoch": 0.09783668545175098, "grad_norm": 1.5109285116195679, "learning_rate": 2.963952867063858e-05, "loss": 0.2083, "step": 4440 }, { "epoch": 0.09785872074126714, "grad_norm": 1.1942808628082275, "learning_rate": 2.9639295351634018e-05, "loss": 0.1533, "step": 4441 }, { "epoch": 0.0978807560307833, "grad_norm": 1.4903454780578613, "learning_rate": 2.9639061958063675e-05, "loss": 0.1627, "step": 4442 }, { "epoch": 0.09790279132029946, "grad_norm": 0.8896254301071167, "learning_rate": 2.9638828489928737e-05, "loss": 0.1209, "step": 4443 }, { "epoch": 0.09792482660981562, "grad_norm": 1.0523099899291992, "learning_rate": 2.9638594947230397e-05, "loss": 0.1567, "step": 4444 }, { "epoch": 0.09794686189933179, "grad_norm": 1.0073118209838867, "learning_rate": 2.9638361329969844e-05, "loss": 0.1314, "step": 4445 }, { "epoch": 0.09796889718884794, "grad_norm": 1.3074694871902466, "learning_rate": 2.9638127638148264e-05, "loss": 0.1079, "step": 4446 }, { "epoch": 0.0979909324783641, "grad_norm": 0.6082709431648254, "learning_rate": 2.9637893871766852e-05, "loss": 0.1273, "step": 4447 }, { "epoch": 0.09801296776788027, "grad_norm": 1.1389362812042236, "learning_rate": 2.9637660030826798e-05, "loss": 0.1504, "step": 4448 }, { "epoch": 0.09803500305739642, "grad_norm": 1.7915704250335693, "learning_rate": 2.963742611532929e-05, "loss": 0.179, "step": 4449 }, { "epoch": 0.09805703834691258, "grad_norm": 1.053702712059021, "learning_rate": 2.9637192125275522e-05, "loss": 0.1703, "step": 4450 }, { "epoch": 0.09807907363642875, "grad_norm": 1.4979901313781738, "learning_rate": 2.9636958060666688e-05, "loss": 0.1508, "step": 4451 }, { "epoch": 0.0981011089259449, "grad_norm": 1.216981053352356, "learning_rate": 2.9636723921503975e-05, "loss": 0.1786, "step": 4452 }, { "epoch": 0.09812314421546106, "grad_norm": 0.9184147119522095, "learning_rate": 2.963648970778858e-05, "loss": 0.1338, "step": 4453 }, { "epoch": 0.09814517950497723, "grad_norm": 2.03924298286438, "learning_rate": 2.9636255419521693e-05, "loss": 0.1694, "step": 4454 }, { "epoch": 0.09816721479449338, "grad_norm": 1.1469900608062744, "learning_rate": 2.963602105670451e-05, "loss": 0.1176, "step": 4455 }, { "epoch": 0.09818925008400954, "grad_norm": 1.70551598072052, "learning_rate": 2.963578661933822e-05, "loss": 0.1845, "step": 4456 }, { "epoch": 0.0982112853735257, "grad_norm": 2.2483508586883545, "learning_rate": 2.9635552107424025e-05, "loss": 0.1871, "step": 4457 }, { "epoch": 0.09823332066304186, "grad_norm": 1.5385500192642212, "learning_rate": 2.963531752096311e-05, "loss": 0.1742, "step": 4458 }, { "epoch": 0.09825535595255802, "grad_norm": 1.14451265335083, "learning_rate": 2.9635082859956675e-05, "loss": 0.1521, "step": 4459 }, { "epoch": 0.09827739124207419, "grad_norm": 0.9677631258964539, "learning_rate": 2.963484812440592e-05, "loss": 0.1198, "step": 4460 }, { "epoch": 0.09829942653159034, "grad_norm": 0.9407653212547302, "learning_rate": 2.9634613314312034e-05, "loss": 0.1319, "step": 4461 }, { "epoch": 0.0983214618211065, "grad_norm": 1.1819298267364502, "learning_rate": 2.9634378429676218e-05, "loss": 0.1218, "step": 4462 }, { "epoch": 0.09834349711062267, "grad_norm": 0.9873175621032715, "learning_rate": 2.9634143470499656e-05, "loss": 0.1437, "step": 4463 }, { "epoch": 0.09836553240013882, "grad_norm": 1.256596326828003, "learning_rate": 2.963390843678356e-05, "loss": 0.1189, "step": 4464 }, { "epoch": 0.09838756768965498, "grad_norm": 1.0832256078720093, "learning_rate": 2.963367332852912e-05, "loss": 0.1608, "step": 4465 }, { "epoch": 0.09840960297917115, "grad_norm": 1.1022288799285889, "learning_rate": 2.963343814573753e-05, "loss": 0.1132, "step": 4466 }, { "epoch": 0.0984316382686873, "grad_norm": 1.4461175203323364, "learning_rate": 2.963320288841e-05, "loss": 0.1538, "step": 4467 }, { "epoch": 0.09845367355820346, "grad_norm": 0.7121356725692749, "learning_rate": 2.9632967556547716e-05, "loss": 0.1235, "step": 4468 }, { "epoch": 0.09847570884771963, "grad_norm": 1.3774373531341553, "learning_rate": 2.9632732150151887e-05, "loss": 0.1124, "step": 4469 }, { "epoch": 0.09849774413723578, "grad_norm": 1.3012809753417969, "learning_rate": 2.96324966692237e-05, "loss": 0.1408, "step": 4470 }, { "epoch": 0.09851977942675194, "grad_norm": 1.3429338932037354, "learning_rate": 2.9632261113764366e-05, "loss": 0.1158, "step": 4471 }, { "epoch": 0.0985418147162681, "grad_norm": 1.5153281688690186, "learning_rate": 2.963202548377507e-05, "loss": 0.1631, "step": 4472 }, { "epoch": 0.09856385000578427, "grad_norm": 1.550444483757019, "learning_rate": 2.9631789779257033e-05, "loss": 0.1508, "step": 4473 }, { "epoch": 0.09858588529530042, "grad_norm": 1.2390258312225342, "learning_rate": 2.963155400021144e-05, "loss": 0.1502, "step": 4474 }, { "epoch": 0.09860792058481659, "grad_norm": 1.6442797183990479, "learning_rate": 2.9631318146639494e-05, "loss": 0.1738, "step": 4475 }, { "epoch": 0.09862995587433275, "grad_norm": 1.3247169256210327, "learning_rate": 2.9631082218542404e-05, "loss": 0.1263, "step": 4476 }, { "epoch": 0.0986519911638489, "grad_norm": 1.9226198196411133, "learning_rate": 2.9630846215921363e-05, "loss": 0.1602, "step": 4477 }, { "epoch": 0.09867402645336507, "grad_norm": 1.3400660753250122, "learning_rate": 2.9630610138777578e-05, "loss": 0.1293, "step": 4478 }, { "epoch": 0.09869606174288123, "grad_norm": 1.756956934928894, "learning_rate": 2.963037398711225e-05, "loss": 0.1382, "step": 4479 }, { "epoch": 0.09871809703239738, "grad_norm": 1.4944043159484863, "learning_rate": 2.963013776092658e-05, "loss": 0.1646, "step": 4480 }, { "epoch": 0.09874013232191355, "grad_norm": 1.8856302499771118, "learning_rate": 2.9629901460221777e-05, "loss": 0.1043, "step": 4481 }, { "epoch": 0.09876216761142971, "grad_norm": 1.0976743698120117, "learning_rate": 2.9629665084999037e-05, "loss": 0.1238, "step": 4482 }, { "epoch": 0.09878420290094586, "grad_norm": 2.8699958324432373, "learning_rate": 2.9629428635259567e-05, "loss": 0.1403, "step": 4483 }, { "epoch": 0.09880623819046203, "grad_norm": 1.6413533687591553, "learning_rate": 2.962919211100457e-05, "loss": 0.1828, "step": 4484 }, { "epoch": 0.09882827347997819, "grad_norm": 1.2457009553909302, "learning_rate": 2.9628955512235264e-05, "loss": 0.1398, "step": 4485 }, { "epoch": 0.09885030876949434, "grad_norm": 1.1238421201705933, "learning_rate": 2.962871883895283e-05, "loss": 0.1118, "step": 4486 }, { "epoch": 0.0988723440590105, "grad_norm": 1.0404186248779297, "learning_rate": 2.9628482091158496e-05, "loss": 0.1685, "step": 4487 }, { "epoch": 0.09889437934852667, "grad_norm": 0.9635049104690552, "learning_rate": 2.9628245268853453e-05, "loss": 0.1434, "step": 4488 }, { "epoch": 0.09891641463804282, "grad_norm": 2.8371336460113525, "learning_rate": 2.9628008372038914e-05, "loss": 0.1423, "step": 4489 }, { "epoch": 0.09893844992755899, "grad_norm": 1.6195720434188843, "learning_rate": 2.9627771400716083e-05, "loss": 0.1473, "step": 4490 }, { "epoch": 0.09896048521707515, "grad_norm": 1.0703338384628296, "learning_rate": 2.9627534354886168e-05, "loss": 0.1306, "step": 4491 }, { "epoch": 0.0989825205065913, "grad_norm": 1.4189814329147339, "learning_rate": 2.962729723455038e-05, "loss": 0.191, "step": 4492 }, { "epoch": 0.09900455579610747, "grad_norm": 1.4882780313491821, "learning_rate": 2.9627060039709918e-05, "loss": 0.124, "step": 4493 }, { "epoch": 0.09902659108562363, "grad_norm": 1.2254365682601929, "learning_rate": 2.9626822770366e-05, "loss": 0.174, "step": 4494 }, { "epoch": 0.09904862637513978, "grad_norm": 2.5673017501831055, "learning_rate": 2.962658542651983e-05, "loss": 0.2092, "step": 4495 }, { "epoch": 0.09907066166465595, "grad_norm": 2.0937626361846924, "learning_rate": 2.9626348008172614e-05, "loss": 0.1712, "step": 4496 }, { "epoch": 0.09909269695417211, "grad_norm": 1.2637770175933838, "learning_rate": 2.9626110515325567e-05, "loss": 0.1529, "step": 4497 }, { "epoch": 0.09911473224368826, "grad_norm": 1.423126220703125, "learning_rate": 2.96258729479799e-05, "loss": 0.1684, "step": 4498 }, { "epoch": 0.09913676753320443, "grad_norm": 1.2879669666290283, "learning_rate": 2.962563530613681e-05, "loss": 0.1708, "step": 4499 }, { "epoch": 0.09915880282272059, "grad_norm": 1.0913015604019165, "learning_rate": 2.962539758979752e-05, "loss": 0.1582, "step": 4500 }, { "epoch": 0.09918083811223674, "grad_norm": 1.3093211650848389, "learning_rate": 2.9625159798963238e-05, "loss": 0.159, "step": 4501 }, { "epoch": 0.0992028734017529, "grad_norm": 1.4442108869552612, "learning_rate": 2.9624921933635177e-05, "loss": 0.1241, "step": 4502 }, { "epoch": 0.09922490869126907, "grad_norm": 1.21658194065094, "learning_rate": 2.9624683993814543e-05, "loss": 0.1104, "step": 4503 }, { "epoch": 0.09924694398078522, "grad_norm": 1.0692682266235352, "learning_rate": 2.962444597950255e-05, "loss": 0.0927, "step": 4504 }, { "epoch": 0.09926897927030139, "grad_norm": 1.1667393445968628, "learning_rate": 2.9624207890700412e-05, "loss": 0.1617, "step": 4505 }, { "epoch": 0.09929101455981755, "grad_norm": 1.4199634790420532, "learning_rate": 2.9623969727409345e-05, "loss": 0.1425, "step": 4506 }, { "epoch": 0.0993130498493337, "grad_norm": 0.9378330707550049, "learning_rate": 2.9623731489630557e-05, "loss": 0.1212, "step": 4507 }, { "epoch": 0.09933508513884987, "grad_norm": 1.066419005393982, "learning_rate": 2.962349317736526e-05, "loss": 0.1524, "step": 4508 }, { "epoch": 0.09935712042836603, "grad_norm": 1.143380880355835, "learning_rate": 2.962325479061467e-05, "loss": 0.1559, "step": 4509 }, { "epoch": 0.0993791557178822, "grad_norm": 1.0179474353790283, "learning_rate": 2.9623016329380005e-05, "loss": 0.1313, "step": 4510 }, { "epoch": 0.09940119100739835, "grad_norm": 1.3424745798110962, "learning_rate": 2.9622777793662473e-05, "loss": 0.1164, "step": 4511 }, { "epoch": 0.09942322629691451, "grad_norm": 1.3432841300964355, "learning_rate": 2.9622539183463294e-05, "loss": 0.1198, "step": 4512 }, { "epoch": 0.09944526158643067, "grad_norm": 1.7638475894927979, "learning_rate": 2.962230049878368e-05, "loss": 0.1329, "step": 4513 }, { "epoch": 0.09946729687594683, "grad_norm": 1.2356292009353638, "learning_rate": 2.9622061739624852e-05, "loss": 0.1337, "step": 4514 }, { "epoch": 0.09948933216546299, "grad_norm": 0.9092584848403931, "learning_rate": 2.962182290598802e-05, "loss": 0.1242, "step": 4515 }, { "epoch": 0.09951136745497915, "grad_norm": 1.1158872842788696, "learning_rate": 2.9621583997874402e-05, "loss": 0.1481, "step": 4516 }, { "epoch": 0.0995334027444953, "grad_norm": 1.4072504043579102, "learning_rate": 2.962134501528522e-05, "loss": 0.1056, "step": 4517 }, { "epoch": 0.09955543803401147, "grad_norm": 0.8915873765945435, "learning_rate": 2.9621105958221685e-05, "loss": 0.1219, "step": 4518 }, { "epoch": 0.09957747332352763, "grad_norm": 1.8650071620941162, "learning_rate": 2.9620866826685017e-05, "loss": 0.1533, "step": 4519 }, { "epoch": 0.09959950861304379, "grad_norm": 1.8382484912872314, "learning_rate": 2.9620627620676432e-05, "loss": 0.1695, "step": 4520 }, { "epoch": 0.09962154390255995, "grad_norm": 1.4697784185409546, "learning_rate": 2.9620388340197155e-05, "loss": 0.1388, "step": 4521 }, { "epoch": 0.09964357919207611, "grad_norm": 1.6127572059631348, "learning_rate": 2.9620148985248395e-05, "loss": 0.133, "step": 4522 }, { "epoch": 0.09966561448159227, "grad_norm": 2.3143668174743652, "learning_rate": 2.961990955583138e-05, "loss": 0.1926, "step": 4523 }, { "epoch": 0.09968764977110843, "grad_norm": 1.4713325500488281, "learning_rate": 2.9619670051947323e-05, "loss": 0.1756, "step": 4524 }, { "epoch": 0.0997096850606246, "grad_norm": 1.2062630653381348, "learning_rate": 2.961943047359745e-05, "loss": 0.1245, "step": 4525 }, { "epoch": 0.09973172035014075, "grad_norm": 1.448156714439392, "learning_rate": 2.9619190820782978e-05, "loss": 0.105, "step": 4526 }, { "epoch": 0.09975375563965691, "grad_norm": 1.5702465772628784, "learning_rate": 2.9618951093505124e-05, "loss": 0.1711, "step": 4527 }, { "epoch": 0.09977579092917307, "grad_norm": 1.4475147724151611, "learning_rate": 2.9618711291765115e-05, "loss": 0.1558, "step": 4528 }, { "epoch": 0.09979782621868923, "grad_norm": 1.2143728733062744, "learning_rate": 2.9618471415564168e-05, "loss": 0.1619, "step": 4529 }, { "epoch": 0.09981986150820539, "grad_norm": 1.0564885139465332, "learning_rate": 2.961823146490351e-05, "loss": 0.1024, "step": 4530 }, { "epoch": 0.09984189679772155, "grad_norm": 1.2040129899978638, "learning_rate": 2.961799143978436e-05, "loss": 0.1476, "step": 4531 }, { "epoch": 0.0998639320872377, "grad_norm": 1.1463639736175537, "learning_rate": 2.961775134020794e-05, "loss": 0.0942, "step": 4532 }, { "epoch": 0.09988596737675387, "grad_norm": 2.9621384143829346, "learning_rate": 2.961751116617547e-05, "loss": 0.1506, "step": 4533 }, { "epoch": 0.09990800266627003, "grad_norm": 1.644389271736145, "learning_rate": 2.9617270917688183e-05, "loss": 0.192, "step": 4534 }, { "epoch": 0.09993003795578619, "grad_norm": 1.6764734983444214, "learning_rate": 2.9617030594747294e-05, "loss": 0.1625, "step": 4535 }, { "epoch": 0.09995207324530235, "grad_norm": 1.2339715957641602, "learning_rate": 2.9616790197354035e-05, "loss": 0.1166, "step": 4536 }, { "epoch": 0.09997410853481851, "grad_norm": 0.9319894313812256, "learning_rate": 2.9616549725509616e-05, "loss": 0.1205, "step": 4537 }, { "epoch": 0.09999614382433467, "grad_norm": 0.8100240230560303, "learning_rate": 2.9616309179215276e-05, "loss": 0.1623, "step": 4538 }, { "epoch": 0.10001817911385083, "grad_norm": 1.5963579416275024, "learning_rate": 2.9616068558472236e-05, "loss": 0.1408, "step": 4539 }, { "epoch": 0.100040214403367, "grad_norm": 0.9757643938064575, "learning_rate": 2.9615827863281725e-05, "loss": 0.126, "step": 4540 }, { "epoch": 0.10006224969288315, "grad_norm": 0.8988569378852844, "learning_rate": 2.9615587093644957e-05, "loss": 0.1221, "step": 4541 }, { "epoch": 0.10008428498239931, "grad_norm": 0.9632343053817749, "learning_rate": 2.961534624956317e-05, "loss": 0.1603, "step": 4542 }, { "epoch": 0.10010632027191547, "grad_norm": 1.0643278360366821, "learning_rate": 2.961510533103759e-05, "loss": 0.1242, "step": 4543 }, { "epoch": 0.10012835556143164, "grad_norm": 1.3656519651412964, "learning_rate": 2.9614864338069435e-05, "loss": 0.1712, "step": 4544 }, { "epoch": 0.10015039085094779, "grad_norm": 0.7923800349235535, "learning_rate": 2.961462327065994e-05, "loss": 0.1381, "step": 4545 }, { "epoch": 0.10017242614046395, "grad_norm": 1.4654390811920166, "learning_rate": 2.9614382128810336e-05, "loss": 0.1695, "step": 4546 }, { "epoch": 0.10019446142998012, "grad_norm": 1.2602695226669312, "learning_rate": 2.9614140912521846e-05, "loss": 0.1584, "step": 4547 }, { "epoch": 0.10021649671949627, "grad_norm": 1.5227164030075073, "learning_rate": 2.96138996217957e-05, "loss": 0.1206, "step": 4548 }, { "epoch": 0.10023853200901243, "grad_norm": 1.4054851531982422, "learning_rate": 2.9613658256633123e-05, "loss": 0.1347, "step": 4549 }, { "epoch": 0.1002605672985286, "grad_norm": 1.5285592079162598, "learning_rate": 2.961341681703535e-05, "loss": 0.1469, "step": 4550 }, { "epoch": 0.10028260258804475, "grad_norm": 1.2819525003433228, "learning_rate": 2.961317530300361e-05, "loss": 0.1658, "step": 4551 }, { "epoch": 0.10030463787756091, "grad_norm": 0.8856962323188782, "learning_rate": 2.9612933714539135e-05, "loss": 0.1483, "step": 4552 }, { "epoch": 0.10032667316707708, "grad_norm": 0.9995301365852356, "learning_rate": 2.9612692051643148e-05, "loss": 0.1535, "step": 4553 }, { "epoch": 0.10034870845659323, "grad_norm": 1.1738115549087524, "learning_rate": 2.9612450314316886e-05, "loss": 0.1282, "step": 4554 }, { "epoch": 0.1003707437461094, "grad_norm": 1.3634637594223022, "learning_rate": 2.9612208502561577e-05, "loss": 0.1487, "step": 4555 }, { "epoch": 0.10039277903562556, "grad_norm": 1.009402871131897, "learning_rate": 2.9611966616378456e-05, "loss": 0.1154, "step": 4556 }, { "epoch": 0.10041481432514171, "grad_norm": 1.160168170928955, "learning_rate": 2.961172465576875e-05, "loss": 0.1458, "step": 4557 }, { "epoch": 0.10043684961465787, "grad_norm": 1.682116985321045, "learning_rate": 2.9611482620733696e-05, "loss": 0.1435, "step": 4558 }, { "epoch": 0.10045888490417404, "grad_norm": 1.3417083024978638, "learning_rate": 2.961124051127453e-05, "loss": 0.1378, "step": 4559 }, { "epoch": 0.10048092019369019, "grad_norm": 1.1987659931182861, "learning_rate": 2.9610998327392477e-05, "loss": 0.148, "step": 4560 }, { "epoch": 0.10050295548320635, "grad_norm": 1.0827035903930664, "learning_rate": 2.961075606908878e-05, "loss": 0.1158, "step": 4561 }, { "epoch": 0.10052499077272252, "grad_norm": 1.4395335912704468, "learning_rate": 2.961051373636466e-05, "loss": 0.1323, "step": 4562 }, { "epoch": 0.10054702606223867, "grad_norm": 1.3762538433074951, "learning_rate": 2.961027132922136e-05, "loss": 0.1661, "step": 4563 }, { "epoch": 0.10056906135175483, "grad_norm": 1.0563108921051025, "learning_rate": 2.9610028847660117e-05, "loss": 0.1595, "step": 4564 }, { "epoch": 0.100591096641271, "grad_norm": 1.480657696723938, "learning_rate": 2.9609786291682162e-05, "loss": 0.1302, "step": 4565 }, { "epoch": 0.10061313193078715, "grad_norm": 1.3433500528335571, "learning_rate": 2.960954366128873e-05, "loss": 0.1303, "step": 4566 }, { "epoch": 0.10063516722030331, "grad_norm": 1.0935367345809937, "learning_rate": 2.9609300956481057e-05, "loss": 0.1165, "step": 4567 }, { "epoch": 0.10065720250981948, "grad_norm": 1.614682674407959, "learning_rate": 2.960905817726038e-05, "loss": 0.1305, "step": 4568 }, { "epoch": 0.10067923779933563, "grad_norm": 1.0536359548568726, "learning_rate": 2.9608815323627936e-05, "loss": 0.1126, "step": 4569 }, { "epoch": 0.1007012730888518, "grad_norm": 1.253447413444519, "learning_rate": 2.9608572395584966e-05, "loss": 0.1448, "step": 4570 }, { "epoch": 0.10072330837836796, "grad_norm": 1.6628267765045166, "learning_rate": 2.9608329393132695e-05, "loss": 0.1787, "step": 4571 }, { "epoch": 0.10074534366788411, "grad_norm": 2.0248537063598633, "learning_rate": 2.9608086316272372e-05, "loss": 0.1293, "step": 4572 }, { "epoch": 0.10076737895740027, "grad_norm": 2.2529616355895996, "learning_rate": 2.9607843165005236e-05, "loss": 0.1489, "step": 4573 }, { "epoch": 0.10078941424691644, "grad_norm": 1.4937567710876465, "learning_rate": 2.9607599939332516e-05, "loss": 0.161, "step": 4574 }, { "epoch": 0.10081144953643259, "grad_norm": 1.3628947734832764, "learning_rate": 2.960735663925546e-05, "loss": 0.1892, "step": 4575 }, { "epoch": 0.10083348482594875, "grad_norm": 1.1960277557373047, "learning_rate": 2.96071132647753e-05, "loss": 0.1996, "step": 4576 }, { "epoch": 0.10085552011546492, "grad_norm": 1.9565554857254028, "learning_rate": 2.9606869815893287e-05, "loss": 0.2082, "step": 4577 }, { "epoch": 0.10087755540498108, "grad_norm": 1.0273247957229614, "learning_rate": 2.9606626292610645e-05, "loss": 0.1219, "step": 4578 }, { "epoch": 0.10089959069449723, "grad_norm": 1.0987927913665771, "learning_rate": 2.9606382694928628e-05, "loss": 0.1682, "step": 4579 }, { "epoch": 0.1009216259840134, "grad_norm": 1.0386375188827515, "learning_rate": 2.9606139022848467e-05, "loss": 0.1057, "step": 4580 }, { "epoch": 0.10094366127352956, "grad_norm": 1.9991793632507324, "learning_rate": 2.9605895276371408e-05, "loss": 0.203, "step": 4581 }, { "epoch": 0.10096569656304571, "grad_norm": 0.9361546039581299, "learning_rate": 2.9605651455498696e-05, "loss": 0.1096, "step": 4582 }, { "epoch": 0.10098773185256188, "grad_norm": 1.8945695161819458, "learning_rate": 2.9605407560231568e-05, "loss": 0.1632, "step": 4583 }, { "epoch": 0.10100976714207804, "grad_norm": 1.4409347772598267, "learning_rate": 2.9605163590571264e-05, "loss": 0.1645, "step": 4584 }, { "epoch": 0.1010318024315942, "grad_norm": 1.584591269493103, "learning_rate": 2.960491954651903e-05, "loss": 0.159, "step": 4585 }, { "epoch": 0.10105383772111036, "grad_norm": 1.1336755752563477, "learning_rate": 2.9604675428076113e-05, "loss": 0.1095, "step": 4586 }, { "epoch": 0.10107587301062652, "grad_norm": 1.7025949954986572, "learning_rate": 2.960443123524375e-05, "loss": 0.1796, "step": 4587 }, { "epoch": 0.10109790830014267, "grad_norm": 0.9751589298248291, "learning_rate": 2.9604186968023186e-05, "loss": 0.1466, "step": 4588 }, { "epoch": 0.10111994358965884, "grad_norm": 1.3363927602767944, "learning_rate": 2.960394262641567e-05, "loss": 0.1344, "step": 4589 }, { "epoch": 0.101141978879175, "grad_norm": 1.821531057357788, "learning_rate": 2.9603698210422436e-05, "loss": 0.1602, "step": 4590 }, { "epoch": 0.10116401416869115, "grad_norm": 0.9728378057479858, "learning_rate": 2.960345372004474e-05, "loss": 0.0996, "step": 4591 }, { "epoch": 0.10118604945820732, "grad_norm": 2.283195972442627, "learning_rate": 2.9603209155283822e-05, "loss": 0.1502, "step": 4592 }, { "epoch": 0.10120808474772348, "grad_norm": 1.51436448097229, "learning_rate": 2.9602964516140935e-05, "loss": 0.1588, "step": 4593 }, { "epoch": 0.10123012003723963, "grad_norm": 0.9497044086456299, "learning_rate": 2.9602719802617313e-05, "loss": 0.0971, "step": 4594 }, { "epoch": 0.1012521553267558, "grad_norm": 2.142970085144043, "learning_rate": 2.9602475014714207e-05, "loss": 0.1421, "step": 4595 }, { "epoch": 0.10127419061627196, "grad_norm": 2.375281572341919, "learning_rate": 2.960223015243287e-05, "loss": 0.1703, "step": 4596 }, { "epoch": 0.10129622590578811, "grad_norm": 1.9491230249404907, "learning_rate": 2.960198521577454e-05, "loss": 0.1413, "step": 4597 }, { "epoch": 0.10131826119530428, "grad_norm": 0.9657316207885742, "learning_rate": 2.960174020474047e-05, "loss": 0.1376, "step": 4598 }, { "epoch": 0.10134029648482044, "grad_norm": 1.1177319288253784, "learning_rate": 2.9601495119331913e-05, "loss": 0.1825, "step": 4599 }, { "epoch": 0.1013623317743366, "grad_norm": 1.2039813995361328, "learning_rate": 2.9601249959550108e-05, "loss": 0.1486, "step": 4600 }, { "epoch": 0.10138436706385276, "grad_norm": 0.9019169211387634, "learning_rate": 2.9601004725396306e-05, "loss": 0.1057, "step": 4601 }, { "epoch": 0.10140640235336892, "grad_norm": 1.5172785520553589, "learning_rate": 2.960075941687176e-05, "loss": 0.1197, "step": 4602 }, { "epoch": 0.10142843764288507, "grad_norm": 1.2973037958145142, "learning_rate": 2.960051403397771e-05, "loss": 0.1452, "step": 4603 }, { "epoch": 0.10145047293240124, "grad_norm": 1.5130231380462646, "learning_rate": 2.960026857671542e-05, "loss": 0.1166, "step": 4604 }, { "epoch": 0.1014725082219174, "grad_norm": 1.456374168395996, "learning_rate": 2.9600023045086128e-05, "loss": 0.1153, "step": 4605 }, { "epoch": 0.10149454351143355, "grad_norm": 0.8464217185974121, "learning_rate": 2.959977743909109e-05, "loss": 0.184, "step": 4606 }, { "epoch": 0.10151657880094972, "grad_norm": 1.2299988269805908, "learning_rate": 2.959953175873156e-05, "loss": 0.1271, "step": 4607 }, { "epoch": 0.10153861409046588, "grad_norm": 1.1635454893112183, "learning_rate": 2.959928600400878e-05, "loss": 0.1627, "step": 4608 }, { "epoch": 0.10156064937998203, "grad_norm": 1.3795979022979736, "learning_rate": 2.959904017492401e-05, "loss": 0.1507, "step": 4609 }, { "epoch": 0.1015826846694982, "grad_norm": 0.6714494228363037, "learning_rate": 2.9598794271478507e-05, "loss": 0.1173, "step": 4610 }, { "epoch": 0.10160471995901436, "grad_norm": 1.0681427717208862, "learning_rate": 2.9598548293673508e-05, "loss": 0.1381, "step": 4611 }, { "epoch": 0.10162675524853051, "grad_norm": 0.9727092981338501, "learning_rate": 2.959830224151028e-05, "loss": 0.1575, "step": 4612 }, { "epoch": 0.10164879053804668, "grad_norm": 1.2452348470687866, "learning_rate": 2.9598056114990062e-05, "loss": 0.1371, "step": 4613 }, { "epoch": 0.10167082582756284, "grad_norm": 1.1295498609542847, "learning_rate": 2.9597809914114122e-05, "loss": 0.144, "step": 4614 }, { "epoch": 0.10169286111707901, "grad_norm": 1.1205837726593018, "learning_rate": 2.959756363888371e-05, "loss": 0.1567, "step": 4615 }, { "epoch": 0.10171489640659516, "grad_norm": 1.1169626712799072, "learning_rate": 2.9597317289300073e-05, "loss": 0.1253, "step": 4616 }, { "epoch": 0.10173693169611132, "grad_norm": 1.2026065587997437, "learning_rate": 2.9597070865364474e-05, "loss": 0.1772, "step": 4617 }, { "epoch": 0.10175896698562749, "grad_norm": 1.301572561264038, "learning_rate": 2.9596824367078167e-05, "loss": 0.1716, "step": 4618 }, { "epoch": 0.10178100227514364, "grad_norm": 1.4483143091201782, "learning_rate": 2.9596577794442404e-05, "loss": 0.125, "step": 4619 }, { "epoch": 0.1018030375646598, "grad_norm": 2.3523457050323486, "learning_rate": 2.9596331147458442e-05, "loss": 0.1852, "step": 4620 }, { "epoch": 0.10182507285417597, "grad_norm": 1.5796653032302856, "learning_rate": 2.959608442612754e-05, "loss": 0.1019, "step": 4621 }, { "epoch": 0.10184710814369212, "grad_norm": 0.9669278264045715, "learning_rate": 2.9595837630450953e-05, "loss": 0.092, "step": 4622 }, { "epoch": 0.10186914343320828, "grad_norm": 1.6997021436691284, "learning_rate": 2.959559076042994e-05, "loss": 0.1848, "step": 4623 }, { "epoch": 0.10189117872272445, "grad_norm": 2.029700517654419, "learning_rate": 2.9595343816065753e-05, "loss": 0.1829, "step": 4624 }, { "epoch": 0.1019132140122406, "grad_norm": 1.9226301908493042, "learning_rate": 2.9595096797359652e-05, "loss": 0.1509, "step": 4625 }, { "epoch": 0.10193524930175676, "grad_norm": 1.5451382398605347, "learning_rate": 2.9594849704312904e-05, "loss": 0.1536, "step": 4626 }, { "epoch": 0.10195728459127293, "grad_norm": 1.7096223831176758, "learning_rate": 2.959460253692675e-05, "loss": 0.1534, "step": 4627 }, { "epoch": 0.10197931988078908, "grad_norm": 1.5003249645233154, "learning_rate": 2.9594355295202467e-05, "loss": 0.1375, "step": 4628 }, { "epoch": 0.10200135517030524, "grad_norm": 1.421104907989502, "learning_rate": 2.9594107979141302e-05, "loss": 0.0947, "step": 4629 }, { "epoch": 0.10202339045982141, "grad_norm": 1.1728553771972656, "learning_rate": 2.9593860588744523e-05, "loss": 0.1202, "step": 4630 }, { "epoch": 0.10204542574933756, "grad_norm": 1.6683119535446167, "learning_rate": 2.9593613124013385e-05, "loss": 0.1636, "step": 4631 }, { "epoch": 0.10206746103885372, "grad_norm": 1.2435755729675293, "learning_rate": 2.9593365584949147e-05, "loss": 0.2226, "step": 4632 }, { "epoch": 0.10208949632836989, "grad_norm": 1.6500300168991089, "learning_rate": 2.9593117971553075e-05, "loss": 0.1163, "step": 4633 }, { "epoch": 0.10211153161788604, "grad_norm": 1.4361902475357056, "learning_rate": 2.9592870283826426e-05, "loss": 0.1177, "step": 4634 }, { "epoch": 0.1021335669074022, "grad_norm": 1.2954916954040527, "learning_rate": 2.9592622521770465e-05, "loss": 0.1216, "step": 4635 }, { "epoch": 0.10215560219691837, "grad_norm": 1.697572112083435, "learning_rate": 2.959237468538645e-05, "loss": 0.1267, "step": 4636 }, { "epoch": 0.10217763748643452, "grad_norm": 1.563018560409546, "learning_rate": 2.959212677467565e-05, "loss": 0.1763, "step": 4637 }, { "epoch": 0.10219967277595068, "grad_norm": 1.9903507232666016, "learning_rate": 2.9591878789639316e-05, "loss": 0.1712, "step": 4638 }, { "epoch": 0.10222170806546685, "grad_norm": 1.180751085281372, "learning_rate": 2.9591630730278724e-05, "loss": 0.1, "step": 4639 }, { "epoch": 0.102243743354983, "grad_norm": 1.1948333978652954, "learning_rate": 2.959138259659513e-05, "loss": 0.145, "step": 4640 }, { "epoch": 0.10226577864449916, "grad_norm": 1.2170320749282837, "learning_rate": 2.9591134388589804e-05, "loss": 0.1252, "step": 4641 }, { "epoch": 0.10228781393401533, "grad_norm": 0.8572614789009094, "learning_rate": 2.9590886106264003e-05, "loss": 0.1143, "step": 4642 }, { "epoch": 0.10230984922353148, "grad_norm": 1.0967074632644653, "learning_rate": 2.9590637749618994e-05, "loss": 0.1207, "step": 4643 }, { "epoch": 0.10233188451304764, "grad_norm": 1.6477841138839722, "learning_rate": 2.9590389318656045e-05, "loss": 0.1243, "step": 4644 }, { "epoch": 0.10235391980256381, "grad_norm": 1.440176248550415, "learning_rate": 2.9590140813376418e-05, "loss": 0.1371, "step": 4645 }, { "epoch": 0.10237595509207996, "grad_norm": 1.6751662492752075, "learning_rate": 2.9589892233781376e-05, "loss": 0.1927, "step": 4646 }, { "epoch": 0.10239799038159612, "grad_norm": 1.4990012645721436, "learning_rate": 2.9589643579872193e-05, "loss": 0.1577, "step": 4647 }, { "epoch": 0.10242002567111229, "grad_norm": 1.3540666103363037, "learning_rate": 2.9589394851650136e-05, "loss": 0.1369, "step": 4648 }, { "epoch": 0.10244206096062845, "grad_norm": 1.4234309196472168, "learning_rate": 2.9589146049116463e-05, "loss": 0.1569, "step": 4649 }, { "epoch": 0.1024640962501446, "grad_norm": 1.6164129972457886, "learning_rate": 2.9588897172272447e-05, "loss": 0.1565, "step": 4650 }, { "epoch": 0.10248613153966077, "grad_norm": 1.3131998777389526, "learning_rate": 2.9588648221119355e-05, "loss": 0.1468, "step": 4651 }, { "epoch": 0.10250816682917693, "grad_norm": 1.8752726316452026, "learning_rate": 2.9588399195658448e-05, "loss": 0.1579, "step": 4652 }, { "epoch": 0.10253020211869308, "grad_norm": 1.2589080333709717, "learning_rate": 2.958815009589101e-05, "loss": 0.1911, "step": 4653 }, { "epoch": 0.10255223740820925, "grad_norm": 2.244006872177124, "learning_rate": 2.9587900921818294e-05, "loss": 0.149, "step": 4654 }, { "epoch": 0.10257427269772541, "grad_norm": 1.1783239841461182, "learning_rate": 2.9587651673441582e-05, "loss": 0.1375, "step": 4655 }, { "epoch": 0.10259630798724156, "grad_norm": 0.9416871070861816, "learning_rate": 2.9587402350762133e-05, "loss": 0.1597, "step": 4656 }, { "epoch": 0.10261834327675773, "grad_norm": 1.0422234535217285, "learning_rate": 2.9587152953781222e-05, "loss": 0.1304, "step": 4657 }, { "epoch": 0.1026403785662739, "grad_norm": 1.9915167093276978, "learning_rate": 2.958690348250012e-05, "loss": 0.1521, "step": 4658 }, { "epoch": 0.10266241385579004, "grad_norm": 1.1729388236999512, "learning_rate": 2.9586653936920098e-05, "loss": 0.1608, "step": 4659 }, { "epoch": 0.10268444914530621, "grad_norm": 1.104317545890808, "learning_rate": 2.9586404317042422e-05, "loss": 0.1555, "step": 4660 }, { "epoch": 0.10270648443482237, "grad_norm": 1.398004412651062, "learning_rate": 2.958615462286837e-05, "loss": 0.1683, "step": 4661 }, { "epoch": 0.10272851972433852, "grad_norm": 0.95184725522995, "learning_rate": 2.9585904854399208e-05, "loss": 0.1271, "step": 4662 }, { "epoch": 0.10275055501385469, "grad_norm": 1.2543110847473145, "learning_rate": 2.9585655011636214e-05, "loss": 0.0996, "step": 4663 }, { "epoch": 0.10277259030337085, "grad_norm": 1.2202510833740234, "learning_rate": 2.9585405094580654e-05, "loss": 0.1327, "step": 4664 }, { "epoch": 0.102794625592887, "grad_norm": 1.1012014150619507, "learning_rate": 2.958515510323381e-05, "loss": 0.1313, "step": 4665 }, { "epoch": 0.10281666088240317, "grad_norm": 0.9675164222717285, "learning_rate": 2.9584905037596945e-05, "loss": 0.1622, "step": 4666 }, { "epoch": 0.10283869617191933, "grad_norm": 0.868854284286499, "learning_rate": 2.958465489767134e-05, "loss": 0.095, "step": 4667 }, { "epoch": 0.10286073146143548, "grad_norm": 1.6259037256240845, "learning_rate": 2.9584404683458263e-05, "loss": 0.1331, "step": 4668 }, { "epoch": 0.10288276675095165, "grad_norm": 1.345650315284729, "learning_rate": 2.9584154394958996e-05, "loss": 0.1594, "step": 4669 }, { "epoch": 0.10290480204046781, "grad_norm": 1.6196773052215576, "learning_rate": 2.9583904032174808e-05, "loss": 0.2008, "step": 4670 }, { "epoch": 0.10292683732998396, "grad_norm": 1.0553959608078003, "learning_rate": 2.9583653595106976e-05, "loss": 0.13, "step": 4671 }, { "epoch": 0.10294887261950013, "grad_norm": 0.9462540149688721, "learning_rate": 2.958340308375678e-05, "loss": 0.1224, "step": 4672 }, { "epoch": 0.1029709079090163, "grad_norm": 1.1122974157333374, "learning_rate": 2.9583152498125487e-05, "loss": 0.1497, "step": 4673 }, { "epoch": 0.10299294319853244, "grad_norm": 1.6447163820266724, "learning_rate": 2.958290183821438e-05, "loss": 0.1441, "step": 4674 }, { "epoch": 0.10301497848804861, "grad_norm": 2.1091079711914062, "learning_rate": 2.9582651104024736e-05, "loss": 0.1316, "step": 4675 }, { "epoch": 0.10303701377756477, "grad_norm": 1.1711947917938232, "learning_rate": 2.958240029555783e-05, "loss": 0.1181, "step": 4676 }, { "epoch": 0.10305904906708092, "grad_norm": 1.2045185565948486, "learning_rate": 2.9582149412814934e-05, "loss": 0.2096, "step": 4677 }, { "epoch": 0.10308108435659709, "grad_norm": 1.4958901405334473, "learning_rate": 2.9581898455797333e-05, "loss": 0.1747, "step": 4678 }, { "epoch": 0.10310311964611325, "grad_norm": 1.1161198616027832, "learning_rate": 2.9581647424506306e-05, "loss": 0.1313, "step": 4679 }, { "epoch": 0.1031251549356294, "grad_norm": 1.1141266822814941, "learning_rate": 2.958139631894313e-05, "loss": 0.1484, "step": 4680 }, { "epoch": 0.10314719022514557, "grad_norm": 0.9249712824821472, "learning_rate": 2.9581145139109085e-05, "loss": 0.0925, "step": 4681 }, { "epoch": 0.10316922551466173, "grad_norm": 1.2438844442367554, "learning_rate": 2.9580893885005445e-05, "loss": 0.1449, "step": 4682 }, { "epoch": 0.1031912608041779, "grad_norm": 1.7150150537490845, "learning_rate": 2.9580642556633496e-05, "loss": 0.1696, "step": 4683 }, { "epoch": 0.10321329609369405, "grad_norm": 0.9386174082756042, "learning_rate": 2.9580391153994514e-05, "loss": 0.1581, "step": 4684 }, { "epoch": 0.10323533138321021, "grad_norm": 1.6581748723983765, "learning_rate": 2.958013967708978e-05, "loss": 0.1273, "step": 4685 }, { "epoch": 0.10325736667272638, "grad_norm": 1.3670716285705566, "learning_rate": 2.957988812592058e-05, "loss": 0.1524, "step": 4686 }, { "epoch": 0.10327940196224253, "grad_norm": 1.2408212423324585, "learning_rate": 2.957963650048819e-05, "loss": 0.1407, "step": 4687 }, { "epoch": 0.1033014372517587, "grad_norm": 1.1762624979019165, "learning_rate": 2.9579384800793893e-05, "loss": 0.1231, "step": 4688 }, { "epoch": 0.10332347254127486, "grad_norm": 0.8659806251525879, "learning_rate": 2.957913302683897e-05, "loss": 0.1373, "step": 4689 }, { "epoch": 0.10334550783079101, "grad_norm": 1.2393285036087036, "learning_rate": 2.9578881178624704e-05, "loss": 0.1188, "step": 4690 }, { "epoch": 0.10336754312030717, "grad_norm": 0.880908727645874, "learning_rate": 2.957862925615238e-05, "loss": 0.109, "step": 4691 }, { "epoch": 0.10338957840982334, "grad_norm": 1.218152642250061, "learning_rate": 2.9578377259423283e-05, "loss": 0.1554, "step": 4692 }, { "epoch": 0.10341161369933949, "grad_norm": 1.2272874116897583, "learning_rate": 2.9578125188438687e-05, "loss": 0.1254, "step": 4693 }, { "epoch": 0.10343364898885565, "grad_norm": 1.104361891746521, "learning_rate": 2.9577873043199883e-05, "loss": 0.1357, "step": 4694 }, { "epoch": 0.10345568427837182, "grad_norm": 1.285139560699463, "learning_rate": 2.9577620823708154e-05, "loss": 0.1225, "step": 4695 }, { "epoch": 0.10347771956788797, "grad_norm": 1.8069730997085571, "learning_rate": 2.9577368529964786e-05, "loss": 0.201, "step": 4696 }, { "epoch": 0.10349975485740413, "grad_norm": 1.117639422416687, "learning_rate": 2.9577116161971065e-05, "loss": 0.1093, "step": 4697 }, { "epoch": 0.1035217901469203, "grad_norm": 1.272720456123352, "learning_rate": 2.9576863719728273e-05, "loss": 0.1143, "step": 4698 }, { "epoch": 0.10354382543643645, "grad_norm": 1.399203896522522, "learning_rate": 2.9576611203237696e-05, "loss": 0.1304, "step": 4699 }, { "epoch": 0.10356586072595261, "grad_norm": 0.874607264995575, "learning_rate": 2.9576358612500626e-05, "loss": 0.1124, "step": 4700 }, { "epoch": 0.10358789601546878, "grad_norm": 1.3796398639678955, "learning_rate": 2.957610594751834e-05, "loss": 0.1912, "step": 4701 }, { "epoch": 0.10360993130498493, "grad_norm": 1.278058409690857, "learning_rate": 2.957585320829213e-05, "loss": 0.1462, "step": 4702 }, { "epoch": 0.1036319665945011, "grad_norm": 1.9597958326339722, "learning_rate": 2.957560039482329e-05, "loss": 0.1586, "step": 4703 }, { "epoch": 0.10365400188401726, "grad_norm": 1.1126394271850586, "learning_rate": 2.9575347507113092e-05, "loss": 0.121, "step": 4704 }, { "epoch": 0.10367603717353341, "grad_norm": 0.9651889204978943, "learning_rate": 2.957509454516284e-05, "loss": 0.1127, "step": 4705 }, { "epoch": 0.10369807246304957, "grad_norm": 1.1136829853057861, "learning_rate": 2.9574841508973813e-05, "loss": 0.1896, "step": 4706 }, { "epoch": 0.10372010775256574, "grad_norm": 1.1689872741699219, "learning_rate": 2.9574588398547308e-05, "loss": 0.1137, "step": 4707 }, { "epoch": 0.10374214304208189, "grad_norm": 1.4469029903411865, "learning_rate": 2.9574335213884603e-05, "loss": 0.1452, "step": 4708 }, { "epoch": 0.10376417833159805, "grad_norm": 1.3407821655273438, "learning_rate": 2.957408195498699e-05, "loss": 0.1166, "step": 4709 }, { "epoch": 0.10378621362111422, "grad_norm": 1.273099660873413, "learning_rate": 2.957382862185577e-05, "loss": 0.121, "step": 4710 }, { "epoch": 0.10380824891063037, "grad_norm": 1.1446431875228882, "learning_rate": 2.9573575214492218e-05, "loss": 0.1279, "step": 4711 }, { "epoch": 0.10383028420014653, "grad_norm": 2.086514711380005, "learning_rate": 2.9573321732897637e-05, "loss": 0.1086, "step": 4712 }, { "epoch": 0.1038523194896627, "grad_norm": 1.6018478870391846, "learning_rate": 2.9573068177073314e-05, "loss": 0.1041, "step": 4713 }, { "epoch": 0.10387435477917885, "grad_norm": 1.9447509050369263, "learning_rate": 2.9572814547020543e-05, "loss": 0.1224, "step": 4714 }, { "epoch": 0.10389639006869501, "grad_norm": 1.1264790296554565, "learning_rate": 2.957256084274061e-05, "loss": 0.1076, "step": 4715 }, { "epoch": 0.10391842535821118, "grad_norm": 1.06694495677948, "learning_rate": 2.957230706423481e-05, "loss": 0.1687, "step": 4716 }, { "epoch": 0.10394046064772733, "grad_norm": 1.3544872999191284, "learning_rate": 2.9572053211504434e-05, "loss": 0.1388, "step": 4717 }, { "epoch": 0.1039624959372435, "grad_norm": 1.006235957145691, "learning_rate": 2.957179928455078e-05, "loss": 0.153, "step": 4718 }, { "epoch": 0.10398453122675966, "grad_norm": 1.4077699184417725, "learning_rate": 2.9571545283375133e-05, "loss": 0.1507, "step": 4719 }, { "epoch": 0.10400656651627582, "grad_norm": 1.170576810836792, "learning_rate": 2.9571291207978797e-05, "loss": 0.166, "step": 4720 }, { "epoch": 0.10402860180579197, "grad_norm": 0.9163298010826111, "learning_rate": 2.957103705836306e-05, "loss": 0.1244, "step": 4721 }, { "epoch": 0.10405063709530814, "grad_norm": 0.7966205477714539, "learning_rate": 2.9570782834529215e-05, "loss": 0.1634, "step": 4722 }, { "epoch": 0.1040726723848243, "grad_norm": 1.7005672454833984, "learning_rate": 2.9570528536478564e-05, "loss": 0.1555, "step": 4723 }, { "epoch": 0.10409470767434045, "grad_norm": 0.8483324646949768, "learning_rate": 2.9570274164212393e-05, "loss": 0.1287, "step": 4724 }, { "epoch": 0.10411674296385662, "grad_norm": 1.4741909503936768, "learning_rate": 2.9570019717732008e-05, "loss": 0.147, "step": 4725 }, { "epoch": 0.10413877825337278, "grad_norm": 0.8806145191192627, "learning_rate": 2.95697651970387e-05, "loss": 0.1095, "step": 4726 }, { "epoch": 0.10416081354288893, "grad_norm": 1.3392372131347656, "learning_rate": 2.9569510602133756e-05, "loss": 0.1558, "step": 4727 }, { "epoch": 0.1041828488324051, "grad_norm": 1.2220382690429688, "learning_rate": 2.956925593301849e-05, "loss": 0.1556, "step": 4728 }, { "epoch": 0.10420488412192126, "grad_norm": 1.54206383228302, "learning_rate": 2.9569001189694188e-05, "loss": 0.1734, "step": 4729 }, { "epoch": 0.10422691941143741, "grad_norm": 1.3873250484466553, "learning_rate": 2.956874637216215e-05, "loss": 0.1777, "step": 4730 }, { "epoch": 0.10424895470095358, "grad_norm": 1.1776721477508545, "learning_rate": 2.9568491480423673e-05, "loss": 0.1884, "step": 4731 }, { "epoch": 0.10427098999046974, "grad_norm": 1.2961007356643677, "learning_rate": 2.956823651448006e-05, "loss": 0.1434, "step": 4732 }, { "epoch": 0.1042930252799859, "grad_norm": 1.137699842453003, "learning_rate": 2.956798147433261e-05, "loss": 0.1541, "step": 4733 }, { "epoch": 0.10431506056950206, "grad_norm": 0.9601744413375854, "learning_rate": 2.956772635998261e-05, "loss": 0.1565, "step": 4734 }, { "epoch": 0.10433709585901822, "grad_norm": 1.0975899696350098, "learning_rate": 2.9567471171431373e-05, "loss": 0.144, "step": 4735 }, { "epoch": 0.10435913114853437, "grad_norm": 1.3715656995773315, "learning_rate": 2.956721590868019e-05, "loss": 0.1368, "step": 4736 }, { "epoch": 0.10438116643805054, "grad_norm": 0.9834102392196655, "learning_rate": 2.9566960571730367e-05, "loss": 0.1116, "step": 4737 }, { "epoch": 0.1044032017275667, "grad_norm": 1.5293010473251343, "learning_rate": 2.95667051605832e-05, "loss": 0.1905, "step": 4738 }, { "epoch": 0.10442523701708285, "grad_norm": 1.4416594505310059, "learning_rate": 2.9566449675239994e-05, "loss": 0.1298, "step": 4739 }, { "epoch": 0.10444727230659902, "grad_norm": 1.2265269756317139, "learning_rate": 2.9566194115702046e-05, "loss": 0.131, "step": 4740 }, { "epoch": 0.10446930759611518, "grad_norm": 1.5929664373397827, "learning_rate": 2.956593848197067e-05, "loss": 0.1282, "step": 4741 }, { "epoch": 0.10449134288563133, "grad_norm": 2.1335320472717285, "learning_rate": 2.956568277404715e-05, "loss": 0.1241, "step": 4742 }, { "epoch": 0.1045133781751475, "grad_norm": 1.1746333837509155, "learning_rate": 2.9565426991932798e-05, "loss": 0.1254, "step": 4743 }, { "epoch": 0.10453541346466366, "grad_norm": 1.1735786199569702, "learning_rate": 2.9565171135628918e-05, "loss": 0.1205, "step": 4744 }, { "epoch": 0.10455744875417981, "grad_norm": 1.6401594877243042, "learning_rate": 2.9564915205136807e-05, "loss": 0.1797, "step": 4745 }, { "epoch": 0.10457948404369598, "grad_norm": 0.9591293334960938, "learning_rate": 2.9564659200457777e-05, "loss": 0.1361, "step": 4746 }, { "epoch": 0.10460151933321214, "grad_norm": 1.5303189754486084, "learning_rate": 2.956440312159312e-05, "loss": 0.1557, "step": 4747 }, { "epoch": 0.1046235546227283, "grad_norm": 1.6599783897399902, "learning_rate": 2.9564146968544157e-05, "loss": 0.2172, "step": 4748 }, { "epoch": 0.10464558991224446, "grad_norm": 1.2294318675994873, "learning_rate": 2.956389074131218e-05, "loss": 0.1536, "step": 4749 }, { "epoch": 0.10466762520176062, "grad_norm": 1.2436459064483643, "learning_rate": 2.95636344398985e-05, "loss": 0.1138, "step": 4750 }, { "epoch": 0.10468966049127677, "grad_norm": 1.1048790216445923, "learning_rate": 2.9563378064304416e-05, "loss": 0.1164, "step": 4751 }, { "epoch": 0.10471169578079294, "grad_norm": 1.1243457794189453, "learning_rate": 2.956312161453124e-05, "loss": 0.1145, "step": 4752 }, { "epoch": 0.1047337310703091, "grad_norm": 1.7154560089111328, "learning_rate": 2.9562865090580272e-05, "loss": 0.1541, "step": 4753 }, { "epoch": 0.10475576635982527, "grad_norm": 1.7680487632751465, "learning_rate": 2.956260849245283e-05, "loss": 0.1364, "step": 4754 }, { "epoch": 0.10477780164934142, "grad_norm": 1.3181465864181519, "learning_rate": 2.9562351820150208e-05, "loss": 0.1285, "step": 4755 }, { "epoch": 0.10479983693885758, "grad_norm": 1.4053786993026733, "learning_rate": 2.9562095073673722e-05, "loss": 0.1758, "step": 4756 }, { "epoch": 0.10482187222837375, "grad_norm": 1.2100862264633179, "learning_rate": 2.9561838253024675e-05, "loss": 0.1049, "step": 4757 }, { "epoch": 0.1048439075178899, "grad_norm": 1.3387279510498047, "learning_rate": 2.9561581358204382e-05, "loss": 0.1724, "step": 4758 }, { "epoch": 0.10486594280740606, "grad_norm": 1.3560062646865845, "learning_rate": 2.9561324389214143e-05, "loss": 0.1596, "step": 4759 }, { "epoch": 0.10488797809692223, "grad_norm": 1.1648166179656982, "learning_rate": 2.956106734605527e-05, "loss": 0.1818, "step": 4760 }, { "epoch": 0.10491001338643838, "grad_norm": 1.169715166091919, "learning_rate": 2.9560810228729074e-05, "loss": 0.1155, "step": 4761 }, { "epoch": 0.10493204867595454, "grad_norm": 1.1560754776000977, "learning_rate": 2.9560553037236864e-05, "loss": 0.1544, "step": 4762 }, { "epoch": 0.10495408396547071, "grad_norm": 0.9208482503890991, "learning_rate": 2.956029577157995e-05, "loss": 0.1269, "step": 4763 }, { "epoch": 0.10497611925498686, "grad_norm": 1.0225739479064941, "learning_rate": 2.9560038431759638e-05, "loss": 0.133, "step": 4764 }, { "epoch": 0.10499815454450302, "grad_norm": 1.2810896635055542, "learning_rate": 2.955978101777725e-05, "loss": 0.1184, "step": 4765 }, { "epoch": 0.10502018983401919, "grad_norm": 1.2320750951766968, "learning_rate": 2.955952352963408e-05, "loss": 0.1131, "step": 4766 }, { "epoch": 0.10504222512353534, "grad_norm": 1.7092642784118652, "learning_rate": 2.9559265967331454e-05, "loss": 0.1697, "step": 4767 }, { "epoch": 0.1050642604130515, "grad_norm": 1.1653610467910767, "learning_rate": 2.955900833087068e-05, "loss": 0.0928, "step": 4768 }, { "epoch": 0.10508629570256767, "grad_norm": 1.1967872381210327, "learning_rate": 2.9558750620253068e-05, "loss": 0.1332, "step": 4769 }, { "epoch": 0.10510833099208382, "grad_norm": 1.360443115234375, "learning_rate": 2.9558492835479933e-05, "loss": 0.1587, "step": 4770 }, { "epoch": 0.10513036628159998, "grad_norm": 1.5923007726669312, "learning_rate": 2.9558234976552587e-05, "loss": 0.1842, "step": 4771 }, { "epoch": 0.10515240157111615, "grad_norm": 1.4298712015151978, "learning_rate": 2.955797704347234e-05, "loss": 0.1629, "step": 4772 }, { "epoch": 0.1051744368606323, "grad_norm": 1.3884801864624023, "learning_rate": 2.955771903624051e-05, "loss": 0.1691, "step": 4773 }, { "epoch": 0.10519647215014846, "grad_norm": 0.7923656105995178, "learning_rate": 2.9557460954858415e-05, "loss": 0.1173, "step": 4774 }, { "epoch": 0.10521850743966463, "grad_norm": 1.335106372833252, "learning_rate": 2.9557202799327365e-05, "loss": 0.1445, "step": 4775 }, { "epoch": 0.10524054272918078, "grad_norm": 1.7067385911941528, "learning_rate": 2.9556944569648673e-05, "loss": 0.1976, "step": 4776 }, { "epoch": 0.10526257801869694, "grad_norm": 1.2216925621032715, "learning_rate": 2.9556686265823655e-05, "loss": 0.1288, "step": 4777 }, { "epoch": 0.1052846133082131, "grad_norm": 1.4905470609664917, "learning_rate": 2.9556427887853628e-05, "loss": 0.1169, "step": 4778 }, { "epoch": 0.10530664859772926, "grad_norm": 0.9779342412948608, "learning_rate": 2.955616943573991e-05, "loss": 0.1301, "step": 4779 }, { "epoch": 0.10532868388724542, "grad_norm": 1.2082784175872803, "learning_rate": 2.9555910909483812e-05, "loss": 0.1394, "step": 4780 }, { "epoch": 0.10535071917676159, "grad_norm": 1.2356361150741577, "learning_rate": 2.955565230908666e-05, "loss": 0.1321, "step": 4781 }, { "epoch": 0.10537275446627774, "grad_norm": 2.0253329277038574, "learning_rate": 2.9555393634549758e-05, "loss": 0.165, "step": 4782 }, { "epoch": 0.1053947897557939, "grad_norm": 1.5975533723831177, "learning_rate": 2.955513488587444e-05, "loss": 0.1206, "step": 4783 }, { "epoch": 0.10541682504531007, "grad_norm": 0.8956909775733948, "learning_rate": 2.9554876063062008e-05, "loss": 0.1235, "step": 4784 }, { "epoch": 0.10543886033482622, "grad_norm": 1.1728034019470215, "learning_rate": 2.955461716611379e-05, "loss": 0.1893, "step": 4785 }, { "epoch": 0.10546089562434238, "grad_norm": 1.136936068534851, "learning_rate": 2.95543581950311e-05, "loss": 0.1379, "step": 4786 }, { "epoch": 0.10548293091385855, "grad_norm": 0.8592475056648254, "learning_rate": 2.9554099149815262e-05, "loss": 0.1375, "step": 4787 }, { "epoch": 0.1055049662033747, "grad_norm": 1.4076223373413086, "learning_rate": 2.955384003046759e-05, "loss": 0.1245, "step": 4788 }, { "epoch": 0.10552700149289086, "grad_norm": 1.1333733797073364, "learning_rate": 2.955358083698941e-05, "loss": 0.1707, "step": 4789 }, { "epoch": 0.10554903678240703, "grad_norm": 1.094512701034546, "learning_rate": 2.9553321569382033e-05, "loss": 0.1355, "step": 4790 }, { "epoch": 0.10557107207192319, "grad_norm": 1.2764198780059814, "learning_rate": 2.9553062227646785e-05, "loss": 0.1258, "step": 4791 }, { "epoch": 0.10559310736143934, "grad_norm": 0.8446369171142578, "learning_rate": 2.9552802811784994e-05, "loss": 0.1024, "step": 4792 }, { "epoch": 0.1056151426509555, "grad_norm": 1.091872215270996, "learning_rate": 2.955254332179797e-05, "loss": 0.1505, "step": 4793 }, { "epoch": 0.10563717794047167, "grad_norm": 1.470359206199646, "learning_rate": 2.9552283757687036e-05, "loss": 0.1168, "step": 4794 }, { "epoch": 0.10565921322998782, "grad_norm": 1.1449025869369507, "learning_rate": 2.9552024119453526e-05, "loss": 0.1909, "step": 4795 }, { "epoch": 0.10568124851950399, "grad_norm": 1.3532224893569946, "learning_rate": 2.9551764407098744e-05, "loss": 0.145, "step": 4796 }, { "epoch": 0.10570328380902015, "grad_norm": 1.240755558013916, "learning_rate": 2.955150462062403e-05, "loss": 0.1725, "step": 4797 }, { "epoch": 0.1057253190985363, "grad_norm": 1.3658889532089233, "learning_rate": 2.9551244760030693e-05, "loss": 0.1618, "step": 4798 }, { "epoch": 0.10574735438805247, "grad_norm": 1.243273377418518, "learning_rate": 2.9550984825320073e-05, "loss": 0.1831, "step": 4799 }, { "epoch": 0.10576938967756863, "grad_norm": 1.6943708658218384, "learning_rate": 2.9550724816493476e-05, "loss": 0.1939, "step": 4800 }, { "epoch": 0.10579142496708478, "grad_norm": 1.2792446613311768, "learning_rate": 2.955046473355224e-05, "loss": 0.1411, "step": 4801 }, { "epoch": 0.10581346025660095, "grad_norm": 1.4157054424285889, "learning_rate": 2.955020457649768e-05, "loss": 0.1265, "step": 4802 }, { "epoch": 0.10583549554611711, "grad_norm": 2.034445285797119, "learning_rate": 2.9549944345331128e-05, "loss": 0.1253, "step": 4803 }, { "epoch": 0.10585753083563326, "grad_norm": 1.8356975317001343, "learning_rate": 2.9549684040053903e-05, "loss": 0.1347, "step": 4804 }, { "epoch": 0.10587956612514943, "grad_norm": 0.9027142524719238, "learning_rate": 2.954942366066734e-05, "loss": 0.1016, "step": 4805 }, { "epoch": 0.10590160141466559, "grad_norm": 1.2647294998168945, "learning_rate": 2.954916320717276e-05, "loss": 0.1167, "step": 4806 }, { "epoch": 0.10592363670418174, "grad_norm": 0.9383460283279419, "learning_rate": 2.954890267957149e-05, "loss": 0.1099, "step": 4807 }, { "epoch": 0.1059456719936979, "grad_norm": 0.990497350692749, "learning_rate": 2.9548642077864857e-05, "loss": 0.1324, "step": 4808 }, { "epoch": 0.10596770728321407, "grad_norm": 1.4388412237167358, "learning_rate": 2.9548381402054188e-05, "loss": 0.1144, "step": 4809 }, { "epoch": 0.10598974257273022, "grad_norm": 1.2320438623428345, "learning_rate": 2.9548120652140807e-05, "loss": 0.1363, "step": 4810 }, { "epoch": 0.10601177786224639, "grad_norm": 1.0502026081085205, "learning_rate": 2.9547859828126054e-05, "loss": 0.1705, "step": 4811 }, { "epoch": 0.10603381315176255, "grad_norm": 1.2518975734710693, "learning_rate": 2.9547598930011245e-05, "loss": 0.158, "step": 4812 }, { "epoch": 0.1060558484412787, "grad_norm": 1.3157724142074585, "learning_rate": 2.954733795779771e-05, "loss": 0.1217, "step": 4813 }, { "epoch": 0.10607788373079487, "grad_norm": 1.4489144086837769, "learning_rate": 2.954707691148679e-05, "loss": 0.1171, "step": 4814 }, { "epoch": 0.10609991902031103, "grad_norm": 1.5479419231414795, "learning_rate": 2.95468157910798e-05, "loss": 0.1206, "step": 4815 }, { "epoch": 0.10612195430982718, "grad_norm": 1.1005041599273682, "learning_rate": 2.9546554596578078e-05, "loss": 0.1589, "step": 4816 }, { "epoch": 0.10614398959934335, "grad_norm": 1.1994149684906006, "learning_rate": 2.9546293327982954e-05, "loss": 0.1615, "step": 4817 }, { "epoch": 0.10616602488885951, "grad_norm": 1.164787769317627, "learning_rate": 2.954603198529576e-05, "loss": 0.1134, "step": 4818 }, { "epoch": 0.10618806017837566, "grad_norm": 1.2326580286026, "learning_rate": 2.954577056851782e-05, "loss": 0.1543, "step": 4819 }, { "epoch": 0.10621009546789183, "grad_norm": 1.1086980104446411, "learning_rate": 2.9545509077650473e-05, "loss": 0.154, "step": 4820 }, { "epoch": 0.10623213075740799, "grad_norm": 1.415518879890442, "learning_rate": 2.954524751269505e-05, "loss": 0.0954, "step": 4821 }, { "epoch": 0.10625416604692414, "grad_norm": 1.181395411491394, "learning_rate": 2.954498587365288e-05, "loss": 0.1266, "step": 4822 }, { "epoch": 0.1062762013364403, "grad_norm": 0.8549296855926514, "learning_rate": 2.9544724160525297e-05, "loss": 0.1382, "step": 4823 }, { "epoch": 0.10629823662595647, "grad_norm": 1.277246117591858, "learning_rate": 2.9544462373313635e-05, "loss": 0.1378, "step": 4824 }, { "epoch": 0.10632027191547264, "grad_norm": 1.5028053522109985, "learning_rate": 2.9544200512019227e-05, "loss": 0.1463, "step": 4825 }, { "epoch": 0.10634230720498879, "grad_norm": 1.8821998834609985, "learning_rate": 2.9543938576643402e-05, "loss": 0.1084, "step": 4826 }, { "epoch": 0.10636434249450495, "grad_norm": 1.5341590642929077, "learning_rate": 2.9543676567187506e-05, "loss": 0.1396, "step": 4827 }, { "epoch": 0.10638637778402112, "grad_norm": 1.498136281967163, "learning_rate": 2.9543414483652863e-05, "loss": 0.1689, "step": 4828 }, { "epoch": 0.10640841307353727, "grad_norm": 1.594367265701294, "learning_rate": 2.954315232604081e-05, "loss": 0.1269, "step": 4829 }, { "epoch": 0.10643044836305343, "grad_norm": 1.5574610233306885, "learning_rate": 2.9542890094352685e-05, "loss": 0.1624, "step": 4830 }, { "epoch": 0.1064524836525696, "grad_norm": 1.0880653858184814, "learning_rate": 2.9542627788589823e-05, "loss": 0.1086, "step": 4831 }, { "epoch": 0.10647451894208575, "grad_norm": 0.900382399559021, "learning_rate": 2.954236540875356e-05, "loss": 0.1059, "step": 4832 }, { "epoch": 0.10649655423160191, "grad_norm": 1.1851253509521484, "learning_rate": 2.9542102954845227e-05, "loss": 0.1204, "step": 4833 }, { "epoch": 0.10651858952111808, "grad_norm": 0.9420548677444458, "learning_rate": 2.9541840426866172e-05, "loss": 0.1336, "step": 4834 }, { "epoch": 0.10654062481063423, "grad_norm": 1.2201098203659058, "learning_rate": 2.9541577824817718e-05, "loss": 0.1086, "step": 4835 }, { "epoch": 0.10656266010015039, "grad_norm": 1.4168728590011597, "learning_rate": 2.9541315148701217e-05, "loss": 0.1522, "step": 4836 }, { "epoch": 0.10658469538966656, "grad_norm": 1.0699961185455322, "learning_rate": 2.9541052398517997e-05, "loss": 0.1274, "step": 4837 }, { "epoch": 0.1066067306791827, "grad_norm": 1.5851677656173706, "learning_rate": 2.9540789574269402e-05, "loss": 0.1605, "step": 4838 }, { "epoch": 0.10662876596869887, "grad_norm": 0.9519428610801697, "learning_rate": 2.9540526675956765e-05, "loss": 0.1373, "step": 4839 }, { "epoch": 0.10665080125821504, "grad_norm": 1.1810801029205322, "learning_rate": 2.954026370358143e-05, "loss": 0.1418, "step": 4840 }, { "epoch": 0.10667283654773119, "grad_norm": 0.8843642473220825, "learning_rate": 2.9540000657144737e-05, "loss": 0.1293, "step": 4841 }, { "epoch": 0.10669487183724735, "grad_norm": 1.3899072408676147, "learning_rate": 2.953973753664802e-05, "loss": 0.1251, "step": 4842 }, { "epoch": 0.10671690712676352, "grad_norm": 1.6387813091278076, "learning_rate": 2.9539474342092626e-05, "loss": 0.1458, "step": 4843 }, { "epoch": 0.10673894241627967, "grad_norm": 1.5426087379455566, "learning_rate": 2.953921107347989e-05, "loss": 0.1409, "step": 4844 }, { "epoch": 0.10676097770579583, "grad_norm": 1.22464919090271, "learning_rate": 2.9538947730811158e-05, "loss": 0.1438, "step": 4845 }, { "epoch": 0.106783012995312, "grad_norm": 1.503096103668213, "learning_rate": 2.9538684314087767e-05, "loss": 0.1068, "step": 4846 }, { "epoch": 0.10680504828482815, "grad_norm": 1.1083300113677979, "learning_rate": 2.9538420823311064e-05, "loss": 0.1584, "step": 4847 }, { "epoch": 0.10682708357434431, "grad_norm": 1.7327189445495605, "learning_rate": 2.953815725848238e-05, "loss": 0.2031, "step": 4848 }, { "epoch": 0.10684911886386048, "grad_norm": 1.5722763538360596, "learning_rate": 2.9537893619603074e-05, "loss": 0.1291, "step": 4849 }, { "epoch": 0.10687115415337663, "grad_norm": 1.744785189628601, "learning_rate": 2.953762990667447e-05, "loss": 0.1474, "step": 4850 }, { "epoch": 0.10689318944289279, "grad_norm": 0.7575812339782715, "learning_rate": 2.9537366119697932e-05, "loss": 0.1077, "step": 4851 }, { "epoch": 0.10691522473240896, "grad_norm": 0.9906724691390991, "learning_rate": 2.9537102258674786e-05, "loss": 0.1023, "step": 4852 }, { "epoch": 0.1069372600219251, "grad_norm": 1.04893958568573, "learning_rate": 2.9536838323606385e-05, "loss": 0.1528, "step": 4853 }, { "epoch": 0.10695929531144127, "grad_norm": 1.062923550605774, "learning_rate": 2.953657431449407e-05, "loss": 0.1542, "step": 4854 }, { "epoch": 0.10698133060095744, "grad_norm": 1.5946993827819824, "learning_rate": 2.9536310231339187e-05, "loss": 0.1695, "step": 4855 }, { "epoch": 0.10700336589047359, "grad_norm": 1.9261879920959473, "learning_rate": 2.953604607414308e-05, "loss": 0.1061, "step": 4856 }, { "epoch": 0.10702540117998975, "grad_norm": 1.6887046098709106, "learning_rate": 2.9535781842907098e-05, "loss": 0.1254, "step": 4857 }, { "epoch": 0.10704743646950592, "grad_norm": 1.3983029127120972, "learning_rate": 2.9535517537632586e-05, "loss": 0.1289, "step": 4858 }, { "epoch": 0.10706947175902208, "grad_norm": 1.0318524837493896, "learning_rate": 2.9535253158320886e-05, "loss": 0.0786, "step": 4859 }, { "epoch": 0.10709150704853823, "grad_norm": 1.577852487564087, "learning_rate": 2.9534988704973343e-05, "loss": 0.173, "step": 4860 }, { "epoch": 0.1071135423380544, "grad_norm": 1.130794644355774, "learning_rate": 2.9534724177591312e-05, "loss": 0.1165, "step": 4861 }, { "epoch": 0.10713557762757056, "grad_norm": 1.087151050567627, "learning_rate": 2.9534459576176142e-05, "loss": 0.128, "step": 4862 }, { "epoch": 0.10715761291708671, "grad_norm": 1.4552849531173706, "learning_rate": 2.9534194900729167e-05, "loss": 0.1249, "step": 4863 }, { "epoch": 0.10717964820660288, "grad_norm": 1.0526677370071411, "learning_rate": 2.9533930151251747e-05, "loss": 0.1581, "step": 4864 }, { "epoch": 0.10720168349611904, "grad_norm": 1.1557484865188599, "learning_rate": 2.953366532774523e-05, "loss": 0.1561, "step": 4865 }, { "epoch": 0.10722371878563519, "grad_norm": 0.885487973690033, "learning_rate": 2.9533400430210955e-05, "loss": 0.1436, "step": 4866 }, { "epoch": 0.10724575407515136, "grad_norm": 1.650610089302063, "learning_rate": 2.953313545865028e-05, "loss": 0.202, "step": 4867 }, { "epoch": 0.10726778936466752, "grad_norm": 1.1282252073287964, "learning_rate": 2.953287041306456e-05, "loss": 0.1197, "step": 4868 }, { "epoch": 0.10728982465418367, "grad_norm": 0.853327214717865, "learning_rate": 2.953260529345513e-05, "loss": 0.1409, "step": 4869 }, { "epoch": 0.10731185994369984, "grad_norm": 1.0626587867736816, "learning_rate": 2.9532340099823352e-05, "loss": 0.0824, "step": 4870 }, { "epoch": 0.107333895233216, "grad_norm": 1.3638947010040283, "learning_rate": 2.9532074832170572e-05, "loss": 0.1252, "step": 4871 }, { "epoch": 0.10735593052273215, "grad_norm": 1.32806396484375, "learning_rate": 2.9531809490498144e-05, "loss": 0.1201, "step": 4872 }, { "epoch": 0.10737796581224832, "grad_norm": 1.4351983070373535, "learning_rate": 2.9531544074807413e-05, "loss": 0.1827, "step": 4873 }, { "epoch": 0.10740000110176448, "grad_norm": 1.2626310586929321, "learning_rate": 2.953127858509974e-05, "loss": 0.1341, "step": 4874 }, { "epoch": 0.10742203639128063, "grad_norm": 1.571199893951416, "learning_rate": 2.9531013021376473e-05, "loss": 0.1879, "step": 4875 }, { "epoch": 0.1074440716807968, "grad_norm": 1.4720706939697266, "learning_rate": 2.9530747383638962e-05, "loss": 0.1752, "step": 4876 }, { "epoch": 0.10746610697031296, "grad_norm": 1.6514166593551636, "learning_rate": 2.9530481671888563e-05, "loss": 0.1461, "step": 4877 }, { "epoch": 0.10748814225982911, "grad_norm": 0.810860276222229, "learning_rate": 2.9530215886126626e-05, "loss": 0.119, "step": 4878 }, { "epoch": 0.10751017754934528, "grad_norm": 1.174309492111206, "learning_rate": 2.952995002635452e-05, "loss": 0.1518, "step": 4879 }, { "epoch": 0.10753221283886144, "grad_norm": 1.0744752883911133, "learning_rate": 2.9529684092573575e-05, "loss": 0.1307, "step": 4880 }, { "epoch": 0.10755424812837759, "grad_norm": 1.2615013122558594, "learning_rate": 2.9529418084785157e-05, "loss": 0.167, "step": 4881 }, { "epoch": 0.10757628341789376, "grad_norm": 1.3534034490585327, "learning_rate": 2.9529152002990626e-05, "loss": 0.1531, "step": 4882 }, { "epoch": 0.10759831870740992, "grad_norm": 0.5838123559951782, "learning_rate": 2.9528885847191332e-05, "loss": 0.1178, "step": 4883 }, { "epoch": 0.10762035399692607, "grad_norm": 1.365642786026001, "learning_rate": 2.952861961738863e-05, "loss": 0.1515, "step": 4884 }, { "epoch": 0.10764238928644224, "grad_norm": 0.7724793553352356, "learning_rate": 2.9528353313583884e-05, "loss": 0.1143, "step": 4885 }, { "epoch": 0.1076644245759584, "grad_norm": 0.9630743265151978, "learning_rate": 2.9528086935778437e-05, "loss": 0.1384, "step": 4886 }, { "epoch": 0.10768645986547455, "grad_norm": 1.4841474294662476, "learning_rate": 2.9527820483973654e-05, "loss": 0.1692, "step": 4887 }, { "epoch": 0.10770849515499072, "grad_norm": 1.4842090606689453, "learning_rate": 2.9527553958170886e-05, "loss": 0.1809, "step": 4888 }, { "epoch": 0.10773053044450688, "grad_norm": 1.221384048461914, "learning_rate": 2.9527287358371504e-05, "loss": 0.1325, "step": 4889 }, { "epoch": 0.10775256573402303, "grad_norm": 1.5206201076507568, "learning_rate": 2.952702068457685e-05, "loss": 0.1451, "step": 4890 }, { "epoch": 0.1077746010235392, "grad_norm": 1.328810453414917, "learning_rate": 2.9526753936788297e-05, "loss": 0.183, "step": 4891 }, { "epoch": 0.10779663631305536, "grad_norm": 1.1075937747955322, "learning_rate": 2.952648711500719e-05, "loss": 0.1624, "step": 4892 }, { "epoch": 0.10781867160257151, "grad_norm": 1.3356025218963623, "learning_rate": 2.9526220219234902e-05, "loss": 0.1231, "step": 4893 }, { "epoch": 0.10784070689208768, "grad_norm": 1.64133882522583, "learning_rate": 2.9525953249472778e-05, "loss": 0.1673, "step": 4894 }, { "epoch": 0.10786274218160384, "grad_norm": 1.3299624919891357, "learning_rate": 2.9525686205722184e-05, "loss": 0.1548, "step": 4895 }, { "epoch": 0.10788477747112, "grad_norm": 1.0223934650421143, "learning_rate": 2.9525419087984482e-05, "loss": 0.1502, "step": 4896 }, { "epoch": 0.10790681276063616, "grad_norm": 1.2656335830688477, "learning_rate": 2.9525151896261034e-05, "loss": 0.1728, "step": 4897 }, { "epoch": 0.10792884805015232, "grad_norm": 1.4979995489120483, "learning_rate": 2.95248846305532e-05, "loss": 0.1766, "step": 4898 }, { "epoch": 0.10795088333966849, "grad_norm": 1.1102540493011475, "learning_rate": 2.9524617290862337e-05, "loss": 0.1075, "step": 4899 }, { "epoch": 0.10797291862918464, "grad_norm": 1.5890326499938965, "learning_rate": 2.9524349877189806e-05, "loss": 0.1445, "step": 4900 }, { "epoch": 0.1079949539187008, "grad_norm": 0.7630766034126282, "learning_rate": 2.952408238953698e-05, "loss": 0.1002, "step": 4901 }, { "epoch": 0.10801698920821697, "grad_norm": 0.751106858253479, "learning_rate": 2.952381482790521e-05, "loss": 0.1289, "step": 4902 }, { "epoch": 0.10803902449773312, "grad_norm": 1.4498636722564697, "learning_rate": 2.952354719229586e-05, "loss": 0.1699, "step": 4903 }, { "epoch": 0.10806105978724928, "grad_norm": 1.487963080406189, "learning_rate": 2.95232794827103e-05, "loss": 0.1273, "step": 4904 }, { "epoch": 0.10808309507676545, "grad_norm": 0.9226353764533997, "learning_rate": 2.952301169914989e-05, "loss": 0.1182, "step": 4905 }, { "epoch": 0.1081051303662816, "grad_norm": 1.1999176740646362, "learning_rate": 2.9522743841615987e-05, "loss": 0.1553, "step": 4906 }, { "epoch": 0.10812716565579776, "grad_norm": 2.5266611576080322, "learning_rate": 2.952247591010997e-05, "loss": 0.164, "step": 4907 }, { "epoch": 0.10814920094531393, "grad_norm": 1.6951260566711426, "learning_rate": 2.952220790463319e-05, "loss": 0.1735, "step": 4908 }, { "epoch": 0.10817123623483008, "grad_norm": 1.2611569166183472, "learning_rate": 2.952193982518702e-05, "loss": 0.1018, "step": 4909 }, { "epoch": 0.10819327152434624, "grad_norm": 1.2190313339233398, "learning_rate": 2.9521671671772824e-05, "loss": 0.1568, "step": 4910 }, { "epoch": 0.1082153068138624, "grad_norm": 1.4039990901947021, "learning_rate": 2.9521403444391966e-05, "loss": 0.1425, "step": 4911 }, { "epoch": 0.10823734210337856, "grad_norm": 0.9324026703834534, "learning_rate": 2.9521135143045813e-05, "loss": 0.1204, "step": 4912 }, { "epoch": 0.10825937739289472, "grad_norm": 1.4826940298080444, "learning_rate": 2.9520866767735733e-05, "loss": 0.1543, "step": 4913 }, { "epoch": 0.10828141268241089, "grad_norm": 1.4874508380889893, "learning_rate": 2.952059831846309e-05, "loss": 0.141, "step": 4914 }, { "epoch": 0.10830344797192704, "grad_norm": 1.4454867839813232, "learning_rate": 2.9520329795229254e-05, "loss": 0.1485, "step": 4915 }, { "epoch": 0.1083254832614432, "grad_norm": 0.859623372554779, "learning_rate": 2.9520061198035594e-05, "loss": 0.1049, "step": 4916 }, { "epoch": 0.10834751855095937, "grad_norm": 1.0409133434295654, "learning_rate": 2.9519792526883475e-05, "loss": 0.1483, "step": 4917 }, { "epoch": 0.10836955384047552, "grad_norm": 0.8906166553497314, "learning_rate": 2.9519523781774267e-05, "loss": 0.1178, "step": 4918 }, { "epoch": 0.10839158912999168, "grad_norm": 1.5881483554840088, "learning_rate": 2.9519254962709335e-05, "loss": 0.1423, "step": 4919 }, { "epoch": 0.10841362441950785, "grad_norm": 0.9758525490760803, "learning_rate": 2.9518986069690054e-05, "loss": 0.1237, "step": 4920 }, { "epoch": 0.108435659709024, "grad_norm": 1.5188239812850952, "learning_rate": 2.9518717102717794e-05, "loss": 0.1447, "step": 4921 }, { "epoch": 0.10845769499854016, "grad_norm": 1.2873188257217407, "learning_rate": 2.9518448061793913e-05, "loss": 0.1464, "step": 4922 }, { "epoch": 0.10847973028805633, "grad_norm": 1.3669179677963257, "learning_rate": 2.9518178946919803e-05, "loss": 0.178, "step": 4923 }, { "epoch": 0.10850176557757248, "grad_norm": 1.1576746702194214, "learning_rate": 2.9517909758096814e-05, "loss": 0.1447, "step": 4924 }, { "epoch": 0.10852380086708864, "grad_norm": 1.3480232954025269, "learning_rate": 2.951764049532633e-05, "loss": 0.1061, "step": 4925 }, { "epoch": 0.1085458361566048, "grad_norm": 1.3753833770751953, "learning_rate": 2.9517371158609715e-05, "loss": 0.1467, "step": 4926 }, { "epoch": 0.10856787144612096, "grad_norm": 1.9235464334487915, "learning_rate": 2.9517101747948343e-05, "loss": 0.1904, "step": 4927 }, { "epoch": 0.10858990673563712, "grad_norm": 0.9144951701164246, "learning_rate": 2.951683226334359e-05, "loss": 0.1182, "step": 4928 }, { "epoch": 0.10861194202515329, "grad_norm": 1.3644942045211792, "learning_rate": 2.9516562704796824e-05, "loss": 0.1666, "step": 4929 }, { "epoch": 0.10863397731466945, "grad_norm": 1.6771725416183472, "learning_rate": 2.9516293072309422e-05, "loss": 0.1869, "step": 4930 }, { "epoch": 0.1086560126041856, "grad_norm": 1.759036660194397, "learning_rate": 2.951602336588275e-05, "loss": 0.162, "step": 4931 }, { "epoch": 0.10867804789370177, "grad_norm": 1.100511908531189, "learning_rate": 2.951575358551819e-05, "loss": 0.1509, "step": 4932 }, { "epoch": 0.10870008318321793, "grad_norm": 1.1517738103866577, "learning_rate": 2.9515483731217115e-05, "loss": 0.1229, "step": 4933 }, { "epoch": 0.10872211847273408, "grad_norm": 1.2200599908828735, "learning_rate": 2.95152138029809e-05, "loss": 0.1738, "step": 4934 }, { "epoch": 0.10874415376225025, "grad_norm": 1.0311156511306763, "learning_rate": 2.951494380081091e-05, "loss": 0.1348, "step": 4935 }, { "epoch": 0.10876618905176641, "grad_norm": 1.3920403718948364, "learning_rate": 2.9514673724708528e-05, "loss": 0.1514, "step": 4936 }, { "epoch": 0.10878822434128256, "grad_norm": 0.9315047860145569, "learning_rate": 2.9514403574675132e-05, "loss": 0.1778, "step": 4937 }, { "epoch": 0.10881025963079873, "grad_norm": 1.2887922525405884, "learning_rate": 2.95141333507121e-05, "loss": 0.1084, "step": 4938 }, { "epoch": 0.10883229492031489, "grad_norm": 1.2751342058181763, "learning_rate": 2.9513863052820796e-05, "loss": 0.1159, "step": 4939 }, { "epoch": 0.10885433020983104, "grad_norm": 1.183301568031311, "learning_rate": 2.951359268100261e-05, "loss": 0.1932, "step": 4940 }, { "epoch": 0.1088763654993472, "grad_norm": 0.9019517302513123, "learning_rate": 2.951332223525891e-05, "loss": 0.1687, "step": 4941 }, { "epoch": 0.10889840078886337, "grad_norm": 2.482651710510254, "learning_rate": 2.9513051715591082e-05, "loss": 0.1243, "step": 4942 }, { "epoch": 0.10892043607837952, "grad_norm": 1.2935104370117188, "learning_rate": 2.9512781122000498e-05, "loss": 0.1402, "step": 4943 }, { "epoch": 0.10894247136789569, "grad_norm": 0.8037744760513306, "learning_rate": 2.951251045448853e-05, "loss": 0.1556, "step": 4944 }, { "epoch": 0.10896450665741185, "grad_norm": 1.996126651763916, "learning_rate": 2.9512239713056572e-05, "loss": 0.1131, "step": 4945 }, { "epoch": 0.108986541946928, "grad_norm": 1.8054594993591309, "learning_rate": 2.9511968897705992e-05, "loss": 0.1361, "step": 4946 }, { "epoch": 0.10900857723644417, "grad_norm": 4.114927291870117, "learning_rate": 2.951169800843817e-05, "loss": 0.1625, "step": 4947 }, { "epoch": 0.10903061252596033, "grad_norm": 2.6602354049682617, "learning_rate": 2.9511427045254494e-05, "loss": 0.1206, "step": 4948 }, { "epoch": 0.10905264781547648, "grad_norm": 1.145135760307312, "learning_rate": 2.9511156008156334e-05, "loss": 0.1521, "step": 4949 }, { "epoch": 0.10907468310499265, "grad_norm": 1.2189888954162598, "learning_rate": 2.951088489714508e-05, "loss": 0.1328, "step": 4950 }, { "epoch": 0.10909671839450881, "grad_norm": 1.1812471151351929, "learning_rate": 2.9510613712222105e-05, "loss": 0.1125, "step": 4951 }, { "epoch": 0.10911875368402496, "grad_norm": 2.115724563598633, "learning_rate": 2.9510342453388787e-05, "loss": 0.103, "step": 4952 }, { "epoch": 0.10914078897354113, "grad_norm": 1.0908018350601196, "learning_rate": 2.951007112064652e-05, "loss": 0.1361, "step": 4953 }, { "epoch": 0.10916282426305729, "grad_norm": 1.9383941888809204, "learning_rate": 2.950979971399668e-05, "loss": 0.1596, "step": 4954 }, { "epoch": 0.10918485955257344, "grad_norm": 1.7188990116119385, "learning_rate": 2.950952823344065e-05, "loss": 0.1299, "step": 4955 }, { "epoch": 0.1092068948420896, "grad_norm": 1.805519700050354, "learning_rate": 2.9509256678979813e-05, "loss": 0.0993, "step": 4956 }, { "epoch": 0.10922893013160577, "grad_norm": 1.1084120273590088, "learning_rate": 2.9508985050615545e-05, "loss": 0.1269, "step": 4957 }, { "epoch": 0.10925096542112192, "grad_norm": 1.9898464679718018, "learning_rate": 2.9508713348349238e-05, "loss": 0.1308, "step": 4958 }, { "epoch": 0.10927300071063809, "grad_norm": 3.4329473972320557, "learning_rate": 2.9508441572182278e-05, "loss": 0.1656, "step": 4959 }, { "epoch": 0.10929503600015425, "grad_norm": 5.744546413421631, "learning_rate": 2.9508169722116043e-05, "loss": 0.1238, "step": 4960 }, { "epoch": 0.1093170712896704, "grad_norm": 1.0627655982971191, "learning_rate": 2.9507897798151917e-05, "loss": 0.1285, "step": 4961 }, { "epoch": 0.10933910657918657, "grad_norm": 1.585273027420044, "learning_rate": 2.9507625800291288e-05, "loss": 0.1317, "step": 4962 }, { "epoch": 0.10936114186870273, "grad_norm": 1.586495041847229, "learning_rate": 2.9507353728535545e-05, "loss": 0.0953, "step": 4963 }, { "epoch": 0.1093831771582189, "grad_norm": 1.4458253383636475, "learning_rate": 2.9507081582886067e-05, "loss": 0.1912, "step": 4964 }, { "epoch": 0.10940521244773505, "grad_norm": 1.7322993278503418, "learning_rate": 2.9506809363344247e-05, "loss": 0.2014, "step": 4965 }, { "epoch": 0.10942724773725121, "grad_norm": 1.0728346109390259, "learning_rate": 2.9506537069911464e-05, "loss": 0.1368, "step": 4966 }, { "epoch": 0.10944928302676737, "grad_norm": 1.1684679985046387, "learning_rate": 2.950626470258911e-05, "loss": 0.1339, "step": 4967 }, { "epoch": 0.10947131831628353, "grad_norm": 1.284021019935608, "learning_rate": 2.9505992261378572e-05, "loss": 0.1403, "step": 4968 }, { "epoch": 0.10949335360579969, "grad_norm": 1.1473933458328247, "learning_rate": 2.9505719746281237e-05, "loss": 0.1677, "step": 4969 }, { "epoch": 0.10951538889531585, "grad_norm": 0.941942036151886, "learning_rate": 2.950544715729849e-05, "loss": 0.1451, "step": 4970 }, { "epoch": 0.109537424184832, "grad_norm": 1.3321582078933716, "learning_rate": 2.950517449443172e-05, "loss": 0.1443, "step": 4971 }, { "epoch": 0.10955945947434817, "grad_norm": 1.4463647603988647, "learning_rate": 2.950490175768232e-05, "loss": 0.1426, "step": 4972 }, { "epoch": 0.10958149476386433, "grad_norm": 1.2439788579940796, "learning_rate": 2.950462894705168e-05, "loss": 0.1581, "step": 4973 }, { "epoch": 0.10960353005338049, "grad_norm": 1.1273046731948853, "learning_rate": 2.9504356062541187e-05, "loss": 0.1453, "step": 4974 }, { "epoch": 0.10962556534289665, "grad_norm": 1.248136281967163, "learning_rate": 2.950408310415223e-05, "loss": 0.1143, "step": 4975 }, { "epoch": 0.10964760063241281, "grad_norm": 1.0858114957809448, "learning_rate": 2.95038100718862e-05, "loss": 0.1569, "step": 4976 }, { "epoch": 0.10966963592192897, "grad_norm": 0.8513394594192505, "learning_rate": 2.9503536965744484e-05, "loss": 0.1164, "step": 4977 }, { "epoch": 0.10969167121144513, "grad_norm": 0.6383138298988342, "learning_rate": 2.9503263785728478e-05, "loss": 0.0909, "step": 4978 }, { "epoch": 0.1097137065009613, "grad_norm": 1.0523896217346191, "learning_rate": 2.9502990531839578e-05, "loss": 0.1498, "step": 4979 }, { "epoch": 0.10973574179047745, "grad_norm": 1.4255495071411133, "learning_rate": 2.9502717204079163e-05, "loss": 0.1681, "step": 4980 }, { "epoch": 0.10975777707999361, "grad_norm": 1.2734100818634033, "learning_rate": 2.9502443802448638e-05, "loss": 0.1981, "step": 4981 }, { "epoch": 0.10977981236950977, "grad_norm": 1.9841245412826538, "learning_rate": 2.9502170326949386e-05, "loss": 0.1499, "step": 4982 }, { "epoch": 0.10980184765902593, "grad_norm": 1.203171968460083, "learning_rate": 2.9501896777582804e-05, "loss": 0.1494, "step": 4983 }, { "epoch": 0.10982388294854209, "grad_norm": 1.1396111249923706, "learning_rate": 2.9501623154350286e-05, "loss": 0.1271, "step": 4984 }, { "epoch": 0.10984591823805825, "grad_norm": 0.7900856733322144, "learning_rate": 2.9501349457253222e-05, "loss": 0.0785, "step": 4985 }, { "epoch": 0.1098679535275744, "grad_norm": 1.1011857986450195, "learning_rate": 2.9501075686293015e-05, "loss": 0.1612, "step": 4986 }, { "epoch": 0.10988998881709057, "grad_norm": 1.520179271697998, "learning_rate": 2.950080184147105e-05, "loss": 0.1252, "step": 4987 }, { "epoch": 0.10991202410660673, "grad_norm": 1.0504223108291626, "learning_rate": 2.950052792278873e-05, "loss": 0.1041, "step": 4988 }, { "epoch": 0.10993405939612289, "grad_norm": 0.7643799185752869, "learning_rate": 2.9500253930247437e-05, "loss": 0.0902, "step": 4989 }, { "epoch": 0.10995609468563905, "grad_norm": 1.3745427131652832, "learning_rate": 2.949997986384858e-05, "loss": 0.1129, "step": 4990 }, { "epoch": 0.10997812997515521, "grad_norm": 2.3473174571990967, "learning_rate": 2.949970572359355e-05, "loss": 0.1165, "step": 4991 }, { "epoch": 0.11000016526467137, "grad_norm": 0.8793230056762695, "learning_rate": 2.949943150948374e-05, "loss": 0.146, "step": 4992 }, { "epoch": 0.11002220055418753, "grad_norm": 0.9096484184265137, "learning_rate": 2.9499157221520555e-05, "loss": 0.1358, "step": 4993 }, { "epoch": 0.1100442358437037, "grad_norm": 0.8424855470657349, "learning_rate": 2.9498882859705384e-05, "loss": 0.1246, "step": 4994 }, { "epoch": 0.11006627113321985, "grad_norm": 1.0030734539031982, "learning_rate": 2.949860842403963e-05, "loss": 0.1343, "step": 4995 }, { "epoch": 0.11008830642273601, "grad_norm": 1.1748158931732178, "learning_rate": 2.9498333914524687e-05, "loss": 0.1383, "step": 4996 }, { "epoch": 0.11011034171225217, "grad_norm": 1.038894772529602, "learning_rate": 2.9498059331161953e-05, "loss": 0.171, "step": 4997 }, { "epoch": 0.11013237700176833, "grad_norm": 1.0950801372528076, "learning_rate": 2.9497784673952834e-05, "loss": 0.122, "step": 4998 }, { "epoch": 0.11015441229128449, "grad_norm": 1.5058430433273315, "learning_rate": 2.9497509942898718e-05, "loss": 0.1392, "step": 4999 }, { "epoch": 0.11017644758080065, "grad_norm": 1.7291756868362427, "learning_rate": 2.9497235138001012e-05, "loss": 0.1086, "step": 5000 }, { "epoch": 0.11019848287031682, "grad_norm": 0.9883583188056946, "learning_rate": 2.9496960259261115e-05, "loss": 0.1156, "step": 5001 }, { "epoch": 0.11022051815983297, "grad_norm": 0.8547579646110535, "learning_rate": 2.949668530668042e-05, "loss": 0.1435, "step": 5002 }, { "epoch": 0.11024255344934913, "grad_norm": 1.360406517982483, "learning_rate": 2.9496410280260334e-05, "loss": 0.154, "step": 5003 }, { "epoch": 0.1102645887388653, "grad_norm": 1.4689862728118896, "learning_rate": 2.9496135180002263e-05, "loss": 0.143, "step": 5004 }, { "epoch": 0.11028662402838145, "grad_norm": 1.2211952209472656, "learning_rate": 2.9495860005907598e-05, "loss": 0.1421, "step": 5005 }, { "epoch": 0.11030865931789761, "grad_norm": 1.2365561723709106, "learning_rate": 2.9495584757977742e-05, "loss": 0.1632, "step": 5006 }, { "epoch": 0.11033069460741378, "grad_norm": 0.7040426135063171, "learning_rate": 2.9495309436214104e-05, "loss": 0.0812, "step": 5007 }, { "epoch": 0.11035272989692993, "grad_norm": 1.0375868082046509, "learning_rate": 2.949503404061808e-05, "loss": 0.1571, "step": 5008 }, { "epoch": 0.1103747651864461, "grad_norm": 1.0312215089797974, "learning_rate": 2.9494758571191077e-05, "loss": 0.0981, "step": 5009 }, { "epoch": 0.11039680047596226, "grad_norm": 1.1069717407226562, "learning_rate": 2.9494483027934493e-05, "loss": 0.1721, "step": 5010 }, { "epoch": 0.11041883576547841, "grad_norm": 1.0018260478973389, "learning_rate": 2.949420741084974e-05, "loss": 0.1301, "step": 5011 }, { "epoch": 0.11044087105499457, "grad_norm": 1.3097543716430664, "learning_rate": 2.949393171993821e-05, "loss": 0.1264, "step": 5012 }, { "epoch": 0.11046290634451074, "grad_norm": 0.6998305916786194, "learning_rate": 2.9493655955201313e-05, "loss": 0.1145, "step": 5013 }, { "epoch": 0.11048494163402689, "grad_norm": 0.9795088768005371, "learning_rate": 2.9493380116640462e-05, "loss": 0.1355, "step": 5014 }, { "epoch": 0.11050697692354305, "grad_norm": 1.042974591255188, "learning_rate": 2.9493104204257047e-05, "loss": 0.1447, "step": 5015 }, { "epoch": 0.11052901221305922, "grad_norm": 1.020890474319458, "learning_rate": 2.949282821805248e-05, "loss": 0.1393, "step": 5016 }, { "epoch": 0.11055104750257537, "grad_norm": 0.9902146458625793, "learning_rate": 2.949255215802817e-05, "loss": 0.1329, "step": 5017 }, { "epoch": 0.11057308279209153, "grad_norm": 1.840470552444458, "learning_rate": 2.9492276024185518e-05, "loss": 0.0824, "step": 5018 }, { "epoch": 0.1105951180816077, "grad_norm": 1.2093504667282104, "learning_rate": 2.9491999816525933e-05, "loss": 0.1197, "step": 5019 }, { "epoch": 0.11061715337112385, "grad_norm": 1.3700426816940308, "learning_rate": 2.949172353505082e-05, "loss": 0.1378, "step": 5020 }, { "epoch": 0.11063918866064001, "grad_norm": 0.839276909828186, "learning_rate": 2.9491447179761597e-05, "loss": 0.147, "step": 5021 }, { "epoch": 0.11066122395015618, "grad_norm": 1.5452160835266113, "learning_rate": 2.9491170750659654e-05, "loss": 0.1905, "step": 5022 }, { "epoch": 0.11068325923967233, "grad_norm": 0.9941307902336121, "learning_rate": 2.9490894247746413e-05, "loss": 0.0893, "step": 5023 }, { "epoch": 0.1107052945291885, "grad_norm": 0.9844896793365479, "learning_rate": 2.9490617671023274e-05, "loss": 0.1725, "step": 5024 }, { "epoch": 0.11072732981870466, "grad_norm": 1.800500750541687, "learning_rate": 2.9490341020491648e-05, "loss": 0.1436, "step": 5025 }, { "epoch": 0.11074936510822081, "grad_norm": 1.0006015300750732, "learning_rate": 2.9490064296152946e-05, "loss": 0.0957, "step": 5026 }, { "epoch": 0.11077140039773697, "grad_norm": 1.170661211013794, "learning_rate": 2.9489787498008577e-05, "loss": 0.1743, "step": 5027 }, { "epoch": 0.11079343568725314, "grad_norm": 0.7847769856452942, "learning_rate": 2.9489510626059948e-05, "loss": 0.143, "step": 5028 }, { "epoch": 0.11081547097676929, "grad_norm": 0.8957474827766418, "learning_rate": 2.9489233680308473e-05, "loss": 0.132, "step": 5029 }, { "epoch": 0.11083750626628545, "grad_norm": 0.9832891821861267, "learning_rate": 2.9488956660755564e-05, "loss": 0.1309, "step": 5030 }, { "epoch": 0.11085954155580162, "grad_norm": 1.5430395603179932, "learning_rate": 2.9488679567402625e-05, "loss": 0.158, "step": 5031 }, { "epoch": 0.11088157684531777, "grad_norm": 1.4189507961273193, "learning_rate": 2.948840240025107e-05, "loss": 0.1344, "step": 5032 }, { "epoch": 0.11090361213483393, "grad_norm": 1.0750645399093628, "learning_rate": 2.9488125159302318e-05, "loss": 0.1162, "step": 5033 }, { "epoch": 0.1109256474243501, "grad_norm": 1.2532426118850708, "learning_rate": 2.9487847844557772e-05, "loss": 0.1684, "step": 5034 }, { "epoch": 0.11094768271386626, "grad_norm": 1.0395339727401733, "learning_rate": 2.9487570456018846e-05, "loss": 0.1644, "step": 5035 }, { "epoch": 0.11096971800338241, "grad_norm": 1.0829188823699951, "learning_rate": 2.948729299368696e-05, "loss": 0.1819, "step": 5036 }, { "epoch": 0.11099175329289858, "grad_norm": 1.536545753479004, "learning_rate": 2.948701545756352e-05, "loss": 0.2107, "step": 5037 }, { "epoch": 0.11101378858241474, "grad_norm": 1.0400093793869019, "learning_rate": 2.948673784764994e-05, "loss": 0.1118, "step": 5038 }, { "epoch": 0.1110358238719309, "grad_norm": 0.919094443321228, "learning_rate": 2.948646016394764e-05, "loss": 0.1367, "step": 5039 }, { "epoch": 0.11105785916144706, "grad_norm": 1.3706742525100708, "learning_rate": 2.9486182406458023e-05, "loss": 0.1161, "step": 5040 }, { "epoch": 0.11107989445096322, "grad_norm": 1.3386179208755493, "learning_rate": 2.9485904575182516e-05, "loss": 0.1317, "step": 5041 }, { "epoch": 0.11110192974047937, "grad_norm": 0.9200447201728821, "learning_rate": 2.9485626670122526e-05, "loss": 0.1223, "step": 5042 }, { "epoch": 0.11112396502999554, "grad_norm": 1.1861003637313843, "learning_rate": 2.9485348691279474e-05, "loss": 0.1053, "step": 5043 }, { "epoch": 0.1111460003195117, "grad_norm": 1.0367099046707153, "learning_rate": 2.948507063865477e-05, "loss": 0.1454, "step": 5044 }, { "epoch": 0.11116803560902785, "grad_norm": 1.2838382720947266, "learning_rate": 2.9484792512249837e-05, "loss": 0.1631, "step": 5045 }, { "epoch": 0.11119007089854402, "grad_norm": 1.01443350315094, "learning_rate": 2.948451431206609e-05, "loss": 0.1084, "step": 5046 }, { "epoch": 0.11121210618806018, "grad_norm": 1.269279956817627, "learning_rate": 2.948423603810494e-05, "loss": 0.1478, "step": 5047 }, { "epoch": 0.11123414147757633, "grad_norm": 1.2531081438064575, "learning_rate": 2.9483957690367814e-05, "loss": 0.1703, "step": 5048 }, { "epoch": 0.1112561767670925, "grad_norm": 0.9860968589782715, "learning_rate": 2.948367926885612e-05, "loss": 0.1404, "step": 5049 }, { "epoch": 0.11127821205660866, "grad_norm": 1.26311457157135, "learning_rate": 2.948340077357128e-05, "loss": 0.0799, "step": 5050 }, { "epoch": 0.11130024734612481, "grad_norm": 1.1180002689361572, "learning_rate": 2.948312220451472e-05, "loss": 0.1212, "step": 5051 }, { "epoch": 0.11132228263564098, "grad_norm": 1.032301664352417, "learning_rate": 2.948284356168785e-05, "loss": 0.1453, "step": 5052 }, { "epoch": 0.11134431792515714, "grad_norm": 1.2811291217803955, "learning_rate": 2.9482564845092082e-05, "loss": 0.1325, "step": 5053 }, { "epoch": 0.1113663532146733, "grad_norm": 0.652556300163269, "learning_rate": 2.9482286054728854e-05, "loss": 0.1298, "step": 5054 }, { "epoch": 0.11138838850418946, "grad_norm": 1.478729486465454, "learning_rate": 2.9482007190599577e-05, "loss": 0.1649, "step": 5055 }, { "epoch": 0.11141042379370562, "grad_norm": 1.1466783285140991, "learning_rate": 2.9481728252705672e-05, "loss": 0.1958, "step": 5056 }, { "epoch": 0.11143245908322177, "grad_norm": 1.0499684810638428, "learning_rate": 2.9481449241048554e-05, "loss": 0.1193, "step": 5057 }, { "epoch": 0.11145449437273794, "grad_norm": 1.2945852279663086, "learning_rate": 2.9481170155629652e-05, "loss": 0.1265, "step": 5058 }, { "epoch": 0.1114765296622541, "grad_norm": 0.7118908166885376, "learning_rate": 2.9480890996450386e-05, "loss": 0.1122, "step": 5059 }, { "epoch": 0.11149856495177025, "grad_norm": 1.143561601638794, "learning_rate": 2.9480611763512175e-05, "loss": 0.1291, "step": 5060 }, { "epoch": 0.11152060024128642, "grad_norm": 1.3190968036651611, "learning_rate": 2.9480332456816446e-05, "loss": 0.1652, "step": 5061 }, { "epoch": 0.11154263553080258, "grad_norm": 0.9722155928611755, "learning_rate": 2.948005307636462e-05, "loss": 0.1612, "step": 5062 }, { "epoch": 0.11156467082031873, "grad_norm": 1.3672946691513062, "learning_rate": 2.9479773622158112e-05, "loss": 0.1362, "step": 5063 }, { "epoch": 0.1115867061098349, "grad_norm": 0.7850655913352966, "learning_rate": 2.947949409419836e-05, "loss": 0.1611, "step": 5064 }, { "epoch": 0.11160874139935106, "grad_norm": 0.992015540599823, "learning_rate": 2.9479214492486777e-05, "loss": 0.1192, "step": 5065 }, { "epoch": 0.11163077668886721, "grad_norm": 0.9700567722320557, "learning_rate": 2.947893481702479e-05, "loss": 0.1329, "step": 5066 }, { "epoch": 0.11165281197838338, "grad_norm": 1.1512162685394287, "learning_rate": 2.947865506781382e-05, "loss": 0.0971, "step": 5067 }, { "epoch": 0.11167484726789954, "grad_norm": 1.456598162651062, "learning_rate": 2.94783752448553e-05, "loss": 0.1203, "step": 5068 }, { "epoch": 0.11169688255741571, "grad_norm": 1.4589481353759766, "learning_rate": 2.9478095348150654e-05, "loss": 0.1786, "step": 5069 }, { "epoch": 0.11171891784693186, "grad_norm": 1.594496488571167, "learning_rate": 2.9477815377701297e-05, "loss": 0.1201, "step": 5070 }, { "epoch": 0.11174095313644802, "grad_norm": 0.9072903394699097, "learning_rate": 2.9477535333508668e-05, "loss": 0.1489, "step": 5071 }, { "epoch": 0.11176298842596419, "grad_norm": 1.049399971961975, "learning_rate": 2.9477255215574185e-05, "loss": 0.1572, "step": 5072 }, { "epoch": 0.11178502371548034, "grad_norm": 1.3033256530761719, "learning_rate": 2.947697502389928e-05, "loss": 0.1102, "step": 5073 }, { "epoch": 0.1118070590049965, "grad_norm": 1.0645238161087036, "learning_rate": 2.947669475848538e-05, "loss": 0.1332, "step": 5074 }, { "epoch": 0.11182909429451267, "grad_norm": 1.187144160270691, "learning_rate": 2.947641441933391e-05, "loss": 0.1065, "step": 5075 }, { "epoch": 0.11185112958402882, "grad_norm": 1.1627930402755737, "learning_rate": 2.947613400644629e-05, "loss": 0.1294, "step": 5076 }, { "epoch": 0.11187316487354498, "grad_norm": 0.7880868315696716, "learning_rate": 2.9475853519823965e-05, "loss": 0.1229, "step": 5077 }, { "epoch": 0.11189520016306115, "grad_norm": 1.262622594833374, "learning_rate": 2.9475572959468354e-05, "loss": 0.1243, "step": 5078 }, { "epoch": 0.1119172354525773, "grad_norm": 1.8692677021026611, "learning_rate": 2.9475292325380887e-05, "loss": 0.142, "step": 5079 }, { "epoch": 0.11193927074209346, "grad_norm": 1.0640026330947876, "learning_rate": 2.9475011617562996e-05, "loss": 0.0934, "step": 5080 }, { "epoch": 0.11196130603160963, "grad_norm": 0.9289239048957825, "learning_rate": 2.9474730836016104e-05, "loss": 0.0961, "step": 5081 }, { "epoch": 0.11198334132112578, "grad_norm": 1.317778468132019, "learning_rate": 2.9474449980741653e-05, "loss": 0.1625, "step": 5082 }, { "epoch": 0.11200537661064194, "grad_norm": 1.1273627281188965, "learning_rate": 2.9474169051741057e-05, "loss": 0.1464, "step": 5083 }, { "epoch": 0.11202741190015811, "grad_norm": 1.116272211074829, "learning_rate": 2.9473888049015763e-05, "loss": 0.1188, "step": 5084 }, { "epoch": 0.11204944718967426, "grad_norm": 1.262495517730713, "learning_rate": 2.9473606972567196e-05, "loss": 0.1114, "step": 5085 }, { "epoch": 0.11207148247919042, "grad_norm": 1.2396570444107056, "learning_rate": 2.9473325822396783e-05, "loss": 0.1157, "step": 5086 }, { "epoch": 0.11209351776870659, "grad_norm": 1.4517693519592285, "learning_rate": 2.9473044598505967e-05, "loss": 0.1101, "step": 5087 }, { "epoch": 0.11211555305822274, "grad_norm": 1.3712348937988281, "learning_rate": 2.9472763300896168e-05, "loss": 0.1198, "step": 5088 }, { "epoch": 0.1121375883477389, "grad_norm": 0.9891014695167542, "learning_rate": 2.9472481929568828e-05, "loss": 0.1035, "step": 5089 }, { "epoch": 0.11215962363725507, "grad_norm": 1.2278803586959839, "learning_rate": 2.9472200484525372e-05, "loss": 0.1153, "step": 5090 }, { "epoch": 0.11218165892677122, "grad_norm": 1.1974022388458252, "learning_rate": 2.947191896576724e-05, "loss": 0.1089, "step": 5091 }, { "epoch": 0.11220369421628738, "grad_norm": 0.7931689620018005, "learning_rate": 2.947163737329587e-05, "loss": 0.1059, "step": 5092 }, { "epoch": 0.11222572950580355, "grad_norm": 0.9265859723091125, "learning_rate": 2.9471355707112683e-05, "loss": 0.1213, "step": 5093 }, { "epoch": 0.1122477647953197, "grad_norm": 1.1974563598632812, "learning_rate": 2.947107396721913e-05, "loss": 0.156, "step": 5094 }, { "epoch": 0.11226980008483586, "grad_norm": 1.5347867012023926, "learning_rate": 2.947079215361663e-05, "loss": 0.1341, "step": 5095 }, { "epoch": 0.11229183537435203, "grad_norm": 1.2862931489944458, "learning_rate": 2.9470510266306626e-05, "loss": 0.1684, "step": 5096 }, { "epoch": 0.11231387066386818, "grad_norm": 1.6145015954971313, "learning_rate": 2.9470228305290557e-05, "loss": 0.149, "step": 5097 }, { "epoch": 0.11233590595338434, "grad_norm": 0.8508175015449524, "learning_rate": 2.946994627056985e-05, "loss": 0.1059, "step": 5098 }, { "epoch": 0.11235794124290051, "grad_norm": 1.0846848487854004, "learning_rate": 2.9469664162145953e-05, "loss": 0.1478, "step": 5099 }, { "epoch": 0.11237997653241666, "grad_norm": 1.37274968624115, "learning_rate": 2.9469381980020292e-05, "loss": 0.1822, "step": 5100 }, { "epoch": 0.11240201182193282, "grad_norm": 0.9226073622703552, "learning_rate": 2.9469099724194314e-05, "loss": 0.1042, "step": 5101 }, { "epoch": 0.11242404711144899, "grad_norm": 1.1860806941986084, "learning_rate": 2.946881739466945e-05, "loss": 0.1622, "step": 5102 }, { "epoch": 0.11244608240096514, "grad_norm": 0.9370497465133667, "learning_rate": 2.9468534991447142e-05, "loss": 0.1732, "step": 5103 }, { "epoch": 0.1124681176904813, "grad_norm": 1.1639204025268555, "learning_rate": 2.9468252514528825e-05, "loss": 0.1504, "step": 5104 }, { "epoch": 0.11249015297999747, "grad_norm": 1.67376708984375, "learning_rate": 2.946796996391594e-05, "loss": 0.1507, "step": 5105 }, { "epoch": 0.11251218826951363, "grad_norm": 0.8707050085067749, "learning_rate": 2.946768733960993e-05, "loss": 0.1785, "step": 5106 }, { "epoch": 0.11253422355902978, "grad_norm": 1.2568145990371704, "learning_rate": 2.9467404641612224e-05, "loss": 0.1176, "step": 5107 }, { "epoch": 0.11255625884854595, "grad_norm": 1.0898642539978027, "learning_rate": 2.946712186992427e-05, "loss": 0.1202, "step": 5108 }, { "epoch": 0.11257829413806211, "grad_norm": 1.3506404161453247, "learning_rate": 2.9466839024547507e-05, "loss": 0.1448, "step": 5109 }, { "epoch": 0.11260032942757826, "grad_norm": 1.180798053741455, "learning_rate": 2.946655610548337e-05, "loss": 0.1052, "step": 5110 }, { "epoch": 0.11262236471709443, "grad_norm": 1.6043707132339478, "learning_rate": 2.946627311273331e-05, "loss": 0.182, "step": 5111 }, { "epoch": 0.11264440000661059, "grad_norm": 1.1176601648330688, "learning_rate": 2.946599004629876e-05, "loss": 0.1846, "step": 5112 }, { "epoch": 0.11266643529612674, "grad_norm": 0.8873420357704163, "learning_rate": 2.9465706906181174e-05, "loss": 0.1171, "step": 5113 }, { "epoch": 0.11268847058564291, "grad_norm": 1.1199091672897339, "learning_rate": 2.9465423692381977e-05, "loss": 0.1218, "step": 5114 }, { "epoch": 0.11271050587515907, "grad_norm": 1.2902686595916748, "learning_rate": 2.9465140404902626e-05, "loss": 0.1662, "step": 5115 }, { "epoch": 0.11273254116467522, "grad_norm": 1.2030295133590698, "learning_rate": 2.9464857043744558e-05, "loss": 0.1378, "step": 5116 }, { "epoch": 0.11275457645419139, "grad_norm": 1.0702019929885864, "learning_rate": 2.9464573608909213e-05, "loss": 0.1001, "step": 5117 }, { "epoch": 0.11277661174370755, "grad_norm": 2.003079891204834, "learning_rate": 2.9464290100398037e-05, "loss": 0.126, "step": 5118 }, { "epoch": 0.1127986470332237, "grad_norm": 1.303318977355957, "learning_rate": 2.9464006518212478e-05, "loss": 0.1321, "step": 5119 }, { "epoch": 0.11282068232273987, "grad_norm": 1.1548645496368408, "learning_rate": 2.9463722862353978e-05, "loss": 0.1378, "step": 5120 }, { "epoch": 0.11284271761225603, "grad_norm": 1.3616503477096558, "learning_rate": 2.946343913282398e-05, "loss": 0.1182, "step": 5121 }, { "epoch": 0.11286475290177218, "grad_norm": 1.2971973419189453, "learning_rate": 2.9463155329623928e-05, "loss": 0.1433, "step": 5122 }, { "epoch": 0.11288678819128835, "grad_norm": 1.2452000379562378, "learning_rate": 2.9462871452755275e-05, "loss": 0.1587, "step": 5123 }, { "epoch": 0.11290882348080451, "grad_norm": 1.5546109676361084, "learning_rate": 2.946258750221946e-05, "loss": 0.1708, "step": 5124 }, { "epoch": 0.11293085877032066, "grad_norm": 1.2293388843536377, "learning_rate": 2.9462303478017933e-05, "loss": 0.1628, "step": 5125 }, { "epoch": 0.11295289405983683, "grad_norm": 0.9226176738739014, "learning_rate": 2.9462019380152136e-05, "loss": 0.0744, "step": 5126 }, { "epoch": 0.11297492934935299, "grad_norm": 1.1552413702011108, "learning_rate": 2.9461735208623522e-05, "loss": 0.1033, "step": 5127 }, { "epoch": 0.11299696463886914, "grad_norm": 3.041079521179199, "learning_rate": 2.9461450963433532e-05, "loss": 0.1132, "step": 5128 }, { "epoch": 0.11301899992838531, "grad_norm": 1.017012119293213, "learning_rate": 2.9461166644583618e-05, "loss": 0.1118, "step": 5129 }, { "epoch": 0.11304103521790147, "grad_norm": 1.365840196609497, "learning_rate": 2.9460882252075228e-05, "loss": 0.1461, "step": 5130 }, { "epoch": 0.11306307050741762, "grad_norm": 1.533913493156433, "learning_rate": 2.9460597785909812e-05, "loss": 0.1503, "step": 5131 }, { "epoch": 0.11308510579693379, "grad_norm": 1.0580910444259644, "learning_rate": 2.9460313246088823e-05, "loss": 0.1552, "step": 5132 }, { "epoch": 0.11310714108644995, "grad_norm": 2.404431104660034, "learning_rate": 2.9460028632613695e-05, "loss": 0.2169, "step": 5133 }, { "epoch": 0.1131291763759661, "grad_norm": 1.3738212585449219, "learning_rate": 2.945974394548589e-05, "loss": 0.112, "step": 5134 }, { "epoch": 0.11315121166548227, "grad_norm": 1.8058185577392578, "learning_rate": 2.9459459184706857e-05, "loss": 0.1616, "step": 5135 }, { "epoch": 0.11317324695499843, "grad_norm": 1.1756902933120728, "learning_rate": 2.945917435027804e-05, "loss": 0.1253, "step": 5136 }, { "epoch": 0.11319528224451458, "grad_norm": 1.137355089187622, "learning_rate": 2.94588894422009e-05, "loss": 0.1256, "step": 5137 }, { "epoch": 0.11321731753403075, "grad_norm": 0.8466055393218994, "learning_rate": 2.945860446047688e-05, "loss": 0.1259, "step": 5138 }, { "epoch": 0.11323935282354691, "grad_norm": 2.00675368309021, "learning_rate": 2.945831940510743e-05, "loss": 0.1529, "step": 5139 }, { "epoch": 0.11326138811306308, "grad_norm": 1.2878706455230713, "learning_rate": 2.945803427609401e-05, "loss": 0.1385, "step": 5140 }, { "epoch": 0.11328342340257923, "grad_norm": 0.8097013235092163, "learning_rate": 2.945774907343807e-05, "loss": 0.0998, "step": 5141 }, { "epoch": 0.11330545869209539, "grad_norm": 0.8953118324279785, "learning_rate": 2.945746379714106e-05, "loss": 0.1229, "step": 5142 }, { "epoch": 0.11332749398161156, "grad_norm": 0.6127476096153259, "learning_rate": 2.9457178447204434e-05, "loss": 0.1628, "step": 5143 }, { "epoch": 0.11334952927112771, "grad_norm": 1.3660694360733032, "learning_rate": 2.9456893023629644e-05, "loss": 0.1371, "step": 5144 }, { "epoch": 0.11337156456064387, "grad_norm": 1.5063637495040894, "learning_rate": 2.9456607526418147e-05, "loss": 0.1994, "step": 5145 }, { "epoch": 0.11339359985016004, "grad_norm": 1.2548550367355347, "learning_rate": 2.9456321955571392e-05, "loss": 0.1346, "step": 5146 }, { "epoch": 0.11341563513967619, "grad_norm": 1.2117851972579956, "learning_rate": 2.945603631109084e-05, "loss": 0.1242, "step": 5147 }, { "epoch": 0.11343767042919235, "grad_norm": 1.017140507698059, "learning_rate": 2.9455750592977947e-05, "loss": 0.1511, "step": 5148 }, { "epoch": 0.11345970571870852, "grad_norm": 1.1445516347885132, "learning_rate": 2.945546480123416e-05, "loss": 0.113, "step": 5149 }, { "epoch": 0.11348174100822467, "grad_norm": 1.060748815536499, "learning_rate": 2.9455178935860942e-05, "loss": 0.1344, "step": 5150 }, { "epoch": 0.11350377629774083, "grad_norm": 1.5857045650482178, "learning_rate": 2.9454892996859743e-05, "loss": 0.1496, "step": 5151 }, { "epoch": 0.113525811587257, "grad_norm": 1.7509394884109497, "learning_rate": 2.9454606984232024e-05, "loss": 0.1131, "step": 5152 }, { "epoch": 0.11354784687677315, "grad_norm": 0.8412020802497864, "learning_rate": 2.945432089797924e-05, "loss": 0.1461, "step": 5153 }, { "epoch": 0.11356988216628931, "grad_norm": 1.004692554473877, "learning_rate": 2.945403473810285e-05, "loss": 0.1665, "step": 5154 }, { "epoch": 0.11359191745580548, "grad_norm": 1.544537901878357, "learning_rate": 2.945374850460431e-05, "loss": 0.1419, "step": 5155 }, { "epoch": 0.11361395274532163, "grad_norm": 1.2992897033691406, "learning_rate": 2.9453462197485076e-05, "loss": 0.1056, "step": 5156 }, { "epoch": 0.11363598803483779, "grad_norm": 1.0474765300750732, "learning_rate": 2.945317581674661e-05, "loss": 0.1472, "step": 5157 }, { "epoch": 0.11365802332435396, "grad_norm": 3.4905550479888916, "learning_rate": 2.945288936239037e-05, "loss": 0.1715, "step": 5158 }, { "epoch": 0.11368005861387011, "grad_norm": 1.3992631435394287, "learning_rate": 2.9452602834417813e-05, "loss": 0.1922, "step": 5159 }, { "epoch": 0.11370209390338627, "grad_norm": 1.2340924739837646, "learning_rate": 2.94523162328304e-05, "loss": 0.1276, "step": 5160 }, { "epoch": 0.11372412919290244, "grad_norm": 1.6899669170379639, "learning_rate": 2.945202955762959e-05, "loss": 0.1149, "step": 5161 }, { "epoch": 0.11374616448241859, "grad_norm": 1.9815113544464111, "learning_rate": 2.9451742808816843e-05, "loss": 0.168, "step": 5162 }, { "epoch": 0.11376819977193475, "grad_norm": 1.0758174657821655, "learning_rate": 2.9451455986393626e-05, "loss": 0.137, "step": 5163 }, { "epoch": 0.11379023506145092, "grad_norm": 2.1571543216705322, "learning_rate": 2.945116909036139e-05, "loss": 0.1488, "step": 5164 }, { "epoch": 0.11381227035096707, "grad_norm": 1.2893835306167603, "learning_rate": 2.9450882120721596e-05, "loss": 0.1285, "step": 5165 }, { "epoch": 0.11383430564048323, "grad_norm": 0.9219574332237244, "learning_rate": 2.9450595077475717e-05, "loss": 0.1676, "step": 5166 }, { "epoch": 0.1138563409299994, "grad_norm": 0.7422761917114258, "learning_rate": 2.9450307960625203e-05, "loss": 0.0994, "step": 5167 }, { "epoch": 0.11387837621951555, "grad_norm": 1.1352754831314087, "learning_rate": 2.9450020770171523e-05, "loss": 0.1092, "step": 5168 }, { "epoch": 0.11390041150903171, "grad_norm": 1.7325351238250732, "learning_rate": 2.9449733506116142e-05, "loss": 0.1839, "step": 5169 }, { "epoch": 0.11392244679854788, "grad_norm": 1.359648585319519, "learning_rate": 2.944944616846052e-05, "loss": 0.1464, "step": 5170 }, { "epoch": 0.11394448208806403, "grad_norm": 1.4643101692199707, "learning_rate": 2.9449158757206117e-05, "loss": 0.2035, "step": 5171 }, { "epoch": 0.11396651737758019, "grad_norm": 0.9704395532608032, "learning_rate": 2.9448871272354397e-05, "loss": 0.1044, "step": 5172 }, { "epoch": 0.11398855266709636, "grad_norm": 0.9214587211608887, "learning_rate": 2.944858371390683e-05, "loss": 0.0982, "step": 5173 }, { "epoch": 0.11401058795661252, "grad_norm": 1.129062533378601, "learning_rate": 2.944829608186488e-05, "loss": 0.145, "step": 5174 }, { "epoch": 0.11403262324612867, "grad_norm": 1.433153748512268, "learning_rate": 2.944800837623001e-05, "loss": 0.1186, "step": 5175 }, { "epoch": 0.11405465853564484, "grad_norm": 0.9066229462623596, "learning_rate": 2.944772059700369e-05, "loss": 0.1352, "step": 5176 }, { "epoch": 0.114076693825161, "grad_norm": 1.1056177616119385, "learning_rate": 2.9447432744187372e-05, "loss": 0.1325, "step": 5177 }, { "epoch": 0.11409872911467715, "grad_norm": 1.1685324907302856, "learning_rate": 2.944714481778254e-05, "loss": 0.1386, "step": 5178 }, { "epoch": 0.11412076440419332, "grad_norm": 0.9942716360092163, "learning_rate": 2.9446856817790646e-05, "loss": 0.1331, "step": 5179 }, { "epoch": 0.11414279969370948, "grad_norm": 1.064029335975647, "learning_rate": 2.9446568744213166e-05, "loss": 0.1523, "step": 5180 }, { "epoch": 0.11416483498322563, "grad_norm": 1.6789791584014893, "learning_rate": 2.9446280597051568e-05, "loss": 0.1314, "step": 5181 }, { "epoch": 0.1141868702727418, "grad_norm": 0.983445405960083, "learning_rate": 2.9445992376307313e-05, "loss": 0.0896, "step": 5182 }, { "epoch": 0.11420890556225796, "grad_norm": 1.541459083557129, "learning_rate": 2.9445704081981872e-05, "loss": 0.1291, "step": 5183 }, { "epoch": 0.11423094085177411, "grad_norm": 1.466090202331543, "learning_rate": 2.9445415714076712e-05, "loss": 0.1069, "step": 5184 }, { "epoch": 0.11425297614129028, "grad_norm": 1.308444619178772, "learning_rate": 2.9445127272593306e-05, "loss": 0.1167, "step": 5185 }, { "epoch": 0.11427501143080644, "grad_norm": 1.1721034049987793, "learning_rate": 2.944483875753312e-05, "loss": 0.1255, "step": 5186 }, { "epoch": 0.11429704672032259, "grad_norm": 1.5102907419204712, "learning_rate": 2.9444550168897623e-05, "loss": 0.109, "step": 5187 }, { "epoch": 0.11431908200983876, "grad_norm": 1.0413970947265625, "learning_rate": 2.944426150668829e-05, "loss": 0.1135, "step": 5188 }, { "epoch": 0.11434111729935492, "grad_norm": 1.3455219268798828, "learning_rate": 2.9443972770906584e-05, "loss": 0.1349, "step": 5189 }, { "epoch": 0.11436315258887107, "grad_norm": 0.9671083092689514, "learning_rate": 2.944368396155398e-05, "loss": 0.1227, "step": 5190 }, { "epoch": 0.11438518787838724, "grad_norm": 1.7146052122116089, "learning_rate": 2.9443395078631952e-05, "loss": 0.1415, "step": 5191 }, { "epoch": 0.1144072231679034, "grad_norm": 1.0293482542037964, "learning_rate": 2.9443106122141964e-05, "loss": 0.1197, "step": 5192 }, { "epoch": 0.11442925845741955, "grad_norm": 3.100731611251831, "learning_rate": 2.944281709208549e-05, "loss": 0.1219, "step": 5193 }, { "epoch": 0.11445129374693572, "grad_norm": 1.3651683330535889, "learning_rate": 2.9442527988464006e-05, "loss": 0.1197, "step": 5194 }, { "epoch": 0.11447332903645188, "grad_norm": 1.237504005432129, "learning_rate": 2.944223881127898e-05, "loss": 0.1493, "step": 5195 }, { "epoch": 0.11449536432596803, "grad_norm": 1.1321882009506226, "learning_rate": 2.944194956053189e-05, "loss": 0.1178, "step": 5196 }, { "epoch": 0.1145173996154842, "grad_norm": 1.5354543924331665, "learning_rate": 2.9441660236224208e-05, "loss": 0.1624, "step": 5197 }, { "epoch": 0.11453943490500036, "grad_norm": 0.9418142437934875, "learning_rate": 2.9441370838357402e-05, "loss": 0.1129, "step": 5198 }, { "epoch": 0.11456147019451651, "grad_norm": 1.2716199159622192, "learning_rate": 2.9441081366932955e-05, "loss": 0.1534, "step": 5199 }, { "epoch": 0.11458350548403268, "grad_norm": 1.1124193668365479, "learning_rate": 2.944079182195233e-05, "loss": 0.1544, "step": 5200 }, { "epoch": 0.11460554077354884, "grad_norm": 1.2739537954330444, "learning_rate": 2.9440502203417015e-05, "loss": 0.115, "step": 5201 }, { "epoch": 0.11462757606306499, "grad_norm": 1.5329275131225586, "learning_rate": 2.9440212511328473e-05, "loss": 0.131, "step": 5202 }, { "epoch": 0.11464961135258116, "grad_norm": 1.535954236984253, "learning_rate": 2.943992274568819e-05, "loss": 0.1665, "step": 5203 }, { "epoch": 0.11467164664209732, "grad_norm": 1.228847622871399, "learning_rate": 2.9439632906497634e-05, "loss": 0.127, "step": 5204 }, { "epoch": 0.11469368193161347, "grad_norm": 0.8892544507980347, "learning_rate": 2.9439342993758284e-05, "loss": 0.1139, "step": 5205 }, { "epoch": 0.11471571722112964, "grad_norm": 1.772868037223816, "learning_rate": 2.9439053007471622e-05, "loss": 0.1781, "step": 5206 }, { "epoch": 0.1147377525106458, "grad_norm": 1.1910626888275146, "learning_rate": 2.9438762947639116e-05, "loss": 0.1553, "step": 5207 }, { "epoch": 0.11475978780016195, "grad_norm": 1.2287061214447021, "learning_rate": 2.9438472814262245e-05, "loss": 0.1377, "step": 5208 }, { "epoch": 0.11478182308967812, "grad_norm": 0.8407360315322876, "learning_rate": 2.9438182607342498e-05, "loss": 0.0989, "step": 5209 }, { "epoch": 0.11480385837919428, "grad_norm": 2.5598886013031006, "learning_rate": 2.9437892326881334e-05, "loss": 0.1415, "step": 5210 }, { "epoch": 0.11482589366871045, "grad_norm": 1.1106995344161987, "learning_rate": 2.943760197288025e-05, "loss": 0.1493, "step": 5211 }, { "epoch": 0.1148479289582266, "grad_norm": 1.2946299314498901, "learning_rate": 2.9437311545340717e-05, "loss": 0.1626, "step": 5212 }, { "epoch": 0.11486996424774276, "grad_norm": 1.5355132818222046, "learning_rate": 2.943702104426421e-05, "loss": 0.1424, "step": 5213 }, { "epoch": 0.11489199953725893, "grad_norm": 1.3235244750976562, "learning_rate": 2.9436730469652215e-05, "loss": 0.1068, "step": 5214 }, { "epoch": 0.11491403482677508, "grad_norm": 1.1413697004318237, "learning_rate": 2.9436439821506212e-05, "loss": 0.1457, "step": 5215 }, { "epoch": 0.11493607011629124, "grad_norm": 0.9633538126945496, "learning_rate": 2.9436149099827676e-05, "loss": 0.1211, "step": 5216 }, { "epoch": 0.1149581054058074, "grad_norm": 0.7115015387535095, "learning_rate": 2.9435858304618095e-05, "loss": 0.1049, "step": 5217 }, { "epoch": 0.11498014069532356, "grad_norm": 0.9086799621582031, "learning_rate": 2.9435567435878943e-05, "loss": 0.119, "step": 5218 }, { "epoch": 0.11500217598483972, "grad_norm": 1.181843876838684, "learning_rate": 2.943527649361171e-05, "loss": 0.1245, "step": 5219 }, { "epoch": 0.11502421127435589, "grad_norm": 0.8342474102973938, "learning_rate": 2.9434985477817866e-05, "loss": 0.1279, "step": 5220 }, { "epoch": 0.11504624656387204, "grad_norm": 1.183906078338623, "learning_rate": 2.9434694388498907e-05, "loss": 0.1633, "step": 5221 }, { "epoch": 0.1150682818533882, "grad_norm": 0.9419217109680176, "learning_rate": 2.9434403225656308e-05, "loss": 0.1251, "step": 5222 }, { "epoch": 0.11509031714290437, "grad_norm": 0.8923365473747253, "learning_rate": 2.943411198929155e-05, "loss": 0.1236, "step": 5223 }, { "epoch": 0.11511235243242052, "grad_norm": 0.9500687122344971, "learning_rate": 2.9433820679406124e-05, "loss": 0.119, "step": 5224 }, { "epoch": 0.11513438772193668, "grad_norm": 1.1423335075378418, "learning_rate": 2.9433529296001506e-05, "loss": 0.1651, "step": 5225 }, { "epoch": 0.11515642301145285, "grad_norm": 1.7644543647766113, "learning_rate": 2.9433237839079182e-05, "loss": 0.1182, "step": 5226 }, { "epoch": 0.115178458300969, "grad_norm": 0.9720064401626587, "learning_rate": 2.943294630864064e-05, "loss": 0.1317, "step": 5227 }, { "epoch": 0.11520049359048516, "grad_norm": 1.4783035516738892, "learning_rate": 2.943265470468736e-05, "loss": 0.1707, "step": 5228 }, { "epoch": 0.11522252888000133, "grad_norm": 1.5816699266433716, "learning_rate": 2.9432363027220832e-05, "loss": 0.1174, "step": 5229 }, { "epoch": 0.11524456416951748, "grad_norm": 0.8588054776191711, "learning_rate": 2.943207127624254e-05, "loss": 0.123, "step": 5230 }, { "epoch": 0.11526659945903364, "grad_norm": 0.9391513466835022, "learning_rate": 2.943177945175397e-05, "loss": 0.111, "step": 5231 }, { "epoch": 0.1152886347485498, "grad_norm": 1.8949426412582397, "learning_rate": 2.943148755375661e-05, "loss": 0.16, "step": 5232 }, { "epoch": 0.11531067003806596, "grad_norm": 1.3636159896850586, "learning_rate": 2.9431195582251946e-05, "loss": 0.2438, "step": 5233 }, { "epoch": 0.11533270532758212, "grad_norm": 1.7051987648010254, "learning_rate": 2.943090353724146e-05, "loss": 0.1165, "step": 5234 }, { "epoch": 0.11535474061709829, "grad_norm": 1.0822830200195312, "learning_rate": 2.9430611418726644e-05, "loss": 0.134, "step": 5235 }, { "epoch": 0.11537677590661444, "grad_norm": 1.2129721641540527, "learning_rate": 2.943031922670899e-05, "loss": 0.1185, "step": 5236 }, { "epoch": 0.1153988111961306, "grad_norm": 0.9992932081222534, "learning_rate": 2.943002696118998e-05, "loss": 0.1164, "step": 5237 }, { "epoch": 0.11542084648564677, "grad_norm": 1.1620193719863892, "learning_rate": 2.94297346221711e-05, "loss": 0.1339, "step": 5238 }, { "epoch": 0.11544288177516292, "grad_norm": 1.258603811264038, "learning_rate": 2.9429442209653853e-05, "loss": 0.1255, "step": 5239 }, { "epoch": 0.11546491706467908, "grad_norm": 1.1210565567016602, "learning_rate": 2.942914972363971e-05, "loss": 0.1642, "step": 5240 }, { "epoch": 0.11548695235419525, "grad_norm": 1.5491948127746582, "learning_rate": 2.9428857164130175e-05, "loss": 0.1519, "step": 5241 }, { "epoch": 0.1155089876437114, "grad_norm": 1.3474935293197632, "learning_rate": 2.9428564531126734e-05, "loss": 0.1353, "step": 5242 }, { "epoch": 0.11553102293322756, "grad_norm": 1.469917893409729, "learning_rate": 2.9428271824630875e-05, "loss": 0.1426, "step": 5243 }, { "epoch": 0.11555305822274373, "grad_norm": 1.069310188293457, "learning_rate": 2.9427979044644087e-05, "loss": 0.1084, "step": 5244 }, { "epoch": 0.11557509351225989, "grad_norm": 1.1568852663040161, "learning_rate": 2.942768619116787e-05, "loss": 0.1482, "step": 5245 }, { "epoch": 0.11559712880177604, "grad_norm": 0.9771581292152405, "learning_rate": 2.9427393264203704e-05, "loss": 0.1151, "step": 5246 }, { "epoch": 0.1156191640912922, "grad_norm": 1.0731574296951294, "learning_rate": 2.94271002637531e-05, "loss": 0.1317, "step": 5247 }, { "epoch": 0.11564119938080837, "grad_norm": 0.9567338228225708, "learning_rate": 2.9426807189817525e-05, "loss": 0.0748, "step": 5248 }, { "epoch": 0.11566323467032452, "grad_norm": 1.1286988258361816, "learning_rate": 2.942651404239849e-05, "loss": 0.121, "step": 5249 }, { "epoch": 0.11568526995984069, "grad_norm": 1.481779932975769, "learning_rate": 2.9426220821497483e-05, "loss": 0.1706, "step": 5250 }, { "epoch": 0.11570730524935685, "grad_norm": 1.8448208570480347, "learning_rate": 2.9425927527115995e-05, "loss": 0.1368, "step": 5251 }, { "epoch": 0.115729340538873, "grad_norm": 0.9619125723838806, "learning_rate": 2.9425634159255524e-05, "loss": 0.1545, "step": 5252 }, { "epoch": 0.11575137582838917, "grad_norm": 1.236469030380249, "learning_rate": 2.9425340717917564e-05, "loss": 0.1595, "step": 5253 }, { "epoch": 0.11577341111790533, "grad_norm": 1.3505078554153442, "learning_rate": 2.942504720310361e-05, "loss": 0.1649, "step": 5254 }, { "epoch": 0.11579544640742148, "grad_norm": 0.8253794312477112, "learning_rate": 2.942475361481515e-05, "loss": 0.1128, "step": 5255 }, { "epoch": 0.11581748169693765, "grad_norm": 1.105032205581665, "learning_rate": 2.942445995305369e-05, "loss": 0.1232, "step": 5256 }, { "epoch": 0.11583951698645381, "grad_norm": 1.0406111478805542, "learning_rate": 2.9424166217820718e-05, "loss": 0.1415, "step": 5257 }, { "epoch": 0.11586155227596996, "grad_norm": 1.3614425659179688, "learning_rate": 2.942387240911773e-05, "loss": 0.1954, "step": 5258 }, { "epoch": 0.11588358756548613, "grad_norm": 1.103408694267273, "learning_rate": 2.9423578526946227e-05, "loss": 0.139, "step": 5259 }, { "epoch": 0.11590562285500229, "grad_norm": 0.9963024258613586, "learning_rate": 2.9423284571307707e-05, "loss": 0.0817, "step": 5260 }, { "epoch": 0.11592765814451844, "grad_norm": 1.2069203853607178, "learning_rate": 2.9422990542203657e-05, "loss": 0.1434, "step": 5261 }, { "epoch": 0.1159496934340346, "grad_norm": 1.0673071146011353, "learning_rate": 2.9422696439635593e-05, "loss": 0.1011, "step": 5262 }, { "epoch": 0.11597172872355077, "grad_norm": 1.3425930738449097, "learning_rate": 2.942240226360499e-05, "loss": 0.1651, "step": 5263 }, { "epoch": 0.11599376401306692, "grad_norm": 1.224652886390686, "learning_rate": 2.9422108014113364e-05, "loss": 0.1344, "step": 5264 }, { "epoch": 0.11601579930258309, "grad_norm": 0.7047908902168274, "learning_rate": 2.942181369116221e-05, "loss": 0.0871, "step": 5265 }, { "epoch": 0.11603783459209925, "grad_norm": 1.518768072128296, "learning_rate": 2.9421519294753023e-05, "loss": 0.1267, "step": 5266 }, { "epoch": 0.1160598698816154, "grad_norm": 1.6110280752182007, "learning_rate": 2.9421224824887306e-05, "loss": 0.1333, "step": 5267 }, { "epoch": 0.11608190517113157, "grad_norm": 1.6853135824203491, "learning_rate": 2.942093028156656e-05, "loss": 0.1765, "step": 5268 }, { "epoch": 0.11610394046064773, "grad_norm": 1.721907138824463, "learning_rate": 2.9420635664792274e-05, "loss": 0.1074, "step": 5269 }, { "epoch": 0.11612597575016388, "grad_norm": 1.1764276027679443, "learning_rate": 2.9420340974565968e-05, "loss": 0.149, "step": 5270 }, { "epoch": 0.11614801103968005, "grad_norm": 0.8810126781463623, "learning_rate": 2.9420046210889125e-05, "loss": 0.083, "step": 5271 }, { "epoch": 0.11617004632919621, "grad_norm": 1.3104629516601562, "learning_rate": 2.941975137376326e-05, "loss": 0.1458, "step": 5272 }, { "epoch": 0.11619208161871236, "grad_norm": 1.1485856771469116, "learning_rate": 2.9419456463189865e-05, "loss": 0.1499, "step": 5273 }, { "epoch": 0.11621411690822853, "grad_norm": 1.191196322441101, "learning_rate": 2.9419161479170446e-05, "loss": 0.12, "step": 5274 }, { "epoch": 0.11623615219774469, "grad_norm": 1.0990195274353027, "learning_rate": 2.9418866421706504e-05, "loss": 0.1184, "step": 5275 }, { "epoch": 0.11625818748726084, "grad_norm": 1.0674103498458862, "learning_rate": 2.9418571290799544e-05, "loss": 0.1646, "step": 5276 }, { "epoch": 0.116280222776777, "grad_norm": 1.1375983953475952, "learning_rate": 2.9418276086451073e-05, "loss": 0.1781, "step": 5277 }, { "epoch": 0.11630225806629317, "grad_norm": 1.1604596376419067, "learning_rate": 2.9417980808662586e-05, "loss": 0.123, "step": 5278 }, { "epoch": 0.11632429335580932, "grad_norm": 1.1067322492599487, "learning_rate": 2.9417685457435593e-05, "loss": 0.1004, "step": 5279 }, { "epoch": 0.11634632864532549, "grad_norm": 1.409651517868042, "learning_rate": 2.94173900327716e-05, "loss": 0.1388, "step": 5280 }, { "epoch": 0.11636836393484165, "grad_norm": 1.2157045602798462, "learning_rate": 2.9417094534672103e-05, "loss": 0.1709, "step": 5281 }, { "epoch": 0.11639039922435782, "grad_norm": 1.5050668716430664, "learning_rate": 2.9416798963138613e-05, "loss": 0.1637, "step": 5282 }, { "epoch": 0.11641243451387397, "grad_norm": 1.6025817394256592, "learning_rate": 2.9416503318172636e-05, "loss": 0.1879, "step": 5283 }, { "epoch": 0.11643446980339013, "grad_norm": 1.3467869758605957, "learning_rate": 2.9416207599775675e-05, "loss": 0.1098, "step": 5284 }, { "epoch": 0.1164565050929063, "grad_norm": 1.1278165578842163, "learning_rate": 2.9415911807949242e-05, "loss": 0.1622, "step": 5285 }, { "epoch": 0.11647854038242245, "grad_norm": 1.3961800336837769, "learning_rate": 2.9415615942694837e-05, "loss": 0.1457, "step": 5286 }, { "epoch": 0.11650057567193861, "grad_norm": 1.1087454557418823, "learning_rate": 2.9415320004013968e-05, "loss": 0.1002, "step": 5287 }, { "epoch": 0.11652261096145478, "grad_norm": 0.9007149934768677, "learning_rate": 2.941502399190815e-05, "loss": 0.1377, "step": 5288 }, { "epoch": 0.11654464625097093, "grad_norm": 1.0241162776947021, "learning_rate": 2.9414727906378876e-05, "loss": 0.1284, "step": 5289 }, { "epoch": 0.11656668154048709, "grad_norm": 0.8590312600135803, "learning_rate": 2.9414431747427672e-05, "loss": 0.0855, "step": 5290 }, { "epoch": 0.11658871683000326, "grad_norm": 1.529437780380249, "learning_rate": 2.9414135515056035e-05, "loss": 0.1423, "step": 5291 }, { "epoch": 0.1166107521195194, "grad_norm": 1.209338903427124, "learning_rate": 2.9413839209265473e-05, "loss": 0.162, "step": 5292 }, { "epoch": 0.11663278740903557, "grad_norm": 1.0495446920394897, "learning_rate": 2.9413542830057503e-05, "loss": 0.1037, "step": 5293 }, { "epoch": 0.11665482269855174, "grad_norm": 1.0817927122116089, "learning_rate": 2.9413246377433627e-05, "loss": 0.132, "step": 5294 }, { "epoch": 0.11667685798806789, "grad_norm": 1.1858161687850952, "learning_rate": 2.9412949851395355e-05, "loss": 0.1577, "step": 5295 }, { "epoch": 0.11669889327758405, "grad_norm": 2.3655481338500977, "learning_rate": 2.9412653251944206e-05, "loss": 0.1522, "step": 5296 }, { "epoch": 0.11672092856710022, "grad_norm": 0.8589618802070618, "learning_rate": 2.9412356579081687e-05, "loss": 0.1176, "step": 5297 }, { "epoch": 0.11674296385661637, "grad_norm": 1.2096854448318481, "learning_rate": 2.9412059832809303e-05, "loss": 0.1055, "step": 5298 }, { "epoch": 0.11676499914613253, "grad_norm": 1.5441713333129883, "learning_rate": 2.941176301312857e-05, "loss": 0.1382, "step": 5299 }, { "epoch": 0.1167870344356487, "grad_norm": 1.31110417842865, "learning_rate": 2.9411466120041003e-05, "loss": 0.1409, "step": 5300 }, { "epoch": 0.11680906972516485, "grad_norm": 0.9082906246185303, "learning_rate": 2.941116915354811e-05, "loss": 0.1257, "step": 5301 }, { "epoch": 0.11683110501468101, "grad_norm": 1.0361257791519165, "learning_rate": 2.94108721136514e-05, "loss": 0.1021, "step": 5302 }, { "epoch": 0.11685314030419718, "grad_norm": 1.2241774797439575, "learning_rate": 2.9410575000352397e-05, "loss": 0.1531, "step": 5303 }, { "epoch": 0.11687517559371333, "grad_norm": 0.9948868155479431, "learning_rate": 2.94102778136526e-05, "loss": 0.1183, "step": 5304 }, { "epoch": 0.11689721088322949, "grad_norm": 2.2930116653442383, "learning_rate": 2.940998055355354e-05, "loss": 0.1307, "step": 5305 }, { "epoch": 0.11691924617274566, "grad_norm": 1.3552323579788208, "learning_rate": 2.9409683220056714e-05, "loss": 0.117, "step": 5306 }, { "epoch": 0.1169412814622618, "grad_norm": 1.1724175214767456, "learning_rate": 2.940938581316365e-05, "loss": 0.128, "step": 5307 }, { "epoch": 0.11696331675177797, "grad_norm": 1.305274248123169, "learning_rate": 2.9409088332875857e-05, "loss": 0.1583, "step": 5308 }, { "epoch": 0.11698535204129414, "grad_norm": 1.450268030166626, "learning_rate": 2.940879077919485e-05, "loss": 0.1495, "step": 5309 }, { "epoch": 0.11700738733081029, "grad_norm": 0.9213472604751587, "learning_rate": 2.9408493152122142e-05, "loss": 0.1242, "step": 5310 }, { "epoch": 0.11702942262032645, "grad_norm": 1.1492093801498413, "learning_rate": 2.9408195451659252e-05, "loss": 0.1425, "step": 5311 }, { "epoch": 0.11705145790984262, "grad_norm": 1.2775750160217285, "learning_rate": 2.94078976778077e-05, "loss": 0.1231, "step": 5312 }, { "epoch": 0.11707349319935877, "grad_norm": 1.221345067024231, "learning_rate": 2.9407599830568995e-05, "loss": 0.1361, "step": 5313 }, { "epoch": 0.11709552848887493, "grad_norm": 1.3495720624923706, "learning_rate": 2.940730190994466e-05, "loss": 0.1543, "step": 5314 }, { "epoch": 0.1171175637783911, "grad_norm": 1.1960949897766113, "learning_rate": 2.9407003915936207e-05, "loss": 0.1021, "step": 5315 }, { "epoch": 0.11713959906790726, "grad_norm": 0.8310087323188782, "learning_rate": 2.9406705848545167e-05, "loss": 0.0931, "step": 5316 }, { "epoch": 0.11716163435742341, "grad_norm": 1.0994782447814941, "learning_rate": 2.9406407707773042e-05, "loss": 0.1251, "step": 5317 }, { "epoch": 0.11718366964693958, "grad_norm": 1.0537004470825195, "learning_rate": 2.9406109493621356e-05, "loss": 0.0877, "step": 5318 }, { "epoch": 0.11720570493645574, "grad_norm": 1.3236078023910522, "learning_rate": 2.940581120609163e-05, "loss": 0.1348, "step": 5319 }, { "epoch": 0.11722774022597189, "grad_norm": 1.0800172090530396, "learning_rate": 2.9405512845185387e-05, "loss": 0.1094, "step": 5320 }, { "epoch": 0.11724977551548806, "grad_norm": 1.1160365343093872, "learning_rate": 2.9405214410904135e-05, "loss": 0.1364, "step": 5321 }, { "epoch": 0.11727181080500422, "grad_norm": 1.384151816368103, "learning_rate": 2.9404915903249404e-05, "loss": 0.1051, "step": 5322 }, { "epoch": 0.11729384609452037, "grad_norm": 0.8086443543434143, "learning_rate": 2.9404617322222713e-05, "loss": 0.1471, "step": 5323 }, { "epoch": 0.11731588138403654, "grad_norm": 1.2282696962356567, "learning_rate": 2.9404318667825582e-05, "loss": 0.1518, "step": 5324 }, { "epoch": 0.1173379166735527, "grad_norm": 1.7051687240600586, "learning_rate": 2.940401994005953e-05, "loss": 0.15, "step": 5325 }, { "epoch": 0.11735995196306885, "grad_norm": 1.5389095544815063, "learning_rate": 2.940372113892608e-05, "loss": 0.1529, "step": 5326 }, { "epoch": 0.11738198725258502, "grad_norm": 1.387407898902893, "learning_rate": 2.940342226442676e-05, "loss": 0.1253, "step": 5327 }, { "epoch": 0.11740402254210118, "grad_norm": 1.6386951208114624, "learning_rate": 2.940312331656308e-05, "loss": 0.149, "step": 5328 }, { "epoch": 0.11742605783161733, "grad_norm": 0.5689107179641724, "learning_rate": 2.940282429533657e-05, "loss": 0.1387, "step": 5329 }, { "epoch": 0.1174480931211335, "grad_norm": 0.9931432604789734, "learning_rate": 2.9402525200748757e-05, "loss": 0.1374, "step": 5330 }, { "epoch": 0.11747012841064966, "grad_norm": 1.0138577222824097, "learning_rate": 2.9402226032801157e-05, "loss": 0.1637, "step": 5331 }, { "epoch": 0.11749216370016581, "grad_norm": 1.413629174232483, "learning_rate": 2.9401926791495296e-05, "loss": 0.1462, "step": 5332 }, { "epoch": 0.11751419898968198, "grad_norm": 1.16161048412323, "learning_rate": 2.94016274768327e-05, "loss": 0.1448, "step": 5333 }, { "epoch": 0.11753623427919814, "grad_norm": 1.2961541414260864, "learning_rate": 2.940132808881489e-05, "loss": 0.1577, "step": 5334 }, { "epoch": 0.11755826956871429, "grad_norm": 1.1359919309616089, "learning_rate": 2.9401028627443395e-05, "loss": 0.0836, "step": 5335 }, { "epoch": 0.11758030485823046, "grad_norm": 1.3011317253112793, "learning_rate": 2.940072909271974e-05, "loss": 0.1194, "step": 5336 }, { "epoch": 0.11760234014774662, "grad_norm": 1.699021339416504, "learning_rate": 2.9400429484645452e-05, "loss": 0.1611, "step": 5337 }, { "epoch": 0.11762437543726277, "grad_norm": 1.305626392364502, "learning_rate": 2.940012980322205e-05, "loss": 0.0895, "step": 5338 }, { "epoch": 0.11764641072677894, "grad_norm": 1.0521106719970703, "learning_rate": 2.9399830048451065e-05, "loss": 0.1094, "step": 5339 }, { "epoch": 0.1176684460162951, "grad_norm": 1.0567547082901, "learning_rate": 2.9399530220334026e-05, "loss": 0.1762, "step": 5340 }, { "epoch": 0.11769048130581125, "grad_norm": 0.948239266872406, "learning_rate": 2.9399230318872458e-05, "loss": 0.1205, "step": 5341 }, { "epoch": 0.11771251659532742, "grad_norm": 0.899165689945221, "learning_rate": 2.939893034406789e-05, "loss": 0.1091, "step": 5342 }, { "epoch": 0.11773455188484358, "grad_norm": 1.0062613487243652, "learning_rate": 2.939863029592184e-05, "loss": 0.1494, "step": 5343 }, { "epoch": 0.11775658717435973, "grad_norm": 1.1293920278549194, "learning_rate": 2.9398330174435856e-05, "loss": 0.0828, "step": 5344 }, { "epoch": 0.1177786224638759, "grad_norm": 1.4175398349761963, "learning_rate": 2.9398029979611447e-05, "loss": 0.1331, "step": 5345 }, { "epoch": 0.11780065775339206, "grad_norm": 1.5152488946914673, "learning_rate": 2.9397729711450156e-05, "loss": 0.1361, "step": 5346 }, { "epoch": 0.11782269304290821, "grad_norm": 1.1308101415634155, "learning_rate": 2.9397429369953505e-05, "loss": 0.1615, "step": 5347 }, { "epoch": 0.11784472833242438, "grad_norm": 1.144761323928833, "learning_rate": 2.939712895512303e-05, "loss": 0.1384, "step": 5348 }, { "epoch": 0.11786676362194054, "grad_norm": 1.1673372983932495, "learning_rate": 2.9396828466960247e-05, "loss": 0.1624, "step": 5349 }, { "epoch": 0.1178887989114567, "grad_norm": 1.1073763370513916, "learning_rate": 2.93965279054667e-05, "loss": 0.1603, "step": 5350 }, { "epoch": 0.11791083420097286, "grad_norm": 0.8955050110816956, "learning_rate": 2.939622727064392e-05, "loss": 0.12, "step": 5351 }, { "epoch": 0.11793286949048902, "grad_norm": 0.8794278502464294, "learning_rate": 2.939592656249343e-05, "loss": 0.135, "step": 5352 }, { "epoch": 0.11795490478000518, "grad_norm": 0.9160248637199402, "learning_rate": 2.9395625781016773e-05, "loss": 0.1092, "step": 5353 }, { "epoch": 0.11797694006952134, "grad_norm": 2.071381092071533, "learning_rate": 2.9395324926215466e-05, "loss": 0.1403, "step": 5354 }, { "epoch": 0.1179989753590375, "grad_norm": 0.9337347745895386, "learning_rate": 2.9395023998091057e-05, "loss": 0.128, "step": 5355 }, { "epoch": 0.11802101064855366, "grad_norm": 1.9840266704559326, "learning_rate": 2.9394722996645066e-05, "loss": 0.1573, "step": 5356 }, { "epoch": 0.11804304593806982, "grad_norm": 1.802406668663025, "learning_rate": 2.9394421921879037e-05, "loss": 0.1815, "step": 5357 }, { "epoch": 0.11806508122758598, "grad_norm": 0.999614417552948, "learning_rate": 2.9394120773794493e-05, "loss": 0.1383, "step": 5358 }, { "epoch": 0.11808711651710214, "grad_norm": 1.1221622228622437, "learning_rate": 2.939381955239298e-05, "loss": 0.1319, "step": 5359 }, { "epoch": 0.1181091518066183, "grad_norm": 1.005614995956421, "learning_rate": 2.939351825767602e-05, "loss": 0.1826, "step": 5360 }, { "epoch": 0.11813118709613446, "grad_norm": 0.8516910076141357, "learning_rate": 2.939321688964515e-05, "loss": 0.1116, "step": 5361 }, { "epoch": 0.11815322238565062, "grad_norm": 1.3429832458496094, "learning_rate": 2.9392915448301917e-05, "loss": 0.1658, "step": 5362 }, { "epoch": 0.11817525767516678, "grad_norm": 1.3783624172210693, "learning_rate": 2.9392613933647842e-05, "loss": 0.1731, "step": 5363 }, { "epoch": 0.11819729296468294, "grad_norm": 1.8642839193344116, "learning_rate": 2.9392312345684465e-05, "loss": 0.1359, "step": 5364 }, { "epoch": 0.1182193282541991, "grad_norm": 0.6262840032577515, "learning_rate": 2.9392010684413328e-05, "loss": 0.1176, "step": 5365 }, { "epoch": 0.11824136354371526, "grad_norm": 1.5648481845855713, "learning_rate": 2.9391708949835957e-05, "loss": 0.133, "step": 5366 }, { "epoch": 0.11826339883323142, "grad_norm": 1.4503058195114136, "learning_rate": 2.9391407141953903e-05, "loss": 0.1537, "step": 5367 }, { "epoch": 0.11828543412274758, "grad_norm": 1.4431663751602173, "learning_rate": 2.939110526076869e-05, "loss": 0.1195, "step": 5368 }, { "epoch": 0.11830746941226374, "grad_norm": 3.60257625579834, "learning_rate": 2.939080330628186e-05, "loss": 0.1406, "step": 5369 }, { "epoch": 0.1183295047017799, "grad_norm": 1.2959716320037842, "learning_rate": 2.9390501278494955e-05, "loss": 0.1079, "step": 5370 }, { "epoch": 0.11835153999129606, "grad_norm": 1.5972355604171753, "learning_rate": 2.939019917740951e-05, "loss": 0.093, "step": 5371 }, { "epoch": 0.11837357528081222, "grad_norm": 1.059741497039795, "learning_rate": 2.9389897003027062e-05, "loss": 0.1484, "step": 5372 }, { "epoch": 0.11839561057032838, "grad_norm": 0.9942842721939087, "learning_rate": 2.9389594755349153e-05, "loss": 0.1666, "step": 5373 }, { "epoch": 0.11841764585984454, "grad_norm": 1.200891137123108, "learning_rate": 2.9389292434377318e-05, "loss": 0.1106, "step": 5374 }, { "epoch": 0.1184396811493607, "grad_norm": 1.2560981512069702, "learning_rate": 2.9388990040113107e-05, "loss": 0.1337, "step": 5375 }, { "epoch": 0.11846171643887686, "grad_norm": 1.020572304725647, "learning_rate": 2.9388687572558046e-05, "loss": 0.137, "step": 5376 }, { "epoch": 0.11848375172839302, "grad_norm": 0.8857278823852539, "learning_rate": 2.9388385031713686e-05, "loss": 0.1109, "step": 5377 }, { "epoch": 0.11850578701790918, "grad_norm": 0.906550943851471, "learning_rate": 2.9388082417581566e-05, "loss": 0.1107, "step": 5378 }, { "epoch": 0.11852782230742534, "grad_norm": 1.1143805980682373, "learning_rate": 2.938777973016323e-05, "loss": 0.206, "step": 5379 }, { "epoch": 0.1185498575969415, "grad_norm": 1.1102967262268066, "learning_rate": 2.9387476969460217e-05, "loss": 0.1421, "step": 5380 }, { "epoch": 0.11857189288645766, "grad_norm": 1.571099042892456, "learning_rate": 2.9387174135474064e-05, "loss": 0.1449, "step": 5381 }, { "epoch": 0.11859392817597382, "grad_norm": 1.0531268119812012, "learning_rate": 2.938687122820632e-05, "loss": 0.1243, "step": 5382 }, { "epoch": 0.11861596346548998, "grad_norm": 1.256518840789795, "learning_rate": 2.938656824765853e-05, "loss": 0.1843, "step": 5383 }, { "epoch": 0.11863799875500614, "grad_norm": 0.9034901857376099, "learning_rate": 2.9386265193832226e-05, "loss": 0.1418, "step": 5384 }, { "epoch": 0.1186600340445223, "grad_norm": 0.8708400130271912, "learning_rate": 2.9385962066728963e-05, "loss": 0.132, "step": 5385 }, { "epoch": 0.11868206933403846, "grad_norm": 1.1518117189407349, "learning_rate": 2.938565886635028e-05, "loss": 0.1172, "step": 5386 }, { "epoch": 0.11870410462355463, "grad_norm": 0.9838050603866577, "learning_rate": 2.9385355592697724e-05, "loss": 0.0797, "step": 5387 }, { "epoch": 0.11872613991307078, "grad_norm": 0.9949109554290771, "learning_rate": 2.9385052245772833e-05, "loss": 0.1407, "step": 5388 }, { "epoch": 0.11874817520258694, "grad_norm": 0.9052588939666748, "learning_rate": 2.938474882557716e-05, "loss": 0.1445, "step": 5389 }, { "epoch": 0.11877021049210311, "grad_norm": 1.5763611793518066, "learning_rate": 2.938444533211225e-05, "loss": 0.1413, "step": 5390 }, { "epoch": 0.11879224578161926, "grad_norm": 1.167481780052185, "learning_rate": 2.9384141765379643e-05, "loss": 0.0904, "step": 5391 }, { "epoch": 0.11881428107113542, "grad_norm": 0.7023677825927734, "learning_rate": 2.9383838125380885e-05, "loss": 0.0618, "step": 5392 }, { "epoch": 0.11883631636065159, "grad_norm": 1.6281176805496216, "learning_rate": 2.9383534412117535e-05, "loss": 0.1711, "step": 5393 }, { "epoch": 0.11885835165016774, "grad_norm": 1.141076922416687, "learning_rate": 2.9383230625591127e-05, "loss": 0.1539, "step": 5394 }, { "epoch": 0.1188803869396839, "grad_norm": 0.7453818917274475, "learning_rate": 2.9382926765803213e-05, "loss": 0.1316, "step": 5395 }, { "epoch": 0.11890242222920007, "grad_norm": 1.114253044128418, "learning_rate": 2.938262283275534e-05, "loss": 0.1382, "step": 5396 }, { "epoch": 0.11892445751871622, "grad_norm": 1.129976749420166, "learning_rate": 2.9382318826449055e-05, "loss": 0.1194, "step": 5397 }, { "epoch": 0.11894649280823238, "grad_norm": 1.3543504476547241, "learning_rate": 2.9382014746885907e-05, "loss": 0.1646, "step": 5398 }, { "epoch": 0.11896852809774855, "grad_norm": 1.255483627319336, "learning_rate": 2.938171059406745e-05, "loss": 0.1672, "step": 5399 }, { "epoch": 0.1189905633872647, "grad_norm": 0.8311688303947449, "learning_rate": 2.9381406367995222e-05, "loss": 0.1376, "step": 5400 }, { "epoch": 0.11901259867678086, "grad_norm": 0.8534373044967651, "learning_rate": 2.938110206867079e-05, "loss": 0.1191, "step": 5401 }, { "epoch": 0.11903463396629703, "grad_norm": 1.1727447509765625, "learning_rate": 2.9380797696095684e-05, "loss": 0.1178, "step": 5402 }, { "epoch": 0.11905666925581318, "grad_norm": 1.1880720853805542, "learning_rate": 2.9380493250271465e-05, "loss": 0.1231, "step": 5403 }, { "epoch": 0.11907870454532934, "grad_norm": 1.1754578351974487, "learning_rate": 2.9380188731199685e-05, "loss": 0.1019, "step": 5404 }, { "epoch": 0.11910073983484551, "grad_norm": 0.984504222869873, "learning_rate": 2.9379884138881893e-05, "loss": 0.1388, "step": 5405 }, { "epoch": 0.11912277512436166, "grad_norm": 0.8341430425643921, "learning_rate": 2.9379579473319637e-05, "loss": 0.1402, "step": 5406 }, { "epoch": 0.11914481041387782, "grad_norm": 1.3212370872497559, "learning_rate": 2.9379274734514474e-05, "loss": 0.1282, "step": 5407 }, { "epoch": 0.11916684570339399, "grad_norm": 0.8481693267822266, "learning_rate": 2.9378969922467956e-05, "loss": 0.1245, "step": 5408 }, { "epoch": 0.11918888099291014, "grad_norm": 1.0829384326934814, "learning_rate": 2.937866503718163e-05, "loss": 0.1151, "step": 5409 }, { "epoch": 0.1192109162824263, "grad_norm": 0.849390983581543, "learning_rate": 2.9378360078657055e-05, "loss": 0.1363, "step": 5410 }, { "epoch": 0.11923295157194247, "grad_norm": 1.1759425401687622, "learning_rate": 2.937805504689578e-05, "loss": 0.1002, "step": 5411 }, { "epoch": 0.11925498686145862, "grad_norm": 1.0058724880218506, "learning_rate": 2.9377749941899362e-05, "loss": 0.156, "step": 5412 }, { "epoch": 0.11927702215097478, "grad_norm": 0.9551980495452881, "learning_rate": 2.9377444763669348e-05, "loss": 0.1231, "step": 5413 }, { "epoch": 0.11929905744049095, "grad_norm": 1.8800299167633057, "learning_rate": 2.9377139512207305e-05, "loss": 0.09, "step": 5414 }, { "epoch": 0.1193210927300071, "grad_norm": 1.7009986639022827, "learning_rate": 2.937683418751478e-05, "loss": 0.1601, "step": 5415 }, { "epoch": 0.11934312801952326, "grad_norm": 1.0224833488464355, "learning_rate": 2.9376528789593328e-05, "loss": 0.1492, "step": 5416 }, { "epoch": 0.11936516330903943, "grad_norm": 1.6003721952438354, "learning_rate": 2.9376223318444508e-05, "loss": 0.1898, "step": 5417 }, { "epoch": 0.11938719859855558, "grad_norm": 0.7710843086242676, "learning_rate": 2.9375917774069866e-05, "loss": 0.0833, "step": 5418 }, { "epoch": 0.11940923388807174, "grad_norm": 1.0536065101623535, "learning_rate": 2.937561215647097e-05, "loss": 0.1211, "step": 5419 }, { "epoch": 0.11943126917758791, "grad_norm": 0.9197372794151306, "learning_rate": 2.9375306465649373e-05, "loss": 0.0979, "step": 5420 }, { "epoch": 0.11945330446710407, "grad_norm": 1.1981712579727173, "learning_rate": 2.9375000701606632e-05, "loss": 0.1918, "step": 5421 }, { "epoch": 0.11947533975662022, "grad_norm": 1.213307499885559, "learning_rate": 2.9374694864344304e-05, "loss": 0.1374, "step": 5422 }, { "epoch": 0.11949737504613639, "grad_norm": 0.9886043071746826, "learning_rate": 2.9374388953863946e-05, "loss": 0.1113, "step": 5423 }, { "epoch": 0.11951941033565255, "grad_norm": 1.0235850811004639, "learning_rate": 2.9374082970167117e-05, "loss": 0.0873, "step": 5424 }, { "epoch": 0.1195414456251687, "grad_norm": 1.7740771770477295, "learning_rate": 2.9373776913255377e-05, "loss": 0.1233, "step": 5425 }, { "epoch": 0.11956348091468487, "grad_norm": 2.832584857940674, "learning_rate": 2.9373470783130282e-05, "loss": 0.1349, "step": 5426 }, { "epoch": 0.11958551620420103, "grad_norm": 1.251365303993225, "learning_rate": 2.937316457979339e-05, "loss": 0.1497, "step": 5427 }, { "epoch": 0.11960755149371718, "grad_norm": 2.3181068897247314, "learning_rate": 2.9372858303246265e-05, "loss": 0.1525, "step": 5428 }, { "epoch": 0.11962958678323335, "grad_norm": 1.5551992654800415, "learning_rate": 2.937255195349047e-05, "loss": 0.1582, "step": 5429 }, { "epoch": 0.11965162207274951, "grad_norm": 1.2862999439239502, "learning_rate": 2.9372245530527553e-05, "loss": 0.1026, "step": 5430 }, { "epoch": 0.11967365736226566, "grad_norm": 2.3476269245147705, "learning_rate": 2.9371939034359088e-05, "loss": 0.1152, "step": 5431 }, { "epoch": 0.11969569265178183, "grad_norm": 1.37751305103302, "learning_rate": 2.9371632464986627e-05, "loss": 0.1812, "step": 5432 }, { "epoch": 0.119717727941298, "grad_norm": 1.2317378520965576, "learning_rate": 2.9371325822411742e-05, "loss": 0.1223, "step": 5433 }, { "epoch": 0.11973976323081414, "grad_norm": 1.0472674369812012, "learning_rate": 2.937101910663598e-05, "loss": 0.1361, "step": 5434 }, { "epoch": 0.11976179852033031, "grad_norm": 1.331249713897705, "learning_rate": 2.9370712317660912e-05, "loss": 0.1759, "step": 5435 }, { "epoch": 0.11978383380984647, "grad_norm": 1.210579514503479, "learning_rate": 2.9370405455488104e-05, "loss": 0.1742, "step": 5436 }, { "epoch": 0.11980586909936262, "grad_norm": 1.139346718788147, "learning_rate": 2.9370098520119114e-05, "loss": 0.1608, "step": 5437 }, { "epoch": 0.11982790438887879, "grad_norm": 0.9423390030860901, "learning_rate": 2.936979151155551e-05, "loss": 0.1405, "step": 5438 }, { "epoch": 0.11984993967839495, "grad_norm": 1.002389907836914, "learning_rate": 2.9369484429798846e-05, "loss": 0.1403, "step": 5439 }, { "epoch": 0.1198719749679111, "grad_norm": 1.3160401582717896, "learning_rate": 2.9369177274850692e-05, "loss": 0.1257, "step": 5440 }, { "epoch": 0.11989401025742727, "grad_norm": 0.9941200017929077, "learning_rate": 2.9368870046712617e-05, "loss": 0.137, "step": 5441 }, { "epoch": 0.11991604554694343, "grad_norm": 1.4745100736618042, "learning_rate": 2.936856274538618e-05, "loss": 0.1574, "step": 5442 }, { "epoch": 0.11993808083645958, "grad_norm": 0.7980933785438538, "learning_rate": 2.9368255370872944e-05, "loss": 0.1586, "step": 5443 }, { "epoch": 0.11996011612597575, "grad_norm": 1.439017653465271, "learning_rate": 2.9367947923174486e-05, "loss": 0.1521, "step": 5444 }, { "epoch": 0.11998215141549191, "grad_norm": 0.7645226120948792, "learning_rate": 2.9367640402292358e-05, "loss": 0.0918, "step": 5445 }, { "epoch": 0.12000418670500806, "grad_norm": 1.5300551652908325, "learning_rate": 2.9367332808228135e-05, "loss": 0.1133, "step": 5446 }, { "epoch": 0.12002622199452423, "grad_norm": 1.0271950960159302, "learning_rate": 2.9367025140983383e-05, "loss": 0.1399, "step": 5447 }, { "epoch": 0.1200482572840404, "grad_norm": 1.235772728919983, "learning_rate": 2.936671740055967e-05, "loss": 0.1141, "step": 5448 }, { "epoch": 0.12007029257355654, "grad_norm": 1.6607820987701416, "learning_rate": 2.9366409586958554e-05, "loss": 0.1005, "step": 5449 }, { "epoch": 0.12009232786307271, "grad_norm": 1.1810729503631592, "learning_rate": 2.9366101700181615e-05, "loss": 0.1206, "step": 5450 }, { "epoch": 0.12011436315258887, "grad_norm": 1.0803102254867554, "learning_rate": 2.9365793740230414e-05, "loss": 0.1318, "step": 5451 }, { "epoch": 0.12013639844210502, "grad_norm": 1.3959382772445679, "learning_rate": 2.9365485707106524e-05, "loss": 0.1197, "step": 5452 }, { "epoch": 0.12015843373162119, "grad_norm": 0.9625935554504395, "learning_rate": 2.9365177600811513e-05, "loss": 0.1461, "step": 5453 }, { "epoch": 0.12018046902113735, "grad_norm": 1.063158631324768, "learning_rate": 2.9364869421346947e-05, "loss": 0.1617, "step": 5454 }, { "epoch": 0.12020250431065352, "grad_norm": 1.4855903387069702, "learning_rate": 2.93645611687144e-05, "loss": 0.1521, "step": 5455 }, { "epoch": 0.12022453960016967, "grad_norm": 1.051536202430725, "learning_rate": 2.9364252842915437e-05, "loss": 0.1435, "step": 5456 }, { "epoch": 0.12024657488968583, "grad_norm": 1.2018840312957764, "learning_rate": 2.9363944443951637e-05, "loss": 0.1513, "step": 5457 }, { "epoch": 0.120268610179202, "grad_norm": 1.0748052597045898, "learning_rate": 2.936363597182456e-05, "loss": 0.1363, "step": 5458 }, { "epoch": 0.12029064546871815, "grad_norm": 1.2518261671066284, "learning_rate": 2.936332742653578e-05, "loss": 0.1568, "step": 5459 }, { "epoch": 0.12031268075823431, "grad_norm": 1.205395221710205, "learning_rate": 2.936301880808688e-05, "loss": 0.1716, "step": 5460 }, { "epoch": 0.12033471604775048, "grad_norm": 1.3197206258773804, "learning_rate": 2.9362710116479417e-05, "loss": 0.1579, "step": 5461 }, { "epoch": 0.12035675133726663, "grad_norm": 1.2144068479537964, "learning_rate": 2.936240135171497e-05, "loss": 0.1773, "step": 5462 }, { "epoch": 0.1203787866267828, "grad_norm": 1.296197772026062, "learning_rate": 2.936209251379511e-05, "loss": 0.1115, "step": 5463 }, { "epoch": 0.12040082191629896, "grad_norm": 1.3406529426574707, "learning_rate": 2.9361783602721416e-05, "loss": 0.1237, "step": 5464 }, { "epoch": 0.12042285720581511, "grad_norm": 0.7552421689033508, "learning_rate": 2.9361474618495458e-05, "loss": 0.138, "step": 5465 }, { "epoch": 0.12044489249533127, "grad_norm": 1.083383321762085, "learning_rate": 2.9361165561118802e-05, "loss": 0.1556, "step": 5466 }, { "epoch": 0.12046692778484744, "grad_norm": 3.3477656841278076, "learning_rate": 2.9360856430593032e-05, "loss": 0.1425, "step": 5467 }, { "epoch": 0.12048896307436359, "grad_norm": 9.941346168518066, "learning_rate": 2.9360547226919716e-05, "loss": 0.2144, "step": 5468 }, { "epoch": 0.12051099836387975, "grad_norm": 3.310298204421997, "learning_rate": 2.9360237950100436e-05, "loss": 0.1229, "step": 5469 }, { "epoch": 0.12053303365339592, "grad_norm": 3.783257484436035, "learning_rate": 2.935992860013676e-05, "loss": 0.1881, "step": 5470 }, { "epoch": 0.12055506894291207, "grad_norm": 1.4544410705566406, "learning_rate": 2.9359619177030273e-05, "loss": 0.114, "step": 5471 }, { "epoch": 0.12057710423242823, "grad_norm": 1.4105477333068848, "learning_rate": 2.935930968078254e-05, "loss": 0.1688, "step": 5472 }, { "epoch": 0.1205991395219444, "grad_norm": 1.1349095106124878, "learning_rate": 2.9359000111395147e-05, "loss": 0.1364, "step": 5473 }, { "epoch": 0.12062117481146055, "grad_norm": 1.9398016929626465, "learning_rate": 2.9358690468869664e-05, "loss": 0.1448, "step": 5474 }, { "epoch": 0.12064321010097671, "grad_norm": 1.074708104133606, "learning_rate": 2.9358380753207672e-05, "loss": 0.1392, "step": 5475 }, { "epoch": 0.12066524539049288, "grad_norm": 1.3784822225570679, "learning_rate": 2.9358070964410745e-05, "loss": 0.1408, "step": 5476 }, { "epoch": 0.12068728068000903, "grad_norm": 0.8989815711975098, "learning_rate": 2.9357761102480465e-05, "loss": 0.1489, "step": 5477 }, { "epoch": 0.1207093159695252, "grad_norm": 2.0339677333831787, "learning_rate": 2.935745116741841e-05, "loss": 0.133, "step": 5478 }, { "epoch": 0.12073135125904136, "grad_norm": 2.707134246826172, "learning_rate": 2.9357141159226154e-05, "loss": 0.1333, "step": 5479 }, { "epoch": 0.12075338654855751, "grad_norm": 0.9375060200691223, "learning_rate": 2.935683107790528e-05, "loss": 0.1532, "step": 5480 }, { "epoch": 0.12077542183807367, "grad_norm": 1.2935370206832886, "learning_rate": 2.935652092345737e-05, "loss": 0.15, "step": 5481 }, { "epoch": 0.12079745712758984, "grad_norm": 1.1083298921585083, "learning_rate": 2.9356210695883993e-05, "loss": 0.1578, "step": 5482 }, { "epoch": 0.12081949241710599, "grad_norm": 1.0316991806030273, "learning_rate": 2.935590039518674e-05, "loss": 0.1148, "step": 5483 }, { "epoch": 0.12084152770662215, "grad_norm": 1.1385834217071533, "learning_rate": 2.935559002136719e-05, "loss": 0.1111, "step": 5484 }, { "epoch": 0.12086356299613832, "grad_norm": 1.5029939413070679, "learning_rate": 2.9355279574426922e-05, "loss": 0.1386, "step": 5485 }, { "epoch": 0.12088559828565447, "grad_norm": 1.0552034378051758, "learning_rate": 2.9354969054367516e-05, "loss": 0.1283, "step": 5486 }, { "epoch": 0.12090763357517063, "grad_norm": 1.5254490375518799, "learning_rate": 2.935465846119055e-05, "loss": 0.1561, "step": 5487 }, { "epoch": 0.1209296688646868, "grad_norm": 1.4516539573669434, "learning_rate": 2.9354347794897622e-05, "loss": 0.1472, "step": 5488 }, { "epoch": 0.12095170415420295, "grad_norm": 2.2869465351104736, "learning_rate": 2.9354037055490296e-05, "loss": 0.1153, "step": 5489 }, { "epoch": 0.12097373944371911, "grad_norm": 1.1932424306869507, "learning_rate": 2.9353726242970162e-05, "loss": 0.1888, "step": 5490 }, { "epoch": 0.12099577473323528, "grad_norm": 1.0557242631912231, "learning_rate": 2.9353415357338803e-05, "loss": 0.1285, "step": 5491 }, { "epoch": 0.12101781002275144, "grad_norm": 1.136107325553894, "learning_rate": 2.9353104398597804e-05, "loss": 0.1649, "step": 5492 }, { "epoch": 0.1210398453122676, "grad_norm": 1.1200203895568848, "learning_rate": 2.9352793366748748e-05, "loss": 0.1406, "step": 5493 }, { "epoch": 0.12106188060178376, "grad_norm": 1.1499406099319458, "learning_rate": 2.935248226179322e-05, "loss": 0.1589, "step": 5494 }, { "epoch": 0.12108391589129992, "grad_norm": 0.9141344428062439, "learning_rate": 2.93521710837328e-05, "loss": 0.1136, "step": 5495 }, { "epoch": 0.12110595118081607, "grad_norm": 1.0838922262191772, "learning_rate": 2.9351859832569078e-05, "loss": 0.0875, "step": 5496 }, { "epoch": 0.12112798647033224, "grad_norm": 1.194629192352295, "learning_rate": 2.9351548508303635e-05, "loss": 0.1544, "step": 5497 }, { "epoch": 0.1211500217598484, "grad_norm": 1.0852994918823242, "learning_rate": 2.9351237110938062e-05, "loss": 0.1021, "step": 5498 }, { "epoch": 0.12117205704936455, "grad_norm": 1.1631149053573608, "learning_rate": 2.9350925640473947e-05, "loss": 0.1401, "step": 5499 }, { "epoch": 0.12119409233888072, "grad_norm": 1.0197523832321167, "learning_rate": 2.9350614096912866e-05, "loss": 0.1759, "step": 5500 }, { "epoch": 0.12121612762839688, "grad_norm": 1.1800233125686646, "learning_rate": 2.9350302480256414e-05, "loss": 0.127, "step": 5501 }, { "epoch": 0.12123816291791303, "grad_norm": 1.5230368375778198, "learning_rate": 2.9349990790506178e-05, "loss": 0.1273, "step": 5502 }, { "epoch": 0.1212601982074292, "grad_norm": 1.5529550313949585, "learning_rate": 2.9349679027663744e-05, "loss": 0.1517, "step": 5503 }, { "epoch": 0.12128223349694536, "grad_norm": 1.251625657081604, "learning_rate": 2.9349367191730695e-05, "loss": 0.0998, "step": 5504 }, { "epoch": 0.12130426878646151, "grad_norm": 1.1397559642791748, "learning_rate": 2.9349055282708627e-05, "loss": 0.116, "step": 5505 }, { "epoch": 0.12132630407597768, "grad_norm": 1.5672084093093872, "learning_rate": 2.9348743300599125e-05, "loss": 0.1695, "step": 5506 }, { "epoch": 0.12134833936549384, "grad_norm": 1.070047378540039, "learning_rate": 2.9348431245403778e-05, "loss": 0.1044, "step": 5507 }, { "epoch": 0.12137037465501, "grad_norm": 1.1827726364135742, "learning_rate": 2.9348119117124182e-05, "loss": 0.0946, "step": 5508 }, { "epoch": 0.12139240994452616, "grad_norm": 1.1843949556350708, "learning_rate": 2.9347806915761916e-05, "loss": 0.1581, "step": 5509 }, { "epoch": 0.12141444523404232, "grad_norm": 1.0965608358383179, "learning_rate": 2.934749464131858e-05, "loss": 0.1546, "step": 5510 }, { "epoch": 0.12143648052355847, "grad_norm": 1.36378812789917, "learning_rate": 2.9347182293795756e-05, "loss": 0.1678, "step": 5511 }, { "epoch": 0.12145851581307464, "grad_norm": 1.1165428161621094, "learning_rate": 2.934686987319504e-05, "loss": 0.1527, "step": 5512 }, { "epoch": 0.1214805511025908, "grad_norm": 1.372155785560608, "learning_rate": 2.9346557379518018e-05, "loss": 0.1905, "step": 5513 }, { "epoch": 0.12150258639210695, "grad_norm": 1.0845739841461182, "learning_rate": 2.934624481276629e-05, "loss": 0.1547, "step": 5514 }, { "epoch": 0.12152462168162312, "grad_norm": 1.4605989456176758, "learning_rate": 2.9345932172941448e-05, "loss": 0.1396, "step": 5515 }, { "epoch": 0.12154665697113928, "grad_norm": 0.8863297700881958, "learning_rate": 2.934561946004508e-05, "loss": 0.1151, "step": 5516 }, { "epoch": 0.12156869226065543, "grad_norm": 1.3961241245269775, "learning_rate": 2.9345306674078775e-05, "loss": 0.1763, "step": 5517 }, { "epoch": 0.1215907275501716, "grad_norm": 1.383792519569397, "learning_rate": 2.9344993815044133e-05, "loss": 0.142, "step": 5518 }, { "epoch": 0.12161276283968776, "grad_norm": 1.111184000968933, "learning_rate": 2.9344680882942742e-05, "loss": 0.1408, "step": 5519 }, { "epoch": 0.12163479812920391, "grad_norm": 0.8621214032173157, "learning_rate": 2.93443678777762e-05, "loss": 0.0877, "step": 5520 }, { "epoch": 0.12165683341872008, "grad_norm": 1.186832070350647, "learning_rate": 2.9344054799546105e-05, "loss": 0.1572, "step": 5521 }, { "epoch": 0.12167886870823624, "grad_norm": 1.120802640914917, "learning_rate": 2.9343741648254044e-05, "loss": 0.0934, "step": 5522 }, { "epoch": 0.1217009039977524, "grad_norm": 0.8022234439849854, "learning_rate": 2.9343428423901614e-05, "loss": 0.1345, "step": 5523 }, { "epoch": 0.12172293928726856, "grad_norm": 1.4729151725769043, "learning_rate": 2.934311512649041e-05, "loss": 0.1134, "step": 5524 }, { "epoch": 0.12174497457678472, "grad_norm": 1.6004897356033325, "learning_rate": 2.9342801756022034e-05, "loss": 0.0988, "step": 5525 }, { "epoch": 0.12176700986630089, "grad_norm": 1.0743930339813232, "learning_rate": 2.9342488312498074e-05, "loss": 0.1144, "step": 5526 }, { "epoch": 0.12178904515581704, "grad_norm": 0.9391145706176758, "learning_rate": 2.934217479592013e-05, "loss": 0.1218, "step": 5527 }, { "epoch": 0.1218110804453332, "grad_norm": 1.2060110569000244, "learning_rate": 2.9341861206289803e-05, "loss": 0.1006, "step": 5528 }, { "epoch": 0.12183311573484937, "grad_norm": 1.2599377632141113, "learning_rate": 2.934154754360868e-05, "loss": 0.0976, "step": 5529 }, { "epoch": 0.12185515102436552, "grad_norm": 1.0919605493545532, "learning_rate": 2.9341233807878365e-05, "loss": 0.1266, "step": 5530 }, { "epoch": 0.12187718631388168, "grad_norm": 0.853912353515625, "learning_rate": 2.934091999910046e-05, "loss": 0.1452, "step": 5531 }, { "epoch": 0.12189922160339785, "grad_norm": 0.904963493347168, "learning_rate": 2.9340606117276555e-05, "loss": 0.1106, "step": 5532 }, { "epoch": 0.121921256892914, "grad_norm": 1.0511924028396606, "learning_rate": 2.934029216240826e-05, "loss": 0.1624, "step": 5533 }, { "epoch": 0.12194329218243016, "grad_norm": 1.105526328086853, "learning_rate": 2.9339978134497163e-05, "loss": 0.1204, "step": 5534 }, { "epoch": 0.12196532747194633, "grad_norm": 1.3329176902770996, "learning_rate": 2.9339664033544864e-05, "loss": 0.1112, "step": 5535 }, { "epoch": 0.12198736276146248, "grad_norm": 1.4394973516464233, "learning_rate": 2.933934985955297e-05, "loss": 0.1352, "step": 5536 }, { "epoch": 0.12200939805097864, "grad_norm": 1.6423492431640625, "learning_rate": 2.933903561252308e-05, "loss": 0.1156, "step": 5537 }, { "epoch": 0.12203143334049481, "grad_norm": 1.3474199771881104, "learning_rate": 2.933872129245679e-05, "loss": 0.1307, "step": 5538 }, { "epoch": 0.12205346863001096, "grad_norm": 0.8203136920928955, "learning_rate": 2.9338406899355704e-05, "loss": 0.1519, "step": 5539 }, { "epoch": 0.12207550391952712, "grad_norm": 1.8378292322158813, "learning_rate": 2.9338092433221422e-05, "loss": 0.0975, "step": 5540 }, { "epoch": 0.12209753920904329, "grad_norm": 0.9811305403709412, "learning_rate": 2.9337777894055547e-05, "loss": 0.1592, "step": 5541 }, { "epoch": 0.12211957449855944, "grad_norm": 1.3023858070373535, "learning_rate": 2.9337463281859683e-05, "loss": 0.1567, "step": 5542 }, { "epoch": 0.1221416097880756, "grad_norm": 1.0187374353408813, "learning_rate": 2.9337148596635428e-05, "loss": 0.0954, "step": 5543 }, { "epoch": 0.12216364507759177, "grad_norm": 1.671921730041504, "learning_rate": 2.9336833838384385e-05, "loss": 0.1445, "step": 5544 }, { "epoch": 0.12218568036710792, "grad_norm": 1.5831915140151978, "learning_rate": 2.933651900710816e-05, "loss": 0.127, "step": 5545 }, { "epoch": 0.12220771565662408, "grad_norm": 1.1312912702560425, "learning_rate": 2.933620410280836e-05, "loss": 0.1337, "step": 5546 }, { "epoch": 0.12222975094614025, "grad_norm": 1.283092975616455, "learning_rate": 2.933588912548658e-05, "loss": 0.1474, "step": 5547 }, { "epoch": 0.1222517862356564, "grad_norm": 1.2862582206726074, "learning_rate": 2.933557407514443e-05, "loss": 0.1584, "step": 5548 }, { "epoch": 0.12227382152517256, "grad_norm": 1.5904268026351929, "learning_rate": 2.9335258951783518e-05, "loss": 0.1211, "step": 5549 }, { "epoch": 0.12229585681468873, "grad_norm": 1.0271846055984497, "learning_rate": 2.933494375540544e-05, "loss": 0.1249, "step": 5550 }, { "epoch": 0.12231789210420488, "grad_norm": 1.3088550567626953, "learning_rate": 2.9334628486011806e-05, "loss": 0.1468, "step": 5551 }, { "epoch": 0.12233992739372104, "grad_norm": 1.1539878845214844, "learning_rate": 2.9334313143604228e-05, "loss": 0.1622, "step": 5552 }, { "epoch": 0.12236196268323721, "grad_norm": 1.6727694272994995, "learning_rate": 2.9333997728184303e-05, "loss": 0.1667, "step": 5553 }, { "epoch": 0.12238399797275336, "grad_norm": 0.8469753861427307, "learning_rate": 2.933368223975364e-05, "loss": 0.0935, "step": 5554 }, { "epoch": 0.12240603326226952, "grad_norm": 1.3924912214279175, "learning_rate": 2.9333366678313847e-05, "loss": 0.1402, "step": 5555 }, { "epoch": 0.12242806855178569, "grad_norm": 0.9666466116905212, "learning_rate": 2.9333051043866533e-05, "loss": 0.1631, "step": 5556 }, { "epoch": 0.12245010384130184, "grad_norm": 1.1374057531356812, "learning_rate": 2.9332735336413304e-05, "loss": 0.1136, "step": 5557 }, { "epoch": 0.122472139130818, "grad_norm": 0.7999202013015747, "learning_rate": 2.933241955595577e-05, "loss": 0.099, "step": 5558 }, { "epoch": 0.12249417442033417, "grad_norm": 1.4987183809280396, "learning_rate": 2.933210370249553e-05, "loss": 0.1693, "step": 5559 }, { "epoch": 0.12251620970985033, "grad_norm": 1.1831989288330078, "learning_rate": 2.9331787776034206e-05, "loss": 0.152, "step": 5560 }, { "epoch": 0.12253824499936648, "grad_norm": 1.4371927976608276, "learning_rate": 2.9331471776573402e-05, "loss": 0.152, "step": 5561 }, { "epoch": 0.12256028028888265, "grad_norm": 0.9755521416664124, "learning_rate": 2.933115570411473e-05, "loss": 0.1168, "step": 5562 }, { "epoch": 0.12258231557839881, "grad_norm": 1.2040305137634277, "learning_rate": 2.933083955865979e-05, "loss": 0.1166, "step": 5563 }, { "epoch": 0.12260435086791496, "grad_norm": 0.9858829975128174, "learning_rate": 2.9330523340210203e-05, "loss": 0.1497, "step": 5564 }, { "epoch": 0.12262638615743113, "grad_norm": 1.1048619747161865, "learning_rate": 2.9330207048767575e-05, "loss": 0.1214, "step": 5565 }, { "epoch": 0.12264842144694729, "grad_norm": 1.1507396697998047, "learning_rate": 2.9329890684333518e-05, "loss": 0.1484, "step": 5566 }, { "epoch": 0.12267045673646344, "grad_norm": 1.3241244554519653, "learning_rate": 2.9329574246909643e-05, "loss": 0.1509, "step": 5567 }, { "epoch": 0.12269249202597961, "grad_norm": 0.9856612086296082, "learning_rate": 2.9329257736497566e-05, "loss": 0.1377, "step": 5568 }, { "epoch": 0.12271452731549577, "grad_norm": 1.2491514682769775, "learning_rate": 2.9328941153098893e-05, "loss": 0.1825, "step": 5569 }, { "epoch": 0.12273656260501192, "grad_norm": 1.2709611654281616, "learning_rate": 2.932862449671524e-05, "loss": 0.1128, "step": 5570 }, { "epoch": 0.12275859789452809, "grad_norm": 1.4535136222839355, "learning_rate": 2.9328307767348215e-05, "loss": 0.1051, "step": 5571 }, { "epoch": 0.12278063318404425, "grad_norm": 1.3238725662231445, "learning_rate": 2.9327990964999436e-05, "loss": 0.1506, "step": 5572 }, { "epoch": 0.1228026684735604, "grad_norm": 1.3334122896194458, "learning_rate": 2.932767408967052e-05, "loss": 0.1734, "step": 5573 }, { "epoch": 0.12282470376307657, "grad_norm": 1.125687837600708, "learning_rate": 2.9327357141363072e-05, "loss": 0.1322, "step": 5574 }, { "epoch": 0.12284673905259273, "grad_norm": 1.5204055309295654, "learning_rate": 2.9327040120078713e-05, "loss": 0.1625, "step": 5575 }, { "epoch": 0.12286877434210888, "grad_norm": 1.4894826412200928, "learning_rate": 2.9326723025819056e-05, "loss": 0.1158, "step": 5576 }, { "epoch": 0.12289080963162505, "grad_norm": 1.2635403871536255, "learning_rate": 2.932640585858572e-05, "loss": 0.1603, "step": 5577 }, { "epoch": 0.12291284492114121, "grad_norm": 0.8389015793800354, "learning_rate": 2.932608861838031e-05, "loss": 0.127, "step": 5578 }, { "epoch": 0.12293488021065736, "grad_norm": 1.1800869703292847, "learning_rate": 2.932577130520445e-05, "loss": 0.111, "step": 5579 }, { "epoch": 0.12295691550017353, "grad_norm": 1.1845520734786987, "learning_rate": 2.9325453919059754e-05, "loss": 0.1289, "step": 5580 }, { "epoch": 0.12297895078968969, "grad_norm": 1.3833800554275513, "learning_rate": 2.9325136459947842e-05, "loss": 0.1007, "step": 5581 }, { "epoch": 0.12300098607920584, "grad_norm": 1.1677826642990112, "learning_rate": 2.932481892787033e-05, "loss": 0.1086, "step": 5582 }, { "epoch": 0.12302302136872201, "grad_norm": 1.0954046249389648, "learning_rate": 2.9324501322828833e-05, "loss": 0.1477, "step": 5583 }, { "epoch": 0.12304505665823817, "grad_norm": 1.184915542602539, "learning_rate": 2.9324183644824963e-05, "loss": 0.1329, "step": 5584 }, { "epoch": 0.12306709194775432, "grad_norm": 1.252295970916748, "learning_rate": 2.932386589386035e-05, "loss": 0.148, "step": 5585 }, { "epoch": 0.12308912723727049, "grad_norm": 1.3997955322265625, "learning_rate": 2.9323548069936603e-05, "loss": 0.1443, "step": 5586 }, { "epoch": 0.12311116252678665, "grad_norm": 1.1244138479232788, "learning_rate": 2.932323017305535e-05, "loss": 0.1546, "step": 5587 }, { "epoch": 0.1231331978163028, "grad_norm": 1.0492854118347168, "learning_rate": 2.93229122032182e-05, "loss": 0.1188, "step": 5588 }, { "epoch": 0.12315523310581897, "grad_norm": 1.2824124097824097, "learning_rate": 2.932259416042678e-05, "loss": 0.154, "step": 5589 }, { "epoch": 0.12317726839533513, "grad_norm": 0.6468947529792786, "learning_rate": 2.932227604468271e-05, "loss": 0.0799, "step": 5590 }, { "epoch": 0.12319930368485128, "grad_norm": 1.0705419778823853, "learning_rate": 2.9321957855987604e-05, "loss": 0.1267, "step": 5591 }, { "epoch": 0.12322133897436745, "grad_norm": 0.9054783582687378, "learning_rate": 2.9321639594343087e-05, "loss": 0.1093, "step": 5592 }, { "epoch": 0.12324337426388361, "grad_norm": 0.9446547031402588, "learning_rate": 2.9321321259750782e-05, "loss": 0.1513, "step": 5593 }, { "epoch": 0.12326540955339976, "grad_norm": 1.1701850891113281, "learning_rate": 2.9321002852212308e-05, "loss": 0.1428, "step": 5594 }, { "epoch": 0.12328744484291593, "grad_norm": 1.480393886566162, "learning_rate": 2.9320684371729284e-05, "loss": 0.175, "step": 5595 }, { "epoch": 0.12330948013243209, "grad_norm": 1.2119817733764648, "learning_rate": 2.9320365818303337e-05, "loss": 0.1271, "step": 5596 }, { "epoch": 0.12333151542194826, "grad_norm": 1.1106410026550293, "learning_rate": 2.932004719193609e-05, "loss": 0.1247, "step": 5597 }, { "epoch": 0.12335355071146441, "grad_norm": 0.8344336748123169, "learning_rate": 2.9319728492629158e-05, "loss": 0.1315, "step": 5598 }, { "epoch": 0.12337558600098057, "grad_norm": 0.7915175557136536, "learning_rate": 2.931940972038417e-05, "loss": 0.0617, "step": 5599 }, { "epoch": 0.12339762129049674, "grad_norm": 0.9541646242141724, "learning_rate": 2.9319090875202754e-05, "loss": 0.1425, "step": 5600 }, { "epoch": 0.12341965658001289, "grad_norm": 1.608708143234253, "learning_rate": 2.931877195708653e-05, "loss": 0.1754, "step": 5601 }, { "epoch": 0.12344169186952905, "grad_norm": 1.1471649408340454, "learning_rate": 2.931845296603712e-05, "loss": 0.163, "step": 5602 }, { "epoch": 0.12346372715904522, "grad_norm": 1.1473948955535889, "learning_rate": 2.9318133902056153e-05, "loss": 0.1761, "step": 5603 }, { "epoch": 0.12348576244856137, "grad_norm": 1.1762455701828003, "learning_rate": 2.931781476514525e-05, "loss": 0.1724, "step": 5604 }, { "epoch": 0.12350779773807753, "grad_norm": 1.3381863832473755, "learning_rate": 2.931749555530604e-05, "loss": 0.1481, "step": 5605 }, { "epoch": 0.1235298330275937, "grad_norm": 0.9686430096626282, "learning_rate": 2.9317176272540148e-05, "loss": 0.1275, "step": 5606 }, { "epoch": 0.12355186831710985, "grad_norm": 1.0287554264068604, "learning_rate": 2.9316856916849194e-05, "loss": 0.1442, "step": 5607 }, { "epoch": 0.12357390360662601, "grad_norm": 1.246176838874817, "learning_rate": 2.9316537488234816e-05, "loss": 0.1554, "step": 5608 }, { "epoch": 0.12359593889614218, "grad_norm": 1.6275094747543335, "learning_rate": 2.9316217986698633e-05, "loss": 0.1281, "step": 5609 }, { "epoch": 0.12361797418565833, "grad_norm": 1.1472153663635254, "learning_rate": 2.9315898412242278e-05, "loss": 0.145, "step": 5610 }, { "epoch": 0.12364000947517449, "grad_norm": 0.8262772560119629, "learning_rate": 2.9315578764867374e-05, "loss": 0.1305, "step": 5611 }, { "epoch": 0.12366204476469066, "grad_norm": 1.0948472023010254, "learning_rate": 2.9315259044575547e-05, "loss": 0.1323, "step": 5612 }, { "epoch": 0.12368408005420681, "grad_norm": 1.0903372764587402, "learning_rate": 2.9314939251368434e-05, "loss": 0.1053, "step": 5613 }, { "epoch": 0.12370611534372297, "grad_norm": 1.243497610092163, "learning_rate": 2.9314619385247658e-05, "loss": 0.1835, "step": 5614 }, { "epoch": 0.12372815063323914, "grad_norm": 1.1509312391281128, "learning_rate": 2.9314299446214848e-05, "loss": 0.0963, "step": 5615 }, { "epoch": 0.12375018592275529, "grad_norm": 1.0692733526229858, "learning_rate": 2.9313979434271637e-05, "loss": 0.1413, "step": 5616 }, { "epoch": 0.12377222121227145, "grad_norm": 0.8440991044044495, "learning_rate": 2.9313659349419652e-05, "loss": 0.0849, "step": 5617 }, { "epoch": 0.12379425650178762, "grad_norm": 1.1382641792297363, "learning_rate": 2.9313339191660523e-05, "loss": 0.1065, "step": 5618 }, { "epoch": 0.12381629179130377, "grad_norm": 0.9597170948982239, "learning_rate": 2.9313018960995882e-05, "loss": 0.1386, "step": 5619 }, { "epoch": 0.12383832708081993, "grad_norm": 0.6203126311302185, "learning_rate": 2.9312698657427362e-05, "loss": 0.1161, "step": 5620 }, { "epoch": 0.1238603623703361, "grad_norm": 0.9357432723045349, "learning_rate": 2.9312378280956594e-05, "loss": 0.1049, "step": 5621 }, { "epoch": 0.12388239765985225, "grad_norm": 0.8153443932533264, "learning_rate": 2.9312057831585207e-05, "loss": 0.1044, "step": 5622 }, { "epoch": 0.12390443294936841, "grad_norm": 1.1306437253952026, "learning_rate": 2.9311737309314833e-05, "loss": 0.1507, "step": 5623 }, { "epoch": 0.12392646823888458, "grad_norm": 1.0206972360610962, "learning_rate": 2.9311416714147107e-05, "loss": 0.1595, "step": 5624 }, { "epoch": 0.12394850352840073, "grad_norm": 0.9082370400428772, "learning_rate": 2.931109604608366e-05, "loss": 0.1089, "step": 5625 }, { "epoch": 0.12397053881791689, "grad_norm": 0.990684986114502, "learning_rate": 2.9310775305126132e-05, "loss": 0.1381, "step": 5626 }, { "epoch": 0.12399257410743306, "grad_norm": 1.1271288394927979, "learning_rate": 2.9310454491276152e-05, "loss": 0.1946, "step": 5627 }, { "epoch": 0.12401460939694921, "grad_norm": 1.0477187633514404, "learning_rate": 2.9310133604535345e-05, "loss": 0.1215, "step": 5628 }, { "epoch": 0.12403664468646537, "grad_norm": 1.0499802827835083, "learning_rate": 2.9309812644905358e-05, "loss": 0.1248, "step": 5629 }, { "epoch": 0.12405867997598154, "grad_norm": 0.9939942955970764, "learning_rate": 2.9309491612387822e-05, "loss": 0.156, "step": 5630 }, { "epoch": 0.1240807152654977, "grad_norm": 1.5588680505752563, "learning_rate": 2.9309170506984373e-05, "loss": 0.11, "step": 5631 }, { "epoch": 0.12410275055501385, "grad_norm": 0.9948880076408386, "learning_rate": 2.9308849328696645e-05, "loss": 0.1192, "step": 5632 }, { "epoch": 0.12412478584453002, "grad_norm": 1.0148489475250244, "learning_rate": 2.930852807752627e-05, "loss": 0.0997, "step": 5633 }, { "epoch": 0.12414682113404618, "grad_norm": 1.3878456354141235, "learning_rate": 2.9308206753474894e-05, "loss": 0.1585, "step": 5634 }, { "epoch": 0.12416885642356233, "grad_norm": 1.3505439758300781, "learning_rate": 2.930788535654415e-05, "loss": 0.118, "step": 5635 }, { "epoch": 0.1241908917130785, "grad_norm": 1.3276759386062622, "learning_rate": 2.9307563886735668e-05, "loss": 0.1636, "step": 5636 }, { "epoch": 0.12421292700259466, "grad_norm": 1.2419060468673706, "learning_rate": 2.930724234405109e-05, "loss": 0.1477, "step": 5637 }, { "epoch": 0.12423496229211081, "grad_norm": 0.7954972386360168, "learning_rate": 2.930692072849206e-05, "loss": 0.1079, "step": 5638 }, { "epoch": 0.12425699758162698, "grad_norm": 1.370139718055725, "learning_rate": 2.9306599040060205e-05, "loss": 0.1028, "step": 5639 }, { "epoch": 0.12427903287114314, "grad_norm": 1.371835708618164, "learning_rate": 2.9306277278757172e-05, "loss": 0.1197, "step": 5640 }, { "epoch": 0.12430106816065929, "grad_norm": 1.9560110569000244, "learning_rate": 2.93059554445846e-05, "loss": 0.1172, "step": 5641 }, { "epoch": 0.12432310345017546, "grad_norm": 0.6814546585083008, "learning_rate": 2.9305633537544123e-05, "loss": 0.1325, "step": 5642 }, { "epoch": 0.12434513873969162, "grad_norm": 1.186448335647583, "learning_rate": 2.9305311557637384e-05, "loss": 0.1571, "step": 5643 }, { "epoch": 0.12436717402920777, "grad_norm": 1.092515230178833, "learning_rate": 2.9304989504866017e-05, "loss": 0.1394, "step": 5644 }, { "epoch": 0.12438920931872394, "grad_norm": 1.6285796165466309, "learning_rate": 2.9304667379231674e-05, "loss": 0.1677, "step": 5645 }, { "epoch": 0.1244112446082401, "grad_norm": 0.9930589199066162, "learning_rate": 2.9304345180735987e-05, "loss": 0.1328, "step": 5646 }, { "epoch": 0.12443327989775625, "grad_norm": 1.3843567371368408, "learning_rate": 2.93040229093806e-05, "loss": 0.1157, "step": 5647 }, { "epoch": 0.12445531518727242, "grad_norm": 2.375185966491699, "learning_rate": 2.9303700565167154e-05, "loss": 0.1325, "step": 5648 }, { "epoch": 0.12447735047678858, "grad_norm": 0.8536367416381836, "learning_rate": 2.9303378148097288e-05, "loss": 0.1626, "step": 5649 }, { "epoch": 0.12449938576630473, "grad_norm": 1.1977521181106567, "learning_rate": 2.9303055658172648e-05, "loss": 0.1159, "step": 5650 }, { "epoch": 0.1245214210558209, "grad_norm": 1.6338484287261963, "learning_rate": 2.930273309539488e-05, "loss": 0.1081, "step": 5651 }, { "epoch": 0.12454345634533706, "grad_norm": 0.9997483491897583, "learning_rate": 2.9302410459765618e-05, "loss": 0.1378, "step": 5652 }, { "epoch": 0.12456549163485321, "grad_norm": 1.3787254095077515, "learning_rate": 2.930208775128651e-05, "loss": 0.126, "step": 5653 }, { "epoch": 0.12458752692436938, "grad_norm": 1.1121922731399536, "learning_rate": 2.9301764969959203e-05, "loss": 0.1417, "step": 5654 }, { "epoch": 0.12460956221388554, "grad_norm": 0.9333186745643616, "learning_rate": 2.9301442115785333e-05, "loss": 0.1165, "step": 5655 }, { "epoch": 0.12463159750340169, "grad_norm": 1.4739876985549927, "learning_rate": 2.9301119188766556e-05, "loss": 0.1718, "step": 5656 }, { "epoch": 0.12465363279291786, "grad_norm": 1.0309959650039673, "learning_rate": 2.9300796188904503e-05, "loss": 0.1216, "step": 5657 }, { "epoch": 0.12467566808243402, "grad_norm": 0.8620862364768982, "learning_rate": 2.930047311620083e-05, "loss": 0.0823, "step": 5658 }, { "epoch": 0.12469770337195017, "grad_norm": 1.0308059453964233, "learning_rate": 2.9300149970657177e-05, "loss": 0.1184, "step": 5659 }, { "epoch": 0.12471973866146634, "grad_norm": 1.0709748268127441, "learning_rate": 2.9299826752275197e-05, "loss": 0.1195, "step": 5660 }, { "epoch": 0.1247417739509825, "grad_norm": 0.8934493660926819, "learning_rate": 2.9299503461056524e-05, "loss": 0.0892, "step": 5661 }, { "epoch": 0.12476380924049865, "grad_norm": 0.875177800655365, "learning_rate": 2.9299180097002817e-05, "loss": 0.1648, "step": 5662 }, { "epoch": 0.12478584453001482, "grad_norm": 1.3658133745193481, "learning_rate": 2.9298856660115715e-05, "loss": 0.1516, "step": 5663 }, { "epoch": 0.12480787981953098, "grad_norm": 1.0893056392669678, "learning_rate": 2.929853315039687e-05, "loss": 0.1146, "step": 5664 }, { "epoch": 0.12482991510904713, "grad_norm": 1.8233243227005005, "learning_rate": 2.9298209567847928e-05, "loss": 0.1467, "step": 5665 }, { "epoch": 0.1248519503985633, "grad_norm": 1.1411566734313965, "learning_rate": 2.9297885912470534e-05, "loss": 0.1158, "step": 5666 }, { "epoch": 0.12487398568807946, "grad_norm": 1.1189087629318237, "learning_rate": 2.9297562184266344e-05, "loss": 0.1293, "step": 5667 }, { "epoch": 0.12489602097759563, "grad_norm": 2.0615525245666504, "learning_rate": 2.9297238383237003e-05, "loss": 0.1172, "step": 5668 }, { "epoch": 0.12491805626711178, "grad_norm": 1.2328131198883057, "learning_rate": 2.929691450938416e-05, "loss": 0.1225, "step": 5669 }, { "epoch": 0.12494009155662794, "grad_norm": 0.8138203024864197, "learning_rate": 2.9296590562709465e-05, "loss": 0.1453, "step": 5670 }, { "epoch": 0.1249621268461441, "grad_norm": 1.3494631052017212, "learning_rate": 2.9296266543214565e-05, "loss": 0.0841, "step": 5671 }, { "epoch": 0.12498416213566026, "grad_norm": 1.1072945594787598, "learning_rate": 2.9295942450901113e-05, "loss": 0.11, "step": 5672 }, { "epoch": 0.1250061974251764, "grad_norm": 1.4290249347686768, "learning_rate": 2.9295618285770763e-05, "loss": 0.1788, "step": 5673 }, { "epoch": 0.12502823271469257, "grad_norm": 1.0755829811096191, "learning_rate": 2.929529404782516e-05, "loss": 0.1492, "step": 5674 }, { "epoch": 0.12505026800420874, "grad_norm": 0.8487015962600708, "learning_rate": 2.929496973706596e-05, "loss": 0.1415, "step": 5675 }, { "epoch": 0.1250723032937249, "grad_norm": 1.224932312965393, "learning_rate": 2.9294645353494817e-05, "loss": 0.1342, "step": 5676 }, { "epoch": 0.12509433858324107, "grad_norm": 0.7463926672935486, "learning_rate": 2.9294320897113377e-05, "loss": 0.1317, "step": 5677 }, { "epoch": 0.12511637387275723, "grad_norm": 1.078119158744812, "learning_rate": 2.9293996367923296e-05, "loss": 0.145, "step": 5678 }, { "epoch": 0.12513840916227337, "grad_norm": 1.0302300453186035, "learning_rate": 2.9293671765926223e-05, "loss": 0.1196, "step": 5679 }, { "epoch": 0.12516044445178953, "grad_norm": 0.8438450694084167, "learning_rate": 2.929334709112382e-05, "loss": 0.1212, "step": 5680 }, { "epoch": 0.1251824797413057, "grad_norm": 1.058343768119812, "learning_rate": 2.9293022343517734e-05, "loss": 0.1246, "step": 5681 }, { "epoch": 0.12520451503082186, "grad_norm": 1.7471708059310913, "learning_rate": 2.9292697523109618e-05, "loss": 0.1306, "step": 5682 }, { "epoch": 0.12522655032033803, "grad_norm": 0.7689395546913147, "learning_rate": 2.9292372629901134e-05, "loss": 0.1332, "step": 5683 }, { "epoch": 0.1252485856098542, "grad_norm": 0.8622406721115112, "learning_rate": 2.9292047663893926e-05, "loss": 0.1726, "step": 5684 }, { "epoch": 0.12527062089937036, "grad_norm": 1.173028826713562, "learning_rate": 2.9291722625089657e-05, "loss": 0.1262, "step": 5685 }, { "epoch": 0.1252926561888865, "grad_norm": 1.3466466665267944, "learning_rate": 2.9291397513489984e-05, "loss": 0.1203, "step": 5686 }, { "epoch": 0.12531469147840266, "grad_norm": 1.1921029090881348, "learning_rate": 2.9291072329096557e-05, "loss": 0.1501, "step": 5687 }, { "epoch": 0.12533672676791882, "grad_norm": 0.8782843351364136, "learning_rate": 2.929074707191104e-05, "loss": 0.127, "step": 5688 }, { "epoch": 0.12535876205743499, "grad_norm": 1.4010345935821533, "learning_rate": 2.9290421741935082e-05, "loss": 0.1715, "step": 5689 }, { "epoch": 0.12538079734695115, "grad_norm": 1.7873129844665527, "learning_rate": 2.929009633917034e-05, "loss": 0.1401, "step": 5690 }, { "epoch": 0.12540283263646732, "grad_norm": 0.6582939624786377, "learning_rate": 2.9289770863618476e-05, "loss": 0.1235, "step": 5691 }, { "epoch": 0.12542486792598345, "grad_norm": 1.3883877992630005, "learning_rate": 2.9289445315281144e-05, "loss": 0.0861, "step": 5692 }, { "epoch": 0.12544690321549962, "grad_norm": 0.9859576225280762, "learning_rate": 2.928911969416001e-05, "loss": 0.1121, "step": 5693 }, { "epoch": 0.12546893850501578, "grad_norm": 0.8333508968353271, "learning_rate": 2.9288794000256722e-05, "loss": 0.1492, "step": 5694 }, { "epoch": 0.12549097379453195, "grad_norm": 0.8884901404380798, "learning_rate": 2.928846823357295e-05, "loss": 0.1155, "step": 5695 }, { "epoch": 0.1255130090840481, "grad_norm": 1.1091753244400024, "learning_rate": 2.9288142394110342e-05, "loss": 0.14, "step": 5696 }, { "epoch": 0.12553504437356428, "grad_norm": 1.4187731742858887, "learning_rate": 2.928781648187056e-05, "loss": 0.1587, "step": 5697 }, { "epoch": 0.1255570796630804, "grad_norm": 1.2879048585891724, "learning_rate": 2.9287490496855273e-05, "loss": 0.1799, "step": 5698 }, { "epoch": 0.12557911495259658, "grad_norm": 0.7991167902946472, "learning_rate": 2.9287164439066133e-05, "loss": 0.1388, "step": 5699 }, { "epoch": 0.12560115024211274, "grad_norm": 0.9924678802490234, "learning_rate": 2.9286838308504805e-05, "loss": 0.1198, "step": 5700 }, { "epoch": 0.1256231855316289, "grad_norm": 1.0684043169021606, "learning_rate": 2.9286512105172948e-05, "loss": 0.1039, "step": 5701 }, { "epoch": 0.12564522082114507, "grad_norm": 2.0955493450164795, "learning_rate": 2.928618582907222e-05, "loss": 0.1358, "step": 5702 }, { "epoch": 0.12566725611066124, "grad_norm": 2.2263643741607666, "learning_rate": 2.9285859480204287e-05, "loss": 0.1537, "step": 5703 }, { "epoch": 0.12568929140017737, "grad_norm": 0.6635224223136902, "learning_rate": 2.9285533058570813e-05, "loss": 0.0917, "step": 5704 }, { "epoch": 0.12571132668969354, "grad_norm": 0.7638028860092163, "learning_rate": 2.928520656417346e-05, "loss": 0.1147, "step": 5705 }, { "epoch": 0.1257333619792097, "grad_norm": 1.1836273670196533, "learning_rate": 2.9284879997013887e-05, "loss": 0.1371, "step": 5706 }, { "epoch": 0.12575539726872587, "grad_norm": 0.9564293622970581, "learning_rate": 2.928455335709376e-05, "loss": 0.1653, "step": 5707 }, { "epoch": 0.12577743255824203, "grad_norm": 2.260915994644165, "learning_rate": 2.928422664441475e-05, "loss": 0.1408, "step": 5708 }, { "epoch": 0.1257994678477582, "grad_norm": 0.9398488998413086, "learning_rate": 2.9283899858978504e-05, "loss": 0.1506, "step": 5709 }, { "epoch": 0.12582150313727433, "grad_norm": 1.0152429342269897, "learning_rate": 2.92835730007867e-05, "loss": 0.1383, "step": 5710 }, { "epoch": 0.1258435384267905, "grad_norm": 0.987969696521759, "learning_rate": 2.9283246069841e-05, "loss": 0.1769, "step": 5711 }, { "epoch": 0.12586557371630666, "grad_norm": 0.7470666170120239, "learning_rate": 2.928291906614307e-05, "loss": 0.1358, "step": 5712 }, { "epoch": 0.12588760900582283, "grad_norm": 0.947406530380249, "learning_rate": 2.928259198969457e-05, "loss": 0.1199, "step": 5713 }, { "epoch": 0.125909644295339, "grad_norm": 1.5293251276016235, "learning_rate": 2.9282264840497174e-05, "loss": 0.1353, "step": 5714 }, { "epoch": 0.12593167958485516, "grad_norm": 1.0783333778381348, "learning_rate": 2.928193761855254e-05, "loss": 0.1273, "step": 5715 }, { "epoch": 0.1259537148743713, "grad_norm": 0.8338925838470459, "learning_rate": 2.928161032386234e-05, "loss": 0.1399, "step": 5716 }, { "epoch": 0.12597575016388746, "grad_norm": 0.9340003728866577, "learning_rate": 2.928128295642825e-05, "loss": 0.1374, "step": 5717 }, { "epoch": 0.12599778545340362, "grad_norm": 1.1928402185440063, "learning_rate": 2.9280955516251914e-05, "loss": 0.1074, "step": 5718 }, { "epoch": 0.12601982074291979, "grad_norm": 1.0286834239959717, "learning_rate": 2.928062800333502e-05, "loss": 0.1198, "step": 5719 }, { "epoch": 0.12604185603243595, "grad_norm": 1.2393803596496582, "learning_rate": 2.928030041767923e-05, "loss": 0.092, "step": 5720 }, { "epoch": 0.12606389132195212, "grad_norm": 1.5958542823791504, "learning_rate": 2.927997275928621e-05, "loss": 0.1142, "step": 5721 }, { "epoch": 0.12608592661146828, "grad_norm": 1.2169069051742554, "learning_rate": 2.9279645028157635e-05, "loss": 0.1697, "step": 5722 }, { "epoch": 0.12610796190098442, "grad_norm": 1.2753820419311523, "learning_rate": 2.9279317224295167e-05, "loss": 0.1109, "step": 5723 }, { "epoch": 0.12612999719050058, "grad_norm": 0.8901655673980713, "learning_rate": 2.9278989347700478e-05, "loss": 0.1596, "step": 5724 }, { "epoch": 0.12615203248001675, "grad_norm": 1.3847986459732056, "learning_rate": 2.9278661398375243e-05, "loss": 0.1684, "step": 5725 }, { "epoch": 0.1261740677695329, "grad_norm": 0.7918323874473572, "learning_rate": 2.9278333376321127e-05, "loss": 0.1108, "step": 5726 }, { "epoch": 0.12619610305904908, "grad_norm": 1.2906588315963745, "learning_rate": 2.9278005281539803e-05, "loss": 0.1213, "step": 5727 }, { "epoch": 0.12621813834856524, "grad_norm": 0.871727705001831, "learning_rate": 2.9277677114032942e-05, "loss": 0.112, "step": 5728 }, { "epoch": 0.12624017363808138, "grad_norm": 1.253167748451233, "learning_rate": 2.9277348873802214e-05, "loss": 0.1368, "step": 5729 }, { "epoch": 0.12626220892759754, "grad_norm": 1.0195902585983276, "learning_rate": 2.927702056084929e-05, "loss": 0.1375, "step": 5730 }, { "epoch": 0.1262842442171137, "grad_norm": 1.0493369102478027, "learning_rate": 2.9276692175175848e-05, "loss": 0.1387, "step": 5731 }, { "epoch": 0.12630627950662987, "grad_norm": 0.7152321338653564, "learning_rate": 2.9276363716783554e-05, "loss": 0.116, "step": 5732 }, { "epoch": 0.12632831479614604, "grad_norm": 0.7448320984840393, "learning_rate": 2.927603518567409e-05, "loss": 0.095, "step": 5733 }, { "epoch": 0.1263503500856622, "grad_norm": 0.9954557418823242, "learning_rate": 2.927570658184912e-05, "loss": 0.137, "step": 5734 }, { "epoch": 0.12637238537517834, "grad_norm": 0.6618568897247314, "learning_rate": 2.9275377905310316e-05, "loss": 0.1125, "step": 5735 }, { "epoch": 0.1263944206646945, "grad_norm": 1.2279772758483887, "learning_rate": 2.927504915605936e-05, "loss": 0.115, "step": 5736 }, { "epoch": 0.12641645595421067, "grad_norm": 0.93067467212677, "learning_rate": 2.9274720334097923e-05, "loss": 0.1298, "step": 5737 }, { "epoch": 0.12643849124372683, "grad_norm": 1.2875735759735107, "learning_rate": 2.9274391439427687e-05, "loss": 0.1551, "step": 5738 }, { "epoch": 0.126460526533243, "grad_norm": 1.327845811843872, "learning_rate": 2.9274062472050314e-05, "loss": 0.1537, "step": 5739 }, { "epoch": 0.12648256182275916, "grad_norm": 0.988480806350708, "learning_rate": 2.927373343196749e-05, "loss": 0.1271, "step": 5740 }, { "epoch": 0.1265045971122753, "grad_norm": 1.0186355113983154, "learning_rate": 2.9273404319180887e-05, "loss": 0.1369, "step": 5741 }, { "epoch": 0.12652663240179146, "grad_norm": 1.2932708263397217, "learning_rate": 2.927307513369218e-05, "loss": 0.1732, "step": 5742 }, { "epoch": 0.12654866769130763, "grad_norm": 1.1685181856155396, "learning_rate": 2.9272745875503048e-05, "loss": 0.0861, "step": 5743 }, { "epoch": 0.1265707029808238, "grad_norm": 1.0058932304382324, "learning_rate": 2.9272416544615173e-05, "loss": 0.1373, "step": 5744 }, { "epoch": 0.12659273827033996, "grad_norm": 1.4142422676086426, "learning_rate": 2.9272087141030218e-05, "loss": 0.091, "step": 5745 }, { "epoch": 0.12661477355985612, "grad_norm": 2.200241804122925, "learning_rate": 2.9271757664749877e-05, "loss": 0.1287, "step": 5746 }, { "epoch": 0.12663680884937226, "grad_norm": 1.4545090198516846, "learning_rate": 2.9271428115775816e-05, "loss": 0.1259, "step": 5747 }, { "epoch": 0.12665884413888842, "grad_norm": 1.1464005708694458, "learning_rate": 2.9271098494109724e-05, "loss": 0.1586, "step": 5748 }, { "epoch": 0.12668087942840459, "grad_norm": 1.255703330039978, "learning_rate": 2.9270768799753273e-05, "loss": 0.1198, "step": 5749 }, { "epoch": 0.12670291471792075, "grad_norm": 1.3522921800613403, "learning_rate": 2.9270439032708144e-05, "loss": 0.1268, "step": 5750 }, { "epoch": 0.12672495000743692, "grad_norm": 1.2127320766448975, "learning_rate": 2.9270109192976014e-05, "loss": 0.1846, "step": 5751 }, { "epoch": 0.12674698529695308, "grad_norm": 0.7546772360801697, "learning_rate": 2.9269779280558564e-05, "loss": 0.0926, "step": 5752 }, { "epoch": 0.12676902058646924, "grad_norm": 2.2403981685638428, "learning_rate": 2.926944929545748e-05, "loss": 0.1843, "step": 5753 }, { "epoch": 0.12679105587598538, "grad_norm": 2.0851268768310547, "learning_rate": 2.926911923767444e-05, "loss": 0.1592, "step": 5754 }, { "epoch": 0.12681309116550155, "grad_norm": 1.2873791456222534, "learning_rate": 2.9268789107211122e-05, "loss": 0.1232, "step": 5755 }, { "epoch": 0.1268351264550177, "grad_norm": 1.0116215944290161, "learning_rate": 2.9268458904069206e-05, "loss": 0.1005, "step": 5756 }, { "epoch": 0.12685716174453388, "grad_norm": 1.0407357215881348, "learning_rate": 2.9268128628250383e-05, "loss": 0.1616, "step": 5757 }, { "epoch": 0.12687919703405004, "grad_norm": 1.2113163471221924, "learning_rate": 2.9267798279756326e-05, "loss": 0.1441, "step": 5758 }, { "epoch": 0.1269012323235662, "grad_norm": 1.3974977731704712, "learning_rate": 2.9267467858588727e-05, "loss": 0.1331, "step": 5759 }, { "epoch": 0.12692326761308234, "grad_norm": 1.8438366651535034, "learning_rate": 2.9267137364749258e-05, "loss": 0.134, "step": 5760 }, { "epoch": 0.1269453029025985, "grad_norm": 0.9432473182678223, "learning_rate": 2.926680679823961e-05, "loss": 0.1591, "step": 5761 }, { "epoch": 0.12696733819211467, "grad_norm": 1.1971096992492676, "learning_rate": 2.926647615906146e-05, "loss": 0.1422, "step": 5762 }, { "epoch": 0.12698937348163084, "grad_norm": 0.7243394255638123, "learning_rate": 2.92661454472165e-05, "loss": 0.1279, "step": 5763 }, { "epoch": 0.127011408771147, "grad_norm": 1.8584766387939453, "learning_rate": 2.9265814662706413e-05, "loss": 0.1058, "step": 5764 }, { "epoch": 0.12703344406066316, "grad_norm": 1.7251906394958496, "learning_rate": 2.9265483805532876e-05, "loss": 0.1396, "step": 5765 }, { "epoch": 0.1270554793501793, "grad_norm": 1.0612179040908813, "learning_rate": 2.926515287569759e-05, "loss": 0.143, "step": 5766 }, { "epoch": 0.12707751463969547, "grad_norm": 0.8307333588600159, "learning_rate": 2.9264821873202222e-05, "loss": 0.1592, "step": 5767 }, { "epoch": 0.12709954992921163, "grad_norm": 1.1631062030792236, "learning_rate": 2.926449079804847e-05, "loss": 0.1416, "step": 5768 }, { "epoch": 0.1271215852187278, "grad_norm": 0.7619295716285706, "learning_rate": 2.926415965023802e-05, "loss": 0.131, "step": 5769 }, { "epoch": 0.12714362050824396, "grad_norm": 1.4801936149597168, "learning_rate": 2.9263828429772553e-05, "loss": 0.1154, "step": 5770 }, { "epoch": 0.12716565579776012, "grad_norm": 1.0932965278625488, "learning_rate": 2.9263497136653757e-05, "loss": 0.1416, "step": 5771 }, { "epoch": 0.12718769108727626, "grad_norm": 0.9679244160652161, "learning_rate": 2.9263165770883326e-05, "loss": 0.1397, "step": 5772 }, { "epoch": 0.12720972637679243, "grad_norm": 0.9011507034301758, "learning_rate": 2.926283433246294e-05, "loss": 0.1089, "step": 5773 }, { "epoch": 0.1272317616663086, "grad_norm": 0.997509241104126, "learning_rate": 2.9262502821394292e-05, "loss": 0.1488, "step": 5774 }, { "epoch": 0.12725379695582476, "grad_norm": 1.0262508392333984, "learning_rate": 2.9262171237679064e-05, "loss": 0.0797, "step": 5775 }, { "epoch": 0.12727583224534092, "grad_norm": 1.5319262742996216, "learning_rate": 2.9261839581318957e-05, "loss": 0.1045, "step": 5776 }, { "epoch": 0.12729786753485708, "grad_norm": 1.6120930910110474, "learning_rate": 2.926150785231565e-05, "loss": 0.1247, "step": 5777 }, { "epoch": 0.12731990282437322, "grad_norm": 1.8139655590057373, "learning_rate": 2.9261176050670837e-05, "loss": 0.1114, "step": 5778 }, { "epoch": 0.12734193811388939, "grad_norm": 1.0287113189697266, "learning_rate": 2.9260844176386202e-05, "loss": 0.1045, "step": 5779 }, { "epoch": 0.12736397340340555, "grad_norm": 1.264801263809204, "learning_rate": 2.9260512229463446e-05, "loss": 0.1284, "step": 5780 }, { "epoch": 0.12738600869292172, "grad_norm": 1.0585334300994873, "learning_rate": 2.9260180209904255e-05, "loss": 0.1153, "step": 5781 }, { "epoch": 0.12740804398243788, "grad_norm": 1.381146788597107, "learning_rate": 2.9259848117710317e-05, "loss": 0.1095, "step": 5782 }, { "epoch": 0.12743007927195404, "grad_norm": 0.9934626817703247, "learning_rate": 2.9259515952883322e-05, "loss": 0.1265, "step": 5783 }, { "epoch": 0.12745211456147018, "grad_norm": 1.309565782546997, "learning_rate": 2.9259183715424972e-05, "loss": 0.1325, "step": 5784 }, { "epoch": 0.12747414985098635, "grad_norm": 1.3131691217422485, "learning_rate": 2.925885140533695e-05, "loss": 0.115, "step": 5785 }, { "epoch": 0.1274961851405025, "grad_norm": 1.8481318950653076, "learning_rate": 2.925851902262095e-05, "loss": 0.1718, "step": 5786 }, { "epoch": 0.12751822043001868, "grad_norm": 0.8515716195106506, "learning_rate": 2.925818656727867e-05, "loss": 0.1094, "step": 5787 }, { "epoch": 0.12754025571953484, "grad_norm": 1.0083653926849365, "learning_rate": 2.92578540393118e-05, "loss": 0.1114, "step": 5788 }, { "epoch": 0.127562291009051, "grad_norm": 1.049816370010376, "learning_rate": 2.925752143872203e-05, "loss": 0.1509, "step": 5789 }, { "epoch": 0.12758432629856717, "grad_norm": 1.2684071063995361, "learning_rate": 2.925718876551106e-05, "loss": 0.135, "step": 5790 }, { "epoch": 0.1276063615880833, "grad_norm": 1.0540497303009033, "learning_rate": 2.925685601968058e-05, "loss": 0.1323, "step": 5791 }, { "epoch": 0.12762839687759947, "grad_norm": 1.2405973672866821, "learning_rate": 2.925652320123229e-05, "loss": 0.1399, "step": 5792 }, { "epoch": 0.12765043216711564, "grad_norm": 1.036934733390808, "learning_rate": 2.9256190310167882e-05, "loss": 0.1082, "step": 5793 }, { "epoch": 0.1276724674566318, "grad_norm": 1.206536889076233, "learning_rate": 2.9255857346489054e-05, "loss": 0.112, "step": 5794 }, { "epoch": 0.12769450274614796, "grad_norm": 1.5068050622940063, "learning_rate": 2.9255524310197496e-05, "loss": 0.1319, "step": 5795 }, { "epoch": 0.12771653803566413, "grad_norm": 1.0712189674377441, "learning_rate": 2.925519120129491e-05, "loss": 0.1048, "step": 5796 }, { "epoch": 0.12773857332518027, "grad_norm": 1.0335419178009033, "learning_rate": 2.925485801978299e-05, "loss": 0.0894, "step": 5797 }, { "epoch": 0.12776060861469643, "grad_norm": 0.944340169429779, "learning_rate": 2.9254524765663437e-05, "loss": 0.1351, "step": 5798 }, { "epoch": 0.1277826439042126, "grad_norm": 1.2645400762557983, "learning_rate": 2.9254191438937942e-05, "loss": 0.1217, "step": 5799 }, { "epoch": 0.12780467919372876, "grad_norm": 1.8656784296035767, "learning_rate": 2.925385803960821e-05, "loss": 0.1346, "step": 5800 }, { "epoch": 0.12782671448324492, "grad_norm": 1.1839561462402344, "learning_rate": 2.9253524567675934e-05, "loss": 0.1574, "step": 5801 }, { "epoch": 0.1278487497727611, "grad_norm": 0.8846469521522522, "learning_rate": 2.925319102314281e-05, "loss": 0.152, "step": 5802 }, { "epoch": 0.12787078506227723, "grad_norm": 0.9332731366157532, "learning_rate": 2.9252857406010547e-05, "loss": 0.1469, "step": 5803 }, { "epoch": 0.1278928203517934, "grad_norm": 1.3758395910263062, "learning_rate": 2.9252523716280833e-05, "loss": 0.1123, "step": 5804 }, { "epoch": 0.12791485564130955, "grad_norm": 0.7017694115638733, "learning_rate": 2.9252189953955374e-05, "loss": 0.1169, "step": 5805 }, { "epoch": 0.12793689093082572, "grad_norm": 1.2609367370605469, "learning_rate": 2.9251856119035874e-05, "loss": 0.1417, "step": 5806 }, { "epoch": 0.12795892622034188, "grad_norm": 1.2231332063674927, "learning_rate": 2.925152221152402e-05, "loss": 0.1389, "step": 5807 }, { "epoch": 0.12798096150985805, "grad_norm": 1.0724202394485474, "learning_rate": 2.9251188231421527e-05, "loss": 0.1116, "step": 5808 }, { "epoch": 0.12800299679937419, "grad_norm": 1.183199167251587, "learning_rate": 2.925085417873009e-05, "loss": 0.113, "step": 5809 }, { "epoch": 0.12802503208889035, "grad_norm": 1.1026400327682495, "learning_rate": 2.925052005345141e-05, "loss": 0.1626, "step": 5810 }, { "epoch": 0.12804706737840651, "grad_norm": 0.992868185043335, "learning_rate": 2.925018585558719e-05, "loss": 0.1061, "step": 5811 }, { "epoch": 0.12806910266792268, "grad_norm": 1.169004201889038, "learning_rate": 2.924985158513913e-05, "loss": 0.1444, "step": 5812 }, { "epoch": 0.12809113795743884, "grad_norm": 0.7153785824775696, "learning_rate": 2.9249517242108933e-05, "loss": 0.1007, "step": 5813 }, { "epoch": 0.128113173246955, "grad_norm": 0.8071415424346924, "learning_rate": 2.9249182826498312e-05, "loss": 0.091, "step": 5814 }, { "epoch": 0.12813520853647115, "grad_norm": 1.27409029006958, "learning_rate": 2.9248848338308952e-05, "loss": 0.1491, "step": 5815 }, { "epoch": 0.1281572438259873, "grad_norm": 1.1482073068618774, "learning_rate": 2.9248513777542573e-05, "loss": 0.0915, "step": 5816 }, { "epoch": 0.12817927911550347, "grad_norm": 1.4918644428253174, "learning_rate": 2.924817914420087e-05, "loss": 0.1304, "step": 5817 }, { "epoch": 0.12820131440501964, "grad_norm": 1.0766180753707886, "learning_rate": 2.924784443828555e-05, "loss": 0.1278, "step": 5818 }, { "epoch": 0.1282233496945358, "grad_norm": 1.0095897912979126, "learning_rate": 2.9247509659798315e-05, "loss": 0.1287, "step": 5819 }, { "epoch": 0.12824538498405197, "grad_norm": 1.0765299797058105, "learning_rate": 2.924717480874088e-05, "loss": 0.1217, "step": 5820 }, { "epoch": 0.1282674202735681, "grad_norm": 1.1418076753616333, "learning_rate": 2.924683988511494e-05, "loss": 0.1272, "step": 5821 }, { "epoch": 0.12828945556308427, "grad_norm": 1.1609970331192017, "learning_rate": 2.9246504888922206e-05, "loss": 0.1458, "step": 5822 }, { "epoch": 0.12831149085260043, "grad_norm": 1.1936451196670532, "learning_rate": 2.924616982016438e-05, "loss": 0.1044, "step": 5823 }, { "epoch": 0.1283335261421166, "grad_norm": 1.2599093914031982, "learning_rate": 2.924583467884317e-05, "loss": 0.1148, "step": 5824 }, { "epoch": 0.12835556143163276, "grad_norm": 1.1985105276107788, "learning_rate": 2.9245499464960295e-05, "loss": 0.14, "step": 5825 }, { "epoch": 0.12837759672114893, "grad_norm": 1.0002671480178833, "learning_rate": 2.9245164178517446e-05, "loss": 0.103, "step": 5826 }, { "epoch": 0.1283996320106651, "grad_norm": 1.2503489255905151, "learning_rate": 2.9244828819516335e-05, "loss": 0.0946, "step": 5827 }, { "epoch": 0.12842166730018123, "grad_norm": 0.9613163471221924, "learning_rate": 2.9244493387958673e-05, "loss": 0.1113, "step": 5828 }, { "epoch": 0.1284437025896974, "grad_norm": 1.5005741119384766, "learning_rate": 2.9244157883846167e-05, "loss": 0.1218, "step": 5829 }, { "epoch": 0.12846573787921356, "grad_norm": 0.7436609268188477, "learning_rate": 2.924382230718053e-05, "loss": 0.1116, "step": 5830 }, { "epoch": 0.12848777316872972, "grad_norm": 1.1668208837509155, "learning_rate": 2.9243486657963464e-05, "loss": 0.1181, "step": 5831 }, { "epoch": 0.1285098084582459, "grad_norm": 1.5828884840011597, "learning_rate": 2.9243150936196685e-05, "loss": 0.1379, "step": 5832 }, { "epoch": 0.12853184374776205, "grad_norm": 0.8887385129928589, "learning_rate": 2.9242815141881896e-05, "loss": 0.1118, "step": 5833 }, { "epoch": 0.1285538790372782, "grad_norm": 1.2672237157821655, "learning_rate": 2.9242479275020823e-05, "loss": 0.1922, "step": 5834 }, { "epoch": 0.12857591432679435, "grad_norm": 1.1805082559585571, "learning_rate": 2.9242143335615155e-05, "loss": 0.1084, "step": 5835 }, { "epoch": 0.12859794961631052, "grad_norm": 0.9897634983062744, "learning_rate": 2.9241807323666615e-05, "loss": 0.1212, "step": 5836 }, { "epoch": 0.12861998490582668, "grad_norm": 1.4044498205184937, "learning_rate": 2.924147123917692e-05, "loss": 0.111, "step": 5837 }, { "epoch": 0.12864202019534285, "grad_norm": 0.8331968784332275, "learning_rate": 2.924113508214777e-05, "loss": 0.1382, "step": 5838 }, { "epoch": 0.128664055484859, "grad_norm": 0.9093057513237, "learning_rate": 2.9240798852580882e-05, "loss": 0.0867, "step": 5839 }, { "epoch": 0.12868609077437515, "grad_norm": 1.1528843641281128, "learning_rate": 2.924046255047797e-05, "loss": 0.1061, "step": 5840 }, { "epoch": 0.12870812606389131, "grad_norm": 1.232616662979126, "learning_rate": 2.9240126175840746e-05, "loss": 0.1267, "step": 5841 }, { "epoch": 0.12873016135340748, "grad_norm": 0.9755323529243469, "learning_rate": 2.9239789728670922e-05, "loss": 0.0935, "step": 5842 }, { "epoch": 0.12875219664292364, "grad_norm": 0.8731821775436401, "learning_rate": 2.923945320897021e-05, "loss": 0.1045, "step": 5843 }, { "epoch": 0.1287742319324398, "grad_norm": 0.8520114421844482, "learning_rate": 2.9239116616740335e-05, "loss": 0.1324, "step": 5844 }, { "epoch": 0.12879626722195597, "grad_norm": 1.0304361581802368, "learning_rate": 2.9238779951982997e-05, "loss": 0.117, "step": 5845 }, { "epoch": 0.1288183025114721, "grad_norm": 1.1827235221862793, "learning_rate": 2.923844321469992e-05, "loss": 0.1289, "step": 5846 }, { "epoch": 0.12884033780098827, "grad_norm": 1.5250794887542725, "learning_rate": 2.9238106404892815e-05, "loss": 0.1215, "step": 5847 }, { "epoch": 0.12886237309050444, "grad_norm": 1.1617107391357422, "learning_rate": 2.9237769522563402e-05, "loss": 0.1272, "step": 5848 }, { "epoch": 0.1288844083800206, "grad_norm": 1.1001701354980469, "learning_rate": 2.9237432567713394e-05, "loss": 0.1312, "step": 5849 }, { "epoch": 0.12890644366953677, "grad_norm": 1.112402319908142, "learning_rate": 2.92370955403445e-05, "loss": 0.1166, "step": 5850 }, { "epoch": 0.12892847895905293, "grad_norm": 1.2562283277511597, "learning_rate": 2.9236758440458453e-05, "loss": 0.1223, "step": 5851 }, { "epoch": 0.12895051424856907, "grad_norm": 1.4831336736679077, "learning_rate": 2.9236421268056957e-05, "loss": 0.1512, "step": 5852 }, { "epoch": 0.12897254953808523, "grad_norm": 1.0951642990112305, "learning_rate": 2.923608402314173e-05, "loss": 0.1433, "step": 5853 }, { "epoch": 0.1289945848276014, "grad_norm": 0.7763475179672241, "learning_rate": 2.92357467057145e-05, "loss": 0.1477, "step": 5854 }, { "epoch": 0.12901662011711756, "grad_norm": 1.0169389247894287, "learning_rate": 2.9235409315776972e-05, "loss": 0.0987, "step": 5855 }, { "epoch": 0.12903865540663373, "grad_norm": 1.548058271408081, "learning_rate": 2.9235071853330872e-05, "loss": 0.1229, "step": 5856 }, { "epoch": 0.1290606906961499, "grad_norm": 1.1453660726547241, "learning_rate": 2.923473431837792e-05, "loss": 0.1204, "step": 5857 }, { "epoch": 0.12908272598566606, "grad_norm": 1.349799394607544, "learning_rate": 2.9234396710919833e-05, "loss": 0.1686, "step": 5858 }, { "epoch": 0.1291047612751822, "grad_norm": 1.6684367656707764, "learning_rate": 2.9234059030958325e-05, "loss": 0.1126, "step": 5859 }, { "epoch": 0.12912679656469836, "grad_norm": 1.0186854600906372, "learning_rate": 2.9233721278495128e-05, "loss": 0.1366, "step": 5860 }, { "epoch": 0.12914883185421452, "grad_norm": 0.9391649961471558, "learning_rate": 2.923338345353195e-05, "loss": 0.1037, "step": 5861 }, { "epoch": 0.1291708671437307, "grad_norm": 1.4798964262008667, "learning_rate": 2.9233045556070526e-05, "loss": 0.1356, "step": 5862 }, { "epoch": 0.12919290243324685, "grad_norm": 0.8362581729888916, "learning_rate": 2.923270758611256e-05, "loss": 0.1371, "step": 5863 }, { "epoch": 0.12921493772276302, "grad_norm": 1.4378193616867065, "learning_rate": 2.9232369543659786e-05, "loss": 0.1452, "step": 5864 }, { "epoch": 0.12923697301227915, "grad_norm": 1.1898746490478516, "learning_rate": 2.9232031428713924e-05, "loss": 0.112, "step": 5865 }, { "epoch": 0.12925900830179532, "grad_norm": 1.291774868965149, "learning_rate": 2.923169324127669e-05, "loss": 0.1564, "step": 5866 }, { "epoch": 0.12928104359131148, "grad_norm": 0.9212402105331421, "learning_rate": 2.9231354981349815e-05, "loss": 0.1276, "step": 5867 }, { "epoch": 0.12930307888082765, "grad_norm": 1.204946517944336, "learning_rate": 2.9231016648935013e-05, "loss": 0.1177, "step": 5868 }, { "epoch": 0.1293251141703438, "grad_norm": 1.0905722379684448, "learning_rate": 2.9230678244034015e-05, "loss": 0.1376, "step": 5869 }, { "epoch": 0.12934714945985998, "grad_norm": 1.126157522201538, "learning_rate": 2.923033976664854e-05, "loss": 0.1376, "step": 5870 }, { "epoch": 0.12936918474937611, "grad_norm": 0.9898471832275391, "learning_rate": 2.923000121678031e-05, "loss": 0.123, "step": 5871 }, { "epoch": 0.12939122003889228, "grad_norm": 1.2184613943099976, "learning_rate": 2.922966259443106e-05, "loss": 0.1471, "step": 5872 }, { "epoch": 0.12941325532840844, "grad_norm": 1.3957921266555786, "learning_rate": 2.9229323899602503e-05, "loss": 0.1301, "step": 5873 }, { "epoch": 0.1294352906179246, "grad_norm": 0.9624553918838501, "learning_rate": 2.922898513229637e-05, "loss": 0.1193, "step": 5874 }, { "epoch": 0.12945732590744077, "grad_norm": 1.577324390411377, "learning_rate": 2.922864629251439e-05, "loss": 0.1436, "step": 5875 }, { "epoch": 0.12947936119695694, "grad_norm": 1.0870720148086548, "learning_rate": 2.922830738025828e-05, "loss": 0.1309, "step": 5876 }, { "epoch": 0.12950139648647307, "grad_norm": 1.1639034748077393, "learning_rate": 2.922796839552977e-05, "loss": 0.1834, "step": 5877 }, { "epoch": 0.12952343177598924, "grad_norm": 0.9492626190185547, "learning_rate": 2.922762933833059e-05, "loss": 0.1445, "step": 5878 }, { "epoch": 0.1295454670655054, "grad_norm": 1.1081258058547974, "learning_rate": 2.9227290208662463e-05, "loss": 0.1392, "step": 5879 }, { "epoch": 0.12956750235502157, "grad_norm": 0.8587021231651306, "learning_rate": 2.922695100652712e-05, "loss": 0.1208, "step": 5880 }, { "epoch": 0.12958953764453773, "grad_norm": 1.0535175800323486, "learning_rate": 2.9226611731926285e-05, "loss": 0.1498, "step": 5881 }, { "epoch": 0.1296115729340539, "grad_norm": 0.9009025692939758, "learning_rate": 2.9226272384861686e-05, "loss": 0.1472, "step": 5882 }, { "epoch": 0.12963360822357003, "grad_norm": 1.1537960767745972, "learning_rate": 2.9225932965335052e-05, "loss": 0.149, "step": 5883 }, { "epoch": 0.1296556435130862, "grad_norm": 1.6272876262664795, "learning_rate": 2.9225593473348118e-05, "loss": 0.1745, "step": 5884 }, { "epoch": 0.12967767880260236, "grad_norm": 0.9539704918861389, "learning_rate": 2.9225253908902603e-05, "loss": 0.1233, "step": 5885 }, { "epoch": 0.12969971409211853, "grad_norm": 1.3122925758361816, "learning_rate": 2.9224914272000244e-05, "loss": 0.1249, "step": 5886 }, { "epoch": 0.1297217493816347, "grad_norm": 1.0691721439361572, "learning_rate": 2.922457456264277e-05, "loss": 0.1809, "step": 5887 }, { "epoch": 0.12974378467115086, "grad_norm": 0.9620074033737183, "learning_rate": 2.9224234780831905e-05, "loss": 0.1256, "step": 5888 }, { "epoch": 0.129765819960667, "grad_norm": 0.7059027552604675, "learning_rate": 2.922389492656939e-05, "loss": 0.0947, "step": 5889 }, { "epoch": 0.12978785525018316, "grad_norm": 1.2395130395889282, "learning_rate": 2.9223554999856946e-05, "loss": 0.1025, "step": 5890 }, { "epoch": 0.12980989053969932, "grad_norm": 1.145582675933838, "learning_rate": 2.922321500069631e-05, "loss": 0.083, "step": 5891 }, { "epoch": 0.1298319258292155, "grad_norm": 1.295172929763794, "learning_rate": 2.922287492908922e-05, "loss": 0.1042, "step": 5892 }, { "epoch": 0.12985396111873165, "grad_norm": 1.2926095724105835, "learning_rate": 2.9222534785037393e-05, "loss": 0.0926, "step": 5893 }, { "epoch": 0.12987599640824782, "grad_norm": 0.7959326505661011, "learning_rate": 2.922219456854257e-05, "loss": 0.1174, "step": 5894 }, { "epoch": 0.12989803169776398, "grad_norm": 1.3249973058700562, "learning_rate": 2.9221854279606487e-05, "loss": 0.1066, "step": 5895 }, { "epoch": 0.12992006698728012, "grad_norm": 0.9294772148132324, "learning_rate": 2.922151391823087e-05, "loss": 0.1222, "step": 5896 }, { "epoch": 0.12994210227679628, "grad_norm": 1.115046501159668, "learning_rate": 2.922117348441746e-05, "loss": 0.1331, "step": 5897 }, { "epoch": 0.12996413756631245, "grad_norm": 1.3245108127593994, "learning_rate": 2.9220832978167987e-05, "loss": 0.1524, "step": 5898 }, { "epoch": 0.1299861728558286, "grad_norm": 0.6428738832473755, "learning_rate": 2.9220492399484188e-05, "loss": 0.097, "step": 5899 }, { "epoch": 0.13000820814534478, "grad_norm": 1.150499939918518, "learning_rate": 2.9220151748367793e-05, "loss": 0.1295, "step": 5900 }, { "epoch": 0.13003024343486094, "grad_norm": 1.1581597328186035, "learning_rate": 2.9219811024820536e-05, "loss": 0.1316, "step": 5901 }, { "epoch": 0.13005227872437708, "grad_norm": 1.6447521448135376, "learning_rate": 2.9219470228844164e-05, "loss": 0.1562, "step": 5902 }, { "epoch": 0.13007431401389324, "grad_norm": 1.3723315000534058, "learning_rate": 2.92191293604404e-05, "loss": 0.1302, "step": 5903 }, { "epoch": 0.1300963493034094, "grad_norm": 1.0405099391937256, "learning_rate": 2.9218788419610984e-05, "loss": 0.0882, "step": 5904 }, { "epoch": 0.13011838459292557, "grad_norm": 2.054518222808838, "learning_rate": 2.9218447406357656e-05, "loss": 0.1489, "step": 5905 }, { "epoch": 0.13014041988244174, "grad_norm": 1.1495839357376099, "learning_rate": 2.921810632068215e-05, "loss": 0.1516, "step": 5906 }, { "epoch": 0.1301624551719579, "grad_norm": 1.3869301080703735, "learning_rate": 2.9217765162586207e-05, "loss": 0.1946, "step": 5907 }, { "epoch": 0.13018449046147404, "grad_norm": 1.436948537826538, "learning_rate": 2.9217423932071557e-05, "loss": 0.1441, "step": 5908 }, { "epoch": 0.1302065257509902, "grad_norm": 0.885657787322998, "learning_rate": 2.9217082629139945e-05, "loss": 0.0965, "step": 5909 }, { "epoch": 0.13022856104050637, "grad_norm": 0.9455963969230652, "learning_rate": 2.9216741253793107e-05, "loss": 0.0884, "step": 5910 }, { "epoch": 0.13025059633002253, "grad_norm": 1.0614734888076782, "learning_rate": 2.9216399806032783e-05, "loss": 0.1025, "step": 5911 }, { "epoch": 0.1302726316195387, "grad_norm": 1.070999026298523, "learning_rate": 2.9216058285860712e-05, "loss": 0.1334, "step": 5912 }, { "epoch": 0.13029466690905486, "grad_norm": 1.203698992729187, "learning_rate": 2.921571669327863e-05, "loss": 0.1272, "step": 5913 }, { "epoch": 0.130316702198571, "grad_norm": 1.0249032974243164, "learning_rate": 2.9215375028288285e-05, "loss": 0.1429, "step": 5914 }, { "epoch": 0.13033873748808716, "grad_norm": 0.9428771734237671, "learning_rate": 2.921503329089141e-05, "loss": 0.1021, "step": 5915 }, { "epoch": 0.13036077277760333, "grad_norm": 0.867654025554657, "learning_rate": 2.921469148108975e-05, "loss": 0.1112, "step": 5916 }, { "epoch": 0.1303828080671195, "grad_norm": 1.1452288627624512, "learning_rate": 2.9214349598885036e-05, "loss": 0.1293, "step": 5917 }, { "epoch": 0.13040484335663566, "grad_norm": 1.0686986446380615, "learning_rate": 2.9214007644279024e-05, "loss": 0.1635, "step": 5918 }, { "epoch": 0.13042687864615182, "grad_norm": 1.1061900854110718, "learning_rate": 2.9213665617273446e-05, "loss": 0.0665, "step": 5919 }, { "epoch": 0.13044891393566796, "grad_norm": 0.778343915939331, "learning_rate": 2.921332351787005e-05, "loss": 0.1465, "step": 5920 }, { "epoch": 0.13047094922518412, "grad_norm": 1.185791254043579, "learning_rate": 2.9212981346070572e-05, "loss": 0.1718, "step": 5921 }, { "epoch": 0.1304929845147003, "grad_norm": 1.3136039972305298, "learning_rate": 2.9212639101876758e-05, "loss": 0.1888, "step": 5922 }, { "epoch": 0.13051501980421645, "grad_norm": 0.9985994100570679, "learning_rate": 2.9212296785290355e-05, "loss": 0.1231, "step": 5923 }, { "epoch": 0.13053705509373262, "grad_norm": 0.8685779571533203, "learning_rate": 2.92119543963131e-05, "loss": 0.15, "step": 5924 }, { "epoch": 0.13055909038324878, "grad_norm": 1.09617018699646, "learning_rate": 2.921161193494674e-05, "loss": 0.1465, "step": 5925 }, { "epoch": 0.13058112567276492, "grad_norm": 0.8470308780670166, "learning_rate": 2.9211269401193023e-05, "loss": 0.1013, "step": 5926 }, { "epoch": 0.13060316096228108, "grad_norm": 1.3142648935317993, "learning_rate": 2.921092679505369e-05, "loss": 0.1237, "step": 5927 }, { "epoch": 0.13062519625179725, "grad_norm": 0.808910608291626, "learning_rate": 2.9210584116530484e-05, "loss": 0.1466, "step": 5928 }, { "epoch": 0.1306472315413134, "grad_norm": 0.9539929032325745, "learning_rate": 2.9210241365625156e-05, "loss": 0.0963, "step": 5929 }, { "epoch": 0.13066926683082958, "grad_norm": 1.090851068496704, "learning_rate": 2.9209898542339446e-05, "loss": 0.1431, "step": 5930 }, { "epoch": 0.13069130212034574, "grad_norm": 1.5495809316635132, "learning_rate": 2.92095556466751e-05, "loss": 0.161, "step": 5931 }, { "epoch": 0.1307133374098619, "grad_norm": 1.5214673280715942, "learning_rate": 2.920921267863387e-05, "loss": 0.109, "step": 5932 }, { "epoch": 0.13073537269937804, "grad_norm": 1.350699782371521, "learning_rate": 2.9208869638217502e-05, "loss": 0.1238, "step": 5933 }, { "epoch": 0.1307574079888942, "grad_norm": 1.3571014404296875, "learning_rate": 2.920852652542774e-05, "loss": 0.1805, "step": 5934 }, { "epoch": 0.13077944327841037, "grad_norm": 0.9466465711593628, "learning_rate": 2.9208183340266335e-05, "loss": 0.116, "step": 5935 }, { "epoch": 0.13080147856792654, "grad_norm": 0.8075543642044067, "learning_rate": 2.9207840082735032e-05, "loss": 0.1224, "step": 5936 }, { "epoch": 0.1308235138574427, "grad_norm": 1.2905131578445435, "learning_rate": 2.9207496752835582e-05, "loss": 0.1956, "step": 5937 }, { "epoch": 0.13084554914695887, "grad_norm": 0.8154808282852173, "learning_rate": 2.9207153350569727e-05, "loss": 0.118, "step": 5938 }, { "epoch": 0.130867584436475, "grad_norm": 0.954849123954773, "learning_rate": 2.9206809875939228e-05, "loss": 0.1243, "step": 5939 }, { "epoch": 0.13088961972599117, "grad_norm": 0.9978972673416138, "learning_rate": 2.9206466328945824e-05, "loss": 0.1185, "step": 5940 }, { "epoch": 0.13091165501550733, "grad_norm": 0.883063018321991, "learning_rate": 2.920612270959127e-05, "loss": 0.0792, "step": 5941 }, { "epoch": 0.1309336903050235, "grad_norm": 0.8686195015907288, "learning_rate": 2.9205779017877315e-05, "loss": 0.0998, "step": 5942 }, { "epoch": 0.13095572559453966, "grad_norm": 1.3799524307250977, "learning_rate": 2.920543525380571e-05, "loss": 0.1419, "step": 5943 }, { "epoch": 0.13097776088405583, "grad_norm": 1.3339033126831055, "learning_rate": 2.9205091417378203e-05, "loss": 0.1129, "step": 5944 }, { "epoch": 0.13099979617357196, "grad_norm": 1.3303344249725342, "learning_rate": 2.920474750859655e-05, "loss": 0.1507, "step": 5945 }, { "epoch": 0.13102183146308813, "grad_norm": 1.6230193376541138, "learning_rate": 2.9204403527462503e-05, "loss": 0.1262, "step": 5946 }, { "epoch": 0.1310438667526043, "grad_norm": 0.9430950880050659, "learning_rate": 2.9204059473977813e-05, "loss": 0.1745, "step": 5947 }, { "epoch": 0.13106590204212046, "grad_norm": 1.211592674255371, "learning_rate": 2.9203715348144227e-05, "loss": 0.1436, "step": 5948 }, { "epoch": 0.13108793733163662, "grad_norm": 1.0167243480682373, "learning_rate": 2.92033711499635e-05, "loss": 0.1174, "step": 5949 }, { "epoch": 0.1311099726211528, "grad_norm": 1.0945956707000732, "learning_rate": 2.920302687943739e-05, "loss": 0.1417, "step": 5950 }, { "epoch": 0.13113200791066892, "grad_norm": 1.050209403038025, "learning_rate": 2.9202682536567652e-05, "loss": 0.1213, "step": 5951 }, { "epoch": 0.1311540432001851, "grad_norm": 1.9382152557373047, "learning_rate": 2.920233812135603e-05, "loss": 0.1316, "step": 5952 }, { "epoch": 0.13117607848970125, "grad_norm": 1.4416325092315674, "learning_rate": 2.920199363380429e-05, "loss": 0.1224, "step": 5953 }, { "epoch": 0.13119811377921742, "grad_norm": 0.9096290469169617, "learning_rate": 2.9201649073914174e-05, "loss": 0.1021, "step": 5954 }, { "epoch": 0.13122014906873358, "grad_norm": 0.9010407328605652, "learning_rate": 2.920130444168745e-05, "loss": 0.1162, "step": 5955 }, { "epoch": 0.13124218435824975, "grad_norm": 2.0562779903411865, "learning_rate": 2.920095973712586e-05, "loss": 0.1726, "step": 5956 }, { "epoch": 0.13126421964776588, "grad_norm": 1.4551479816436768, "learning_rate": 2.9200614960231174e-05, "loss": 0.1476, "step": 5957 }, { "epoch": 0.13128625493728205, "grad_norm": 1.307591199874878, "learning_rate": 2.920027011100514e-05, "loss": 0.1437, "step": 5958 }, { "epoch": 0.1313082902267982, "grad_norm": 1.5841641426086426, "learning_rate": 2.9199925189449506e-05, "loss": 0.1537, "step": 5959 }, { "epoch": 0.13133032551631438, "grad_norm": 1.6225767135620117, "learning_rate": 2.919958019556605e-05, "loss": 0.1361, "step": 5960 }, { "epoch": 0.13135236080583054, "grad_norm": 0.636983335018158, "learning_rate": 2.9199235129356513e-05, "loss": 0.1308, "step": 5961 }, { "epoch": 0.1313743960953467, "grad_norm": 1.0540623664855957, "learning_rate": 2.9198889990822657e-05, "loss": 0.1207, "step": 5962 }, { "epoch": 0.13139643138486284, "grad_norm": 0.8513150811195374, "learning_rate": 2.919854477996624e-05, "loss": 0.1281, "step": 5963 }, { "epoch": 0.131418466674379, "grad_norm": 1.163131833076477, "learning_rate": 2.9198199496789023e-05, "loss": 0.1125, "step": 5964 }, { "epoch": 0.13144050196389517, "grad_norm": 1.048293113708496, "learning_rate": 2.9197854141292764e-05, "loss": 0.1358, "step": 5965 }, { "epoch": 0.13146253725341134, "grad_norm": 1.3085670471191406, "learning_rate": 2.9197508713479214e-05, "loss": 0.1148, "step": 5966 }, { "epoch": 0.1314845725429275, "grad_norm": 0.8382241725921631, "learning_rate": 2.9197163213350145e-05, "loss": 0.0777, "step": 5967 }, { "epoch": 0.13150660783244367, "grad_norm": 0.9749868512153625, "learning_rate": 2.91968176409073e-05, "loss": 0.1545, "step": 5968 }, { "epoch": 0.13152864312195983, "grad_norm": 1.4259803295135498, "learning_rate": 2.9196471996152462e-05, "loss": 0.097, "step": 5969 }, { "epoch": 0.13155067841147597, "grad_norm": 1.0435357093811035, "learning_rate": 2.9196126279087376e-05, "loss": 0.1173, "step": 5970 }, { "epoch": 0.13157271370099213, "grad_norm": 1.4690996408462524, "learning_rate": 2.9195780489713804e-05, "loss": 0.1046, "step": 5971 }, { "epoch": 0.1315947489905083, "grad_norm": 1.0907726287841797, "learning_rate": 2.9195434628033512e-05, "loss": 0.1173, "step": 5972 }, { "epoch": 0.13161678428002446, "grad_norm": 0.8675603270530701, "learning_rate": 2.9195088694048255e-05, "loss": 0.1166, "step": 5973 }, { "epoch": 0.13163881956954063, "grad_norm": 0.7621210217475891, "learning_rate": 2.9194742687759804e-05, "loss": 0.1559, "step": 5974 }, { "epoch": 0.1316608548590568, "grad_norm": 1.4736299514770508, "learning_rate": 2.9194396609169916e-05, "loss": 0.185, "step": 5975 }, { "epoch": 0.13168289014857293, "grad_norm": 1.1191946268081665, "learning_rate": 2.919405045828035e-05, "loss": 0.0976, "step": 5976 }, { "epoch": 0.1317049254380891, "grad_norm": 1.2170902490615845, "learning_rate": 2.919370423509288e-05, "loss": 0.1438, "step": 5977 }, { "epoch": 0.13172696072760526, "grad_norm": 1.5709798336029053, "learning_rate": 2.9193357939609257e-05, "loss": 0.1814, "step": 5978 }, { "epoch": 0.13174899601712142, "grad_norm": 2.0030059814453125, "learning_rate": 2.9193011571831255e-05, "loss": 0.1615, "step": 5979 }, { "epoch": 0.1317710313066376, "grad_norm": 1.3708871603012085, "learning_rate": 2.9192665131760634e-05, "loss": 0.1593, "step": 5980 }, { "epoch": 0.13179306659615375, "grad_norm": 1.2625528573989868, "learning_rate": 2.919231861939916e-05, "loss": 0.1362, "step": 5981 }, { "epoch": 0.1318151018856699, "grad_norm": 0.8424473404884338, "learning_rate": 2.9191972034748596e-05, "loss": 0.1216, "step": 5982 }, { "epoch": 0.13183713717518605, "grad_norm": 1.7158889770507812, "learning_rate": 2.9191625377810703e-05, "loss": 0.1079, "step": 5983 }, { "epoch": 0.13185917246470222, "grad_norm": 1.612646222114563, "learning_rate": 2.9191278648587256e-05, "loss": 0.0964, "step": 5984 }, { "epoch": 0.13188120775421838, "grad_norm": 1.678295373916626, "learning_rate": 2.9190931847080015e-05, "loss": 0.1372, "step": 5985 }, { "epoch": 0.13190324304373455, "grad_norm": 1.5071951150894165, "learning_rate": 2.919058497329075e-05, "loss": 0.178, "step": 5986 }, { "epoch": 0.1319252783332507, "grad_norm": 1.4490852355957031, "learning_rate": 2.9190238027221228e-05, "loss": 0.1276, "step": 5987 }, { "epoch": 0.13194731362276685, "grad_norm": 1.0426652431488037, "learning_rate": 2.9189891008873214e-05, "loss": 0.1885, "step": 5988 }, { "epoch": 0.131969348912283, "grad_norm": 1.3155231475830078, "learning_rate": 2.9189543918248472e-05, "loss": 0.159, "step": 5989 }, { "epoch": 0.13199138420179918, "grad_norm": 1.4327499866485596, "learning_rate": 2.9189196755348777e-05, "loss": 0.141, "step": 5990 }, { "epoch": 0.13201341949131534, "grad_norm": 1.1656585931777954, "learning_rate": 2.9188849520175887e-05, "loss": 0.1346, "step": 5991 }, { "epoch": 0.1320354547808315, "grad_norm": 1.145922303199768, "learning_rate": 2.9188502212731585e-05, "loss": 0.1257, "step": 5992 }, { "epoch": 0.13205749007034767, "grad_norm": 1.2929233312606812, "learning_rate": 2.9188154833017636e-05, "loss": 0.1143, "step": 5993 }, { "epoch": 0.1320795253598638, "grad_norm": 0.8638651967048645, "learning_rate": 2.9187807381035797e-05, "loss": 0.1153, "step": 5994 }, { "epoch": 0.13210156064937997, "grad_norm": 1.1595427989959717, "learning_rate": 2.9187459856787852e-05, "loss": 0.1582, "step": 5995 }, { "epoch": 0.13212359593889614, "grad_norm": 0.9697310328483582, "learning_rate": 2.9187112260275566e-05, "loss": 0.1145, "step": 5996 }, { "epoch": 0.1321456312284123, "grad_norm": 1.2796632051467896, "learning_rate": 2.918676459150071e-05, "loss": 0.1207, "step": 5997 }, { "epoch": 0.13216766651792847, "grad_norm": 1.1531363725662231, "learning_rate": 2.9186416850465053e-05, "loss": 0.1106, "step": 5998 }, { "epoch": 0.13218970180744463, "grad_norm": 1.197468876838684, "learning_rate": 2.9186069037170367e-05, "loss": 0.1724, "step": 5999 }, { "epoch": 0.1322117370969608, "grad_norm": 1.417424201965332, "learning_rate": 2.9185721151618425e-05, "loss": 0.1467, "step": 6000 }, { "epoch": 0.13223377238647693, "grad_norm": 1.4845900535583496, "learning_rate": 2.9185373193810997e-05, "loss": 0.1593, "step": 6001 }, { "epoch": 0.1322558076759931, "grad_norm": 1.6718841791152954, "learning_rate": 2.918502516374986e-05, "loss": 0.117, "step": 6002 }, { "epoch": 0.13227784296550926, "grad_norm": 0.8864200115203857, "learning_rate": 2.918467706143678e-05, "loss": 0.1542, "step": 6003 }, { "epoch": 0.13229987825502543, "grad_norm": 1.1100860834121704, "learning_rate": 2.9184328886873536e-05, "loss": 0.136, "step": 6004 }, { "epoch": 0.1323219135445416, "grad_norm": 1.1844383478164673, "learning_rate": 2.91839806400619e-05, "loss": 0.122, "step": 6005 }, { "epoch": 0.13234394883405776, "grad_norm": 1.0197833776474, "learning_rate": 2.9183632321003637e-05, "loss": 0.1246, "step": 6006 }, { "epoch": 0.1323659841235739, "grad_norm": 1.1767524480819702, "learning_rate": 2.9183283929700536e-05, "loss": 0.1227, "step": 6007 }, { "epoch": 0.13238801941309006, "grad_norm": 0.781581461429596, "learning_rate": 2.918293546615436e-05, "loss": 0.1036, "step": 6008 }, { "epoch": 0.13241005470260622, "grad_norm": 1.2185508012771606, "learning_rate": 2.9182586930366894e-05, "loss": 0.1377, "step": 6009 }, { "epoch": 0.1324320899921224, "grad_norm": 1.5265729427337646, "learning_rate": 2.91822383223399e-05, "loss": 0.1948, "step": 6010 }, { "epoch": 0.13245412528163855, "grad_norm": 1.167432427406311, "learning_rate": 2.9181889642075164e-05, "loss": 0.1542, "step": 6011 }, { "epoch": 0.13247616057115472, "grad_norm": 1.5404200553894043, "learning_rate": 2.918154088957446e-05, "loss": 0.1103, "step": 6012 }, { "epoch": 0.13249819586067085, "grad_norm": 1.1935381889343262, "learning_rate": 2.9181192064839565e-05, "loss": 0.1466, "step": 6013 }, { "epoch": 0.13252023115018702, "grad_norm": 1.4326393604278564, "learning_rate": 2.918084316787225e-05, "loss": 0.1231, "step": 6014 }, { "epoch": 0.13254226643970318, "grad_norm": 1.492486834526062, "learning_rate": 2.9180494198674298e-05, "loss": 0.1603, "step": 6015 }, { "epoch": 0.13256430172921935, "grad_norm": 0.6632392406463623, "learning_rate": 2.9180145157247488e-05, "loss": 0.1175, "step": 6016 }, { "epoch": 0.1325863370187355, "grad_norm": 0.7124050855636597, "learning_rate": 2.917979604359359e-05, "loss": 0.0995, "step": 6017 }, { "epoch": 0.13260837230825168, "grad_norm": 0.9827264547348022, "learning_rate": 2.9179446857714388e-05, "loss": 0.125, "step": 6018 }, { "epoch": 0.1326304075977678, "grad_norm": 1.068874478340149, "learning_rate": 2.9179097599611664e-05, "loss": 0.1297, "step": 6019 }, { "epoch": 0.13265244288728398, "grad_norm": 1.2275819778442383, "learning_rate": 2.9178748269287187e-05, "loss": 0.1393, "step": 6020 }, { "epoch": 0.13267447817680014, "grad_norm": 1.1606855392456055, "learning_rate": 2.9178398866742746e-05, "loss": 0.1277, "step": 6021 }, { "epoch": 0.1326965134663163, "grad_norm": 0.8233159184455872, "learning_rate": 2.917804939198011e-05, "loss": 0.1139, "step": 6022 }, { "epoch": 0.13271854875583247, "grad_norm": 0.8085532188415527, "learning_rate": 2.9177699845001072e-05, "loss": 0.1269, "step": 6023 }, { "epoch": 0.13274058404534864, "grad_norm": 1.0210903882980347, "learning_rate": 2.9177350225807406e-05, "loss": 0.1195, "step": 6024 }, { "epoch": 0.13276261933486477, "grad_norm": 1.3455613851547241, "learning_rate": 2.9177000534400888e-05, "loss": 0.1267, "step": 6025 }, { "epoch": 0.13278465462438094, "grad_norm": 1.30085027217865, "learning_rate": 2.9176650770783307e-05, "loss": 0.1187, "step": 6026 }, { "epoch": 0.1328066899138971, "grad_norm": 1.351058006286621, "learning_rate": 2.917630093495644e-05, "loss": 0.1581, "step": 6027 }, { "epoch": 0.13282872520341327, "grad_norm": 0.8461217880249023, "learning_rate": 2.9175951026922074e-05, "loss": 0.1394, "step": 6028 }, { "epoch": 0.13285076049292943, "grad_norm": 1.32439124584198, "learning_rate": 2.9175601046681986e-05, "loss": 0.1453, "step": 6029 }, { "epoch": 0.1328727957824456, "grad_norm": 1.6235507726669312, "learning_rate": 2.9175250994237958e-05, "loss": 0.1039, "step": 6030 }, { "epoch": 0.13289483107196173, "grad_norm": 0.901166558265686, "learning_rate": 2.917490086959178e-05, "loss": 0.1334, "step": 6031 }, { "epoch": 0.1329168663614779, "grad_norm": 0.8742064833641052, "learning_rate": 2.917455067274523e-05, "loss": 0.1262, "step": 6032 }, { "epoch": 0.13293890165099406, "grad_norm": 1.0094743967056274, "learning_rate": 2.9174200403700086e-05, "loss": 0.1022, "step": 6033 }, { "epoch": 0.13296093694051023, "grad_norm": 1.911965250968933, "learning_rate": 2.917385006245814e-05, "loss": 0.1362, "step": 6034 }, { "epoch": 0.1329829722300264, "grad_norm": 0.6685975790023804, "learning_rate": 2.9173499649021183e-05, "loss": 0.1022, "step": 6035 }, { "epoch": 0.13300500751954256, "grad_norm": 0.891527533531189, "learning_rate": 2.9173149163390983e-05, "loss": 0.0793, "step": 6036 }, { "epoch": 0.13302704280905872, "grad_norm": 0.9294047951698303, "learning_rate": 2.917279860556934e-05, "loss": 0.1658, "step": 6037 }, { "epoch": 0.13304907809857486, "grad_norm": 1.2770639657974243, "learning_rate": 2.917244797555803e-05, "loss": 0.12, "step": 6038 }, { "epoch": 0.13307111338809102, "grad_norm": 0.932063102722168, "learning_rate": 2.9172097273358847e-05, "loss": 0.0976, "step": 6039 }, { "epoch": 0.1330931486776072, "grad_norm": 1.4751063585281372, "learning_rate": 2.9171746498973567e-05, "loss": 0.1558, "step": 6040 }, { "epoch": 0.13311518396712335, "grad_norm": 1.0300102233886719, "learning_rate": 2.9171395652403986e-05, "loss": 0.1248, "step": 6041 }, { "epoch": 0.13313721925663952, "grad_norm": 0.7945769429206848, "learning_rate": 2.9171044733651888e-05, "loss": 0.1338, "step": 6042 }, { "epoch": 0.13315925454615568, "grad_norm": 0.6144121289253235, "learning_rate": 2.9170693742719057e-05, "loss": 0.1158, "step": 6043 }, { "epoch": 0.13318128983567182, "grad_norm": 1.2105128765106201, "learning_rate": 2.917034267960729e-05, "loss": 0.103, "step": 6044 }, { "epoch": 0.13320332512518798, "grad_norm": 1.3923498392105103, "learning_rate": 2.9169991544318363e-05, "loss": 0.124, "step": 6045 }, { "epoch": 0.13322536041470415, "grad_norm": 1.1421535015106201, "learning_rate": 2.9169640336854072e-05, "loss": 0.1409, "step": 6046 }, { "epoch": 0.1332473957042203, "grad_norm": 1.0783343315124512, "learning_rate": 2.9169289057216207e-05, "loss": 0.1232, "step": 6047 }, { "epoch": 0.13326943099373648, "grad_norm": 1.3886851072311401, "learning_rate": 2.9168937705406547e-05, "loss": 0.1136, "step": 6048 }, { "epoch": 0.13329146628325264, "grad_norm": 1.5828018188476562, "learning_rate": 2.91685862814269e-05, "loss": 0.1292, "step": 6049 }, { "epoch": 0.13331350157276878, "grad_norm": 0.9697394371032715, "learning_rate": 2.9168234785279037e-05, "loss": 0.1333, "step": 6050 }, { "epoch": 0.13333553686228494, "grad_norm": 1.0050617456436157, "learning_rate": 2.916788321696476e-05, "loss": 0.1509, "step": 6051 }, { "epoch": 0.1333575721518011, "grad_norm": 1.0253757238388062, "learning_rate": 2.9167531576485856e-05, "loss": 0.1098, "step": 6052 }, { "epoch": 0.13337960744131727, "grad_norm": 0.8125826120376587, "learning_rate": 2.916717986384412e-05, "loss": 0.1078, "step": 6053 }, { "epoch": 0.13340164273083344, "grad_norm": 1.102112889289856, "learning_rate": 2.9166828079041333e-05, "loss": 0.1266, "step": 6054 }, { "epoch": 0.1334236780203496, "grad_norm": 1.359518051147461, "learning_rate": 2.9166476222079296e-05, "loss": 0.1132, "step": 6055 }, { "epoch": 0.13344571330986574, "grad_norm": 0.7003359794616699, "learning_rate": 2.91661242929598e-05, "loss": 0.0919, "step": 6056 }, { "epoch": 0.1334677485993819, "grad_norm": 1.2143704891204834, "learning_rate": 2.9165772291684636e-05, "loss": 0.125, "step": 6057 }, { "epoch": 0.13348978388889807, "grad_norm": 1.6088718175888062, "learning_rate": 2.9165420218255598e-05, "loss": 0.1268, "step": 6058 }, { "epoch": 0.13351181917841423, "grad_norm": 1.0984878540039062, "learning_rate": 2.916506807267448e-05, "loss": 0.1206, "step": 6059 }, { "epoch": 0.1335338544679304, "grad_norm": 1.8343819379806519, "learning_rate": 2.9164715854943073e-05, "loss": 0.1596, "step": 6060 }, { "epoch": 0.13355588975744656, "grad_norm": 1.04345703125, "learning_rate": 2.9164363565063174e-05, "loss": 0.1249, "step": 6061 }, { "epoch": 0.1335779250469627, "grad_norm": 1.3572273254394531, "learning_rate": 2.916401120303657e-05, "loss": 0.1223, "step": 6062 }, { "epoch": 0.13359996033647886, "grad_norm": 0.7739447951316833, "learning_rate": 2.9163658768865073e-05, "loss": 0.1063, "step": 6063 }, { "epoch": 0.13362199562599503, "grad_norm": 1.2169382572174072, "learning_rate": 2.9163306262550455e-05, "loss": 0.1303, "step": 6064 }, { "epoch": 0.1336440309155112, "grad_norm": 1.5949727296829224, "learning_rate": 2.9162953684094528e-05, "loss": 0.0909, "step": 6065 }, { "epoch": 0.13366606620502736, "grad_norm": 0.9674323201179504, "learning_rate": 2.9162601033499083e-05, "loss": 0.1351, "step": 6066 }, { "epoch": 0.13368810149454352, "grad_norm": 0.9761917591094971, "learning_rate": 2.9162248310765917e-05, "loss": 0.1622, "step": 6067 }, { "epoch": 0.13371013678405966, "grad_norm": 1.170654535293579, "learning_rate": 2.9161895515896823e-05, "loss": 0.1299, "step": 6068 }, { "epoch": 0.13373217207357582, "grad_norm": 0.9067807197570801, "learning_rate": 2.9161542648893608e-05, "loss": 0.0931, "step": 6069 }, { "epoch": 0.133754207363092, "grad_norm": 1.2261285781860352, "learning_rate": 2.9161189709758056e-05, "loss": 0.0906, "step": 6070 }, { "epoch": 0.13377624265260815, "grad_norm": 1.362945318222046, "learning_rate": 2.916083669849197e-05, "loss": 0.1347, "step": 6071 }, { "epoch": 0.13379827794212432, "grad_norm": 1.708494782447815, "learning_rate": 2.9160483615097156e-05, "loss": 0.1215, "step": 6072 }, { "epoch": 0.13382031323164048, "grad_norm": 2.1678905487060547, "learning_rate": 2.91601304595754e-05, "loss": 0.1562, "step": 6073 }, { "epoch": 0.13384234852115665, "grad_norm": 1.2392340898513794, "learning_rate": 2.9159777231928502e-05, "loss": 0.1202, "step": 6074 }, { "epoch": 0.13386438381067278, "grad_norm": 1.224789023399353, "learning_rate": 2.9159423932158275e-05, "loss": 0.1232, "step": 6075 }, { "epoch": 0.13388641910018895, "grad_norm": 1.7725541591644287, "learning_rate": 2.9159070560266502e-05, "loss": 0.1518, "step": 6076 }, { "epoch": 0.1339084543897051, "grad_norm": 1.3312084674835205, "learning_rate": 2.9158717116254993e-05, "loss": 0.1445, "step": 6077 }, { "epoch": 0.13393048967922128, "grad_norm": 1.0834510326385498, "learning_rate": 2.9158363600125548e-05, "loss": 0.0847, "step": 6078 }, { "epoch": 0.13395252496873744, "grad_norm": 1.7638294696807861, "learning_rate": 2.9158010011879957e-05, "loss": 0.177, "step": 6079 }, { "epoch": 0.1339745602582536, "grad_norm": 1.1477752923965454, "learning_rate": 2.915765635152004e-05, "loss": 0.1173, "step": 6080 }, { "epoch": 0.13399659554776974, "grad_norm": 1.106188416481018, "learning_rate": 2.915730261904758e-05, "loss": 0.1243, "step": 6081 }, { "epoch": 0.1340186308372859, "grad_norm": 1.1963865756988525, "learning_rate": 2.9156948814464385e-05, "loss": 0.132, "step": 6082 }, { "epoch": 0.13404066612680207, "grad_norm": 1.6838806867599487, "learning_rate": 2.915659493777226e-05, "loss": 0.1692, "step": 6083 }, { "epoch": 0.13406270141631824, "grad_norm": 0.7136766314506531, "learning_rate": 2.9156240988973006e-05, "loss": 0.1392, "step": 6084 }, { "epoch": 0.1340847367058344, "grad_norm": 1.0953809022903442, "learning_rate": 2.9155886968068428e-05, "loss": 0.132, "step": 6085 }, { "epoch": 0.13410677199535057, "grad_norm": 0.9760899543762207, "learning_rate": 2.9155532875060322e-05, "loss": 0.1093, "step": 6086 }, { "epoch": 0.1341288072848667, "grad_norm": 0.6523982882499695, "learning_rate": 2.91551787099505e-05, "loss": 0.1125, "step": 6087 }, { "epoch": 0.13415084257438287, "grad_norm": 1.0331918001174927, "learning_rate": 2.915482447274076e-05, "loss": 0.096, "step": 6088 }, { "epoch": 0.13417287786389903, "grad_norm": 1.3244166374206543, "learning_rate": 2.9154470163432908e-05, "loss": 0.1367, "step": 6089 }, { "epoch": 0.1341949131534152, "grad_norm": 0.9439375996589661, "learning_rate": 2.9154115782028752e-05, "loss": 0.1399, "step": 6090 }, { "epoch": 0.13421694844293136, "grad_norm": 1.3144780397415161, "learning_rate": 2.915376132853009e-05, "loss": 0.123, "step": 6091 }, { "epoch": 0.13423898373244753, "grad_norm": 1.0439022779464722, "learning_rate": 2.9153406802938737e-05, "loss": 0.1304, "step": 6092 }, { "epoch": 0.13426101902196366, "grad_norm": 0.8176795244216919, "learning_rate": 2.915305220525649e-05, "loss": 0.1233, "step": 6093 }, { "epoch": 0.13428305431147983, "grad_norm": 1.3396247625350952, "learning_rate": 2.9152697535485157e-05, "loss": 0.1675, "step": 6094 }, { "epoch": 0.134305089600996, "grad_norm": 0.8310943245887756, "learning_rate": 2.9152342793626548e-05, "loss": 0.1063, "step": 6095 }, { "epoch": 0.13432712489051216, "grad_norm": 0.9674561619758606, "learning_rate": 2.915198797968247e-05, "loss": 0.1271, "step": 6096 }, { "epoch": 0.13434916018002832, "grad_norm": 1.2203725576400757, "learning_rate": 2.915163309365473e-05, "loss": 0.1728, "step": 6097 }, { "epoch": 0.13437119546954449, "grad_norm": 1.0319410562515259, "learning_rate": 2.9151278135545126e-05, "loss": 0.1497, "step": 6098 }, { "epoch": 0.13439323075906062, "grad_norm": 1.289292335510254, "learning_rate": 2.9150923105355478e-05, "loss": 0.1659, "step": 6099 }, { "epoch": 0.1344152660485768, "grad_norm": 1.0247966051101685, "learning_rate": 2.9150568003087592e-05, "loss": 0.1167, "step": 6100 }, { "epoch": 0.13443730133809295, "grad_norm": 1.1920580863952637, "learning_rate": 2.9150212828743277e-05, "loss": 0.1386, "step": 6101 }, { "epoch": 0.13445933662760912, "grad_norm": 1.8610070943832397, "learning_rate": 2.9149857582324336e-05, "loss": 0.1239, "step": 6102 }, { "epoch": 0.13448137191712528, "grad_norm": 1.5624436140060425, "learning_rate": 2.9149502263832584e-05, "loss": 0.106, "step": 6103 }, { "epoch": 0.13450340720664145, "grad_norm": 1.2977105379104614, "learning_rate": 2.9149146873269828e-05, "loss": 0.1328, "step": 6104 }, { "epoch": 0.1345254424961576, "grad_norm": 1.029268503189087, "learning_rate": 2.9148791410637883e-05, "loss": 0.0954, "step": 6105 }, { "epoch": 0.13454747778567375, "grad_norm": 1.1207351684570312, "learning_rate": 2.914843587593855e-05, "loss": 0.145, "step": 6106 }, { "epoch": 0.1345695130751899, "grad_norm": 1.1040862798690796, "learning_rate": 2.9148080269173652e-05, "loss": 0.0944, "step": 6107 }, { "epoch": 0.13459154836470608, "grad_norm": 1.2754274606704712, "learning_rate": 2.9147724590344994e-05, "loss": 0.0977, "step": 6108 }, { "epoch": 0.13461358365422224, "grad_norm": 0.9411560893058777, "learning_rate": 2.9147368839454388e-05, "loss": 0.1079, "step": 6109 }, { "epoch": 0.1346356189437384, "grad_norm": 0.859499454498291, "learning_rate": 2.9147013016503644e-05, "loss": 0.1065, "step": 6110 }, { "epoch": 0.13465765423325457, "grad_norm": 1.1225488185882568, "learning_rate": 2.9146657121494573e-05, "loss": 0.1091, "step": 6111 }, { "epoch": 0.1346796895227707, "grad_norm": 0.9148899912834167, "learning_rate": 2.9146301154428996e-05, "loss": 0.0955, "step": 6112 }, { "epoch": 0.13470172481228687, "grad_norm": 0.8871660232543945, "learning_rate": 2.9145945115308724e-05, "loss": 0.149, "step": 6113 }, { "epoch": 0.13472376010180304, "grad_norm": 0.97925865650177, "learning_rate": 2.914558900413556e-05, "loss": 0.1434, "step": 6114 }, { "epoch": 0.1347457953913192, "grad_norm": 1.109308123588562, "learning_rate": 2.9145232820911333e-05, "loss": 0.1378, "step": 6115 }, { "epoch": 0.13476783068083537, "grad_norm": 0.9546797275543213, "learning_rate": 2.9144876565637845e-05, "loss": 0.1149, "step": 6116 }, { "epoch": 0.13478986597035153, "grad_norm": 1.1031373739242554, "learning_rate": 2.914452023831692e-05, "loss": 0.1381, "step": 6117 }, { "epoch": 0.13481190125986767, "grad_norm": 1.0185843706130981, "learning_rate": 2.9144163838950366e-05, "loss": 0.1378, "step": 6118 }, { "epoch": 0.13483393654938383, "grad_norm": 1.1734288930892944, "learning_rate": 2.914380736754e-05, "loss": 0.1292, "step": 6119 }, { "epoch": 0.1348559718389, "grad_norm": 1.1670920848846436, "learning_rate": 2.914345082408764e-05, "loss": 0.1516, "step": 6120 }, { "epoch": 0.13487800712841616, "grad_norm": 0.9604728817939758, "learning_rate": 2.91430942085951e-05, "loss": 0.1083, "step": 6121 }, { "epoch": 0.13490004241793233, "grad_norm": 0.9017407894134521, "learning_rate": 2.91427375210642e-05, "loss": 0.1177, "step": 6122 }, { "epoch": 0.1349220777074485, "grad_norm": 0.8499129414558411, "learning_rate": 2.9142380761496747e-05, "loss": 0.1224, "step": 6123 }, { "epoch": 0.13494411299696463, "grad_norm": 1.0959917306900024, "learning_rate": 2.914202392989457e-05, "loss": 0.1104, "step": 6124 }, { "epoch": 0.1349661482864808, "grad_norm": 1.0991690158843994, "learning_rate": 2.914166702625948e-05, "loss": 0.1231, "step": 6125 }, { "epoch": 0.13498818357599696, "grad_norm": 0.7732475996017456, "learning_rate": 2.91413100505933e-05, "loss": 0.1167, "step": 6126 }, { "epoch": 0.13501021886551312, "grad_norm": 0.9611050486564636, "learning_rate": 2.914095300289784e-05, "loss": 0.1081, "step": 6127 }, { "epoch": 0.13503225415502929, "grad_norm": 1.2759113311767578, "learning_rate": 2.9140595883174928e-05, "loss": 0.1665, "step": 6128 }, { "epoch": 0.13505428944454545, "grad_norm": 0.7116525173187256, "learning_rate": 2.9140238691426378e-05, "loss": 0.0817, "step": 6129 }, { "epoch": 0.1350763247340616, "grad_norm": 0.9868769645690918, "learning_rate": 2.9139881427654006e-05, "loss": 0.1299, "step": 6130 }, { "epoch": 0.13509836002357775, "grad_norm": 1.0950236320495605, "learning_rate": 2.9139524091859636e-05, "loss": 0.1016, "step": 6131 }, { "epoch": 0.13512039531309392, "grad_norm": 1.627692461013794, "learning_rate": 2.913916668404509e-05, "loss": 0.1415, "step": 6132 }, { "epoch": 0.13514243060261008, "grad_norm": 1.1627440452575684, "learning_rate": 2.9138809204212184e-05, "loss": 0.1412, "step": 6133 }, { "epoch": 0.13516446589212625, "grad_norm": 0.6914593577384949, "learning_rate": 2.913845165236274e-05, "loss": 0.1105, "step": 6134 }, { "epoch": 0.1351865011816424, "grad_norm": 1.0389915704727173, "learning_rate": 2.9138094028498584e-05, "loss": 0.1575, "step": 6135 }, { "epoch": 0.13520853647115855, "grad_norm": 0.9732903838157654, "learning_rate": 2.9137736332621533e-05, "loss": 0.1336, "step": 6136 }, { "epoch": 0.1352305717606747, "grad_norm": 1.6023653745651245, "learning_rate": 2.9137378564733407e-05, "loss": 0.1135, "step": 6137 }, { "epoch": 0.13525260705019088, "grad_norm": 0.4914231598377228, "learning_rate": 2.913702072483603e-05, "loss": 0.0675, "step": 6138 }, { "epoch": 0.13527464233970704, "grad_norm": 0.9056505560874939, "learning_rate": 2.913666281293123e-05, "loss": 0.1158, "step": 6139 }, { "epoch": 0.1352966776292232, "grad_norm": 1.3402986526489258, "learning_rate": 2.913630482902082e-05, "loss": 0.1159, "step": 6140 }, { "epoch": 0.13531871291873937, "grad_norm": 1.295524001121521, "learning_rate": 2.913594677310663e-05, "loss": 0.1164, "step": 6141 }, { "epoch": 0.13534074820825553, "grad_norm": 1.1807585954666138, "learning_rate": 2.9135588645190488e-05, "loss": 0.1383, "step": 6142 }, { "epoch": 0.13536278349777167, "grad_norm": 1.307968020439148, "learning_rate": 2.9135230445274213e-05, "loss": 0.166, "step": 6143 }, { "epoch": 0.13538481878728784, "grad_norm": 1.10365891456604, "learning_rate": 2.9134872173359622e-05, "loss": 0.1527, "step": 6144 }, { "epoch": 0.135406854076804, "grad_norm": 1.032092809677124, "learning_rate": 2.9134513829448553e-05, "loss": 0.0779, "step": 6145 }, { "epoch": 0.13542888936632017, "grad_norm": 0.7282000184059143, "learning_rate": 2.9134155413542823e-05, "loss": 0.1227, "step": 6146 }, { "epoch": 0.13545092465583633, "grad_norm": 1.1553868055343628, "learning_rate": 2.9133796925644264e-05, "loss": 0.1139, "step": 6147 }, { "epoch": 0.1354729599453525, "grad_norm": 0.9060251116752625, "learning_rate": 2.9133438365754693e-05, "loss": 0.1198, "step": 6148 }, { "epoch": 0.13549499523486863, "grad_norm": 1.3710262775421143, "learning_rate": 2.9133079733875945e-05, "loss": 0.1516, "step": 6149 }, { "epoch": 0.1355170305243848, "grad_norm": 1.3801805973052979, "learning_rate": 2.9132721030009843e-05, "loss": 0.1525, "step": 6150 }, { "epoch": 0.13553906581390096, "grad_norm": 1.8282239437103271, "learning_rate": 2.9132362254158214e-05, "loss": 0.1317, "step": 6151 }, { "epoch": 0.13556110110341713, "grad_norm": 1.2170097827911377, "learning_rate": 2.9132003406322884e-05, "loss": 0.1583, "step": 6152 }, { "epoch": 0.1355831363929333, "grad_norm": 0.9724366664886475, "learning_rate": 2.9131644486505683e-05, "loss": 0.1009, "step": 6153 }, { "epoch": 0.13560517168244945, "grad_norm": 1.3174059391021729, "learning_rate": 2.9131285494708437e-05, "loss": 0.1306, "step": 6154 }, { "epoch": 0.1356272069719656, "grad_norm": 0.8785985708236694, "learning_rate": 2.913092643093298e-05, "loss": 0.1701, "step": 6155 }, { "epoch": 0.13564924226148176, "grad_norm": 2.1117236614227295, "learning_rate": 2.9130567295181134e-05, "loss": 0.1218, "step": 6156 }, { "epoch": 0.13567127755099792, "grad_norm": 0.9977061748504639, "learning_rate": 2.9130208087454735e-05, "loss": 0.1204, "step": 6157 }, { "epoch": 0.13569331284051409, "grad_norm": 0.9434179067611694, "learning_rate": 2.9129848807755604e-05, "loss": 0.1542, "step": 6158 }, { "epoch": 0.13571534813003025, "grad_norm": 1.0912721157073975, "learning_rate": 2.9129489456085578e-05, "loss": 0.1673, "step": 6159 }, { "epoch": 0.13573738341954641, "grad_norm": 1.1083505153656006, "learning_rate": 2.9129130032446485e-05, "loss": 0.1263, "step": 6160 }, { "epoch": 0.13575941870906255, "grad_norm": 1.068941593170166, "learning_rate": 2.912877053684016e-05, "loss": 0.1474, "step": 6161 }, { "epoch": 0.13578145399857872, "grad_norm": 0.9319553971290588, "learning_rate": 2.9128410969268426e-05, "loss": 0.1178, "step": 6162 }, { "epoch": 0.13580348928809488, "grad_norm": 1.0425912141799927, "learning_rate": 2.912805132973312e-05, "loss": 0.1127, "step": 6163 }, { "epoch": 0.13582552457761105, "grad_norm": 0.9779413342475891, "learning_rate": 2.912769161823607e-05, "loss": 0.1233, "step": 6164 }, { "epoch": 0.1358475598671272, "grad_norm": 1.0331050157546997, "learning_rate": 2.9127331834779118e-05, "loss": 0.091, "step": 6165 }, { "epoch": 0.13586959515664337, "grad_norm": 0.9100409150123596, "learning_rate": 2.9126971979364086e-05, "loss": 0.0913, "step": 6166 }, { "epoch": 0.1358916304461595, "grad_norm": 0.8878121972084045, "learning_rate": 2.9126612051992806e-05, "loss": 0.0978, "step": 6167 }, { "epoch": 0.13591366573567568, "grad_norm": 0.9497929215431213, "learning_rate": 2.9126252052667117e-05, "loss": 0.1353, "step": 6168 }, { "epoch": 0.13593570102519184, "grad_norm": 1.022297739982605, "learning_rate": 2.9125891981388853e-05, "loss": 0.1395, "step": 6169 }, { "epoch": 0.135957736314708, "grad_norm": 1.609654426574707, "learning_rate": 2.9125531838159846e-05, "loss": 0.1366, "step": 6170 }, { "epoch": 0.13597977160422417, "grad_norm": 1.3470884561538696, "learning_rate": 2.912517162298193e-05, "loss": 0.1736, "step": 6171 }, { "epoch": 0.13600180689374033, "grad_norm": 1.0741662979125977, "learning_rate": 2.9124811335856942e-05, "loss": 0.1544, "step": 6172 }, { "epoch": 0.13602384218325647, "grad_norm": 1.124998927116394, "learning_rate": 2.912445097678671e-05, "loss": 0.1299, "step": 6173 }, { "epoch": 0.13604587747277264, "grad_norm": 0.9027390480041504, "learning_rate": 2.9124090545773083e-05, "loss": 0.0922, "step": 6174 }, { "epoch": 0.1360679127622888, "grad_norm": 0.7653270959854126, "learning_rate": 2.9123730042817884e-05, "loss": 0.0969, "step": 6175 }, { "epoch": 0.13608994805180497, "grad_norm": 0.7863945364952087, "learning_rate": 2.9123369467922957e-05, "loss": 0.0993, "step": 6176 }, { "epoch": 0.13611198334132113, "grad_norm": 1.055405616760254, "learning_rate": 2.9123008821090135e-05, "loss": 0.158, "step": 6177 }, { "epoch": 0.1361340186308373, "grad_norm": 0.9511612057685852, "learning_rate": 2.9122648102321254e-05, "loss": 0.1465, "step": 6178 }, { "epoch": 0.13615605392035346, "grad_norm": 1.0755873918533325, "learning_rate": 2.9122287311618155e-05, "loss": 0.1287, "step": 6179 }, { "epoch": 0.1361780892098696, "grad_norm": 1.2847908735275269, "learning_rate": 2.912192644898267e-05, "loss": 0.1089, "step": 6180 }, { "epoch": 0.13620012449938576, "grad_norm": 1.3133116960525513, "learning_rate": 2.9121565514416645e-05, "loss": 0.1196, "step": 6181 }, { "epoch": 0.13622215978890193, "grad_norm": 1.3518627882003784, "learning_rate": 2.912120450792191e-05, "loss": 0.1323, "step": 6182 }, { "epoch": 0.1362441950784181, "grad_norm": 1.6364130973815918, "learning_rate": 2.9120843429500313e-05, "loss": 0.12, "step": 6183 }, { "epoch": 0.13626623036793425, "grad_norm": 0.9587088227272034, "learning_rate": 2.9120482279153685e-05, "loss": 0.1325, "step": 6184 }, { "epoch": 0.13628826565745042, "grad_norm": 1.7186477184295654, "learning_rate": 2.912012105688387e-05, "loss": 0.1711, "step": 6185 }, { "epoch": 0.13631030094696656, "grad_norm": 1.4127146005630493, "learning_rate": 2.9119759762692708e-05, "loss": 0.1332, "step": 6186 }, { "epoch": 0.13633233623648272, "grad_norm": 1.3305739164352417, "learning_rate": 2.9119398396582034e-05, "loss": 0.1332, "step": 6187 }, { "epoch": 0.13635437152599889, "grad_norm": 1.3431390523910522, "learning_rate": 2.9119036958553697e-05, "loss": 0.1448, "step": 6188 }, { "epoch": 0.13637640681551505, "grad_norm": 0.9628167748451233, "learning_rate": 2.9118675448609528e-05, "loss": 0.1154, "step": 6189 }, { "epoch": 0.13639844210503121, "grad_norm": 1.37088942527771, "learning_rate": 2.911831386675138e-05, "loss": 0.1261, "step": 6190 }, { "epoch": 0.13642047739454738, "grad_norm": 1.2614994049072266, "learning_rate": 2.9117952212981085e-05, "loss": 0.1474, "step": 6191 }, { "epoch": 0.13644251268406352, "grad_norm": 1.2858693599700928, "learning_rate": 2.911759048730049e-05, "loss": 0.1363, "step": 6192 }, { "epoch": 0.13646454797357968, "grad_norm": 1.0920827388763428, "learning_rate": 2.9117228689711437e-05, "loss": 0.1678, "step": 6193 }, { "epoch": 0.13648658326309585, "grad_norm": 0.784156084060669, "learning_rate": 2.9116866820215765e-05, "loss": 0.0958, "step": 6194 }, { "epoch": 0.136508618552612, "grad_norm": 1.3208706378936768, "learning_rate": 2.9116504878815323e-05, "loss": 0.1095, "step": 6195 }, { "epoch": 0.13653065384212817, "grad_norm": 1.0200836658477783, "learning_rate": 2.9116142865511952e-05, "loss": 0.1237, "step": 6196 }, { "epoch": 0.13655268913164434, "grad_norm": 1.157444953918457, "learning_rate": 2.9115780780307492e-05, "loss": 0.1203, "step": 6197 }, { "epoch": 0.13657472442116048, "grad_norm": 0.9774535298347473, "learning_rate": 2.9115418623203795e-05, "loss": 0.1545, "step": 6198 }, { "epoch": 0.13659675971067664, "grad_norm": 1.1723583936691284, "learning_rate": 2.9115056394202698e-05, "loss": 0.14, "step": 6199 }, { "epoch": 0.1366187950001928, "grad_norm": 1.1526521444320679, "learning_rate": 2.9114694093306057e-05, "loss": 0.1475, "step": 6200 }, { "epoch": 0.13664083028970897, "grad_norm": 0.8183822631835938, "learning_rate": 2.91143317205157e-05, "loss": 0.1206, "step": 6201 }, { "epoch": 0.13666286557922513, "grad_norm": 1.2381117343902588, "learning_rate": 2.911396927583349e-05, "loss": 0.1103, "step": 6202 }, { "epoch": 0.1366849008687413, "grad_norm": 1.378823161125183, "learning_rate": 2.9113606759261264e-05, "loss": 0.1251, "step": 6203 }, { "epoch": 0.13670693615825744, "grad_norm": 1.4059045314788818, "learning_rate": 2.911324417080087e-05, "loss": 0.0971, "step": 6204 }, { "epoch": 0.1367289714477736, "grad_norm": 1.0499314069747925, "learning_rate": 2.911288151045416e-05, "loss": 0.1222, "step": 6205 }, { "epoch": 0.13675100673728977, "grad_norm": 1.4976869821548462, "learning_rate": 2.9112518778222968e-05, "loss": 0.115, "step": 6206 }, { "epoch": 0.13677304202680593, "grad_norm": 0.9748196601867676, "learning_rate": 2.9112155974109152e-05, "loss": 0.1323, "step": 6207 }, { "epoch": 0.1367950773163221, "grad_norm": 1.6513420343399048, "learning_rate": 2.911179309811456e-05, "loss": 0.1602, "step": 6208 }, { "epoch": 0.13681711260583826, "grad_norm": 1.232593059539795, "learning_rate": 2.911143015024104e-05, "loss": 0.1084, "step": 6209 }, { "epoch": 0.13683914789535442, "grad_norm": 1.3076616525650024, "learning_rate": 2.911106713049044e-05, "loss": 0.1619, "step": 6210 }, { "epoch": 0.13686118318487056, "grad_norm": 0.8464810252189636, "learning_rate": 2.9110704038864606e-05, "loss": 0.1099, "step": 6211 }, { "epoch": 0.13688321847438673, "grad_norm": 0.995145320892334, "learning_rate": 2.9110340875365387e-05, "loss": 0.1123, "step": 6212 }, { "epoch": 0.1369052537639029, "grad_norm": 1.7082637548446655, "learning_rate": 2.9109977639994637e-05, "loss": 0.1144, "step": 6213 }, { "epoch": 0.13692728905341905, "grad_norm": 0.9738086462020874, "learning_rate": 2.9109614332754205e-05, "loss": 0.1181, "step": 6214 }, { "epoch": 0.13694932434293522, "grad_norm": 0.7611706256866455, "learning_rate": 2.9109250953645944e-05, "loss": 0.1053, "step": 6215 }, { "epoch": 0.13697135963245138, "grad_norm": 1.1956647634506226, "learning_rate": 2.91088875026717e-05, "loss": 0.1178, "step": 6216 }, { "epoch": 0.13699339492196752, "grad_norm": 0.833500325679779, "learning_rate": 2.9108523979833324e-05, "loss": 0.1521, "step": 6217 }, { "epoch": 0.13701543021148369, "grad_norm": 1.370188593864441, "learning_rate": 2.9108160385132675e-05, "loss": 0.1893, "step": 6218 }, { "epoch": 0.13703746550099985, "grad_norm": 0.6557140350341797, "learning_rate": 2.9107796718571597e-05, "loss": 0.1103, "step": 6219 }, { "epoch": 0.13705950079051601, "grad_norm": 0.901020884513855, "learning_rate": 2.910743298015194e-05, "loss": 0.0824, "step": 6220 }, { "epoch": 0.13708153608003218, "grad_norm": 1.0795210599899292, "learning_rate": 2.9107069169875568e-05, "loss": 0.0863, "step": 6221 }, { "epoch": 0.13710357136954834, "grad_norm": 1.1703853607177734, "learning_rate": 2.9106705287744327e-05, "loss": 0.1472, "step": 6222 }, { "epoch": 0.13712560665906448, "grad_norm": 0.7535966634750366, "learning_rate": 2.910634133376007e-05, "loss": 0.1159, "step": 6223 }, { "epoch": 0.13714764194858065, "grad_norm": 1.0367392301559448, "learning_rate": 2.910597730792465e-05, "loss": 0.1331, "step": 6224 }, { "epoch": 0.1371696772380968, "grad_norm": 1.4009332656860352, "learning_rate": 2.9105613210239924e-05, "loss": 0.1021, "step": 6225 }, { "epoch": 0.13719171252761297, "grad_norm": 1.07481849193573, "learning_rate": 2.9105249040707747e-05, "loss": 0.1045, "step": 6226 }, { "epoch": 0.13721374781712914, "grad_norm": 1.0745347738265991, "learning_rate": 2.9104884799329975e-05, "loss": 0.1068, "step": 6227 }, { "epoch": 0.1372357831066453, "grad_norm": 0.9941938519477844, "learning_rate": 2.9104520486108458e-05, "loss": 0.1198, "step": 6228 }, { "epoch": 0.13725781839616144, "grad_norm": 1.059322476387024, "learning_rate": 2.910415610104506e-05, "loss": 0.12, "step": 6229 }, { "epoch": 0.1372798536856776, "grad_norm": 1.183316946029663, "learning_rate": 2.9103791644141627e-05, "loss": 0.1134, "step": 6230 }, { "epoch": 0.13730188897519377, "grad_norm": 1.2085497379302979, "learning_rate": 2.910342711540002e-05, "loss": 0.0933, "step": 6231 }, { "epoch": 0.13732392426470993, "grad_norm": 0.9599869847297668, "learning_rate": 2.9103062514822097e-05, "loss": 0.1034, "step": 6232 }, { "epoch": 0.1373459595542261, "grad_norm": 0.9848796129226685, "learning_rate": 2.9102697842409713e-05, "loss": 0.1, "step": 6233 }, { "epoch": 0.13736799484374226, "grad_norm": 1.0096272230148315, "learning_rate": 2.910233309816473e-05, "loss": 0.1041, "step": 6234 }, { "epoch": 0.1373900301332584, "grad_norm": 1.296247124671936, "learning_rate": 2.9101968282088998e-05, "loss": 0.132, "step": 6235 }, { "epoch": 0.13741206542277457, "grad_norm": 1.0749928951263428, "learning_rate": 2.9101603394184384e-05, "loss": 0.1029, "step": 6236 }, { "epoch": 0.13743410071229073, "grad_norm": 1.41408371925354, "learning_rate": 2.910123843445274e-05, "loss": 0.121, "step": 6237 }, { "epoch": 0.1374561360018069, "grad_norm": 0.8813176155090332, "learning_rate": 2.9100873402895925e-05, "loss": 0.1028, "step": 6238 }, { "epoch": 0.13747817129132306, "grad_norm": 1.4679969549179077, "learning_rate": 2.91005082995158e-05, "loss": 0.2047, "step": 6239 }, { "epoch": 0.13750020658083922, "grad_norm": 1.0239815711975098, "learning_rate": 2.910014312431423e-05, "loss": 0.1053, "step": 6240 }, { "epoch": 0.13752224187035536, "grad_norm": 1.4167453050613403, "learning_rate": 2.9099777877293063e-05, "loss": 0.1361, "step": 6241 }, { "epoch": 0.13754427715987153, "grad_norm": 0.7917926907539368, "learning_rate": 2.909941255845417e-05, "loss": 0.0924, "step": 6242 }, { "epoch": 0.1375663124493877, "grad_norm": 0.7891364693641663, "learning_rate": 2.9099047167799408e-05, "loss": 0.1211, "step": 6243 }, { "epoch": 0.13758834773890385, "grad_norm": 1.4101037979125977, "learning_rate": 2.9098681705330638e-05, "loss": 0.1491, "step": 6244 }, { "epoch": 0.13761038302842002, "grad_norm": 0.7924978733062744, "learning_rate": 2.909831617104972e-05, "loss": 0.1303, "step": 6245 }, { "epoch": 0.13763241831793618, "grad_norm": 2.2200539112091064, "learning_rate": 2.909795056495852e-05, "loss": 0.1003, "step": 6246 }, { "epoch": 0.13765445360745235, "grad_norm": 1.4072973728179932, "learning_rate": 2.90975848870589e-05, "loss": 0.1262, "step": 6247 }, { "epoch": 0.13767648889696849, "grad_norm": 1.0533453226089478, "learning_rate": 2.9097219137352717e-05, "loss": 0.1374, "step": 6248 }, { "epoch": 0.13769852418648465, "grad_norm": 1.1765217781066895, "learning_rate": 2.9096853315841832e-05, "loss": 0.163, "step": 6249 }, { "epoch": 0.13772055947600081, "grad_norm": 0.7956830263137817, "learning_rate": 2.9096487422528128e-05, "loss": 0.1008, "step": 6250 }, { "epoch": 0.13774259476551698, "grad_norm": 1.0910543203353882, "learning_rate": 2.909612145741344e-05, "loss": 0.1221, "step": 6251 }, { "epoch": 0.13776463005503314, "grad_norm": 0.8588902950286865, "learning_rate": 2.9095755420499654e-05, "loss": 0.1232, "step": 6252 }, { "epoch": 0.1377866653445493, "grad_norm": 0.9152259826660156, "learning_rate": 2.9095389311788626e-05, "loss": 0.1071, "step": 6253 }, { "epoch": 0.13780870063406545, "grad_norm": 1.1413031816482544, "learning_rate": 2.9095023131282224e-05, "loss": 0.1191, "step": 6254 }, { "epoch": 0.1378307359235816, "grad_norm": 1.2185930013656616, "learning_rate": 2.9094656878982306e-05, "loss": 0.1215, "step": 6255 }, { "epoch": 0.13785277121309777, "grad_norm": 1.232330560684204, "learning_rate": 2.9094290554890745e-05, "loss": 0.0861, "step": 6256 }, { "epoch": 0.13787480650261394, "grad_norm": 1.650739312171936, "learning_rate": 2.9093924159009404e-05, "loss": 0.1607, "step": 6257 }, { "epoch": 0.1378968417921301, "grad_norm": 1.9765833616256714, "learning_rate": 2.9093557691340148e-05, "loss": 0.1484, "step": 6258 }, { "epoch": 0.13791887708164627, "grad_norm": 0.8071102499961853, "learning_rate": 2.909319115188485e-05, "loss": 0.1214, "step": 6259 }, { "epoch": 0.1379409123711624, "grad_norm": 1.4243587255477905, "learning_rate": 2.9092824540645365e-05, "loss": 0.1407, "step": 6260 }, { "epoch": 0.13796294766067857, "grad_norm": 0.9988555908203125, "learning_rate": 2.909245785762357e-05, "loss": 0.1435, "step": 6261 }, { "epoch": 0.13798498295019473, "grad_norm": 1.5103360414505005, "learning_rate": 2.9092091102821334e-05, "loss": 0.1432, "step": 6262 }, { "epoch": 0.1380070182397109, "grad_norm": 1.4718488454818726, "learning_rate": 2.9091724276240518e-05, "loss": 0.1586, "step": 6263 }, { "epoch": 0.13802905352922706, "grad_norm": 1.5488619804382324, "learning_rate": 2.9091357377882993e-05, "loss": 0.1015, "step": 6264 }, { "epoch": 0.13805108881874323, "grad_norm": 1.6500412225723267, "learning_rate": 2.9090990407750628e-05, "loss": 0.1234, "step": 6265 }, { "epoch": 0.13807312410825937, "grad_norm": 1.560711145401001, "learning_rate": 2.9090623365845295e-05, "loss": 0.1261, "step": 6266 }, { "epoch": 0.13809515939777553, "grad_norm": 1.2389785051345825, "learning_rate": 2.909025625216886e-05, "loss": 0.1461, "step": 6267 }, { "epoch": 0.1381171946872917, "grad_norm": 1.0768181085586548, "learning_rate": 2.9089889066723195e-05, "loss": 0.156, "step": 6268 }, { "epoch": 0.13813922997680786, "grad_norm": 2.059088945388794, "learning_rate": 2.9089521809510168e-05, "loss": 0.1776, "step": 6269 }, { "epoch": 0.13816126526632402, "grad_norm": 0.9786579608917236, "learning_rate": 2.9089154480531654e-05, "loss": 0.1225, "step": 6270 }, { "epoch": 0.1381833005558402, "grad_norm": 1.164352536201477, "learning_rate": 2.9088787079789517e-05, "loss": 0.1241, "step": 6271 }, { "epoch": 0.13820533584535633, "grad_norm": 0.8463819026947021, "learning_rate": 2.9088419607285634e-05, "loss": 0.1159, "step": 6272 }, { "epoch": 0.1382273711348725, "grad_norm": 1.3895262479782104, "learning_rate": 2.9088052063021876e-05, "loss": 0.1061, "step": 6273 }, { "epoch": 0.13824940642438865, "grad_norm": 0.7732024192810059, "learning_rate": 2.9087684447000115e-05, "loss": 0.108, "step": 6274 }, { "epoch": 0.13827144171390482, "grad_norm": 0.7300634384155273, "learning_rate": 2.908731675922222e-05, "loss": 0.0766, "step": 6275 }, { "epoch": 0.13829347700342098, "grad_norm": 1.5334546566009521, "learning_rate": 2.9086948999690066e-05, "loss": 0.14, "step": 6276 }, { "epoch": 0.13831551229293715, "grad_norm": 1.3579258918762207, "learning_rate": 2.908658116840553e-05, "loss": 0.1396, "step": 6277 }, { "epoch": 0.13833754758245329, "grad_norm": 1.9362927675247192, "learning_rate": 2.908621326537048e-05, "loss": 0.1331, "step": 6278 }, { "epoch": 0.13835958287196945, "grad_norm": 1.1310393810272217, "learning_rate": 2.9085845290586794e-05, "loss": 0.1503, "step": 6279 }, { "epoch": 0.13838161816148561, "grad_norm": 1.3510087728500366, "learning_rate": 2.9085477244056347e-05, "loss": 0.1308, "step": 6280 }, { "epoch": 0.13840365345100178, "grad_norm": 1.2539379596710205, "learning_rate": 2.9085109125781006e-05, "loss": 0.1575, "step": 6281 }, { "epoch": 0.13842568874051794, "grad_norm": 2.180391311645508, "learning_rate": 2.9084740935762653e-05, "loss": 0.1292, "step": 6282 }, { "epoch": 0.1384477240300341, "grad_norm": 1.1939278841018677, "learning_rate": 2.908437267400316e-05, "loss": 0.1241, "step": 6283 }, { "epoch": 0.13846975931955027, "grad_norm": 1.0969762802124023, "learning_rate": 2.9084004340504408e-05, "loss": 0.1219, "step": 6284 }, { "epoch": 0.1384917946090664, "grad_norm": 1.118644118309021, "learning_rate": 2.9083635935268267e-05, "loss": 0.16, "step": 6285 }, { "epoch": 0.13851382989858257, "grad_norm": 1.1128970384597778, "learning_rate": 2.9083267458296615e-05, "loss": 0.1451, "step": 6286 }, { "epoch": 0.13853586518809874, "grad_norm": 1.1115243434906006, "learning_rate": 2.908289890959133e-05, "loss": 0.1496, "step": 6287 }, { "epoch": 0.1385579004776149, "grad_norm": 0.8797773718833923, "learning_rate": 2.908253028915429e-05, "loss": 0.1365, "step": 6288 }, { "epoch": 0.13857993576713107, "grad_norm": 0.865687370300293, "learning_rate": 2.9082161596987372e-05, "loss": 0.0818, "step": 6289 }, { "epoch": 0.13860197105664723, "grad_norm": 1.2377623319625854, "learning_rate": 2.9081792833092453e-05, "loss": 0.1257, "step": 6290 }, { "epoch": 0.13862400634616337, "grad_norm": 0.8122766017913818, "learning_rate": 2.9081423997471415e-05, "loss": 0.141, "step": 6291 }, { "epoch": 0.13864604163567953, "grad_norm": 0.7991674542427063, "learning_rate": 2.908105509012613e-05, "loss": 0.1474, "step": 6292 }, { "epoch": 0.1386680769251957, "grad_norm": 1.1820467710494995, "learning_rate": 2.908068611105848e-05, "loss": 0.1427, "step": 6293 }, { "epoch": 0.13869011221471186, "grad_norm": 1.2756456136703491, "learning_rate": 2.9080317060270348e-05, "loss": 0.1546, "step": 6294 }, { "epoch": 0.13871214750422803, "grad_norm": 1.499467134475708, "learning_rate": 2.9079947937763608e-05, "loss": 0.125, "step": 6295 }, { "epoch": 0.1387341827937442, "grad_norm": 1.0144292116165161, "learning_rate": 2.907957874354014e-05, "loss": 0.1061, "step": 6296 }, { "epoch": 0.13875621808326033, "grad_norm": 1.0577524900436401, "learning_rate": 2.9079209477601834e-05, "loss": 0.1182, "step": 6297 }, { "epoch": 0.1387782533727765, "grad_norm": 0.982625424861908, "learning_rate": 2.907884013995056e-05, "loss": 0.1712, "step": 6298 }, { "epoch": 0.13880028866229266, "grad_norm": 1.2484954595565796, "learning_rate": 2.9078470730588205e-05, "loss": 0.1221, "step": 6299 }, { "epoch": 0.13882232395180882, "grad_norm": 0.975719690322876, "learning_rate": 2.907810124951665e-05, "loss": 0.1491, "step": 6300 }, { "epoch": 0.138844359241325, "grad_norm": 0.7664415240287781, "learning_rate": 2.9077731696737775e-05, "loss": 0.1198, "step": 6301 }, { "epoch": 0.13886639453084115, "grad_norm": 0.9872851967811584, "learning_rate": 2.9077362072253462e-05, "loss": 0.1425, "step": 6302 }, { "epoch": 0.1388884298203573, "grad_norm": 1.2659077644348145, "learning_rate": 2.9076992376065598e-05, "loss": 0.1577, "step": 6303 }, { "epoch": 0.13891046510987345, "grad_norm": 0.9814605712890625, "learning_rate": 2.9076622608176062e-05, "loss": 0.1134, "step": 6304 }, { "epoch": 0.13893250039938962, "grad_norm": 0.7517799735069275, "learning_rate": 2.9076252768586736e-05, "loss": 0.1205, "step": 6305 }, { "epoch": 0.13895453568890578, "grad_norm": 0.827630341053009, "learning_rate": 2.907588285729951e-05, "loss": 0.1585, "step": 6306 }, { "epoch": 0.13897657097842195, "grad_norm": 1.0583667755126953, "learning_rate": 2.9075512874316264e-05, "loss": 0.0971, "step": 6307 }, { "epoch": 0.1389986062679381, "grad_norm": 0.84892737865448, "learning_rate": 2.9075142819638882e-05, "loss": 0.079, "step": 6308 }, { "epoch": 0.13902064155745425, "grad_norm": 1.3609004020690918, "learning_rate": 2.9074772693269252e-05, "loss": 0.174, "step": 6309 }, { "epoch": 0.13904267684697041, "grad_norm": 1.2667872905731201, "learning_rate": 2.907440249520925e-05, "loss": 0.1523, "step": 6310 }, { "epoch": 0.13906471213648658, "grad_norm": 0.9100791811943054, "learning_rate": 2.9074032225460778e-05, "loss": 0.1033, "step": 6311 }, { "epoch": 0.13908674742600274, "grad_norm": 0.9245779514312744, "learning_rate": 2.9073661884025708e-05, "loss": 0.1296, "step": 6312 }, { "epoch": 0.1391087827155189, "grad_norm": 0.7478522658348083, "learning_rate": 2.9073291470905934e-05, "loss": 0.1411, "step": 6313 }, { "epoch": 0.13913081800503507, "grad_norm": 1.2580385208129883, "learning_rate": 2.907292098610334e-05, "loss": 0.0973, "step": 6314 }, { "epoch": 0.13915285329455124, "grad_norm": 0.8678098320960999, "learning_rate": 2.9072550429619807e-05, "loss": 0.142, "step": 6315 }, { "epoch": 0.13917488858406737, "grad_norm": 1.0085890293121338, "learning_rate": 2.9072179801457233e-05, "loss": 0.1494, "step": 6316 }, { "epoch": 0.13919692387358354, "grad_norm": 0.9740037322044373, "learning_rate": 2.90718091016175e-05, "loss": 0.1078, "step": 6317 }, { "epoch": 0.1392189591630997, "grad_norm": 0.9984180927276611, "learning_rate": 2.9071438330102497e-05, "loss": 0.1462, "step": 6318 }, { "epoch": 0.13924099445261587, "grad_norm": 1.5367732048034668, "learning_rate": 2.9071067486914115e-05, "loss": 0.1531, "step": 6319 }, { "epoch": 0.13926302974213203, "grad_norm": 1.305904746055603, "learning_rate": 2.9070696572054236e-05, "loss": 0.1364, "step": 6320 }, { "epoch": 0.1392850650316482, "grad_norm": 3.114441394805908, "learning_rate": 2.907032558552476e-05, "loss": 0.2021, "step": 6321 }, { "epoch": 0.13930710032116433, "grad_norm": 0.901787519454956, "learning_rate": 2.9069954527327567e-05, "loss": 0.1889, "step": 6322 }, { "epoch": 0.1393291356106805, "grad_norm": 3.0788204669952393, "learning_rate": 2.906958339746455e-05, "loss": 0.1316, "step": 6323 }, { "epoch": 0.13935117090019666, "grad_norm": 1.5490721464157104, "learning_rate": 2.9069212195937605e-05, "loss": 0.1084, "step": 6324 }, { "epoch": 0.13937320618971283, "grad_norm": 1.2877390384674072, "learning_rate": 2.906884092274861e-05, "loss": 0.1635, "step": 6325 }, { "epoch": 0.139395241479229, "grad_norm": 1.6974461078643799, "learning_rate": 2.9068469577899466e-05, "loss": 0.1337, "step": 6326 }, { "epoch": 0.13941727676874516, "grad_norm": 1.0850213766098022, "learning_rate": 2.9068098161392067e-05, "loss": 0.1316, "step": 6327 }, { "epoch": 0.1394393120582613, "grad_norm": 1.1552594900131226, "learning_rate": 2.9067726673228303e-05, "loss": 0.1439, "step": 6328 }, { "epoch": 0.13946134734777746, "grad_norm": 1.0049868822097778, "learning_rate": 2.9067355113410054e-05, "loss": 0.1307, "step": 6329 }, { "epoch": 0.13948338263729362, "grad_norm": 2.8873255252838135, "learning_rate": 2.9066983481939228e-05, "loss": 0.1399, "step": 6330 }, { "epoch": 0.1395054179268098, "grad_norm": 1.439745306968689, "learning_rate": 2.906661177881771e-05, "loss": 0.1102, "step": 6331 }, { "epoch": 0.13952745321632595, "grad_norm": 0.7317680716514587, "learning_rate": 2.9066240004047394e-05, "loss": 0.124, "step": 6332 }, { "epoch": 0.13954948850584212, "grad_norm": 1.1032809019088745, "learning_rate": 2.9065868157630177e-05, "loss": 0.0876, "step": 6333 }, { "epoch": 0.13957152379535825, "grad_norm": 1.1747969388961792, "learning_rate": 2.906549623956795e-05, "loss": 0.1284, "step": 6334 }, { "epoch": 0.13959355908487442, "grad_norm": 0.9438398480415344, "learning_rate": 2.9065124249862606e-05, "loss": 0.1024, "step": 6335 }, { "epoch": 0.13961559437439058, "grad_norm": 1.3394109010696411, "learning_rate": 2.9064752188516044e-05, "loss": 0.1197, "step": 6336 }, { "epoch": 0.13963762966390675, "grad_norm": 0.9345175623893738, "learning_rate": 2.9064380055530156e-05, "loss": 0.1396, "step": 6337 }, { "epoch": 0.1396596649534229, "grad_norm": 0.9049353003501892, "learning_rate": 2.906400785090684e-05, "loss": 0.1059, "step": 6338 }, { "epoch": 0.13968170024293908, "grad_norm": 1.3915575742721558, "learning_rate": 2.9063635574647988e-05, "loss": 0.1157, "step": 6339 }, { "epoch": 0.13970373553245521, "grad_norm": 1.5479718446731567, "learning_rate": 2.90632632267555e-05, "loss": 0.1125, "step": 6340 }, { "epoch": 0.13972577082197138, "grad_norm": 1.4250603914260864, "learning_rate": 2.906289080723127e-05, "loss": 0.1242, "step": 6341 }, { "epoch": 0.13974780611148754, "grad_norm": 1.0135124921798706, "learning_rate": 2.9062518316077196e-05, "loss": 0.1255, "step": 6342 }, { "epoch": 0.1397698414010037, "grad_norm": 1.0298070907592773, "learning_rate": 2.9062145753295176e-05, "loss": 0.115, "step": 6343 }, { "epoch": 0.13979187669051987, "grad_norm": 1.1237623691558838, "learning_rate": 2.9061773118887108e-05, "loss": 0.1383, "step": 6344 }, { "epoch": 0.13981391198003604, "grad_norm": 1.3144487142562866, "learning_rate": 2.906140041285489e-05, "loss": 0.1295, "step": 6345 }, { "epoch": 0.13983594726955217, "grad_norm": 1.092220425605774, "learning_rate": 2.9061027635200414e-05, "loss": 0.1037, "step": 6346 }, { "epoch": 0.13985798255906834, "grad_norm": 1.0162566900253296, "learning_rate": 2.906065478592559e-05, "loss": 0.0915, "step": 6347 }, { "epoch": 0.1398800178485845, "grad_norm": 1.0759578943252563, "learning_rate": 2.9060281865032304e-05, "loss": 0.141, "step": 6348 }, { "epoch": 0.13990205313810067, "grad_norm": 0.9960611462593079, "learning_rate": 2.9059908872522474e-05, "loss": 0.1217, "step": 6349 }, { "epoch": 0.13992408842761683, "grad_norm": 1.0804420709609985, "learning_rate": 2.905953580839798e-05, "loss": 0.1269, "step": 6350 }, { "epoch": 0.139946123717133, "grad_norm": 0.9582923650741577, "learning_rate": 2.9059162672660733e-05, "loss": 0.1186, "step": 6351 }, { "epoch": 0.13996815900664916, "grad_norm": 1.3401130437850952, "learning_rate": 2.905878946531263e-05, "loss": 0.165, "step": 6352 }, { "epoch": 0.1399901942961653, "grad_norm": 0.9650579690933228, "learning_rate": 2.9058416186355576e-05, "loss": 0.1485, "step": 6353 }, { "epoch": 0.14001222958568146, "grad_norm": 1.1215013265609741, "learning_rate": 2.905804283579147e-05, "loss": 0.1803, "step": 6354 }, { "epoch": 0.14003426487519763, "grad_norm": 0.9083779454231262, "learning_rate": 2.9057669413622207e-05, "loss": 0.1387, "step": 6355 }, { "epoch": 0.1400563001647138, "grad_norm": 0.9585243463516235, "learning_rate": 2.9057295919849703e-05, "loss": 0.1315, "step": 6356 }, { "epoch": 0.14007833545422996, "grad_norm": 1.2234047651290894, "learning_rate": 2.9056922354475848e-05, "loss": 0.1501, "step": 6357 }, { "epoch": 0.14010037074374612, "grad_norm": 0.8863241076469421, "learning_rate": 2.9056548717502555e-05, "loss": 0.1373, "step": 6358 }, { "epoch": 0.14012240603326226, "grad_norm": 0.6596371531486511, "learning_rate": 2.9056175008931718e-05, "loss": 0.1024, "step": 6359 }, { "epoch": 0.14014444132277842, "grad_norm": 1.2093286514282227, "learning_rate": 2.905580122876524e-05, "loss": 0.1133, "step": 6360 }, { "epoch": 0.1401664766122946, "grad_norm": 0.8871234059333801, "learning_rate": 2.9055427377005038e-05, "loss": 0.1599, "step": 6361 }, { "epoch": 0.14018851190181075, "grad_norm": 1.846752643585205, "learning_rate": 2.9055053453653e-05, "loss": 0.1259, "step": 6362 }, { "epoch": 0.14021054719132692, "grad_norm": 1.504440426826477, "learning_rate": 2.905467945871104e-05, "loss": 0.1421, "step": 6363 }, { "epoch": 0.14023258248084308, "grad_norm": 0.9890714287757874, "learning_rate": 2.9054305392181063e-05, "loss": 0.1138, "step": 6364 }, { "epoch": 0.14025461777035922, "grad_norm": 1.106597661972046, "learning_rate": 2.9053931254064966e-05, "loss": 0.1357, "step": 6365 }, { "epoch": 0.14027665305987538, "grad_norm": 1.4251741170883179, "learning_rate": 2.9053557044364667e-05, "loss": 0.1306, "step": 6366 }, { "epoch": 0.14029868834939155, "grad_norm": 1.2723442316055298, "learning_rate": 2.9053182763082064e-05, "loss": 0.1426, "step": 6367 }, { "epoch": 0.1403207236389077, "grad_norm": 0.9477279186248779, "learning_rate": 2.9052808410219065e-05, "loss": 0.124, "step": 6368 }, { "epoch": 0.14034275892842388, "grad_norm": 0.9916552901268005, "learning_rate": 2.9052433985777578e-05, "loss": 0.127, "step": 6369 }, { "epoch": 0.14036479421794004, "grad_norm": 1.045823097229004, "learning_rate": 2.9052059489759506e-05, "loss": 0.1336, "step": 6370 }, { "epoch": 0.14038682950745618, "grad_norm": 0.9658746719360352, "learning_rate": 2.905168492216676e-05, "loss": 0.1248, "step": 6371 }, { "epoch": 0.14040886479697234, "grad_norm": 1.138306736946106, "learning_rate": 2.905131028300125e-05, "loss": 0.1381, "step": 6372 }, { "epoch": 0.1404309000864885, "grad_norm": 0.7472760677337646, "learning_rate": 2.9050935572264884e-05, "loss": 0.104, "step": 6373 }, { "epoch": 0.14045293537600467, "grad_norm": 0.9481459856033325, "learning_rate": 2.9050560789959566e-05, "loss": 0.1298, "step": 6374 }, { "epoch": 0.14047497066552084, "grad_norm": 1.1349287033081055, "learning_rate": 2.9050185936087207e-05, "loss": 0.143, "step": 6375 }, { "epoch": 0.140497005955037, "grad_norm": 1.3982244729995728, "learning_rate": 2.9049811010649716e-05, "loss": 0.1098, "step": 6376 }, { "epoch": 0.14051904124455314, "grad_norm": 1.1848124265670776, "learning_rate": 2.9049436013649006e-05, "loss": 0.0896, "step": 6377 }, { "epoch": 0.1405410765340693, "grad_norm": 0.7718729972839355, "learning_rate": 2.9049060945086982e-05, "loss": 0.1194, "step": 6378 }, { "epoch": 0.14056311182358547, "grad_norm": 1.0919601917266846, "learning_rate": 2.9048685804965555e-05, "loss": 0.1395, "step": 6379 }, { "epoch": 0.14058514711310163, "grad_norm": 1.4781845808029175, "learning_rate": 2.9048310593286642e-05, "loss": 0.1043, "step": 6380 }, { "epoch": 0.1406071824026178, "grad_norm": 1.5794306993484497, "learning_rate": 2.9047935310052147e-05, "loss": 0.1218, "step": 6381 }, { "epoch": 0.14062921769213396, "grad_norm": 1.2724111080169678, "learning_rate": 2.904755995526399e-05, "loss": 0.15, "step": 6382 }, { "epoch": 0.1406512529816501, "grad_norm": 1.3231353759765625, "learning_rate": 2.9047184528924074e-05, "loss": 0.1414, "step": 6383 }, { "epoch": 0.14067328827116626, "grad_norm": 1.490126609802246, "learning_rate": 2.904680903103431e-05, "loss": 0.1869, "step": 6384 }, { "epoch": 0.14069532356068243, "grad_norm": 1.0901157855987549, "learning_rate": 2.904643346159662e-05, "loss": 0.1373, "step": 6385 }, { "epoch": 0.1407173588501986, "grad_norm": 1.175527572631836, "learning_rate": 2.904605782061291e-05, "loss": 0.1307, "step": 6386 }, { "epoch": 0.14073939413971476, "grad_norm": 0.9801735877990723, "learning_rate": 2.9045682108085093e-05, "loss": 0.1761, "step": 6387 }, { "epoch": 0.14076142942923092, "grad_norm": 1.2233431339263916, "learning_rate": 2.904530632401509e-05, "loss": 0.1596, "step": 6388 }, { "epoch": 0.1407834647187471, "grad_norm": 0.9226171970367432, "learning_rate": 2.904493046840481e-05, "loss": 0.141, "step": 6389 }, { "epoch": 0.14080550000826322, "grad_norm": 1.2002156972885132, "learning_rate": 2.9044554541256165e-05, "loss": 0.1659, "step": 6390 }, { "epoch": 0.1408275352977794, "grad_norm": 1.0626745223999023, "learning_rate": 2.9044178542571073e-05, "loss": 0.123, "step": 6391 }, { "epoch": 0.14084957058729555, "grad_norm": 0.8118122816085815, "learning_rate": 2.9043802472351447e-05, "loss": 0.1175, "step": 6392 }, { "epoch": 0.14087160587681172, "grad_norm": 0.9463830590248108, "learning_rate": 2.904342633059921e-05, "loss": 0.1663, "step": 6393 }, { "epoch": 0.14089364116632788, "grad_norm": 0.8901235461235046, "learning_rate": 2.9043050117316265e-05, "loss": 0.0836, "step": 6394 }, { "epoch": 0.14091567645584405, "grad_norm": 1.3902217149734497, "learning_rate": 2.904267383250454e-05, "loss": 0.1769, "step": 6395 }, { "epoch": 0.14093771174536018, "grad_norm": 1.2393033504486084, "learning_rate": 2.9042297476165948e-05, "loss": 0.134, "step": 6396 }, { "epoch": 0.14095974703487635, "grad_norm": 1.7413761615753174, "learning_rate": 2.90419210483024e-05, "loss": 0.1693, "step": 6397 }, { "epoch": 0.1409817823243925, "grad_norm": 1.1399592161178589, "learning_rate": 2.904154454891582e-05, "loss": 0.0959, "step": 6398 }, { "epoch": 0.14100381761390868, "grad_norm": 1.128195881843567, "learning_rate": 2.9041167978008123e-05, "loss": 0.0896, "step": 6399 }, { "epoch": 0.14102585290342484, "grad_norm": 1.0567984580993652, "learning_rate": 2.904079133558123e-05, "loss": 0.1209, "step": 6400 }, { "epoch": 0.141047888192941, "grad_norm": 0.805738091468811, "learning_rate": 2.9040414621637055e-05, "loss": 0.1277, "step": 6401 }, { "epoch": 0.14106992348245714, "grad_norm": 1.180895447731018, "learning_rate": 2.9040037836177523e-05, "loss": 0.1198, "step": 6402 }, { "epoch": 0.1410919587719733, "grad_norm": 0.898313045501709, "learning_rate": 2.903966097920455e-05, "loss": 0.1289, "step": 6403 }, { "epoch": 0.14111399406148947, "grad_norm": 0.8381532430648804, "learning_rate": 2.9039284050720053e-05, "loss": 0.1144, "step": 6404 }, { "epoch": 0.14113602935100564, "grad_norm": 1.2099807262420654, "learning_rate": 2.903890705072595e-05, "loss": 0.1113, "step": 6405 }, { "epoch": 0.1411580646405218, "grad_norm": 0.9108765125274658, "learning_rate": 2.903852997922417e-05, "loss": 0.0915, "step": 6406 }, { "epoch": 0.14118009993003797, "grad_norm": 0.614673376083374, "learning_rate": 2.903815283621663e-05, "loss": 0.1324, "step": 6407 }, { "epoch": 0.1412021352195541, "grad_norm": 1.5363279581069946, "learning_rate": 2.9037775621705243e-05, "loss": 0.1342, "step": 6408 }, { "epoch": 0.14122417050907027, "grad_norm": 1.1635414361953735, "learning_rate": 2.9037398335691942e-05, "loss": 0.1209, "step": 6409 }, { "epoch": 0.14124620579858643, "grad_norm": 1.0316004753112793, "learning_rate": 2.903702097817864e-05, "loss": 0.118, "step": 6410 }, { "epoch": 0.1412682410881026, "grad_norm": 1.0984686613082886, "learning_rate": 2.903664354916727e-05, "loss": 0.1388, "step": 6411 }, { "epoch": 0.14129027637761876, "grad_norm": 1.1705867052078247, "learning_rate": 2.9036266048659744e-05, "loss": 0.1671, "step": 6412 }, { "epoch": 0.14131231166713493, "grad_norm": 0.9363653063774109, "learning_rate": 2.9035888476657987e-05, "loss": 0.1234, "step": 6413 }, { "epoch": 0.14133434695665106, "grad_norm": 0.8790977001190186, "learning_rate": 2.9035510833163922e-05, "loss": 0.1204, "step": 6414 }, { "epoch": 0.14135638224616723, "grad_norm": 0.9526787996292114, "learning_rate": 2.9035133118179476e-05, "loss": 0.1209, "step": 6415 }, { "epoch": 0.1413784175356834, "grad_norm": 1.3069308996200562, "learning_rate": 2.9034755331706568e-05, "loss": 0.1297, "step": 6416 }, { "epoch": 0.14140045282519956, "grad_norm": 1.1470941305160522, "learning_rate": 2.903437747374713e-05, "loss": 0.1247, "step": 6417 }, { "epoch": 0.14142248811471572, "grad_norm": 0.9770815968513489, "learning_rate": 2.903399954430308e-05, "loss": 0.1645, "step": 6418 }, { "epoch": 0.1414445234042319, "grad_norm": 1.367133617401123, "learning_rate": 2.9033621543376342e-05, "loss": 0.1173, "step": 6419 }, { "epoch": 0.14146655869374805, "grad_norm": 0.5913064479827881, "learning_rate": 2.9033243470968848e-05, "loss": 0.1154, "step": 6420 }, { "epoch": 0.1414885939832642, "grad_norm": 0.7652662396430969, "learning_rate": 2.9032865327082517e-05, "loss": 0.1029, "step": 6421 }, { "epoch": 0.14151062927278035, "grad_norm": 0.9048731923103333, "learning_rate": 2.9032487111719276e-05, "loss": 0.1207, "step": 6422 }, { "epoch": 0.14153266456229652, "grad_norm": 1.1163763999938965, "learning_rate": 2.903210882488106e-05, "loss": 0.1414, "step": 6423 }, { "epoch": 0.14155469985181268, "grad_norm": 1.1673980951309204, "learning_rate": 2.9031730466569783e-05, "loss": 0.1583, "step": 6424 }, { "epoch": 0.14157673514132885, "grad_norm": 0.9549402594566345, "learning_rate": 2.9031352036787383e-05, "loss": 0.1289, "step": 6425 }, { "epoch": 0.141598770430845, "grad_norm": 1.2505557537078857, "learning_rate": 2.9030973535535778e-05, "loss": 0.131, "step": 6426 }, { "epoch": 0.14162080572036115, "grad_norm": 0.9436218738555908, "learning_rate": 2.90305949628169e-05, "loss": 0.1002, "step": 6427 }, { "epoch": 0.1416428410098773, "grad_norm": 1.1067688465118408, "learning_rate": 2.9030216318632686e-05, "loss": 0.124, "step": 6428 }, { "epoch": 0.14166487629939348, "grad_norm": 1.6291248798370361, "learning_rate": 2.9029837602985048e-05, "loss": 0.1081, "step": 6429 }, { "epoch": 0.14168691158890964, "grad_norm": 0.8714329600334167, "learning_rate": 2.9029458815875927e-05, "loss": 0.1272, "step": 6430 }, { "epoch": 0.1417089468784258, "grad_norm": 1.1872221231460571, "learning_rate": 2.902907995730725e-05, "loss": 0.1582, "step": 6431 }, { "epoch": 0.14173098216794197, "grad_norm": 1.5959641933441162, "learning_rate": 2.9028701027280947e-05, "loss": 0.1098, "step": 6432 }, { "epoch": 0.1417530174574581, "grad_norm": 1.0231082439422607, "learning_rate": 2.9028322025798944e-05, "loss": 0.0987, "step": 6433 }, { "epoch": 0.14177505274697427, "grad_norm": 1.1878718137741089, "learning_rate": 2.9027942952863177e-05, "loss": 0.1296, "step": 6434 }, { "epoch": 0.14179708803649044, "grad_norm": 1.2339870929718018, "learning_rate": 2.9027563808475572e-05, "loss": 0.1402, "step": 6435 }, { "epoch": 0.1418191233260066, "grad_norm": 1.2672390937805176, "learning_rate": 2.9027184592638062e-05, "loss": 0.1341, "step": 6436 }, { "epoch": 0.14184115861552277, "grad_norm": 1.1032490730285645, "learning_rate": 2.902680530535258e-05, "loss": 0.1089, "step": 6437 }, { "epoch": 0.14186319390503893, "grad_norm": 1.470568060874939, "learning_rate": 2.9026425946621056e-05, "loss": 0.1094, "step": 6438 }, { "epoch": 0.14188522919455507, "grad_norm": 1.1718679666519165, "learning_rate": 2.9026046516445423e-05, "loss": 0.1034, "step": 6439 }, { "epoch": 0.14190726448407123, "grad_norm": 1.3506981134414673, "learning_rate": 2.902566701482762e-05, "loss": 0.1234, "step": 6440 }, { "epoch": 0.1419292997735874, "grad_norm": 3.2984797954559326, "learning_rate": 2.9025287441769565e-05, "loss": 0.1178, "step": 6441 }, { "epoch": 0.14195133506310356, "grad_norm": 2.0422496795654297, "learning_rate": 2.9024907797273204e-05, "loss": 0.1289, "step": 6442 }, { "epoch": 0.14197337035261973, "grad_norm": 1.819911241531372, "learning_rate": 2.9024528081340465e-05, "loss": 0.1823, "step": 6443 }, { "epoch": 0.1419954056421359, "grad_norm": 0.9562391042709351, "learning_rate": 2.9024148293973288e-05, "loss": 0.1282, "step": 6444 }, { "epoch": 0.14201744093165203, "grad_norm": 1.9023982286453247, "learning_rate": 2.9023768435173603e-05, "loss": 0.1195, "step": 6445 }, { "epoch": 0.1420394762211682, "grad_norm": 0.849908173084259, "learning_rate": 2.9023388504943338e-05, "loss": 0.1034, "step": 6446 }, { "epoch": 0.14206151151068436, "grad_norm": 1.025579810142517, "learning_rate": 2.9023008503284438e-05, "loss": 0.0886, "step": 6447 }, { "epoch": 0.14208354680020052, "grad_norm": 1.0707751512527466, "learning_rate": 2.902262843019884e-05, "loss": 0.1028, "step": 6448 }, { "epoch": 0.1421055820897167, "grad_norm": 0.8921049237251282, "learning_rate": 2.9022248285688475e-05, "loss": 0.1036, "step": 6449 }, { "epoch": 0.14212761737923285, "grad_norm": 1.27056086063385, "learning_rate": 2.902186806975528e-05, "loss": 0.1616, "step": 6450 }, { "epoch": 0.142149652668749, "grad_norm": 1.438307285308838, "learning_rate": 2.902148778240119e-05, "loss": 0.1466, "step": 6451 }, { "epoch": 0.14217168795826515, "grad_norm": 1.283165693283081, "learning_rate": 2.9021107423628145e-05, "loss": 0.1068, "step": 6452 }, { "epoch": 0.14219372324778132, "grad_norm": 1.3851512670516968, "learning_rate": 2.9020726993438082e-05, "loss": 0.1127, "step": 6453 }, { "epoch": 0.14221575853729748, "grad_norm": 1.3144170045852661, "learning_rate": 2.9020346491832933e-05, "loss": 0.1372, "step": 6454 }, { "epoch": 0.14223779382681365, "grad_norm": 1.0193856954574585, "learning_rate": 2.9019965918814647e-05, "loss": 0.1434, "step": 6455 }, { "epoch": 0.1422598291163298, "grad_norm": 1.7367886304855347, "learning_rate": 2.9019585274385154e-05, "loss": 0.1308, "step": 6456 }, { "epoch": 0.14228186440584598, "grad_norm": 0.9515307545661926, "learning_rate": 2.9019204558546396e-05, "loss": 0.1188, "step": 6457 }, { "epoch": 0.1423038996953621, "grad_norm": 0.9369624853134155, "learning_rate": 2.9018823771300308e-05, "loss": 0.1506, "step": 6458 }, { "epoch": 0.14232593498487828, "grad_norm": 0.8485192656517029, "learning_rate": 2.901844291264884e-05, "loss": 0.1006, "step": 6459 }, { "epoch": 0.14234797027439444, "grad_norm": 1.1355599164962769, "learning_rate": 2.901806198259392e-05, "loss": 0.1115, "step": 6460 }, { "epoch": 0.1423700055639106, "grad_norm": 0.7289108037948608, "learning_rate": 2.9017680981137494e-05, "loss": 0.0829, "step": 6461 }, { "epoch": 0.14239204085342677, "grad_norm": 1.2380262613296509, "learning_rate": 2.9017299908281505e-05, "loss": 0.1308, "step": 6462 }, { "epoch": 0.14241407614294294, "grad_norm": 1.4732621908187866, "learning_rate": 2.9016918764027885e-05, "loss": 0.0788, "step": 6463 }, { "epoch": 0.14243611143245907, "grad_norm": 0.5741399526596069, "learning_rate": 2.9016537548378587e-05, "loss": 0.1176, "step": 6464 }, { "epoch": 0.14245814672197524, "grad_norm": 0.9254029989242554, "learning_rate": 2.9016156261335544e-05, "loss": 0.1268, "step": 6465 }, { "epoch": 0.1424801820114914, "grad_norm": 1.0583797693252563, "learning_rate": 2.9015774902900703e-05, "loss": 0.1283, "step": 6466 }, { "epoch": 0.14250221730100757, "grad_norm": 0.8623379468917847, "learning_rate": 2.9015393473076e-05, "loss": 0.1011, "step": 6467 }, { "epoch": 0.14252425259052373, "grad_norm": 0.8586390614509583, "learning_rate": 2.9015011971863387e-05, "loss": 0.093, "step": 6468 }, { "epoch": 0.1425462878800399, "grad_norm": 0.9283366203308105, "learning_rate": 2.90146303992648e-05, "loss": 0.1327, "step": 6469 }, { "epoch": 0.14256832316955603, "grad_norm": 1.2666863203048706, "learning_rate": 2.9014248755282188e-05, "loss": 0.0857, "step": 6470 }, { "epoch": 0.1425903584590722, "grad_norm": 1.280099868774414, "learning_rate": 2.9013867039917488e-05, "loss": 0.1452, "step": 6471 }, { "epoch": 0.14261239374858836, "grad_norm": 3.107599973678589, "learning_rate": 2.9013485253172654e-05, "loss": 0.0867, "step": 6472 }, { "epoch": 0.14263442903810453, "grad_norm": 0.9270930886268616, "learning_rate": 2.9013103395049617e-05, "loss": 0.0947, "step": 6473 }, { "epoch": 0.1426564643276207, "grad_norm": 1.9229308366775513, "learning_rate": 2.901272146555034e-05, "loss": 0.1374, "step": 6474 }, { "epoch": 0.14267849961713686, "grad_norm": 0.9742111563682556, "learning_rate": 2.9012339464676752e-05, "loss": 0.1235, "step": 6475 }, { "epoch": 0.142700534906653, "grad_norm": 1.180190920829773, "learning_rate": 2.9011957392430803e-05, "loss": 0.1319, "step": 6476 }, { "epoch": 0.14272257019616916, "grad_norm": 0.9555949568748474, "learning_rate": 2.9011575248814444e-05, "loss": 0.1085, "step": 6477 }, { "epoch": 0.14274460548568532, "grad_norm": 1.05086088180542, "learning_rate": 2.901119303382962e-05, "loss": 0.1317, "step": 6478 }, { "epoch": 0.1427666407752015, "grad_norm": 1.0644174814224243, "learning_rate": 2.9010810747478276e-05, "loss": 0.1978, "step": 6479 }, { "epoch": 0.14278867606471765, "grad_norm": 1.201434850692749, "learning_rate": 2.901042838976236e-05, "loss": 0.1494, "step": 6480 }, { "epoch": 0.14281071135423382, "grad_norm": 0.8258358836174011, "learning_rate": 2.9010045960683815e-05, "loss": 0.1106, "step": 6481 }, { "epoch": 0.14283274664374995, "grad_norm": 1.2791218757629395, "learning_rate": 2.90096634602446e-05, "loss": 0.1289, "step": 6482 }, { "epoch": 0.14285478193326612, "grad_norm": 1.0173150300979614, "learning_rate": 2.9009280888446654e-05, "loss": 0.1015, "step": 6483 }, { "epoch": 0.14287681722278228, "grad_norm": 0.7254548072814941, "learning_rate": 2.9008898245291923e-05, "loss": 0.1232, "step": 6484 }, { "epoch": 0.14289885251229845, "grad_norm": 0.8194951415061951, "learning_rate": 2.9008515530782367e-05, "loss": 0.1434, "step": 6485 }, { "epoch": 0.1429208878018146, "grad_norm": 1.0187236070632935, "learning_rate": 2.9008132744919926e-05, "loss": 0.1336, "step": 6486 }, { "epoch": 0.14294292309133078, "grad_norm": 0.7181689739227295, "learning_rate": 2.9007749887706555e-05, "loss": 0.0843, "step": 6487 }, { "epoch": 0.1429649583808469, "grad_norm": 1.2857469320297241, "learning_rate": 2.9007366959144202e-05, "loss": 0.1001, "step": 6488 }, { "epoch": 0.14298699367036308, "grad_norm": 0.9833446145057678, "learning_rate": 2.9006983959234813e-05, "loss": 0.1078, "step": 6489 }, { "epoch": 0.14300902895987924, "grad_norm": 1.5804450511932373, "learning_rate": 2.900660088798035e-05, "loss": 0.1178, "step": 6490 }, { "epoch": 0.1430310642493954, "grad_norm": 0.5509797930717468, "learning_rate": 2.900621774538276e-05, "loss": 0.1318, "step": 6491 }, { "epoch": 0.14305309953891157, "grad_norm": 1.2202428579330444, "learning_rate": 2.9005834531443988e-05, "loss": 0.1026, "step": 6492 }, { "epoch": 0.14307513482842774, "grad_norm": 1.0325944423675537, "learning_rate": 2.900545124616599e-05, "loss": 0.0894, "step": 6493 }, { "epoch": 0.1430971701179439, "grad_norm": 0.9533949494361877, "learning_rate": 2.900506788955072e-05, "loss": 0.112, "step": 6494 }, { "epoch": 0.14311920540746004, "grad_norm": 1.0757789611816406, "learning_rate": 2.9004684461600125e-05, "loss": 0.1043, "step": 6495 }, { "epoch": 0.1431412406969762, "grad_norm": 1.2113395929336548, "learning_rate": 2.9004300962316167e-05, "loss": 0.1696, "step": 6496 }, { "epoch": 0.14316327598649237, "grad_norm": 1.2411599159240723, "learning_rate": 2.900391739170079e-05, "loss": 0.1298, "step": 6497 }, { "epoch": 0.14318531127600853, "grad_norm": 1.9590473175048828, "learning_rate": 2.9003533749755958e-05, "loss": 0.1416, "step": 6498 }, { "epoch": 0.1432073465655247, "grad_norm": 1.1313190460205078, "learning_rate": 2.9003150036483613e-05, "loss": 0.1196, "step": 6499 }, { "epoch": 0.14322938185504086, "grad_norm": 1.2850079536437988, "learning_rate": 2.9002766251885718e-05, "loss": 0.1535, "step": 6500 }, { "epoch": 0.143251417144557, "grad_norm": 1.2456504106521606, "learning_rate": 2.9002382395964226e-05, "loss": 0.1738, "step": 6501 }, { "epoch": 0.14327345243407316, "grad_norm": 1.141010046005249, "learning_rate": 2.9001998468721093e-05, "loss": 0.1627, "step": 6502 }, { "epoch": 0.14329548772358933, "grad_norm": 1.3859083652496338, "learning_rate": 2.9001614470158274e-05, "loss": 0.1579, "step": 6503 }, { "epoch": 0.1433175230131055, "grad_norm": 1.0627765655517578, "learning_rate": 2.900123040027772e-05, "loss": 0.1182, "step": 6504 }, { "epoch": 0.14333955830262166, "grad_norm": 0.9822757244110107, "learning_rate": 2.9000846259081396e-05, "loss": 0.1564, "step": 6505 }, { "epoch": 0.14336159359213782, "grad_norm": 1.2357430458068848, "learning_rate": 2.9000462046571254e-05, "loss": 0.1227, "step": 6506 }, { "epoch": 0.14338362888165396, "grad_norm": 0.772235631942749, "learning_rate": 2.900007776274925e-05, "loss": 0.1307, "step": 6507 }, { "epoch": 0.14340566417117012, "grad_norm": 0.6680556535720825, "learning_rate": 2.8999693407617344e-05, "loss": 0.1486, "step": 6508 }, { "epoch": 0.1434276994606863, "grad_norm": 0.9783845543861389, "learning_rate": 2.8999308981177488e-05, "loss": 0.1185, "step": 6509 }, { "epoch": 0.14344973475020245, "grad_norm": 0.6709009408950806, "learning_rate": 2.8998924483431647e-05, "loss": 0.0993, "step": 6510 }, { "epoch": 0.14347177003971862, "grad_norm": 0.8657524585723877, "learning_rate": 2.8998539914381774e-05, "loss": 0.1348, "step": 6511 }, { "epoch": 0.14349380532923478, "grad_norm": 1.1916227340698242, "learning_rate": 2.8998155274029836e-05, "loss": 0.152, "step": 6512 }, { "epoch": 0.14351584061875092, "grad_norm": 0.7623853087425232, "learning_rate": 2.8997770562377784e-05, "loss": 0.1111, "step": 6513 }, { "epoch": 0.14353787590826708, "grad_norm": 1.4031747579574585, "learning_rate": 2.8997385779427582e-05, "loss": 0.0903, "step": 6514 }, { "epoch": 0.14355991119778325, "grad_norm": 0.8548832535743713, "learning_rate": 2.8997000925181183e-05, "loss": 0.1079, "step": 6515 }, { "epoch": 0.1435819464872994, "grad_norm": 1.027208924293518, "learning_rate": 2.8996615999640556e-05, "loss": 0.1147, "step": 6516 }, { "epoch": 0.14360398177681558, "grad_norm": 1.3050544261932373, "learning_rate": 2.899623100280766e-05, "loss": 0.1168, "step": 6517 }, { "epoch": 0.14362601706633174, "grad_norm": 1.2868521213531494, "learning_rate": 2.8995845934684453e-05, "loss": 0.1152, "step": 6518 }, { "epoch": 0.14364805235584788, "grad_norm": 0.7591838240623474, "learning_rate": 2.8995460795272896e-05, "loss": 0.1055, "step": 6519 }, { "epoch": 0.14367008764536404, "grad_norm": 1.1799018383026123, "learning_rate": 2.899507558457495e-05, "loss": 0.1302, "step": 6520 }, { "epoch": 0.1436921229348802, "grad_norm": 1.1423931121826172, "learning_rate": 2.8994690302592584e-05, "loss": 0.129, "step": 6521 }, { "epoch": 0.14371415822439637, "grad_norm": 0.9684941172599792, "learning_rate": 2.8994304949327753e-05, "loss": 0.1394, "step": 6522 }, { "epoch": 0.14373619351391254, "grad_norm": 1.3396090269088745, "learning_rate": 2.8993919524782426e-05, "loss": 0.1209, "step": 6523 }, { "epoch": 0.1437582288034287, "grad_norm": 1.0672577619552612, "learning_rate": 2.8993534028958558e-05, "loss": 0.113, "step": 6524 }, { "epoch": 0.14378026409294486, "grad_norm": 1.406863808631897, "learning_rate": 2.8993148461858118e-05, "loss": 0.1226, "step": 6525 }, { "epoch": 0.143802299382461, "grad_norm": 1.535662293434143, "learning_rate": 2.899276282348307e-05, "loss": 0.1546, "step": 6526 }, { "epoch": 0.14382433467197717, "grad_norm": 1.3513931035995483, "learning_rate": 2.8992377113835384e-05, "loss": 0.1344, "step": 6527 }, { "epoch": 0.14384636996149333, "grad_norm": 0.9388654828071594, "learning_rate": 2.899199133291701e-05, "loss": 0.1093, "step": 6528 }, { "epoch": 0.1438684052510095, "grad_norm": 0.696444571018219, "learning_rate": 2.8991605480729926e-05, "loss": 0.1427, "step": 6529 }, { "epoch": 0.14389044054052566, "grad_norm": 1.3646445274353027, "learning_rate": 2.8991219557276085e-05, "loss": 0.0982, "step": 6530 }, { "epoch": 0.14391247583004182, "grad_norm": 0.8578577041625977, "learning_rate": 2.8990833562557468e-05, "loss": 0.1291, "step": 6531 }, { "epoch": 0.14393451111955796, "grad_norm": 1.9113649129867554, "learning_rate": 2.8990447496576028e-05, "loss": 0.1126, "step": 6532 }, { "epoch": 0.14395654640907413, "grad_norm": 1.0934315919876099, "learning_rate": 2.899006135933374e-05, "loss": 0.1633, "step": 6533 }, { "epoch": 0.1439785816985903, "grad_norm": 0.9745647311210632, "learning_rate": 2.8989675150832566e-05, "loss": 0.1293, "step": 6534 }, { "epoch": 0.14400061698810646, "grad_norm": 0.898809015750885, "learning_rate": 2.8989288871074474e-05, "loss": 0.1378, "step": 6535 }, { "epoch": 0.14402265227762262, "grad_norm": 1.767519474029541, "learning_rate": 2.898890252006143e-05, "loss": 0.0732, "step": 6536 }, { "epoch": 0.14404468756713878, "grad_norm": 1.0883561372756958, "learning_rate": 2.8988516097795408e-05, "loss": 0.082, "step": 6537 }, { "epoch": 0.14406672285665492, "grad_norm": 0.6814450025558472, "learning_rate": 2.898812960427837e-05, "loss": 0.0807, "step": 6538 }, { "epoch": 0.1440887581461711, "grad_norm": 0.9469237923622131, "learning_rate": 2.8987743039512293e-05, "loss": 0.1482, "step": 6539 }, { "epoch": 0.14411079343568725, "grad_norm": 0.6957045793533325, "learning_rate": 2.8987356403499133e-05, "loss": 0.0997, "step": 6540 }, { "epoch": 0.14413282872520342, "grad_norm": 1.2658759355545044, "learning_rate": 2.8986969696240866e-05, "loss": 0.0947, "step": 6541 }, { "epoch": 0.14415486401471958, "grad_norm": 0.852369487285614, "learning_rate": 2.8986582917739465e-05, "loss": 0.0909, "step": 6542 }, { "epoch": 0.14417689930423574, "grad_norm": 0.852199137210846, "learning_rate": 2.8986196067996897e-05, "loss": 0.142, "step": 6543 }, { "epoch": 0.14419893459375188, "grad_norm": 1.0789337158203125, "learning_rate": 2.898580914701513e-05, "loss": 0.1401, "step": 6544 }, { "epoch": 0.14422096988326805, "grad_norm": 1.071327805519104, "learning_rate": 2.898542215479614e-05, "loss": 0.1348, "step": 6545 }, { "epoch": 0.1442430051727842, "grad_norm": 1.6262236833572388, "learning_rate": 2.8985035091341892e-05, "loss": 0.0905, "step": 6546 }, { "epoch": 0.14426504046230038, "grad_norm": 1.011544942855835, "learning_rate": 2.8984647956654364e-05, "loss": 0.154, "step": 6547 }, { "epoch": 0.14428707575181654, "grad_norm": 0.7955158352851868, "learning_rate": 2.8984260750735523e-05, "loss": 0.0921, "step": 6548 }, { "epoch": 0.1443091110413327, "grad_norm": 1.0020557641983032, "learning_rate": 2.8983873473587347e-05, "loss": 0.1253, "step": 6549 }, { "epoch": 0.14433114633084884, "grad_norm": 1.0442174673080444, "learning_rate": 2.8983486125211798e-05, "loss": 0.1653, "step": 6550 }, { "epoch": 0.144353181620365, "grad_norm": 1.2512810230255127, "learning_rate": 2.8983098705610858e-05, "loss": 0.1336, "step": 6551 }, { "epoch": 0.14437521690988117, "grad_norm": 1.1216065883636475, "learning_rate": 2.89827112147865e-05, "loss": 0.1215, "step": 6552 }, { "epoch": 0.14439725219939734, "grad_norm": 1.1417324542999268, "learning_rate": 2.8982323652740694e-05, "loss": 0.126, "step": 6553 }, { "epoch": 0.1444192874889135, "grad_norm": 1.198793888092041, "learning_rate": 2.8981936019475418e-05, "loss": 0.1229, "step": 6554 }, { "epoch": 0.14444132277842966, "grad_norm": 2.2768537998199463, "learning_rate": 2.8981548314992642e-05, "loss": 0.1294, "step": 6555 }, { "epoch": 0.1444633580679458, "grad_norm": 2.2822165489196777, "learning_rate": 2.8981160539294345e-05, "loss": 0.1524, "step": 6556 }, { "epoch": 0.14448539335746197, "grad_norm": 1.4204177856445312, "learning_rate": 2.8980772692382497e-05, "loss": 0.1466, "step": 6557 }, { "epoch": 0.14450742864697813, "grad_norm": 1.0504164695739746, "learning_rate": 2.898038477425908e-05, "loss": 0.1349, "step": 6558 }, { "epoch": 0.1445294639364943, "grad_norm": 1.3384310007095337, "learning_rate": 2.897999678492606e-05, "loss": 0.1325, "step": 6559 }, { "epoch": 0.14455149922601046, "grad_norm": 0.89284747838974, "learning_rate": 2.8979608724385424e-05, "loss": 0.1072, "step": 6560 }, { "epoch": 0.14457353451552662, "grad_norm": 1.1862603425979614, "learning_rate": 2.8979220592639148e-05, "loss": 0.1743, "step": 6561 }, { "epoch": 0.1445955698050428, "grad_norm": 1.00348699092865, "learning_rate": 2.8978832389689202e-05, "loss": 0.136, "step": 6562 }, { "epoch": 0.14461760509455893, "grad_norm": 1.31015145778656, "learning_rate": 2.8978444115537565e-05, "loss": 0.1701, "step": 6563 }, { "epoch": 0.1446396403840751, "grad_norm": 1.418237328529358, "learning_rate": 2.8978055770186215e-05, "loss": 0.1714, "step": 6564 }, { "epoch": 0.14466167567359126, "grad_norm": 1.1587004661560059, "learning_rate": 2.8977667353637138e-05, "loss": 0.183, "step": 6565 }, { "epoch": 0.14468371096310742, "grad_norm": 1.396111011505127, "learning_rate": 2.89772788658923e-05, "loss": 0.0961, "step": 6566 }, { "epoch": 0.14470574625262358, "grad_norm": 1.26401948928833, "learning_rate": 2.8976890306953688e-05, "loss": 0.1427, "step": 6567 }, { "epoch": 0.14472778154213975, "grad_norm": 1.1678494215011597, "learning_rate": 2.8976501676823278e-05, "loss": 0.0885, "step": 6568 }, { "epoch": 0.1447498168316559, "grad_norm": 0.7995563745498657, "learning_rate": 2.897611297550305e-05, "loss": 0.151, "step": 6569 }, { "epoch": 0.14477185212117205, "grad_norm": 1.192351222038269, "learning_rate": 2.8975724202994986e-05, "loss": 0.1117, "step": 6570 }, { "epoch": 0.14479388741068822, "grad_norm": 1.128954291343689, "learning_rate": 2.8975335359301062e-05, "loss": 0.1355, "step": 6571 }, { "epoch": 0.14481592270020438, "grad_norm": 0.8245959877967834, "learning_rate": 2.897494644442326e-05, "loss": 0.113, "step": 6572 }, { "epoch": 0.14483795798972054, "grad_norm": 0.9090444445610046, "learning_rate": 2.8974557458363563e-05, "loss": 0.1108, "step": 6573 }, { "epoch": 0.1448599932792367, "grad_norm": 1.1889325380325317, "learning_rate": 2.8974168401123952e-05, "loss": 0.163, "step": 6574 }, { "epoch": 0.14488202856875285, "grad_norm": 0.996295690536499, "learning_rate": 2.8973779272706404e-05, "loss": 0.1506, "step": 6575 }, { "epoch": 0.144904063858269, "grad_norm": 0.650809645652771, "learning_rate": 2.897339007311291e-05, "loss": 0.121, "step": 6576 }, { "epoch": 0.14492609914778518, "grad_norm": 0.9534240961074829, "learning_rate": 2.8973000802345442e-05, "loss": 0.1141, "step": 6577 }, { "epoch": 0.14494813443730134, "grad_norm": 0.8077347278594971, "learning_rate": 2.8972611460405996e-05, "loss": 0.122, "step": 6578 }, { "epoch": 0.1449701697268175, "grad_norm": 1.0452001094818115, "learning_rate": 2.897222204729654e-05, "loss": 0.1498, "step": 6579 }, { "epoch": 0.14499220501633367, "grad_norm": 0.9038552045822144, "learning_rate": 2.8971832563019067e-05, "loss": 0.0911, "step": 6580 }, { "epoch": 0.1450142403058498, "grad_norm": 0.7466464638710022, "learning_rate": 2.8971443007575557e-05, "loss": 0.1117, "step": 6581 }, { "epoch": 0.14503627559536597, "grad_norm": 0.9079005718231201, "learning_rate": 2.8971053380968e-05, "loss": 0.0962, "step": 6582 }, { "epoch": 0.14505831088488214, "grad_norm": 0.69721919298172, "learning_rate": 2.8970663683198373e-05, "loss": 0.1387, "step": 6583 }, { "epoch": 0.1450803461743983, "grad_norm": 1.398725986480713, "learning_rate": 2.897027391426866e-05, "loss": 0.1157, "step": 6584 }, { "epoch": 0.14510238146391446, "grad_norm": 0.9601684808731079, "learning_rate": 2.8969884074180857e-05, "loss": 0.085, "step": 6585 }, { "epoch": 0.14512441675343063, "grad_norm": 1.0439724922180176, "learning_rate": 2.8969494162936943e-05, "loss": 0.1358, "step": 6586 }, { "epoch": 0.14514645204294677, "grad_norm": 0.8845354914665222, "learning_rate": 2.89691041805389e-05, "loss": 0.1039, "step": 6587 }, { "epoch": 0.14516848733246293, "grad_norm": 1.165938138961792, "learning_rate": 2.8968714126988722e-05, "loss": 0.1481, "step": 6588 }, { "epoch": 0.1451905226219791, "grad_norm": 1.1342804431915283, "learning_rate": 2.896832400228839e-05, "loss": 0.1236, "step": 6589 }, { "epoch": 0.14521255791149526, "grad_norm": 1.2821847200393677, "learning_rate": 2.8967933806439894e-05, "loss": 0.1773, "step": 6590 }, { "epoch": 0.14523459320101142, "grad_norm": 1.1966397762298584, "learning_rate": 2.896754353944522e-05, "loss": 0.1536, "step": 6591 }, { "epoch": 0.1452566284905276, "grad_norm": 1.1805925369262695, "learning_rate": 2.8967153201306363e-05, "loss": 0.1607, "step": 6592 }, { "epoch": 0.14527866378004373, "grad_norm": 1.1930028200149536, "learning_rate": 2.8966762792025297e-05, "loss": 0.1075, "step": 6593 }, { "epoch": 0.1453006990695599, "grad_norm": 1.4530335664749146, "learning_rate": 2.8966372311604022e-05, "loss": 0.1312, "step": 6594 }, { "epoch": 0.14532273435907606, "grad_norm": 1.123054027557373, "learning_rate": 2.8965981760044522e-05, "loss": 0.1065, "step": 6595 }, { "epoch": 0.14534476964859222, "grad_norm": 1.5243207216262817, "learning_rate": 2.8965591137348793e-05, "loss": 0.1302, "step": 6596 }, { "epoch": 0.14536680493810838, "grad_norm": 0.824539065361023, "learning_rate": 2.8965200443518813e-05, "loss": 0.0805, "step": 6597 }, { "epoch": 0.14538884022762455, "grad_norm": 1.1448144912719727, "learning_rate": 2.8964809678556582e-05, "loss": 0.1666, "step": 6598 }, { "epoch": 0.14541087551714071, "grad_norm": 1.1561944484710693, "learning_rate": 2.8964418842464088e-05, "loss": 0.107, "step": 6599 }, { "epoch": 0.14543291080665685, "grad_norm": 1.2582151889801025, "learning_rate": 2.8964027935243314e-05, "loss": 0.1182, "step": 6600 }, { "epoch": 0.14545494609617302, "grad_norm": 1.320759654045105, "learning_rate": 2.896363695689626e-05, "loss": 0.1015, "step": 6601 }, { "epoch": 0.14547698138568918, "grad_norm": 1.2683069705963135, "learning_rate": 2.8963245907424916e-05, "loss": 0.1389, "step": 6602 }, { "epoch": 0.14549901667520534, "grad_norm": 1.0657895803451538, "learning_rate": 2.8962854786831278e-05, "loss": 0.1345, "step": 6603 }, { "epoch": 0.1455210519647215, "grad_norm": 0.9794647097587585, "learning_rate": 2.896246359511733e-05, "loss": 0.1349, "step": 6604 }, { "epoch": 0.14554308725423767, "grad_norm": 1.129123568534851, "learning_rate": 2.8962072332285064e-05, "loss": 0.1031, "step": 6605 }, { "epoch": 0.1455651225437538, "grad_norm": 0.8674828410148621, "learning_rate": 2.8961680998336477e-05, "loss": 0.1002, "step": 6606 }, { "epoch": 0.14558715783326998, "grad_norm": 1.0381083488464355, "learning_rate": 2.8961289593273565e-05, "loss": 0.1616, "step": 6607 }, { "epoch": 0.14560919312278614, "grad_norm": 1.2698469161987305, "learning_rate": 2.896089811709832e-05, "loss": 0.1583, "step": 6608 }, { "epoch": 0.1456312284123023, "grad_norm": 1.1614038944244385, "learning_rate": 2.8960506569812733e-05, "loss": 0.1705, "step": 6609 }, { "epoch": 0.14565326370181847, "grad_norm": 0.9886482357978821, "learning_rate": 2.8960114951418796e-05, "loss": 0.1289, "step": 6610 }, { "epoch": 0.14567529899133463, "grad_norm": 0.8656263947486877, "learning_rate": 2.895972326191851e-05, "loss": 0.088, "step": 6611 }, { "epoch": 0.14569733428085077, "grad_norm": 1.3470755815505981, "learning_rate": 2.895933150131387e-05, "loss": 0.1169, "step": 6612 }, { "epoch": 0.14571936957036694, "grad_norm": 0.9844469428062439, "learning_rate": 2.8958939669606865e-05, "loss": 0.1356, "step": 6613 }, { "epoch": 0.1457414048598831, "grad_norm": 1.1735835075378418, "learning_rate": 2.8958547766799495e-05, "loss": 0.0922, "step": 6614 }, { "epoch": 0.14576344014939926, "grad_norm": 0.7623171806335449, "learning_rate": 2.895815579289376e-05, "loss": 0.1035, "step": 6615 }, { "epoch": 0.14578547543891543, "grad_norm": 0.9101952314376831, "learning_rate": 2.895776374789165e-05, "loss": 0.1409, "step": 6616 }, { "epoch": 0.1458075107284316, "grad_norm": 1.0545815229415894, "learning_rate": 2.8957371631795162e-05, "loss": 0.1274, "step": 6617 }, { "epoch": 0.14582954601794773, "grad_norm": 1.18839430809021, "learning_rate": 2.8956979444606303e-05, "loss": 0.1513, "step": 6618 }, { "epoch": 0.1458515813074639, "grad_norm": 0.933493971824646, "learning_rate": 2.8956587186327056e-05, "loss": 0.1183, "step": 6619 }, { "epoch": 0.14587361659698006, "grad_norm": 0.9817913174629211, "learning_rate": 2.895619485695943e-05, "loss": 0.1379, "step": 6620 }, { "epoch": 0.14589565188649622, "grad_norm": 0.9363730549812317, "learning_rate": 2.895580245650542e-05, "loss": 0.1053, "step": 6621 }, { "epoch": 0.1459176871760124, "grad_norm": 0.8019535541534424, "learning_rate": 2.8955409984967023e-05, "loss": 0.1394, "step": 6622 }, { "epoch": 0.14593972246552855, "grad_norm": 1.0039087533950806, "learning_rate": 2.8955017442346236e-05, "loss": 0.106, "step": 6623 }, { "epoch": 0.1459617577550447, "grad_norm": 0.8627941608428955, "learning_rate": 2.8954624828645066e-05, "loss": 0.1203, "step": 6624 }, { "epoch": 0.14598379304456086, "grad_norm": 1.153628945350647, "learning_rate": 2.895423214386551e-05, "loss": 0.1239, "step": 6625 }, { "epoch": 0.14600582833407702, "grad_norm": 1.2388585805892944, "learning_rate": 2.8953839388009565e-05, "loss": 0.1227, "step": 6626 }, { "epoch": 0.14602786362359318, "grad_norm": 0.6647824645042419, "learning_rate": 2.8953446561079234e-05, "loss": 0.1225, "step": 6627 }, { "epoch": 0.14604989891310935, "grad_norm": 0.8586386442184448, "learning_rate": 2.8953053663076518e-05, "loss": 0.1354, "step": 6628 }, { "epoch": 0.14607193420262551, "grad_norm": 0.8824890851974487, "learning_rate": 2.8952660694003417e-05, "loss": 0.1055, "step": 6629 }, { "epoch": 0.14609396949214168, "grad_norm": 1.1241649389266968, "learning_rate": 2.895226765386193e-05, "loss": 0.1585, "step": 6630 }, { "epoch": 0.14611600478165782, "grad_norm": 0.9417952299118042, "learning_rate": 2.8951874542654065e-05, "loss": 0.1127, "step": 6631 }, { "epoch": 0.14613804007117398, "grad_norm": 0.83933424949646, "learning_rate": 2.895148136038182e-05, "loss": 0.0961, "step": 6632 }, { "epoch": 0.14616007536069014, "grad_norm": 0.8685826659202576, "learning_rate": 2.8951088107047202e-05, "loss": 0.0903, "step": 6633 }, { "epoch": 0.1461821106502063, "grad_norm": 1.2057002782821655, "learning_rate": 2.8950694782652206e-05, "loss": 0.1451, "step": 6634 }, { "epoch": 0.14620414593972247, "grad_norm": 0.8953686952590942, "learning_rate": 2.8950301387198844e-05, "loss": 0.1133, "step": 6635 }, { "epoch": 0.14622618122923864, "grad_norm": 0.9117991924285889, "learning_rate": 2.8949907920689118e-05, "loss": 0.0625, "step": 6636 }, { "epoch": 0.14624821651875478, "grad_norm": 1.0467274188995361, "learning_rate": 2.8949514383125026e-05, "loss": 0.1123, "step": 6637 }, { "epoch": 0.14627025180827094, "grad_norm": 1.0657079219818115, "learning_rate": 2.894912077450858e-05, "loss": 0.1165, "step": 6638 }, { "epoch": 0.1462922870977871, "grad_norm": 0.6406477689743042, "learning_rate": 2.894872709484178e-05, "loss": 0.1172, "step": 6639 }, { "epoch": 0.14631432238730327, "grad_norm": 0.9024988412857056, "learning_rate": 2.8948333344126636e-05, "loss": 0.1064, "step": 6640 }, { "epoch": 0.14633635767681943, "grad_norm": 1.006885290145874, "learning_rate": 2.8947939522365145e-05, "loss": 0.1403, "step": 6641 }, { "epoch": 0.1463583929663356, "grad_norm": 0.8803435564041138, "learning_rate": 2.8947545629559325e-05, "loss": 0.149, "step": 6642 }, { "epoch": 0.14638042825585174, "grad_norm": 0.6517943143844604, "learning_rate": 2.8947151665711175e-05, "loss": 0.1322, "step": 6643 }, { "epoch": 0.1464024635453679, "grad_norm": 0.8287487626075745, "learning_rate": 2.8946757630822696e-05, "loss": 0.0895, "step": 6644 }, { "epoch": 0.14642449883488406, "grad_norm": 1.886164903640747, "learning_rate": 2.894636352489591e-05, "loss": 0.1175, "step": 6645 }, { "epoch": 0.14644653412440023, "grad_norm": 1.2029829025268555, "learning_rate": 2.894596934793281e-05, "loss": 0.1093, "step": 6646 }, { "epoch": 0.1464685694139164, "grad_norm": 1.0872074365615845, "learning_rate": 2.894557509993541e-05, "loss": 0.1459, "step": 6647 }, { "epoch": 0.14649060470343256, "grad_norm": 0.8528841733932495, "learning_rate": 2.894518078090572e-05, "loss": 0.1346, "step": 6648 }, { "epoch": 0.1465126399929487, "grad_norm": 1.0051227807998657, "learning_rate": 2.8944786390845745e-05, "loss": 0.1053, "step": 6649 }, { "epoch": 0.14653467528246486, "grad_norm": 0.9378997683525085, "learning_rate": 2.8944391929757494e-05, "loss": 0.1266, "step": 6650 }, { "epoch": 0.14655671057198102, "grad_norm": 1.1121768951416016, "learning_rate": 2.894399739764298e-05, "loss": 0.1627, "step": 6651 }, { "epoch": 0.1465787458614972, "grad_norm": 0.9806087017059326, "learning_rate": 2.894360279450421e-05, "loss": 0.0905, "step": 6652 }, { "epoch": 0.14660078115101335, "grad_norm": 1.742502212524414, "learning_rate": 2.894320812034319e-05, "loss": 0.1244, "step": 6653 }, { "epoch": 0.14662281644052952, "grad_norm": 1.0658376216888428, "learning_rate": 2.894281337516194e-05, "loss": 0.1265, "step": 6654 }, { "epoch": 0.14664485173004566, "grad_norm": 1.568787693977356, "learning_rate": 2.894241855896246e-05, "loss": 0.1718, "step": 6655 }, { "epoch": 0.14666688701956182, "grad_norm": 0.992755115032196, "learning_rate": 2.8942023671746768e-05, "loss": 0.1356, "step": 6656 }, { "epoch": 0.14668892230907798, "grad_norm": 1.622086524963379, "learning_rate": 2.8941628713516873e-05, "loss": 0.1369, "step": 6657 }, { "epoch": 0.14671095759859415, "grad_norm": 1.3175325393676758, "learning_rate": 2.8941233684274787e-05, "loss": 0.1236, "step": 6658 }, { "epoch": 0.14673299288811031, "grad_norm": 0.9196764230728149, "learning_rate": 2.894083858402252e-05, "loss": 0.1293, "step": 6659 }, { "epoch": 0.14675502817762648, "grad_norm": 1.5462181568145752, "learning_rate": 2.894044341276209e-05, "loss": 0.129, "step": 6660 }, { "epoch": 0.14677706346714262, "grad_norm": 1.1466137170791626, "learning_rate": 2.8940048170495503e-05, "loss": 0.1001, "step": 6661 }, { "epoch": 0.14679909875665878, "grad_norm": 1.0626513957977295, "learning_rate": 2.8939652857224775e-05, "loss": 0.1176, "step": 6662 }, { "epoch": 0.14682113404617494, "grad_norm": 0.9015787839889526, "learning_rate": 2.8939257472951924e-05, "loss": 0.1069, "step": 6663 }, { "epoch": 0.1468431693356911, "grad_norm": 1.0436714887619019, "learning_rate": 2.8938862017678954e-05, "loss": 0.1194, "step": 6664 }, { "epoch": 0.14686520462520727, "grad_norm": 1.180490493774414, "learning_rate": 2.893846649140789e-05, "loss": 0.1378, "step": 6665 }, { "epoch": 0.14688723991472344, "grad_norm": 1.48264479637146, "learning_rate": 2.8938070894140743e-05, "loss": 0.1562, "step": 6666 }, { "epoch": 0.1469092752042396, "grad_norm": 1.5092238187789917, "learning_rate": 2.893767522587952e-05, "loss": 0.1458, "step": 6667 }, { "epoch": 0.14693131049375574, "grad_norm": 1.3545253276824951, "learning_rate": 2.8937279486626246e-05, "loss": 0.093, "step": 6668 }, { "epoch": 0.1469533457832719, "grad_norm": 1.0007808208465576, "learning_rate": 2.8936883676382935e-05, "loss": 0.119, "step": 6669 }, { "epoch": 0.14697538107278807, "grad_norm": 1.1006627082824707, "learning_rate": 2.8936487795151604e-05, "loss": 0.0887, "step": 6670 }, { "epoch": 0.14699741636230423, "grad_norm": 0.7853038311004639, "learning_rate": 2.893609184293426e-05, "loss": 0.1122, "step": 6671 }, { "epoch": 0.1470194516518204, "grad_norm": 1.101028561592102, "learning_rate": 2.8935695819732938e-05, "loss": 0.1223, "step": 6672 }, { "epoch": 0.14704148694133656, "grad_norm": 0.8576909303665161, "learning_rate": 2.8935299725549637e-05, "loss": 0.1408, "step": 6673 }, { "epoch": 0.1470635222308527, "grad_norm": 1.1729434728622437, "learning_rate": 2.8934903560386384e-05, "loss": 0.1422, "step": 6674 }, { "epoch": 0.14708555752036886, "grad_norm": 1.3636225461959839, "learning_rate": 2.8934507324245193e-05, "loss": 0.1791, "step": 6675 }, { "epoch": 0.14710759280988503, "grad_norm": 1.216578722000122, "learning_rate": 2.8934111017128085e-05, "loss": 0.1719, "step": 6676 }, { "epoch": 0.1471296280994012, "grad_norm": 0.9973180294036865, "learning_rate": 2.893371463903708e-05, "loss": 0.0974, "step": 6677 }, { "epoch": 0.14715166338891736, "grad_norm": 0.9193342924118042, "learning_rate": 2.8933318189974188e-05, "loss": 0.0909, "step": 6678 }, { "epoch": 0.14717369867843352, "grad_norm": 1.0792373418807983, "learning_rate": 2.8932921669941437e-05, "loss": 0.1221, "step": 6679 }, { "epoch": 0.14719573396794966, "grad_norm": 1.2469141483306885, "learning_rate": 2.8932525078940848e-05, "loss": 0.1415, "step": 6680 }, { "epoch": 0.14721776925746582, "grad_norm": 1.351414442062378, "learning_rate": 2.8932128416974435e-05, "loss": 0.133, "step": 6681 }, { "epoch": 0.147239804546982, "grad_norm": 0.7444019913673401, "learning_rate": 2.8931731684044226e-05, "loss": 0.1239, "step": 6682 }, { "epoch": 0.14726183983649815, "grad_norm": 0.9172148704528809, "learning_rate": 2.893133488015223e-05, "loss": 0.1167, "step": 6683 }, { "epoch": 0.14728387512601432, "grad_norm": 0.9886499047279358, "learning_rate": 2.8930938005300476e-05, "loss": 0.1207, "step": 6684 }, { "epoch": 0.14730591041553048, "grad_norm": 0.830700695514679, "learning_rate": 2.8930541059490985e-05, "loss": 0.1549, "step": 6685 }, { "epoch": 0.14732794570504662, "grad_norm": 1.2484346628189087, "learning_rate": 2.8930144042725778e-05, "loss": 0.1682, "step": 6686 }, { "epoch": 0.14734998099456278, "grad_norm": 1.0373820066452026, "learning_rate": 2.892974695500688e-05, "loss": 0.1421, "step": 6687 }, { "epoch": 0.14737201628407895, "grad_norm": 0.7966519594192505, "learning_rate": 2.892934979633631e-05, "loss": 0.1164, "step": 6688 }, { "epoch": 0.14739405157359511, "grad_norm": 1.3067580461502075, "learning_rate": 2.8928952566716093e-05, "loss": 0.135, "step": 6689 }, { "epoch": 0.14741608686311128, "grad_norm": 0.9889795184135437, "learning_rate": 2.8928555266148248e-05, "loss": 0.1439, "step": 6690 }, { "epoch": 0.14743812215262744, "grad_norm": 0.9968950152397156, "learning_rate": 2.8928157894634807e-05, "loss": 0.1202, "step": 6691 }, { "epoch": 0.14746015744214358, "grad_norm": 0.837858259677887, "learning_rate": 2.892776045217778e-05, "loss": 0.0629, "step": 6692 }, { "epoch": 0.14748219273165974, "grad_norm": 0.9686676263809204, "learning_rate": 2.892736293877921e-05, "loss": 0.1058, "step": 6693 }, { "epoch": 0.1475042280211759, "grad_norm": 1.047308325767517, "learning_rate": 2.8926965354441106e-05, "loss": 0.1293, "step": 6694 }, { "epoch": 0.14752626331069207, "grad_norm": 1.1508533954620361, "learning_rate": 2.8926567699165503e-05, "loss": 0.1123, "step": 6695 }, { "epoch": 0.14754829860020824, "grad_norm": 0.9968867301940918, "learning_rate": 2.892616997295442e-05, "loss": 0.1966, "step": 6696 }, { "epoch": 0.1475703338897244, "grad_norm": 0.9031082987785339, "learning_rate": 2.8925772175809884e-05, "loss": 0.1056, "step": 6697 }, { "epoch": 0.14759236917924054, "grad_norm": 1.038695216178894, "learning_rate": 2.892537430773393e-05, "loss": 0.1263, "step": 6698 }, { "epoch": 0.1476144044687567, "grad_norm": 1.4585762023925781, "learning_rate": 2.892497636872857e-05, "loss": 0.0859, "step": 6699 }, { "epoch": 0.14763643975827287, "grad_norm": 1.0173072814941406, "learning_rate": 2.8924578358795842e-05, "loss": 0.1055, "step": 6700 }, { "epoch": 0.14765847504778903, "grad_norm": 1.3692787885665894, "learning_rate": 2.8924180277937767e-05, "loss": 0.1106, "step": 6701 }, { "epoch": 0.1476805103373052, "grad_norm": 1.0289669036865234, "learning_rate": 2.8923782126156376e-05, "loss": 0.0948, "step": 6702 }, { "epoch": 0.14770254562682136, "grad_norm": 1.1889142990112305, "learning_rate": 2.8923383903453695e-05, "loss": 0.0881, "step": 6703 }, { "epoch": 0.14772458091633753, "grad_norm": 1.6947472095489502, "learning_rate": 2.8922985609831755e-05, "loss": 0.1728, "step": 6704 }, { "epoch": 0.14774661620585366, "grad_norm": 0.8925052285194397, "learning_rate": 2.8922587245292587e-05, "loss": 0.1306, "step": 6705 }, { "epoch": 0.14776865149536983, "grad_norm": 0.8737056851387024, "learning_rate": 2.892218880983821e-05, "loss": 0.125, "step": 6706 }, { "epoch": 0.147790686784886, "grad_norm": 0.978556752204895, "learning_rate": 2.8921790303470662e-05, "loss": 0.1198, "step": 6707 }, { "epoch": 0.14781272207440216, "grad_norm": 0.7949512004852295, "learning_rate": 2.892139172619197e-05, "loss": 0.1521, "step": 6708 }, { "epoch": 0.14783475736391832, "grad_norm": 0.8493893146514893, "learning_rate": 2.8920993078004163e-05, "loss": 0.1347, "step": 6709 }, { "epoch": 0.1478567926534345, "grad_norm": 0.8392461538314819, "learning_rate": 2.8920594358909275e-05, "loss": 0.1821, "step": 6710 }, { "epoch": 0.14787882794295062, "grad_norm": 0.9443186521530151, "learning_rate": 2.892019556890934e-05, "loss": 0.1405, "step": 6711 }, { "epoch": 0.1479008632324668, "grad_norm": 0.8323206305503845, "learning_rate": 2.8919796708006376e-05, "loss": 0.1447, "step": 6712 }, { "epoch": 0.14792289852198295, "grad_norm": 1.0212535858154297, "learning_rate": 2.891939777620243e-05, "loss": 0.1448, "step": 6713 }, { "epoch": 0.14794493381149912, "grad_norm": 0.941673219203949, "learning_rate": 2.8918998773499527e-05, "loss": 0.1147, "step": 6714 }, { "epoch": 0.14796696910101528, "grad_norm": 0.9350637793540955, "learning_rate": 2.8918599699899694e-05, "loss": 0.1443, "step": 6715 }, { "epoch": 0.14798900439053145, "grad_norm": 0.7351022958755493, "learning_rate": 2.891820055540497e-05, "loss": 0.1203, "step": 6716 }, { "epoch": 0.14801103968004758, "grad_norm": 0.8839039206504822, "learning_rate": 2.8917801340017388e-05, "loss": 0.1185, "step": 6717 }, { "epoch": 0.14803307496956375, "grad_norm": 1.199225902557373, "learning_rate": 2.8917402053738983e-05, "loss": 0.1273, "step": 6718 }, { "epoch": 0.14805511025907991, "grad_norm": 0.8282224535942078, "learning_rate": 2.8917002696571778e-05, "loss": 0.1337, "step": 6719 }, { "epoch": 0.14807714554859608, "grad_norm": 0.8432029485702515, "learning_rate": 2.8916603268517825e-05, "loss": 0.1126, "step": 6720 }, { "epoch": 0.14809918083811224, "grad_norm": 1.070697546005249, "learning_rate": 2.8916203769579144e-05, "loss": 0.1275, "step": 6721 }, { "epoch": 0.1481212161276284, "grad_norm": 1.1513978242874146, "learning_rate": 2.891580419975778e-05, "loss": 0.1276, "step": 6722 }, { "epoch": 0.14814325141714454, "grad_norm": 0.7421090602874756, "learning_rate": 2.8915404559055757e-05, "loss": 0.1329, "step": 6723 }, { "epoch": 0.1481652867066607, "grad_norm": 1.5645827054977417, "learning_rate": 2.8915004847475116e-05, "loss": 0.1366, "step": 6724 }, { "epoch": 0.14818732199617687, "grad_norm": 0.8468350172042847, "learning_rate": 2.8914605065017896e-05, "loss": 0.1533, "step": 6725 }, { "epoch": 0.14820935728569304, "grad_norm": 0.5803958773612976, "learning_rate": 2.8914205211686133e-05, "loss": 0.1412, "step": 6726 }, { "epoch": 0.1482313925752092, "grad_norm": 0.5515346527099609, "learning_rate": 2.891380528748186e-05, "loss": 0.088, "step": 6727 }, { "epoch": 0.14825342786472537, "grad_norm": 1.4336079359054565, "learning_rate": 2.8913405292407114e-05, "loss": 0.1322, "step": 6728 }, { "epoch": 0.1482754631542415, "grad_norm": 1.3477801084518433, "learning_rate": 2.891300522646393e-05, "loss": 0.1269, "step": 6729 }, { "epoch": 0.14829749844375767, "grad_norm": 0.993994414806366, "learning_rate": 2.8912605089654357e-05, "loss": 0.1525, "step": 6730 }, { "epoch": 0.14831953373327383, "grad_norm": 1.1693365573883057, "learning_rate": 2.891220488198042e-05, "loss": 0.113, "step": 6731 }, { "epoch": 0.14834156902279, "grad_norm": 0.9766979813575745, "learning_rate": 2.891180460344417e-05, "loss": 0.1048, "step": 6732 }, { "epoch": 0.14836360431230616, "grad_norm": 0.8651506900787354, "learning_rate": 2.8911404254047634e-05, "loss": 0.1476, "step": 6733 }, { "epoch": 0.14838563960182233, "grad_norm": 1.278844952583313, "learning_rate": 2.8911003833792856e-05, "loss": 0.165, "step": 6734 }, { "epoch": 0.1484076748913385, "grad_norm": 1.747117042541504, "learning_rate": 2.8910603342681875e-05, "loss": 0.0956, "step": 6735 }, { "epoch": 0.14842971018085463, "grad_norm": 1.282879114151001, "learning_rate": 2.8910202780716737e-05, "loss": 0.149, "step": 6736 }, { "epoch": 0.1484517454703708, "grad_norm": 1.3474563360214233, "learning_rate": 2.8909802147899475e-05, "loss": 0.1525, "step": 6737 }, { "epoch": 0.14847378075988696, "grad_norm": 0.7665067315101624, "learning_rate": 2.8909401444232132e-05, "loss": 0.1201, "step": 6738 }, { "epoch": 0.14849581604940312, "grad_norm": 1.4472774267196655, "learning_rate": 2.8909000669716744e-05, "loss": 0.1236, "step": 6739 }, { "epoch": 0.1485178513389193, "grad_norm": 0.7180964946746826, "learning_rate": 2.8908599824355358e-05, "loss": 0.1049, "step": 6740 }, { "epoch": 0.14853988662843545, "grad_norm": 1.1781589984893799, "learning_rate": 2.890819890815002e-05, "loss": 0.1095, "step": 6741 }, { "epoch": 0.1485619219179516, "grad_norm": 1.1855205297470093, "learning_rate": 2.8907797921102766e-05, "loss": 0.1535, "step": 6742 }, { "epoch": 0.14858395720746775, "grad_norm": 1.4279570579528809, "learning_rate": 2.8907396863215637e-05, "loss": 0.1715, "step": 6743 }, { "epoch": 0.14860599249698392, "grad_norm": 1.0689504146575928, "learning_rate": 2.8906995734490674e-05, "loss": 0.1747, "step": 6744 }, { "epoch": 0.14862802778650008, "grad_norm": 1.248265027999878, "learning_rate": 2.8906594534929928e-05, "loss": 0.1598, "step": 6745 }, { "epoch": 0.14865006307601625, "grad_norm": 0.9358317852020264, "learning_rate": 2.8906193264535442e-05, "loss": 0.0819, "step": 6746 }, { "epoch": 0.1486720983655324, "grad_norm": 0.7941707372665405, "learning_rate": 2.890579192330925e-05, "loss": 0.1104, "step": 6747 }, { "epoch": 0.14869413365504855, "grad_norm": 1.0217124223709106, "learning_rate": 2.890539051125341e-05, "loss": 0.1021, "step": 6748 }, { "epoch": 0.14871616894456471, "grad_norm": 1.0534690618515015, "learning_rate": 2.8904989028369958e-05, "loss": 0.1546, "step": 6749 }, { "epoch": 0.14873820423408088, "grad_norm": 1.1818991899490356, "learning_rate": 2.890458747466093e-05, "loss": 0.1448, "step": 6750 }, { "epoch": 0.14876023952359704, "grad_norm": 1.0363730192184448, "learning_rate": 2.8904185850128394e-05, "loss": 0.1023, "step": 6751 }, { "epoch": 0.1487822748131132, "grad_norm": 0.7552884221076965, "learning_rate": 2.8903784154774382e-05, "loss": 0.0994, "step": 6752 }, { "epoch": 0.14880431010262937, "grad_norm": 1.2644537687301636, "learning_rate": 2.8903382388600938e-05, "loss": 0.103, "step": 6753 }, { "epoch": 0.1488263453921455, "grad_norm": 0.8626387119293213, "learning_rate": 2.890298055161011e-05, "loss": 0.1077, "step": 6754 }, { "epoch": 0.14884838068166167, "grad_norm": 0.8655340075492859, "learning_rate": 2.8902578643803953e-05, "loss": 0.1068, "step": 6755 }, { "epoch": 0.14887041597117784, "grad_norm": 0.8260107636451721, "learning_rate": 2.8902176665184505e-05, "loss": 0.1307, "step": 6756 }, { "epoch": 0.148892451260694, "grad_norm": 1.2746344804763794, "learning_rate": 2.8901774615753817e-05, "loss": 0.1171, "step": 6757 }, { "epoch": 0.14891448655021017, "grad_norm": 1.083387017250061, "learning_rate": 2.8901372495513933e-05, "loss": 0.1337, "step": 6758 }, { "epoch": 0.14893652183972633, "grad_norm": 1.234425663948059, "learning_rate": 2.8900970304466906e-05, "loss": 0.1078, "step": 6759 }, { "epoch": 0.14895855712924247, "grad_norm": 0.6115223169326782, "learning_rate": 2.890056804261478e-05, "loss": 0.1002, "step": 6760 }, { "epoch": 0.14898059241875863, "grad_norm": 0.5517820119857788, "learning_rate": 2.8900165709959614e-05, "loss": 0.094, "step": 6761 }, { "epoch": 0.1490026277082748, "grad_norm": 1.0783714056015015, "learning_rate": 2.8899763306503445e-05, "loss": 0.1085, "step": 6762 }, { "epoch": 0.14902466299779096, "grad_norm": 0.9512399435043335, "learning_rate": 2.889936083224833e-05, "loss": 0.0934, "step": 6763 }, { "epoch": 0.14904669828730713, "grad_norm": 0.895061194896698, "learning_rate": 2.8898958287196317e-05, "loss": 0.1415, "step": 6764 }, { "epoch": 0.1490687335768233, "grad_norm": 0.9671791791915894, "learning_rate": 2.8898555671349452e-05, "loss": 0.1208, "step": 6765 }, { "epoch": 0.14909076886633943, "grad_norm": 1.0540153980255127, "learning_rate": 2.8898152984709797e-05, "loss": 0.1287, "step": 6766 }, { "epoch": 0.1491128041558556, "grad_norm": 1.0318214893341064, "learning_rate": 2.889775022727939e-05, "loss": 0.1375, "step": 6767 }, { "epoch": 0.14913483944537176, "grad_norm": 0.7755042314529419, "learning_rate": 2.8897347399060292e-05, "loss": 0.1236, "step": 6768 }, { "epoch": 0.14915687473488792, "grad_norm": 1.1383607387542725, "learning_rate": 2.889694450005455e-05, "loss": 0.1556, "step": 6769 }, { "epoch": 0.1491789100244041, "grad_norm": 0.8832283020019531, "learning_rate": 2.8896541530264217e-05, "loss": 0.1054, "step": 6770 }, { "epoch": 0.14920094531392025, "grad_norm": 0.7647735476493835, "learning_rate": 2.8896138489691343e-05, "loss": 0.1066, "step": 6771 }, { "epoch": 0.14922298060343642, "grad_norm": 1.088568925857544, "learning_rate": 2.889573537833799e-05, "loss": 0.1537, "step": 6772 }, { "epoch": 0.14924501589295255, "grad_norm": 0.7996931672096252, "learning_rate": 2.8895332196206206e-05, "loss": 0.0964, "step": 6773 }, { "epoch": 0.14926705118246872, "grad_norm": 1.2354761362075806, "learning_rate": 2.889492894329804e-05, "loss": 0.0717, "step": 6774 }, { "epoch": 0.14928908647198488, "grad_norm": 0.9609964489936829, "learning_rate": 2.889452561961555e-05, "loss": 0.1015, "step": 6775 }, { "epoch": 0.14931112176150105, "grad_norm": 1.3226674795150757, "learning_rate": 2.8894122225160787e-05, "loss": 0.1103, "step": 6776 }, { "epoch": 0.1493331570510172, "grad_norm": 0.72869473695755, "learning_rate": 2.8893718759935816e-05, "loss": 0.0899, "step": 6777 }, { "epoch": 0.14935519234053338, "grad_norm": 0.9807127118110657, "learning_rate": 2.889331522394268e-05, "loss": 0.128, "step": 6778 }, { "epoch": 0.1493772276300495, "grad_norm": 1.0222444534301758, "learning_rate": 2.8892911617183445e-05, "loss": 0.1258, "step": 6779 }, { "epoch": 0.14939926291956568, "grad_norm": 1.9377002716064453, "learning_rate": 2.8892507939660155e-05, "loss": 0.1387, "step": 6780 }, { "epoch": 0.14942129820908184, "grad_norm": 0.8670704364776611, "learning_rate": 2.8892104191374875e-05, "loss": 0.1435, "step": 6781 }, { "epoch": 0.149443333498598, "grad_norm": 0.8287898302078247, "learning_rate": 2.8891700372329656e-05, "loss": 0.1169, "step": 6782 }, { "epoch": 0.14946536878811417, "grad_norm": 0.818696141242981, "learning_rate": 2.889129648252656e-05, "loss": 0.1269, "step": 6783 }, { "epoch": 0.14948740407763034, "grad_norm": 1.0328619480133057, "learning_rate": 2.889089252196764e-05, "loss": 0.074, "step": 6784 }, { "epoch": 0.14950943936714647, "grad_norm": 1.4740557670593262, "learning_rate": 2.889048849065496e-05, "loss": 0.1131, "step": 6785 }, { "epoch": 0.14953147465666264, "grad_norm": 0.8702030777931213, "learning_rate": 2.889008438859057e-05, "loss": 0.1103, "step": 6786 }, { "epoch": 0.1495535099461788, "grad_norm": 1.3406747579574585, "learning_rate": 2.8889680215776535e-05, "loss": 0.1253, "step": 6787 }, { "epoch": 0.14957554523569497, "grad_norm": 0.912483274936676, "learning_rate": 2.8889275972214906e-05, "loss": 0.1481, "step": 6788 }, { "epoch": 0.14959758052521113, "grad_norm": 0.9852513670921326, "learning_rate": 2.888887165790775e-05, "loss": 0.1464, "step": 6789 }, { "epoch": 0.1496196158147273, "grad_norm": 0.8935797214508057, "learning_rate": 2.8888467272857118e-05, "loss": 0.1413, "step": 6790 }, { "epoch": 0.14964165110424343, "grad_norm": 0.8605386018753052, "learning_rate": 2.888806281706508e-05, "loss": 0.109, "step": 6791 }, { "epoch": 0.1496636863937596, "grad_norm": 1.5278301239013672, "learning_rate": 2.8887658290533686e-05, "loss": 0.1411, "step": 6792 }, { "epoch": 0.14968572168327576, "grad_norm": 0.9237605333328247, "learning_rate": 2.8887253693265007e-05, "loss": 0.0906, "step": 6793 }, { "epoch": 0.14970775697279193, "grad_norm": 0.9308066368103027, "learning_rate": 2.8886849025261094e-05, "loss": 0.1252, "step": 6794 }, { "epoch": 0.1497297922623081, "grad_norm": 1.1239335536956787, "learning_rate": 2.8886444286524014e-05, "loss": 0.1477, "step": 6795 }, { "epoch": 0.14975182755182426, "grad_norm": 1.2983754873275757, "learning_rate": 2.8886039477055825e-05, "loss": 0.1094, "step": 6796 }, { "epoch": 0.1497738628413404, "grad_norm": 1.1276346445083618, "learning_rate": 2.888563459685859e-05, "loss": 0.1124, "step": 6797 }, { "epoch": 0.14979589813085656, "grad_norm": 0.8829378485679626, "learning_rate": 2.888522964593437e-05, "loss": 0.1096, "step": 6798 }, { "epoch": 0.14981793342037272, "grad_norm": 1.512756109237671, "learning_rate": 2.8884824624285238e-05, "loss": 0.1222, "step": 6799 }, { "epoch": 0.1498399687098889, "grad_norm": 0.6195704340934753, "learning_rate": 2.8884419531913244e-05, "loss": 0.1005, "step": 6800 }, { "epoch": 0.14986200399940505, "grad_norm": 2.4523634910583496, "learning_rate": 2.8884014368820454e-05, "loss": 0.1297, "step": 6801 }, { "epoch": 0.14988403928892122, "grad_norm": 1.1937929391860962, "learning_rate": 2.8883609135008934e-05, "loss": 0.1208, "step": 6802 }, { "epoch": 0.14990607457843735, "grad_norm": 1.342334270477295, "learning_rate": 2.888320383048075e-05, "loss": 0.133, "step": 6803 }, { "epoch": 0.14992810986795352, "grad_norm": 0.9231439828872681, "learning_rate": 2.8882798455237964e-05, "loss": 0.0739, "step": 6804 }, { "epoch": 0.14995014515746968, "grad_norm": 1.6087597608566284, "learning_rate": 2.8882393009282638e-05, "loss": 0.1363, "step": 6805 }, { "epoch": 0.14997218044698585, "grad_norm": 1.29855477809906, "learning_rate": 2.8881987492616842e-05, "loss": 0.1148, "step": 6806 }, { "epoch": 0.149994215736502, "grad_norm": 1.4906126260757446, "learning_rate": 2.888158190524264e-05, "loss": 0.1212, "step": 6807 }, { "epoch": 0.15001625102601818, "grad_norm": 1.0868706703186035, "learning_rate": 2.8881176247162095e-05, "loss": 0.1099, "step": 6808 }, { "epoch": 0.15003828631553434, "grad_norm": 1.0367915630340576, "learning_rate": 2.888077051837728e-05, "loss": 0.1248, "step": 6809 }, { "epoch": 0.15006032160505048, "grad_norm": 1.0879297256469727, "learning_rate": 2.8880364718890253e-05, "loss": 0.1165, "step": 6810 }, { "epoch": 0.15008235689456664, "grad_norm": 0.8878112435340881, "learning_rate": 2.8879958848703086e-05, "loss": 0.1152, "step": 6811 }, { "epoch": 0.1501043921840828, "grad_norm": 1.4890249967575073, "learning_rate": 2.8879552907817847e-05, "loss": 0.151, "step": 6812 }, { "epoch": 0.15012642747359897, "grad_norm": 1.3153016567230225, "learning_rate": 2.8879146896236602e-05, "loss": 0.1253, "step": 6813 }, { "epoch": 0.15014846276311514, "grad_norm": 0.8617528676986694, "learning_rate": 2.8878740813961418e-05, "loss": 0.103, "step": 6814 }, { "epoch": 0.1501704980526313, "grad_norm": 1.714316725730896, "learning_rate": 2.887833466099437e-05, "loss": 0.1431, "step": 6815 }, { "epoch": 0.15019253334214744, "grad_norm": 1.721472144126892, "learning_rate": 2.8877928437337514e-05, "loss": 0.1569, "step": 6816 }, { "epoch": 0.1502145686316636, "grad_norm": 0.7765160202980042, "learning_rate": 2.887752214299293e-05, "loss": 0.1064, "step": 6817 }, { "epoch": 0.15023660392117977, "grad_norm": 1.5392884016036987, "learning_rate": 2.8877115777962682e-05, "loss": 0.1294, "step": 6818 }, { "epoch": 0.15025863921069593, "grad_norm": 4.424996376037598, "learning_rate": 2.8876709342248845e-05, "loss": 0.1152, "step": 6819 }, { "epoch": 0.1502806745002121, "grad_norm": 1.5563429594039917, "learning_rate": 2.8876302835853487e-05, "loss": 0.1127, "step": 6820 }, { "epoch": 0.15030270978972826, "grad_norm": 1.154129981994629, "learning_rate": 2.887589625877867e-05, "loss": 0.112, "step": 6821 }, { "epoch": 0.1503247450792444, "grad_norm": 0.8681195974349976, "learning_rate": 2.887548961102648e-05, "loss": 0.1524, "step": 6822 }, { "epoch": 0.15034678036876056, "grad_norm": 1.525896430015564, "learning_rate": 2.887508289259898e-05, "loss": 0.1368, "step": 6823 }, { "epoch": 0.15036881565827673, "grad_norm": 1.4980412721633911, "learning_rate": 2.887467610349824e-05, "loss": 0.1587, "step": 6824 }, { "epoch": 0.1503908509477929, "grad_norm": 1.0047776699066162, "learning_rate": 2.8874269243726335e-05, "loss": 0.1232, "step": 6825 }, { "epoch": 0.15041288623730906, "grad_norm": 1.234574794769287, "learning_rate": 2.8873862313285335e-05, "loss": 0.1667, "step": 6826 }, { "epoch": 0.15043492152682522, "grad_norm": 1.064023494720459, "learning_rate": 2.887345531217732e-05, "loss": 0.1341, "step": 6827 }, { "epoch": 0.15045695681634136, "grad_norm": 1.0915967226028442, "learning_rate": 2.8873048240404353e-05, "loss": 0.1305, "step": 6828 }, { "epoch": 0.15047899210585752, "grad_norm": 1.0523008108139038, "learning_rate": 2.8872641097968513e-05, "loss": 0.1459, "step": 6829 }, { "epoch": 0.1505010273953737, "grad_norm": 0.9973957538604736, "learning_rate": 2.8872233884871876e-05, "loss": 0.1327, "step": 6830 }, { "epoch": 0.15052306268488985, "grad_norm": 1.2393808364868164, "learning_rate": 2.8871826601116506e-05, "loss": 0.1201, "step": 6831 }, { "epoch": 0.15054509797440602, "grad_norm": 1.1502299308776855, "learning_rate": 2.887141924670449e-05, "loss": 0.125, "step": 6832 }, { "epoch": 0.15056713326392218, "grad_norm": 0.8199841976165771, "learning_rate": 2.8871011821637895e-05, "loss": 0.103, "step": 6833 }, { "epoch": 0.15058916855343832, "grad_norm": 0.9508376717567444, "learning_rate": 2.88706043259188e-05, "loss": 0.0985, "step": 6834 }, { "epoch": 0.15061120384295448, "grad_norm": 1.1966513395309448, "learning_rate": 2.887019675954928e-05, "loss": 0.148, "step": 6835 }, { "epoch": 0.15063323913247065, "grad_norm": 1.0365934371948242, "learning_rate": 2.8869789122531406e-05, "loss": 0.0982, "step": 6836 }, { "epoch": 0.1506552744219868, "grad_norm": 0.9071347117424011, "learning_rate": 2.886938141486726e-05, "loss": 0.1102, "step": 6837 }, { "epoch": 0.15067730971150298, "grad_norm": 1.3863846063613892, "learning_rate": 2.8868973636558918e-05, "loss": 0.0849, "step": 6838 }, { "epoch": 0.15069934500101914, "grad_norm": 1.3511823415756226, "learning_rate": 2.8868565787608457e-05, "loss": 0.1237, "step": 6839 }, { "epoch": 0.15072138029053528, "grad_norm": 0.8689388036727905, "learning_rate": 2.8868157868017953e-05, "loss": 0.0708, "step": 6840 }, { "epoch": 0.15074341558005144, "grad_norm": 1.0158387422561646, "learning_rate": 2.886774987778948e-05, "loss": 0.1072, "step": 6841 }, { "epoch": 0.1507654508695676, "grad_norm": 0.8225204348564148, "learning_rate": 2.886734181692513e-05, "loss": 0.1353, "step": 6842 }, { "epoch": 0.15078748615908377, "grad_norm": 0.8890590071678162, "learning_rate": 2.8866933685426965e-05, "loss": 0.1388, "step": 6843 }, { "epoch": 0.15080952144859994, "grad_norm": 1.0466954708099365, "learning_rate": 2.886652548329707e-05, "loss": 0.0884, "step": 6844 }, { "epoch": 0.1508315567381161, "grad_norm": 1.3971421718597412, "learning_rate": 2.8866117210537528e-05, "loss": 0.1803, "step": 6845 }, { "epoch": 0.15085359202763227, "grad_norm": 0.8636575937271118, "learning_rate": 2.8865708867150416e-05, "loss": 0.1609, "step": 6846 }, { "epoch": 0.1508756273171484, "grad_norm": 0.8592793941497803, "learning_rate": 2.8865300453137808e-05, "loss": 0.1618, "step": 6847 }, { "epoch": 0.15089766260666457, "grad_norm": 1.057731032371521, "learning_rate": 2.8864891968501796e-05, "loss": 0.1349, "step": 6848 }, { "epoch": 0.15091969789618073, "grad_norm": 0.7714976668357849, "learning_rate": 2.886448341324445e-05, "loss": 0.1238, "step": 6849 }, { "epoch": 0.1509417331856969, "grad_norm": 0.566016435623169, "learning_rate": 2.8864074787367858e-05, "loss": 0.1227, "step": 6850 }, { "epoch": 0.15096376847521306, "grad_norm": 1.0239813327789307, "learning_rate": 2.8863666090874096e-05, "loss": 0.1027, "step": 6851 }, { "epoch": 0.15098580376472923, "grad_norm": 1.4580087661743164, "learning_rate": 2.886325732376525e-05, "loss": 0.104, "step": 6852 }, { "epoch": 0.15100783905424536, "grad_norm": 1.0153999328613281, "learning_rate": 2.8862848486043398e-05, "loss": 0.1377, "step": 6853 }, { "epoch": 0.15102987434376153, "grad_norm": 0.748634934425354, "learning_rate": 2.8862439577710633e-05, "loss": 0.1549, "step": 6854 }, { "epoch": 0.1510519096332777, "grad_norm": 0.7985662817955017, "learning_rate": 2.8862030598769018e-05, "loss": 0.1218, "step": 6855 }, { "epoch": 0.15107394492279386, "grad_norm": 0.8183596134185791, "learning_rate": 2.8861621549220655e-05, "loss": 0.1249, "step": 6856 }, { "epoch": 0.15109598021231002, "grad_norm": 1.3527402877807617, "learning_rate": 2.886121242906762e-05, "loss": 0.1199, "step": 6857 }, { "epoch": 0.1511180155018262, "grad_norm": 1.2455202341079712, "learning_rate": 2.8860803238311993e-05, "loss": 0.1241, "step": 6858 }, { "epoch": 0.15114005079134232, "grad_norm": 0.7351599335670471, "learning_rate": 2.8860393976955865e-05, "loss": 0.1415, "step": 6859 }, { "epoch": 0.1511620860808585, "grad_norm": 0.8704385757446289, "learning_rate": 2.885998464500132e-05, "loss": 0.1317, "step": 6860 }, { "epoch": 0.15118412137037465, "grad_norm": 1.7626938819885254, "learning_rate": 2.885957524245044e-05, "loss": 0.206, "step": 6861 }, { "epoch": 0.15120615665989082, "grad_norm": 1.346386432647705, "learning_rate": 2.885916576930531e-05, "loss": 0.1563, "step": 6862 }, { "epoch": 0.15122819194940698, "grad_norm": 1.2841261625289917, "learning_rate": 2.8858756225568013e-05, "loss": 0.1785, "step": 6863 }, { "epoch": 0.15125022723892315, "grad_norm": 0.7288479208946228, "learning_rate": 2.8858346611240642e-05, "loss": 0.1031, "step": 6864 }, { "epoch": 0.15127226252843928, "grad_norm": 0.9394428133964539, "learning_rate": 2.8857936926325282e-05, "loss": 0.0926, "step": 6865 }, { "epoch": 0.15129429781795545, "grad_norm": 0.893475353717804, "learning_rate": 2.8857527170824018e-05, "loss": 0.1019, "step": 6866 }, { "epoch": 0.1513163331074716, "grad_norm": 1.3542944192886353, "learning_rate": 2.8857117344738935e-05, "loss": 0.1236, "step": 6867 }, { "epoch": 0.15133836839698778, "grad_norm": 1.0689342021942139, "learning_rate": 2.8856707448072125e-05, "loss": 0.0987, "step": 6868 }, { "epoch": 0.15136040368650394, "grad_norm": 0.6863809823989868, "learning_rate": 2.8856297480825675e-05, "loss": 0.1005, "step": 6869 }, { "epoch": 0.1513824389760201, "grad_norm": 1.1848442554473877, "learning_rate": 2.8855887443001664e-05, "loss": 0.1413, "step": 6870 }, { "epoch": 0.15140447426553624, "grad_norm": 1.1530325412750244, "learning_rate": 2.8855477334602194e-05, "loss": 0.1514, "step": 6871 }, { "epoch": 0.1514265095550524, "grad_norm": 1.0554299354553223, "learning_rate": 2.885506715562935e-05, "loss": 0.1282, "step": 6872 }, { "epoch": 0.15144854484456857, "grad_norm": 1.2364758253097534, "learning_rate": 2.8854656906085214e-05, "loss": 0.1203, "step": 6873 }, { "epoch": 0.15147058013408474, "grad_norm": 1.1269532442092896, "learning_rate": 2.8854246585971882e-05, "loss": 0.1077, "step": 6874 }, { "epoch": 0.1514926154236009, "grad_norm": 1.0994391441345215, "learning_rate": 2.8853836195291442e-05, "loss": 0.1242, "step": 6875 }, { "epoch": 0.15151465071311707, "grad_norm": 1.108291745185852, "learning_rate": 2.885342573404599e-05, "loss": 0.1407, "step": 6876 }, { "epoch": 0.15153668600263323, "grad_norm": 1.0812280178070068, "learning_rate": 2.8853015202237608e-05, "loss": 0.1436, "step": 6877 }, { "epoch": 0.15155872129214937, "grad_norm": 0.9325980544090271, "learning_rate": 2.8852604599868394e-05, "loss": 0.1168, "step": 6878 }, { "epoch": 0.15158075658166553, "grad_norm": 0.7758538722991943, "learning_rate": 2.8852193926940432e-05, "loss": 0.1204, "step": 6879 }, { "epoch": 0.1516027918711817, "grad_norm": 1.154237151145935, "learning_rate": 2.885178318345582e-05, "loss": 0.1338, "step": 6880 }, { "epoch": 0.15162482716069786, "grad_norm": 1.6735666990280151, "learning_rate": 2.8851372369416655e-05, "loss": 0.1203, "step": 6881 }, { "epoch": 0.15164686245021403, "grad_norm": 1.1279014348983765, "learning_rate": 2.8850961484825015e-05, "loss": 0.1231, "step": 6882 }, { "epoch": 0.1516688977397302, "grad_norm": 1.1953808069229126, "learning_rate": 2.8850550529683e-05, "loss": 0.1302, "step": 6883 }, { "epoch": 0.15169093302924633, "grad_norm": 0.6880269646644592, "learning_rate": 2.8850139503992708e-05, "loss": 0.1078, "step": 6884 }, { "epoch": 0.1517129683187625, "grad_norm": 1.1749851703643799, "learning_rate": 2.8849728407756225e-05, "loss": 0.129, "step": 6885 }, { "epoch": 0.15173500360827866, "grad_norm": 0.9728385806083679, "learning_rate": 2.8849317240975654e-05, "loss": 0.0775, "step": 6886 }, { "epoch": 0.15175703889779482, "grad_norm": 1.1717435121536255, "learning_rate": 2.884890600365308e-05, "loss": 0.1057, "step": 6887 }, { "epoch": 0.15177907418731099, "grad_norm": 2.3457136154174805, "learning_rate": 2.88484946957906e-05, "loss": 0.1845, "step": 6888 }, { "epoch": 0.15180110947682715, "grad_norm": 0.6401700973510742, "learning_rate": 2.8848083317390316e-05, "loss": 0.1036, "step": 6889 }, { "epoch": 0.1518231447663433, "grad_norm": 1.1309272050857544, "learning_rate": 2.884767186845431e-05, "loss": 0.1165, "step": 6890 }, { "epoch": 0.15184518005585945, "grad_norm": 1.070942759513855, "learning_rate": 2.884726034898469e-05, "loss": 0.1255, "step": 6891 }, { "epoch": 0.15186721534537562, "grad_norm": 0.9730523824691772, "learning_rate": 2.8846848758983545e-05, "loss": 0.1109, "step": 6892 }, { "epoch": 0.15188925063489178, "grad_norm": 1.8847912549972534, "learning_rate": 2.8846437098452977e-05, "loss": 0.1457, "step": 6893 }, { "epoch": 0.15191128592440795, "grad_norm": 1.2706230878829956, "learning_rate": 2.884602536739508e-05, "loss": 0.1146, "step": 6894 }, { "epoch": 0.1519333212139241, "grad_norm": 1.3581199645996094, "learning_rate": 2.8845613565811947e-05, "loss": 0.117, "step": 6895 }, { "epoch": 0.15195535650344025, "grad_norm": 1.2929558753967285, "learning_rate": 2.8845201693705685e-05, "loss": 0.1269, "step": 6896 }, { "epoch": 0.1519773917929564, "grad_norm": 0.9492595195770264, "learning_rate": 2.884478975107838e-05, "loss": 0.1478, "step": 6897 }, { "epoch": 0.15199942708247258, "grad_norm": 1.2771955728530884, "learning_rate": 2.884437773793214e-05, "loss": 0.139, "step": 6898 }, { "epoch": 0.15202146237198874, "grad_norm": 1.061789631843567, "learning_rate": 2.8843965654269063e-05, "loss": 0.1308, "step": 6899 }, { "epoch": 0.1520434976615049, "grad_norm": 1.3059182167053223, "learning_rate": 2.884355350009124e-05, "loss": 0.1392, "step": 6900 }, { "epoch": 0.15206553295102107, "grad_norm": 0.7539805769920349, "learning_rate": 2.884314127540078e-05, "loss": 0.118, "step": 6901 }, { "epoch": 0.1520875682405372, "grad_norm": 1.1132010221481323, "learning_rate": 2.884272898019977e-05, "loss": 0.1482, "step": 6902 }, { "epoch": 0.15210960353005337, "grad_norm": 0.886759877204895, "learning_rate": 2.8842316614490328e-05, "loss": 0.1437, "step": 6903 }, { "epoch": 0.15213163881956954, "grad_norm": 0.9581465721130371, "learning_rate": 2.884190417827454e-05, "loss": 0.1248, "step": 6904 }, { "epoch": 0.1521536741090857, "grad_norm": 1.357243299484253, "learning_rate": 2.8841491671554514e-05, "loss": 0.1892, "step": 6905 }, { "epoch": 0.15217570939860187, "grad_norm": 1.3701125383377075, "learning_rate": 2.8841079094332345e-05, "loss": 0.1625, "step": 6906 }, { "epoch": 0.15219774468811803, "grad_norm": 0.925881564617157, "learning_rate": 2.884066644661014e-05, "loss": 0.1123, "step": 6907 }, { "epoch": 0.15221977997763417, "grad_norm": 0.9540148377418518, "learning_rate": 2.884025372839e-05, "loss": 0.1228, "step": 6908 }, { "epoch": 0.15224181526715033, "grad_norm": 0.8999350666999817, "learning_rate": 2.8839840939674026e-05, "loss": 0.107, "step": 6909 }, { "epoch": 0.1522638505566665, "grad_norm": 1.051256775856018, "learning_rate": 2.883942808046432e-05, "loss": 0.0879, "step": 6910 }, { "epoch": 0.15228588584618266, "grad_norm": 0.7109893560409546, "learning_rate": 2.883901515076298e-05, "loss": 0.1155, "step": 6911 }, { "epoch": 0.15230792113569883, "grad_norm": 1.0822798013687134, "learning_rate": 2.883860215057212e-05, "loss": 0.1305, "step": 6912 }, { "epoch": 0.152329956425215, "grad_norm": 0.700615644454956, "learning_rate": 2.883818907989384e-05, "loss": 0.1085, "step": 6913 }, { "epoch": 0.15235199171473116, "grad_norm": 0.5607417225837708, "learning_rate": 2.8837775938730242e-05, "loss": 0.1213, "step": 6914 }, { "epoch": 0.1523740270042473, "grad_norm": 1.5555813312530518, "learning_rate": 2.8837362727083428e-05, "loss": 0.1647, "step": 6915 }, { "epoch": 0.15239606229376346, "grad_norm": 1.3066414594650269, "learning_rate": 2.8836949444955503e-05, "loss": 0.1092, "step": 6916 }, { "epoch": 0.15241809758327962, "grad_norm": 1.005247950553894, "learning_rate": 2.8836536092348583e-05, "loss": 0.0901, "step": 6917 }, { "epoch": 0.15244013287279579, "grad_norm": 1.2651042938232422, "learning_rate": 2.883612266926476e-05, "loss": 0.1564, "step": 6918 }, { "epoch": 0.15246216816231195, "grad_norm": 1.447564959526062, "learning_rate": 2.8835709175706143e-05, "loss": 0.1215, "step": 6919 }, { "epoch": 0.15248420345182812, "grad_norm": 1.481532335281372, "learning_rate": 2.8835295611674842e-05, "loss": 0.1449, "step": 6920 }, { "epoch": 0.15250623874134425, "grad_norm": 1.7879937887191772, "learning_rate": 2.8834881977172965e-05, "loss": 0.1675, "step": 6921 }, { "epoch": 0.15252827403086042, "grad_norm": 0.6285027265548706, "learning_rate": 2.883446827220261e-05, "loss": 0.0826, "step": 6922 }, { "epoch": 0.15255030932037658, "grad_norm": 1.2725932598114014, "learning_rate": 2.883405449676589e-05, "loss": 0.1162, "step": 6923 }, { "epoch": 0.15257234460989275, "grad_norm": 0.8570614457130432, "learning_rate": 2.883364065086491e-05, "loss": 0.1245, "step": 6924 }, { "epoch": 0.1525943798994089, "grad_norm": 1.2074267864227295, "learning_rate": 2.8833226734501782e-05, "loss": 0.157, "step": 6925 }, { "epoch": 0.15261641518892508, "grad_norm": 0.9381579756736755, "learning_rate": 2.8832812747678614e-05, "loss": 0.1258, "step": 6926 }, { "epoch": 0.1526384504784412, "grad_norm": 1.1454789638519287, "learning_rate": 2.8832398690397514e-05, "loss": 0.1083, "step": 6927 }, { "epoch": 0.15266048576795738, "grad_norm": 0.9532697796821594, "learning_rate": 2.883198456266059e-05, "loss": 0.1055, "step": 6928 }, { "epoch": 0.15268252105747354, "grad_norm": 1.3530004024505615, "learning_rate": 2.8831570364469944e-05, "loss": 0.114, "step": 6929 }, { "epoch": 0.1527045563469897, "grad_norm": 1.06985342502594, "learning_rate": 2.88311560958277e-05, "loss": 0.1253, "step": 6930 }, { "epoch": 0.15272659163650587, "grad_norm": 1.2544503211975098, "learning_rate": 2.883074175673596e-05, "loss": 0.1383, "step": 6931 }, { "epoch": 0.15274862692602204, "grad_norm": 1.5032306909561157, "learning_rate": 2.8830327347196834e-05, "loss": 0.098, "step": 6932 }, { "epoch": 0.15277066221553817, "grad_norm": 1.578044056892395, "learning_rate": 2.882991286721244e-05, "loss": 0.1389, "step": 6933 }, { "epoch": 0.15279269750505434, "grad_norm": 1.0032776594161987, "learning_rate": 2.8829498316784874e-05, "loss": 0.1065, "step": 6934 }, { "epoch": 0.1528147327945705, "grad_norm": 0.8692163228988647, "learning_rate": 2.8829083695916264e-05, "loss": 0.1305, "step": 6935 }, { "epoch": 0.15283676808408667, "grad_norm": 1.998332142829895, "learning_rate": 2.8828669004608716e-05, "loss": 0.1196, "step": 6936 }, { "epoch": 0.15285880337360283, "grad_norm": 0.7914785146713257, "learning_rate": 2.8828254242864333e-05, "loss": 0.1448, "step": 6937 }, { "epoch": 0.152880838663119, "grad_norm": 1.1061677932739258, "learning_rate": 2.8827839410685245e-05, "loss": 0.1317, "step": 6938 }, { "epoch": 0.15290287395263513, "grad_norm": 0.9656409025192261, "learning_rate": 2.882742450807355e-05, "loss": 0.1107, "step": 6939 }, { "epoch": 0.1529249092421513, "grad_norm": 0.977131724357605, "learning_rate": 2.8827009535031368e-05, "loss": 0.1472, "step": 6940 }, { "epoch": 0.15294694453166746, "grad_norm": 1.6409306526184082, "learning_rate": 2.8826594491560815e-05, "loss": 0.0922, "step": 6941 }, { "epoch": 0.15296897982118363, "grad_norm": 1.0761650800704956, "learning_rate": 2.8826179377664e-05, "loss": 0.1081, "step": 6942 }, { "epoch": 0.1529910151106998, "grad_norm": 1.2315114736557007, "learning_rate": 2.8825764193343035e-05, "loss": 0.1119, "step": 6943 }, { "epoch": 0.15301305040021596, "grad_norm": 1.135048747062683, "learning_rate": 2.8825348938600046e-05, "loss": 0.1286, "step": 6944 }, { "epoch": 0.1530350856897321, "grad_norm": 0.5918581485748291, "learning_rate": 2.8824933613437137e-05, "loss": 0.171, "step": 6945 }, { "epoch": 0.15305712097924826, "grad_norm": 0.9130322337150574, "learning_rate": 2.882451821785643e-05, "loss": 0.1114, "step": 6946 }, { "epoch": 0.15307915626876442, "grad_norm": 1.0199193954467773, "learning_rate": 2.8824102751860037e-05, "loss": 0.1266, "step": 6947 }, { "epoch": 0.15310119155828059, "grad_norm": 0.7094514966011047, "learning_rate": 2.8823687215450075e-05, "loss": 0.1646, "step": 6948 }, { "epoch": 0.15312322684779675, "grad_norm": 1.0404579639434814, "learning_rate": 2.8823271608628666e-05, "loss": 0.1344, "step": 6949 }, { "epoch": 0.15314526213731292, "grad_norm": 0.7953815460205078, "learning_rate": 2.8822855931397915e-05, "loss": 0.1365, "step": 6950 }, { "epoch": 0.15316729742682908, "grad_norm": 1.4908452033996582, "learning_rate": 2.882244018375995e-05, "loss": 0.1075, "step": 6951 }, { "epoch": 0.15318933271634522, "grad_norm": 1.0914536714553833, "learning_rate": 2.8822024365716882e-05, "loss": 0.1323, "step": 6952 }, { "epoch": 0.15321136800586138, "grad_norm": 1.0572082996368408, "learning_rate": 2.882160847727083e-05, "loss": 0.1072, "step": 6953 }, { "epoch": 0.15323340329537755, "grad_norm": 1.3865879774093628, "learning_rate": 2.8821192518423926e-05, "loss": 0.1314, "step": 6954 }, { "epoch": 0.1532554385848937, "grad_norm": 1.0734493732452393, "learning_rate": 2.8820776489178266e-05, "loss": 0.1194, "step": 6955 }, { "epoch": 0.15327747387440988, "grad_norm": 1.0279698371887207, "learning_rate": 2.8820360389535985e-05, "loss": 0.1193, "step": 6956 }, { "epoch": 0.15329950916392604, "grad_norm": 0.8029308319091797, "learning_rate": 2.8819944219499195e-05, "loss": 0.1476, "step": 6957 }, { "epoch": 0.15332154445344218, "grad_norm": 0.9646543264389038, "learning_rate": 2.8819527979070023e-05, "loss": 0.0996, "step": 6958 }, { "epoch": 0.15334357974295834, "grad_norm": 1.0196350812911987, "learning_rate": 2.8819111668250577e-05, "loss": 0.1272, "step": 6959 }, { "epoch": 0.1533656150324745, "grad_norm": 1.467056155204773, "learning_rate": 2.881869528704299e-05, "loss": 0.143, "step": 6960 }, { "epoch": 0.15338765032199067, "grad_norm": 0.7738966941833496, "learning_rate": 2.8818278835449373e-05, "loss": 0.1358, "step": 6961 }, { "epoch": 0.15340968561150684, "grad_norm": 1.1215537786483765, "learning_rate": 2.8817862313471857e-05, "loss": 0.1256, "step": 6962 }, { "epoch": 0.153431720901023, "grad_norm": 1.2848693132400513, "learning_rate": 2.8817445721112558e-05, "loss": 0.1407, "step": 6963 }, { "epoch": 0.15345375619053914, "grad_norm": 1.4175114631652832, "learning_rate": 2.88170290583736e-05, "loss": 0.1582, "step": 6964 }, { "epoch": 0.1534757914800553, "grad_norm": 1.3710168600082397, "learning_rate": 2.8816612325257098e-05, "loss": 0.1505, "step": 6965 }, { "epoch": 0.15349782676957147, "grad_norm": 1.8198801279067993, "learning_rate": 2.8816195521765184e-05, "loss": 0.0979, "step": 6966 }, { "epoch": 0.15351986205908763, "grad_norm": 0.9295082688331604, "learning_rate": 2.8815778647899976e-05, "loss": 0.1318, "step": 6967 }, { "epoch": 0.1535418973486038, "grad_norm": 1.1407724618911743, "learning_rate": 2.8815361703663597e-05, "loss": 0.1448, "step": 6968 }, { "epoch": 0.15356393263811996, "grad_norm": 0.6871122717857361, "learning_rate": 2.8814944689058177e-05, "loss": 0.1154, "step": 6969 }, { "epoch": 0.1535859679276361, "grad_norm": 0.8241013884544373, "learning_rate": 2.8814527604085836e-05, "loss": 0.1334, "step": 6970 }, { "epoch": 0.15360800321715226, "grad_norm": 1.1653659343719482, "learning_rate": 2.881411044874869e-05, "loss": 0.1415, "step": 6971 }, { "epoch": 0.15363003850666843, "grad_norm": 0.7842392921447754, "learning_rate": 2.8813693223048884e-05, "loss": 0.113, "step": 6972 }, { "epoch": 0.1536520737961846, "grad_norm": 1.182307481765747, "learning_rate": 2.8813275926988523e-05, "loss": 0.1366, "step": 6973 }, { "epoch": 0.15367410908570076, "grad_norm": 1.2733474969863892, "learning_rate": 2.8812858560569737e-05, "loss": 0.1191, "step": 6974 }, { "epoch": 0.15369614437521692, "grad_norm": 1.2770373821258545, "learning_rate": 2.8812441123794662e-05, "loss": 0.15, "step": 6975 }, { "epoch": 0.15371817966473306, "grad_norm": 0.8306282162666321, "learning_rate": 2.8812023616665414e-05, "loss": 0.1316, "step": 6976 }, { "epoch": 0.15374021495424922, "grad_norm": 1.0526458024978638, "learning_rate": 2.8811606039184124e-05, "loss": 0.1076, "step": 6977 }, { "epoch": 0.15376225024376539, "grad_norm": 1.1356306076049805, "learning_rate": 2.881118839135292e-05, "loss": 0.083, "step": 6978 }, { "epoch": 0.15378428553328155, "grad_norm": 1.1615488529205322, "learning_rate": 2.8810770673173926e-05, "loss": 0.1378, "step": 6979 }, { "epoch": 0.15380632082279772, "grad_norm": 0.9573104977607727, "learning_rate": 2.8810352884649274e-05, "loss": 0.121, "step": 6980 }, { "epoch": 0.15382835611231388, "grad_norm": 0.9610539674758911, "learning_rate": 2.8809935025781083e-05, "loss": 0.0787, "step": 6981 }, { "epoch": 0.15385039140183004, "grad_norm": 0.7302000522613525, "learning_rate": 2.880951709657149e-05, "loss": 0.0928, "step": 6982 }, { "epoch": 0.15387242669134618, "grad_norm": 1.089528203010559, "learning_rate": 2.8809099097022622e-05, "loss": 0.1325, "step": 6983 }, { "epoch": 0.15389446198086235, "grad_norm": 0.9197953343391418, "learning_rate": 2.880868102713661e-05, "loss": 0.0805, "step": 6984 }, { "epoch": 0.1539164972703785, "grad_norm": 1.4545165300369263, "learning_rate": 2.8808262886915574e-05, "loss": 0.1875, "step": 6985 }, { "epoch": 0.15393853255989468, "grad_norm": 2.8991405963897705, "learning_rate": 2.8807844676361656e-05, "loss": 0.1198, "step": 6986 }, { "epoch": 0.15396056784941084, "grad_norm": 0.7710418105125427, "learning_rate": 2.880742639547698e-05, "loss": 0.0949, "step": 6987 }, { "epoch": 0.153982603138927, "grad_norm": 1.5330675840377808, "learning_rate": 2.8807008044263674e-05, "loss": 0.1472, "step": 6988 }, { "epoch": 0.15400463842844314, "grad_norm": 0.552385687828064, "learning_rate": 2.8806589622723874e-05, "loss": 0.1028, "step": 6989 }, { "epoch": 0.1540266737179593, "grad_norm": 1.3143243789672852, "learning_rate": 2.880617113085971e-05, "loss": 0.1384, "step": 6990 }, { "epoch": 0.15404870900747547, "grad_norm": 0.683050811290741, "learning_rate": 2.8805752568673313e-05, "loss": 0.1011, "step": 6991 }, { "epoch": 0.15407074429699164, "grad_norm": 0.8522604703903198, "learning_rate": 2.8805333936166814e-05, "loss": 0.1034, "step": 6992 }, { "epoch": 0.1540927795865078, "grad_norm": 1.2530847787857056, "learning_rate": 2.8804915233342348e-05, "loss": 0.1467, "step": 6993 }, { "epoch": 0.15411481487602396, "grad_norm": 1.0052708387374878, "learning_rate": 2.8804496460202044e-05, "loss": 0.1247, "step": 6994 }, { "epoch": 0.1541368501655401, "grad_norm": 1.125457525253296, "learning_rate": 2.8804077616748038e-05, "loss": 0.0791, "step": 6995 }, { "epoch": 0.15415888545505627, "grad_norm": 1.18509042263031, "learning_rate": 2.880365870298246e-05, "loss": 0.1607, "step": 6996 }, { "epoch": 0.15418092074457243, "grad_norm": 1.2676466703414917, "learning_rate": 2.880323971890745e-05, "loss": 0.1046, "step": 6997 }, { "epoch": 0.1542029560340886, "grad_norm": 1.2671071290969849, "learning_rate": 2.8802820664525136e-05, "loss": 0.129, "step": 6998 }, { "epoch": 0.15422499132360476, "grad_norm": 1.096622109413147, "learning_rate": 2.8802401539837655e-05, "loss": 0.1566, "step": 6999 }, { "epoch": 0.15424702661312092, "grad_norm": 1.028202772140503, "learning_rate": 2.880198234484714e-05, "loss": 0.0998, "step": 7000 }, { "epoch": 0.15426906190263706, "grad_norm": 0.8190675377845764, "learning_rate": 2.880156307955573e-05, "loss": 0.1093, "step": 7001 }, { "epoch": 0.15429109719215323, "grad_norm": 0.7764113545417786, "learning_rate": 2.8801143743965557e-05, "loss": 0.0888, "step": 7002 }, { "epoch": 0.1543131324816694, "grad_norm": 1.4076796770095825, "learning_rate": 2.880072433807876e-05, "loss": 0.1259, "step": 7003 }, { "epoch": 0.15433516777118556, "grad_norm": 0.7674834132194519, "learning_rate": 2.8800304861897475e-05, "loss": 0.0889, "step": 7004 }, { "epoch": 0.15435720306070172, "grad_norm": 1.0004074573516846, "learning_rate": 2.8799885315423832e-05, "loss": 0.1, "step": 7005 }, { "epoch": 0.15437923835021788, "grad_norm": 1.0036075115203857, "learning_rate": 2.8799465698659973e-05, "loss": 0.1242, "step": 7006 }, { "epoch": 0.15440127363973402, "grad_norm": 1.6141079664230347, "learning_rate": 2.879904601160804e-05, "loss": 0.1369, "step": 7007 }, { "epoch": 0.15442330892925019, "grad_norm": 0.7793400287628174, "learning_rate": 2.8798626254270163e-05, "loss": 0.1071, "step": 7008 }, { "epoch": 0.15444534421876635, "grad_norm": 0.9557207226753235, "learning_rate": 2.8798206426648484e-05, "loss": 0.098, "step": 7009 }, { "epoch": 0.15446737950828252, "grad_norm": 0.9706737399101257, "learning_rate": 2.879778652874514e-05, "loss": 0.1083, "step": 7010 }, { "epoch": 0.15448941479779868, "grad_norm": 0.9912216067314148, "learning_rate": 2.879736656056227e-05, "loss": 0.1214, "step": 7011 }, { "epoch": 0.15451145008731484, "grad_norm": 0.738774299621582, "learning_rate": 2.8796946522102016e-05, "loss": 0.1414, "step": 7012 }, { "epoch": 0.15453348537683098, "grad_norm": 0.9084563851356506, "learning_rate": 2.879652641336651e-05, "loss": 0.1355, "step": 7013 }, { "epoch": 0.15455552066634715, "grad_norm": 0.9099307060241699, "learning_rate": 2.87961062343579e-05, "loss": 0.1371, "step": 7014 }, { "epoch": 0.1545775559558633, "grad_norm": 0.8380351066589355, "learning_rate": 2.8795685985078323e-05, "loss": 0.0742, "step": 7015 }, { "epoch": 0.15459959124537948, "grad_norm": 0.9208235144615173, "learning_rate": 2.8795265665529917e-05, "loss": 0.1315, "step": 7016 }, { "epoch": 0.15462162653489564, "grad_norm": 1.020677089691162, "learning_rate": 2.879484527571483e-05, "loss": 0.0954, "step": 7017 }, { "epoch": 0.1546436618244118, "grad_norm": 0.7451021671295166, "learning_rate": 2.8794424815635193e-05, "loss": 0.0882, "step": 7018 }, { "epoch": 0.15466569711392797, "grad_norm": 1.3991962671279907, "learning_rate": 2.8794004285293155e-05, "loss": 0.1344, "step": 7019 }, { "epoch": 0.1546877324034441, "grad_norm": 0.9691161513328552, "learning_rate": 2.879358368469086e-05, "loss": 0.1659, "step": 7020 }, { "epoch": 0.15470976769296027, "grad_norm": 0.8723606467247009, "learning_rate": 2.879316301383044e-05, "loss": 0.1337, "step": 7021 }, { "epoch": 0.15473180298247644, "grad_norm": 1.0403227806091309, "learning_rate": 2.879274227271405e-05, "loss": 0.1104, "step": 7022 }, { "epoch": 0.1547538382719926, "grad_norm": 0.7451967000961304, "learning_rate": 2.8792321461343826e-05, "loss": 0.1297, "step": 7023 }, { "epoch": 0.15477587356150876, "grad_norm": 0.8270876407623291, "learning_rate": 2.879190057972191e-05, "loss": 0.1072, "step": 7024 }, { "epoch": 0.15479790885102493, "grad_norm": 0.8963398337364197, "learning_rate": 2.879147962785045e-05, "loss": 0.1023, "step": 7025 }, { "epoch": 0.15481994414054107, "grad_norm": 1.9943368434906006, "learning_rate": 2.879105860573159e-05, "loss": 0.1384, "step": 7026 }, { "epoch": 0.15484197943005723, "grad_norm": 1.2866219282150269, "learning_rate": 2.8790637513367475e-05, "loss": 0.1194, "step": 7027 }, { "epoch": 0.1548640147195734, "grad_norm": 0.826291024684906, "learning_rate": 2.8790216350760245e-05, "loss": 0.0901, "step": 7028 }, { "epoch": 0.15488605000908956, "grad_norm": 0.8981908559799194, "learning_rate": 2.8789795117912048e-05, "loss": 0.186, "step": 7029 }, { "epoch": 0.15490808529860572, "grad_norm": 1.2521001100540161, "learning_rate": 2.878937381482503e-05, "loss": 0.1233, "step": 7030 }, { "epoch": 0.1549301205881219, "grad_norm": 0.9590343236923218, "learning_rate": 2.8788952441501338e-05, "loss": 0.1385, "step": 7031 }, { "epoch": 0.15495215587763803, "grad_norm": 1.1145546436309814, "learning_rate": 2.8788530997943118e-05, "loss": 0.0989, "step": 7032 }, { "epoch": 0.1549741911671542, "grad_norm": 1.1374379396438599, "learning_rate": 2.878810948415251e-05, "loss": 0.1009, "step": 7033 }, { "epoch": 0.15499622645667036, "grad_norm": 0.9605886936187744, "learning_rate": 2.878768790013167e-05, "loss": 0.1146, "step": 7034 }, { "epoch": 0.15501826174618652, "grad_norm": 0.9698588848114014, "learning_rate": 2.8787266245882746e-05, "loss": 0.1096, "step": 7035 }, { "epoch": 0.15504029703570268, "grad_norm": 0.9616273045539856, "learning_rate": 2.8786844521407875e-05, "loss": 0.1038, "step": 7036 }, { "epoch": 0.15506233232521885, "grad_norm": 1.2085827589035034, "learning_rate": 2.878642272670922e-05, "loss": 0.1511, "step": 7037 }, { "epoch": 0.15508436761473499, "grad_norm": 0.8739325404167175, "learning_rate": 2.8786000861788914e-05, "loss": 0.1296, "step": 7038 }, { "epoch": 0.15510640290425115, "grad_norm": 0.8091245293617249, "learning_rate": 2.8785578926649115e-05, "loss": 0.0987, "step": 7039 }, { "epoch": 0.15512843819376732, "grad_norm": 0.9597791433334351, "learning_rate": 2.878515692129197e-05, "loss": 0.1058, "step": 7040 }, { "epoch": 0.15515047348328348, "grad_norm": 1.1129156351089478, "learning_rate": 2.8784734845719628e-05, "loss": 0.1405, "step": 7041 }, { "epoch": 0.15517250877279964, "grad_norm": 1.049194097518921, "learning_rate": 2.8784312699934242e-05, "loss": 0.1379, "step": 7042 }, { "epoch": 0.1551945440623158, "grad_norm": 0.824809193611145, "learning_rate": 2.8783890483937956e-05, "loss": 0.087, "step": 7043 }, { "epoch": 0.15521657935183195, "grad_norm": 0.7221029996871948, "learning_rate": 2.8783468197732925e-05, "loss": 0.1159, "step": 7044 }, { "epoch": 0.1552386146413481, "grad_norm": 1.174919843673706, "learning_rate": 2.87830458413213e-05, "loss": 0.137, "step": 7045 }, { "epoch": 0.15526064993086428, "grad_norm": 0.8586699366569519, "learning_rate": 2.8782623414705234e-05, "loss": 0.0897, "step": 7046 }, { "epoch": 0.15528268522038044, "grad_norm": 0.9935662150382996, "learning_rate": 2.8782200917886878e-05, "loss": 0.1044, "step": 7047 }, { "epoch": 0.1553047205098966, "grad_norm": 1.0928900241851807, "learning_rate": 2.878177835086837e-05, "loss": 0.1386, "step": 7048 }, { "epoch": 0.15532675579941277, "grad_norm": 1.015173077583313, "learning_rate": 2.8781355713651888e-05, "loss": 0.0914, "step": 7049 }, { "epoch": 0.1553487910889289, "grad_norm": 1.2261686325073242, "learning_rate": 2.878093300623956e-05, "loss": 0.1056, "step": 7050 }, { "epoch": 0.15537082637844507, "grad_norm": 0.5165032148361206, "learning_rate": 2.878051022863356e-05, "loss": 0.0929, "step": 7051 }, { "epoch": 0.15539286166796124, "grad_norm": 1.9439560174942017, "learning_rate": 2.878008738083603e-05, "loss": 0.141, "step": 7052 }, { "epoch": 0.1554148969574774, "grad_norm": 1.9514498710632324, "learning_rate": 2.8779664462849123e-05, "loss": 0.1785, "step": 7053 }, { "epoch": 0.15543693224699356, "grad_norm": 0.9534239172935486, "learning_rate": 2.8779241474674993e-05, "loss": 0.1364, "step": 7054 }, { "epoch": 0.15545896753650973, "grad_norm": 0.6884186267852783, "learning_rate": 2.87788184163158e-05, "loss": 0.1046, "step": 7055 }, { "epoch": 0.1554810028260259, "grad_norm": 1.1127824783325195, "learning_rate": 2.8778395287773696e-05, "loss": 0.1547, "step": 7056 }, { "epoch": 0.15550303811554203, "grad_norm": 0.8284326791763306, "learning_rate": 2.877797208905084e-05, "loss": 0.066, "step": 7057 }, { "epoch": 0.1555250734050582, "grad_norm": 0.9464718699455261, "learning_rate": 2.877754882014938e-05, "loss": 0.1062, "step": 7058 }, { "epoch": 0.15554710869457436, "grad_norm": 0.7671053409576416, "learning_rate": 2.8777125481071476e-05, "loss": 0.1179, "step": 7059 }, { "epoch": 0.15556914398409052, "grad_norm": 1.325339913368225, "learning_rate": 2.8776702071819284e-05, "loss": 0.0848, "step": 7060 }, { "epoch": 0.1555911792736067, "grad_norm": 1.102015733718872, "learning_rate": 2.877627859239496e-05, "loss": 0.1268, "step": 7061 }, { "epoch": 0.15561321456312285, "grad_norm": 2.020414113998413, "learning_rate": 2.8775855042800663e-05, "loss": 0.1122, "step": 7062 }, { "epoch": 0.155635249852639, "grad_norm": 2.1591389179229736, "learning_rate": 2.8775431423038548e-05, "loss": 0.165, "step": 7063 }, { "epoch": 0.15565728514215516, "grad_norm": 1.214584469795227, "learning_rate": 2.8775007733110774e-05, "loss": 0.1516, "step": 7064 }, { "epoch": 0.15567932043167132, "grad_norm": 1.7368106842041016, "learning_rate": 2.8774583973019497e-05, "loss": 0.1402, "step": 7065 }, { "epoch": 0.15570135572118748, "grad_norm": 1.09493088722229, "learning_rate": 2.877416014276688e-05, "loss": 0.1517, "step": 7066 }, { "epoch": 0.15572339101070365, "grad_norm": 0.939522922039032, "learning_rate": 2.8773736242355076e-05, "loss": 0.1408, "step": 7067 }, { "epoch": 0.1557454263002198, "grad_norm": 0.9307454228401184, "learning_rate": 2.8773312271786253e-05, "loss": 0.1145, "step": 7068 }, { "epoch": 0.15576746158973595, "grad_norm": 1.0811201333999634, "learning_rate": 2.8772888231062562e-05, "loss": 0.0976, "step": 7069 }, { "epoch": 0.15578949687925212, "grad_norm": 0.8490140438079834, "learning_rate": 2.8772464120186167e-05, "loss": 0.097, "step": 7070 }, { "epoch": 0.15581153216876828, "grad_norm": 1.538356900215149, "learning_rate": 2.8772039939159224e-05, "loss": 0.1565, "step": 7071 }, { "epoch": 0.15583356745828444, "grad_norm": 1.0573543310165405, "learning_rate": 2.8771615687983895e-05, "loss": 0.1626, "step": 7072 }, { "epoch": 0.1558556027478006, "grad_norm": 0.6774840354919434, "learning_rate": 2.8771191366662347e-05, "loss": 0.1039, "step": 7073 }, { "epoch": 0.15587763803731677, "grad_norm": 0.8035805225372314, "learning_rate": 2.8770766975196733e-05, "loss": 0.1257, "step": 7074 }, { "epoch": 0.1558996733268329, "grad_norm": 1.029309868812561, "learning_rate": 2.877034251358922e-05, "loss": 0.0946, "step": 7075 }, { "epoch": 0.15592170861634908, "grad_norm": 1.0161949396133423, "learning_rate": 2.8769917981841968e-05, "loss": 0.1482, "step": 7076 }, { "epoch": 0.15594374390586524, "grad_norm": 1.215416669845581, "learning_rate": 2.876949337995714e-05, "loss": 0.1025, "step": 7077 }, { "epoch": 0.1559657791953814, "grad_norm": 1.294610857963562, "learning_rate": 2.8769068707936898e-05, "loss": 0.0991, "step": 7078 }, { "epoch": 0.15598781448489757, "grad_norm": 0.9748864769935608, "learning_rate": 2.8768643965783408e-05, "loss": 0.1541, "step": 7079 }, { "epoch": 0.15600984977441373, "grad_norm": 0.906201958656311, "learning_rate": 2.8768219153498825e-05, "loss": 0.0857, "step": 7080 }, { "epoch": 0.15603188506392987, "grad_norm": 0.9583567976951599, "learning_rate": 2.8767794271085325e-05, "loss": 0.1618, "step": 7081 }, { "epoch": 0.15605392035344604, "grad_norm": 1.2237225770950317, "learning_rate": 2.8767369318545063e-05, "loss": 0.1355, "step": 7082 }, { "epoch": 0.1560759556429622, "grad_norm": 1.1657540798187256, "learning_rate": 2.8766944295880207e-05, "loss": 0.1298, "step": 7083 }, { "epoch": 0.15609799093247836, "grad_norm": 1.6176918745040894, "learning_rate": 2.8766519203092922e-05, "loss": 0.172, "step": 7084 }, { "epoch": 0.15612002622199453, "grad_norm": 1.0659385919570923, "learning_rate": 2.876609404018537e-05, "loss": 0.1128, "step": 7085 }, { "epoch": 0.1561420615115107, "grad_norm": 0.7249936461448669, "learning_rate": 2.876566880715972e-05, "loss": 0.1438, "step": 7086 }, { "epoch": 0.15616409680102686, "grad_norm": 0.5878773331642151, "learning_rate": 2.876524350401814e-05, "loss": 0.1089, "step": 7087 }, { "epoch": 0.156186132090543, "grad_norm": 0.9283257722854614, "learning_rate": 2.8764818130762787e-05, "loss": 0.1216, "step": 7088 }, { "epoch": 0.15620816738005916, "grad_norm": 0.9283106327056885, "learning_rate": 2.8764392687395836e-05, "loss": 0.1377, "step": 7089 }, { "epoch": 0.15623020266957532, "grad_norm": 1.3371388912200928, "learning_rate": 2.8763967173919456e-05, "loss": 0.1435, "step": 7090 }, { "epoch": 0.1562522379590915, "grad_norm": 0.92357337474823, "learning_rate": 2.876354159033581e-05, "loss": 0.101, "step": 7091 }, { "epoch": 0.15627427324860765, "grad_norm": 1.2537888288497925, "learning_rate": 2.8763115936647065e-05, "loss": 0.1023, "step": 7092 }, { "epoch": 0.15629630853812382, "grad_norm": 0.9763243198394775, "learning_rate": 2.8762690212855386e-05, "loss": 0.137, "step": 7093 }, { "epoch": 0.15631834382763996, "grad_norm": 0.9983446598052979, "learning_rate": 2.8762264418962952e-05, "loss": 0.1165, "step": 7094 }, { "epoch": 0.15634037911715612, "grad_norm": 0.8520681858062744, "learning_rate": 2.8761838554971925e-05, "loss": 0.1384, "step": 7095 }, { "epoch": 0.15636241440667228, "grad_norm": 1.069167971611023, "learning_rate": 2.876141262088447e-05, "loss": 0.0999, "step": 7096 }, { "epoch": 0.15638444969618845, "grad_norm": 0.7558000683784485, "learning_rate": 2.8760986616702764e-05, "loss": 0.1015, "step": 7097 }, { "epoch": 0.1564064849857046, "grad_norm": 0.7301508784294128, "learning_rate": 2.876056054242897e-05, "loss": 0.1246, "step": 7098 }, { "epoch": 0.15642852027522078, "grad_norm": 0.6225627064704895, "learning_rate": 2.8760134398065267e-05, "loss": 0.0795, "step": 7099 }, { "epoch": 0.15645055556473692, "grad_norm": 0.9014896750450134, "learning_rate": 2.8759708183613823e-05, "loss": 0.1254, "step": 7100 }, { "epoch": 0.15647259085425308, "grad_norm": 1.0779600143432617, "learning_rate": 2.87592818990768e-05, "loss": 0.1238, "step": 7101 }, { "epoch": 0.15649462614376924, "grad_norm": 1.0397340059280396, "learning_rate": 2.8758855544456377e-05, "loss": 0.1437, "step": 7102 }, { "epoch": 0.1565166614332854, "grad_norm": 1.1746524572372437, "learning_rate": 2.875842911975473e-05, "loss": 0.1043, "step": 7103 }, { "epoch": 0.15653869672280157, "grad_norm": 1.091402530670166, "learning_rate": 2.8758002624974018e-05, "loss": 0.1461, "step": 7104 }, { "epoch": 0.15656073201231774, "grad_norm": 1.1661423444747925, "learning_rate": 2.8757576060116427e-05, "loss": 0.1576, "step": 7105 }, { "epoch": 0.15658276730183388, "grad_norm": 0.6294688582420349, "learning_rate": 2.8757149425184122e-05, "loss": 0.1651, "step": 7106 }, { "epoch": 0.15660480259135004, "grad_norm": 1.102013111114502, "learning_rate": 2.8756722720179276e-05, "loss": 0.1283, "step": 7107 }, { "epoch": 0.1566268378808662, "grad_norm": 1.0937882661819458, "learning_rate": 2.8756295945104066e-05, "loss": 0.1544, "step": 7108 }, { "epoch": 0.15664887317038237, "grad_norm": 1.45270836353302, "learning_rate": 2.8755869099960664e-05, "loss": 0.1258, "step": 7109 }, { "epoch": 0.15667090845989853, "grad_norm": 1.0048974752426147, "learning_rate": 2.8755442184751246e-05, "loss": 0.0853, "step": 7110 }, { "epoch": 0.1566929437494147, "grad_norm": 1.0763195753097534, "learning_rate": 2.875501519947798e-05, "loss": 0.1231, "step": 7111 }, { "epoch": 0.15671497903893084, "grad_norm": 1.096598744392395, "learning_rate": 2.875458814414305e-05, "loss": 0.0987, "step": 7112 }, { "epoch": 0.156737014328447, "grad_norm": 0.6345728635787964, "learning_rate": 2.8754161018748627e-05, "loss": 0.1139, "step": 7113 }, { "epoch": 0.15675904961796316, "grad_norm": 0.8005463480949402, "learning_rate": 2.875373382329688e-05, "loss": 0.0884, "step": 7114 }, { "epoch": 0.15678108490747933, "grad_norm": 0.7943748235702515, "learning_rate": 2.8753306557789998e-05, "loss": 0.1073, "step": 7115 }, { "epoch": 0.1568031201969955, "grad_norm": 0.9509829878807068, "learning_rate": 2.8752879222230145e-05, "loss": 0.1121, "step": 7116 }, { "epoch": 0.15682515548651166, "grad_norm": 0.8545540571212769, "learning_rate": 2.875245181661951e-05, "loss": 0.1242, "step": 7117 }, { "epoch": 0.1568471907760278, "grad_norm": 0.9385530352592468, "learning_rate": 2.875202434096026e-05, "loss": 0.1339, "step": 7118 }, { "epoch": 0.15686922606554396, "grad_norm": 1.6434582471847534, "learning_rate": 2.8751596795254572e-05, "loss": 0.1343, "step": 7119 }, { "epoch": 0.15689126135506012, "grad_norm": 0.9900014400482178, "learning_rate": 2.8751169179504634e-05, "loss": 0.1537, "step": 7120 }, { "epoch": 0.1569132966445763, "grad_norm": 0.9500130414962769, "learning_rate": 2.8750741493712616e-05, "loss": 0.1136, "step": 7121 }, { "epoch": 0.15693533193409245, "grad_norm": 1.15702486038208, "learning_rate": 2.8750313737880694e-05, "loss": 0.1318, "step": 7122 }, { "epoch": 0.15695736722360862, "grad_norm": 0.9574857950210571, "learning_rate": 2.8749885912011053e-05, "loss": 0.1435, "step": 7123 }, { "epoch": 0.15697940251312478, "grad_norm": 2.397357702255249, "learning_rate": 2.874945801610587e-05, "loss": 0.141, "step": 7124 }, { "epoch": 0.15700143780264092, "grad_norm": 1.15526282787323, "learning_rate": 2.8749030050167323e-05, "loss": 0.1156, "step": 7125 }, { "epoch": 0.15702347309215708, "grad_norm": 1.512020468711853, "learning_rate": 2.874860201419759e-05, "loss": 0.1388, "step": 7126 }, { "epoch": 0.15704550838167325, "grad_norm": 1.3839375972747803, "learning_rate": 2.874817390819886e-05, "loss": 0.1132, "step": 7127 }, { "epoch": 0.1570675436711894, "grad_norm": 1.134688138961792, "learning_rate": 2.8747745732173307e-05, "loss": 0.1161, "step": 7128 }, { "epoch": 0.15708957896070558, "grad_norm": 1.043833613395691, "learning_rate": 2.874731748612311e-05, "loss": 0.0744, "step": 7129 }, { "epoch": 0.15711161425022174, "grad_norm": 0.80210942029953, "learning_rate": 2.8746889170050457e-05, "loss": 0.1248, "step": 7130 }, { "epoch": 0.15713364953973788, "grad_norm": 1.303088665008545, "learning_rate": 2.874646078395752e-05, "loss": 0.0849, "step": 7131 }, { "epoch": 0.15715568482925404, "grad_norm": 0.9662603735923767, "learning_rate": 2.8746032327846494e-05, "loss": 0.1069, "step": 7132 }, { "epoch": 0.1571777201187702, "grad_norm": 0.712649405002594, "learning_rate": 2.874560380171955e-05, "loss": 0.1138, "step": 7133 }, { "epoch": 0.15719975540828637, "grad_norm": 0.8977270722389221, "learning_rate": 2.8745175205578876e-05, "loss": 0.0922, "step": 7134 }, { "epoch": 0.15722179069780254, "grad_norm": 1.4277307987213135, "learning_rate": 2.8744746539426656e-05, "loss": 0.1339, "step": 7135 }, { "epoch": 0.1572438259873187, "grad_norm": 0.7766823768615723, "learning_rate": 2.874431780326507e-05, "loss": 0.1422, "step": 7136 }, { "epoch": 0.15726586127683484, "grad_norm": 1.1530183553695679, "learning_rate": 2.87438889970963e-05, "loss": 0.131, "step": 7137 }, { "epoch": 0.157287896566351, "grad_norm": 1.597808837890625, "learning_rate": 2.8743460120922536e-05, "loss": 0.1918, "step": 7138 }, { "epoch": 0.15730993185586717, "grad_norm": 0.9622198343276978, "learning_rate": 2.8743031174745957e-05, "loss": 0.1117, "step": 7139 }, { "epoch": 0.15733196714538333, "grad_norm": 0.8593658208847046, "learning_rate": 2.8742602158568752e-05, "loss": 0.118, "step": 7140 }, { "epoch": 0.1573540024348995, "grad_norm": 1.4872257709503174, "learning_rate": 2.874217307239311e-05, "loss": 0.1197, "step": 7141 }, { "epoch": 0.15737603772441566, "grad_norm": 0.798893392086029, "learning_rate": 2.8741743916221204e-05, "loss": 0.1239, "step": 7142 }, { "epoch": 0.1573980730139318, "grad_norm": 1.0417472124099731, "learning_rate": 2.8741314690055235e-05, "loss": 0.0636, "step": 7143 }, { "epoch": 0.15742010830344796, "grad_norm": 1.009954571723938, "learning_rate": 2.874088539389737e-05, "loss": 0.096, "step": 7144 }, { "epoch": 0.15744214359296413, "grad_norm": 1.0832816362380981, "learning_rate": 2.874045602774982e-05, "loss": 0.1655, "step": 7145 }, { "epoch": 0.1574641788824803, "grad_norm": 1.0343173742294312, "learning_rate": 2.874002659161475e-05, "loss": 0.1095, "step": 7146 }, { "epoch": 0.15748621417199646, "grad_norm": 0.640903115272522, "learning_rate": 2.873959708549436e-05, "loss": 0.0974, "step": 7147 }, { "epoch": 0.15750824946151262, "grad_norm": 0.9793671369552612, "learning_rate": 2.873916750939084e-05, "loss": 0.1342, "step": 7148 }, { "epoch": 0.15753028475102876, "grad_norm": 1.168657660484314, "learning_rate": 2.8738737863306362e-05, "loss": 0.1429, "step": 7149 }, { "epoch": 0.15755232004054492, "grad_norm": 0.777934730052948, "learning_rate": 2.873830814724313e-05, "loss": 0.088, "step": 7150 }, { "epoch": 0.1575743553300611, "grad_norm": 0.8416200876235962, "learning_rate": 2.8737878361203327e-05, "loss": 0.1279, "step": 7151 }, { "epoch": 0.15759639061957725, "grad_norm": 1.3379459381103516, "learning_rate": 2.873744850518914e-05, "loss": 0.1144, "step": 7152 }, { "epoch": 0.15761842590909342, "grad_norm": 0.7935965657234192, "learning_rate": 2.8737018579202768e-05, "loss": 0.0975, "step": 7153 }, { "epoch": 0.15764046119860958, "grad_norm": 0.7857880592346191, "learning_rate": 2.873658858324639e-05, "loss": 0.1001, "step": 7154 }, { "epoch": 0.15766249648812572, "grad_norm": 1.1301807165145874, "learning_rate": 2.8736158517322197e-05, "loss": 0.1422, "step": 7155 }, { "epoch": 0.15768453177764188, "grad_norm": 0.7547856569290161, "learning_rate": 2.873572838143239e-05, "loss": 0.0974, "step": 7156 }, { "epoch": 0.15770656706715805, "grad_norm": 0.735641360282898, "learning_rate": 2.873529817557915e-05, "loss": 0.0995, "step": 7157 }, { "epoch": 0.1577286023566742, "grad_norm": 1.1265350580215454, "learning_rate": 2.8734867899764663e-05, "loss": 0.1144, "step": 7158 }, { "epoch": 0.15775063764619038, "grad_norm": 1.3118867874145508, "learning_rate": 2.8734437553991137e-05, "loss": 0.1532, "step": 7159 }, { "epoch": 0.15777267293570654, "grad_norm": 0.6764608025550842, "learning_rate": 2.8734007138260753e-05, "loss": 0.0845, "step": 7160 }, { "epoch": 0.1577947082252227, "grad_norm": 1.0542922019958496, "learning_rate": 2.8733576652575707e-05, "loss": 0.117, "step": 7161 }, { "epoch": 0.15781674351473884, "grad_norm": 0.9261777997016907, "learning_rate": 2.8733146096938188e-05, "loss": 0.1088, "step": 7162 }, { "epoch": 0.157838778804255, "grad_norm": 0.5183528661727905, "learning_rate": 2.8732715471350397e-05, "loss": 0.1324, "step": 7163 }, { "epoch": 0.15786081409377117, "grad_norm": 0.6920760869979858, "learning_rate": 2.8732284775814518e-05, "loss": 0.096, "step": 7164 }, { "epoch": 0.15788284938328734, "grad_norm": 0.8564231395721436, "learning_rate": 2.8731854010332748e-05, "loss": 0.1, "step": 7165 }, { "epoch": 0.1579048846728035, "grad_norm": 0.9137529134750366, "learning_rate": 2.8731423174907283e-05, "loss": 0.1797, "step": 7166 }, { "epoch": 0.15792691996231967, "grad_norm": 0.6189587712287903, "learning_rate": 2.873099226954032e-05, "loss": 0.0957, "step": 7167 }, { "epoch": 0.1579489552518358, "grad_norm": 1.0536634922027588, "learning_rate": 2.8730561294234047e-05, "loss": 0.1185, "step": 7168 }, { "epoch": 0.15797099054135197, "grad_norm": 1.5800340175628662, "learning_rate": 2.873013024899066e-05, "loss": 0.1257, "step": 7169 }, { "epoch": 0.15799302583086813, "grad_norm": 1.2069242000579834, "learning_rate": 2.872969913381236e-05, "loss": 0.1321, "step": 7170 }, { "epoch": 0.1580150611203843, "grad_norm": 1.1765213012695312, "learning_rate": 2.8729267948701342e-05, "loss": 0.131, "step": 7171 }, { "epoch": 0.15803709640990046, "grad_norm": 0.8625426888465881, "learning_rate": 2.8728836693659794e-05, "loss": 0.1336, "step": 7172 }, { "epoch": 0.15805913169941663, "grad_norm": 0.9813671708106995, "learning_rate": 2.8728405368689923e-05, "loss": 0.1358, "step": 7173 }, { "epoch": 0.15808116698893276, "grad_norm": 0.5914874076843262, "learning_rate": 2.8727973973793918e-05, "loss": 0.1042, "step": 7174 }, { "epoch": 0.15810320227844893, "grad_norm": 0.8301072120666504, "learning_rate": 2.8727542508973984e-05, "loss": 0.0933, "step": 7175 }, { "epoch": 0.1581252375679651, "grad_norm": 0.9435993432998657, "learning_rate": 2.8727110974232314e-05, "loss": 0.1263, "step": 7176 }, { "epoch": 0.15814727285748126, "grad_norm": 0.9825894832611084, "learning_rate": 2.8726679369571107e-05, "loss": 0.1485, "step": 7177 }, { "epoch": 0.15816930814699742, "grad_norm": 0.9771159291267395, "learning_rate": 2.8726247694992557e-05, "loss": 0.144, "step": 7178 }, { "epoch": 0.1581913434365136, "grad_norm": 1.0783779621124268, "learning_rate": 2.8725815950498875e-05, "loss": 0.1279, "step": 7179 }, { "epoch": 0.15821337872602972, "grad_norm": 1.3613353967666626, "learning_rate": 2.8725384136092242e-05, "loss": 0.1207, "step": 7180 }, { "epoch": 0.1582354140155459, "grad_norm": 0.8594164252281189, "learning_rate": 2.8724952251774874e-05, "loss": 0.116, "step": 7181 }, { "epoch": 0.15825744930506205, "grad_norm": 0.8605708479881287, "learning_rate": 2.8724520297548967e-05, "loss": 0.1294, "step": 7182 }, { "epoch": 0.15827948459457822, "grad_norm": 1.1119052171707153, "learning_rate": 2.872408827341671e-05, "loss": 0.1237, "step": 7183 }, { "epoch": 0.15830151988409438, "grad_norm": 1.3005239963531494, "learning_rate": 2.872365617938032e-05, "loss": 0.1762, "step": 7184 }, { "epoch": 0.15832355517361055, "grad_norm": 0.9562448859214783, "learning_rate": 2.8723224015441985e-05, "loss": 0.106, "step": 7185 }, { "epoch": 0.15834559046312668, "grad_norm": 0.8228528499603271, "learning_rate": 2.8722791781603912e-05, "loss": 0.103, "step": 7186 }, { "epoch": 0.15836762575264285, "grad_norm": 0.9457982778549194, "learning_rate": 2.8722359477868304e-05, "loss": 0.102, "step": 7187 }, { "epoch": 0.158389661042159, "grad_norm": 1.3629173040390015, "learning_rate": 2.872192710423736e-05, "loss": 0.1436, "step": 7188 }, { "epoch": 0.15841169633167518, "grad_norm": 0.7953154444694519, "learning_rate": 2.8721494660713278e-05, "loss": 0.0829, "step": 7189 }, { "epoch": 0.15843373162119134, "grad_norm": 1.0818562507629395, "learning_rate": 2.8721062147298272e-05, "loss": 0.126, "step": 7190 }, { "epoch": 0.1584557669107075, "grad_norm": 1.1063251495361328, "learning_rate": 2.8720629563994537e-05, "loss": 0.1576, "step": 7191 }, { "epoch": 0.15847780220022367, "grad_norm": 1.358365774154663, "learning_rate": 2.8720196910804278e-05, "loss": 0.1172, "step": 7192 }, { "epoch": 0.1584998374897398, "grad_norm": 1.2181726694107056, "learning_rate": 2.8719764187729698e-05, "loss": 0.1448, "step": 7193 }, { "epoch": 0.15852187277925597, "grad_norm": 1.2686070203781128, "learning_rate": 2.8719331394773e-05, "loss": 0.1385, "step": 7194 }, { "epoch": 0.15854390806877214, "grad_norm": 1.2856448888778687, "learning_rate": 2.871889853193639e-05, "loss": 0.141, "step": 7195 }, { "epoch": 0.1585659433582883, "grad_norm": 0.8681750893592834, "learning_rate": 2.8718465599222078e-05, "loss": 0.1153, "step": 7196 }, { "epoch": 0.15858797864780447, "grad_norm": 0.9359610080718994, "learning_rate": 2.8718032596632264e-05, "loss": 0.1048, "step": 7197 }, { "epoch": 0.15861001393732063, "grad_norm": 0.7779363393783569, "learning_rate": 2.871759952416915e-05, "loss": 0.1008, "step": 7198 }, { "epoch": 0.15863204922683677, "grad_norm": 1.038074016571045, "learning_rate": 2.8717166381834945e-05, "loss": 0.094, "step": 7199 }, { "epoch": 0.15865408451635293, "grad_norm": 0.7204107642173767, "learning_rate": 2.871673316963186e-05, "loss": 0.0766, "step": 7200 }, { "epoch": 0.1586761198058691, "grad_norm": 0.8919342756271362, "learning_rate": 2.8716299887562094e-05, "loss": 0.1131, "step": 7201 }, { "epoch": 0.15869815509538526, "grad_norm": 0.9180445075035095, "learning_rate": 2.8715866535627858e-05, "loss": 0.1389, "step": 7202 }, { "epoch": 0.15872019038490143, "grad_norm": 0.8436068296432495, "learning_rate": 2.8715433113831358e-05, "loss": 0.1181, "step": 7203 }, { "epoch": 0.1587422256744176, "grad_norm": 1.097171664237976, "learning_rate": 2.8714999622174803e-05, "loss": 0.0913, "step": 7204 }, { "epoch": 0.15876426096393373, "grad_norm": 0.8090340495109558, "learning_rate": 2.87145660606604e-05, "loss": 0.1002, "step": 7205 }, { "epoch": 0.1587862962534499, "grad_norm": 1.1431217193603516, "learning_rate": 2.8714132429290363e-05, "loss": 0.0957, "step": 7206 }, { "epoch": 0.15880833154296606, "grad_norm": 0.7162547707557678, "learning_rate": 2.871369872806689e-05, "loss": 0.123, "step": 7207 }, { "epoch": 0.15883036683248222, "grad_norm": 0.7144118547439575, "learning_rate": 2.8713264956992196e-05, "loss": 0.1152, "step": 7208 }, { "epoch": 0.1588524021219984, "grad_norm": 1.0627458095550537, "learning_rate": 2.8712831116068493e-05, "loss": 0.1031, "step": 7209 }, { "epoch": 0.15887443741151455, "grad_norm": 1.1488568782806396, "learning_rate": 2.8712397205297986e-05, "loss": 0.126, "step": 7210 }, { "epoch": 0.1588964727010307, "grad_norm": 1.1959015130996704, "learning_rate": 2.8711963224682883e-05, "loss": 0.1207, "step": 7211 }, { "epoch": 0.15891850799054685, "grad_norm": 1.3256627321243286, "learning_rate": 2.8711529174225404e-05, "loss": 0.0892, "step": 7212 }, { "epoch": 0.15894054328006302, "grad_norm": 1.1792793273925781, "learning_rate": 2.8711095053927752e-05, "loss": 0.0881, "step": 7213 }, { "epoch": 0.15896257856957918, "grad_norm": 1.0526238679885864, "learning_rate": 2.871066086379214e-05, "loss": 0.0786, "step": 7214 }, { "epoch": 0.15898461385909535, "grad_norm": 0.8970816731452942, "learning_rate": 2.8710226603820782e-05, "loss": 0.1112, "step": 7215 }, { "epoch": 0.1590066491486115, "grad_norm": 0.9302502274513245, "learning_rate": 2.8709792274015886e-05, "loss": 0.1454, "step": 7216 }, { "epoch": 0.15902868443812765, "grad_norm": 1.307547688484192, "learning_rate": 2.8709357874379666e-05, "loss": 0.1525, "step": 7217 }, { "epoch": 0.1590507197276438, "grad_norm": 1.1379625797271729, "learning_rate": 2.8708923404914334e-05, "loss": 0.1034, "step": 7218 }, { "epoch": 0.15907275501715998, "grad_norm": 1.104188323020935, "learning_rate": 2.8708488865622107e-05, "loss": 0.1286, "step": 7219 }, { "epoch": 0.15909479030667614, "grad_norm": 0.7204300761222839, "learning_rate": 2.8708054256505193e-05, "loss": 0.1298, "step": 7220 }, { "epoch": 0.1591168255961923, "grad_norm": 0.9198422431945801, "learning_rate": 2.8707619577565805e-05, "loss": 0.0684, "step": 7221 }, { "epoch": 0.15913886088570847, "grad_norm": 0.8943566083908081, "learning_rate": 2.8707184828806162e-05, "loss": 0.1034, "step": 7222 }, { "epoch": 0.1591608961752246, "grad_norm": 1.3234691619873047, "learning_rate": 2.870675001022848e-05, "loss": 0.1032, "step": 7223 }, { "epoch": 0.15918293146474077, "grad_norm": 1.334649920463562, "learning_rate": 2.8706315121834967e-05, "loss": 0.1272, "step": 7224 }, { "epoch": 0.15920496675425694, "grad_norm": 0.846883237361908, "learning_rate": 2.870588016362784e-05, "loss": 0.0991, "step": 7225 }, { "epoch": 0.1592270020437731, "grad_norm": 1.1652216911315918, "learning_rate": 2.8705445135609315e-05, "loss": 0.1183, "step": 7226 }, { "epoch": 0.15924903733328927, "grad_norm": 1.0203295946121216, "learning_rate": 2.8705010037781614e-05, "loss": 0.1333, "step": 7227 }, { "epoch": 0.15927107262280543, "grad_norm": 1.034435510635376, "learning_rate": 2.8704574870146943e-05, "loss": 0.1166, "step": 7228 }, { "epoch": 0.1592931079123216, "grad_norm": 1.4952107667922974, "learning_rate": 2.8704139632707528e-05, "loss": 0.132, "step": 7229 }, { "epoch": 0.15931514320183773, "grad_norm": 1.1427898406982422, "learning_rate": 2.8703704325465575e-05, "loss": 0.1109, "step": 7230 }, { "epoch": 0.1593371784913539, "grad_norm": 1.4022525548934937, "learning_rate": 2.870326894842331e-05, "loss": 0.0817, "step": 7231 }, { "epoch": 0.15935921378087006, "grad_norm": 0.8261712789535522, "learning_rate": 2.870283350158295e-05, "loss": 0.0961, "step": 7232 }, { "epoch": 0.15938124907038623, "grad_norm": 0.8919186592102051, "learning_rate": 2.870239798494671e-05, "loss": 0.1344, "step": 7233 }, { "epoch": 0.1594032843599024, "grad_norm": 1.7520365715026855, "learning_rate": 2.8701962398516804e-05, "loss": 0.1017, "step": 7234 }, { "epoch": 0.15942531964941856, "grad_norm": 1.2323999404907227, "learning_rate": 2.8701526742295457e-05, "loss": 0.095, "step": 7235 }, { "epoch": 0.1594473549389347, "grad_norm": 0.7148048281669617, "learning_rate": 2.8701091016284894e-05, "loss": 0.1162, "step": 7236 }, { "epoch": 0.15946939022845086, "grad_norm": 1.1898999214172363, "learning_rate": 2.8700655220487323e-05, "loss": 0.1418, "step": 7237 }, { "epoch": 0.15949142551796702, "grad_norm": 1.0815701484680176, "learning_rate": 2.870021935490497e-05, "loss": 0.0926, "step": 7238 }, { "epoch": 0.1595134608074832, "grad_norm": 1.0185681581497192, "learning_rate": 2.869978341954005e-05, "loss": 0.128, "step": 7239 }, { "epoch": 0.15953549609699935, "grad_norm": 1.2984851598739624, "learning_rate": 2.869934741439479e-05, "loss": 0.0923, "step": 7240 }, { "epoch": 0.15955753138651552, "grad_norm": 1.878377914428711, "learning_rate": 2.8698911339471406e-05, "loss": 0.1508, "step": 7241 }, { "epoch": 0.15957956667603165, "grad_norm": 2.2689552307128906, "learning_rate": 2.8698475194772117e-05, "loss": 0.1401, "step": 7242 }, { "epoch": 0.15960160196554782, "grad_norm": 0.7758011221885681, "learning_rate": 2.8698038980299154e-05, "loss": 0.086, "step": 7243 }, { "epoch": 0.15962363725506398, "grad_norm": 1.076303243637085, "learning_rate": 2.869760269605473e-05, "loss": 0.1449, "step": 7244 }, { "epoch": 0.15964567254458015, "grad_norm": 1.057460069656372, "learning_rate": 2.869716634204107e-05, "loss": 0.1344, "step": 7245 }, { "epoch": 0.1596677078340963, "grad_norm": 0.588752031326294, "learning_rate": 2.8696729918260397e-05, "loss": 0.1549, "step": 7246 }, { "epoch": 0.15968974312361248, "grad_norm": 1.0204120874404907, "learning_rate": 2.8696293424714933e-05, "loss": 0.152, "step": 7247 }, { "epoch": 0.1597117784131286, "grad_norm": 0.6353122591972351, "learning_rate": 2.8695856861406904e-05, "loss": 0.1107, "step": 7248 }, { "epoch": 0.15973381370264478, "grad_norm": 1.1775957345962524, "learning_rate": 2.8695420228338532e-05, "loss": 0.1189, "step": 7249 }, { "epoch": 0.15975584899216094, "grad_norm": 0.8571515083312988, "learning_rate": 2.8694983525512036e-05, "loss": 0.1334, "step": 7250 }, { "epoch": 0.1597778842816771, "grad_norm": 1.0924283266067505, "learning_rate": 2.869454675292965e-05, "loss": 0.1481, "step": 7251 }, { "epoch": 0.15979991957119327, "grad_norm": 0.8478392958641052, "learning_rate": 2.869410991059359e-05, "loss": 0.1133, "step": 7252 }, { "epoch": 0.15982195486070944, "grad_norm": 0.7284281849861145, "learning_rate": 2.869367299850609e-05, "loss": 0.0831, "step": 7253 }, { "epoch": 0.15984399015022557, "grad_norm": 1.0622717142105103, "learning_rate": 2.8693236016669368e-05, "loss": 0.1488, "step": 7254 }, { "epoch": 0.15986602543974174, "grad_norm": 1.069993257522583, "learning_rate": 2.8692798965085652e-05, "loss": 0.0906, "step": 7255 }, { "epoch": 0.1598880607292579, "grad_norm": 0.945221483707428, "learning_rate": 2.869236184375717e-05, "loss": 0.1247, "step": 7256 }, { "epoch": 0.15991009601877407, "grad_norm": 1.3999406099319458, "learning_rate": 2.8691924652686145e-05, "loss": 0.1433, "step": 7257 }, { "epoch": 0.15993213130829023, "grad_norm": 0.8676874041557312, "learning_rate": 2.8691487391874805e-05, "loss": 0.1259, "step": 7258 }, { "epoch": 0.1599541665978064, "grad_norm": 1.0685396194458008, "learning_rate": 2.8691050061325375e-05, "loss": 0.1136, "step": 7259 }, { "epoch": 0.15997620188732253, "grad_norm": 1.1311471462249756, "learning_rate": 2.869061266104009e-05, "loss": 0.1005, "step": 7260 }, { "epoch": 0.1599982371768387, "grad_norm": 1.4554861783981323, "learning_rate": 2.869017519102117e-05, "loss": 0.1538, "step": 7261 }, { "epoch": 0.16002027246635486, "grad_norm": 0.791666567325592, "learning_rate": 2.8689737651270852e-05, "loss": 0.0942, "step": 7262 }, { "epoch": 0.16004230775587103, "grad_norm": 1.3562085628509521, "learning_rate": 2.8689300041791352e-05, "loss": 0.1564, "step": 7263 }, { "epoch": 0.1600643430453872, "grad_norm": 1.211807370185852, "learning_rate": 2.8688862362584906e-05, "loss": 0.1301, "step": 7264 }, { "epoch": 0.16008637833490336, "grad_norm": 1.2759202718734741, "learning_rate": 2.868842461365375e-05, "loss": 0.1035, "step": 7265 }, { "epoch": 0.16010841362441952, "grad_norm": 1.054364800453186, "learning_rate": 2.8687986795000105e-05, "loss": 0.0754, "step": 7266 }, { "epoch": 0.16013044891393566, "grad_norm": 1.3668525218963623, "learning_rate": 2.86875489066262e-05, "loss": 0.1412, "step": 7267 }, { "epoch": 0.16015248420345182, "grad_norm": 1.0799304246902466, "learning_rate": 2.8687110948534273e-05, "loss": 0.1252, "step": 7268 }, { "epoch": 0.160174519492968, "grad_norm": 0.8105753064155579, "learning_rate": 2.8686672920726546e-05, "loss": 0.0788, "step": 7269 }, { "epoch": 0.16019655478248415, "grad_norm": 1.1540297269821167, "learning_rate": 2.8686234823205256e-05, "loss": 0.1176, "step": 7270 }, { "epoch": 0.16021859007200032, "grad_norm": 0.6264607906341553, "learning_rate": 2.8685796655972633e-05, "loss": 0.0775, "step": 7271 }, { "epoch": 0.16024062536151648, "grad_norm": 1.0211135149002075, "learning_rate": 2.8685358419030906e-05, "loss": 0.1114, "step": 7272 }, { "epoch": 0.16026266065103262, "grad_norm": 1.5818003416061401, "learning_rate": 2.8684920112382314e-05, "loss": 0.1231, "step": 7273 }, { "epoch": 0.16028469594054878, "grad_norm": 1.0022395849227905, "learning_rate": 2.8684481736029087e-05, "loss": 0.114, "step": 7274 }, { "epoch": 0.16030673123006495, "grad_norm": 0.8414058685302734, "learning_rate": 2.8684043289973452e-05, "loss": 0.1179, "step": 7275 }, { "epoch": 0.1603287665195811, "grad_norm": 1.2322256565093994, "learning_rate": 2.868360477421765e-05, "loss": 0.0975, "step": 7276 }, { "epoch": 0.16035080180909728, "grad_norm": 1.0566483736038208, "learning_rate": 2.868316618876391e-05, "loss": 0.1163, "step": 7277 }, { "epoch": 0.16037283709861344, "grad_norm": 1.267400860786438, "learning_rate": 2.8682727533614466e-05, "loss": 0.1294, "step": 7278 }, { "epoch": 0.16039487238812958, "grad_norm": 1.1844549179077148, "learning_rate": 2.868228880877155e-05, "loss": 0.1381, "step": 7279 }, { "epoch": 0.16041690767764574, "grad_norm": 0.7325690984725952, "learning_rate": 2.8681850014237405e-05, "loss": 0.0933, "step": 7280 }, { "epoch": 0.1604389429671619, "grad_norm": 1.3730956315994263, "learning_rate": 2.868141115001426e-05, "loss": 0.1661, "step": 7281 }, { "epoch": 0.16046097825667807, "grad_norm": 0.8945276737213135, "learning_rate": 2.868097221610435e-05, "loss": 0.112, "step": 7282 }, { "epoch": 0.16048301354619424, "grad_norm": 0.8206305503845215, "learning_rate": 2.8680533212509915e-05, "loss": 0.121, "step": 7283 }, { "epoch": 0.1605050488357104, "grad_norm": 0.8861514925956726, "learning_rate": 2.868009413923319e-05, "loss": 0.1203, "step": 7284 }, { "epoch": 0.16052708412522654, "grad_norm": 1.6615097522735596, "learning_rate": 2.8679654996276402e-05, "loss": 0.1733, "step": 7285 }, { "epoch": 0.1605491194147427, "grad_norm": 0.721468448638916, "learning_rate": 2.86792157836418e-05, "loss": 0.106, "step": 7286 }, { "epoch": 0.16057115470425887, "grad_norm": 1.0835776329040527, "learning_rate": 2.867877650133162e-05, "loss": 0.144, "step": 7287 }, { "epoch": 0.16059318999377503, "grad_norm": 0.9637993574142456, "learning_rate": 2.8678337149348094e-05, "loss": 0.1003, "step": 7288 }, { "epoch": 0.1606152252832912, "grad_norm": 0.8147832751274109, "learning_rate": 2.8677897727693456e-05, "loss": 0.0977, "step": 7289 }, { "epoch": 0.16063726057280736, "grad_norm": 1.4388175010681152, "learning_rate": 2.8677458236369954e-05, "loss": 0.1265, "step": 7290 }, { "epoch": 0.1606592958623235, "grad_norm": 0.6633421778678894, "learning_rate": 2.867701867537983e-05, "loss": 0.0798, "step": 7291 }, { "epoch": 0.16068133115183966, "grad_norm": 0.9597221612930298, "learning_rate": 2.867657904472531e-05, "loss": 0.1072, "step": 7292 }, { "epoch": 0.16070336644135583, "grad_norm": 0.8654293417930603, "learning_rate": 2.8676139344408636e-05, "loss": 0.164, "step": 7293 }, { "epoch": 0.160725401730872, "grad_norm": 0.9595441222190857, "learning_rate": 2.867569957443205e-05, "loss": 0.1303, "step": 7294 }, { "epoch": 0.16074743702038816, "grad_norm": 0.9927606582641602, "learning_rate": 2.8675259734797796e-05, "loss": 0.1462, "step": 7295 }, { "epoch": 0.16076947230990432, "grad_norm": 0.8906435966491699, "learning_rate": 2.8674819825508113e-05, "loss": 0.1147, "step": 7296 }, { "epoch": 0.16079150759942049, "grad_norm": 1.2468616962432861, "learning_rate": 2.8674379846565236e-05, "loss": 0.1472, "step": 7297 }, { "epoch": 0.16081354288893662, "grad_norm": 0.8233155012130737, "learning_rate": 2.867393979797141e-05, "loss": 0.1117, "step": 7298 }, { "epoch": 0.1608355781784528, "grad_norm": 0.8951942324638367, "learning_rate": 2.867349967972888e-05, "loss": 0.1692, "step": 7299 }, { "epoch": 0.16085761346796895, "grad_norm": 1.0203893184661865, "learning_rate": 2.867305949183988e-05, "loss": 0.1013, "step": 7300 }, { "epoch": 0.16087964875748512, "grad_norm": 0.7573996186256409, "learning_rate": 2.8672619234306656e-05, "loss": 0.0836, "step": 7301 }, { "epoch": 0.16090168404700128, "grad_norm": 1.3288307189941406, "learning_rate": 2.867217890713145e-05, "loss": 0.1147, "step": 7302 }, { "epoch": 0.16092371933651745, "grad_norm": 0.6556900143623352, "learning_rate": 2.8671738510316506e-05, "loss": 0.133, "step": 7303 }, { "epoch": 0.16094575462603358, "grad_norm": 1.5651087760925293, "learning_rate": 2.8671298043864068e-05, "loss": 0.1236, "step": 7304 }, { "epoch": 0.16096778991554975, "grad_norm": 0.7581206560134888, "learning_rate": 2.8670857507776373e-05, "loss": 0.1389, "step": 7305 }, { "epoch": 0.1609898252050659, "grad_norm": 0.7776009440422058, "learning_rate": 2.867041690205568e-05, "loss": 0.0901, "step": 7306 }, { "epoch": 0.16101186049458208, "grad_norm": 1.4886562824249268, "learning_rate": 2.866997622670421e-05, "loss": 0.1675, "step": 7307 }, { "epoch": 0.16103389578409824, "grad_norm": 0.7767108082771301, "learning_rate": 2.866953548172423e-05, "loss": 0.1051, "step": 7308 }, { "epoch": 0.1610559310736144, "grad_norm": 0.969808042049408, "learning_rate": 2.8669094667117972e-05, "loss": 0.1567, "step": 7309 }, { "epoch": 0.16107796636313054, "grad_norm": 1.151973843574524, "learning_rate": 2.8668653782887685e-05, "loss": 0.1223, "step": 7310 }, { "epoch": 0.1611000016526467, "grad_norm": 0.9968538880348206, "learning_rate": 2.8668212829035615e-05, "loss": 0.1268, "step": 7311 }, { "epoch": 0.16112203694216287, "grad_norm": 0.9545975923538208, "learning_rate": 2.8667771805564007e-05, "loss": 0.0914, "step": 7312 }, { "epoch": 0.16114407223167904, "grad_norm": 0.8171086311340332, "learning_rate": 2.866733071247511e-05, "loss": 0.0835, "step": 7313 }, { "epoch": 0.1611661075211952, "grad_norm": 0.9429458975791931, "learning_rate": 2.8666889549771168e-05, "loss": 0.1659, "step": 7314 }, { "epoch": 0.16118814281071137, "grad_norm": 0.6388448476791382, "learning_rate": 2.866644831745443e-05, "loss": 0.1074, "step": 7315 }, { "epoch": 0.1612101781002275, "grad_norm": 1.6264630556106567, "learning_rate": 2.866600701552714e-05, "loss": 0.1595, "step": 7316 }, { "epoch": 0.16123221338974367, "grad_norm": 0.8738031983375549, "learning_rate": 2.8665565643991547e-05, "loss": 0.1604, "step": 7317 }, { "epoch": 0.16125424867925983, "grad_norm": 1.8657184839248657, "learning_rate": 2.86651242028499e-05, "loss": 0.1355, "step": 7318 }, { "epoch": 0.161276283968776, "grad_norm": 1.3318926095962524, "learning_rate": 2.866468269210445e-05, "loss": 0.1581, "step": 7319 }, { "epoch": 0.16129831925829216, "grad_norm": 0.7495117783546448, "learning_rate": 2.866424111175744e-05, "loss": 0.1147, "step": 7320 }, { "epoch": 0.16132035454780833, "grad_norm": 1.159827709197998, "learning_rate": 2.8663799461811127e-05, "loss": 0.1192, "step": 7321 }, { "epoch": 0.16134238983732446, "grad_norm": 0.9980905652046204, "learning_rate": 2.8663357742267758e-05, "loss": 0.1048, "step": 7322 }, { "epoch": 0.16136442512684063, "grad_norm": 1.3284803628921509, "learning_rate": 2.8662915953129574e-05, "loss": 0.1105, "step": 7323 }, { "epoch": 0.1613864604163568, "grad_norm": 0.8237638473510742, "learning_rate": 2.8662474094398835e-05, "loss": 0.1567, "step": 7324 }, { "epoch": 0.16140849570587296, "grad_norm": 0.9479039907455444, "learning_rate": 2.866203216607779e-05, "loss": 0.1033, "step": 7325 }, { "epoch": 0.16143053099538912, "grad_norm": 0.9969599843025208, "learning_rate": 2.866159016816869e-05, "loss": 0.1277, "step": 7326 }, { "epoch": 0.16145256628490529, "grad_norm": 0.7929784655570984, "learning_rate": 2.8661148100673786e-05, "loss": 0.0851, "step": 7327 }, { "epoch": 0.16147460157442142, "grad_norm": 1.2536813020706177, "learning_rate": 2.8660705963595325e-05, "loss": 0.1443, "step": 7328 }, { "epoch": 0.1614966368639376, "grad_norm": 1.1416007280349731, "learning_rate": 2.8660263756935567e-05, "loss": 0.1194, "step": 7329 }, { "epoch": 0.16151867215345375, "grad_norm": 1.2574948072433472, "learning_rate": 2.8659821480696758e-05, "loss": 0.1293, "step": 7330 }, { "epoch": 0.16154070744296992, "grad_norm": 1.9008740186691284, "learning_rate": 2.8659379134881155e-05, "loss": 0.1254, "step": 7331 }, { "epoch": 0.16156274273248608, "grad_norm": 1.3530642986297607, "learning_rate": 2.8658936719491008e-05, "loss": 0.1659, "step": 7332 }, { "epoch": 0.16158477802200225, "grad_norm": 1.070029616355896, "learning_rate": 2.8658494234528573e-05, "loss": 0.1056, "step": 7333 }, { "epoch": 0.1616068133115184, "grad_norm": 0.7691171169281006, "learning_rate": 2.86580516799961e-05, "loss": 0.1292, "step": 7334 }, { "epoch": 0.16162884860103455, "grad_norm": 1.086041808128357, "learning_rate": 2.865760905589585e-05, "loss": 0.1346, "step": 7335 }, { "epoch": 0.1616508838905507, "grad_norm": 0.8235045075416565, "learning_rate": 2.8657166362230066e-05, "loss": 0.1589, "step": 7336 }, { "epoch": 0.16167291918006688, "grad_norm": 0.8809162378311157, "learning_rate": 2.865672359900102e-05, "loss": 0.143, "step": 7337 }, { "epoch": 0.16169495446958304, "grad_norm": 1.0940085649490356, "learning_rate": 2.8656280766210948e-05, "loss": 0.1124, "step": 7338 }, { "epoch": 0.1617169897590992, "grad_norm": 0.8597345352172852, "learning_rate": 2.8655837863862122e-05, "loss": 0.1163, "step": 7339 }, { "epoch": 0.16173902504861537, "grad_norm": 1.1054189205169678, "learning_rate": 2.865539489195679e-05, "loss": 0.1663, "step": 7340 }, { "epoch": 0.1617610603381315, "grad_norm": 1.195631504058838, "learning_rate": 2.865495185049721e-05, "loss": 0.1115, "step": 7341 }, { "epoch": 0.16178309562764767, "grad_norm": 1.6636056900024414, "learning_rate": 2.8654508739485635e-05, "loss": 0.1381, "step": 7342 }, { "epoch": 0.16180513091716384, "grad_norm": 1.1633409261703491, "learning_rate": 2.8654065558924324e-05, "loss": 0.1403, "step": 7343 }, { "epoch": 0.16182716620668, "grad_norm": 1.0105944871902466, "learning_rate": 2.8653622308815534e-05, "loss": 0.1032, "step": 7344 }, { "epoch": 0.16184920149619617, "grad_norm": 0.7109337449073792, "learning_rate": 2.8653178989161527e-05, "loss": 0.1011, "step": 7345 }, { "epoch": 0.16187123678571233, "grad_norm": 0.824859082698822, "learning_rate": 2.8652735599964556e-05, "loss": 0.0818, "step": 7346 }, { "epoch": 0.16189327207522847, "grad_norm": 0.6816232800483704, "learning_rate": 2.8652292141226886e-05, "loss": 0.1377, "step": 7347 }, { "epoch": 0.16191530736474463, "grad_norm": 1.011159896850586, "learning_rate": 2.8651848612950768e-05, "loss": 0.1255, "step": 7348 }, { "epoch": 0.1619373426542608, "grad_norm": 0.8798023462295532, "learning_rate": 2.8651405015138463e-05, "loss": 0.1229, "step": 7349 }, { "epoch": 0.16195937794377696, "grad_norm": 0.8327425122261047, "learning_rate": 2.8650961347792234e-05, "loss": 0.1097, "step": 7350 }, { "epoch": 0.16198141323329313, "grad_norm": 1.406904935836792, "learning_rate": 2.865051761091434e-05, "loss": 0.1589, "step": 7351 }, { "epoch": 0.1620034485228093, "grad_norm": 0.978162407875061, "learning_rate": 2.8650073804507036e-05, "loss": 0.0908, "step": 7352 }, { "epoch": 0.16202548381232543, "grad_norm": 0.6914889216423035, "learning_rate": 2.8649629928572588e-05, "loss": 0.0958, "step": 7353 }, { "epoch": 0.1620475191018416, "grad_norm": 0.7841501235961914, "learning_rate": 2.864918598311325e-05, "loss": 0.098, "step": 7354 }, { "epoch": 0.16206955439135776, "grad_norm": 0.9082311391830444, "learning_rate": 2.8648741968131297e-05, "loss": 0.1133, "step": 7355 }, { "epoch": 0.16209158968087392, "grad_norm": 1.1241148710250854, "learning_rate": 2.864829788362898e-05, "loss": 0.1208, "step": 7356 }, { "epoch": 0.16211362497039009, "grad_norm": 1.0571727752685547, "learning_rate": 2.8647853729608562e-05, "loss": 0.1012, "step": 7357 }, { "epoch": 0.16213566025990625, "grad_norm": 0.8844879269599915, "learning_rate": 2.8647409506072303e-05, "loss": 0.1167, "step": 7358 }, { "epoch": 0.1621576955494224, "grad_norm": 1.2228689193725586, "learning_rate": 2.8646965213022474e-05, "loss": 0.1388, "step": 7359 }, { "epoch": 0.16217973083893855, "grad_norm": 1.4878116846084595, "learning_rate": 2.8646520850461332e-05, "loss": 0.1124, "step": 7360 }, { "epoch": 0.16220176612845472, "grad_norm": 0.901008665561676, "learning_rate": 2.8646076418391143e-05, "loss": 0.1081, "step": 7361 }, { "epoch": 0.16222380141797088, "grad_norm": 0.9812731742858887, "learning_rate": 2.8645631916814165e-05, "loss": 0.1411, "step": 7362 }, { "epoch": 0.16224583670748705, "grad_norm": 0.7710779905319214, "learning_rate": 2.864518734573267e-05, "loss": 0.103, "step": 7363 }, { "epoch": 0.1622678719970032, "grad_norm": 1.0304237604141235, "learning_rate": 2.8644742705148917e-05, "loss": 0.1189, "step": 7364 }, { "epoch": 0.16228990728651935, "grad_norm": 0.8811103105545044, "learning_rate": 2.8644297995065173e-05, "loss": 0.0923, "step": 7365 }, { "epoch": 0.1623119425760355, "grad_norm": 0.8103997707366943, "learning_rate": 2.86438532154837e-05, "loss": 0.0814, "step": 7366 }, { "epoch": 0.16233397786555168, "grad_norm": 0.9942752122879028, "learning_rate": 2.864340836640677e-05, "loss": 0.1053, "step": 7367 }, { "epoch": 0.16235601315506784, "grad_norm": 1.25213623046875, "learning_rate": 2.864296344783664e-05, "loss": 0.1146, "step": 7368 }, { "epoch": 0.162378048444584, "grad_norm": 1.3354278802871704, "learning_rate": 2.8642518459775587e-05, "loss": 0.1464, "step": 7369 }, { "epoch": 0.16240008373410017, "grad_norm": 1.0335493087768555, "learning_rate": 2.8642073402225868e-05, "loss": 0.1048, "step": 7370 }, { "epoch": 0.16242211902361633, "grad_norm": 0.8132925033569336, "learning_rate": 2.8641628275189756e-05, "loss": 0.133, "step": 7371 }, { "epoch": 0.16244415431313247, "grad_norm": 1.430738091468811, "learning_rate": 2.8641183078669516e-05, "loss": 0.1211, "step": 7372 }, { "epoch": 0.16246618960264864, "grad_norm": 1.215957522392273, "learning_rate": 2.8640737812667414e-05, "loss": 0.1259, "step": 7373 }, { "epoch": 0.1624882248921648, "grad_norm": 0.6846634745597839, "learning_rate": 2.864029247718572e-05, "loss": 0.0976, "step": 7374 }, { "epoch": 0.16251026018168097, "grad_norm": 0.7313361763954163, "learning_rate": 2.8639847072226704e-05, "loss": 0.0695, "step": 7375 }, { "epoch": 0.16253229547119713, "grad_norm": 1.248329997062683, "learning_rate": 2.863940159779263e-05, "loss": 0.1311, "step": 7376 }, { "epoch": 0.1625543307607133, "grad_norm": 0.9310345649719238, "learning_rate": 2.863895605388577e-05, "loss": 0.1153, "step": 7377 }, { "epoch": 0.16257636605022943, "grad_norm": 0.8588307499885559, "learning_rate": 2.8638510440508396e-05, "loss": 0.0935, "step": 7378 }, { "epoch": 0.1625984013397456, "grad_norm": 0.9780210852622986, "learning_rate": 2.863806475766277e-05, "loss": 0.1123, "step": 7379 }, { "epoch": 0.16262043662926176, "grad_norm": 0.9119446873664856, "learning_rate": 2.863761900535117e-05, "loss": 0.1082, "step": 7380 }, { "epoch": 0.16264247191877793, "grad_norm": 1.420858383178711, "learning_rate": 2.8637173183575865e-05, "loss": 0.1544, "step": 7381 }, { "epoch": 0.1626645072082941, "grad_norm": 1.1862391233444214, "learning_rate": 2.8636727292339117e-05, "loss": 0.099, "step": 7382 }, { "epoch": 0.16268654249781025, "grad_norm": 1.0715535879135132, "learning_rate": 2.863628133164321e-05, "loss": 0.1094, "step": 7383 }, { "epoch": 0.1627085777873264, "grad_norm": 0.7964968681335449, "learning_rate": 2.863583530149041e-05, "loss": 0.1555, "step": 7384 }, { "epoch": 0.16273061307684256, "grad_norm": 1.1922659873962402, "learning_rate": 2.8635389201882988e-05, "loss": 0.1718, "step": 7385 }, { "epoch": 0.16275264836635872, "grad_norm": 0.5752620100975037, "learning_rate": 2.8634943032823215e-05, "loss": 0.094, "step": 7386 }, { "epoch": 0.16277468365587489, "grad_norm": 0.8021246194839478, "learning_rate": 2.863449679431337e-05, "loss": 0.1245, "step": 7387 }, { "epoch": 0.16279671894539105, "grad_norm": 0.9860267043113708, "learning_rate": 2.8634050486355714e-05, "loss": 0.1186, "step": 7388 }, { "epoch": 0.16281875423490721, "grad_norm": 0.9439428448677063, "learning_rate": 2.8633604108952534e-05, "loss": 0.117, "step": 7389 }, { "epoch": 0.16284078952442335, "grad_norm": 0.828016996383667, "learning_rate": 2.8633157662106096e-05, "loss": 0.1361, "step": 7390 }, { "epoch": 0.16286282481393952, "grad_norm": 1.0300877094268799, "learning_rate": 2.8632711145818675e-05, "loss": 0.1355, "step": 7391 }, { "epoch": 0.16288486010345568, "grad_norm": 0.9416230916976929, "learning_rate": 2.8632264560092546e-05, "loss": 0.0804, "step": 7392 }, { "epoch": 0.16290689539297185, "grad_norm": 1.2346559762954712, "learning_rate": 2.8631817904929983e-05, "loss": 0.1444, "step": 7393 }, { "epoch": 0.162928930682488, "grad_norm": 1.4255847930908203, "learning_rate": 2.8631371180333263e-05, "loss": 0.1204, "step": 7394 }, { "epoch": 0.16295096597200417, "grad_norm": 1.395524501800537, "learning_rate": 2.8630924386304652e-05, "loss": 0.1386, "step": 7395 }, { "epoch": 0.1629730012615203, "grad_norm": 0.8948676586151123, "learning_rate": 2.8630477522846442e-05, "loss": 0.1294, "step": 7396 }, { "epoch": 0.16299503655103648, "grad_norm": 1.3564237356185913, "learning_rate": 2.8630030589960898e-05, "loss": 0.1089, "step": 7397 }, { "epoch": 0.16301707184055264, "grad_norm": 1.188794493675232, "learning_rate": 2.8629583587650305e-05, "loss": 0.1387, "step": 7398 }, { "epoch": 0.1630391071300688, "grad_norm": 0.9747782945632935, "learning_rate": 2.862913651591692e-05, "loss": 0.107, "step": 7399 }, { "epoch": 0.16306114241958497, "grad_norm": 1.4117839336395264, "learning_rate": 2.8628689374763045e-05, "loss": 0.1394, "step": 7400 }, { "epoch": 0.16308317770910113, "grad_norm": 1.2007030248641968, "learning_rate": 2.8628242164190946e-05, "loss": 0.1344, "step": 7401 }, { "epoch": 0.1631052129986173, "grad_norm": 0.7963299751281738, "learning_rate": 2.8627794884202902e-05, "loss": 0.1016, "step": 7402 }, { "epoch": 0.16312724828813344, "grad_norm": 0.7817467451095581, "learning_rate": 2.862734753480119e-05, "loss": 0.1232, "step": 7403 }, { "epoch": 0.1631492835776496, "grad_norm": 1.1034917831420898, "learning_rate": 2.8626900115988086e-05, "loss": 0.1245, "step": 7404 }, { "epoch": 0.16317131886716577, "grad_norm": 1.0733851194381714, "learning_rate": 2.862645262776588e-05, "loss": 0.1158, "step": 7405 }, { "epoch": 0.16319335415668193, "grad_norm": 0.8351608514785767, "learning_rate": 2.8626005070136836e-05, "loss": 0.1026, "step": 7406 }, { "epoch": 0.1632153894461981, "grad_norm": 0.8617272973060608, "learning_rate": 2.862555744310324e-05, "loss": 0.1114, "step": 7407 }, { "epoch": 0.16323742473571426, "grad_norm": 0.7297794818878174, "learning_rate": 2.862510974666738e-05, "loss": 0.1097, "step": 7408 }, { "epoch": 0.1632594600252304, "grad_norm": 0.8872215151786804, "learning_rate": 2.8624661980831526e-05, "loss": 0.1066, "step": 7409 }, { "epoch": 0.16328149531474656, "grad_norm": 0.739374041557312, "learning_rate": 2.8624214145597968e-05, "loss": 0.1102, "step": 7410 }, { "epoch": 0.16330353060426273, "grad_norm": 1.263410210609436, "learning_rate": 2.8623766240968973e-05, "loss": 0.1388, "step": 7411 }, { "epoch": 0.1633255658937789, "grad_norm": 0.9722606539726257, "learning_rate": 2.8623318266946835e-05, "loss": 0.1515, "step": 7412 }, { "epoch": 0.16334760118329505, "grad_norm": 1.2151373624801636, "learning_rate": 2.8622870223533832e-05, "loss": 0.1415, "step": 7413 }, { "epoch": 0.16336963647281122, "grad_norm": 0.7177572846412659, "learning_rate": 2.8622422110732247e-05, "loss": 0.0959, "step": 7414 }, { "epoch": 0.16339167176232736, "grad_norm": 0.8635541796684265, "learning_rate": 2.862197392854436e-05, "loss": 0.1013, "step": 7415 }, { "epoch": 0.16341370705184352, "grad_norm": 0.7368127107620239, "learning_rate": 2.862152567697245e-05, "loss": 0.1, "step": 7416 }, { "epoch": 0.16343574234135969, "grad_norm": 0.9030117392539978, "learning_rate": 2.862107735601881e-05, "loss": 0.1532, "step": 7417 }, { "epoch": 0.16345777763087585, "grad_norm": 0.783500611782074, "learning_rate": 2.862062896568572e-05, "loss": 0.1704, "step": 7418 }, { "epoch": 0.16347981292039201, "grad_norm": 0.7905622720718384, "learning_rate": 2.862018050597546e-05, "loss": 0.0958, "step": 7419 }, { "epoch": 0.16350184820990818, "grad_norm": 1.0938711166381836, "learning_rate": 2.861973197689032e-05, "loss": 0.0962, "step": 7420 }, { "epoch": 0.16352388349942432, "grad_norm": 0.764582633972168, "learning_rate": 2.8619283378432578e-05, "loss": 0.1008, "step": 7421 }, { "epoch": 0.16354591878894048, "grad_norm": 0.9067771434783936, "learning_rate": 2.8618834710604526e-05, "loss": 0.1647, "step": 7422 }, { "epoch": 0.16356795407845665, "grad_norm": 0.9144341945648193, "learning_rate": 2.8618385973408443e-05, "loss": 0.0786, "step": 7423 }, { "epoch": 0.1635899893679728, "grad_norm": 0.7671423554420471, "learning_rate": 2.861793716684662e-05, "loss": 0.0767, "step": 7424 }, { "epoch": 0.16361202465748897, "grad_norm": 1.1518001556396484, "learning_rate": 2.861748829092134e-05, "loss": 0.0991, "step": 7425 }, { "epoch": 0.16363405994700514, "grad_norm": 0.8252785801887512, "learning_rate": 2.861703934563489e-05, "loss": 0.0829, "step": 7426 }, { "epoch": 0.16365609523652128, "grad_norm": 1.3224881887435913, "learning_rate": 2.8616590330989555e-05, "loss": 0.1315, "step": 7427 }, { "epoch": 0.16367813052603744, "grad_norm": 1.1305817365646362, "learning_rate": 2.8616141246987625e-05, "loss": 0.1037, "step": 7428 }, { "epoch": 0.1637001658155536, "grad_norm": 0.8496935367584229, "learning_rate": 2.8615692093631384e-05, "loss": 0.1076, "step": 7429 }, { "epoch": 0.16372220110506977, "grad_norm": 0.5319376587867737, "learning_rate": 2.8615242870923124e-05, "loss": 0.0785, "step": 7430 }, { "epoch": 0.16374423639458593, "grad_norm": 1.1032187938690186, "learning_rate": 2.861479357886513e-05, "loss": 0.1151, "step": 7431 }, { "epoch": 0.1637662716841021, "grad_norm": 0.7253339290618896, "learning_rate": 2.861434421745969e-05, "loss": 0.1302, "step": 7432 }, { "epoch": 0.16378830697361824, "grad_norm": 0.8660770058631897, "learning_rate": 2.8613894786709098e-05, "loss": 0.1053, "step": 7433 }, { "epoch": 0.1638103422631344, "grad_norm": 0.7910095453262329, "learning_rate": 2.8613445286615637e-05, "loss": 0.0788, "step": 7434 }, { "epoch": 0.16383237755265057, "grad_norm": 0.8508517742156982, "learning_rate": 2.86129957171816e-05, "loss": 0.1312, "step": 7435 }, { "epoch": 0.16385441284216673, "grad_norm": 1.1047887802124023, "learning_rate": 2.8612546078409274e-05, "loss": 0.1227, "step": 7436 }, { "epoch": 0.1638764481316829, "grad_norm": 0.6613368988037109, "learning_rate": 2.8612096370300954e-05, "loss": 0.1108, "step": 7437 }, { "epoch": 0.16389848342119906, "grad_norm": 0.8498991131782532, "learning_rate": 2.8611646592858927e-05, "loss": 0.1334, "step": 7438 }, { "epoch": 0.16392051871071522, "grad_norm": 0.7699877619743347, "learning_rate": 2.8611196746085486e-05, "loss": 0.1146, "step": 7439 }, { "epoch": 0.16394255400023136, "grad_norm": 0.9107869863510132, "learning_rate": 2.8610746829982916e-05, "loss": 0.0992, "step": 7440 }, { "epoch": 0.16396458928974753, "grad_norm": 0.8602940440177917, "learning_rate": 2.8610296844553518e-05, "loss": 0.116, "step": 7441 }, { "epoch": 0.1639866245792637, "grad_norm": 1.0752456188201904, "learning_rate": 2.8609846789799582e-05, "loss": 0.1191, "step": 7442 }, { "epoch": 0.16400865986877985, "grad_norm": 0.8297269940376282, "learning_rate": 2.8609396665723393e-05, "loss": 0.1259, "step": 7443 }, { "epoch": 0.16403069515829602, "grad_norm": 0.9489153027534485, "learning_rate": 2.860894647232725e-05, "loss": 0.1652, "step": 7444 }, { "epoch": 0.16405273044781218, "grad_norm": 1.0218560695648193, "learning_rate": 2.8608496209613446e-05, "loss": 0.1369, "step": 7445 }, { "epoch": 0.16407476573732832, "grad_norm": 1.1161378622055054, "learning_rate": 2.860804587758427e-05, "loss": 0.1347, "step": 7446 }, { "epoch": 0.16409680102684449, "grad_norm": 0.6409822702407837, "learning_rate": 2.8607595476242025e-05, "loss": 0.1288, "step": 7447 }, { "epoch": 0.16411883631636065, "grad_norm": 1.0359405279159546, "learning_rate": 2.8607145005588997e-05, "loss": 0.0668, "step": 7448 }, { "epoch": 0.16414087160587681, "grad_norm": 0.7520231008529663, "learning_rate": 2.860669446562748e-05, "loss": 0.0853, "step": 7449 }, { "epoch": 0.16416290689539298, "grad_norm": 0.6493680477142334, "learning_rate": 2.8606243856359774e-05, "loss": 0.0795, "step": 7450 }, { "epoch": 0.16418494218490914, "grad_norm": 0.8369352221488953, "learning_rate": 2.860579317778817e-05, "loss": 0.0795, "step": 7451 }, { "epoch": 0.16420697747442528, "grad_norm": 1.4447951316833496, "learning_rate": 2.860534242991497e-05, "loss": 0.1367, "step": 7452 }, { "epoch": 0.16422901276394145, "grad_norm": 0.8990039825439453, "learning_rate": 2.860489161274246e-05, "loss": 0.1151, "step": 7453 }, { "epoch": 0.1642510480534576, "grad_norm": 1.129550814628601, "learning_rate": 2.8604440726272943e-05, "loss": 0.125, "step": 7454 }, { "epoch": 0.16427308334297377, "grad_norm": 1.3853152990341187, "learning_rate": 2.8603989770508717e-05, "loss": 0.1245, "step": 7455 }, { "epoch": 0.16429511863248994, "grad_norm": 1.3063907623291016, "learning_rate": 2.8603538745452076e-05, "loss": 0.1549, "step": 7456 }, { "epoch": 0.1643171539220061, "grad_norm": 0.8628746867179871, "learning_rate": 2.8603087651105314e-05, "loss": 0.1516, "step": 7457 }, { "epoch": 0.16433918921152224, "grad_norm": 1.2425625324249268, "learning_rate": 2.8602636487470732e-05, "loss": 0.112, "step": 7458 }, { "epoch": 0.1643612245010384, "grad_norm": 0.8931795358657837, "learning_rate": 2.8602185254550627e-05, "loss": 0.0888, "step": 7459 }, { "epoch": 0.16438325979055457, "grad_norm": 1.259610652923584, "learning_rate": 2.8601733952347306e-05, "loss": 0.1694, "step": 7460 }, { "epoch": 0.16440529508007073, "grad_norm": 1.043932557106018, "learning_rate": 2.860128258086305e-05, "loss": 0.1352, "step": 7461 }, { "epoch": 0.1644273303695869, "grad_norm": 1.2557337284088135, "learning_rate": 2.8600831140100178e-05, "loss": 0.124, "step": 7462 }, { "epoch": 0.16444936565910306, "grad_norm": 1.2065011262893677, "learning_rate": 2.8600379630060976e-05, "loss": 0.1361, "step": 7463 }, { "epoch": 0.1644714009486192, "grad_norm": 1.6012784242630005, "learning_rate": 2.8599928050747743e-05, "loss": 0.1088, "step": 7464 }, { "epoch": 0.16449343623813537, "grad_norm": 1.088572382926941, "learning_rate": 2.8599476402162787e-05, "loss": 0.097, "step": 7465 }, { "epoch": 0.16451547152765153, "grad_norm": 0.892231822013855, "learning_rate": 2.8599024684308405e-05, "loss": 0.1127, "step": 7466 }, { "epoch": 0.1645375068171677, "grad_norm": 0.8908419013023376, "learning_rate": 2.8598572897186897e-05, "loss": 0.1247, "step": 7467 }, { "epoch": 0.16455954210668386, "grad_norm": 1.2179474830627441, "learning_rate": 2.8598121040800565e-05, "loss": 0.1256, "step": 7468 }, { "epoch": 0.16458157739620002, "grad_norm": 0.9968805313110352, "learning_rate": 2.8597669115151714e-05, "loss": 0.1111, "step": 7469 }, { "epoch": 0.16460361268571616, "grad_norm": 0.8296854496002197, "learning_rate": 2.859721712024264e-05, "loss": 0.112, "step": 7470 }, { "epoch": 0.16462564797523233, "grad_norm": 1.0082827806472778, "learning_rate": 2.8596765056075646e-05, "loss": 0.1277, "step": 7471 }, { "epoch": 0.1646476832647485, "grad_norm": 0.885989785194397, "learning_rate": 2.859631292265304e-05, "loss": 0.1555, "step": 7472 }, { "epoch": 0.16466971855426465, "grad_norm": 1.0918819904327393, "learning_rate": 2.859586071997712e-05, "loss": 0.1125, "step": 7473 }, { "epoch": 0.16469175384378082, "grad_norm": 1.1450084447860718, "learning_rate": 2.8595408448050193e-05, "loss": 0.1507, "step": 7474 }, { "epoch": 0.16471378913329698, "grad_norm": 1.9863154888153076, "learning_rate": 2.8594956106874554e-05, "loss": 0.0975, "step": 7475 }, { "epoch": 0.16473582442281315, "grad_norm": 0.9216499924659729, "learning_rate": 2.8594503696452516e-05, "loss": 0.0983, "step": 7476 }, { "epoch": 0.16475785971232929, "grad_norm": 0.8159122467041016, "learning_rate": 2.859405121678638e-05, "loss": 0.1191, "step": 7477 }, { "epoch": 0.16477989500184545, "grad_norm": 1.05491042137146, "learning_rate": 2.8593598667878457e-05, "loss": 0.1203, "step": 7478 }, { "epoch": 0.16480193029136161, "grad_norm": 0.8030838966369629, "learning_rate": 2.859314604973104e-05, "loss": 0.1088, "step": 7479 }, { "epoch": 0.16482396558087778, "grad_norm": 0.9247554540634155, "learning_rate": 2.8592693362346447e-05, "loss": 0.1223, "step": 7480 }, { "epoch": 0.16484600087039394, "grad_norm": 1.4289675951004028, "learning_rate": 2.8592240605726972e-05, "loss": 0.1234, "step": 7481 }, { "epoch": 0.1648680361599101, "grad_norm": 0.8402699828147888, "learning_rate": 2.859178777987493e-05, "loss": 0.1303, "step": 7482 }, { "epoch": 0.16489007144942625, "grad_norm": 0.8545598983764648, "learning_rate": 2.8591334884792627e-05, "loss": 0.1123, "step": 7483 }, { "epoch": 0.1649121067389424, "grad_norm": 0.9055925011634827, "learning_rate": 2.8590881920482358e-05, "loss": 0.1119, "step": 7484 }, { "epoch": 0.16493414202845857, "grad_norm": 0.949163019657135, "learning_rate": 2.8590428886946447e-05, "loss": 0.1211, "step": 7485 }, { "epoch": 0.16495617731797474, "grad_norm": 1.028517723083496, "learning_rate": 2.8589975784187196e-05, "loss": 0.1478, "step": 7486 }, { "epoch": 0.1649782126074909, "grad_norm": 0.8743226528167725, "learning_rate": 2.8589522612206905e-05, "loss": 0.128, "step": 7487 }, { "epoch": 0.16500024789700707, "grad_norm": 0.9749341011047363, "learning_rate": 2.8589069371007887e-05, "loss": 0.0957, "step": 7488 }, { "epoch": 0.1650222831865232, "grad_norm": 1.8463670015335083, "learning_rate": 2.8588616060592458e-05, "loss": 0.1306, "step": 7489 }, { "epoch": 0.16504431847603937, "grad_norm": 1.1781548261642456, "learning_rate": 2.8588162680962917e-05, "loss": 0.1059, "step": 7490 }, { "epoch": 0.16506635376555553, "grad_norm": 0.6454256772994995, "learning_rate": 2.8587709232121575e-05, "loss": 0.1246, "step": 7491 }, { "epoch": 0.1650883890550717, "grad_norm": 1.7855308055877686, "learning_rate": 2.8587255714070743e-05, "loss": 0.0881, "step": 7492 }, { "epoch": 0.16511042434458786, "grad_norm": 1.058336853981018, "learning_rate": 2.8586802126812736e-05, "loss": 0.104, "step": 7493 }, { "epoch": 0.16513245963410403, "grad_norm": 0.9898187518119812, "learning_rate": 2.858634847034986e-05, "loss": 0.1203, "step": 7494 }, { "epoch": 0.16515449492362017, "grad_norm": 0.7298930287361145, "learning_rate": 2.8585894744684422e-05, "loss": 0.1036, "step": 7495 }, { "epoch": 0.16517653021313633, "grad_norm": 1.0384657382965088, "learning_rate": 2.858544094981874e-05, "loss": 0.1158, "step": 7496 }, { "epoch": 0.1651985655026525, "grad_norm": 1.2581884860992432, "learning_rate": 2.858498708575512e-05, "loss": 0.132, "step": 7497 }, { "epoch": 0.16522060079216866, "grad_norm": 0.8713648915290833, "learning_rate": 2.8584533152495876e-05, "loss": 0.1009, "step": 7498 }, { "epoch": 0.16524263608168482, "grad_norm": 1.3067435026168823, "learning_rate": 2.8584079150043324e-05, "loss": 0.1193, "step": 7499 }, { "epoch": 0.165264671371201, "grad_norm": 1.1232792139053345, "learning_rate": 2.8583625078399767e-05, "loss": 0.1383, "step": 7500 }, { "epoch": 0.16528670666071713, "grad_norm": 1.0027506351470947, "learning_rate": 2.8583170937567524e-05, "loss": 0.1437, "step": 7501 }, { "epoch": 0.1653087419502333, "grad_norm": 1.0128663778305054, "learning_rate": 2.858271672754891e-05, "loss": 0.1039, "step": 7502 }, { "epoch": 0.16533077723974945, "grad_norm": 0.7899908423423767, "learning_rate": 2.8582262448346237e-05, "loss": 0.1443, "step": 7503 }, { "epoch": 0.16535281252926562, "grad_norm": 1.5040810108184814, "learning_rate": 2.8581808099961817e-05, "loss": 0.1397, "step": 7504 }, { "epoch": 0.16537484781878178, "grad_norm": 1.3733025789260864, "learning_rate": 2.8581353682397964e-05, "loss": 0.1215, "step": 7505 }, { "epoch": 0.16539688310829795, "grad_norm": 0.8595752716064453, "learning_rate": 2.858089919565699e-05, "loss": 0.0979, "step": 7506 }, { "epoch": 0.1654189183978141, "grad_norm": 1.1741124391555786, "learning_rate": 2.858044463974122e-05, "loss": 0.117, "step": 7507 }, { "epoch": 0.16544095368733025, "grad_norm": 0.9888565540313721, "learning_rate": 2.8579990014652962e-05, "loss": 0.1154, "step": 7508 }, { "epoch": 0.16546298897684641, "grad_norm": 0.876626193523407, "learning_rate": 2.857953532039453e-05, "loss": 0.0904, "step": 7509 }, { "epoch": 0.16548502426636258, "grad_norm": 1.0394734144210815, "learning_rate": 2.8579080556968247e-05, "loss": 0.1004, "step": 7510 }, { "epoch": 0.16550705955587874, "grad_norm": 0.7887173891067505, "learning_rate": 2.857862572437642e-05, "loss": 0.0838, "step": 7511 }, { "epoch": 0.1655290948453949, "grad_norm": 0.8992505669593811, "learning_rate": 2.857817082262137e-05, "loss": 0.1179, "step": 7512 }, { "epoch": 0.16555113013491107, "grad_norm": 0.7396728992462158, "learning_rate": 2.8577715851705425e-05, "loss": 0.0864, "step": 7513 }, { "epoch": 0.1655731654244272, "grad_norm": 1.0778313875198364, "learning_rate": 2.8577260811630882e-05, "loss": 0.1222, "step": 7514 }, { "epoch": 0.16559520071394337, "grad_norm": 1.0324764251708984, "learning_rate": 2.8576805702400073e-05, "loss": 0.1307, "step": 7515 }, { "epoch": 0.16561723600345954, "grad_norm": 0.8970540165901184, "learning_rate": 2.857635052401531e-05, "loss": 0.1071, "step": 7516 }, { "epoch": 0.1656392712929757, "grad_norm": 1.6915431022644043, "learning_rate": 2.8575895276478915e-05, "loss": 0.1157, "step": 7517 }, { "epoch": 0.16566130658249187, "grad_norm": 2.3423261642456055, "learning_rate": 2.8575439959793206e-05, "loss": 0.1482, "step": 7518 }, { "epoch": 0.16568334187200803, "grad_norm": 1.012584924697876, "learning_rate": 2.85749845739605e-05, "loss": 0.1303, "step": 7519 }, { "epoch": 0.16570537716152417, "grad_norm": 0.939263105392456, "learning_rate": 2.857452911898312e-05, "loss": 0.1394, "step": 7520 }, { "epoch": 0.16572741245104033, "grad_norm": 1.6378313302993774, "learning_rate": 2.8574073594863383e-05, "loss": 0.165, "step": 7521 }, { "epoch": 0.1657494477405565, "grad_norm": 1.1127042770385742, "learning_rate": 2.857361800160361e-05, "loss": 0.1119, "step": 7522 }, { "epoch": 0.16577148303007266, "grad_norm": 0.8801605701446533, "learning_rate": 2.8573162339206123e-05, "loss": 0.1138, "step": 7523 }, { "epoch": 0.16579351831958883, "grad_norm": 1.3003302812576294, "learning_rate": 2.857270660767324e-05, "loss": 0.1278, "step": 7524 }, { "epoch": 0.165815553609105, "grad_norm": 0.991942822933197, "learning_rate": 2.857225080700728e-05, "loss": 0.201, "step": 7525 }, { "epoch": 0.16583758889862113, "grad_norm": 1.1520038843154907, "learning_rate": 2.8571794937210575e-05, "loss": 0.0836, "step": 7526 }, { "epoch": 0.1658596241881373, "grad_norm": 0.8817051649093628, "learning_rate": 2.857133899828544e-05, "loss": 0.1377, "step": 7527 }, { "epoch": 0.16588165947765346, "grad_norm": 0.854163408279419, "learning_rate": 2.8570882990234196e-05, "loss": 0.1365, "step": 7528 }, { "epoch": 0.16590369476716962, "grad_norm": 0.5116380453109741, "learning_rate": 2.857042691305917e-05, "loss": 0.1257, "step": 7529 }, { "epoch": 0.1659257300566858, "grad_norm": 1.1190191507339478, "learning_rate": 2.856997076676268e-05, "loss": 0.1224, "step": 7530 }, { "epoch": 0.16594776534620195, "grad_norm": 1.1563994884490967, "learning_rate": 2.856951455134705e-05, "loss": 0.0908, "step": 7531 }, { "epoch": 0.1659698006357181, "grad_norm": 1.1089102029800415, "learning_rate": 2.8569058266814612e-05, "loss": 0.1164, "step": 7532 }, { "epoch": 0.16599183592523425, "grad_norm": 0.8709161281585693, "learning_rate": 2.856860191316768e-05, "loss": 0.1319, "step": 7533 }, { "epoch": 0.16601387121475042, "grad_norm": 0.940718412399292, "learning_rate": 2.856814549040858e-05, "loss": 0.1075, "step": 7534 }, { "epoch": 0.16603590650426658, "grad_norm": 0.6825205087661743, "learning_rate": 2.8567688998539648e-05, "loss": 0.106, "step": 7535 }, { "epoch": 0.16605794179378275, "grad_norm": 0.8180345296859741, "learning_rate": 2.856723243756319e-05, "loss": 0.1283, "step": 7536 }, { "epoch": 0.1660799770832989, "grad_norm": 0.7152637839317322, "learning_rate": 2.8566775807481547e-05, "loss": 0.1363, "step": 7537 }, { "epoch": 0.16610201237281505, "grad_norm": 0.9502095580101013, "learning_rate": 2.8566319108297038e-05, "loss": 0.0942, "step": 7538 }, { "epoch": 0.16612404766233121, "grad_norm": 0.9780780673027039, "learning_rate": 2.8565862340011998e-05, "loss": 0.1218, "step": 7539 }, { "epoch": 0.16614608295184738, "grad_norm": 1.167179822921753, "learning_rate": 2.8565405502628737e-05, "loss": 0.1179, "step": 7540 }, { "epoch": 0.16616811824136354, "grad_norm": 1.0050232410430908, "learning_rate": 2.8564948596149597e-05, "loss": 0.1056, "step": 7541 }, { "epoch": 0.1661901535308797, "grad_norm": 1.0785311460494995, "learning_rate": 2.8564491620576896e-05, "loss": 0.1669, "step": 7542 }, { "epoch": 0.16621218882039587, "grad_norm": 0.735733151435852, "learning_rate": 2.8564034575912967e-05, "loss": 0.0792, "step": 7543 }, { "epoch": 0.16623422410991204, "grad_norm": 0.9395398497581482, "learning_rate": 2.8563577462160135e-05, "loss": 0.1095, "step": 7544 }, { "epoch": 0.16625625939942817, "grad_norm": 0.7225688695907593, "learning_rate": 2.8563120279320735e-05, "loss": 0.1623, "step": 7545 }, { "epoch": 0.16627829468894434, "grad_norm": 0.7730019092559814, "learning_rate": 2.8562663027397083e-05, "loss": 0.1279, "step": 7546 }, { "epoch": 0.1663003299784605, "grad_norm": 1.0657017230987549, "learning_rate": 2.8562205706391518e-05, "loss": 0.1404, "step": 7547 }, { "epoch": 0.16632236526797667, "grad_norm": 1.0558170080184937, "learning_rate": 2.8561748316306374e-05, "loss": 0.0921, "step": 7548 }, { "epoch": 0.16634440055749283, "grad_norm": 0.6181915402412415, "learning_rate": 2.8561290857143962e-05, "loss": 0.1146, "step": 7549 }, { "epoch": 0.166366435847009, "grad_norm": 0.8608088493347168, "learning_rate": 2.8560833328906634e-05, "loss": 0.099, "step": 7550 }, { "epoch": 0.16638847113652513, "grad_norm": 1.1753084659576416, "learning_rate": 2.8560375731596702e-05, "loss": 0.1006, "step": 7551 }, { "epoch": 0.1664105064260413, "grad_norm": 0.9988616704940796, "learning_rate": 2.855991806521651e-05, "loss": 0.0899, "step": 7552 }, { "epoch": 0.16643254171555746, "grad_norm": 1.3833218812942505, "learning_rate": 2.8559460329768375e-05, "loss": 0.0932, "step": 7553 }, { "epoch": 0.16645457700507363, "grad_norm": 1.2475959062576294, "learning_rate": 2.8559002525254645e-05, "loss": 0.1387, "step": 7554 }, { "epoch": 0.1664766122945898, "grad_norm": 1.574678897857666, "learning_rate": 2.8558544651677644e-05, "loss": 0.1592, "step": 7555 }, { "epoch": 0.16649864758410596, "grad_norm": 0.754074215888977, "learning_rate": 2.85580867090397e-05, "loss": 0.0976, "step": 7556 }, { "epoch": 0.1665206828736221, "grad_norm": 1.126680850982666, "learning_rate": 2.8557628697343156e-05, "loss": 0.1234, "step": 7557 }, { "epoch": 0.16654271816313826, "grad_norm": 1.4091874361038208, "learning_rate": 2.8557170616590335e-05, "loss": 0.1025, "step": 7558 }, { "epoch": 0.16656475345265442, "grad_norm": 0.9909861087799072, "learning_rate": 2.8556712466783577e-05, "loss": 0.134, "step": 7559 }, { "epoch": 0.1665867887421706, "grad_norm": 1.2825225591659546, "learning_rate": 2.8556254247925206e-05, "loss": 0.2008, "step": 7560 }, { "epoch": 0.16660882403168675, "grad_norm": 0.9395831823348999, "learning_rate": 2.855579596001757e-05, "loss": 0.1379, "step": 7561 }, { "epoch": 0.16663085932120292, "grad_norm": 0.760533332824707, "learning_rate": 2.855533760306299e-05, "loss": 0.13, "step": 7562 }, { "epoch": 0.16665289461071905, "grad_norm": 1.2239035367965698, "learning_rate": 2.8554879177063814e-05, "loss": 0.1481, "step": 7563 }, { "epoch": 0.16667492990023522, "grad_norm": 1.2983405590057373, "learning_rate": 2.8554420682022364e-05, "loss": 0.1259, "step": 7564 }, { "epoch": 0.16669696518975138, "grad_norm": 1.1008427143096924, "learning_rate": 2.8553962117940984e-05, "loss": 0.1471, "step": 7565 }, { "epoch": 0.16671900047926755, "grad_norm": 1.1871768236160278, "learning_rate": 2.8553503484822006e-05, "loss": 0.1497, "step": 7566 }, { "epoch": 0.1667410357687837, "grad_norm": 2.775264263153076, "learning_rate": 2.8553044782667767e-05, "loss": 0.1273, "step": 7567 }, { "epoch": 0.16676307105829988, "grad_norm": 1.36098051071167, "learning_rate": 2.85525860114806e-05, "loss": 0.1601, "step": 7568 }, { "epoch": 0.16678510634781601, "grad_norm": 1.6972991228103638, "learning_rate": 2.8552127171262846e-05, "loss": 0.0685, "step": 7569 }, { "epoch": 0.16680714163733218, "grad_norm": 1.5103713274002075, "learning_rate": 2.8551668262016844e-05, "loss": 0.1725, "step": 7570 }, { "epoch": 0.16682917692684834, "grad_norm": 1.7382478713989258, "learning_rate": 2.8551209283744928e-05, "loss": 0.1455, "step": 7571 }, { "epoch": 0.1668512122163645, "grad_norm": 1.2114284038543701, "learning_rate": 2.8550750236449435e-05, "loss": 0.1167, "step": 7572 }, { "epoch": 0.16687324750588067, "grad_norm": 1.7258644104003906, "learning_rate": 2.8550291120132706e-05, "loss": 0.1375, "step": 7573 }, { "epoch": 0.16689528279539684, "grad_norm": 0.7665854692459106, "learning_rate": 2.8549831934797077e-05, "loss": 0.119, "step": 7574 }, { "epoch": 0.16691731808491297, "grad_norm": 1.4581223726272583, "learning_rate": 2.8549372680444885e-05, "loss": 0.1057, "step": 7575 }, { "epoch": 0.16693935337442914, "grad_norm": 0.8802015781402588, "learning_rate": 2.8548913357078478e-05, "loss": 0.1123, "step": 7576 }, { "epoch": 0.1669613886639453, "grad_norm": 1.325311303138733, "learning_rate": 2.8548453964700187e-05, "loss": 0.1673, "step": 7577 }, { "epoch": 0.16698342395346147, "grad_norm": 0.801673412322998, "learning_rate": 2.8547994503312356e-05, "loss": 0.0893, "step": 7578 }, { "epoch": 0.16700545924297763, "grad_norm": 1.8185783624649048, "learning_rate": 2.854753497291732e-05, "loss": 0.1355, "step": 7579 }, { "epoch": 0.1670274945324938, "grad_norm": 1.2645171880722046, "learning_rate": 2.8547075373517428e-05, "loss": 0.1497, "step": 7580 }, { "epoch": 0.16704952982200996, "grad_norm": 1.3812410831451416, "learning_rate": 2.8546615705115013e-05, "loss": 0.136, "step": 7581 }, { "epoch": 0.1670715651115261, "grad_norm": 1.5599219799041748, "learning_rate": 2.8546155967712423e-05, "loss": 0.1228, "step": 7582 }, { "epoch": 0.16709360040104226, "grad_norm": 1.3892920017242432, "learning_rate": 2.8545696161311996e-05, "loss": 0.1083, "step": 7583 }, { "epoch": 0.16711563569055843, "grad_norm": 1.097118616104126, "learning_rate": 2.8545236285916074e-05, "loss": 0.1355, "step": 7584 }, { "epoch": 0.1671376709800746, "grad_norm": 3.007668972015381, "learning_rate": 2.8544776341527e-05, "loss": 0.1314, "step": 7585 }, { "epoch": 0.16715970626959076, "grad_norm": 0.8129276037216187, "learning_rate": 2.8544316328147117e-05, "loss": 0.1281, "step": 7586 }, { "epoch": 0.16718174155910692, "grad_norm": 1.7811516523361206, "learning_rate": 2.854385624577877e-05, "loss": 0.1756, "step": 7587 }, { "epoch": 0.16720377684862306, "grad_norm": 2.4910824298858643, "learning_rate": 2.8543396094424293e-05, "loss": 0.1304, "step": 7588 }, { "epoch": 0.16722581213813922, "grad_norm": 1.255258560180664, "learning_rate": 2.8542935874086044e-05, "loss": 0.1157, "step": 7589 }, { "epoch": 0.1672478474276554, "grad_norm": 0.9241337776184082, "learning_rate": 2.854247558476636e-05, "loss": 0.1484, "step": 7590 }, { "epoch": 0.16726988271717155, "grad_norm": 0.6642576456069946, "learning_rate": 2.8542015226467584e-05, "loss": 0.1233, "step": 7591 }, { "epoch": 0.16729191800668772, "grad_norm": 0.8250662684440613, "learning_rate": 2.854155479919206e-05, "loss": 0.1078, "step": 7592 }, { "epoch": 0.16731395329620388, "grad_norm": 0.9401500821113586, "learning_rate": 2.8541094302942144e-05, "loss": 0.1228, "step": 7593 }, { "epoch": 0.16733598858572002, "grad_norm": 1.1669143438339233, "learning_rate": 2.8540633737720167e-05, "loss": 0.1053, "step": 7594 }, { "epoch": 0.16735802387523618, "grad_norm": 1.1902618408203125, "learning_rate": 2.8540173103528484e-05, "loss": 0.1193, "step": 7595 }, { "epoch": 0.16738005916475235, "grad_norm": 0.8614186644554138, "learning_rate": 2.8539712400369438e-05, "loss": 0.1365, "step": 7596 }, { "epoch": 0.1674020944542685, "grad_norm": 1.1743565797805786, "learning_rate": 2.8539251628245376e-05, "loss": 0.1185, "step": 7597 }, { "epoch": 0.16742412974378468, "grad_norm": 1.5368202924728394, "learning_rate": 2.8538790787158642e-05, "loss": 0.1351, "step": 7598 }, { "epoch": 0.16744616503330084, "grad_norm": 0.9671428799629211, "learning_rate": 2.853832987711159e-05, "loss": 0.117, "step": 7599 }, { "epoch": 0.16746820032281698, "grad_norm": 0.8579176664352417, "learning_rate": 2.853786889810656e-05, "loss": 0.1161, "step": 7600 }, { "epoch": 0.16749023561233314, "grad_norm": 1.6495707035064697, "learning_rate": 2.853740785014591e-05, "loss": 0.1416, "step": 7601 }, { "epoch": 0.1675122709018493, "grad_norm": 1.0735414028167725, "learning_rate": 2.8536946733231977e-05, "loss": 0.1259, "step": 7602 }, { "epoch": 0.16753430619136547, "grad_norm": 1.1373403072357178, "learning_rate": 2.8536485547367123e-05, "loss": 0.093, "step": 7603 }, { "epoch": 0.16755634148088164, "grad_norm": 0.8213560581207275, "learning_rate": 2.8536024292553682e-05, "loss": 0.0975, "step": 7604 }, { "epoch": 0.1675783767703978, "grad_norm": 0.8134408593177795, "learning_rate": 2.8535562968794012e-05, "loss": 0.1307, "step": 7605 }, { "epoch": 0.16760041205991394, "grad_norm": 0.7489710450172424, "learning_rate": 2.8535101576090462e-05, "loss": 0.1459, "step": 7606 }, { "epoch": 0.1676224473494301, "grad_norm": 1.8394745588302612, "learning_rate": 2.8534640114445385e-05, "loss": 0.1662, "step": 7607 }, { "epoch": 0.16764448263894627, "grad_norm": 1.140064001083374, "learning_rate": 2.853417858386112e-05, "loss": 0.164, "step": 7608 }, { "epoch": 0.16766651792846243, "grad_norm": 0.805046558380127, "learning_rate": 2.8533716984340035e-05, "loss": 0.1094, "step": 7609 }, { "epoch": 0.1676885532179786, "grad_norm": 0.9126074314117432, "learning_rate": 2.8533255315884468e-05, "loss": 0.1259, "step": 7610 }, { "epoch": 0.16771058850749476, "grad_norm": 0.8233598470687866, "learning_rate": 2.853279357849677e-05, "loss": 0.1094, "step": 7611 }, { "epoch": 0.16773262379701093, "grad_norm": 0.8403860926628113, "learning_rate": 2.8532331772179307e-05, "loss": 0.1249, "step": 7612 }, { "epoch": 0.16775465908652706, "grad_norm": 0.6689436435699463, "learning_rate": 2.8531869896934415e-05, "loss": 0.0899, "step": 7613 }, { "epoch": 0.16777669437604323, "grad_norm": 1.2344541549682617, "learning_rate": 2.8531407952764453e-05, "loss": 0.148, "step": 7614 }, { "epoch": 0.1677987296655594, "grad_norm": 1.1445673704147339, "learning_rate": 2.8530945939671776e-05, "loss": 0.1062, "step": 7615 }, { "epoch": 0.16782076495507556, "grad_norm": 0.8674038052558899, "learning_rate": 2.8530483857658736e-05, "loss": 0.1434, "step": 7616 }, { "epoch": 0.16784280024459172, "grad_norm": 0.6893543601036072, "learning_rate": 2.8530021706727686e-05, "loss": 0.1288, "step": 7617 }, { "epoch": 0.1678648355341079, "grad_norm": 1.2633105516433716, "learning_rate": 2.8529559486880982e-05, "loss": 0.1265, "step": 7618 }, { "epoch": 0.16788687082362402, "grad_norm": 1.1689056158065796, "learning_rate": 2.8529097198120972e-05, "loss": 0.1104, "step": 7619 }, { "epoch": 0.1679089061131402, "grad_norm": 0.9998787045478821, "learning_rate": 2.852863484045002e-05, "loss": 0.0945, "step": 7620 }, { "epoch": 0.16793094140265635, "grad_norm": 0.8998631238937378, "learning_rate": 2.852817241387047e-05, "loss": 0.119, "step": 7621 }, { "epoch": 0.16795297669217252, "grad_norm": 0.8352821469306946, "learning_rate": 2.8527709918384688e-05, "loss": 0.1449, "step": 7622 }, { "epoch": 0.16797501198168868, "grad_norm": 0.5322898030281067, "learning_rate": 2.8527247353995024e-05, "loss": 0.1246, "step": 7623 }, { "epoch": 0.16799704727120485, "grad_norm": 0.920602023601532, "learning_rate": 2.8526784720703833e-05, "loss": 0.1164, "step": 7624 }, { "epoch": 0.16801908256072098, "grad_norm": 0.7946732044219971, "learning_rate": 2.852632201851348e-05, "loss": 0.1112, "step": 7625 }, { "epoch": 0.16804111785023715, "grad_norm": 0.969957172870636, "learning_rate": 2.8525859247426308e-05, "loss": 0.1292, "step": 7626 }, { "epoch": 0.1680631531397533, "grad_norm": 0.9143491983413696, "learning_rate": 2.8525396407444686e-05, "loss": 0.1095, "step": 7627 }, { "epoch": 0.16808518842926948, "grad_norm": 0.9059811234474182, "learning_rate": 2.8524933498570968e-05, "loss": 0.145, "step": 7628 }, { "epoch": 0.16810722371878564, "grad_norm": 0.8922221064567566, "learning_rate": 2.852447052080751e-05, "loss": 0.1333, "step": 7629 }, { "epoch": 0.1681292590083018, "grad_norm": 1.367566704750061, "learning_rate": 2.852400747415667e-05, "loss": 0.1225, "step": 7630 }, { "epoch": 0.16815129429781794, "grad_norm": 0.7914758324623108, "learning_rate": 2.852354435862081e-05, "loss": 0.1269, "step": 7631 }, { "epoch": 0.1681733295873341, "grad_norm": 0.9087560772895813, "learning_rate": 2.8523081174202283e-05, "loss": 0.1435, "step": 7632 }, { "epoch": 0.16819536487685027, "grad_norm": 0.8273312449455261, "learning_rate": 2.8522617920903456e-05, "loss": 0.0866, "step": 7633 }, { "epoch": 0.16821740016636644, "grad_norm": 0.9941393733024597, "learning_rate": 2.8522154598726682e-05, "loss": 0.1277, "step": 7634 }, { "epoch": 0.1682394354558826, "grad_norm": 1.16139554977417, "learning_rate": 2.8521691207674325e-05, "loss": 0.172, "step": 7635 }, { "epoch": 0.16826147074539877, "grad_norm": 0.7896468043327332, "learning_rate": 2.8521227747748744e-05, "loss": 0.0918, "step": 7636 }, { "epoch": 0.1682835060349149, "grad_norm": 1.0123343467712402, "learning_rate": 2.85207642189523e-05, "loss": 0.0891, "step": 7637 }, { "epoch": 0.16830554132443107, "grad_norm": 1.3138530254364014, "learning_rate": 2.8520300621287346e-05, "loss": 0.1005, "step": 7638 }, { "epoch": 0.16832757661394723, "grad_norm": 0.9423213601112366, "learning_rate": 2.8519836954756262e-05, "loss": 0.1115, "step": 7639 }, { "epoch": 0.1683496119034634, "grad_norm": 0.9634920954704285, "learning_rate": 2.8519373219361394e-05, "loss": 0.1361, "step": 7640 }, { "epoch": 0.16837164719297956, "grad_norm": 0.9621050953865051, "learning_rate": 2.8518909415105105e-05, "loss": 0.1176, "step": 7641 }, { "epoch": 0.16839368248249573, "grad_norm": 0.9934105277061462, "learning_rate": 2.8518445541989767e-05, "loss": 0.113, "step": 7642 }, { "epoch": 0.16841571777201186, "grad_norm": 1.1566392183303833, "learning_rate": 2.8517981600017728e-05, "loss": 0.1136, "step": 7643 }, { "epoch": 0.16843775306152803, "grad_norm": 0.8939020037651062, "learning_rate": 2.8517517589191367e-05, "loss": 0.0803, "step": 7644 }, { "epoch": 0.1684597883510442, "grad_norm": 0.7254804372787476, "learning_rate": 2.851705350951304e-05, "loss": 0.1104, "step": 7645 }, { "epoch": 0.16848182364056036, "grad_norm": 0.6146678924560547, "learning_rate": 2.8516589360985107e-05, "loss": 0.1085, "step": 7646 }, { "epoch": 0.16850385893007652, "grad_norm": 0.8854680061340332, "learning_rate": 2.8516125143609938e-05, "loss": 0.1189, "step": 7647 }, { "epoch": 0.1685258942195927, "grad_norm": 0.7924444675445557, "learning_rate": 2.8515660857389896e-05, "loss": 0.1007, "step": 7648 }, { "epoch": 0.16854792950910885, "grad_norm": 0.9776450991630554, "learning_rate": 2.8515196502327346e-05, "loss": 0.1064, "step": 7649 }, { "epoch": 0.168569964798625, "grad_norm": 0.7108548879623413, "learning_rate": 2.8514732078424654e-05, "loss": 0.1135, "step": 7650 }, { "epoch": 0.16859200008814115, "grad_norm": 1.2209086418151855, "learning_rate": 2.8514267585684183e-05, "loss": 0.1332, "step": 7651 }, { "epoch": 0.16861403537765732, "grad_norm": 1.1504852771759033, "learning_rate": 2.85138030241083e-05, "loss": 0.1753, "step": 7652 }, { "epoch": 0.16863607066717348, "grad_norm": 1.121938943862915, "learning_rate": 2.8513338393699373e-05, "loss": 0.1281, "step": 7653 }, { "epoch": 0.16865810595668965, "grad_norm": 0.9296425580978394, "learning_rate": 2.8512873694459764e-05, "loss": 0.1187, "step": 7654 }, { "epoch": 0.1686801412462058, "grad_norm": 1.8395581245422363, "learning_rate": 2.8512408926391847e-05, "loss": 0.165, "step": 7655 }, { "epoch": 0.16870217653572195, "grad_norm": 1.3940569162368774, "learning_rate": 2.8511944089497983e-05, "loss": 0.1262, "step": 7656 }, { "epoch": 0.1687242118252381, "grad_norm": 1.1878081560134888, "learning_rate": 2.8511479183780544e-05, "loss": 0.1351, "step": 7657 }, { "epoch": 0.16874624711475428, "grad_norm": 1.2573145627975464, "learning_rate": 2.8511014209241895e-05, "loss": 0.1052, "step": 7658 }, { "epoch": 0.16876828240427044, "grad_norm": 1.2573117017745972, "learning_rate": 2.8510549165884404e-05, "loss": 0.1641, "step": 7659 }, { "epoch": 0.1687903176937866, "grad_norm": 0.9160093069076538, "learning_rate": 2.8510084053710443e-05, "loss": 0.1205, "step": 7660 }, { "epoch": 0.16881235298330277, "grad_norm": 0.8689572811126709, "learning_rate": 2.850961887272238e-05, "loss": 0.1146, "step": 7661 }, { "epoch": 0.1688343882728189, "grad_norm": 0.831255316734314, "learning_rate": 2.850915362292258e-05, "loss": 0.1051, "step": 7662 }, { "epoch": 0.16885642356233507, "grad_norm": 1.2638438940048218, "learning_rate": 2.8508688304313416e-05, "loss": 0.1633, "step": 7663 }, { "epoch": 0.16887845885185124, "grad_norm": 1.0126433372497559, "learning_rate": 2.8508222916897268e-05, "loss": 0.1003, "step": 7664 }, { "epoch": 0.1689004941413674, "grad_norm": 0.8658202290534973, "learning_rate": 2.8507757460676487e-05, "loss": 0.0771, "step": 7665 }, { "epoch": 0.16892252943088357, "grad_norm": 1.429708480834961, "learning_rate": 2.8507291935653457e-05, "loss": 0.1413, "step": 7666 }, { "epoch": 0.16894456472039973, "grad_norm": 1.1763585805892944, "learning_rate": 2.8506826341830545e-05, "loss": 0.1134, "step": 7667 }, { "epoch": 0.16896660000991587, "grad_norm": 0.9745129346847534, "learning_rate": 2.8506360679210122e-05, "loss": 0.0978, "step": 7668 }, { "epoch": 0.16898863529943203, "grad_norm": 0.8569708466529846, "learning_rate": 2.8505894947794565e-05, "loss": 0.095, "step": 7669 }, { "epoch": 0.1690106705889482, "grad_norm": 1.1670053005218506, "learning_rate": 2.8505429147586244e-05, "loss": 0.1136, "step": 7670 }, { "epoch": 0.16903270587846436, "grad_norm": 1.3851956129074097, "learning_rate": 2.8504963278587528e-05, "loss": 0.0971, "step": 7671 }, { "epoch": 0.16905474116798053, "grad_norm": 1.4785748720169067, "learning_rate": 2.850449734080079e-05, "loss": 0.1171, "step": 7672 }, { "epoch": 0.1690767764574967, "grad_norm": 0.9082746505737305, "learning_rate": 2.8504031334228408e-05, "loss": 0.137, "step": 7673 }, { "epoch": 0.16909881174701283, "grad_norm": 1.129695177078247, "learning_rate": 2.850356525887275e-05, "loss": 0.1113, "step": 7674 }, { "epoch": 0.169120847036529, "grad_norm": 0.636954128742218, "learning_rate": 2.85030991147362e-05, "loss": 0.1038, "step": 7675 }, { "epoch": 0.16914288232604516, "grad_norm": 0.8435788154602051, "learning_rate": 2.8502632901821117e-05, "loss": 0.1341, "step": 7676 }, { "epoch": 0.16916491761556132, "grad_norm": 1.0446982383728027, "learning_rate": 2.8502166620129887e-05, "loss": 0.0844, "step": 7677 }, { "epoch": 0.1691869529050775, "grad_norm": 0.6830188035964966, "learning_rate": 2.850170026966488e-05, "loss": 0.1001, "step": 7678 }, { "epoch": 0.16920898819459365, "grad_norm": 0.8024254441261292, "learning_rate": 2.8501233850428476e-05, "loss": 0.1232, "step": 7679 }, { "epoch": 0.1692310234841098, "grad_norm": 0.936691164970398, "learning_rate": 2.850076736242305e-05, "loss": 0.1228, "step": 7680 }, { "epoch": 0.16925305877362595, "grad_norm": 0.8776817321777344, "learning_rate": 2.8500300805650975e-05, "loss": 0.1076, "step": 7681 }, { "epoch": 0.16927509406314212, "grad_norm": 0.9048954844474792, "learning_rate": 2.8499834180114625e-05, "loss": 0.1105, "step": 7682 }, { "epoch": 0.16929712935265828, "grad_norm": 1.2760467529296875, "learning_rate": 2.8499367485816384e-05, "loss": 0.1556, "step": 7683 }, { "epoch": 0.16931916464217445, "grad_norm": 0.9208431243896484, "learning_rate": 2.849890072275862e-05, "loss": 0.1056, "step": 7684 }, { "epoch": 0.1693411999316906, "grad_norm": 0.8498951196670532, "learning_rate": 2.849843389094372e-05, "loss": 0.1132, "step": 7685 }, { "epoch": 0.16936323522120678, "grad_norm": 0.8939398527145386, "learning_rate": 2.849796699037406e-05, "loss": 0.1215, "step": 7686 }, { "epoch": 0.1693852705107229, "grad_norm": 0.9743553996086121, "learning_rate": 2.8497500021052017e-05, "loss": 0.1283, "step": 7687 }, { "epoch": 0.16940730580023908, "grad_norm": 0.9009155631065369, "learning_rate": 2.849703298297996e-05, "loss": 0.1458, "step": 7688 }, { "epoch": 0.16942934108975524, "grad_norm": 0.598538339138031, "learning_rate": 2.8496565876160284e-05, "loss": 0.0942, "step": 7689 }, { "epoch": 0.1694513763792714, "grad_norm": 0.9134876132011414, "learning_rate": 2.8496098700595358e-05, "loss": 0.1316, "step": 7690 }, { "epoch": 0.16947341166878757, "grad_norm": 0.8986063599586487, "learning_rate": 2.8495631456287565e-05, "loss": 0.1219, "step": 7691 }, { "epoch": 0.16949544695830374, "grad_norm": 1.1624709367752075, "learning_rate": 2.849516414323928e-05, "loss": 0.1178, "step": 7692 }, { "epoch": 0.16951748224781987, "grad_norm": 1.0874003171920776, "learning_rate": 2.8494696761452895e-05, "loss": 0.1672, "step": 7693 }, { "epoch": 0.16953951753733604, "grad_norm": 0.6510258913040161, "learning_rate": 2.8494229310930777e-05, "loss": 0.0664, "step": 7694 }, { "epoch": 0.1695615528268522, "grad_norm": 0.8729498386383057, "learning_rate": 2.8493761791675314e-05, "loss": 0.1377, "step": 7695 }, { "epoch": 0.16958358811636837, "grad_norm": 0.8961494565010071, "learning_rate": 2.8493294203688883e-05, "loss": 0.1409, "step": 7696 }, { "epoch": 0.16960562340588453, "grad_norm": 1.0493594408035278, "learning_rate": 2.8492826546973878e-05, "loss": 0.1437, "step": 7697 }, { "epoch": 0.1696276586954007, "grad_norm": 0.9615578651428223, "learning_rate": 2.8492358821532668e-05, "loss": 0.1345, "step": 7698 }, { "epoch": 0.16964969398491683, "grad_norm": 1.130708932876587, "learning_rate": 2.8491891027367637e-05, "loss": 0.1117, "step": 7699 }, { "epoch": 0.169671729274433, "grad_norm": 1.0128523111343384, "learning_rate": 2.849142316448117e-05, "loss": 0.1272, "step": 7700 }, { "epoch": 0.16969376456394916, "grad_norm": 0.5720304846763611, "learning_rate": 2.8490955232875648e-05, "loss": 0.1027, "step": 7701 }, { "epoch": 0.16971579985346533, "grad_norm": 1.2205148935317993, "learning_rate": 2.8490487232553464e-05, "loss": 0.1326, "step": 7702 }, { "epoch": 0.1697378351429815, "grad_norm": 1.2899808883666992, "learning_rate": 2.849001916351699e-05, "loss": 0.0952, "step": 7703 }, { "epoch": 0.16975987043249766, "grad_norm": 0.7539896965026855, "learning_rate": 2.8489551025768614e-05, "loss": 0.133, "step": 7704 }, { "epoch": 0.1697819057220138, "grad_norm": 1.1316996812820435, "learning_rate": 2.848908281931072e-05, "loss": 0.1424, "step": 7705 }, { "epoch": 0.16980394101152996, "grad_norm": 0.9371705651283264, "learning_rate": 2.8488614544145697e-05, "loss": 0.184, "step": 7706 }, { "epoch": 0.16982597630104612, "grad_norm": 2.1047983169555664, "learning_rate": 2.8488146200275928e-05, "loss": 0.1338, "step": 7707 }, { "epoch": 0.1698480115905623, "grad_norm": 2.128472089767456, "learning_rate": 2.848767778770379e-05, "loss": 0.1692, "step": 7708 }, { "epoch": 0.16987004688007845, "grad_norm": 0.6016837358474731, "learning_rate": 2.848720930643168e-05, "loss": 0.1253, "step": 7709 }, { "epoch": 0.16989208216959462, "grad_norm": 0.7058019638061523, "learning_rate": 2.8486740756461983e-05, "loss": 0.0851, "step": 7710 }, { "epoch": 0.16991411745911075, "grad_norm": 1.350139856338501, "learning_rate": 2.848627213779708e-05, "loss": 0.1049, "step": 7711 }, { "epoch": 0.16993615274862692, "grad_norm": 1.1021169424057007, "learning_rate": 2.8485803450439368e-05, "loss": 0.1195, "step": 7712 }, { "epoch": 0.16995818803814308, "grad_norm": 0.8817929625511169, "learning_rate": 2.848533469439122e-05, "loss": 0.1036, "step": 7713 }, { "epoch": 0.16998022332765925, "grad_norm": 1.2858258485794067, "learning_rate": 2.8484865869655032e-05, "loss": 0.1117, "step": 7714 }, { "epoch": 0.1700022586171754, "grad_norm": 1.5113204717636108, "learning_rate": 2.8484396976233194e-05, "loss": 0.1251, "step": 7715 }, { "epoch": 0.17002429390669158, "grad_norm": 0.9318037629127502, "learning_rate": 2.848392801412809e-05, "loss": 0.1235, "step": 7716 }, { "epoch": 0.1700463291962077, "grad_norm": 0.7269327044487, "learning_rate": 2.8483458983342108e-05, "loss": 0.1018, "step": 7717 }, { "epoch": 0.17006836448572388, "grad_norm": 1.0822359323501587, "learning_rate": 2.8482989883877642e-05, "loss": 0.1041, "step": 7718 }, { "epoch": 0.17009039977524004, "grad_norm": 1.0694020986557007, "learning_rate": 2.8482520715737076e-05, "loss": 0.1349, "step": 7719 }, { "epoch": 0.1701124350647562, "grad_norm": 1.1409579515457153, "learning_rate": 2.8482051478922802e-05, "loss": 0.1501, "step": 7720 }, { "epoch": 0.17013447035427237, "grad_norm": 0.9546922445297241, "learning_rate": 2.8481582173437212e-05, "loss": 0.1557, "step": 7721 }, { "epoch": 0.17015650564378854, "grad_norm": 0.9458362460136414, "learning_rate": 2.8481112799282695e-05, "loss": 0.1163, "step": 7722 }, { "epoch": 0.1701785409333047, "grad_norm": 1.3678253889083862, "learning_rate": 2.848064335646164e-05, "loss": 0.1069, "step": 7723 }, { "epoch": 0.17020057622282084, "grad_norm": 0.9006319046020508, "learning_rate": 2.8480173844976438e-05, "loss": 0.1302, "step": 7724 }, { "epoch": 0.170222611512337, "grad_norm": 0.9422777891159058, "learning_rate": 2.847970426482948e-05, "loss": 0.1085, "step": 7725 }, { "epoch": 0.17024464680185317, "grad_norm": 1.2313423156738281, "learning_rate": 2.8479234616023172e-05, "loss": 0.156, "step": 7726 }, { "epoch": 0.17026668209136933, "grad_norm": 0.8356850147247314, "learning_rate": 2.8478764898559883e-05, "loss": 0.1184, "step": 7727 }, { "epoch": 0.1702887173808855, "grad_norm": 1.352412223815918, "learning_rate": 2.847829511244202e-05, "loss": 0.1509, "step": 7728 }, { "epoch": 0.17031075267040166, "grad_norm": 0.89490807056427, "learning_rate": 2.8477825257671968e-05, "loss": 0.1258, "step": 7729 }, { "epoch": 0.1703327879599178, "grad_norm": 1.2763147354125977, "learning_rate": 2.847735533425213e-05, "loss": 0.1216, "step": 7730 }, { "epoch": 0.17035482324943396, "grad_norm": 0.925615668296814, "learning_rate": 2.8476885342184892e-05, "loss": 0.1142, "step": 7731 }, { "epoch": 0.17037685853895013, "grad_norm": 1.3971751928329468, "learning_rate": 2.8476415281472648e-05, "loss": 0.1966, "step": 7732 }, { "epoch": 0.1703988938284663, "grad_norm": 0.9194199442863464, "learning_rate": 2.8475945152117796e-05, "loss": 0.0992, "step": 7733 }, { "epoch": 0.17042092911798246, "grad_norm": 1.2435907125473022, "learning_rate": 2.847547495412273e-05, "loss": 0.0864, "step": 7734 }, { "epoch": 0.17044296440749862, "grad_norm": 1.0004932880401611, "learning_rate": 2.8475004687489845e-05, "loss": 0.1438, "step": 7735 }, { "epoch": 0.17046499969701476, "grad_norm": 0.9113984704017639, "learning_rate": 2.8474534352221534e-05, "loss": 0.1097, "step": 7736 }, { "epoch": 0.17048703498653092, "grad_norm": 1.2977975606918335, "learning_rate": 2.8474063948320193e-05, "loss": 0.1575, "step": 7737 }, { "epoch": 0.1705090702760471, "grad_norm": 0.818925678730011, "learning_rate": 2.8473593475788214e-05, "loss": 0.0975, "step": 7738 }, { "epoch": 0.17053110556556325, "grad_norm": 0.7523373365402222, "learning_rate": 2.8473122934628006e-05, "loss": 0.152, "step": 7739 }, { "epoch": 0.17055314085507942, "grad_norm": 0.8419925570487976, "learning_rate": 2.8472652324841958e-05, "loss": 0.1251, "step": 7740 }, { "epoch": 0.17057517614459558, "grad_norm": 1.165907859802246, "learning_rate": 2.8472181646432465e-05, "loss": 0.136, "step": 7741 }, { "epoch": 0.17059721143411172, "grad_norm": 0.9478792548179626, "learning_rate": 2.8471710899401926e-05, "loss": 0.1051, "step": 7742 }, { "epoch": 0.17061924672362788, "grad_norm": 0.8598949313163757, "learning_rate": 2.847124008375274e-05, "loss": 0.1034, "step": 7743 }, { "epoch": 0.17064128201314405, "grad_norm": 1.2059104442596436, "learning_rate": 2.84707691994873e-05, "loss": 0.1415, "step": 7744 }, { "epoch": 0.1706633173026602, "grad_norm": 0.700881838798523, "learning_rate": 2.8470298246608017e-05, "loss": 0.1191, "step": 7745 }, { "epoch": 0.17068535259217638, "grad_norm": 1.5438377857208252, "learning_rate": 2.8469827225117275e-05, "loss": 0.1341, "step": 7746 }, { "epoch": 0.17070738788169254, "grad_norm": 0.9055706858634949, "learning_rate": 2.8469356135017487e-05, "loss": 0.0988, "step": 7747 }, { "epoch": 0.17072942317120868, "grad_norm": 1.258599042892456, "learning_rate": 2.8468884976311036e-05, "loss": 0.1174, "step": 7748 }, { "epoch": 0.17075145846072484, "grad_norm": 1.4887995719909668, "learning_rate": 2.8468413749000338e-05, "loss": 0.143, "step": 7749 }, { "epoch": 0.170773493750241, "grad_norm": 0.5239977240562439, "learning_rate": 2.8467942453087783e-05, "loss": 0.0811, "step": 7750 }, { "epoch": 0.17079552903975717, "grad_norm": 0.7145633101463318, "learning_rate": 2.846747108857578e-05, "loss": 0.1073, "step": 7751 }, { "epoch": 0.17081756432927334, "grad_norm": 0.7519392371177673, "learning_rate": 2.846699965546672e-05, "loss": 0.0964, "step": 7752 }, { "epoch": 0.1708395996187895, "grad_norm": 1.4531054496765137, "learning_rate": 2.846652815376301e-05, "loss": 0.0861, "step": 7753 }, { "epoch": 0.17086163490830567, "grad_norm": 1.0169296264648438, "learning_rate": 2.8466056583467053e-05, "loss": 0.1003, "step": 7754 }, { "epoch": 0.1708836701978218, "grad_norm": 2.5287811756134033, "learning_rate": 2.8465584944581246e-05, "loss": 0.1035, "step": 7755 }, { "epoch": 0.17090570548733797, "grad_norm": 2.0044312477111816, "learning_rate": 2.8465113237107993e-05, "loss": 0.1721, "step": 7756 }, { "epoch": 0.17092774077685413, "grad_norm": 0.7082241177558899, "learning_rate": 2.84646414610497e-05, "loss": 0.1113, "step": 7757 }, { "epoch": 0.1709497760663703, "grad_norm": 0.8054293990135193, "learning_rate": 2.846416961640877e-05, "loss": 0.1053, "step": 7758 }, { "epoch": 0.17097181135588646, "grad_norm": 0.7551010847091675, "learning_rate": 2.84636977031876e-05, "loss": 0.1238, "step": 7759 }, { "epoch": 0.17099384664540263, "grad_norm": 0.7983087301254272, "learning_rate": 2.84632257213886e-05, "loss": 0.1111, "step": 7760 }, { "epoch": 0.17101588193491876, "grad_norm": 0.8068163394927979, "learning_rate": 2.846275367101417e-05, "loss": 0.1087, "step": 7761 }, { "epoch": 0.17103791722443493, "grad_norm": 0.9807559251785278, "learning_rate": 2.846228155206672e-05, "loss": 0.0954, "step": 7762 }, { "epoch": 0.1710599525139511, "grad_norm": 1.0537701845169067, "learning_rate": 2.8461809364548646e-05, "loss": 0.1295, "step": 7763 }, { "epoch": 0.17108198780346726, "grad_norm": 0.928584098815918, "learning_rate": 2.846133710846236e-05, "loss": 0.0789, "step": 7764 }, { "epoch": 0.17110402309298342, "grad_norm": 1.0650256872177124, "learning_rate": 2.8460864783810268e-05, "loss": 0.1039, "step": 7765 }, { "epoch": 0.17112605838249959, "grad_norm": 1.1563360691070557, "learning_rate": 2.8460392390594774e-05, "loss": 0.0747, "step": 7766 }, { "epoch": 0.17114809367201572, "grad_norm": 1.0248702764511108, "learning_rate": 2.8459919928818278e-05, "loss": 0.118, "step": 7767 }, { "epoch": 0.1711701289615319, "grad_norm": 0.9120998382568359, "learning_rate": 2.8459447398483197e-05, "loss": 0.0979, "step": 7768 }, { "epoch": 0.17119216425104805, "grad_norm": 0.8883855938911438, "learning_rate": 2.845897479959193e-05, "loss": 0.1047, "step": 7769 }, { "epoch": 0.17121419954056422, "grad_norm": 0.7507833242416382, "learning_rate": 2.8458502132146888e-05, "loss": 0.1393, "step": 7770 }, { "epoch": 0.17123623483008038, "grad_norm": 0.8147630095481873, "learning_rate": 2.845802939615048e-05, "loss": 0.1149, "step": 7771 }, { "epoch": 0.17125827011959655, "grad_norm": 1.0617767572402954, "learning_rate": 2.845755659160511e-05, "loss": 0.1093, "step": 7772 }, { "epoch": 0.17128030540911268, "grad_norm": 1.3975261449813843, "learning_rate": 2.8457083718513186e-05, "loss": 0.1762, "step": 7773 }, { "epoch": 0.17130234069862885, "grad_norm": 1.0197248458862305, "learning_rate": 2.845661077687712e-05, "loss": 0.1099, "step": 7774 }, { "epoch": 0.171324375988145, "grad_norm": 1.4466925859451294, "learning_rate": 2.845613776669932e-05, "loss": 0.1259, "step": 7775 }, { "epoch": 0.17134641127766118, "grad_norm": 0.6759363412857056, "learning_rate": 2.8455664687982194e-05, "loss": 0.0984, "step": 7776 }, { "epoch": 0.17136844656717734, "grad_norm": 0.9879265427589417, "learning_rate": 2.8455191540728148e-05, "loss": 0.142, "step": 7777 }, { "epoch": 0.1713904818566935, "grad_norm": 1.3306101560592651, "learning_rate": 2.8454718324939602e-05, "loss": 0.1676, "step": 7778 }, { "epoch": 0.17141251714620964, "grad_norm": 1.1842771768569946, "learning_rate": 2.8454245040618953e-05, "loss": 0.1178, "step": 7779 }, { "epoch": 0.1714345524357258, "grad_norm": 1.2852303981781006, "learning_rate": 2.8453771687768628e-05, "loss": 0.1148, "step": 7780 }, { "epoch": 0.17145658772524197, "grad_norm": 0.8585781455039978, "learning_rate": 2.8453298266391024e-05, "loss": 0.1234, "step": 7781 }, { "epoch": 0.17147862301475814, "grad_norm": 0.9527220129966736, "learning_rate": 2.845282477648856e-05, "loss": 0.1142, "step": 7782 }, { "epoch": 0.1715006583042743, "grad_norm": 1.0707554817199707, "learning_rate": 2.8452351218063646e-05, "loss": 0.1245, "step": 7783 }, { "epoch": 0.17152269359379047, "grad_norm": 1.105903148651123, "learning_rate": 2.8451877591118695e-05, "loss": 0.1191, "step": 7784 }, { "epoch": 0.1715447288833066, "grad_norm": 1.6052603721618652, "learning_rate": 2.8451403895656113e-05, "loss": 0.1182, "step": 7785 }, { "epoch": 0.17156676417282277, "grad_norm": 1.1593483686447144, "learning_rate": 2.845093013167832e-05, "loss": 0.1236, "step": 7786 }, { "epoch": 0.17158879946233893, "grad_norm": 0.5009661316871643, "learning_rate": 2.8450456299187725e-05, "loss": 0.106, "step": 7787 }, { "epoch": 0.1716108347518551, "grad_norm": 1.3468221426010132, "learning_rate": 2.8449982398186747e-05, "loss": 0.137, "step": 7788 }, { "epoch": 0.17163287004137126, "grad_norm": 0.6846663355827332, "learning_rate": 2.84495084286778e-05, "loss": 0.1034, "step": 7789 }, { "epoch": 0.17165490533088743, "grad_norm": 1.556594967842102, "learning_rate": 2.8449034390663286e-05, "loss": 0.0965, "step": 7790 }, { "epoch": 0.1716769406204036, "grad_norm": 0.8162702918052673, "learning_rate": 2.844856028414563e-05, "loss": 0.0947, "step": 7791 }, { "epoch": 0.17169897590991973, "grad_norm": 0.8723856210708618, "learning_rate": 2.8448086109127247e-05, "loss": 0.1113, "step": 7792 }, { "epoch": 0.1717210111994359, "grad_norm": 0.9974804520606995, "learning_rate": 2.844761186561055e-05, "loss": 0.1047, "step": 7793 }, { "epoch": 0.17174304648895206, "grad_norm": 1.1853681802749634, "learning_rate": 2.844713755359795e-05, "loss": 0.1016, "step": 7794 }, { "epoch": 0.17176508177846822, "grad_norm": 0.8800833821296692, "learning_rate": 2.8446663173091873e-05, "loss": 0.1214, "step": 7795 }, { "epoch": 0.17178711706798439, "grad_norm": 0.7170263528823853, "learning_rate": 2.8446188724094727e-05, "loss": 0.083, "step": 7796 }, { "epoch": 0.17180915235750055, "grad_norm": 0.9664093255996704, "learning_rate": 2.8445714206608933e-05, "loss": 0.1254, "step": 7797 }, { "epoch": 0.1718311876470167, "grad_norm": 0.7228020429611206, "learning_rate": 2.8445239620636904e-05, "loss": 0.1303, "step": 7798 }, { "epoch": 0.17185322293653285, "grad_norm": 0.8062392473220825, "learning_rate": 2.844476496618106e-05, "loss": 0.1176, "step": 7799 }, { "epoch": 0.17187525822604902, "grad_norm": 0.9276073575019836, "learning_rate": 2.844429024324382e-05, "loss": 0.1045, "step": 7800 }, { "epoch": 0.17189729351556518, "grad_norm": 0.8686889410018921, "learning_rate": 2.8443815451827603e-05, "loss": 0.1167, "step": 7801 }, { "epoch": 0.17191932880508135, "grad_norm": 0.9857856035232544, "learning_rate": 2.844334059193482e-05, "loss": 0.0995, "step": 7802 }, { "epoch": 0.1719413640945975, "grad_norm": 1.2433418035507202, "learning_rate": 2.8442865663567894e-05, "loss": 0.0961, "step": 7803 }, { "epoch": 0.17196339938411365, "grad_norm": 0.9199880361557007, "learning_rate": 2.844239066672925e-05, "loss": 0.0809, "step": 7804 }, { "epoch": 0.1719854346736298, "grad_norm": 1.3142588138580322, "learning_rate": 2.8441915601421298e-05, "loss": 0.155, "step": 7805 }, { "epoch": 0.17200746996314598, "grad_norm": 0.9713699817657471, "learning_rate": 2.844144046764646e-05, "loss": 0.0991, "step": 7806 }, { "epoch": 0.17202950525266214, "grad_norm": 1.2218693494796753, "learning_rate": 2.8440965265407164e-05, "loss": 0.1488, "step": 7807 }, { "epoch": 0.1720515405421783, "grad_norm": 0.6816913485527039, "learning_rate": 2.8440489994705816e-05, "loss": 0.1012, "step": 7808 }, { "epoch": 0.17207357583169447, "grad_norm": 0.8800411224365234, "learning_rate": 2.844001465554485e-05, "loss": 0.1345, "step": 7809 }, { "epoch": 0.1720956111212106, "grad_norm": 1.2025779485702515, "learning_rate": 2.8439539247926684e-05, "loss": 0.1339, "step": 7810 }, { "epoch": 0.17211764641072677, "grad_norm": 0.6827034950256348, "learning_rate": 2.8439063771853737e-05, "loss": 0.1071, "step": 7811 }, { "epoch": 0.17213968170024294, "grad_norm": 1.0377075672149658, "learning_rate": 2.843858822732843e-05, "loss": 0.103, "step": 7812 }, { "epoch": 0.1721617169897591, "grad_norm": 0.9167094826698303, "learning_rate": 2.8438112614353187e-05, "loss": 0.124, "step": 7813 }, { "epoch": 0.17218375227927527, "grad_norm": 0.9969541430473328, "learning_rate": 2.8437636932930432e-05, "loss": 0.1486, "step": 7814 }, { "epoch": 0.17220578756879143, "grad_norm": 0.8044317960739136, "learning_rate": 2.8437161183062583e-05, "loss": 0.1126, "step": 7815 }, { "epoch": 0.17222782285830757, "grad_norm": 0.6268166899681091, "learning_rate": 2.8436685364752066e-05, "loss": 0.0998, "step": 7816 }, { "epoch": 0.17224985814782373, "grad_norm": 0.8875779509544373, "learning_rate": 2.8436209478001307e-05, "loss": 0.1204, "step": 7817 }, { "epoch": 0.1722718934373399, "grad_norm": 0.7606516480445862, "learning_rate": 2.843573352281273e-05, "loss": 0.1103, "step": 7818 }, { "epoch": 0.17229392872685606, "grad_norm": 0.8900364637374878, "learning_rate": 2.843525749918876e-05, "loss": 0.1342, "step": 7819 }, { "epoch": 0.17231596401637223, "grad_norm": 1.2055652141571045, "learning_rate": 2.843478140713181e-05, "loss": 0.125, "step": 7820 }, { "epoch": 0.1723379993058884, "grad_norm": 1.2624036073684692, "learning_rate": 2.843430524664432e-05, "loss": 0.1176, "step": 7821 }, { "epoch": 0.17236003459540453, "grad_norm": 0.7557306885719299, "learning_rate": 2.8433829017728706e-05, "loss": 0.1378, "step": 7822 }, { "epoch": 0.1723820698849207, "grad_norm": 0.9814712405204773, "learning_rate": 2.8433352720387396e-05, "loss": 0.1166, "step": 7823 }, { "epoch": 0.17240410517443686, "grad_norm": 1.1481386423110962, "learning_rate": 2.8432876354622823e-05, "loss": 0.1406, "step": 7824 }, { "epoch": 0.17242614046395302, "grad_norm": 1.1133666038513184, "learning_rate": 2.8432399920437405e-05, "loss": 0.1101, "step": 7825 }, { "epoch": 0.17244817575346919, "grad_norm": 1.1652649641036987, "learning_rate": 2.843192341783357e-05, "loss": 0.0921, "step": 7826 }, { "epoch": 0.17247021104298535, "grad_norm": 1.3419981002807617, "learning_rate": 2.8431446846813748e-05, "loss": 0.1705, "step": 7827 }, { "epoch": 0.17249224633250151, "grad_norm": 0.8713136911392212, "learning_rate": 2.843097020738036e-05, "loss": 0.1157, "step": 7828 }, { "epoch": 0.17251428162201765, "grad_norm": 0.8330708742141724, "learning_rate": 2.8430493499535847e-05, "loss": 0.1146, "step": 7829 }, { "epoch": 0.17253631691153382, "grad_norm": 1.5136046409606934, "learning_rate": 2.8430016723282624e-05, "loss": 0.1614, "step": 7830 }, { "epoch": 0.17255835220104998, "grad_norm": 1.321852207183838, "learning_rate": 2.8429539878623122e-05, "loss": 0.1366, "step": 7831 }, { "epoch": 0.17258038749056615, "grad_norm": 1.1217738389968872, "learning_rate": 2.8429062965559773e-05, "loss": 0.1347, "step": 7832 }, { "epoch": 0.1726024227800823, "grad_norm": 1.002733588218689, "learning_rate": 2.8428585984095007e-05, "loss": 0.1076, "step": 7833 }, { "epoch": 0.17262445806959847, "grad_norm": 1.2478018999099731, "learning_rate": 2.842810893423125e-05, "loss": 0.1353, "step": 7834 }, { "epoch": 0.1726464933591146, "grad_norm": 1.0033632516860962, "learning_rate": 2.8427631815970935e-05, "loss": 0.1891, "step": 7835 }, { "epoch": 0.17266852864863078, "grad_norm": 1.1687878370285034, "learning_rate": 2.8427154629316493e-05, "loss": 0.1235, "step": 7836 }, { "epoch": 0.17269056393814694, "grad_norm": 1.0231572389602661, "learning_rate": 2.8426677374270345e-05, "loss": 0.1179, "step": 7837 }, { "epoch": 0.1727125992276631, "grad_norm": 0.6071959733963013, "learning_rate": 2.8426200050834932e-05, "loss": 0.1067, "step": 7838 }, { "epoch": 0.17273463451717927, "grad_norm": 0.7469664216041565, "learning_rate": 2.8425722659012685e-05, "loss": 0.0869, "step": 7839 }, { "epoch": 0.17275666980669543, "grad_norm": 1.5225924253463745, "learning_rate": 2.8425245198806032e-05, "loss": 0.1357, "step": 7840 }, { "epoch": 0.17277870509621157, "grad_norm": 0.6820778250694275, "learning_rate": 2.8424767670217404e-05, "loss": 0.1019, "step": 7841 }, { "epoch": 0.17280074038572774, "grad_norm": 0.995760440826416, "learning_rate": 2.842429007324924e-05, "loss": 0.1365, "step": 7842 }, { "epoch": 0.1728227756752439, "grad_norm": 0.7073440551757812, "learning_rate": 2.8423812407903966e-05, "loss": 0.0941, "step": 7843 }, { "epoch": 0.17284481096476007, "grad_norm": 0.9719186425209045, "learning_rate": 2.8423334674184015e-05, "loss": 0.1227, "step": 7844 }, { "epoch": 0.17286684625427623, "grad_norm": 1.0820165872573853, "learning_rate": 2.8422856872091822e-05, "loss": 0.1283, "step": 7845 }, { "epoch": 0.1728888815437924, "grad_norm": 1.3283774852752686, "learning_rate": 2.842237900162982e-05, "loss": 0.1297, "step": 7846 }, { "epoch": 0.17291091683330853, "grad_norm": 1.1078304052352905, "learning_rate": 2.8421901062800448e-05, "loss": 0.1112, "step": 7847 }, { "epoch": 0.1729329521228247, "grad_norm": 0.878961443901062, "learning_rate": 2.8421423055606136e-05, "loss": 0.0719, "step": 7848 }, { "epoch": 0.17295498741234086, "grad_norm": 0.9973087310791016, "learning_rate": 2.8420944980049315e-05, "loss": 0.1095, "step": 7849 }, { "epoch": 0.17297702270185702, "grad_norm": 1.2892193794250488, "learning_rate": 2.842046683613243e-05, "loss": 0.1006, "step": 7850 }, { "epoch": 0.1729990579913732, "grad_norm": 0.6302319765090942, "learning_rate": 2.8419988623857905e-05, "loss": 0.1183, "step": 7851 }, { "epoch": 0.17302109328088935, "grad_norm": 0.9056283235549927, "learning_rate": 2.8419510343228184e-05, "loss": 0.1047, "step": 7852 }, { "epoch": 0.1730431285704055, "grad_norm": 0.8149581551551819, "learning_rate": 2.8419031994245703e-05, "loss": 0.0667, "step": 7853 }, { "epoch": 0.17306516385992166, "grad_norm": 0.7794713973999023, "learning_rate": 2.8418553576912898e-05, "loss": 0.0984, "step": 7854 }, { "epoch": 0.17308719914943782, "grad_norm": 1.091882586479187, "learning_rate": 2.84180750912322e-05, "loss": 0.0944, "step": 7855 }, { "epoch": 0.17310923443895398, "grad_norm": 1.7499901056289673, "learning_rate": 2.841759653720605e-05, "loss": 0.1531, "step": 7856 }, { "epoch": 0.17313126972847015, "grad_norm": 0.6822894811630249, "learning_rate": 2.8417117914836887e-05, "loss": 0.1167, "step": 7857 }, { "epoch": 0.17315330501798631, "grad_norm": 1.045979380607605, "learning_rate": 2.8416639224127146e-05, "loss": 0.1124, "step": 7858 }, { "epoch": 0.17317534030750248, "grad_norm": 0.8148297667503357, "learning_rate": 2.841616046507927e-05, "loss": 0.1429, "step": 7859 }, { "epoch": 0.17319737559701862, "grad_norm": 0.8273358941078186, "learning_rate": 2.841568163769569e-05, "loss": 0.1555, "step": 7860 }, { "epoch": 0.17321941088653478, "grad_norm": 1.1355257034301758, "learning_rate": 2.8415202741978853e-05, "loss": 0.1254, "step": 7861 }, { "epoch": 0.17324144617605094, "grad_norm": 0.9826634526252747, "learning_rate": 2.8414723777931193e-05, "loss": 0.0944, "step": 7862 }, { "epoch": 0.1732634814655671, "grad_norm": 1.0829393863677979, "learning_rate": 2.841424474555515e-05, "loss": 0.1234, "step": 7863 }, { "epoch": 0.17328551675508327, "grad_norm": 1.0069376230239868, "learning_rate": 2.841376564485317e-05, "loss": 0.1276, "step": 7864 }, { "epoch": 0.17330755204459944, "grad_norm": 0.6530994176864624, "learning_rate": 2.8413286475827686e-05, "loss": 0.0919, "step": 7865 }, { "epoch": 0.17332958733411558, "grad_norm": 1.328481912612915, "learning_rate": 2.8412807238481143e-05, "loss": 0.0905, "step": 7866 }, { "epoch": 0.17335162262363174, "grad_norm": 1.0022767782211304, "learning_rate": 2.8412327932815975e-05, "loss": 0.125, "step": 7867 }, { "epoch": 0.1733736579131479, "grad_norm": 1.0332731008529663, "learning_rate": 2.8411848558834635e-05, "loss": 0.138, "step": 7868 }, { "epoch": 0.17339569320266407, "grad_norm": 1.1049247980117798, "learning_rate": 2.841136911653955e-05, "loss": 0.1223, "step": 7869 }, { "epoch": 0.17341772849218023, "grad_norm": 0.9727563858032227, "learning_rate": 2.8410889605933177e-05, "loss": 0.1153, "step": 7870 }, { "epoch": 0.1734397637816964, "grad_norm": 1.426585078239441, "learning_rate": 2.8410410027017948e-05, "loss": 0.1106, "step": 7871 }, { "epoch": 0.17346179907121254, "grad_norm": 0.6575146317481995, "learning_rate": 2.8409930379796315e-05, "loss": 0.0764, "step": 7872 }, { "epoch": 0.1734838343607287, "grad_norm": 0.6397500038146973, "learning_rate": 2.840945066427071e-05, "loss": 0.1169, "step": 7873 }, { "epoch": 0.17350586965024486, "grad_norm": 0.7899208664894104, "learning_rate": 2.8408970880443582e-05, "loss": 0.1139, "step": 7874 }, { "epoch": 0.17352790493976103, "grad_norm": 1.578128457069397, "learning_rate": 2.8408491028317378e-05, "loss": 0.1372, "step": 7875 }, { "epoch": 0.1735499402292772, "grad_norm": 1.1281331777572632, "learning_rate": 2.840801110789454e-05, "loss": 0.0804, "step": 7876 }, { "epoch": 0.17357197551879336, "grad_norm": 0.8054883480072021, "learning_rate": 2.840753111917751e-05, "loss": 0.1141, "step": 7877 }, { "epoch": 0.1735940108083095, "grad_norm": 0.8420571684837341, "learning_rate": 2.8407051062168732e-05, "loss": 0.1175, "step": 7878 }, { "epoch": 0.17361604609782566, "grad_norm": 0.764415442943573, "learning_rate": 2.840657093687066e-05, "loss": 0.108, "step": 7879 }, { "epoch": 0.17363808138734182, "grad_norm": 1.1862205266952515, "learning_rate": 2.8406090743285724e-05, "loss": 0.0952, "step": 7880 }, { "epoch": 0.173660116676858, "grad_norm": 1.3906352519989014, "learning_rate": 2.8405610481416382e-05, "loss": 0.0983, "step": 7881 }, { "epoch": 0.17368215196637415, "grad_norm": 0.6839585304260254, "learning_rate": 2.840513015126508e-05, "loss": 0.1035, "step": 7882 }, { "epoch": 0.17370418725589032, "grad_norm": 0.8345476388931274, "learning_rate": 2.840464975283426e-05, "loss": 0.0849, "step": 7883 }, { "epoch": 0.17372622254540646, "grad_norm": 0.9193453192710876, "learning_rate": 2.8404169286126374e-05, "loss": 0.1057, "step": 7884 }, { "epoch": 0.17374825783492262, "grad_norm": 1.1139072179794312, "learning_rate": 2.8403688751143862e-05, "loss": 0.118, "step": 7885 }, { "epoch": 0.17377029312443878, "grad_norm": 1.074684977531433, "learning_rate": 2.8403208147889176e-05, "loss": 0.1016, "step": 7886 }, { "epoch": 0.17379232841395495, "grad_norm": 0.6680117845535278, "learning_rate": 2.8402727476364766e-05, "loss": 0.1003, "step": 7887 }, { "epoch": 0.17381436370347111, "grad_norm": 0.6582584381103516, "learning_rate": 2.840224673657307e-05, "loss": 0.1344, "step": 7888 }, { "epoch": 0.17383639899298728, "grad_norm": 0.9851492047309875, "learning_rate": 2.8401765928516555e-05, "loss": 0.1273, "step": 7889 }, { "epoch": 0.17385843428250342, "grad_norm": 0.9639062881469727, "learning_rate": 2.840128505219765e-05, "loss": 0.1414, "step": 7890 }, { "epoch": 0.17388046957201958, "grad_norm": 0.7834540605545044, "learning_rate": 2.840080410761882e-05, "loss": 0.1055, "step": 7891 }, { "epoch": 0.17390250486153574, "grad_norm": 0.9936104416847229, "learning_rate": 2.840032309478251e-05, "loss": 0.0927, "step": 7892 }, { "epoch": 0.1739245401510519, "grad_norm": 1.1415016651153564, "learning_rate": 2.8399842013691163e-05, "loss": 0.1266, "step": 7893 }, { "epoch": 0.17394657544056807, "grad_norm": 1.6378471851348877, "learning_rate": 2.839936086434724e-05, "loss": 0.1017, "step": 7894 }, { "epoch": 0.17396861073008424, "grad_norm": 0.8544865250587463, "learning_rate": 2.8398879646753182e-05, "loss": 0.1028, "step": 7895 }, { "epoch": 0.1739906460196004, "grad_norm": 1.1406382322311401, "learning_rate": 2.839839836091145e-05, "loss": 0.0898, "step": 7896 }, { "epoch": 0.17401268130911654, "grad_norm": 0.6490355730056763, "learning_rate": 2.8397917006824485e-05, "loss": 0.1009, "step": 7897 }, { "epoch": 0.1740347165986327, "grad_norm": 0.8737879991531372, "learning_rate": 2.8397435584494752e-05, "loss": 0.1123, "step": 7898 }, { "epoch": 0.17405675188814887, "grad_norm": 1.1786385774612427, "learning_rate": 2.839695409392469e-05, "loss": 0.129, "step": 7899 }, { "epoch": 0.17407878717766503, "grad_norm": 1.553914189338684, "learning_rate": 2.8396472535116752e-05, "loss": 0.1235, "step": 7900 }, { "epoch": 0.1741008224671812, "grad_norm": 2.159771203994751, "learning_rate": 2.8395990908073407e-05, "loss": 0.0939, "step": 7901 }, { "epoch": 0.17412285775669736, "grad_norm": 0.9545218348503113, "learning_rate": 2.8395509212797086e-05, "loss": 0.0983, "step": 7902 }, { "epoch": 0.1741448930462135, "grad_norm": 0.9358788728713989, "learning_rate": 2.839502744929026e-05, "loss": 0.1345, "step": 7903 }, { "epoch": 0.17416692833572966, "grad_norm": 1.1288833618164062, "learning_rate": 2.8394545617555374e-05, "loss": 0.1658, "step": 7904 }, { "epoch": 0.17418896362524583, "grad_norm": 1.137123703956604, "learning_rate": 2.8394063717594883e-05, "loss": 0.0962, "step": 7905 }, { "epoch": 0.174210998914762, "grad_norm": 1.7428202629089355, "learning_rate": 2.8393581749411248e-05, "loss": 0.1671, "step": 7906 }, { "epoch": 0.17423303420427816, "grad_norm": 1.0523408651351929, "learning_rate": 2.8393099713006915e-05, "loss": 0.1277, "step": 7907 }, { "epoch": 0.17425506949379432, "grad_norm": 1.1733828783035278, "learning_rate": 2.839261760838434e-05, "loss": 0.1219, "step": 7908 }, { "epoch": 0.17427710478331046, "grad_norm": 1.0391112565994263, "learning_rate": 2.839213543554599e-05, "loss": 0.1072, "step": 7909 }, { "epoch": 0.17429914007282662, "grad_norm": 1.032392144203186, "learning_rate": 2.839165319449431e-05, "loss": 0.117, "step": 7910 }, { "epoch": 0.1743211753623428, "grad_norm": 0.9347168803215027, "learning_rate": 2.8391170885231756e-05, "loss": 0.1527, "step": 7911 }, { "epoch": 0.17434321065185895, "grad_norm": 0.8261913657188416, "learning_rate": 2.8390688507760788e-05, "loss": 0.0925, "step": 7912 }, { "epoch": 0.17436524594137512, "grad_norm": 0.899602472782135, "learning_rate": 2.8390206062083864e-05, "loss": 0.1821, "step": 7913 }, { "epoch": 0.17438728123089128, "grad_norm": 1.177286148071289, "learning_rate": 2.838972354820344e-05, "loss": 0.1053, "step": 7914 }, { "epoch": 0.17440931652040742, "grad_norm": 1.3825417757034302, "learning_rate": 2.8389240966121973e-05, "loss": 0.1523, "step": 7915 }, { "epoch": 0.17443135180992358, "grad_norm": 1.1391609907150269, "learning_rate": 2.8388758315841927e-05, "loss": 0.1326, "step": 7916 }, { "epoch": 0.17445338709943975, "grad_norm": 1.0294570922851562, "learning_rate": 2.8388275597365746e-05, "loss": 0.158, "step": 7917 }, { "epoch": 0.17447542238895591, "grad_norm": 0.7821930050849915, "learning_rate": 2.8387792810695906e-05, "loss": 0.1299, "step": 7918 }, { "epoch": 0.17449745767847208, "grad_norm": 1.298586130142212, "learning_rate": 2.8387309955834858e-05, "loss": 0.122, "step": 7919 }, { "epoch": 0.17451949296798824, "grad_norm": 0.9638880491256714, "learning_rate": 2.8386827032785055e-05, "loss": 0.1467, "step": 7920 }, { "epoch": 0.17454152825750438, "grad_norm": 1.029123067855835, "learning_rate": 2.838634404154897e-05, "loss": 0.0937, "step": 7921 }, { "epoch": 0.17456356354702054, "grad_norm": 0.7953442931175232, "learning_rate": 2.838586098212905e-05, "loss": 0.1106, "step": 7922 }, { "epoch": 0.1745855988365367, "grad_norm": 0.6248211860656738, "learning_rate": 2.8385377854527767e-05, "loss": 0.0972, "step": 7923 }, { "epoch": 0.17460763412605287, "grad_norm": 1.1062880754470825, "learning_rate": 2.838489465874758e-05, "loss": 0.0944, "step": 7924 }, { "epoch": 0.17462966941556904, "grad_norm": 1.1099061965942383, "learning_rate": 2.838441139479094e-05, "loss": 0.1177, "step": 7925 }, { "epoch": 0.1746517047050852, "grad_norm": 1.4486629962921143, "learning_rate": 2.8383928062660317e-05, "loss": 0.1851, "step": 7926 }, { "epoch": 0.17467373999460134, "grad_norm": 0.7871185541152954, "learning_rate": 2.8383444662358173e-05, "loss": 0.0872, "step": 7927 }, { "epoch": 0.1746957752841175, "grad_norm": 1.2611278295516968, "learning_rate": 2.838296119388697e-05, "loss": 0.0877, "step": 7928 }, { "epoch": 0.17471781057363367, "grad_norm": 0.6777531504631042, "learning_rate": 2.8382477657249166e-05, "loss": 0.111, "step": 7929 }, { "epoch": 0.17473984586314983, "grad_norm": 0.7841746211051941, "learning_rate": 2.8381994052447227e-05, "loss": 0.1346, "step": 7930 }, { "epoch": 0.174761881152666, "grad_norm": 0.9092212915420532, "learning_rate": 2.8381510379483615e-05, "loss": 0.108, "step": 7931 }, { "epoch": 0.17478391644218216, "grad_norm": 1.3814723491668701, "learning_rate": 2.8381026638360796e-05, "loss": 0.1335, "step": 7932 }, { "epoch": 0.17480595173169833, "grad_norm": 0.9218538403511047, "learning_rate": 2.8380542829081238e-05, "loss": 0.0984, "step": 7933 }, { "epoch": 0.17482798702121446, "grad_norm": 1.0564523935317993, "learning_rate": 2.8380058951647396e-05, "loss": 0.1339, "step": 7934 }, { "epoch": 0.17485002231073063, "grad_norm": 1.0714755058288574, "learning_rate": 2.8379575006061737e-05, "loss": 0.1177, "step": 7935 }, { "epoch": 0.1748720576002468, "grad_norm": 1.5766844749450684, "learning_rate": 2.837909099232673e-05, "loss": 0.1542, "step": 7936 }, { "epoch": 0.17489409288976296, "grad_norm": 0.7981528043746948, "learning_rate": 2.837860691044484e-05, "loss": 0.1121, "step": 7937 }, { "epoch": 0.17491612817927912, "grad_norm": 1.2782968282699585, "learning_rate": 2.8378122760418528e-05, "loss": 0.1027, "step": 7938 }, { "epoch": 0.1749381634687953, "grad_norm": 0.7647474408149719, "learning_rate": 2.837763854225026e-05, "loss": 0.0873, "step": 7939 }, { "epoch": 0.17496019875831142, "grad_norm": 2.2678680419921875, "learning_rate": 2.8377154255942512e-05, "loss": 0.1379, "step": 7940 }, { "epoch": 0.1749822340478276, "grad_norm": 1.3540163040161133, "learning_rate": 2.837666990149774e-05, "loss": 0.143, "step": 7941 }, { "epoch": 0.17500426933734375, "grad_norm": 0.7850008606910706, "learning_rate": 2.837618547891842e-05, "loss": 0.1388, "step": 7942 }, { "epoch": 0.17502630462685992, "grad_norm": 0.7532703876495361, "learning_rate": 2.8375700988207007e-05, "loss": 0.1204, "step": 7943 }, { "epoch": 0.17504833991637608, "grad_norm": 0.6544438004493713, "learning_rate": 2.8375216429365978e-05, "loss": 0.104, "step": 7944 }, { "epoch": 0.17507037520589225, "grad_norm": 0.9216213226318359, "learning_rate": 2.83747318023978e-05, "loss": 0.1099, "step": 7945 }, { "epoch": 0.17509241049540838, "grad_norm": 0.9813146591186523, "learning_rate": 2.8374247107304945e-05, "loss": 0.0824, "step": 7946 }, { "epoch": 0.17511444578492455, "grad_norm": 0.9335001707077026, "learning_rate": 2.8373762344089874e-05, "loss": 0.1069, "step": 7947 }, { "epoch": 0.17513648107444071, "grad_norm": 0.7578890919685364, "learning_rate": 2.837327751275506e-05, "loss": 0.1044, "step": 7948 }, { "epoch": 0.17515851636395688, "grad_norm": 1.0325878858566284, "learning_rate": 2.8372792613302973e-05, "loss": 0.1274, "step": 7949 }, { "epoch": 0.17518055165347304, "grad_norm": 0.8407610654830933, "learning_rate": 2.837230764573608e-05, "loss": 0.1582, "step": 7950 }, { "epoch": 0.1752025869429892, "grad_norm": 0.9862577319145203, "learning_rate": 2.8371822610056854e-05, "loss": 0.1101, "step": 7951 }, { "epoch": 0.17522462223250534, "grad_norm": 1.4031243324279785, "learning_rate": 2.8371337506267766e-05, "loss": 0.0948, "step": 7952 }, { "epoch": 0.1752466575220215, "grad_norm": 0.7558484673500061, "learning_rate": 2.837085233437129e-05, "loss": 0.1085, "step": 7953 }, { "epoch": 0.17526869281153767, "grad_norm": 0.6084149479866028, "learning_rate": 2.837036709436989e-05, "loss": 0.128, "step": 7954 }, { "epoch": 0.17529072810105384, "grad_norm": 1.4855595827102661, "learning_rate": 2.8369881786266037e-05, "loss": 0.101, "step": 7955 }, { "epoch": 0.17531276339057, "grad_norm": 1.0000841617584229, "learning_rate": 2.8369396410062208e-05, "loss": 0.0821, "step": 7956 }, { "epoch": 0.17533479868008617, "grad_norm": 0.7817990183830261, "learning_rate": 2.8368910965760874e-05, "loss": 0.1009, "step": 7957 }, { "epoch": 0.1753568339696023, "grad_norm": 0.9654856324195862, "learning_rate": 2.8368425453364508e-05, "loss": 0.1299, "step": 7958 }, { "epoch": 0.17537886925911847, "grad_norm": 1.2158677577972412, "learning_rate": 2.836793987287558e-05, "loss": 0.0976, "step": 7959 }, { "epoch": 0.17540090454863463, "grad_norm": 1.1749670505523682, "learning_rate": 2.8367454224296574e-05, "loss": 0.1283, "step": 7960 }, { "epoch": 0.1754229398381508, "grad_norm": 0.7661373019218445, "learning_rate": 2.836696850762995e-05, "loss": 0.1115, "step": 7961 }, { "epoch": 0.17544497512766696, "grad_norm": 1.0965213775634766, "learning_rate": 2.8366482722878183e-05, "loss": 0.128, "step": 7962 }, { "epoch": 0.17546701041718313, "grad_norm": 1.411818027496338, "learning_rate": 2.836599687004376e-05, "loss": 0.1552, "step": 7963 }, { "epoch": 0.1754890457066993, "grad_norm": 1.0872831344604492, "learning_rate": 2.836551094912914e-05, "loss": 0.1407, "step": 7964 }, { "epoch": 0.17551108099621543, "grad_norm": 1.2534019947052002, "learning_rate": 2.8365024960136812e-05, "loss": 0.1503, "step": 7965 }, { "epoch": 0.1755331162857316, "grad_norm": 0.7025652527809143, "learning_rate": 2.8364538903069245e-05, "loss": 0.1417, "step": 7966 }, { "epoch": 0.17555515157524776, "grad_norm": 0.9902952909469604, "learning_rate": 2.836405277792891e-05, "loss": 0.1492, "step": 7967 }, { "epoch": 0.17557718686476392, "grad_norm": 1.2637563943862915, "learning_rate": 2.836356658471829e-05, "loss": 0.1291, "step": 7968 }, { "epoch": 0.1755992221542801, "grad_norm": 0.8154842257499695, "learning_rate": 2.836308032343986e-05, "loss": 0.1191, "step": 7969 }, { "epoch": 0.17562125744379625, "grad_norm": 0.8042294383049011, "learning_rate": 2.8362593994096094e-05, "loss": 0.1458, "step": 7970 }, { "epoch": 0.1756432927333124, "grad_norm": 1.0810192823410034, "learning_rate": 2.8362107596689473e-05, "loss": 0.1198, "step": 7971 }, { "epoch": 0.17566532802282855, "grad_norm": 0.5744432210922241, "learning_rate": 2.836162113122247e-05, "loss": 0.1048, "step": 7972 }, { "epoch": 0.17568736331234472, "grad_norm": 0.9441022872924805, "learning_rate": 2.836113459769757e-05, "loss": 0.1088, "step": 7973 }, { "epoch": 0.17570939860186088, "grad_norm": 0.7346789836883545, "learning_rate": 2.8360647996117247e-05, "loss": 0.0923, "step": 7974 }, { "epoch": 0.17573143389137705, "grad_norm": 1.164918065071106, "learning_rate": 2.8360161326483974e-05, "loss": 0.1179, "step": 7975 }, { "epoch": 0.1757534691808932, "grad_norm": 1.0001115798950195, "learning_rate": 2.8359674588800236e-05, "loss": 0.1636, "step": 7976 }, { "epoch": 0.17577550447040935, "grad_norm": 0.9898844957351685, "learning_rate": 2.8359187783068515e-05, "loss": 0.0924, "step": 7977 }, { "epoch": 0.17579753975992551, "grad_norm": 0.7569364905357361, "learning_rate": 2.8358700909291282e-05, "loss": 0.1521, "step": 7978 }, { "epoch": 0.17581957504944168, "grad_norm": 1.020242691040039, "learning_rate": 2.8358213967471025e-05, "loss": 0.1673, "step": 7979 }, { "epoch": 0.17584161033895784, "grad_norm": 0.9172633290290833, "learning_rate": 2.835772695761022e-05, "loss": 0.082, "step": 7980 }, { "epoch": 0.175863645628474, "grad_norm": 0.7165647149085999, "learning_rate": 2.835723987971135e-05, "loss": 0.1158, "step": 7981 }, { "epoch": 0.17588568091799017, "grad_norm": 0.6248421669006348, "learning_rate": 2.835675273377689e-05, "loss": 0.0914, "step": 7982 }, { "epoch": 0.1759077162075063, "grad_norm": 1.0828242301940918, "learning_rate": 2.8356265519809333e-05, "loss": 0.1218, "step": 7983 }, { "epoch": 0.17592975149702247, "grad_norm": 0.9898620247840881, "learning_rate": 2.835577823781115e-05, "loss": 0.1135, "step": 7984 }, { "epoch": 0.17595178678653864, "grad_norm": 1.2417685985565186, "learning_rate": 2.835529088778483e-05, "loss": 0.1577, "step": 7985 }, { "epoch": 0.1759738220760548, "grad_norm": 1.3380228281021118, "learning_rate": 2.8354803469732844e-05, "loss": 0.097, "step": 7986 }, { "epoch": 0.17599585736557097, "grad_norm": 1.737255334854126, "learning_rate": 2.835431598365769e-05, "loss": 0.1388, "step": 7987 }, { "epoch": 0.17601789265508713, "grad_norm": 0.8054887652397156, "learning_rate": 2.835382842956184e-05, "loss": 0.0926, "step": 7988 }, { "epoch": 0.17603992794460327, "grad_norm": 0.9120156764984131, "learning_rate": 2.835334080744778e-05, "loss": 0.1157, "step": 7989 }, { "epoch": 0.17606196323411943, "grad_norm": 1.3992904424667358, "learning_rate": 2.8352853117318e-05, "loss": 0.0969, "step": 7990 }, { "epoch": 0.1760839985236356, "grad_norm": 1.180298089981079, "learning_rate": 2.835236535917497e-05, "loss": 0.1396, "step": 7991 }, { "epoch": 0.17610603381315176, "grad_norm": 1.063805341720581, "learning_rate": 2.835187753302119e-05, "loss": 0.0836, "step": 7992 }, { "epoch": 0.17612806910266793, "grad_norm": 1.1191554069519043, "learning_rate": 2.8351389638859135e-05, "loss": 0.126, "step": 7993 }, { "epoch": 0.1761501043921841, "grad_norm": 1.1393879652023315, "learning_rate": 2.8350901676691294e-05, "loss": 0.1231, "step": 7994 }, { "epoch": 0.17617213968170023, "grad_norm": 0.7953516840934753, "learning_rate": 2.835041364652015e-05, "loss": 0.1288, "step": 7995 }, { "epoch": 0.1761941749712164, "grad_norm": 1.737613320350647, "learning_rate": 2.834992554834819e-05, "loss": 0.1311, "step": 7996 }, { "epoch": 0.17621621026073256, "grad_norm": 0.9887511730194092, "learning_rate": 2.8349437382177906e-05, "loss": 0.1068, "step": 7997 }, { "epoch": 0.17623824555024872, "grad_norm": 0.9502782821655273, "learning_rate": 2.8348949148011773e-05, "loss": 0.1142, "step": 7998 }, { "epoch": 0.1762602808397649, "grad_norm": 0.8374917507171631, "learning_rate": 2.8348460845852287e-05, "loss": 0.1157, "step": 7999 }, { "epoch": 0.17628231612928105, "grad_norm": 0.7526827454566956, "learning_rate": 2.834797247570193e-05, "loss": 0.0696, "step": 8000 }, { "epoch": 0.17630435141879722, "grad_norm": 1.2671897411346436, "learning_rate": 2.834748403756319e-05, "loss": 0.1629, "step": 8001 }, { "epoch": 0.17632638670831335, "grad_norm": 0.9428536891937256, "learning_rate": 2.834699553143856e-05, "loss": 0.117, "step": 8002 }, { "epoch": 0.17634842199782952, "grad_norm": 0.8050314784049988, "learning_rate": 2.8346506957330522e-05, "loss": 0.1218, "step": 8003 }, { "epoch": 0.17637045728734568, "grad_norm": 0.9367786049842834, "learning_rate": 2.8346018315241565e-05, "loss": 0.0969, "step": 8004 }, { "epoch": 0.17639249257686185, "grad_norm": 0.7864695191383362, "learning_rate": 2.8345529605174186e-05, "loss": 0.0917, "step": 8005 }, { "epoch": 0.176414527866378, "grad_norm": 1.0088517665863037, "learning_rate": 2.834504082713086e-05, "loss": 0.0879, "step": 8006 }, { "epoch": 0.17643656315589418, "grad_norm": 1.0758980512619019, "learning_rate": 2.8344551981114094e-05, "loss": 0.1476, "step": 8007 }, { "epoch": 0.17645859844541031, "grad_norm": 0.8804036378860474, "learning_rate": 2.8344063067126366e-05, "loss": 0.0891, "step": 8008 }, { "epoch": 0.17648063373492648, "grad_norm": 0.9942534565925598, "learning_rate": 2.8343574085170165e-05, "loss": 0.1166, "step": 8009 }, { "epoch": 0.17650266902444264, "grad_norm": 1.073250651359558, "learning_rate": 2.834308503524799e-05, "loss": 0.1293, "step": 8010 }, { "epoch": 0.1765247043139588, "grad_norm": 1.3711906671524048, "learning_rate": 2.8342595917362325e-05, "loss": 0.128, "step": 8011 }, { "epoch": 0.17654673960347497, "grad_norm": 1.1091324090957642, "learning_rate": 2.8342106731515665e-05, "loss": 0.0915, "step": 8012 }, { "epoch": 0.17656877489299114, "grad_norm": 0.714914858341217, "learning_rate": 2.8341617477710502e-05, "loss": 0.1074, "step": 8013 }, { "epoch": 0.17659081018250727, "grad_norm": 1.219940423965454, "learning_rate": 2.8341128155949325e-05, "loss": 0.1257, "step": 8014 }, { "epoch": 0.17661284547202344, "grad_norm": 1.0554248094558716, "learning_rate": 2.834063876623463e-05, "loss": 0.1299, "step": 8015 }, { "epoch": 0.1766348807615396, "grad_norm": 0.7124664783477783, "learning_rate": 2.834014930856891e-05, "loss": 0.0989, "step": 8016 }, { "epoch": 0.17665691605105577, "grad_norm": 0.8569059371948242, "learning_rate": 2.833965978295465e-05, "loss": 0.1046, "step": 8017 }, { "epoch": 0.17667895134057193, "grad_norm": 0.9074584245681763, "learning_rate": 2.8339170189394354e-05, "loss": 0.1343, "step": 8018 }, { "epoch": 0.1767009866300881, "grad_norm": 0.9199174642562866, "learning_rate": 2.833868052789051e-05, "loss": 0.1316, "step": 8019 }, { "epoch": 0.17672302191960423, "grad_norm": 1.222174882888794, "learning_rate": 2.8338190798445615e-05, "loss": 0.087, "step": 8020 }, { "epoch": 0.1767450572091204, "grad_norm": 1.2421551942825317, "learning_rate": 2.833770100106216e-05, "loss": 0.09, "step": 8021 }, { "epoch": 0.17676709249863656, "grad_norm": 0.9188095927238464, "learning_rate": 2.833721113574264e-05, "loss": 0.0882, "step": 8022 }, { "epoch": 0.17678912778815273, "grad_norm": 0.7604076862335205, "learning_rate": 2.8336721202489552e-05, "loss": 0.1015, "step": 8023 }, { "epoch": 0.1768111630776689, "grad_norm": 0.8489280343055725, "learning_rate": 2.8336231201305396e-05, "loss": 0.1118, "step": 8024 }, { "epoch": 0.17683319836718506, "grad_norm": 2.67594838142395, "learning_rate": 2.8335741132192657e-05, "loss": 0.1154, "step": 8025 }, { "epoch": 0.1768552336567012, "grad_norm": 1.398889183998108, "learning_rate": 2.833525099515384e-05, "loss": 0.1396, "step": 8026 }, { "epoch": 0.17687726894621736, "grad_norm": 1.2417194843292236, "learning_rate": 2.833476079019144e-05, "loss": 0.1298, "step": 8027 }, { "epoch": 0.17689930423573352, "grad_norm": 1.260744333267212, "learning_rate": 2.8334270517307953e-05, "loss": 0.1218, "step": 8028 }, { "epoch": 0.1769213395252497, "grad_norm": 0.7428362965583801, "learning_rate": 2.833378017650587e-05, "loss": 0.1374, "step": 8029 }, { "epoch": 0.17694337481476585, "grad_norm": 0.6504249572753906, "learning_rate": 2.83332897677877e-05, "loss": 0.1079, "step": 8030 }, { "epoch": 0.17696541010428202, "grad_norm": 0.859271228313446, "learning_rate": 2.8332799291155932e-05, "loss": 0.0949, "step": 8031 }, { "epoch": 0.17698744539379815, "grad_norm": 0.9781113266944885, "learning_rate": 2.833230874661307e-05, "loss": 0.1242, "step": 8032 }, { "epoch": 0.17700948068331432, "grad_norm": 0.9217545986175537, "learning_rate": 2.8331818134161607e-05, "loss": 0.128, "step": 8033 }, { "epoch": 0.17703151597283048, "grad_norm": 0.8313372135162354, "learning_rate": 2.833132745380405e-05, "loss": 0.1295, "step": 8034 }, { "epoch": 0.17705355126234665, "grad_norm": 1.1521767377853394, "learning_rate": 2.8330836705542895e-05, "loss": 0.118, "step": 8035 }, { "epoch": 0.1770755865518628, "grad_norm": 1.1957440376281738, "learning_rate": 2.8330345889380636e-05, "loss": 0.138, "step": 8036 }, { "epoch": 0.17709762184137898, "grad_norm": 0.5165948867797852, "learning_rate": 2.8329855005319775e-05, "loss": 0.1058, "step": 8037 }, { "epoch": 0.17711965713089514, "grad_norm": 1.1799726486206055, "learning_rate": 2.8329364053362817e-05, "loss": 0.1378, "step": 8038 }, { "epoch": 0.17714169242041128, "grad_norm": 0.8636168241500854, "learning_rate": 2.832887303351226e-05, "loss": 0.1297, "step": 8039 }, { "epoch": 0.17716372770992744, "grad_norm": 1.224596381187439, "learning_rate": 2.83283819457706e-05, "loss": 0.1091, "step": 8040 }, { "epoch": 0.1771857629994436, "grad_norm": 0.948966383934021, "learning_rate": 2.8327890790140356e-05, "loss": 0.1187, "step": 8041 }, { "epoch": 0.17720779828895977, "grad_norm": 0.9135875701904297, "learning_rate": 2.8327399566624004e-05, "loss": 0.1249, "step": 8042 }, { "epoch": 0.17722983357847594, "grad_norm": 1.0094975233078003, "learning_rate": 2.832690827522407e-05, "loss": 0.1159, "step": 8043 }, { "epoch": 0.1772518688679921, "grad_norm": 0.799690306186676, "learning_rate": 2.8326416915943042e-05, "loss": 0.0875, "step": 8044 }, { "epoch": 0.17727390415750824, "grad_norm": 1.2259783744812012, "learning_rate": 2.8325925488783423e-05, "loss": 0.1416, "step": 8045 }, { "epoch": 0.1772959394470244, "grad_norm": 1.0586280822753906, "learning_rate": 2.8325433993747722e-05, "loss": 0.126, "step": 8046 }, { "epoch": 0.17731797473654057, "grad_norm": 0.9340918660163879, "learning_rate": 2.8324942430838442e-05, "loss": 0.1178, "step": 8047 }, { "epoch": 0.17734001002605673, "grad_norm": 0.8461371660232544, "learning_rate": 2.8324450800058082e-05, "loss": 0.1335, "step": 8048 }, { "epoch": 0.1773620453155729, "grad_norm": 0.9249021410942078, "learning_rate": 2.8323959101409146e-05, "loss": 0.1244, "step": 8049 }, { "epoch": 0.17738408060508906, "grad_norm": 0.6824926733970642, "learning_rate": 2.8323467334894144e-05, "loss": 0.0779, "step": 8050 }, { "epoch": 0.1774061158946052, "grad_norm": 1.0126023292541504, "learning_rate": 2.832297550051558e-05, "loss": 0.1686, "step": 8051 }, { "epoch": 0.17742815118412136, "grad_norm": 0.9469605684280396, "learning_rate": 2.8322483598275958e-05, "loss": 0.1091, "step": 8052 }, { "epoch": 0.17745018647363753, "grad_norm": 1.0529574155807495, "learning_rate": 2.8321991628177784e-05, "loss": 0.1656, "step": 8053 }, { "epoch": 0.1774722217631537, "grad_norm": 0.7189494371414185, "learning_rate": 2.8321499590223556e-05, "loss": 0.0786, "step": 8054 }, { "epoch": 0.17749425705266986, "grad_norm": 1.7254530191421509, "learning_rate": 2.8321007484415792e-05, "loss": 0.1363, "step": 8055 }, { "epoch": 0.17751629234218602, "grad_norm": 0.814119815826416, "learning_rate": 2.8320515310756993e-05, "loss": 0.0873, "step": 8056 }, { "epoch": 0.17753832763170216, "grad_norm": 1.1308571100234985, "learning_rate": 2.832002306924967e-05, "loss": 0.1194, "step": 8057 }, { "epoch": 0.17756036292121832, "grad_norm": 0.8189984560012817, "learning_rate": 2.831953075989632e-05, "loss": 0.1189, "step": 8058 }, { "epoch": 0.1775823982107345, "grad_norm": 0.860804557800293, "learning_rate": 2.8319038382699462e-05, "loss": 0.0996, "step": 8059 }, { "epoch": 0.17760443350025065, "grad_norm": 0.7755157947540283, "learning_rate": 2.8318545937661596e-05, "loss": 0.1095, "step": 8060 }, { "epoch": 0.17762646878976682, "grad_norm": 1.1963458061218262, "learning_rate": 2.8318053424785235e-05, "loss": 0.1083, "step": 8061 }, { "epoch": 0.17764850407928298, "grad_norm": 0.9337216019630432, "learning_rate": 2.831756084407289e-05, "loss": 0.0849, "step": 8062 }, { "epoch": 0.17767053936879912, "grad_norm": 0.9637287855148315, "learning_rate": 2.8317068195527062e-05, "loss": 0.1252, "step": 8063 }, { "epoch": 0.17769257465831528, "grad_norm": 0.8375436067581177, "learning_rate": 2.8316575479150266e-05, "loss": 0.1134, "step": 8064 }, { "epoch": 0.17771460994783145, "grad_norm": 1.2335807085037231, "learning_rate": 2.831608269494501e-05, "loss": 0.1017, "step": 8065 }, { "epoch": 0.1777366452373476, "grad_norm": 1.1479074954986572, "learning_rate": 2.8315589842913803e-05, "loss": 0.1134, "step": 8066 }, { "epoch": 0.17775868052686378, "grad_norm": 0.8424142599105835, "learning_rate": 2.8315096923059154e-05, "loss": 0.1281, "step": 8067 }, { "epoch": 0.17778071581637994, "grad_norm": 1.225188970565796, "learning_rate": 2.8314603935383583e-05, "loss": 0.1261, "step": 8068 }, { "epoch": 0.1778027511058961, "grad_norm": 0.9478654861450195, "learning_rate": 2.831411087988959e-05, "loss": 0.1173, "step": 8069 }, { "epoch": 0.17782478639541224, "grad_norm": 1.1857510805130005, "learning_rate": 2.831361775657969e-05, "loss": 0.1118, "step": 8070 }, { "epoch": 0.1778468216849284, "grad_norm": 1.2037297487258911, "learning_rate": 2.8313124565456398e-05, "loss": 0.127, "step": 8071 }, { "epoch": 0.17786885697444457, "grad_norm": 0.8526139855384827, "learning_rate": 2.831263130652222e-05, "loss": 0.1285, "step": 8072 }, { "epoch": 0.17789089226396074, "grad_norm": 0.6650679707527161, "learning_rate": 2.8312137979779674e-05, "loss": 0.13, "step": 8073 }, { "epoch": 0.1779129275534769, "grad_norm": 1.0326330661773682, "learning_rate": 2.831164458523127e-05, "loss": 0.1438, "step": 8074 }, { "epoch": 0.17793496284299307, "grad_norm": 1.030229926109314, "learning_rate": 2.8311151122879527e-05, "loss": 0.0811, "step": 8075 }, { "epoch": 0.1779569981325092, "grad_norm": 0.623976469039917, "learning_rate": 2.831065759272695e-05, "loss": 0.0538, "step": 8076 }, { "epoch": 0.17797903342202537, "grad_norm": 1.1767128705978394, "learning_rate": 2.8310163994776053e-05, "loss": 0.1822, "step": 8077 }, { "epoch": 0.17800106871154153, "grad_norm": 0.9464853405952454, "learning_rate": 2.8309670329029358e-05, "loss": 0.1215, "step": 8078 }, { "epoch": 0.1780231040010577, "grad_norm": 0.7801374793052673, "learning_rate": 2.830917659548937e-05, "loss": 0.1072, "step": 8079 }, { "epoch": 0.17804513929057386, "grad_norm": 0.7094269394874573, "learning_rate": 2.8308682794158613e-05, "loss": 0.1004, "step": 8080 }, { "epoch": 0.17806717458009003, "grad_norm": 0.6460886001586914, "learning_rate": 2.8308188925039598e-05, "loss": 0.1205, "step": 8081 }, { "epoch": 0.17808920986960616, "grad_norm": 0.8830350637435913, "learning_rate": 2.8307694988134837e-05, "loss": 0.0948, "step": 8082 }, { "epoch": 0.17811124515912233, "grad_norm": 0.9377965331077576, "learning_rate": 2.8307200983446854e-05, "loss": 0.1432, "step": 8083 }, { "epoch": 0.1781332804486385, "grad_norm": 0.8859236836433411, "learning_rate": 2.8306706910978156e-05, "loss": 0.1138, "step": 8084 }, { "epoch": 0.17815531573815466, "grad_norm": 0.628831148147583, "learning_rate": 2.830621277073126e-05, "loss": 0.0854, "step": 8085 }, { "epoch": 0.17817735102767082, "grad_norm": 1.0330618619918823, "learning_rate": 2.8305718562708695e-05, "loss": 0.1245, "step": 8086 }, { "epoch": 0.178199386317187, "grad_norm": 0.7236546277999878, "learning_rate": 2.830522428691297e-05, "loss": 0.078, "step": 8087 }, { "epoch": 0.17822142160670312, "grad_norm": 0.7090126872062683, "learning_rate": 2.8304729943346597e-05, "loss": 0.1065, "step": 8088 }, { "epoch": 0.1782434568962193, "grad_norm": 1.1798027753829956, "learning_rate": 2.8304235532012102e-05, "loss": 0.1143, "step": 8089 }, { "epoch": 0.17826549218573545, "grad_norm": 0.9618349671363831, "learning_rate": 2.8303741052912004e-05, "loss": 0.1179, "step": 8090 }, { "epoch": 0.17828752747525162, "grad_norm": 0.5944963097572327, "learning_rate": 2.830324650604882e-05, "loss": 0.0741, "step": 8091 }, { "epoch": 0.17830956276476778, "grad_norm": 0.6777182817459106, "learning_rate": 2.8302751891425062e-05, "loss": 0.1139, "step": 8092 }, { "epoch": 0.17833159805428395, "grad_norm": 0.7583731412887573, "learning_rate": 2.830225720904326e-05, "loss": 0.1041, "step": 8093 }, { "epoch": 0.17835363334380008, "grad_norm": 1.045560598373413, "learning_rate": 2.8301762458905927e-05, "loss": 0.1034, "step": 8094 }, { "epoch": 0.17837566863331625, "grad_norm": 1.2348190546035767, "learning_rate": 2.8301267641015584e-05, "loss": 0.135, "step": 8095 }, { "epoch": 0.1783977039228324, "grad_norm": 0.8532559871673584, "learning_rate": 2.830077275537475e-05, "loss": 0.1151, "step": 8096 }, { "epoch": 0.17841973921234858, "grad_norm": 1.3559536933898926, "learning_rate": 2.8300277801985954e-05, "loss": 0.0963, "step": 8097 }, { "epoch": 0.17844177450186474, "grad_norm": 1.2415211200714111, "learning_rate": 2.8299782780851705e-05, "loss": 0.1296, "step": 8098 }, { "epoch": 0.1784638097913809, "grad_norm": 0.9876164197921753, "learning_rate": 2.8299287691974533e-05, "loss": 0.0899, "step": 8099 }, { "epoch": 0.17848584508089704, "grad_norm": 1.0228029489517212, "learning_rate": 2.829879253535696e-05, "loss": 0.1115, "step": 8100 }, { "epoch": 0.1785078803704132, "grad_norm": 1.4960027933120728, "learning_rate": 2.82982973110015e-05, "loss": 0.119, "step": 8101 }, { "epoch": 0.17852991565992937, "grad_norm": 0.512981116771698, "learning_rate": 2.8297802018910686e-05, "loss": 0.1234, "step": 8102 }, { "epoch": 0.17855195094944554, "grad_norm": 1.058111548423767, "learning_rate": 2.8297306659087033e-05, "loss": 0.1331, "step": 8103 }, { "epoch": 0.1785739862389617, "grad_norm": 1.226070761680603, "learning_rate": 2.8296811231533065e-05, "loss": 0.1188, "step": 8104 }, { "epoch": 0.17859602152847787, "grad_norm": 0.7485634088516235, "learning_rate": 2.829631573625131e-05, "loss": 0.141, "step": 8105 }, { "epoch": 0.17861805681799403, "grad_norm": 0.5652481317520142, "learning_rate": 2.8295820173244285e-05, "loss": 0.0872, "step": 8106 }, { "epoch": 0.17864009210751017, "grad_norm": 0.6430205702781677, "learning_rate": 2.8295324542514522e-05, "loss": 0.1356, "step": 8107 }, { "epoch": 0.17866212739702633, "grad_norm": 0.8752095699310303, "learning_rate": 2.8294828844064536e-05, "loss": 0.0995, "step": 8108 }, { "epoch": 0.1786841626865425, "grad_norm": 1.1584930419921875, "learning_rate": 2.8294333077896865e-05, "loss": 0.1099, "step": 8109 }, { "epoch": 0.17870619797605866, "grad_norm": 0.968569815158844, "learning_rate": 2.8293837244014025e-05, "loss": 0.1387, "step": 8110 }, { "epoch": 0.17872823326557483, "grad_norm": 1.1590397357940674, "learning_rate": 2.829334134241854e-05, "loss": 0.1088, "step": 8111 }, { "epoch": 0.178750268555091, "grad_norm": 0.5560265183448792, "learning_rate": 2.8292845373112942e-05, "loss": 0.0934, "step": 8112 }, { "epoch": 0.17877230384460713, "grad_norm": 0.6645358204841614, "learning_rate": 2.829234933609975e-05, "loss": 0.0871, "step": 8113 }, { "epoch": 0.1787943391341233, "grad_norm": 1.7625598907470703, "learning_rate": 2.8291853231381497e-05, "loss": 0.1907, "step": 8114 }, { "epoch": 0.17881637442363946, "grad_norm": 1.1230391263961792, "learning_rate": 2.8291357058960707e-05, "loss": 0.1225, "step": 8115 }, { "epoch": 0.17883840971315562, "grad_norm": 1.4745675325393677, "learning_rate": 2.829086081883991e-05, "loss": 0.1345, "step": 8116 }, { "epoch": 0.1788604450026718, "grad_norm": 0.8998678922653198, "learning_rate": 2.8290364511021632e-05, "loss": 0.0876, "step": 8117 }, { "epoch": 0.17888248029218795, "grad_norm": 0.8351501226425171, "learning_rate": 2.82898681355084e-05, "loss": 0.0858, "step": 8118 }, { "epoch": 0.1789045155817041, "grad_norm": 0.8091592788696289, "learning_rate": 2.8289371692302746e-05, "loss": 0.1426, "step": 8119 }, { "epoch": 0.17892655087122025, "grad_norm": 0.7716228365898132, "learning_rate": 2.828887518140719e-05, "loss": 0.1193, "step": 8120 }, { "epoch": 0.17894858616073642, "grad_norm": 1.2308056354522705, "learning_rate": 2.8288378602824267e-05, "loss": 0.1244, "step": 8121 }, { "epoch": 0.17897062145025258, "grad_norm": 0.6928300261497498, "learning_rate": 2.8287881956556508e-05, "loss": 0.1203, "step": 8122 }, { "epoch": 0.17899265673976875, "grad_norm": 1.2265585660934448, "learning_rate": 2.8287385242606443e-05, "loss": 0.1312, "step": 8123 }, { "epoch": 0.1790146920292849, "grad_norm": 0.9614922404289246, "learning_rate": 2.82868884609766e-05, "loss": 0.1194, "step": 8124 }, { "epoch": 0.17903672731880105, "grad_norm": 1.0110244750976562, "learning_rate": 2.8286391611669504e-05, "loss": 0.12, "step": 8125 }, { "epoch": 0.1790587626083172, "grad_norm": 1.2694364786148071, "learning_rate": 2.8285894694687692e-05, "loss": 0.0788, "step": 8126 }, { "epoch": 0.17908079789783338, "grad_norm": 1.5657236576080322, "learning_rate": 2.8285397710033696e-05, "loss": 0.1369, "step": 8127 }, { "epoch": 0.17910283318734954, "grad_norm": 1.2363369464874268, "learning_rate": 2.828490065771005e-05, "loss": 0.1042, "step": 8128 }, { "epoch": 0.1791248684768657, "grad_norm": 1.0043079853057861, "learning_rate": 2.828440353771927e-05, "loss": 0.1149, "step": 8129 }, { "epoch": 0.17914690376638187, "grad_norm": 0.6103073954582214, "learning_rate": 2.8283906350063906e-05, "loss": 0.0903, "step": 8130 }, { "epoch": 0.179168939055898, "grad_norm": 0.9655864238739014, "learning_rate": 2.8283409094746482e-05, "loss": 0.1051, "step": 8131 }, { "epoch": 0.17919097434541417, "grad_norm": 1.1534414291381836, "learning_rate": 2.8282911771769537e-05, "loss": 0.1004, "step": 8132 }, { "epoch": 0.17921300963493034, "grad_norm": 0.8011594414710999, "learning_rate": 2.828241438113559e-05, "loss": 0.1045, "step": 8133 }, { "epoch": 0.1792350449244465, "grad_norm": 1.201500415802002, "learning_rate": 2.8281916922847192e-05, "loss": 0.1301, "step": 8134 }, { "epoch": 0.17925708021396267, "grad_norm": 1.4342869520187378, "learning_rate": 2.8281419396906868e-05, "loss": 0.143, "step": 8135 }, { "epoch": 0.17927911550347883, "grad_norm": 0.8389982581138611, "learning_rate": 2.828092180331715e-05, "loss": 0.1258, "step": 8136 }, { "epoch": 0.17930115079299497, "grad_norm": 0.8442405462265015, "learning_rate": 2.8280424142080575e-05, "loss": 0.1247, "step": 8137 }, { "epoch": 0.17932318608251113, "grad_norm": 1.0692121982574463, "learning_rate": 2.8279926413199678e-05, "loss": 0.1252, "step": 8138 }, { "epoch": 0.1793452213720273, "grad_norm": 0.8064397573471069, "learning_rate": 2.8279428616676993e-05, "loss": 0.0882, "step": 8139 }, { "epoch": 0.17936725666154346, "grad_norm": 1.0716828107833862, "learning_rate": 2.8278930752515065e-05, "loss": 0.0981, "step": 8140 }, { "epoch": 0.17938929195105963, "grad_norm": 0.9391974210739136, "learning_rate": 2.8278432820716417e-05, "loss": 0.1269, "step": 8141 }, { "epoch": 0.1794113272405758, "grad_norm": 1.0991579294204712, "learning_rate": 2.8277934821283588e-05, "loss": 0.1277, "step": 8142 }, { "epoch": 0.17943336253009196, "grad_norm": 0.8194456100463867, "learning_rate": 2.8277436754219122e-05, "loss": 0.1016, "step": 8143 }, { "epoch": 0.1794553978196081, "grad_norm": 1.1693755388259888, "learning_rate": 2.8276938619525547e-05, "loss": 0.1304, "step": 8144 }, { "epoch": 0.17947743310912426, "grad_norm": 0.892099142074585, "learning_rate": 2.8276440417205403e-05, "loss": 0.1514, "step": 8145 }, { "epoch": 0.17949946839864042, "grad_norm": 1.0622570514678955, "learning_rate": 2.827594214726123e-05, "loss": 0.1102, "step": 8146 }, { "epoch": 0.1795215036881566, "grad_norm": 0.967136561870575, "learning_rate": 2.8275443809695563e-05, "loss": 0.1048, "step": 8147 }, { "epoch": 0.17954353897767275, "grad_norm": 0.9406852126121521, "learning_rate": 2.8274945404510947e-05, "loss": 0.0916, "step": 8148 }, { "epoch": 0.17956557426718892, "grad_norm": 0.7954489588737488, "learning_rate": 2.8274446931709914e-05, "loss": 0.1419, "step": 8149 }, { "epoch": 0.17958760955670505, "grad_norm": 1.0535911321640015, "learning_rate": 2.8273948391295e-05, "loss": 0.1231, "step": 8150 }, { "epoch": 0.17960964484622122, "grad_norm": 1.0506618022918701, "learning_rate": 2.827344978326875e-05, "loss": 0.1152, "step": 8151 }, { "epoch": 0.17963168013573738, "grad_norm": 0.7885473966598511, "learning_rate": 2.8272951107633704e-05, "loss": 0.0996, "step": 8152 }, { "epoch": 0.17965371542525355, "grad_norm": 0.9829795956611633, "learning_rate": 2.82724523643924e-05, "loss": 0.1163, "step": 8153 }, { "epoch": 0.1796757507147697, "grad_norm": 1.2354069948196411, "learning_rate": 2.827195355354738e-05, "loss": 0.1076, "step": 8154 }, { "epoch": 0.17969778600428588, "grad_norm": 0.6933909058570862, "learning_rate": 2.8271454675101183e-05, "loss": 0.1314, "step": 8155 }, { "epoch": 0.179719821293802, "grad_norm": 1.2442879676818848, "learning_rate": 2.827095572905635e-05, "loss": 0.1492, "step": 8156 }, { "epoch": 0.17974185658331818, "grad_norm": 0.6056470274925232, "learning_rate": 2.8270456715415424e-05, "loss": 0.1035, "step": 8157 }, { "epoch": 0.17976389187283434, "grad_norm": 1.281330943107605, "learning_rate": 2.8269957634180945e-05, "loss": 0.1393, "step": 8158 }, { "epoch": 0.1797859271623505, "grad_norm": 1.0972000360488892, "learning_rate": 2.826945848535545e-05, "loss": 0.1029, "step": 8159 }, { "epoch": 0.17980796245186667, "grad_norm": 0.9087929725646973, "learning_rate": 2.82689592689415e-05, "loss": 0.1316, "step": 8160 }, { "epoch": 0.17982999774138284, "grad_norm": 6.0672383308410645, "learning_rate": 2.8268459984941614e-05, "loss": 0.0924, "step": 8161 }, { "epoch": 0.17985203303089897, "grad_norm": 1.089316964149475, "learning_rate": 2.826796063335835e-05, "loss": 0.1224, "step": 8162 }, { "epoch": 0.17987406832041514, "grad_norm": 1.0264875888824463, "learning_rate": 2.8267461214194253e-05, "loss": 0.1079, "step": 8163 }, { "epoch": 0.1798961036099313, "grad_norm": 1.3821889162063599, "learning_rate": 2.8266961727451853e-05, "loss": 0.0984, "step": 8164 }, { "epoch": 0.17991813889944747, "grad_norm": 0.9051612019538879, "learning_rate": 2.8266462173133708e-05, "loss": 0.1229, "step": 8165 }, { "epoch": 0.17994017418896363, "grad_norm": 0.7847467660903931, "learning_rate": 2.8265962551242357e-05, "loss": 0.1321, "step": 8166 }, { "epoch": 0.1799622094784798, "grad_norm": 1.0572353601455688, "learning_rate": 2.826546286178034e-05, "loss": 0.1975, "step": 8167 }, { "epoch": 0.17998424476799593, "grad_norm": 2.791656970977783, "learning_rate": 2.8264963104750213e-05, "loss": 0.1449, "step": 8168 }, { "epoch": 0.1800062800575121, "grad_norm": 1.0789016485214233, "learning_rate": 2.826446328015451e-05, "loss": 0.1488, "step": 8169 }, { "epoch": 0.18002831534702826, "grad_norm": 1.0018459558486938, "learning_rate": 2.826396338799579e-05, "loss": 0.1298, "step": 8170 }, { "epoch": 0.18005035063654443, "grad_norm": 0.6419495344161987, "learning_rate": 2.8263463428276585e-05, "loss": 0.0834, "step": 8171 }, { "epoch": 0.1800723859260606, "grad_norm": 0.9101114869117737, "learning_rate": 2.8262963400999454e-05, "loss": 0.1471, "step": 8172 }, { "epoch": 0.18009442121557676, "grad_norm": 1.1104487180709839, "learning_rate": 2.826246330616693e-05, "loss": 0.1302, "step": 8173 }, { "epoch": 0.18011645650509292, "grad_norm": 0.7856417894363403, "learning_rate": 2.8261963143781576e-05, "loss": 0.0845, "step": 8174 }, { "epoch": 0.18013849179460906, "grad_norm": 0.8755954504013062, "learning_rate": 2.8261462913845927e-05, "loss": 0.1289, "step": 8175 }, { "epoch": 0.18016052708412522, "grad_norm": 0.8185240626335144, "learning_rate": 2.826096261636254e-05, "loss": 0.1267, "step": 8176 }, { "epoch": 0.1801825623736414, "grad_norm": 1.2793042659759521, "learning_rate": 2.8260462251333955e-05, "loss": 0.1131, "step": 8177 }, { "epoch": 0.18020459766315755, "grad_norm": 0.7361956238746643, "learning_rate": 2.8259961818762732e-05, "loss": 0.0749, "step": 8178 }, { "epoch": 0.18022663295267372, "grad_norm": 1.232555866241455, "learning_rate": 2.8259461318651405e-05, "loss": 0.17, "step": 8179 }, { "epoch": 0.18024866824218988, "grad_norm": 0.8751382827758789, "learning_rate": 2.8258960751002537e-05, "loss": 0.151, "step": 8180 }, { "epoch": 0.18027070353170602, "grad_norm": 0.9873619675636292, "learning_rate": 2.8258460115818672e-05, "loss": 0.1291, "step": 8181 }, { "epoch": 0.18029273882122218, "grad_norm": 1.587741494178772, "learning_rate": 2.8257959413102357e-05, "loss": 0.1354, "step": 8182 }, { "epoch": 0.18031477411073835, "grad_norm": 0.7071094512939453, "learning_rate": 2.8257458642856147e-05, "loss": 0.1026, "step": 8183 }, { "epoch": 0.1803368094002545, "grad_norm": 2.0059773921966553, "learning_rate": 2.8256957805082587e-05, "loss": 0.1981, "step": 8184 }, { "epoch": 0.18035884468977068, "grad_norm": 0.6986569166183472, "learning_rate": 2.8256456899784234e-05, "loss": 0.1163, "step": 8185 }, { "epoch": 0.18038087997928684, "grad_norm": 0.7222525477409363, "learning_rate": 2.825595592696364e-05, "loss": 0.0891, "step": 8186 }, { "epoch": 0.18040291526880298, "grad_norm": 0.9508698582649231, "learning_rate": 2.8255454886623354e-05, "loss": 0.1116, "step": 8187 }, { "epoch": 0.18042495055831914, "grad_norm": 1.3853200674057007, "learning_rate": 2.825495377876592e-05, "loss": 0.1349, "step": 8188 }, { "epoch": 0.1804469858478353, "grad_norm": 1.2098859548568726, "learning_rate": 2.825445260339391e-05, "loss": 0.1465, "step": 8189 }, { "epoch": 0.18046902113735147, "grad_norm": 0.8542657494544983, "learning_rate": 2.8253951360509857e-05, "loss": 0.1161, "step": 8190 }, { "epoch": 0.18049105642686764, "grad_norm": 1.196505069732666, "learning_rate": 2.8253450050116327e-05, "loss": 0.0982, "step": 8191 }, { "epoch": 0.1805130917163838, "grad_norm": 1.119523048400879, "learning_rate": 2.8252948672215867e-05, "loss": 0.1449, "step": 8192 }, { "epoch": 0.18053512700589994, "grad_norm": 0.8572479486465454, "learning_rate": 2.825244722681103e-05, "loss": 0.1683, "step": 8193 }, { "epoch": 0.1805571622954161, "grad_norm": 0.8567147254943848, "learning_rate": 2.8251945713904382e-05, "loss": 0.1346, "step": 8194 }, { "epoch": 0.18057919758493227, "grad_norm": 0.8813665509223938, "learning_rate": 2.825144413349846e-05, "loss": 0.1447, "step": 8195 }, { "epoch": 0.18060123287444843, "grad_norm": 0.8803912401199341, "learning_rate": 2.825094248559583e-05, "loss": 0.0954, "step": 8196 }, { "epoch": 0.1806232681639646, "grad_norm": 1.263717770576477, "learning_rate": 2.825044077019904e-05, "loss": 0.1495, "step": 8197 }, { "epoch": 0.18064530345348076, "grad_norm": 0.9097087383270264, "learning_rate": 2.8249938987310652e-05, "loss": 0.1079, "step": 8198 }, { "epoch": 0.1806673387429969, "grad_norm": 0.8601842522621155, "learning_rate": 2.8249437136933222e-05, "loss": 0.1241, "step": 8199 }, { "epoch": 0.18068937403251306, "grad_norm": 0.9581282138824463, "learning_rate": 2.82489352190693e-05, "loss": 0.1158, "step": 8200 }, { "epoch": 0.18071140932202923, "grad_norm": 1.3444100618362427, "learning_rate": 2.824843323372145e-05, "loss": 0.1302, "step": 8201 }, { "epoch": 0.1807334446115454, "grad_norm": 0.8529081344604492, "learning_rate": 2.824793118089222e-05, "loss": 0.1037, "step": 8202 }, { "epoch": 0.18075547990106156, "grad_norm": 1.0254563093185425, "learning_rate": 2.824742906058418e-05, "loss": 0.1402, "step": 8203 }, { "epoch": 0.18077751519057772, "grad_norm": 0.9826001524925232, "learning_rate": 2.8246926872799876e-05, "loss": 0.1339, "step": 8204 }, { "epoch": 0.18079955048009386, "grad_norm": 1.306275725364685, "learning_rate": 2.824642461754187e-05, "loss": 0.1552, "step": 8205 }, { "epoch": 0.18082158576961002, "grad_norm": 0.902676522731781, "learning_rate": 2.8245922294812718e-05, "loss": 0.1045, "step": 8206 }, { "epoch": 0.1808436210591262, "grad_norm": 1.2081327438354492, "learning_rate": 2.8245419904614987e-05, "loss": 0.1183, "step": 8207 }, { "epoch": 0.18086565634864235, "grad_norm": 1.2309114933013916, "learning_rate": 2.8244917446951223e-05, "loss": 0.1563, "step": 8208 }, { "epoch": 0.18088769163815852, "grad_norm": 0.7082921862602234, "learning_rate": 2.8244414921823993e-05, "loss": 0.0958, "step": 8209 }, { "epoch": 0.18090972692767468, "grad_norm": 0.8459116220474243, "learning_rate": 2.824391232923586e-05, "loss": 0.1405, "step": 8210 }, { "epoch": 0.18093176221719084, "grad_norm": 0.7166637182235718, "learning_rate": 2.8243409669189374e-05, "loss": 0.095, "step": 8211 }, { "epoch": 0.18095379750670698, "grad_norm": 0.7547314167022705, "learning_rate": 2.8242906941687107e-05, "loss": 0.0916, "step": 8212 }, { "epoch": 0.18097583279622315, "grad_norm": 1.038482427597046, "learning_rate": 2.824240414673161e-05, "loss": 0.1246, "step": 8213 }, { "epoch": 0.1809978680857393, "grad_norm": 0.503993570804596, "learning_rate": 2.8241901284325444e-05, "loss": 0.0668, "step": 8214 }, { "epoch": 0.18101990337525548, "grad_norm": 1.1359059810638428, "learning_rate": 2.8241398354471177e-05, "loss": 0.1415, "step": 8215 }, { "epoch": 0.18104193866477164, "grad_norm": 1.1856698989868164, "learning_rate": 2.8240895357171367e-05, "loss": 0.1113, "step": 8216 }, { "epoch": 0.1810639739542878, "grad_norm": 0.8225911259651184, "learning_rate": 2.8240392292428577e-05, "loss": 0.1337, "step": 8217 }, { "epoch": 0.18108600924380394, "grad_norm": 0.9570598006248474, "learning_rate": 2.8239889160245373e-05, "loss": 0.1122, "step": 8218 }, { "epoch": 0.1811080445333201, "grad_norm": 1.3322477340698242, "learning_rate": 2.823938596062431e-05, "loss": 0.1514, "step": 8219 }, { "epoch": 0.18113007982283627, "grad_norm": 0.9039791822433472, "learning_rate": 2.8238882693567954e-05, "loss": 0.1534, "step": 8220 }, { "epoch": 0.18115211511235244, "grad_norm": 1.0099371671676636, "learning_rate": 2.823837935907887e-05, "loss": 0.1291, "step": 8221 }, { "epoch": 0.1811741504018686, "grad_norm": 0.9177562594413757, "learning_rate": 2.8237875957159622e-05, "loss": 0.1231, "step": 8222 }, { "epoch": 0.18119618569138476, "grad_norm": 1.1285274028778076, "learning_rate": 2.8237372487812768e-05, "loss": 0.1233, "step": 8223 }, { "epoch": 0.1812182209809009, "grad_norm": 1.8459060192108154, "learning_rate": 2.8236868951040885e-05, "loss": 0.1618, "step": 8224 }, { "epoch": 0.18124025627041707, "grad_norm": 1.2198822498321533, "learning_rate": 2.8236365346846525e-05, "loss": 0.1687, "step": 8225 }, { "epoch": 0.18126229155993323, "grad_norm": 0.7225150465965271, "learning_rate": 2.823586167523226e-05, "loss": 0.1334, "step": 8226 }, { "epoch": 0.1812843268494494, "grad_norm": 1.0026416778564453, "learning_rate": 2.8235357936200654e-05, "loss": 0.1234, "step": 8227 }, { "epoch": 0.18130636213896556, "grad_norm": 0.8477429747581482, "learning_rate": 2.8234854129754274e-05, "loss": 0.1308, "step": 8228 }, { "epoch": 0.18132839742848172, "grad_norm": 0.9973001480102539, "learning_rate": 2.8234350255895682e-05, "loss": 0.1188, "step": 8229 }, { "epoch": 0.18135043271799786, "grad_norm": 2.456803321838379, "learning_rate": 2.823384631462745e-05, "loss": 0.1156, "step": 8230 }, { "epoch": 0.18137246800751403, "grad_norm": 0.8312556147575378, "learning_rate": 2.823334230595214e-05, "loss": 0.1494, "step": 8231 }, { "epoch": 0.1813945032970302, "grad_norm": 1.2444261312484741, "learning_rate": 2.8232838229872323e-05, "loss": 0.1168, "step": 8232 }, { "epoch": 0.18141653858654636, "grad_norm": 0.7521578073501587, "learning_rate": 2.8232334086390564e-05, "loss": 0.1156, "step": 8233 }, { "epoch": 0.18143857387606252, "grad_norm": 1.0661213397979736, "learning_rate": 2.8231829875509435e-05, "loss": 0.0998, "step": 8234 }, { "epoch": 0.18146060916557868, "grad_norm": 1.2607940435409546, "learning_rate": 2.8231325597231496e-05, "loss": 0.0926, "step": 8235 }, { "epoch": 0.18148264445509482, "grad_norm": 0.6783353686332703, "learning_rate": 2.8230821251559324e-05, "loss": 0.0566, "step": 8236 }, { "epoch": 0.18150467974461099, "grad_norm": 1.4997718334197998, "learning_rate": 2.8230316838495482e-05, "loss": 0.0839, "step": 8237 }, { "epoch": 0.18152671503412715, "grad_norm": 0.7343640923500061, "learning_rate": 2.8229812358042543e-05, "loss": 0.0703, "step": 8238 }, { "epoch": 0.18154875032364332, "grad_norm": 1.372804880142212, "learning_rate": 2.8229307810203074e-05, "loss": 0.1414, "step": 8239 }, { "epoch": 0.18157078561315948, "grad_norm": 1.5789411067962646, "learning_rate": 2.8228803194979647e-05, "loss": 0.1646, "step": 8240 }, { "epoch": 0.18159282090267564, "grad_norm": 1.0295511484146118, "learning_rate": 2.8228298512374834e-05, "loss": 0.098, "step": 8241 }, { "epoch": 0.18161485619219178, "grad_norm": 0.9073466658592224, "learning_rate": 2.82277937623912e-05, "loss": 0.117, "step": 8242 }, { "epoch": 0.18163689148170795, "grad_norm": 0.9675446152687073, "learning_rate": 2.822728894503132e-05, "loss": 0.1157, "step": 8243 }, { "epoch": 0.1816589267712241, "grad_norm": 0.709438145160675, "learning_rate": 2.8226784060297763e-05, "loss": 0.0995, "step": 8244 }, { "epoch": 0.18168096206074028, "grad_norm": 1.2301620244979858, "learning_rate": 2.8226279108193104e-05, "loss": 0.1008, "step": 8245 }, { "epoch": 0.18170299735025644, "grad_norm": 1.932910680770874, "learning_rate": 2.8225774088719914e-05, "loss": 0.1264, "step": 8246 }, { "epoch": 0.1817250326397726, "grad_norm": 1.5175715684890747, "learning_rate": 2.822526900188076e-05, "loss": 0.1206, "step": 8247 }, { "epoch": 0.18174706792928877, "grad_norm": 1.352060317993164, "learning_rate": 2.8224763847678224e-05, "loss": 0.1078, "step": 8248 }, { "epoch": 0.1817691032188049, "grad_norm": 1.0876420736312866, "learning_rate": 2.8224258626114868e-05, "loss": 0.113, "step": 8249 }, { "epoch": 0.18179113850832107, "grad_norm": 0.9292151927947998, "learning_rate": 2.8223753337193276e-05, "loss": 0.1125, "step": 8250 }, { "epoch": 0.18181317379783724, "grad_norm": 0.6459176540374756, "learning_rate": 2.8223247980916018e-05, "loss": 0.1447, "step": 8251 }, { "epoch": 0.1818352090873534, "grad_norm": 0.6865086555480957, "learning_rate": 2.8222742557285665e-05, "loss": 0.1242, "step": 8252 }, { "epoch": 0.18185724437686956, "grad_norm": 0.9142616987228394, "learning_rate": 2.8222237066304794e-05, "loss": 0.0992, "step": 8253 }, { "epoch": 0.18187927966638573, "grad_norm": 0.9529656171798706, "learning_rate": 2.8221731507975978e-05, "loss": 0.1217, "step": 8254 }, { "epoch": 0.18190131495590187, "grad_norm": 0.9434441328048706, "learning_rate": 2.822122588230179e-05, "loss": 0.1192, "step": 8255 }, { "epoch": 0.18192335024541803, "grad_norm": 0.624236524105072, "learning_rate": 2.822072018928482e-05, "loss": 0.0884, "step": 8256 }, { "epoch": 0.1819453855349342, "grad_norm": 1.4414108991622925, "learning_rate": 2.8220214428927625e-05, "loss": 0.1068, "step": 8257 }, { "epoch": 0.18196742082445036, "grad_norm": 0.5960507988929749, "learning_rate": 2.8219708601232787e-05, "loss": 0.1007, "step": 8258 }, { "epoch": 0.18198945611396652, "grad_norm": 1.031895637512207, "learning_rate": 2.8219202706202886e-05, "loss": 0.1038, "step": 8259 }, { "epoch": 0.1820114914034827, "grad_norm": 0.9671198129653931, "learning_rate": 2.82186967438405e-05, "loss": 0.13, "step": 8260 }, { "epoch": 0.18203352669299883, "grad_norm": 0.6156777143478394, "learning_rate": 2.8218190714148202e-05, "loss": 0.1009, "step": 8261 }, { "epoch": 0.182055561982515, "grad_norm": 1.3330153226852417, "learning_rate": 2.821768461712857e-05, "loss": 0.131, "step": 8262 }, { "epoch": 0.18207759727203116, "grad_norm": 1.052006721496582, "learning_rate": 2.821717845278418e-05, "loss": 0.1261, "step": 8263 }, { "epoch": 0.18209963256154732, "grad_norm": 1.137007474899292, "learning_rate": 2.8216672221117616e-05, "loss": 0.1158, "step": 8264 }, { "epoch": 0.18212166785106348, "grad_norm": 0.8591668605804443, "learning_rate": 2.8216165922131448e-05, "loss": 0.1026, "step": 8265 }, { "epoch": 0.18214370314057965, "grad_norm": 0.7479172348976135, "learning_rate": 2.8215659555828264e-05, "loss": 0.1159, "step": 8266 }, { "epoch": 0.18216573843009579, "grad_norm": 0.96507728099823, "learning_rate": 2.8215153122210637e-05, "loss": 0.1335, "step": 8267 }, { "epoch": 0.18218777371961195, "grad_norm": 1.0149946212768555, "learning_rate": 2.8214646621281152e-05, "loss": 0.1526, "step": 8268 }, { "epoch": 0.18220980900912812, "grad_norm": 1.2969191074371338, "learning_rate": 2.821414005304238e-05, "loss": 0.0905, "step": 8269 }, { "epoch": 0.18223184429864428, "grad_norm": 1.5454343557357788, "learning_rate": 2.8213633417496913e-05, "loss": 0.1217, "step": 8270 }, { "epoch": 0.18225387958816044, "grad_norm": 1.0168073177337646, "learning_rate": 2.821312671464732e-05, "loss": 0.1087, "step": 8271 }, { "epoch": 0.1822759148776766, "grad_norm": 0.8161487579345703, "learning_rate": 2.821261994449619e-05, "loss": 0.1027, "step": 8272 }, { "epoch": 0.18229795016719275, "grad_norm": 1.1489405632019043, "learning_rate": 2.82121131070461e-05, "loss": 0.1172, "step": 8273 }, { "epoch": 0.1823199854567089, "grad_norm": 0.8879993557929993, "learning_rate": 2.8211606202299636e-05, "loss": 0.1152, "step": 8274 }, { "epoch": 0.18234202074622508, "grad_norm": 0.7983719110488892, "learning_rate": 2.8211099230259373e-05, "loss": 0.1248, "step": 8275 }, { "epoch": 0.18236405603574124, "grad_norm": 1.1863446235656738, "learning_rate": 2.82105921909279e-05, "loss": 0.1367, "step": 8276 }, { "epoch": 0.1823860913252574, "grad_norm": 0.9861000180244446, "learning_rate": 2.8210085084307797e-05, "loss": 0.1331, "step": 8277 }, { "epoch": 0.18240812661477357, "grad_norm": 0.7067725658416748, "learning_rate": 2.8209577910401646e-05, "loss": 0.1023, "step": 8278 }, { "epoch": 0.18243016190428973, "grad_norm": 1.0418540239334106, "learning_rate": 2.820907066921203e-05, "loss": 0.147, "step": 8279 }, { "epoch": 0.18245219719380587, "grad_norm": 0.8640226125717163, "learning_rate": 2.8208563360741535e-05, "loss": 0.1168, "step": 8280 }, { "epoch": 0.18247423248332204, "grad_norm": 0.8612459897994995, "learning_rate": 2.8208055984992746e-05, "loss": 0.1338, "step": 8281 }, { "epoch": 0.1824962677728382, "grad_norm": 0.6524250507354736, "learning_rate": 2.820754854196824e-05, "loss": 0.1197, "step": 8282 }, { "epoch": 0.18251830306235436, "grad_norm": 0.7476158738136292, "learning_rate": 2.8207041031670614e-05, "loss": 0.1228, "step": 8283 }, { "epoch": 0.18254033835187053, "grad_norm": 0.7158517241477966, "learning_rate": 2.8206533454102436e-05, "loss": 0.1063, "step": 8284 }, { "epoch": 0.1825623736413867, "grad_norm": 0.9789985418319702, "learning_rate": 2.820602580926631e-05, "loss": 0.1333, "step": 8285 }, { "epoch": 0.18258440893090283, "grad_norm": 0.7365802526473999, "learning_rate": 2.820551809716481e-05, "loss": 0.0955, "step": 8286 }, { "epoch": 0.182606444220419, "grad_norm": 0.8396393060684204, "learning_rate": 2.8205010317800527e-05, "loss": 0.1277, "step": 8287 }, { "epoch": 0.18262847950993516, "grad_norm": 1.1158241033554077, "learning_rate": 2.820450247117604e-05, "loss": 0.1434, "step": 8288 }, { "epoch": 0.18265051479945132, "grad_norm": 0.8941175937652588, "learning_rate": 2.8203994557293947e-05, "loss": 0.1196, "step": 8289 }, { "epoch": 0.1826725500889675, "grad_norm": 0.7139265537261963, "learning_rate": 2.8203486576156825e-05, "loss": 0.134, "step": 8290 }, { "epoch": 0.18269458537848365, "grad_norm": 0.9880359172821045, "learning_rate": 2.8202978527767267e-05, "loss": 0.1106, "step": 8291 }, { "epoch": 0.1827166206679998, "grad_norm": 0.9695279002189636, "learning_rate": 2.8202470412127856e-05, "loss": 0.1254, "step": 8292 }, { "epoch": 0.18273865595751596, "grad_norm": 0.7129496335983276, "learning_rate": 2.8201962229241188e-05, "loss": 0.1169, "step": 8293 }, { "epoch": 0.18276069124703212, "grad_norm": 1.0049452781677246, "learning_rate": 2.8201453979109845e-05, "loss": 0.1086, "step": 8294 }, { "epoch": 0.18278272653654828, "grad_norm": 0.6705955266952515, "learning_rate": 2.8200945661736416e-05, "loss": 0.1216, "step": 8295 }, { "epoch": 0.18280476182606445, "grad_norm": 1.0999451875686646, "learning_rate": 2.8200437277123498e-05, "loss": 0.1186, "step": 8296 }, { "epoch": 0.1828267971155806, "grad_norm": 1.457047939300537, "learning_rate": 2.819992882527367e-05, "loss": 0.1356, "step": 8297 }, { "epoch": 0.18284883240509675, "grad_norm": 1.3656197786331177, "learning_rate": 2.8199420306189527e-05, "loss": 0.0901, "step": 8298 }, { "epoch": 0.18287086769461292, "grad_norm": 0.5840558409690857, "learning_rate": 2.8198911719873656e-05, "loss": 0.1182, "step": 8299 }, { "epoch": 0.18289290298412908, "grad_norm": 1.1074837446212769, "learning_rate": 2.8198403066328652e-05, "loss": 0.1067, "step": 8300 }, { "epoch": 0.18291493827364524, "grad_norm": 0.8868113160133362, "learning_rate": 2.8197894345557106e-05, "loss": 0.0778, "step": 8301 }, { "epoch": 0.1829369735631614, "grad_norm": 0.9338480830192566, "learning_rate": 2.8197385557561604e-05, "loss": 0.1242, "step": 8302 }, { "epoch": 0.18295900885267757, "grad_norm": 0.7214270830154419, "learning_rate": 2.819687670234474e-05, "loss": 0.1024, "step": 8303 }, { "epoch": 0.1829810441421937, "grad_norm": 0.5412704944610596, "learning_rate": 2.8196367779909106e-05, "loss": 0.1009, "step": 8304 }, { "epoch": 0.18300307943170988, "grad_norm": 1.101303219795227, "learning_rate": 2.8195858790257295e-05, "loss": 0.1602, "step": 8305 }, { "epoch": 0.18302511472122604, "grad_norm": 0.9364234209060669, "learning_rate": 2.81953497333919e-05, "loss": 0.0905, "step": 8306 }, { "epoch": 0.1830471500107422, "grad_norm": 1.2245008945465088, "learning_rate": 2.819484060931551e-05, "loss": 0.1216, "step": 8307 }, { "epoch": 0.18306918530025837, "grad_norm": 0.6728138327598572, "learning_rate": 2.8194331418030726e-05, "loss": 0.1262, "step": 8308 }, { "epoch": 0.18309122058977453, "grad_norm": 0.8204889893531799, "learning_rate": 2.8193822159540134e-05, "loss": 0.1273, "step": 8309 }, { "epoch": 0.18311325587929067, "grad_norm": 1.0947296619415283, "learning_rate": 2.8193312833846328e-05, "loss": 0.1469, "step": 8310 }, { "epoch": 0.18313529116880684, "grad_norm": 0.8058573603630066, "learning_rate": 2.819280344095191e-05, "loss": 0.1564, "step": 8311 }, { "epoch": 0.183157326458323, "grad_norm": 0.9783034920692444, "learning_rate": 2.8192293980859466e-05, "loss": 0.1119, "step": 8312 }, { "epoch": 0.18317936174783916, "grad_norm": 0.9718941450119019, "learning_rate": 2.8191784453571594e-05, "loss": 0.124, "step": 8313 }, { "epoch": 0.18320139703735533, "grad_norm": 1.3919703960418701, "learning_rate": 2.8191274859090893e-05, "loss": 0.0866, "step": 8314 }, { "epoch": 0.1832234323268715, "grad_norm": 0.733392596244812, "learning_rate": 2.8190765197419953e-05, "loss": 0.113, "step": 8315 }, { "epoch": 0.18324546761638766, "grad_norm": 1.0509763956069946, "learning_rate": 2.819025546856137e-05, "loss": 0.1226, "step": 8316 }, { "epoch": 0.1832675029059038, "grad_norm": 1.0090445280075073, "learning_rate": 2.8189745672517748e-05, "loss": 0.1057, "step": 8317 }, { "epoch": 0.18328953819541996, "grad_norm": 0.6331803202629089, "learning_rate": 2.8189235809291677e-05, "loss": 0.0674, "step": 8318 }, { "epoch": 0.18331157348493612, "grad_norm": 1.2147364616394043, "learning_rate": 2.8188725878885755e-05, "loss": 0.1176, "step": 8319 }, { "epoch": 0.1833336087744523, "grad_norm": 1.1013853549957275, "learning_rate": 2.8188215881302582e-05, "loss": 0.1379, "step": 8320 }, { "epoch": 0.18335564406396845, "grad_norm": 0.970494270324707, "learning_rate": 2.818770581654475e-05, "loss": 0.1356, "step": 8321 }, { "epoch": 0.18337767935348462, "grad_norm": 1.064067006111145, "learning_rate": 2.818719568461486e-05, "loss": 0.069, "step": 8322 }, { "epoch": 0.18339971464300076, "grad_norm": 0.8789283633232117, "learning_rate": 2.8186685485515512e-05, "loss": 0.0594, "step": 8323 }, { "epoch": 0.18342174993251692, "grad_norm": 0.9406716227531433, "learning_rate": 2.8186175219249304e-05, "loss": 0.1004, "step": 8324 }, { "epoch": 0.18344378522203308, "grad_norm": 0.8667736053466797, "learning_rate": 2.8185664885818835e-05, "loss": 0.1233, "step": 8325 }, { "epoch": 0.18346582051154925, "grad_norm": 1.0011801719665527, "learning_rate": 2.8185154485226707e-05, "loss": 0.1114, "step": 8326 }, { "epoch": 0.1834878558010654, "grad_norm": 1.0358786582946777, "learning_rate": 2.818464401747551e-05, "loss": 0.0757, "step": 8327 }, { "epoch": 0.18350989109058158, "grad_norm": 0.9951615929603577, "learning_rate": 2.8184133482567854e-05, "loss": 0.1666, "step": 8328 }, { "epoch": 0.18353192638009772, "grad_norm": 0.6333675980567932, "learning_rate": 2.8183622880506333e-05, "loss": 0.0864, "step": 8329 }, { "epoch": 0.18355396166961388, "grad_norm": 1.3016284704208374, "learning_rate": 2.818311221129356e-05, "loss": 0.1302, "step": 8330 }, { "epoch": 0.18357599695913004, "grad_norm": 0.8560965657234192, "learning_rate": 2.818260147493212e-05, "loss": 0.0779, "step": 8331 }, { "epoch": 0.1835980322486462, "grad_norm": 0.8905362486839294, "learning_rate": 2.818209067142462e-05, "loss": 0.1215, "step": 8332 }, { "epoch": 0.18362006753816237, "grad_norm": 0.9620047807693481, "learning_rate": 2.8181579800773666e-05, "loss": 0.0925, "step": 8333 }, { "epoch": 0.18364210282767854, "grad_norm": 1.066811203956604, "learning_rate": 2.818106886298186e-05, "loss": 0.0945, "step": 8334 }, { "epoch": 0.18366413811719468, "grad_norm": 0.771003782749176, "learning_rate": 2.81805578580518e-05, "loss": 0.1219, "step": 8335 }, { "epoch": 0.18368617340671084, "grad_norm": 0.8568634986877441, "learning_rate": 2.818004678598609e-05, "loss": 0.0964, "step": 8336 }, { "epoch": 0.183708208696227, "grad_norm": 0.8447065353393555, "learning_rate": 2.8179535646787334e-05, "loss": 0.1417, "step": 8337 }, { "epoch": 0.18373024398574317, "grad_norm": 0.8867367506027222, "learning_rate": 2.817902444045814e-05, "loss": 0.1089, "step": 8338 }, { "epoch": 0.18375227927525933, "grad_norm": 0.8716073632240295, "learning_rate": 2.81785131670011e-05, "loss": 0.0933, "step": 8339 }, { "epoch": 0.1837743145647755, "grad_norm": 0.6151139736175537, "learning_rate": 2.817800182641883e-05, "loss": 0.1302, "step": 8340 }, { "epoch": 0.18379634985429164, "grad_norm": 1.3061965703964233, "learning_rate": 2.817749041871393e-05, "loss": 0.1255, "step": 8341 }, { "epoch": 0.1838183851438078, "grad_norm": 1.005265712738037, "learning_rate": 2.8176978943889008e-05, "loss": 0.0983, "step": 8342 }, { "epoch": 0.18384042043332396, "grad_norm": 0.9665177464485168, "learning_rate": 2.817646740194666e-05, "loss": 0.103, "step": 8343 }, { "epoch": 0.18386245572284013, "grad_norm": 1.0100102424621582, "learning_rate": 2.81759557928895e-05, "loss": 0.164, "step": 8344 }, { "epoch": 0.1838844910123563, "grad_norm": 1.1356803178787231, "learning_rate": 2.8175444116720136e-05, "loss": 0.123, "step": 8345 }, { "epoch": 0.18390652630187246, "grad_norm": 0.8080905675888062, "learning_rate": 2.817493237344116e-05, "loss": 0.088, "step": 8346 }, { "epoch": 0.1839285615913886, "grad_norm": 0.8414477705955505, "learning_rate": 2.8174420563055198e-05, "loss": 0.1111, "step": 8347 }, { "epoch": 0.18395059688090476, "grad_norm": 1.8002766370773315, "learning_rate": 2.8173908685564842e-05, "loss": 0.1682, "step": 8348 }, { "epoch": 0.18397263217042092, "grad_norm": 1.0833897590637207, "learning_rate": 2.8173396740972705e-05, "loss": 0.0852, "step": 8349 }, { "epoch": 0.1839946674599371, "grad_norm": 1.0486570596694946, "learning_rate": 2.8172884729281397e-05, "loss": 0.1069, "step": 8350 }, { "epoch": 0.18401670274945325, "grad_norm": 3.9111781120300293, "learning_rate": 2.8172372650493526e-05, "loss": 0.1048, "step": 8351 }, { "epoch": 0.18403873803896942, "grad_norm": 3.3300728797912598, "learning_rate": 2.8171860504611687e-05, "loss": 0.1228, "step": 8352 }, { "epoch": 0.18406077332848558, "grad_norm": 0.900748610496521, "learning_rate": 2.817134829163851e-05, "loss": 0.1232, "step": 8353 }, { "epoch": 0.18408280861800172, "grad_norm": 1.2239465713500977, "learning_rate": 2.817083601157659e-05, "loss": 0.1574, "step": 8354 }, { "epoch": 0.18410484390751788, "grad_norm": 0.9044646620750427, "learning_rate": 2.8170323664428534e-05, "loss": 0.0979, "step": 8355 }, { "epoch": 0.18412687919703405, "grad_norm": 1.0439057350158691, "learning_rate": 2.816981125019696e-05, "loss": 0.152, "step": 8356 }, { "epoch": 0.1841489144865502, "grad_norm": 0.8227195143699646, "learning_rate": 2.8169298768884477e-05, "loss": 0.1016, "step": 8357 }, { "epoch": 0.18417094977606638, "grad_norm": 0.7313869595527649, "learning_rate": 2.8168786220493693e-05, "loss": 0.1018, "step": 8358 }, { "epoch": 0.18419298506558254, "grad_norm": 0.9674569964408875, "learning_rate": 2.816827360502722e-05, "loss": 0.1128, "step": 8359 }, { "epoch": 0.18421502035509868, "grad_norm": 1.112461805343628, "learning_rate": 2.8167760922487668e-05, "loss": 0.1439, "step": 8360 }, { "epoch": 0.18423705564461484, "grad_norm": 0.8119076490402222, "learning_rate": 2.8167248172877644e-05, "loss": 0.082, "step": 8361 }, { "epoch": 0.184259090934131, "grad_norm": 2.2853357791900635, "learning_rate": 2.816673535619977e-05, "loss": 0.1175, "step": 8362 }, { "epoch": 0.18428112622364717, "grad_norm": 1.918006181716919, "learning_rate": 2.816622247245665e-05, "loss": 0.1459, "step": 8363 }, { "epoch": 0.18430316151316334, "grad_norm": 0.9361270666122437, "learning_rate": 2.8165709521650897e-05, "loss": 0.1333, "step": 8364 }, { "epoch": 0.1843251968026795, "grad_norm": 1.0097953081130981, "learning_rate": 2.8165196503785126e-05, "loss": 0.1016, "step": 8365 }, { "epoch": 0.18434723209219564, "grad_norm": 1.2501124143600464, "learning_rate": 2.816468341886195e-05, "loss": 0.1007, "step": 8366 }, { "epoch": 0.1843692673817118, "grad_norm": 0.7577731609344482, "learning_rate": 2.8164170266883986e-05, "loss": 0.1043, "step": 8367 }, { "epoch": 0.18439130267122797, "grad_norm": 1.020944595336914, "learning_rate": 2.816365704785384e-05, "loss": 0.1346, "step": 8368 }, { "epoch": 0.18441333796074413, "grad_norm": 1.2456705570220947, "learning_rate": 2.8163143761774127e-05, "loss": 0.0915, "step": 8369 }, { "epoch": 0.1844353732502603, "grad_norm": 1.1688902378082275, "learning_rate": 2.8162630408647467e-05, "loss": 0.1104, "step": 8370 }, { "epoch": 0.18445740853977646, "grad_norm": 0.6440054774284363, "learning_rate": 2.8162116988476475e-05, "loss": 0.1308, "step": 8371 }, { "epoch": 0.1844794438292926, "grad_norm": 1.3486034870147705, "learning_rate": 2.816160350126376e-05, "loss": 0.1344, "step": 8372 }, { "epoch": 0.18450147911880876, "grad_norm": 0.948086142539978, "learning_rate": 2.8161089947011943e-05, "loss": 0.1234, "step": 8373 }, { "epoch": 0.18452351440832493, "grad_norm": 1.0066752433776855, "learning_rate": 2.816057632572363e-05, "loss": 0.1135, "step": 8374 }, { "epoch": 0.1845455496978411, "grad_norm": 0.8778305053710938, "learning_rate": 2.816006263740145e-05, "loss": 0.0838, "step": 8375 }, { "epoch": 0.18456758498735726, "grad_norm": 0.8309217095375061, "learning_rate": 2.8159548882048017e-05, "loss": 0.1061, "step": 8376 }, { "epoch": 0.18458962027687342, "grad_norm": 1.3333244323730469, "learning_rate": 2.815903505966594e-05, "loss": 0.1321, "step": 8377 }, { "epoch": 0.18461165556638956, "grad_norm": 0.725572943687439, "learning_rate": 2.815852117025784e-05, "loss": 0.112, "step": 8378 }, { "epoch": 0.18463369085590572, "grad_norm": 0.9967916011810303, "learning_rate": 2.815800721382634e-05, "loss": 0.1248, "step": 8379 }, { "epoch": 0.1846557261454219, "grad_norm": 1.0634994506835938, "learning_rate": 2.815749319037405e-05, "loss": 0.1575, "step": 8380 }, { "epoch": 0.18467776143493805, "grad_norm": 1.4002147912979126, "learning_rate": 2.815697909990359e-05, "loss": 0.1639, "step": 8381 }, { "epoch": 0.18469979672445422, "grad_norm": 0.9396655559539795, "learning_rate": 2.8156464942417584e-05, "loss": 0.0763, "step": 8382 }, { "epoch": 0.18472183201397038, "grad_norm": 1.3153408765792847, "learning_rate": 2.8155950717918647e-05, "loss": 0.1415, "step": 8383 }, { "epoch": 0.18474386730348655, "grad_norm": 0.8719276785850525, "learning_rate": 2.8155436426409396e-05, "loss": 0.0872, "step": 8384 }, { "epoch": 0.18476590259300268, "grad_norm": 1.007948875427246, "learning_rate": 2.8154922067892456e-05, "loss": 0.1113, "step": 8385 }, { "epoch": 0.18478793788251885, "grad_norm": 1.0007685422897339, "learning_rate": 2.815440764237044e-05, "loss": 0.1346, "step": 8386 }, { "epoch": 0.184809973172035, "grad_norm": 1.244273066520691, "learning_rate": 2.8153893149845972e-05, "loss": 0.1247, "step": 8387 }, { "epoch": 0.18483200846155118, "grad_norm": 0.9893920421600342, "learning_rate": 2.8153378590321676e-05, "loss": 0.1342, "step": 8388 }, { "epoch": 0.18485404375106734, "grad_norm": 0.8589494824409485, "learning_rate": 2.8152863963800168e-05, "loss": 0.1059, "step": 8389 }, { "epoch": 0.1848760790405835, "grad_norm": 1.0286961793899536, "learning_rate": 2.815234927028407e-05, "loss": 0.1211, "step": 8390 }, { "epoch": 0.18489811433009964, "grad_norm": 0.6094275712966919, "learning_rate": 2.8151834509776002e-05, "loss": 0.1, "step": 8391 }, { "epoch": 0.1849201496196158, "grad_norm": 0.8803830742835999, "learning_rate": 2.8151319682278593e-05, "loss": 0.1132, "step": 8392 }, { "epoch": 0.18494218490913197, "grad_norm": 0.7841712832450867, "learning_rate": 2.8150804787794457e-05, "loss": 0.1155, "step": 8393 }, { "epoch": 0.18496422019864814, "grad_norm": 1.252092957496643, "learning_rate": 2.8150289826326224e-05, "loss": 0.1309, "step": 8394 }, { "epoch": 0.1849862554881643, "grad_norm": 1.0062146186828613, "learning_rate": 2.814977479787651e-05, "loss": 0.1324, "step": 8395 }, { "epoch": 0.18500829077768047, "grad_norm": 0.8675413727760315, "learning_rate": 2.8149259702447943e-05, "loss": 0.1232, "step": 8396 }, { "epoch": 0.1850303260671966, "grad_norm": 0.9657523036003113, "learning_rate": 2.8148744540043143e-05, "loss": 0.1321, "step": 8397 }, { "epoch": 0.18505236135671277, "grad_norm": 0.9518755674362183, "learning_rate": 2.8148229310664734e-05, "loss": 0.1103, "step": 8398 }, { "epoch": 0.18507439664622893, "grad_norm": 0.7528077363967896, "learning_rate": 2.8147714014315348e-05, "loss": 0.1006, "step": 8399 }, { "epoch": 0.1850964319357451, "grad_norm": 1.2271559238433838, "learning_rate": 2.8147198650997602e-05, "loss": 0.1461, "step": 8400 }, { "epoch": 0.18511846722526126, "grad_norm": 1.1893365383148193, "learning_rate": 2.8146683220714122e-05, "loss": 0.1347, "step": 8401 }, { "epoch": 0.18514050251477743, "grad_norm": 0.9899778962135315, "learning_rate": 2.8146167723467535e-05, "loss": 0.1432, "step": 8402 }, { "epoch": 0.18516253780429356, "grad_norm": 0.804707407951355, "learning_rate": 2.814565215926047e-05, "loss": 0.1107, "step": 8403 }, { "epoch": 0.18518457309380973, "grad_norm": 0.9852098822593689, "learning_rate": 2.8145136528095547e-05, "loss": 0.0889, "step": 8404 }, { "epoch": 0.1852066083833259, "grad_norm": 1.0430315732955933, "learning_rate": 2.8144620829975394e-05, "loss": 0.1394, "step": 8405 }, { "epoch": 0.18522864367284206, "grad_norm": 1.515787124633789, "learning_rate": 2.8144105064902637e-05, "loss": 0.1091, "step": 8406 }, { "epoch": 0.18525067896235822, "grad_norm": 1.0770620107650757, "learning_rate": 2.8143589232879907e-05, "loss": 0.143, "step": 8407 }, { "epoch": 0.1852727142518744, "grad_norm": 0.7145488262176514, "learning_rate": 2.8143073333909828e-05, "loss": 0.1308, "step": 8408 }, { "epoch": 0.18529474954139052, "grad_norm": 1.0018705129623413, "learning_rate": 2.8142557367995025e-05, "loss": 0.1316, "step": 8409 }, { "epoch": 0.1853167848309067, "grad_norm": 0.7376157641410828, "learning_rate": 2.8142041335138135e-05, "loss": 0.1202, "step": 8410 }, { "epoch": 0.18533882012042285, "grad_norm": 1.0131936073303223, "learning_rate": 2.814152523534178e-05, "loss": 0.1329, "step": 8411 }, { "epoch": 0.18536085540993902, "grad_norm": 1.082776665687561, "learning_rate": 2.814100906860859e-05, "loss": 0.1358, "step": 8412 }, { "epoch": 0.18538289069945518, "grad_norm": 1.0208743810653687, "learning_rate": 2.814049283494119e-05, "loss": 0.137, "step": 8413 }, { "epoch": 0.18540492598897135, "grad_norm": 1.0833141803741455, "learning_rate": 2.813997653434222e-05, "loss": 0.0976, "step": 8414 }, { "epoch": 0.18542696127848748, "grad_norm": 0.8165774941444397, "learning_rate": 2.81394601668143e-05, "loss": 0.1215, "step": 8415 }, { "epoch": 0.18544899656800365, "grad_norm": 0.612270176410675, "learning_rate": 2.8138943732360062e-05, "loss": 0.0966, "step": 8416 }, { "epoch": 0.1854710318575198, "grad_norm": 1.1804581880569458, "learning_rate": 2.8138427230982136e-05, "loss": 0.1254, "step": 8417 }, { "epoch": 0.18549306714703598, "grad_norm": 1.0670851469039917, "learning_rate": 2.8137910662683163e-05, "loss": 0.1043, "step": 8418 }, { "epoch": 0.18551510243655214, "grad_norm": 0.8923131227493286, "learning_rate": 2.8137394027465758e-05, "loss": 0.0979, "step": 8419 }, { "epoch": 0.1855371377260683, "grad_norm": 0.8594187498092651, "learning_rate": 2.8136877325332563e-05, "loss": 0.099, "step": 8420 }, { "epoch": 0.18555917301558447, "grad_norm": 0.9073629379272461, "learning_rate": 2.8136360556286206e-05, "loss": 0.1187, "step": 8421 }, { "epoch": 0.1855812083051006, "grad_norm": 0.6905140280723572, "learning_rate": 2.8135843720329324e-05, "loss": 0.1276, "step": 8422 }, { "epoch": 0.18560324359461677, "grad_norm": 1.1670325994491577, "learning_rate": 2.8135326817464544e-05, "loss": 0.1296, "step": 8423 }, { "epoch": 0.18562527888413294, "grad_norm": 0.8344406485557556, "learning_rate": 2.8134809847694502e-05, "loss": 0.0913, "step": 8424 }, { "epoch": 0.1856473141736491, "grad_norm": 0.6183489561080933, "learning_rate": 2.813429281102183e-05, "loss": 0.0967, "step": 8425 }, { "epoch": 0.18566934946316527, "grad_norm": 2.0434179306030273, "learning_rate": 2.813377570744916e-05, "loss": 0.1855, "step": 8426 }, { "epoch": 0.18569138475268143, "grad_norm": 1.1317517757415771, "learning_rate": 2.8133258536979124e-05, "loss": 0.1337, "step": 8427 }, { "epoch": 0.18571342004219757, "grad_norm": 0.7834945321083069, "learning_rate": 2.8132741299614366e-05, "loss": 0.0967, "step": 8428 }, { "epoch": 0.18573545533171373, "grad_norm": 0.7403382658958435, "learning_rate": 2.813222399535751e-05, "loss": 0.1143, "step": 8429 }, { "epoch": 0.1857574906212299, "grad_norm": 0.773851752281189, "learning_rate": 2.8131706624211196e-05, "loss": 0.1183, "step": 8430 }, { "epoch": 0.18577952591074606, "grad_norm": 1.0103682279586792, "learning_rate": 2.8131189186178065e-05, "loss": 0.1166, "step": 8431 }, { "epoch": 0.18580156120026223, "grad_norm": 0.5328928828239441, "learning_rate": 2.8130671681260738e-05, "loss": 0.0961, "step": 8432 }, { "epoch": 0.1858235964897784, "grad_norm": 2.0866341590881348, "learning_rate": 2.813015410946186e-05, "loss": 0.1058, "step": 8433 }, { "epoch": 0.18584563177929453, "grad_norm": 1.0348711013793945, "learning_rate": 2.8129636470784067e-05, "loss": 0.1282, "step": 8434 }, { "epoch": 0.1858676670688107, "grad_norm": 1.0153512954711914, "learning_rate": 2.812911876523e-05, "loss": 0.1381, "step": 8435 }, { "epoch": 0.18588970235832686, "grad_norm": 1.156865119934082, "learning_rate": 2.8128600992802283e-05, "loss": 0.1241, "step": 8436 }, { "epoch": 0.18591173764784302, "grad_norm": 1.1968249082565308, "learning_rate": 2.8128083153503562e-05, "loss": 0.1382, "step": 8437 }, { "epoch": 0.1859337729373592, "grad_norm": 0.8432978987693787, "learning_rate": 2.8127565247336472e-05, "loss": 0.1068, "step": 8438 }, { "epoch": 0.18595580822687535, "grad_norm": 0.8706362843513489, "learning_rate": 2.812704727430366e-05, "loss": 0.1145, "step": 8439 }, { "epoch": 0.1859778435163915, "grad_norm": 1.1755019426345825, "learning_rate": 2.812652923440775e-05, "loss": 0.1312, "step": 8440 }, { "epoch": 0.18599987880590765, "grad_norm": 3.261587381362915, "learning_rate": 2.8126011127651394e-05, "loss": 0.1824, "step": 8441 }, { "epoch": 0.18602191409542382, "grad_norm": 0.8163377046585083, "learning_rate": 2.812549295403722e-05, "loss": 0.0783, "step": 8442 }, { "epoch": 0.18604394938493998, "grad_norm": 1.5412976741790771, "learning_rate": 2.812497471356787e-05, "loss": 0.0784, "step": 8443 }, { "epoch": 0.18606598467445615, "grad_norm": 0.9180991649627686, "learning_rate": 2.812445640624599e-05, "loss": 0.1012, "step": 8444 }, { "epoch": 0.1860880199639723, "grad_norm": 0.7821518182754517, "learning_rate": 2.8123938032074218e-05, "loss": 0.1074, "step": 8445 }, { "epoch": 0.18611005525348845, "grad_norm": 0.6977172493934631, "learning_rate": 2.8123419591055184e-05, "loss": 0.0864, "step": 8446 }, { "epoch": 0.1861320905430046, "grad_norm": 1.0591990947723389, "learning_rate": 2.8122901083191545e-05, "loss": 0.1174, "step": 8447 }, { "epoch": 0.18615412583252078, "grad_norm": 0.7220872044563293, "learning_rate": 2.8122382508485928e-05, "loss": 0.0922, "step": 8448 }, { "epoch": 0.18617616112203694, "grad_norm": 1.1219788789749146, "learning_rate": 2.812186386694098e-05, "loss": 0.1121, "step": 8449 }, { "epoch": 0.1861981964115531, "grad_norm": 0.8943859338760376, "learning_rate": 2.8121345158559348e-05, "loss": 0.133, "step": 8450 }, { "epoch": 0.18622023170106927, "grad_norm": 0.7301913499832153, "learning_rate": 2.8120826383343664e-05, "loss": 0.1189, "step": 8451 }, { "epoch": 0.1862422669905854, "grad_norm": 0.8581724166870117, "learning_rate": 2.8120307541296576e-05, "loss": 0.1216, "step": 8452 }, { "epoch": 0.18626430228010157, "grad_norm": 1.1382229328155518, "learning_rate": 2.811978863242073e-05, "loss": 0.1143, "step": 8453 }, { "epoch": 0.18628633756961774, "grad_norm": 0.8234755992889404, "learning_rate": 2.8119269656718758e-05, "loss": 0.1157, "step": 8454 }, { "epoch": 0.1863083728591339, "grad_norm": 1.0306814908981323, "learning_rate": 2.8118750614193316e-05, "loss": 0.1137, "step": 8455 }, { "epoch": 0.18633040814865007, "grad_norm": 1.3815760612487793, "learning_rate": 2.8118231504847042e-05, "loss": 0.1789, "step": 8456 }, { "epoch": 0.18635244343816623, "grad_norm": 1.0062110424041748, "learning_rate": 2.8117712328682576e-05, "loss": 0.1571, "step": 8457 }, { "epoch": 0.1863744787276824, "grad_norm": 0.7948285341262817, "learning_rate": 2.8117193085702573e-05, "loss": 0.1141, "step": 8458 }, { "epoch": 0.18639651401719853, "grad_norm": 0.9445581436157227, "learning_rate": 2.811667377590967e-05, "loss": 0.1296, "step": 8459 }, { "epoch": 0.1864185493067147, "grad_norm": 1.222618818283081, "learning_rate": 2.8116154399306508e-05, "loss": 0.1216, "step": 8460 }, { "epoch": 0.18644058459623086, "grad_norm": 1.066314697265625, "learning_rate": 2.8115634955895745e-05, "loss": 0.0801, "step": 8461 }, { "epoch": 0.18646261988574703, "grad_norm": 1.00542151927948, "learning_rate": 2.8115115445680018e-05, "loss": 0.0965, "step": 8462 }, { "epoch": 0.1864846551752632, "grad_norm": 0.752498209476471, "learning_rate": 2.8114595868661974e-05, "loss": 0.1069, "step": 8463 }, { "epoch": 0.18650669046477936, "grad_norm": 0.9100804328918457, "learning_rate": 2.8114076224844262e-05, "loss": 0.1446, "step": 8464 }, { "epoch": 0.1865287257542955, "grad_norm": 0.9311417937278748, "learning_rate": 2.811355651422953e-05, "loss": 0.0832, "step": 8465 }, { "epoch": 0.18655076104381166, "grad_norm": 1.05082368850708, "learning_rate": 2.8113036736820414e-05, "loss": 0.1277, "step": 8466 }, { "epoch": 0.18657279633332782, "grad_norm": 0.6448616981506348, "learning_rate": 2.811251689261958e-05, "loss": 0.077, "step": 8467 }, { "epoch": 0.186594831622844, "grad_norm": 1.096203327178955, "learning_rate": 2.811199698162966e-05, "loss": 0.1511, "step": 8468 }, { "epoch": 0.18661686691236015, "grad_norm": 1.3469723463058472, "learning_rate": 2.811147700385331e-05, "loss": 0.1386, "step": 8469 }, { "epoch": 0.18663890220187632, "grad_norm": 0.8025110960006714, "learning_rate": 2.8110956959293177e-05, "loss": 0.1092, "step": 8470 }, { "epoch": 0.18666093749139245, "grad_norm": 0.8409789204597473, "learning_rate": 2.8110436847951906e-05, "loss": 0.1089, "step": 8471 }, { "epoch": 0.18668297278090862, "grad_norm": 1.1171777248382568, "learning_rate": 2.8109916669832158e-05, "loss": 0.1477, "step": 8472 }, { "epoch": 0.18670500807042478, "grad_norm": 0.8103532791137695, "learning_rate": 2.8109396424936566e-05, "loss": 0.1086, "step": 8473 }, { "epoch": 0.18672704335994095, "grad_norm": 0.894807755947113, "learning_rate": 2.810887611326779e-05, "loss": 0.0869, "step": 8474 }, { "epoch": 0.1867490786494571, "grad_norm": 0.8292887210845947, "learning_rate": 2.8108355734828482e-05, "loss": 0.0886, "step": 8475 }, { "epoch": 0.18677111393897328, "grad_norm": 0.946520209312439, "learning_rate": 2.8107835289621288e-05, "loss": 0.1067, "step": 8476 }, { "epoch": 0.1867931492284894, "grad_norm": 0.6233481168746948, "learning_rate": 2.8107314777648856e-05, "loss": 0.0912, "step": 8477 }, { "epoch": 0.18681518451800558, "grad_norm": 1.4556912183761597, "learning_rate": 2.8106794198913843e-05, "loss": 0.0992, "step": 8478 }, { "epoch": 0.18683721980752174, "grad_norm": 0.9316186904907227, "learning_rate": 2.8106273553418898e-05, "loss": 0.1163, "step": 8479 }, { "epoch": 0.1868592550970379, "grad_norm": 1.337815284729004, "learning_rate": 2.8105752841166675e-05, "loss": 0.1486, "step": 8480 }, { "epoch": 0.18688129038655407, "grad_norm": 1.3647617101669312, "learning_rate": 2.8105232062159825e-05, "loss": 0.0972, "step": 8481 }, { "epoch": 0.18690332567607024, "grad_norm": 1.0475972890853882, "learning_rate": 2.8104711216401003e-05, "loss": 0.1268, "step": 8482 }, { "epoch": 0.18692536096558637, "grad_norm": 0.9585366249084473, "learning_rate": 2.8104190303892857e-05, "loss": 0.0994, "step": 8483 }, { "epoch": 0.18694739625510254, "grad_norm": 1.345690369606018, "learning_rate": 2.8103669324638044e-05, "loss": 0.093, "step": 8484 }, { "epoch": 0.1869694315446187, "grad_norm": 0.5923279523849487, "learning_rate": 2.8103148278639213e-05, "loss": 0.1289, "step": 8485 }, { "epoch": 0.18699146683413487, "grad_norm": 0.9083855748176575, "learning_rate": 2.8102627165899024e-05, "loss": 0.1356, "step": 8486 }, { "epoch": 0.18701350212365103, "grad_norm": 0.7554202079772949, "learning_rate": 2.810210598642013e-05, "loss": 0.1354, "step": 8487 }, { "epoch": 0.1870355374131672, "grad_norm": 0.931341290473938, "learning_rate": 2.810158474020518e-05, "loss": 0.127, "step": 8488 }, { "epoch": 0.18705757270268336, "grad_norm": 1.0113016366958618, "learning_rate": 2.8101063427256835e-05, "loss": 0.1391, "step": 8489 }, { "epoch": 0.1870796079921995, "grad_norm": 0.9406793117523193, "learning_rate": 2.8100542047577754e-05, "loss": 0.1211, "step": 8490 }, { "epoch": 0.18710164328171566, "grad_norm": 0.6437817811965942, "learning_rate": 2.810002060117058e-05, "loss": 0.1146, "step": 8491 }, { "epoch": 0.18712367857123183, "grad_norm": 0.7101818323135376, "learning_rate": 2.8099499088037977e-05, "loss": 0.0872, "step": 8492 }, { "epoch": 0.187145713860748, "grad_norm": 0.7441216111183167, "learning_rate": 2.80989775081826e-05, "loss": 0.0997, "step": 8493 }, { "epoch": 0.18716774915026416, "grad_norm": 1.1310187578201294, "learning_rate": 2.8098455861607114e-05, "loss": 0.1328, "step": 8494 }, { "epoch": 0.18718978443978032, "grad_norm": 1.189483404159546, "learning_rate": 2.8097934148314162e-05, "loss": 0.1131, "step": 8495 }, { "epoch": 0.18721181972929646, "grad_norm": 0.6767001748085022, "learning_rate": 2.8097412368306408e-05, "loss": 0.0813, "step": 8496 }, { "epoch": 0.18723385501881262, "grad_norm": 2.0332376956939697, "learning_rate": 2.809689052158651e-05, "loss": 0.1243, "step": 8497 }, { "epoch": 0.1872558903083288, "grad_norm": 0.7588891386985779, "learning_rate": 2.8096368608157127e-05, "loss": 0.1133, "step": 8498 }, { "epoch": 0.18727792559784495, "grad_norm": 0.9647932648658752, "learning_rate": 2.8095846628020912e-05, "loss": 0.1121, "step": 8499 }, { "epoch": 0.18729996088736112, "grad_norm": 0.7347208857536316, "learning_rate": 2.8095324581180534e-05, "loss": 0.0897, "step": 8500 }, { "epoch": 0.18732199617687728, "grad_norm": 1.3306658267974854, "learning_rate": 2.8094802467638643e-05, "loss": 0.1358, "step": 8501 }, { "epoch": 0.18734403146639342, "grad_norm": 0.9327565431594849, "learning_rate": 2.8094280287397894e-05, "loss": 0.1, "step": 8502 }, { "epoch": 0.18736606675590958, "grad_norm": 0.8439404368400574, "learning_rate": 2.8093758040460965e-05, "loss": 0.0919, "step": 8503 }, { "epoch": 0.18738810204542575, "grad_norm": 0.7591493725776672, "learning_rate": 2.8093235726830498e-05, "loss": 0.1176, "step": 8504 }, { "epoch": 0.1874101373349419, "grad_norm": 1.1769219636917114, "learning_rate": 2.809271334650916e-05, "loss": 0.1371, "step": 8505 }, { "epoch": 0.18743217262445808, "grad_norm": 1.2203235626220703, "learning_rate": 2.8092190899499616e-05, "loss": 0.1236, "step": 8506 }, { "epoch": 0.18745420791397424, "grad_norm": 1.2109507322311401, "learning_rate": 2.809166838580452e-05, "loss": 0.1338, "step": 8507 }, { "epoch": 0.18747624320349038, "grad_norm": 0.8323813676834106, "learning_rate": 2.8091145805426536e-05, "loss": 0.0925, "step": 8508 }, { "epoch": 0.18749827849300654, "grad_norm": 0.709090530872345, "learning_rate": 2.8090623158368332e-05, "loss": 0.1079, "step": 8509 }, { "epoch": 0.1875203137825227, "grad_norm": 1.0713895559310913, "learning_rate": 2.809010044463256e-05, "loss": 0.0987, "step": 8510 }, { "epoch": 0.18754234907203887, "grad_norm": 1.1835952997207642, "learning_rate": 2.808957766422189e-05, "loss": 0.118, "step": 8511 }, { "epoch": 0.18756438436155504, "grad_norm": 0.8425343036651611, "learning_rate": 2.808905481713898e-05, "loss": 0.0978, "step": 8512 }, { "epoch": 0.1875864196510712, "grad_norm": 0.7962018251419067, "learning_rate": 2.8088531903386492e-05, "loss": 0.0876, "step": 8513 }, { "epoch": 0.18760845494058734, "grad_norm": 0.9502881169319153, "learning_rate": 2.8088008922967094e-05, "loss": 0.1149, "step": 8514 }, { "epoch": 0.1876304902301035, "grad_norm": 1.4548264741897583, "learning_rate": 2.808748587588345e-05, "loss": 0.1727, "step": 8515 }, { "epoch": 0.18765252551961967, "grad_norm": 1.0591243505477905, "learning_rate": 2.8086962762138216e-05, "loss": 0.1479, "step": 8516 }, { "epoch": 0.18767456080913583, "grad_norm": 0.7140644192695618, "learning_rate": 2.8086439581734072e-05, "loss": 0.0913, "step": 8517 }, { "epoch": 0.187696596098652, "grad_norm": 0.9519171714782715, "learning_rate": 2.808591633467367e-05, "loss": 0.1304, "step": 8518 }, { "epoch": 0.18771863138816816, "grad_norm": 0.7270119786262512, "learning_rate": 2.808539302095968e-05, "loss": 0.1375, "step": 8519 }, { "epoch": 0.1877406666776843, "grad_norm": 0.7163782119750977, "learning_rate": 2.8084869640594763e-05, "loss": 0.1054, "step": 8520 }, { "epoch": 0.18776270196720046, "grad_norm": 0.8065650463104248, "learning_rate": 2.808434619358159e-05, "loss": 0.1046, "step": 8521 }, { "epoch": 0.18778473725671663, "grad_norm": 1.364554762840271, "learning_rate": 2.8083822679922827e-05, "loss": 0.1168, "step": 8522 }, { "epoch": 0.1878067725462328, "grad_norm": 0.7363299131393433, "learning_rate": 2.8083299099621137e-05, "loss": 0.1028, "step": 8523 }, { "epoch": 0.18782880783574896, "grad_norm": 1.0156170129776, "learning_rate": 2.8082775452679188e-05, "loss": 0.0919, "step": 8524 }, { "epoch": 0.18785084312526512, "grad_norm": 0.7029877305030823, "learning_rate": 2.8082251739099652e-05, "loss": 0.0972, "step": 8525 }, { "epoch": 0.18787287841478129, "grad_norm": 0.641346275806427, "learning_rate": 2.808172795888519e-05, "loss": 0.1221, "step": 8526 }, { "epoch": 0.18789491370429742, "grad_norm": 1.0373929738998413, "learning_rate": 2.8081204112038473e-05, "loss": 0.1045, "step": 8527 }, { "epoch": 0.1879169489938136, "grad_norm": 0.8690173029899597, "learning_rate": 2.8080680198562167e-05, "loss": 0.1084, "step": 8528 }, { "epoch": 0.18793898428332975, "grad_norm": 0.9969693422317505, "learning_rate": 2.8080156218458944e-05, "loss": 0.1002, "step": 8529 }, { "epoch": 0.18796101957284592, "grad_norm": 0.7426447868347168, "learning_rate": 2.8079632171731473e-05, "loss": 0.1189, "step": 8530 }, { "epoch": 0.18798305486236208, "grad_norm": 0.8996516466140747, "learning_rate": 2.8079108058382422e-05, "loss": 0.144, "step": 8531 }, { "epoch": 0.18800509015187825, "grad_norm": 1.2433077096939087, "learning_rate": 2.8078583878414457e-05, "loss": 0.0905, "step": 8532 }, { "epoch": 0.18802712544139438, "grad_norm": 1.1654374599456787, "learning_rate": 2.807805963183025e-05, "loss": 0.1158, "step": 8533 }, { "epoch": 0.18804916073091055, "grad_norm": 0.8721588253974915, "learning_rate": 2.8077535318632476e-05, "loss": 0.1266, "step": 8534 }, { "epoch": 0.1880711960204267, "grad_norm": 0.5319401621818542, "learning_rate": 2.80770109388238e-05, "loss": 0.079, "step": 8535 }, { "epoch": 0.18809323130994288, "grad_norm": 0.8196228742599487, "learning_rate": 2.8076486492406893e-05, "loss": 0.1431, "step": 8536 }, { "epoch": 0.18811526659945904, "grad_norm": 0.9787854552268982, "learning_rate": 2.807596197938443e-05, "loss": 0.1356, "step": 8537 }, { "epoch": 0.1881373018889752, "grad_norm": 1.1156203746795654, "learning_rate": 2.807543739975908e-05, "loss": 0.0965, "step": 8538 }, { "epoch": 0.18815933717849134, "grad_norm": 1.0586130619049072, "learning_rate": 2.8074912753533514e-05, "loss": 0.0848, "step": 8539 }, { "epoch": 0.1881813724680075, "grad_norm": 1.6154510974884033, "learning_rate": 2.807438804071041e-05, "loss": 0.2058, "step": 8540 }, { "epoch": 0.18820340775752367, "grad_norm": 0.48777785897254944, "learning_rate": 2.8073863261292437e-05, "loss": 0.1149, "step": 8541 }, { "epoch": 0.18822544304703984, "grad_norm": 1.1886662244796753, "learning_rate": 2.8073338415282265e-05, "loss": 0.1563, "step": 8542 }, { "epoch": 0.188247478336556, "grad_norm": 0.9618158936500549, "learning_rate": 2.807281350268257e-05, "loss": 0.1241, "step": 8543 }, { "epoch": 0.18826951362607217, "grad_norm": 0.9495559334754944, "learning_rate": 2.8072288523496027e-05, "loss": 0.1194, "step": 8544 }, { "epoch": 0.1882915489155883, "grad_norm": 0.9726928472518921, "learning_rate": 2.8071763477725308e-05, "loss": 0.1137, "step": 8545 }, { "epoch": 0.18831358420510447, "grad_norm": 1.1721488237380981, "learning_rate": 2.8071238365373087e-05, "loss": 0.1632, "step": 8546 }, { "epoch": 0.18833561949462063, "grad_norm": 1.3728524446487427, "learning_rate": 2.8070713186442043e-05, "loss": 0.1152, "step": 8547 }, { "epoch": 0.1883576547841368, "grad_norm": 1.1761090755462646, "learning_rate": 2.8070187940934842e-05, "loss": 0.1039, "step": 8548 }, { "epoch": 0.18837969007365296, "grad_norm": 0.8552990555763245, "learning_rate": 2.8069662628854166e-05, "loss": 0.1355, "step": 8549 }, { "epoch": 0.18840172536316913, "grad_norm": 0.8693185448646545, "learning_rate": 2.8069137250202697e-05, "loss": 0.1219, "step": 8550 }, { "epoch": 0.18842376065268526, "grad_norm": 1.0932323932647705, "learning_rate": 2.8068611804983095e-05, "loss": 0.111, "step": 8551 }, { "epoch": 0.18844579594220143, "grad_norm": 1.4173732995986938, "learning_rate": 2.806808629319805e-05, "loss": 0.0878, "step": 8552 }, { "epoch": 0.1884678312317176, "grad_norm": 0.829919159412384, "learning_rate": 2.8067560714850233e-05, "loss": 0.1167, "step": 8553 }, { "epoch": 0.18848986652123376, "grad_norm": 0.8619887828826904, "learning_rate": 2.8067035069942322e-05, "loss": 0.091, "step": 8554 }, { "epoch": 0.18851190181074992, "grad_norm": 1.0408802032470703, "learning_rate": 2.8066509358476996e-05, "loss": 0.1003, "step": 8555 }, { "epoch": 0.18853393710026609, "grad_norm": 0.8288174271583557, "learning_rate": 2.806598358045693e-05, "loss": 0.0628, "step": 8556 }, { "epoch": 0.18855597238978222, "grad_norm": 0.8320563435554504, "learning_rate": 2.80654577358848e-05, "loss": 0.0934, "step": 8557 }, { "epoch": 0.1885780076792984, "grad_norm": 0.9483838677406311, "learning_rate": 2.806493182476329e-05, "loss": 0.1416, "step": 8558 }, { "epoch": 0.18860004296881455, "grad_norm": 0.8567589521408081, "learning_rate": 2.8064405847095074e-05, "loss": 0.098, "step": 8559 }, { "epoch": 0.18862207825833072, "grad_norm": 1.3758729696273804, "learning_rate": 2.8063879802882835e-05, "loss": 0.152, "step": 8560 }, { "epoch": 0.18864411354784688, "grad_norm": 0.8545607924461365, "learning_rate": 2.8063353692129254e-05, "loss": 0.121, "step": 8561 }, { "epoch": 0.18866614883736305, "grad_norm": 1.0109354257583618, "learning_rate": 2.8062827514837002e-05, "loss": 0.1214, "step": 8562 }, { "epoch": 0.1886881841268792, "grad_norm": 0.9388295412063599, "learning_rate": 2.8062301271008768e-05, "loss": 0.1197, "step": 8563 }, { "epoch": 0.18871021941639535, "grad_norm": 1.01569402217865, "learning_rate": 2.806177496064723e-05, "loss": 0.1158, "step": 8564 }, { "epoch": 0.1887322547059115, "grad_norm": 1.2122341394424438, "learning_rate": 2.8061248583755063e-05, "loss": 0.112, "step": 8565 }, { "epoch": 0.18875428999542768, "grad_norm": 0.6123252511024475, "learning_rate": 2.806072214033496e-05, "loss": 0.1111, "step": 8566 }, { "epoch": 0.18877632528494384, "grad_norm": 0.9504433870315552, "learning_rate": 2.806019563038959e-05, "loss": 0.1285, "step": 8567 }, { "epoch": 0.18879836057446, "grad_norm": 0.9866661429405212, "learning_rate": 2.805966905392164e-05, "loss": 0.1599, "step": 8568 }, { "epoch": 0.18882039586397617, "grad_norm": 0.9137100577354431, "learning_rate": 2.8059142410933794e-05, "loss": 0.1107, "step": 8569 }, { "epoch": 0.1888424311534923, "grad_norm": 0.7552719116210938, "learning_rate": 2.8058615701428733e-05, "loss": 0.1046, "step": 8570 }, { "epoch": 0.18886446644300847, "grad_norm": 1.02833092212677, "learning_rate": 2.805808892540914e-05, "loss": 0.1141, "step": 8571 }, { "epoch": 0.18888650173252464, "grad_norm": 0.9164617657661438, "learning_rate": 2.8057562082877698e-05, "loss": 0.1181, "step": 8572 }, { "epoch": 0.1889085370220408, "grad_norm": 0.964702844619751, "learning_rate": 2.8057035173837085e-05, "loss": 0.0902, "step": 8573 }, { "epoch": 0.18893057231155697, "grad_norm": 0.6008089780807495, "learning_rate": 2.805650819829e-05, "loss": 0.0907, "step": 8574 }, { "epoch": 0.18895260760107313, "grad_norm": 1.2647087574005127, "learning_rate": 2.805598115623911e-05, "loss": 0.1041, "step": 8575 }, { "epoch": 0.18897464289058927, "grad_norm": 0.6471463441848755, "learning_rate": 2.8055454047687108e-05, "loss": 0.123, "step": 8576 }, { "epoch": 0.18899667818010543, "grad_norm": 1.0428129434585571, "learning_rate": 2.8054926872636675e-05, "loss": 0.1263, "step": 8577 }, { "epoch": 0.1890187134696216, "grad_norm": 0.77830570936203, "learning_rate": 2.8054399631090504e-05, "loss": 0.1141, "step": 8578 }, { "epoch": 0.18904074875913776, "grad_norm": 1.1426750421524048, "learning_rate": 2.8053872323051274e-05, "loss": 0.0888, "step": 8579 }, { "epoch": 0.18906278404865393, "grad_norm": 0.7521190643310547, "learning_rate": 2.805334494852167e-05, "loss": 0.1507, "step": 8580 }, { "epoch": 0.1890848193381701, "grad_norm": 0.7629895806312561, "learning_rate": 2.8052817507504385e-05, "loss": 0.1003, "step": 8581 }, { "epoch": 0.18910685462768623, "grad_norm": 1.0794073343276978, "learning_rate": 2.8052290000002096e-05, "loss": 0.1261, "step": 8582 }, { "epoch": 0.1891288899172024, "grad_norm": 0.9125657677650452, "learning_rate": 2.8051762426017495e-05, "loss": 0.1327, "step": 8583 }, { "epoch": 0.18915092520671856, "grad_norm": 0.7562851905822754, "learning_rate": 2.8051234785553276e-05, "loss": 0.1144, "step": 8584 }, { "epoch": 0.18917296049623472, "grad_norm": 0.8543620705604553, "learning_rate": 2.805070707861211e-05, "loss": 0.0879, "step": 8585 }, { "epoch": 0.18919499578575089, "grad_norm": 0.7019694447517395, "learning_rate": 2.8050179305196698e-05, "loss": 0.1254, "step": 8586 }, { "epoch": 0.18921703107526705, "grad_norm": 0.7729482650756836, "learning_rate": 2.8049651465309726e-05, "loss": 0.0877, "step": 8587 }, { "epoch": 0.1892390663647832, "grad_norm": 0.7874580025672913, "learning_rate": 2.8049123558953883e-05, "loss": 0.1433, "step": 8588 }, { "epoch": 0.18926110165429935, "grad_norm": 0.6513700485229492, "learning_rate": 2.8048595586131855e-05, "loss": 0.0907, "step": 8589 }, { "epoch": 0.18928313694381552, "grad_norm": 0.8076662421226501, "learning_rate": 2.804806754684633e-05, "loss": 0.1445, "step": 8590 }, { "epoch": 0.18930517223333168, "grad_norm": 0.5007103085517883, "learning_rate": 2.8047539441100006e-05, "loss": 0.1274, "step": 8591 }, { "epoch": 0.18932720752284785, "grad_norm": 1.7604676485061646, "learning_rate": 2.804701126889556e-05, "loss": 0.1329, "step": 8592 }, { "epoch": 0.189349242812364, "grad_norm": 1.3084068298339844, "learning_rate": 2.804648303023569e-05, "loss": 0.1359, "step": 8593 }, { "epoch": 0.18937127810188015, "grad_norm": 1.141374945640564, "learning_rate": 2.8045954725123093e-05, "loss": 0.1148, "step": 8594 }, { "epoch": 0.1893933133913963, "grad_norm": 0.8791689872741699, "learning_rate": 2.8045426353560445e-05, "loss": 0.1426, "step": 8595 }, { "epoch": 0.18941534868091248, "grad_norm": 0.784315824508667, "learning_rate": 2.804489791555045e-05, "loss": 0.1137, "step": 8596 }, { "epoch": 0.18943738397042864, "grad_norm": 1.0922842025756836, "learning_rate": 2.804436941109579e-05, "loss": 0.1403, "step": 8597 }, { "epoch": 0.1894594192599448, "grad_norm": 0.8289377093315125, "learning_rate": 2.8043840840199164e-05, "loss": 0.1165, "step": 8598 }, { "epoch": 0.18948145454946097, "grad_norm": 0.7868360877037048, "learning_rate": 2.8043312202863265e-05, "loss": 0.1243, "step": 8599 }, { "epoch": 0.18950348983897713, "grad_norm": 1.1034235954284668, "learning_rate": 2.8042783499090775e-05, "loss": 0.1358, "step": 8600 }, { "epoch": 0.18952552512849327, "grad_norm": 1.3921209573745728, "learning_rate": 2.8042254728884402e-05, "loss": 0.1006, "step": 8601 }, { "epoch": 0.18954756041800944, "grad_norm": 1.2355750799179077, "learning_rate": 2.804172589224683e-05, "loss": 0.1031, "step": 8602 }, { "epoch": 0.1895695957075256, "grad_norm": 0.7402353286743164, "learning_rate": 2.804119698918075e-05, "loss": 0.1225, "step": 8603 }, { "epoch": 0.18959163099704177, "grad_norm": 0.7381260395050049, "learning_rate": 2.8040668019688867e-05, "loss": 0.0931, "step": 8604 }, { "epoch": 0.18961366628655793, "grad_norm": 0.828952968120575, "learning_rate": 2.8040138983773867e-05, "loss": 0.0942, "step": 8605 }, { "epoch": 0.1896357015760741, "grad_norm": 1.0356069803237915, "learning_rate": 2.8039609881438446e-05, "loss": 0.0882, "step": 8606 }, { "epoch": 0.18965773686559023, "grad_norm": 0.6856621503829956, "learning_rate": 2.8039080712685298e-05, "loss": 0.0869, "step": 8607 }, { "epoch": 0.1896797721551064, "grad_norm": 0.8473640084266663, "learning_rate": 2.803855147751712e-05, "loss": 0.152, "step": 8608 }, { "epoch": 0.18970180744462256, "grad_norm": 0.7373166680335999, "learning_rate": 2.803802217593661e-05, "loss": 0.0954, "step": 8609 }, { "epoch": 0.18972384273413873, "grad_norm": 1.0807149410247803, "learning_rate": 2.8037492807946458e-05, "loss": 0.1099, "step": 8610 }, { "epoch": 0.1897458780236549, "grad_norm": 1.2210736274719238, "learning_rate": 2.8036963373549365e-05, "loss": 0.1002, "step": 8611 }, { "epoch": 0.18976791331317105, "grad_norm": 0.582604706287384, "learning_rate": 2.8036433872748026e-05, "loss": 0.0872, "step": 8612 }, { "epoch": 0.1897899486026872, "grad_norm": 0.6093863248825073, "learning_rate": 2.803590430554514e-05, "loss": 0.0799, "step": 8613 }, { "epoch": 0.18981198389220336, "grad_norm": 0.8487274646759033, "learning_rate": 2.8035374671943405e-05, "loss": 0.0814, "step": 8614 }, { "epoch": 0.18983401918171952, "grad_norm": 1.2465612888336182, "learning_rate": 2.8034844971945514e-05, "loss": 0.1714, "step": 8615 }, { "epoch": 0.18985605447123569, "grad_norm": 1.0503569841384888, "learning_rate": 2.803431520555417e-05, "loss": 0.1189, "step": 8616 }, { "epoch": 0.18987808976075185, "grad_norm": 1.1981736421585083, "learning_rate": 2.8033785372772063e-05, "loss": 0.1671, "step": 8617 }, { "epoch": 0.18990012505026801, "grad_norm": 1.026045322418213, "learning_rate": 2.8033255473601903e-05, "loss": 0.1329, "step": 8618 }, { "epoch": 0.18992216033978415, "grad_norm": 0.6804265975952148, "learning_rate": 2.8032725508046383e-05, "loss": 0.101, "step": 8619 }, { "epoch": 0.18994419562930032, "grad_norm": 1.0339552164077759, "learning_rate": 2.8032195476108206e-05, "loss": 0.1153, "step": 8620 }, { "epoch": 0.18996623091881648, "grad_norm": 0.9691339135169983, "learning_rate": 2.8031665377790063e-05, "loss": 0.0983, "step": 8621 }, { "epoch": 0.18998826620833265, "grad_norm": 1.2156418561935425, "learning_rate": 2.8031135213094664e-05, "loss": 0.1372, "step": 8622 }, { "epoch": 0.1900103014978488, "grad_norm": 0.8066543340682983, "learning_rate": 2.8030604982024703e-05, "loss": 0.1124, "step": 8623 }, { "epoch": 0.19003233678736497, "grad_norm": 0.552056074142456, "learning_rate": 2.803007468458289e-05, "loss": 0.079, "step": 8624 }, { "epoch": 0.1900543720768811, "grad_norm": 0.8703342080116272, "learning_rate": 2.802954432077191e-05, "loss": 0.1323, "step": 8625 }, { "epoch": 0.19007640736639728, "grad_norm": 1.188797950744629, "learning_rate": 2.802901389059448e-05, "loss": 0.0928, "step": 8626 }, { "epoch": 0.19009844265591344, "grad_norm": 0.9745607376098633, "learning_rate": 2.802848339405329e-05, "loss": 0.1025, "step": 8627 }, { "epoch": 0.1901204779454296, "grad_norm": 1.3676947355270386, "learning_rate": 2.8027952831151054e-05, "loss": 0.1395, "step": 8628 }, { "epoch": 0.19014251323494577, "grad_norm": 0.8934836387634277, "learning_rate": 2.8027422201890468e-05, "loss": 0.1495, "step": 8629 }, { "epoch": 0.19016454852446193, "grad_norm": 0.5325299501419067, "learning_rate": 2.8026891506274228e-05, "loss": 0.1578, "step": 8630 }, { "epoch": 0.1901865838139781, "grad_norm": 0.8510494828224182, "learning_rate": 2.802636074430505e-05, "loss": 0.0722, "step": 8631 }, { "epoch": 0.19020861910349424, "grad_norm": 0.8148751854896545, "learning_rate": 2.8025829915985632e-05, "loss": 0.1497, "step": 8632 }, { "epoch": 0.1902306543930104, "grad_norm": 0.7377264499664307, "learning_rate": 2.8025299021318676e-05, "loss": 0.0968, "step": 8633 }, { "epoch": 0.19025268968252657, "grad_norm": 1.1245026588439941, "learning_rate": 2.8024768060306885e-05, "loss": 0.0808, "step": 8634 }, { "epoch": 0.19027472497204273, "grad_norm": 0.9375279545783997, "learning_rate": 2.8024237032952963e-05, "loss": 0.0963, "step": 8635 }, { "epoch": 0.1902967602615589, "grad_norm": 0.902955949306488, "learning_rate": 2.8023705939259624e-05, "loss": 0.1019, "step": 8636 }, { "epoch": 0.19031879555107506, "grad_norm": 0.789815366268158, "learning_rate": 2.8023174779229564e-05, "loss": 0.1172, "step": 8637 }, { "epoch": 0.1903408308405912, "grad_norm": 1.079276442527771, "learning_rate": 2.8022643552865492e-05, "loss": 0.1482, "step": 8638 }, { "epoch": 0.19036286613010736, "grad_norm": 0.972590982913971, "learning_rate": 2.802211226017011e-05, "loss": 0.0775, "step": 8639 }, { "epoch": 0.19038490141962353, "grad_norm": 0.8901461362838745, "learning_rate": 2.8021580901146133e-05, "loss": 0.1556, "step": 8640 }, { "epoch": 0.1904069367091397, "grad_norm": 0.6133711934089661, "learning_rate": 2.8021049475796257e-05, "loss": 0.115, "step": 8641 }, { "epoch": 0.19042897199865585, "grad_norm": 1.2704702615737915, "learning_rate": 2.80205179841232e-05, "loss": 0.1323, "step": 8642 }, { "epoch": 0.19045100728817202, "grad_norm": 0.8153851628303528, "learning_rate": 2.801998642612966e-05, "loss": 0.0857, "step": 8643 }, { "epoch": 0.19047304257768816, "grad_norm": 1.4204822778701782, "learning_rate": 2.8019454801818345e-05, "loss": 0.097, "step": 8644 }, { "epoch": 0.19049507786720432, "grad_norm": 1.747043490409851, "learning_rate": 2.8018923111191966e-05, "loss": 0.1717, "step": 8645 }, { "epoch": 0.19051711315672049, "grad_norm": 0.943139374256134, "learning_rate": 2.801839135425323e-05, "loss": 0.1601, "step": 8646 }, { "epoch": 0.19053914844623665, "grad_norm": 0.6887418031692505, "learning_rate": 2.8017859531004842e-05, "loss": 0.1032, "step": 8647 }, { "epoch": 0.19056118373575281, "grad_norm": 1.0347462892532349, "learning_rate": 2.8017327641449517e-05, "loss": 0.1226, "step": 8648 }, { "epoch": 0.19058321902526898, "grad_norm": 0.9031670093536377, "learning_rate": 2.8016795685589964e-05, "loss": 0.104, "step": 8649 }, { "epoch": 0.19060525431478512, "grad_norm": 0.8049482107162476, "learning_rate": 2.801626366342889e-05, "loss": 0.0822, "step": 8650 }, { "epoch": 0.19062728960430128, "grad_norm": 0.8566566109657288, "learning_rate": 2.8015731574969005e-05, "loss": 0.1214, "step": 8651 }, { "epoch": 0.19064932489381745, "grad_norm": 1.247414231300354, "learning_rate": 2.801519942021302e-05, "loss": 0.1061, "step": 8652 }, { "epoch": 0.1906713601833336, "grad_norm": 0.7028439044952393, "learning_rate": 2.8014667199163644e-05, "loss": 0.1217, "step": 8653 }, { "epoch": 0.19069339547284977, "grad_norm": 0.7078353762626648, "learning_rate": 2.801413491182359e-05, "loss": 0.1566, "step": 8654 }, { "epoch": 0.19071543076236594, "grad_norm": 0.9349830746650696, "learning_rate": 2.801360255819557e-05, "loss": 0.1058, "step": 8655 }, { "epoch": 0.19073746605188208, "grad_norm": 1.0142325162887573, "learning_rate": 2.801307013828229e-05, "loss": 0.1039, "step": 8656 }, { "epoch": 0.19075950134139824, "grad_norm": 1.9618048667907715, "learning_rate": 2.801253765208647e-05, "loss": 0.0836, "step": 8657 }, { "epoch": 0.1907815366309144, "grad_norm": 1.0777353048324585, "learning_rate": 2.8012005099610813e-05, "loss": 0.0908, "step": 8658 }, { "epoch": 0.19080357192043057, "grad_norm": 1.1623899936676025, "learning_rate": 2.8011472480858037e-05, "loss": 0.1269, "step": 8659 }, { "epoch": 0.19082560720994673, "grad_norm": 1.248423457145691, "learning_rate": 2.8010939795830856e-05, "loss": 0.1367, "step": 8660 }, { "epoch": 0.1908476424994629, "grad_norm": 1.755171775817871, "learning_rate": 2.801040704453198e-05, "loss": 0.133, "step": 8661 }, { "epoch": 0.19086967778897904, "grad_norm": 0.7213233709335327, "learning_rate": 2.8009874226964128e-05, "loss": 0.1147, "step": 8662 }, { "epoch": 0.1908917130784952, "grad_norm": 0.7943090796470642, "learning_rate": 2.800934134313001e-05, "loss": 0.1287, "step": 8663 }, { "epoch": 0.19091374836801137, "grad_norm": 1.154269814491272, "learning_rate": 2.8008808393032334e-05, "loss": 0.0789, "step": 8664 }, { "epoch": 0.19093578365752753, "grad_norm": 0.8735660910606384, "learning_rate": 2.8008275376673823e-05, "loss": 0.101, "step": 8665 }, { "epoch": 0.1909578189470437, "grad_norm": 0.8028945922851562, "learning_rate": 2.8007742294057196e-05, "loss": 0.0926, "step": 8666 }, { "epoch": 0.19097985423655986, "grad_norm": 1.0123094320297241, "learning_rate": 2.8007209145185157e-05, "loss": 0.1423, "step": 8667 }, { "epoch": 0.19100188952607602, "grad_norm": 0.777423620223999, "learning_rate": 2.8006675930060427e-05, "loss": 0.091, "step": 8668 }, { "epoch": 0.19102392481559216, "grad_norm": 1.256392478942871, "learning_rate": 2.8006142648685723e-05, "loss": 0.1506, "step": 8669 }, { "epoch": 0.19104596010510833, "grad_norm": 0.8956641554832458, "learning_rate": 2.800560930106376e-05, "loss": 0.146, "step": 8670 }, { "epoch": 0.1910679953946245, "grad_norm": 0.6482170820236206, "learning_rate": 2.8005075887197253e-05, "loss": 0.0917, "step": 8671 }, { "epoch": 0.19109003068414065, "grad_norm": 0.7276037931442261, "learning_rate": 2.8004542407088924e-05, "loss": 0.1149, "step": 8672 }, { "epoch": 0.19111206597365682, "grad_norm": 0.5657858848571777, "learning_rate": 2.8004008860741484e-05, "loss": 0.0814, "step": 8673 }, { "epoch": 0.19113410126317298, "grad_norm": 0.8750911951065063, "learning_rate": 2.8003475248157654e-05, "loss": 0.1016, "step": 8674 }, { "epoch": 0.19115613655268912, "grad_norm": 0.95833820104599, "learning_rate": 2.800294156934015e-05, "loss": 0.1232, "step": 8675 }, { "epoch": 0.19117817184220529, "grad_norm": 0.7282090783119202, "learning_rate": 2.80024078242917e-05, "loss": 0.1217, "step": 8676 }, { "epoch": 0.19120020713172145, "grad_norm": 0.7130253911018372, "learning_rate": 2.8001874013015003e-05, "loss": 0.0856, "step": 8677 }, { "epoch": 0.19122224242123761, "grad_norm": 1.0609261989593506, "learning_rate": 2.8001340135512795e-05, "loss": 0.1363, "step": 8678 }, { "epoch": 0.19124427771075378, "grad_norm": 1.1359162330627441, "learning_rate": 2.800080619178779e-05, "loss": 0.1555, "step": 8679 }, { "epoch": 0.19126631300026994, "grad_norm": 0.8058481216430664, "learning_rate": 2.8000272181842707e-05, "loss": 0.1363, "step": 8680 }, { "epoch": 0.19128834828978608, "grad_norm": 1.1200975179672241, "learning_rate": 2.7999738105680272e-05, "loss": 0.1411, "step": 8681 }, { "epoch": 0.19131038357930225, "grad_norm": 1.1613755226135254, "learning_rate": 2.799920396330319e-05, "loss": 0.0759, "step": 8682 }, { "epoch": 0.1913324188688184, "grad_norm": 0.8804620504379272, "learning_rate": 2.7998669754714198e-05, "loss": 0.1145, "step": 8683 }, { "epoch": 0.19135445415833457, "grad_norm": 1.2194972038269043, "learning_rate": 2.7998135479916008e-05, "loss": 0.13, "step": 8684 }, { "epoch": 0.19137648944785074, "grad_norm": 1.0520576238632202, "learning_rate": 2.7997601138911344e-05, "loss": 0.1556, "step": 8685 }, { "epoch": 0.1913985247373669, "grad_norm": 0.6957688927650452, "learning_rate": 2.7997066731702924e-05, "loss": 0.1002, "step": 8686 }, { "epoch": 0.19142056002688304, "grad_norm": 1.1687697172164917, "learning_rate": 2.799653225829348e-05, "loss": 0.1221, "step": 8687 }, { "epoch": 0.1914425953163992, "grad_norm": 0.6123858690261841, "learning_rate": 2.7995997718685724e-05, "loss": 0.1209, "step": 8688 }, { "epoch": 0.19146463060591537, "grad_norm": 1.3388911485671997, "learning_rate": 2.7995463112882385e-05, "loss": 0.1417, "step": 8689 }, { "epoch": 0.19148666589543153, "grad_norm": 1.081186056137085, "learning_rate": 2.7994928440886184e-05, "loss": 0.1102, "step": 8690 }, { "epoch": 0.1915087011849477, "grad_norm": 1.0751821994781494, "learning_rate": 2.7994393702699843e-05, "loss": 0.0819, "step": 8691 }, { "epoch": 0.19153073647446386, "grad_norm": 1.265729308128357, "learning_rate": 2.7993858898326083e-05, "loss": 0.098, "step": 8692 }, { "epoch": 0.19155277176398, "grad_norm": 1.09869384765625, "learning_rate": 2.7993324027767638e-05, "loss": 0.1319, "step": 8693 }, { "epoch": 0.19157480705349617, "grad_norm": 1.5278294086456299, "learning_rate": 2.7992789091027223e-05, "loss": 0.1197, "step": 8694 }, { "epoch": 0.19159684234301233, "grad_norm": 0.9857757091522217, "learning_rate": 2.799225408810756e-05, "loss": 0.1097, "step": 8695 }, { "epoch": 0.1916188776325285, "grad_norm": 1.419291615486145, "learning_rate": 2.7991719019011394e-05, "loss": 0.1348, "step": 8696 }, { "epoch": 0.19164091292204466, "grad_norm": 0.8509754538536072, "learning_rate": 2.7991183883741425e-05, "loss": 0.0866, "step": 8697 }, { "epoch": 0.19166294821156082, "grad_norm": 1.0930113792419434, "learning_rate": 2.7990648682300395e-05, "loss": 0.1515, "step": 8698 }, { "epoch": 0.19168498350107696, "grad_norm": 0.8503203988075256, "learning_rate": 2.799011341469102e-05, "loss": 0.1269, "step": 8699 }, { "epoch": 0.19170701879059313, "grad_norm": 0.9994200468063354, "learning_rate": 2.7989578080916038e-05, "loss": 0.1363, "step": 8700 }, { "epoch": 0.1917290540801093, "grad_norm": 0.7553423047065735, "learning_rate": 2.7989042680978166e-05, "loss": 0.0668, "step": 8701 }, { "epoch": 0.19175108936962545, "grad_norm": 1.1007698774337769, "learning_rate": 2.7988507214880133e-05, "loss": 0.1245, "step": 8702 }, { "epoch": 0.19177312465914162, "grad_norm": 0.8118755221366882, "learning_rate": 2.798797168262467e-05, "loss": 0.1064, "step": 8703 }, { "epoch": 0.19179515994865778, "grad_norm": 0.921332836151123, "learning_rate": 2.79874360842145e-05, "loss": 0.1231, "step": 8704 }, { "epoch": 0.19181719523817395, "grad_norm": 0.6406541466712952, "learning_rate": 2.798690041965236e-05, "loss": 0.0847, "step": 8705 }, { "epoch": 0.19183923052769009, "grad_norm": 1.0638983249664307, "learning_rate": 2.7986364688940968e-05, "loss": 0.105, "step": 8706 }, { "epoch": 0.19186126581720625, "grad_norm": 1.2182002067565918, "learning_rate": 2.7985828892083054e-05, "loss": 0.1249, "step": 8707 }, { "epoch": 0.19188330110672241, "grad_norm": 0.8440285325050354, "learning_rate": 2.7985293029081354e-05, "loss": 0.0962, "step": 8708 }, { "epoch": 0.19190533639623858, "grad_norm": 1.2012500762939453, "learning_rate": 2.798475709993859e-05, "loss": 0.1186, "step": 8709 }, { "epoch": 0.19192737168575474, "grad_norm": 1.273736834526062, "learning_rate": 2.79842211046575e-05, "loss": 0.0863, "step": 8710 }, { "epoch": 0.1919494069752709, "grad_norm": 0.9277943968772888, "learning_rate": 2.79836850432408e-05, "loss": 0.0866, "step": 8711 }, { "epoch": 0.19197144226478705, "grad_norm": 1.1978952884674072, "learning_rate": 2.798314891569124e-05, "loss": 0.1221, "step": 8712 }, { "epoch": 0.1919934775543032, "grad_norm": 0.9312525391578674, "learning_rate": 2.7982612722011536e-05, "loss": 0.1208, "step": 8713 }, { "epoch": 0.19201551284381937, "grad_norm": 0.5860686898231506, "learning_rate": 2.7982076462204424e-05, "loss": 0.0768, "step": 8714 }, { "epoch": 0.19203754813333554, "grad_norm": 0.6315475702285767, "learning_rate": 2.798154013627263e-05, "loss": 0.109, "step": 8715 }, { "epoch": 0.1920595834228517, "grad_norm": 0.9392101764678955, "learning_rate": 2.79810037442189e-05, "loss": 0.0916, "step": 8716 }, { "epoch": 0.19208161871236787, "grad_norm": 0.8025791049003601, "learning_rate": 2.798046728604595e-05, "loss": 0.1101, "step": 8717 }, { "epoch": 0.192103654001884, "grad_norm": 0.8546632528305054, "learning_rate": 2.7979930761756525e-05, "loss": 0.1183, "step": 8718 }, { "epoch": 0.19212568929140017, "grad_norm": 1.214829444885254, "learning_rate": 2.7979394171353348e-05, "loss": 0.1169, "step": 8719 }, { "epoch": 0.19214772458091633, "grad_norm": 1.0879011154174805, "learning_rate": 2.7978857514839158e-05, "loss": 0.1491, "step": 8720 }, { "epoch": 0.1921697598704325, "grad_norm": 1.2428314685821533, "learning_rate": 2.7978320792216686e-05, "loss": 0.1148, "step": 8721 }, { "epoch": 0.19219179515994866, "grad_norm": 0.796454131603241, "learning_rate": 2.7977784003488666e-05, "loss": 0.1254, "step": 8722 }, { "epoch": 0.19221383044946483, "grad_norm": 1.2125455141067505, "learning_rate": 2.7977247148657835e-05, "loss": 0.1132, "step": 8723 }, { "epoch": 0.19223586573898097, "grad_norm": 1.0147221088409424, "learning_rate": 2.7976710227726925e-05, "loss": 0.1349, "step": 8724 }, { "epoch": 0.19225790102849713, "grad_norm": 1.195992112159729, "learning_rate": 2.797617324069867e-05, "loss": 0.1445, "step": 8725 }, { "epoch": 0.1922799363180133, "grad_norm": 0.8335642218589783, "learning_rate": 2.7975636187575804e-05, "loss": 0.141, "step": 8726 }, { "epoch": 0.19230197160752946, "grad_norm": 1.4616613388061523, "learning_rate": 2.7975099068361068e-05, "loss": 0.1284, "step": 8727 }, { "epoch": 0.19232400689704562, "grad_norm": 1.138609528541565, "learning_rate": 2.7974561883057196e-05, "loss": 0.1737, "step": 8728 }, { "epoch": 0.1923460421865618, "grad_norm": 0.8576905727386475, "learning_rate": 2.7974024631666922e-05, "loss": 0.1342, "step": 8729 }, { "epoch": 0.19236807747607793, "grad_norm": 0.8293136358261108, "learning_rate": 2.7973487314192984e-05, "loss": 0.1411, "step": 8730 }, { "epoch": 0.1923901127655941, "grad_norm": 1.2370176315307617, "learning_rate": 2.7972949930638113e-05, "loss": 0.1184, "step": 8731 }, { "epoch": 0.19241214805511025, "grad_norm": 0.8376564979553223, "learning_rate": 2.7972412481005057e-05, "loss": 0.0734, "step": 8732 }, { "epoch": 0.19243418334462642, "grad_norm": 0.944502055644989, "learning_rate": 2.7971874965296546e-05, "loss": 0.1267, "step": 8733 }, { "epoch": 0.19245621863414258, "grad_norm": 0.9703637361526489, "learning_rate": 2.7971337383515318e-05, "loss": 0.1039, "step": 8734 }, { "epoch": 0.19247825392365875, "grad_norm": 0.7583222985267639, "learning_rate": 2.7970799735664114e-05, "loss": 0.116, "step": 8735 }, { "epoch": 0.1925002892131749, "grad_norm": 0.8721965551376343, "learning_rate": 2.7970262021745672e-05, "loss": 0.099, "step": 8736 }, { "epoch": 0.19252232450269105, "grad_norm": 1.1209570169448853, "learning_rate": 2.796972424176273e-05, "loss": 0.1307, "step": 8737 }, { "epoch": 0.19254435979220721, "grad_norm": 1.2407052516937256, "learning_rate": 2.7969186395718026e-05, "loss": 0.1241, "step": 8738 }, { "epoch": 0.19256639508172338, "grad_norm": 1.4518595933914185, "learning_rate": 2.7968648483614305e-05, "loss": 0.1252, "step": 8739 }, { "epoch": 0.19258843037123954, "grad_norm": 0.8191936016082764, "learning_rate": 2.79681105054543e-05, "loss": 0.112, "step": 8740 }, { "epoch": 0.1926104656607557, "grad_norm": 1.0868676900863647, "learning_rate": 2.7967572461240756e-05, "loss": 0.1162, "step": 8741 }, { "epoch": 0.19263250095027187, "grad_norm": 0.6915767192840576, "learning_rate": 2.7967034350976407e-05, "loss": 0.1262, "step": 8742 }, { "epoch": 0.192654536239788, "grad_norm": 0.9381698966026306, "learning_rate": 2.7966496174664004e-05, "loss": 0.1107, "step": 8743 }, { "epoch": 0.19267657152930417, "grad_norm": 1.0717904567718506, "learning_rate": 2.796595793230628e-05, "loss": 0.1442, "step": 8744 }, { "epoch": 0.19269860681882034, "grad_norm": 0.8471382856369019, "learning_rate": 2.796541962390598e-05, "loss": 0.0812, "step": 8745 }, { "epoch": 0.1927206421083365, "grad_norm": 0.8209478259086609, "learning_rate": 2.7964881249465845e-05, "loss": 0.1058, "step": 8746 }, { "epoch": 0.19274267739785267, "grad_norm": 1.605911135673523, "learning_rate": 2.7964342808988622e-05, "loss": 0.0874, "step": 8747 }, { "epoch": 0.19276471268736883, "grad_norm": 0.7464463114738464, "learning_rate": 2.7963804302477043e-05, "loss": 0.1199, "step": 8748 }, { "epoch": 0.19278674797688497, "grad_norm": 0.8594237565994263, "learning_rate": 2.796326572993386e-05, "loss": 0.106, "step": 8749 }, { "epoch": 0.19280878326640113, "grad_norm": 0.920173704624176, "learning_rate": 2.796272709136181e-05, "loss": 0.1111, "step": 8750 }, { "epoch": 0.1928308185559173, "grad_norm": 1.2496473789215088, "learning_rate": 2.796218838676364e-05, "loss": 0.1258, "step": 8751 }, { "epoch": 0.19285285384543346, "grad_norm": 1.1431496143341064, "learning_rate": 2.7961649616142097e-05, "loss": 0.1413, "step": 8752 }, { "epoch": 0.19287488913494963, "grad_norm": 1.1055151224136353, "learning_rate": 2.796111077949992e-05, "loss": 0.1318, "step": 8753 }, { "epoch": 0.1928969244244658, "grad_norm": 0.8243435621261597, "learning_rate": 2.7960571876839856e-05, "loss": 0.085, "step": 8754 }, { "epoch": 0.19291895971398193, "grad_norm": 0.9334582090377808, "learning_rate": 2.796003290816465e-05, "loss": 0.1411, "step": 8755 }, { "epoch": 0.1929409950034981, "grad_norm": 0.5352087020874023, "learning_rate": 2.7959493873477048e-05, "loss": 0.0759, "step": 8756 }, { "epoch": 0.19296303029301426, "grad_norm": 1.1622705459594727, "learning_rate": 2.795895477277979e-05, "loss": 0.1307, "step": 8757 }, { "epoch": 0.19298506558253042, "grad_norm": 0.8580156564712524, "learning_rate": 2.7958415606075626e-05, "loss": 0.0964, "step": 8758 }, { "epoch": 0.1930071008720466, "grad_norm": 1.3491369485855103, "learning_rate": 2.795787637336731e-05, "loss": 0.1353, "step": 8759 }, { "epoch": 0.19302913616156275, "grad_norm": 0.6926379203796387, "learning_rate": 2.795733707465757e-05, "loss": 0.1928, "step": 8760 }, { "epoch": 0.1930511714510789, "grad_norm": 0.9532139301300049, "learning_rate": 2.7956797709949177e-05, "loss": 0.137, "step": 8761 }, { "epoch": 0.19307320674059505, "grad_norm": 0.7032883763313293, "learning_rate": 2.795625827924486e-05, "loss": 0.1005, "step": 8762 }, { "epoch": 0.19309524203011122, "grad_norm": 1.034295916557312, "learning_rate": 2.7955718782547363e-05, "loss": 0.1145, "step": 8763 }, { "epoch": 0.19311727731962738, "grad_norm": 1.181363821029663, "learning_rate": 2.7955179219859452e-05, "loss": 0.1142, "step": 8764 }, { "epoch": 0.19313931260914355, "grad_norm": 0.6939429640769958, "learning_rate": 2.7954639591183866e-05, "loss": 0.0742, "step": 8765 }, { "epoch": 0.1931613478986597, "grad_norm": 0.9533124566078186, "learning_rate": 2.7954099896523352e-05, "loss": 0.1279, "step": 8766 }, { "epoch": 0.19318338318817585, "grad_norm": 0.950987696647644, "learning_rate": 2.795356013588066e-05, "loss": 0.162, "step": 8767 }, { "epoch": 0.19320541847769201, "grad_norm": 0.8344695568084717, "learning_rate": 2.7953020309258542e-05, "loss": 0.103, "step": 8768 }, { "epoch": 0.19322745376720818, "grad_norm": 1.0488911867141724, "learning_rate": 2.795248041665974e-05, "loss": 0.1039, "step": 8769 }, { "epoch": 0.19324948905672434, "grad_norm": 1.0397473573684692, "learning_rate": 2.7951940458087017e-05, "loss": 0.109, "step": 8770 }, { "epoch": 0.1932715243462405, "grad_norm": 0.7909061312675476, "learning_rate": 2.795140043354311e-05, "loss": 0.1087, "step": 8771 }, { "epoch": 0.19329355963575667, "grad_norm": 1.0099762678146362, "learning_rate": 2.7950860343030777e-05, "loss": 0.1187, "step": 8772 }, { "epoch": 0.19331559492527284, "grad_norm": 0.9062734842300415, "learning_rate": 2.7950320186552773e-05, "loss": 0.1454, "step": 8773 }, { "epoch": 0.19333763021478897, "grad_norm": 0.9398316144943237, "learning_rate": 2.7949779964111837e-05, "loss": 0.115, "step": 8774 }, { "epoch": 0.19335966550430514, "grad_norm": 1.1350300312042236, "learning_rate": 2.7949239675710732e-05, "loss": 0.1286, "step": 8775 }, { "epoch": 0.1933817007938213, "grad_norm": 0.769003689289093, "learning_rate": 2.79486993213522e-05, "loss": 0.1059, "step": 8776 }, { "epoch": 0.19340373608333747, "grad_norm": 1.1560455560684204, "learning_rate": 2.7948158901039005e-05, "loss": 0.1191, "step": 8777 }, { "epoch": 0.19342577137285363, "grad_norm": 0.8673094511032104, "learning_rate": 2.794761841477389e-05, "loss": 0.1099, "step": 8778 }, { "epoch": 0.1934478066623698, "grad_norm": 0.9131283164024353, "learning_rate": 2.794707786255961e-05, "loss": 0.1074, "step": 8779 }, { "epoch": 0.19346984195188593, "grad_norm": 0.6354082226753235, "learning_rate": 2.794653724439892e-05, "loss": 0.1328, "step": 8780 }, { "epoch": 0.1934918772414021, "grad_norm": 1.9801723957061768, "learning_rate": 2.7945996560294573e-05, "loss": 0.1011, "step": 8781 }, { "epoch": 0.19351391253091826, "grad_norm": 0.8709987998008728, "learning_rate": 2.7945455810249324e-05, "loss": 0.0826, "step": 8782 }, { "epoch": 0.19353594782043443, "grad_norm": 1.0525392293930054, "learning_rate": 2.7944914994265928e-05, "loss": 0.1115, "step": 8783 }, { "epoch": 0.1935579831099506, "grad_norm": 1.1680554151535034, "learning_rate": 2.7944374112347142e-05, "loss": 0.1136, "step": 8784 }, { "epoch": 0.19358001839946676, "grad_norm": 1.0764527320861816, "learning_rate": 2.794383316449571e-05, "loss": 0.1035, "step": 8785 }, { "epoch": 0.1936020536889829, "grad_norm": 1.323763370513916, "learning_rate": 2.79432921507144e-05, "loss": 0.1077, "step": 8786 }, { "epoch": 0.19362408897849906, "grad_norm": 1.1706616878509521, "learning_rate": 2.794275107100596e-05, "loss": 0.1449, "step": 8787 }, { "epoch": 0.19364612426801522, "grad_norm": 1.3221819400787354, "learning_rate": 2.794220992537315e-05, "loss": 0.1233, "step": 8788 }, { "epoch": 0.1936681595575314, "grad_norm": 1.0231366157531738, "learning_rate": 2.794166871381872e-05, "loss": 0.0866, "step": 8789 }, { "epoch": 0.19369019484704755, "grad_norm": 0.9455653429031372, "learning_rate": 2.7941127436345438e-05, "loss": 0.1129, "step": 8790 }, { "epoch": 0.19371223013656372, "grad_norm": 0.6767756342887878, "learning_rate": 2.794058609295605e-05, "loss": 0.1224, "step": 8791 }, { "epoch": 0.19373426542607985, "grad_norm": 0.8490936160087585, "learning_rate": 2.794004468365332e-05, "loss": 0.0999, "step": 8792 }, { "epoch": 0.19375630071559602, "grad_norm": 1.1545474529266357, "learning_rate": 2.793950320844e-05, "loss": 0.1204, "step": 8793 }, { "epoch": 0.19377833600511218, "grad_norm": 1.1886714696884155, "learning_rate": 2.7938961667318852e-05, "loss": 0.11, "step": 8794 }, { "epoch": 0.19380037129462835, "grad_norm": 0.7820653319358826, "learning_rate": 2.7938420060292637e-05, "loss": 0.1348, "step": 8795 }, { "epoch": 0.1938224065841445, "grad_norm": 1.065973162651062, "learning_rate": 2.793787838736411e-05, "loss": 0.1321, "step": 8796 }, { "epoch": 0.19384444187366068, "grad_norm": 1.0272036790847778, "learning_rate": 2.793733664853603e-05, "loss": 0.1396, "step": 8797 }, { "epoch": 0.19386647716317681, "grad_norm": 0.6944327354431152, "learning_rate": 2.7936794843811156e-05, "loss": 0.1359, "step": 8798 }, { "epoch": 0.19388851245269298, "grad_norm": 0.6351340413093567, "learning_rate": 2.7936252973192248e-05, "loss": 0.1127, "step": 8799 }, { "epoch": 0.19391054774220914, "grad_norm": 1.808314561843872, "learning_rate": 2.7935711036682067e-05, "loss": 0.1787, "step": 8800 }, { "epoch": 0.1939325830317253, "grad_norm": 0.8179317116737366, "learning_rate": 2.7935169034283372e-05, "loss": 0.1296, "step": 8801 }, { "epoch": 0.19395461832124147, "grad_norm": 0.7837725281715393, "learning_rate": 2.7934626965998928e-05, "loss": 0.0857, "step": 8802 }, { "epoch": 0.19397665361075764, "grad_norm": 0.6172361373901367, "learning_rate": 2.793408483183149e-05, "loss": 0.0871, "step": 8803 }, { "epoch": 0.19399868890027377, "grad_norm": 1.0476510524749756, "learning_rate": 2.7933542631783824e-05, "loss": 0.1493, "step": 8804 }, { "epoch": 0.19402072418978994, "grad_norm": 1.253740906715393, "learning_rate": 2.7933000365858692e-05, "loss": 0.1153, "step": 8805 }, { "epoch": 0.1940427594793061, "grad_norm": 0.9248711466789246, "learning_rate": 2.7932458034058848e-05, "loss": 0.1126, "step": 8806 }, { "epoch": 0.19406479476882227, "grad_norm": 1.0169960260391235, "learning_rate": 2.793191563638706e-05, "loss": 0.1069, "step": 8807 }, { "epoch": 0.19408683005833843, "grad_norm": 0.7366311550140381, "learning_rate": 2.79313731728461e-05, "loss": 0.0785, "step": 8808 }, { "epoch": 0.1941088653478546, "grad_norm": 1.0126616954803467, "learning_rate": 2.7930830643438715e-05, "loss": 0.0975, "step": 8809 }, { "epoch": 0.19413090063737076, "grad_norm": 0.9481325745582581, "learning_rate": 2.7930288048167677e-05, "loss": 0.1115, "step": 8810 }, { "epoch": 0.1941529359268869, "grad_norm": 1.1056594848632812, "learning_rate": 2.7929745387035748e-05, "loss": 0.1007, "step": 8811 }, { "epoch": 0.19417497121640306, "grad_norm": 0.7261096239089966, "learning_rate": 2.7929202660045693e-05, "loss": 0.1495, "step": 8812 }, { "epoch": 0.19419700650591923, "grad_norm": 1.5520548820495605, "learning_rate": 2.7928659867200273e-05, "loss": 0.1215, "step": 8813 }, { "epoch": 0.1942190417954354, "grad_norm": 1.0418148040771484, "learning_rate": 2.7928117008502257e-05, "loss": 0.1189, "step": 8814 }, { "epoch": 0.19424107708495156, "grad_norm": 1.0004836320877075, "learning_rate": 2.792757408395441e-05, "loss": 0.105, "step": 8815 }, { "epoch": 0.19426311237446772, "grad_norm": 0.8396268486976624, "learning_rate": 2.7927031093559495e-05, "loss": 0.1212, "step": 8816 }, { "epoch": 0.19428514766398386, "grad_norm": 0.7863916754722595, "learning_rate": 2.792648803732028e-05, "loss": 0.1209, "step": 8817 }, { "epoch": 0.19430718295350002, "grad_norm": 1.1406382322311401, "learning_rate": 2.7925944915239527e-05, "loss": 0.1399, "step": 8818 }, { "epoch": 0.1943292182430162, "grad_norm": 1.048775553703308, "learning_rate": 2.7925401727320007e-05, "loss": 0.078, "step": 8819 }, { "epoch": 0.19435125353253235, "grad_norm": 0.8374823331832886, "learning_rate": 2.7924858473564485e-05, "loss": 0.1205, "step": 8820 }, { "epoch": 0.19437328882204852, "grad_norm": 0.5653469562530518, "learning_rate": 2.7924315153975727e-05, "loss": 0.0889, "step": 8821 }, { "epoch": 0.19439532411156468, "grad_norm": 0.8314390778541565, "learning_rate": 2.79237717685565e-05, "loss": 0.159, "step": 8822 }, { "epoch": 0.19441735940108082, "grad_norm": 0.7580898404121399, "learning_rate": 2.7923228317309574e-05, "loss": 0.1179, "step": 8823 }, { "epoch": 0.19443939469059698, "grad_norm": 0.9661529064178467, "learning_rate": 2.7922684800237717e-05, "loss": 0.132, "step": 8824 }, { "epoch": 0.19446142998011315, "grad_norm": 1.2565187215805054, "learning_rate": 2.7922141217343694e-05, "loss": 0.0979, "step": 8825 }, { "epoch": 0.1944834652696293, "grad_norm": 1.0372369289398193, "learning_rate": 2.792159756863028e-05, "loss": 0.1324, "step": 8826 }, { "epoch": 0.19450550055914548, "grad_norm": 0.6191341876983643, "learning_rate": 2.7921053854100235e-05, "loss": 0.0881, "step": 8827 }, { "epoch": 0.19452753584866164, "grad_norm": 1.1641018390655518, "learning_rate": 2.792051007375634e-05, "loss": 0.1196, "step": 8828 }, { "epoch": 0.19454957113817778, "grad_norm": 1.0664722919464111, "learning_rate": 2.791996622760136e-05, "loss": 0.1244, "step": 8829 }, { "epoch": 0.19457160642769394, "grad_norm": 3.3272855281829834, "learning_rate": 2.7919422315638055e-05, "loss": 0.1002, "step": 8830 }, { "epoch": 0.1945936417172101, "grad_norm": 1.0497357845306396, "learning_rate": 2.7918878337869213e-05, "loss": 0.1114, "step": 8831 }, { "epoch": 0.19461567700672627, "grad_norm": 1.1952048540115356, "learning_rate": 2.7918334294297592e-05, "loss": 0.0993, "step": 8832 }, { "epoch": 0.19463771229624244, "grad_norm": 1.1545534133911133, "learning_rate": 2.791779018492597e-05, "loss": 0.1233, "step": 8833 }, { "epoch": 0.1946597475857586, "grad_norm": 0.9393310546875, "learning_rate": 2.7917246009757108e-05, "loss": 0.1193, "step": 8834 }, { "epoch": 0.19468178287527474, "grad_norm": 1.0558439493179321, "learning_rate": 2.7916701768793794e-05, "loss": 0.1375, "step": 8835 }, { "epoch": 0.1947038181647909, "grad_norm": 2.2705092430114746, "learning_rate": 2.7916157462038788e-05, "loss": 0.1148, "step": 8836 }, { "epoch": 0.19472585345430707, "grad_norm": 1.2980167865753174, "learning_rate": 2.7915613089494865e-05, "loss": 0.1112, "step": 8837 }, { "epoch": 0.19474788874382323, "grad_norm": 0.9004482626914978, "learning_rate": 2.79150686511648e-05, "loss": 0.1252, "step": 8838 }, { "epoch": 0.1947699240333394, "grad_norm": 0.9214346408843994, "learning_rate": 2.7914524147051365e-05, "loss": 0.1047, "step": 8839 }, { "epoch": 0.19479195932285556, "grad_norm": 1.572176456451416, "learning_rate": 2.7913979577157334e-05, "loss": 0.1671, "step": 8840 }, { "epoch": 0.19481399461237173, "grad_norm": 0.6116546988487244, "learning_rate": 2.7913434941485475e-05, "loss": 0.061, "step": 8841 }, { "epoch": 0.19483602990188786, "grad_norm": 0.8106176257133484, "learning_rate": 2.791289024003857e-05, "loss": 0.112, "step": 8842 }, { "epoch": 0.19485806519140403, "grad_norm": 1.7729424238204956, "learning_rate": 2.791234547281939e-05, "loss": 0.1244, "step": 8843 }, { "epoch": 0.1948801004809202, "grad_norm": 0.9652870297431946, "learning_rate": 2.791180063983071e-05, "loss": 0.1581, "step": 8844 }, { "epoch": 0.19490213577043636, "grad_norm": 0.7315377593040466, "learning_rate": 2.7911255741075307e-05, "loss": 0.111, "step": 8845 }, { "epoch": 0.19492417105995252, "grad_norm": 0.9695184826850891, "learning_rate": 2.7910710776555958e-05, "loss": 0.0857, "step": 8846 }, { "epoch": 0.1949462063494687, "grad_norm": 0.79404616355896, "learning_rate": 2.791016574627543e-05, "loss": 0.1144, "step": 8847 }, { "epoch": 0.19496824163898482, "grad_norm": 0.943418562412262, "learning_rate": 2.790962065023651e-05, "loss": 0.1144, "step": 8848 }, { "epoch": 0.194990276928501, "grad_norm": 1.279671549797058, "learning_rate": 2.7909075488441964e-05, "loss": 0.1314, "step": 8849 }, { "epoch": 0.19501231221801715, "grad_norm": 0.8302587866783142, "learning_rate": 2.790853026089458e-05, "loss": 0.0884, "step": 8850 }, { "epoch": 0.19503434750753332, "grad_norm": 1.2712337970733643, "learning_rate": 2.7907984967597124e-05, "loss": 0.1604, "step": 8851 }, { "epoch": 0.19505638279704948, "grad_norm": 0.7750282287597656, "learning_rate": 2.7907439608552383e-05, "loss": 0.0741, "step": 8852 }, { "epoch": 0.19507841808656565, "grad_norm": 0.6619980931282043, "learning_rate": 2.7906894183763126e-05, "loss": 0.1156, "step": 8853 }, { "epoch": 0.19510045337608178, "grad_norm": 0.8402136564254761, "learning_rate": 2.790634869323214e-05, "loss": 0.1134, "step": 8854 }, { "epoch": 0.19512248866559795, "grad_norm": 0.9980397820472717, "learning_rate": 2.7905803136962196e-05, "loss": 0.1066, "step": 8855 }, { "epoch": 0.1951445239551141, "grad_norm": 1.3618243932724, "learning_rate": 2.7905257514956077e-05, "loss": 0.1178, "step": 8856 }, { "epoch": 0.19516655924463028, "grad_norm": 0.835917592048645, "learning_rate": 2.790471182721656e-05, "loss": 0.0677, "step": 8857 }, { "epoch": 0.19518859453414644, "grad_norm": 0.9638242125511169, "learning_rate": 2.790416607374643e-05, "loss": 0.0966, "step": 8858 }, { "epoch": 0.1952106298236626, "grad_norm": 0.7638001441955566, "learning_rate": 2.790362025454846e-05, "loss": 0.0499, "step": 8859 }, { "epoch": 0.19523266511317874, "grad_norm": 1.1956777572631836, "learning_rate": 2.7903074369625428e-05, "loss": 0.0651, "step": 8860 }, { "epoch": 0.1952547004026949, "grad_norm": 0.8438317179679871, "learning_rate": 2.7902528418980123e-05, "loss": 0.0846, "step": 8861 }, { "epoch": 0.19527673569221107, "grad_norm": 0.7570667266845703, "learning_rate": 2.790198240261532e-05, "loss": 0.1508, "step": 8862 }, { "epoch": 0.19529877098172724, "grad_norm": 0.7650373578071594, "learning_rate": 2.790143632053381e-05, "loss": 0.1143, "step": 8863 }, { "epoch": 0.1953208062712434, "grad_norm": 0.9843212366104126, "learning_rate": 2.790089017273836e-05, "loss": 0.1484, "step": 8864 }, { "epoch": 0.19534284156075957, "grad_norm": 1.0220054388046265, "learning_rate": 2.790034395923176e-05, "loss": 0.1048, "step": 8865 }, { "epoch": 0.1953648768502757, "grad_norm": 0.672824501991272, "learning_rate": 2.7899797680016785e-05, "loss": 0.0935, "step": 8866 }, { "epoch": 0.19538691213979187, "grad_norm": 0.7175924181938171, "learning_rate": 2.7899251335096227e-05, "loss": 0.1308, "step": 8867 }, { "epoch": 0.19540894742930803, "grad_norm": 0.9592366218566895, "learning_rate": 2.7898704924472865e-05, "loss": 0.0853, "step": 8868 }, { "epoch": 0.1954309827188242, "grad_norm": 1.1485811471939087, "learning_rate": 2.7898158448149488e-05, "loss": 0.1205, "step": 8869 }, { "epoch": 0.19545301800834036, "grad_norm": 2.012559175491333, "learning_rate": 2.7897611906128866e-05, "loss": 0.1132, "step": 8870 }, { "epoch": 0.19547505329785653, "grad_norm": 0.9416446685791016, "learning_rate": 2.789706529841379e-05, "loss": 0.1281, "step": 8871 }, { "epoch": 0.19549708858737266, "grad_norm": 0.9751105308532715, "learning_rate": 2.7896518625007047e-05, "loss": 0.1334, "step": 8872 }, { "epoch": 0.19551912387688883, "grad_norm": 1.216526746749878, "learning_rate": 2.789597188591142e-05, "loss": 0.1125, "step": 8873 }, { "epoch": 0.195541159166405, "grad_norm": 0.9055219292640686, "learning_rate": 2.789542508112969e-05, "loss": 0.1392, "step": 8874 }, { "epoch": 0.19556319445592116, "grad_norm": 1.4564399719238281, "learning_rate": 2.7894878210664645e-05, "loss": 0.1094, "step": 8875 }, { "epoch": 0.19558522974543732, "grad_norm": 1.4991751909255981, "learning_rate": 2.789433127451907e-05, "loss": 0.1335, "step": 8876 }, { "epoch": 0.1956072650349535, "grad_norm": 0.8147773146629333, "learning_rate": 2.7893784272695756e-05, "loss": 0.1451, "step": 8877 }, { "epoch": 0.19562930032446965, "grad_norm": 0.8546268343925476, "learning_rate": 2.789323720519748e-05, "loss": 0.0945, "step": 8878 }, { "epoch": 0.1956513356139858, "grad_norm": 0.9112716913223267, "learning_rate": 2.7892690072027034e-05, "loss": 0.1484, "step": 8879 }, { "epoch": 0.19567337090350195, "grad_norm": 0.9681262373924255, "learning_rate": 2.789214287318721e-05, "loss": 0.1362, "step": 8880 }, { "epoch": 0.19569540619301812, "grad_norm": 1.2100470066070557, "learning_rate": 2.789159560868078e-05, "loss": 0.1122, "step": 8881 }, { "epoch": 0.19571744148253428, "grad_norm": 1.1519802808761597, "learning_rate": 2.789104827851054e-05, "loss": 0.1424, "step": 8882 }, { "epoch": 0.19573947677205045, "grad_norm": 0.9717768430709839, "learning_rate": 2.7890500882679285e-05, "loss": 0.114, "step": 8883 }, { "epoch": 0.1957615120615666, "grad_norm": 0.747826099395752, "learning_rate": 2.7889953421189796e-05, "loss": 0.0726, "step": 8884 }, { "epoch": 0.19578354735108275, "grad_norm": 1.3329553604125977, "learning_rate": 2.7889405894044863e-05, "loss": 0.1281, "step": 8885 }, { "epoch": 0.1958055826405989, "grad_norm": 0.43851134181022644, "learning_rate": 2.788885830124727e-05, "loss": 0.0968, "step": 8886 }, { "epoch": 0.19582761793011508, "grad_norm": 0.9531524181365967, "learning_rate": 2.788831064279981e-05, "loss": 0.1155, "step": 8887 }, { "epoch": 0.19584965321963124, "grad_norm": 0.6833838820457458, "learning_rate": 2.788776291870527e-05, "loss": 0.0917, "step": 8888 }, { "epoch": 0.1958716885091474, "grad_norm": 0.7270785570144653, "learning_rate": 2.788721512896645e-05, "loss": 0.093, "step": 8889 }, { "epoch": 0.19589372379866357, "grad_norm": 0.7405022978782654, "learning_rate": 2.7886667273586128e-05, "loss": 0.0847, "step": 8890 }, { "epoch": 0.1959157590881797, "grad_norm": 1.3051451444625854, "learning_rate": 2.78861193525671e-05, "loss": 0.1409, "step": 8891 }, { "epoch": 0.19593779437769587, "grad_norm": 0.8030140995979309, "learning_rate": 2.788557136591216e-05, "loss": 0.115, "step": 8892 }, { "epoch": 0.19595982966721204, "grad_norm": 0.5856572389602661, "learning_rate": 2.7885023313624084e-05, "loss": 0.0955, "step": 8893 }, { "epoch": 0.1959818649567282, "grad_norm": 0.5576624870300293, "learning_rate": 2.7884475195705684e-05, "loss": 0.1085, "step": 8894 }, { "epoch": 0.19600390024624437, "grad_norm": 1.0471842288970947, "learning_rate": 2.7883927012159744e-05, "loss": 0.1231, "step": 8895 }, { "epoch": 0.19602593553576053, "grad_norm": 1.1552455425262451, "learning_rate": 2.7883378762989045e-05, "loss": 0.1372, "step": 8896 }, { "epoch": 0.19604797082527667, "grad_norm": 0.8718148469924927, "learning_rate": 2.78828304481964e-05, "loss": 0.0699, "step": 8897 }, { "epoch": 0.19607000611479283, "grad_norm": 0.8024356961250305, "learning_rate": 2.7882282067784587e-05, "loss": 0.1197, "step": 8898 }, { "epoch": 0.196092041404309, "grad_norm": 1.0338716506958008, "learning_rate": 2.7881733621756405e-05, "loss": 0.1062, "step": 8899 }, { "epoch": 0.19611407669382516, "grad_norm": 0.925403356552124, "learning_rate": 2.7881185110114645e-05, "loss": 0.0896, "step": 8900 }, { "epoch": 0.19613611198334133, "grad_norm": 1.2353273630142212, "learning_rate": 2.7880636532862102e-05, "loss": 0.1212, "step": 8901 }, { "epoch": 0.1961581472728575, "grad_norm": 1.0894228219985962, "learning_rate": 2.7880087890001565e-05, "loss": 0.1221, "step": 8902 }, { "epoch": 0.19618018256237363, "grad_norm": 0.8637140989303589, "learning_rate": 2.787953918153584e-05, "loss": 0.0839, "step": 8903 }, { "epoch": 0.1962022178518898, "grad_norm": 0.8679981231689453, "learning_rate": 2.787899040746771e-05, "loss": 0.0929, "step": 8904 }, { "epoch": 0.19622425314140596, "grad_norm": 0.8609330058097839, "learning_rate": 2.787844156779998e-05, "loss": 0.0844, "step": 8905 }, { "epoch": 0.19624628843092212, "grad_norm": 1.1915737390518188, "learning_rate": 2.787789266253544e-05, "loss": 0.1271, "step": 8906 }, { "epoch": 0.1962683237204383, "grad_norm": 0.933628499507904, "learning_rate": 2.7877343691676885e-05, "loss": 0.1151, "step": 8907 }, { "epoch": 0.19629035900995445, "grad_norm": 0.8174864053726196, "learning_rate": 2.7876794655227118e-05, "loss": 0.0799, "step": 8908 }, { "epoch": 0.1963123942994706, "grad_norm": 0.7803528904914856, "learning_rate": 2.787624555318893e-05, "loss": 0.0782, "step": 8909 }, { "epoch": 0.19633442958898675, "grad_norm": 0.7576156258583069, "learning_rate": 2.7875696385565115e-05, "loss": 0.0918, "step": 8910 }, { "epoch": 0.19635646487850292, "grad_norm": 1.2304329872131348, "learning_rate": 2.7875147152358475e-05, "loss": 0.1339, "step": 8911 }, { "epoch": 0.19637850016801908, "grad_norm": 0.5189573168754578, "learning_rate": 2.787459785357181e-05, "loss": 0.0617, "step": 8912 }, { "epoch": 0.19640053545753525, "grad_norm": 0.8844586610794067, "learning_rate": 2.7874048489207906e-05, "loss": 0.0886, "step": 8913 }, { "epoch": 0.1964225707470514, "grad_norm": 0.7553572654724121, "learning_rate": 2.7873499059269576e-05, "loss": 0.1152, "step": 8914 }, { "epoch": 0.19644460603656758, "grad_norm": 1.06644868850708, "learning_rate": 2.787294956375961e-05, "loss": 0.1272, "step": 8915 }, { "epoch": 0.1964666413260837, "grad_norm": 1.1951113939285278, "learning_rate": 2.7872400002680806e-05, "loss": 0.0999, "step": 8916 }, { "epoch": 0.19648867661559988, "grad_norm": 0.8339784741401672, "learning_rate": 2.787185037603597e-05, "loss": 0.1143, "step": 8917 }, { "epoch": 0.19651071190511604, "grad_norm": 0.8822348117828369, "learning_rate": 2.7871300683827898e-05, "loss": 0.1028, "step": 8918 }, { "epoch": 0.1965327471946322, "grad_norm": 1.1594607830047607, "learning_rate": 2.787075092605939e-05, "loss": 0.1537, "step": 8919 }, { "epoch": 0.19655478248414837, "grad_norm": 0.9110357165336609, "learning_rate": 2.787020110273324e-05, "loss": 0.1132, "step": 8920 }, { "epoch": 0.19657681777366454, "grad_norm": 0.6848576664924622, "learning_rate": 2.786965121385226e-05, "loss": 0.095, "step": 8921 }, { "epoch": 0.19659885306318067, "grad_norm": 1.1246018409729004, "learning_rate": 2.7869101259419246e-05, "loss": 0.1407, "step": 8922 }, { "epoch": 0.19662088835269684, "grad_norm": 1.152132511138916, "learning_rate": 2.7868551239436992e-05, "loss": 0.11, "step": 8923 }, { "epoch": 0.196642923642213, "grad_norm": 0.9549837708473206, "learning_rate": 2.786800115390831e-05, "loss": 0.149, "step": 8924 }, { "epoch": 0.19666495893172917, "grad_norm": 0.86293625831604, "learning_rate": 2.7867451002835998e-05, "loss": 0.1663, "step": 8925 }, { "epoch": 0.19668699422124533, "grad_norm": 0.719864547252655, "learning_rate": 2.7866900786222855e-05, "loss": 0.1116, "step": 8926 }, { "epoch": 0.1967090295107615, "grad_norm": 2.2677292823791504, "learning_rate": 2.786635050407169e-05, "loss": 0.1236, "step": 8927 }, { "epoch": 0.19673106480027763, "grad_norm": 0.7302703261375427, "learning_rate": 2.78658001563853e-05, "loss": 0.096, "step": 8928 }, { "epoch": 0.1967531000897938, "grad_norm": 0.5015128254890442, "learning_rate": 2.7865249743166497e-05, "loss": 0.0753, "step": 8929 }, { "epoch": 0.19677513537930996, "grad_norm": 1.035332441329956, "learning_rate": 2.7864699264418074e-05, "loss": 0.1358, "step": 8930 }, { "epoch": 0.19679717066882613, "grad_norm": 0.8061214685440063, "learning_rate": 2.7864148720142832e-05, "loss": 0.093, "step": 8931 }, { "epoch": 0.1968192059583423, "grad_norm": 0.706464409828186, "learning_rate": 2.786359811034359e-05, "loss": 0.1166, "step": 8932 }, { "epoch": 0.19684124124785846, "grad_norm": 1.2408878803253174, "learning_rate": 2.786304743502314e-05, "loss": 0.1082, "step": 8933 }, { "epoch": 0.1968632765373746, "grad_norm": 0.7057391405105591, "learning_rate": 2.78624966941843e-05, "loss": 0.0959, "step": 8934 }, { "epoch": 0.19688531182689076, "grad_norm": 0.6429778933525085, "learning_rate": 2.7861945887829857e-05, "loss": 0.0956, "step": 8935 }, { "epoch": 0.19690734711640692, "grad_norm": 0.9846447706222534, "learning_rate": 2.7861395015962638e-05, "loss": 0.1323, "step": 8936 }, { "epoch": 0.1969293824059231, "grad_norm": 1.0702109336853027, "learning_rate": 2.7860844078585423e-05, "loss": 0.1289, "step": 8937 }, { "epoch": 0.19695141769543925, "grad_norm": 0.9442785382270813, "learning_rate": 2.7860293075701045e-05, "loss": 0.1048, "step": 8938 }, { "epoch": 0.19697345298495542, "grad_norm": 1.0352015495300293, "learning_rate": 2.785974200731229e-05, "loss": 0.1353, "step": 8939 }, { "epoch": 0.19699548827447155, "grad_norm": 1.075522780418396, "learning_rate": 2.785919087342198e-05, "loss": 0.1189, "step": 8940 }, { "epoch": 0.19701752356398772, "grad_norm": 0.9748114347457886, "learning_rate": 2.7858639674032906e-05, "loss": 0.0961, "step": 8941 }, { "epoch": 0.19703955885350388, "grad_norm": 1.434446096420288, "learning_rate": 2.7858088409147892e-05, "loss": 0.1314, "step": 8942 }, { "epoch": 0.19706159414302005, "grad_norm": 0.6996307373046875, "learning_rate": 2.7857537078769738e-05, "loss": 0.1085, "step": 8943 }, { "epoch": 0.1970836294325362, "grad_norm": 1.1721646785736084, "learning_rate": 2.785698568290125e-05, "loss": 0.0933, "step": 8944 }, { "epoch": 0.19710566472205238, "grad_norm": 0.7536431550979614, "learning_rate": 2.7856434221545245e-05, "loss": 0.1437, "step": 8945 }, { "epoch": 0.19712770001156854, "grad_norm": 0.911413848400116, "learning_rate": 2.785588269470452e-05, "loss": 0.1365, "step": 8946 }, { "epoch": 0.19714973530108468, "grad_norm": 0.9806762337684631, "learning_rate": 2.7855331102381894e-05, "loss": 0.0891, "step": 8947 }, { "epoch": 0.19717177059060084, "grad_norm": 1.1878383159637451, "learning_rate": 2.785477944458017e-05, "loss": 0.0935, "step": 8948 }, { "epoch": 0.197193805880117, "grad_norm": 0.7217035889625549, "learning_rate": 2.7854227721302167e-05, "loss": 0.1124, "step": 8949 }, { "epoch": 0.19721584116963317, "grad_norm": 1.0253881216049194, "learning_rate": 2.7853675932550683e-05, "loss": 0.1207, "step": 8950 }, { "epoch": 0.19723787645914934, "grad_norm": 1.1229488849639893, "learning_rate": 2.7853124078328537e-05, "loss": 0.084, "step": 8951 }, { "epoch": 0.1972599117486655, "grad_norm": 1.1490833759307861, "learning_rate": 2.785257215863854e-05, "loss": 0.1403, "step": 8952 }, { "epoch": 0.19728194703818164, "grad_norm": 0.6688854098320007, "learning_rate": 2.7852020173483496e-05, "loss": 0.0991, "step": 8953 }, { "epoch": 0.1973039823276978, "grad_norm": 0.649590015411377, "learning_rate": 2.7851468122866224e-05, "loss": 0.1015, "step": 8954 }, { "epoch": 0.19732601761721397, "grad_norm": 1.0564879179000854, "learning_rate": 2.7850916006789537e-05, "loss": 0.1344, "step": 8955 }, { "epoch": 0.19734805290673013, "grad_norm": 0.8249820470809937, "learning_rate": 2.785036382525624e-05, "loss": 0.13, "step": 8956 }, { "epoch": 0.1973700881962463, "grad_norm": 1.260103464126587, "learning_rate": 2.7849811578269148e-05, "loss": 0.1133, "step": 8957 }, { "epoch": 0.19739212348576246, "grad_norm": 1.3701304197311401, "learning_rate": 2.7849259265831074e-05, "loss": 0.1491, "step": 8958 }, { "epoch": 0.1974141587752786, "grad_norm": 1.4545199871063232, "learning_rate": 2.7848706887944838e-05, "loss": 0.1217, "step": 8959 }, { "epoch": 0.19743619406479476, "grad_norm": 0.661135196685791, "learning_rate": 2.784815444461324e-05, "loss": 0.0988, "step": 8960 }, { "epoch": 0.19745822935431093, "grad_norm": 0.9648042321205139, "learning_rate": 2.784760193583911e-05, "loss": 0.1307, "step": 8961 }, { "epoch": 0.1974802646438271, "grad_norm": 1.0443633794784546, "learning_rate": 2.784704936162525e-05, "loss": 0.1194, "step": 8962 }, { "epoch": 0.19750229993334326, "grad_norm": 0.7183996438980103, "learning_rate": 2.7846496721974476e-05, "loss": 0.0881, "step": 8963 }, { "epoch": 0.19752433522285942, "grad_norm": 1.1460354328155518, "learning_rate": 2.784594401688961e-05, "loss": 0.1285, "step": 8964 }, { "epoch": 0.19754637051237556, "grad_norm": 1.205121636390686, "learning_rate": 2.7845391246373456e-05, "loss": 0.1327, "step": 8965 }, { "epoch": 0.19756840580189172, "grad_norm": 1.063834309577942, "learning_rate": 2.7844838410428844e-05, "loss": 0.1442, "step": 8966 }, { "epoch": 0.1975904410914079, "grad_norm": 1.2629468441009521, "learning_rate": 2.784428550905858e-05, "loss": 0.099, "step": 8967 }, { "epoch": 0.19761247638092405, "grad_norm": 0.9514883756637573, "learning_rate": 2.784373254226548e-05, "loss": 0.0951, "step": 8968 }, { "epoch": 0.19763451167044022, "grad_norm": 0.9799221754074097, "learning_rate": 2.7843179510052362e-05, "loss": 0.1065, "step": 8969 }, { "epoch": 0.19765654695995638, "grad_norm": 0.8178109526634216, "learning_rate": 2.7842626412422044e-05, "loss": 0.1324, "step": 8970 }, { "epoch": 0.19767858224947252, "grad_norm": 1.0435830354690552, "learning_rate": 2.784207324937735e-05, "loss": 0.1074, "step": 8971 }, { "epoch": 0.19770061753898868, "grad_norm": 1.6552118062973022, "learning_rate": 2.7841520020921083e-05, "loss": 0.0959, "step": 8972 }, { "epoch": 0.19772265282850485, "grad_norm": 0.7454891800880432, "learning_rate": 2.784096672705607e-05, "loss": 0.0863, "step": 8973 }, { "epoch": 0.197744688118021, "grad_norm": 2.117082357406616, "learning_rate": 2.784041336778513e-05, "loss": 0.0946, "step": 8974 }, { "epoch": 0.19776672340753718, "grad_norm": 0.9622727036476135, "learning_rate": 2.7839859943111073e-05, "loss": 0.1361, "step": 8975 }, { "epoch": 0.19778875869705334, "grad_norm": 1.1996923685073853, "learning_rate": 2.783930645303673e-05, "loss": 0.0854, "step": 8976 }, { "epoch": 0.19781079398656948, "grad_norm": 1.073385238647461, "learning_rate": 2.783875289756491e-05, "loss": 0.1008, "step": 8977 }, { "epoch": 0.19783282927608564, "grad_norm": 0.7488837838172913, "learning_rate": 2.7838199276698438e-05, "loss": 0.094, "step": 8978 }, { "epoch": 0.1978548645656018, "grad_norm": 0.8263638615608215, "learning_rate": 2.7837645590440137e-05, "loss": 0.1479, "step": 8979 }, { "epoch": 0.19787689985511797, "grad_norm": 0.48724105954170227, "learning_rate": 2.7837091838792818e-05, "loss": 0.1256, "step": 8980 }, { "epoch": 0.19789893514463414, "grad_norm": 1.2757163047790527, "learning_rate": 2.7836538021759308e-05, "loss": 0.1313, "step": 8981 }, { "epoch": 0.1979209704341503, "grad_norm": 1.1659588813781738, "learning_rate": 2.7835984139342424e-05, "loss": 0.1085, "step": 8982 }, { "epoch": 0.19794300572366647, "grad_norm": 1.3638027906417847, "learning_rate": 2.783543019154499e-05, "loss": 0.125, "step": 8983 }, { "epoch": 0.1979650410131826, "grad_norm": 1.576176643371582, "learning_rate": 2.783487617836983e-05, "loss": 0.155, "step": 8984 }, { "epoch": 0.19798707630269877, "grad_norm": 1.2032268047332764, "learning_rate": 2.7834322099819762e-05, "loss": 0.0899, "step": 8985 }, { "epoch": 0.19800911159221493, "grad_norm": 0.6438871026039124, "learning_rate": 2.7833767955897614e-05, "loss": 0.1066, "step": 8986 }, { "epoch": 0.1980311468817311, "grad_norm": 0.6541603803634644, "learning_rate": 2.78332137466062e-05, "loss": 0.0551, "step": 8987 }, { "epoch": 0.19805318217124726, "grad_norm": 1.1173295974731445, "learning_rate": 2.7832659471948344e-05, "loss": 0.1276, "step": 8988 }, { "epoch": 0.19807521746076343, "grad_norm": 0.7216687202453613, "learning_rate": 2.783210513192687e-05, "loss": 0.0854, "step": 8989 }, { "epoch": 0.19809725275027956, "grad_norm": 0.4748021960258484, "learning_rate": 2.7831550726544607e-05, "loss": 0.0871, "step": 8990 }, { "epoch": 0.19811928803979573, "grad_norm": 1.1891793012619019, "learning_rate": 2.783099625580438e-05, "loss": 0.1219, "step": 8991 }, { "epoch": 0.1981413233293119, "grad_norm": 0.9236436486244202, "learning_rate": 2.7830441719709004e-05, "loss": 0.0905, "step": 8992 }, { "epoch": 0.19816335861882806, "grad_norm": 0.8997510671615601, "learning_rate": 2.7829887118261304e-05, "loss": 0.1107, "step": 8993 }, { "epoch": 0.19818539390834422, "grad_norm": 1.108189582824707, "learning_rate": 2.7829332451464115e-05, "loss": 0.1538, "step": 8994 }, { "epoch": 0.19820742919786039, "grad_norm": 0.8450685143470764, "learning_rate": 2.7828777719320254e-05, "loss": 0.1545, "step": 8995 }, { "epoch": 0.19822946448737652, "grad_norm": 0.9883496165275574, "learning_rate": 2.782822292183255e-05, "loss": 0.1299, "step": 8996 }, { "epoch": 0.1982514997768927, "grad_norm": 0.9858754277229309, "learning_rate": 2.7827668059003824e-05, "loss": 0.1519, "step": 8997 }, { "epoch": 0.19827353506640885, "grad_norm": 0.8258414268493652, "learning_rate": 2.7827113130836913e-05, "loss": 0.1354, "step": 8998 }, { "epoch": 0.19829557035592502, "grad_norm": 0.7988438606262207, "learning_rate": 2.782655813733463e-05, "loss": 0.1436, "step": 8999 }, { "epoch": 0.19831760564544118, "grad_norm": 0.6812145709991455, "learning_rate": 2.7826003078499807e-05, "loss": 0.1366, "step": 9000 }, { "epoch": 0.19833964093495735, "grad_norm": 0.8380270004272461, "learning_rate": 2.782544795433528e-05, "loss": 0.1412, "step": 9001 }, { "epoch": 0.19836167622447348, "grad_norm": 1.119350552558899, "learning_rate": 2.7824892764843863e-05, "loss": 0.1027, "step": 9002 }, { "epoch": 0.19838371151398965, "grad_norm": 0.6982187628746033, "learning_rate": 2.7824337510028392e-05, "loss": 0.0976, "step": 9003 }, { "epoch": 0.1984057468035058, "grad_norm": 1.1260963678359985, "learning_rate": 2.7823782189891692e-05, "loss": 0.1611, "step": 9004 }, { "epoch": 0.19842778209302198, "grad_norm": 0.6945470571517944, "learning_rate": 2.782322680443659e-05, "loss": 0.1095, "step": 9005 }, { "epoch": 0.19844981738253814, "grad_norm": 1.6466467380523682, "learning_rate": 2.7822671353665923e-05, "loss": 0.1358, "step": 9006 }, { "epoch": 0.1984718526720543, "grad_norm": 1.9850281476974487, "learning_rate": 2.7822115837582513e-05, "loss": 0.1004, "step": 9007 }, { "epoch": 0.19849388796157044, "grad_norm": 0.7540703415870667, "learning_rate": 2.782156025618919e-05, "loss": 0.14, "step": 9008 }, { "epoch": 0.1985159232510866, "grad_norm": 0.8452740907669067, "learning_rate": 2.7821004609488786e-05, "loss": 0.1007, "step": 9009 }, { "epoch": 0.19853795854060277, "grad_norm": 1.0522420406341553, "learning_rate": 2.782044889748413e-05, "loss": 0.1022, "step": 9010 }, { "epoch": 0.19855999383011894, "grad_norm": 0.8481420874595642, "learning_rate": 2.7819893120178056e-05, "loss": 0.1071, "step": 9011 }, { "epoch": 0.1985820291196351, "grad_norm": 0.8366519808769226, "learning_rate": 2.7819337277573386e-05, "loss": 0.0871, "step": 9012 }, { "epoch": 0.19860406440915127, "grad_norm": 0.8199566006660461, "learning_rate": 2.7818781369672958e-05, "loss": 0.1159, "step": 9013 }, { "epoch": 0.1986260996986674, "grad_norm": 0.9567537307739258, "learning_rate": 2.7818225396479608e-05, "loss": 0.1014, "step": 9014 }, { "epoch": 0.19864813498818357, "grad_norm": 1.0005698204040527, "learning_rate": 2.781766935799616e-05, "loss": 0.1426, "step": 9015 }, { "epoch": 0.19867017027769973, "grad_norm": 0.542044997215271, "learning_rate": 2.7817113254225447e-05, "loss": 0.102, "step": 9016 }, { "epoch": 0.1986922055672159, "grad_norm": 0.7982549071311951, "learning_rate": 2.78165570851703e-05, "loss": 0.1302, "step": 9017 }, { "epoch": 0.19871424085673206, "grad_norm": 0.8028298020362854, "learning_rate": 2.781600085083356e-05, "loss": 0.0874, "step": 9018 }, { "epoch": 0.19873627614624823, "grad_norm": 1.177856206893921, "learning_rate": 2.7815444551218053e-05, "loss": 0.1047, "step": 9019 }, { "epoch": 0.1987583114357644, "grad_norm": 0.6917674541473389, "learning_rate": 2.7814888186326615e-05, "loss": 0.1441, "step": 9020 }, { "epoch": 0.19878034672528053, "grad_norm": 1.2749916315078735, "learning_rate": 2.781433175616208e-05, "loss": 0.1347, "step": 9021 }, { "epoch": 0.1988023820147967, "grad_norm": 0.9970927238464355, "learning_rate": 2.7813775260727285e-05, "loss": 0.1406, "step": 9022 }, { "epoch": 0.19882441730431286, "grad_norm": 1.151977300643921, "learning_rate": 2.781321870002506e-05, "loss": 0.1063, "step": 9023 }, { "epoch": 0.19884645259382902, "grad_norm": 1.6658971309661865, "learning_rate": 2.7812662074058233e-05, "loss": 0.1602, "step": 9024 }, { "epoch": 0.19886848788334519, "grad_norm": 0.8374198079109192, "learning_rate": 2.7812105382829654e-05, "loss": 0.1097, "step": 9025 }, { "epoch": 0.19889052317286135, "grad_norm": 1.1066862344741821, "learning_rate": 2.7811548626342153e-05, "loss": 0.1114, "step": 9026 }, { "epoch": 0.1989125584623775, "grad_norm": 0.7800713777542114, "learning_rate": 2.7810991804598566e-05, "loss": 0.0639, "step": 9027 }, { "epoch": 0.19893459375189365, "grad_norm": 0.6372981667518616, "learning_rate": 2.7810434917601722e-05, "loss": 0.1016, "step": 9028 }, { "epoch": 0.19895662904140982, "grad_norm": 1.4248507022857666, "learning_rate": 2.7809877965354468e-05, "loss": 0.0844, "step": 9029 }, { "epoch": 0.19897866433092598, "grad_norm": 1.0521306991577148, "learning_rate": 2.7809320947859632e-05, "loss": 0.1057, "step": 9030 }, { "epoch": 0.19900069962044215, "grad_norm": 1.089304804801941, "learning_rate": 2.7808763865120058e-05, "loss": 0.1064, "step": 9031 }, { "epoch": 0.1990227349099583, "grad_norm": 0.8221654891967773, "learning_rate": 2.7808206717138584e-05, "loss": 0.0834, "step": 9032 }, { "epoch": 0.19904477019947445, "grad_norm": 1.2382543087005615, "learning_rate": 2.7807649503918042e-05, "loss": 0.1251, "step": 9033 }, { "epoch": 0.1990668054889906, "grad_norm": 1.058232069015503, "learning_rate": 2.7807092225461273e-05, "loss": 0.1363, "step": 9034 }, { "epoch": 0.19908884077850678, "grad_norm": 0.8458788990974426, "learning_rate": 2.780653488177112e-05, "loss": 0.136, "step": 9035 }, { "epoch": 0.19911087606802294, "grad_norm": 0.8550074100494385, "learning_rate": 2.780597747285041e-05, "loss": 0.1207, "step": 9036 }, { "epoch": 0.1991329113575391, "grad_norm": 0.7500032186508179, "learning_rate": 2.7805419998701994e-05, "loss": 0.1146, "step": 9037 }, { "epoch": 0.19915494664705527, "grad_norm": 0.7119057774543762, "learning_rate": 2.7804862459328703e-05, "loss": 0.1458, "step": 9038 }, { "epoch": 0.1991769819365714, "grad_norm": 0.5630068778991699, "learning_rate": 2.7804304854733386e-05, "loss": 0.1657, "step": 9039 }, { "epoch": 0.19919901722608757, "grad_norm": 0.9472647309303284, "learning_rate": 2.7803747184918878e-05, "loss": 0.094, "step": 9040 }, { "epoch": 0.19922105251560374, "grad_norm": 0.9538694620132446, "learning_rate": 2.7803189449888013e-05, "loss": 0.1351, "step": 9041 }, { "epoch": 0.1992430878051199, "grad_norm": 0.9181234836578369, "learning_rate": 2.7802631649643647e-05, "loss": 0.1243, "step": 9042 }, { "epoch": 0.19926512309463607, "grad_norm": 0.7433072328567505, "learning_rate": 2.7802073784188607e-05, "loss": 0.0864, "step": 9043 }, { "epoch": 0.19928715838415223, "grad_norm": 0.8959723711013794, "learning_rate": 2.7801515853525745e-05, "loss": 0.0781, "step": 9044 }, { "epoch": 0.19930919367366837, "grad_norm": 1.075145959854126, "learning_rate": 2.7800957857657894e-05, "loss": 0.112, "step": 9045 }, { "epoch": 0.19933122896318453, "grad_norm": 0.5651399493217468, "learning_rate": 2.7800399796587903e-05, "loss": 0.113, "step": 9046 }, { "epoch": 0.1993532642527007, "grad_norm": 0.5191226601600647, "learning_rate": 2.779984167031861e-05, "loss": 0.1023, "step": 9047 }, { "epoch": 0.19937529954221686, "grad_norm": 1.0032719373703003, "learning_rate": 2.7799283478852862e-05, "loss": 0.1528, "step": 9048 }, { "epoch": 0.19939733483173303, "grad_norm": 0.8064351677894592, "learning_rate": 2.7798725222193497e-05, "loss": 0.1002, "step": 9049 }, { "epoch": 0.1994193701212492, "grad_norm": 1.3659350872039795, "learning_rate": 2.779816690034336e-05, "loss": 0.1042, "step": 9050 }, { "epoch": 0.19944140541076535, "grad_norm": 0.921922504901886, "learning_rate": 2.7797608513305298e-05, "loss": 0.0799, "step": 9051 }, { "epoch": 0.1994634407002815, "grad_norm": 0.8814350366592407, "learning_rate": 2.7797050061082153e-05, "loss": 0.1179, "step": 9052 }, { "epoch": 0.19948547598979766, "grad_norm": 0.8578471541404724, "learning_rate": 2.779649154367677e-05, "loss": 0.1327, "step": 9053 }, { "epoch": 0.19950751127931382, "grad_norm": 2.3690335750579834, "learning_rate": 2.7795932961091992e-05, "loss": 0.0946, "step": 9054 }, { "epoch": 0.19952954656882999, "grad_norm": 1.2561945915222168, "learning_rate": 2.7795374313330667e-05, "loss": 0.1356, "step": 9055 }, { "epoch": 0.19955158185834615, "grad_norm": 0.76735520362854, "learning_rate": 2.779481560039564e-05, "loss": 0.1251, "step": 9056 }, { "epoch": 0.19957361714786231, "grad_norm": 0.6231800317764282, "learning_rate": 2.7794256822289755e-05, "loss": 0.0927, "step": 9057 }, { "epoch": 0.19959565243737845, "grad_norm": 0.8824473023414612, "learning_rate": 2.7793697979015862e-05, "loss": 0.1316, "step": 9058 }, { "epoch": 0.19961768772689462, "grad_norm": 0.7266947031021118, "learning_rate": 2.7793139070576798e-05, "loss": 0.0797, "step": 9059 }, { "epoch": 0.19963972301641078, "grad_norm": 1.0453543663024902, "learning_rate": 2.7792580096975424e-05, "loss": 0.1334, "step": 9060 }, { "epoch": 0.19966175830592695, "grad_norm": 0.6985859870910645, "learning_rate": 2.7792021058214577e-05, "loss": 0.1168, "step": 9061 }, { "epoch": 0.1996837935954431, "grad_norm": 1.1260676383972168, "learning_rate": 2.7791461954297107e-05, "loss": 0.1205, "step": 9062 }, { "epoch": 0.19970582888495927, "grad_norm": 1.1302337646484375, "learning_rate": 2.7790902785225862e-05, "loss": 0.1554, "step": 9063 }, { "epoch": 0.1997278641744754, "grad_norm": 1.0506696701049805, "learning_rate": 2.7790343551003685e-05, "loss": 0.0932, "step": 9064 }, { "epoch": 0.19974989946399158, "grad_norm": 0.9404318928718567, "learning_rate": 2.7789784251633437e-05, "loss": 0.1401, "step": 9065 }, { "epoch": 0.19977193475350774, "grad_norm": 0.7813735604286194, "learning_rate": 2.7789224887117955e-05, "loss": 0.1014, "step": 9066 }, { "epoch": 0.1997939700430239, "grad_norm": 0.5152005553245544, "learning_rate": 2.7788665457460094e-05, "loss": 0.0954, "step": 9067 }, { "epoch": 0.19981600533254007, "grad_norm": 1.0724198818206787, "learning_rate": 2.7788105962662702e-05, "loss": 0.0895, "step": 9068 }, { "epoch": 0.19983804062205623, "grad_norm": 1.077034592628479, "learning_rate": 2.778754640272863e-05, "loss": 0.1237, "step": 9069 }, { "epoch": 0.19986007591157237, "grad_norm": 0.8400794267654419, "learning_rate": 2.7786986777660727e-05, "loss": 0.1196, "step": 9070 }, { "epoch": 0.19988211120108854, "grad_norm": 0.7511234879493713, "learning_rate": 2.778642708746184e-05, "loss": 0.1162, "step": 9071 }, { "epoch": 0.1999041464906047, "grad_norm": 0.8473747372627258, "learning_rate": 2.7785867332134823e-05, "loss": 0.144, "step": 9072 }, { "epoch": 0.19992618178012087, "grad_norm": 1.1738454103469849, "learning_rate": 2.778530751168253e-05, "loss": 0.1379, "step": 9073 }, { "epoch": 0.19994821706963703, "grad_norm": 0.891153872013092, "learning_rate": 2.7784747626107815e-05, "loss": 0.1062, "step": 9074 }, { "epoch": 0.1999702523591532, "grad_norm": 0.8436341285705566, "learning_rate": 2.7784187675413517e-05, "loss": 0.1472, "step": 9075 }, { "epoch": 0.19999228764866933, "grad_norm": 0.9559421539306641, "learning_rate": 2.7783627659602496e-05, "loss": 0.0961, "step": 9076 }, { "epoch": 0.2000143229381855, "grad_norm": 0.7032745480537415, "learning_rate": 2.7783067578677607e-05, "loss": 0.0846, "step": 9077 }, { "epoch": 0.20003635822770166, "grad_norm": 0.8525707125663757, "learning_rate": 2.7782507432641698e-05, "loss": 0.0992, "step": 9078 }, { "epoch": 0.20005839351721783, "grad_norm": 0.7702050805091858, "learning_rate": 2.7781947221497623e-05, "loss": 0.112, "step": 9079 }, { "epoch": 0.200080428806734, "grad_norm": 0.7649068236351013, "learning_rate": 2.778138694524824e-05, "loss": 0.0994, "step": 9080 }, { "epoch": 0.20010246409625015, "grad_norm": 0.9565579891204834, "learning_rate": 2.77808266038964e-05, "loss": 0.1211, "step": 9081 }, { "epoch": 0.2001244993857663, "grad_norm": 1.007056474685669, "learning_rate": 2.778026619744495e-05, "loss": 0.0887, "step": 9082 }, { "epoch": 0.20014653467528246, "grad_norm": 0.7015578150749207, "learning_rate": 2.777970572589676e-05, "loss": 0.1031, "step": 9083 }, { "epoch": 0.20016856996479862, "grad_norm": 1.0802403688430786, "learning_rate": 2.777914518925467e-05, "loss": 0.1166, "step": 9084 }, { "epoch": 0.20019060525431479, "grad_norm": 0.8656284809112549, "learning_rate": 2.7778584587521542e-05, "loss": 0.1456, "step": 9085 }, { "epoch": 0.20021264054383095, "grad_norm": 0.7928807139396667, "learning_rate": 2.7778023920700233e-05, "loss": 0.1388, "step": 9086 }, { "epoch": 0.20023467583334711, "grad_norm": 0.8075770735740662, "learning_rate": 2.7777463188793593e-05, "loss": 0.0782, "step": 9087 }, { "epoch": 0.20025671112286328, "grad_norm": 0.7601058483123779, "learning_rate": 2.777690239180448e-05, "loss": 0.0985, "step": 9088 }, { "epoch": 0.20027874641237942, "grad_norm": 0.5086649656295776, "learning_rate": 2.7776341529735753e-05, "loss": 0.0912, "step": 9089 }, { "epoch": 0.20030078170189558, "grad_norm": 0.6591746211051941, "learning_rate": 2.7775780602590265e-05, "loss": 0.1064, "step": 9090 }, { "epoch": 0.20032281699141175, "grad_norm": 0.9480512738227844, "learning_rate": 2.777521961037088e-05, "loss": 0.0783, "step": 9091 }, { "epoch": 0.2003448522809279, "grad_norm": 0.7524390816688538, "learning_rate": 2.777465855308045e-05, "loss": 0.1275, "step": 9092 }, { "epoch": 0.20036688757044407, "grad_norm": 1.5383987426757812, "learning_rate": 2.7774097430721835e-05, "loss": 0.1187, "step": 9093 }, { "epoch": 0.20038892285996024, "grad_norm": 1.1891123056411743, "learning_rate": 2.7773536243297884e-05, "loss": 0.1525, "step": 9094 }, { "epoch": 0.20041095814947638, "grad_norm": 1.007552146911621, "learning_rate": 2.7772974990811473e-05, "loss": 0.1243, "step": 9095 }, { "epoch": 0.20043299343899254, "grad_norm": 0.918570876121521, "learning_rate": 2.7772413673265443e-05, "loss": 0.1411, "step": 9096 }, { "epoch": 0.2004550287285087, "grad_norm": 1.2933272123336792, "learning_rate": 2.7771852290662666e-05, "loss": 0.1481, "step": 9097 }, { "epoch": 0.20047706401802487, "grad_norm": 0.6873888373374939, "learning_rate": 2.7771290843005996e-05, "loss": 0.1011, "step": 9098 }, { "epoch": 0.20049909930754103, "grad_norm": 0.9746820330619812, "learning_rate": 2.777072933029829e-05, "loss": 0.0921, "step": 9099 }, { "epoch": 0.2005211345970572, "grad_norm": 0.7229942679405212, "learning_rate": 2.7770167752542417e-05, "loss": 0.1034, "step": 9100 }, { "epoch": 0.20054316988657334, "grad_norm": 0.7673346996307373, "learning_rate": 2.776960610974123e-05, "loss": 0.1012, "step": 9101 }, { "epoch": 0.2005652051760895, "grad_norm": 0.9613867998123169, "learning_rate": 2.7769044401897586e-05, "loss": 0.1187, "step": 9102 }, { "epoch": 0.20058724046560567, "grad_norm": 0.7042784094810486, "learning_rate": 2.7768482629014352e-05, "loss": 0.0767, "step": 9103 }, { "epoch": 0.20060927575512183, "grad_norm": 0.7248678207397461, "learning_rate": 2.7767920791094397e-05, "loss": 0.0596, "step": 9104 }, { "epoch": 0.200631311044638, "grad_norm": 0.8345337510108948, "learning_rate": 2.776735888814057e-05, "loss": 0.099, "step": 9105 }, { "epoch": 0.20065334633415416, "grad_norm": 0.8730498552322388, "learning_rate": 2.776679692015574e-05, "loss": 0.124, "step": 9106 }, { "epoch": 0.2006753816236703, "grad_norm": 0.8085710406303406, "learning_rate": 2.7766234887142764e-05, "loss": 0.0836, "step": 9107 }, { "epoch": 0.20069741691318646, "grad_norm": 0.8913067579269409, "learning_rate": 2.7765672789104507e-05, "loss": 0.1513, "step": 9108 }, { "epoch": 0.20071945220270263, "grad_norm": 0.8986802697181702, "learning_rate": 2.7765110626043834e-05, "loss": 0.0992, "step": 9109 }, { "epoch": 0.2007414874922188, "grad_norm": 0.8399437665939331, "learning_rate": 2.7764548397963607e-05, "loss": 0.0992, "step": 9110 }, { "epoch": 0.20076352278173495, "grad_norm": 0.8509653210639954, "learning_rate": 2.7763986104866693e-05, "loss": 0.1003, "step": 9111 }, { "epoch": 0.20078555807125112, "grad_norm": 1.0363103151321411, "learning_rate": 2.776342374675595e-05, "loss": 0.0983, "step": 9112 }, { "epoch": 0.20080759336076726, "grad_norm": 0.904491126537323, "learning_rate": 2.7762861323634248e-05, "loss": 0.0881, "step": 9113 }, { "epoch": 0.20082962865028342, "grad_norm": 0.8332952857017517, "learning_rate": 2.776229883550445e-05, "loss": 0.1426, "step": 9114 }, { "epoch": 0.20085166393979959, "grad_norm": 0.9171603322029114, "learning_rate": 2.7761736282369418e-05, "loss": 0.1316, "step": 9115 }, { "epoch": 0.20087369922931575, "grad_norm": 0.7821520566940308, "learning_rate": 2.7761173664232023e-05, "loss": 0.142, "step": 9116 }, { "epoch": 0.20089573451883191, "grad_norm": 0.9152632355690002, "learning_rate": 2.7760610981095126e-05, "loss": 0.1206, "step": 9117 }, { "epoch": 0.20091776980834808, "grad_norm": 0.715903103351593, "learning_rate": 2.7760048232961592e-05, "loss": 0.1043, "step": 9118 }, { "epoch": 0.20093980509786422, "grad_norm": 1.505774736404419, "learning_rate": 2.775948541983429e-05, "loss": 0.0833, "step": 9119 }, { "epoch": 0.20096184038738038, "grad_norm": 0.4038422405719757, "learning_rate": 2.775892254171609e-05, "loss": 0.0845, "step": 9120 }, { "epoch": 0.20098387567689655, "grad_norm": 1.4364560842514038, "learning_rate": 2.775835959860985e-05, "loss": 0.1371, "step": 9121 }, { "epoch": 0.2010059109664127, "grad_norm": 0.907453715801239, "learning_rate": 2.7757796590518448e-05, "loss": 0.1202, "step": 9122 }, { "epoch": 0.20102794625592887, "grad_norm": 1.042304277420044, "learning_rate": 2.7757233517444743e-05, "loss": 0.0865, "step": 9123 }, { "epoch": 0.20104998154544504, "grad_norm": 0.6140085458755493, "learning_rate": 2.775667037939161e-05, "loss": 0.1052, "step": 9124 }, { "epoch": 0.2010720168349612, "grad_norm": 0.7850778102874756, "learning_rate": 2.7756107176361915e-05, "loss": 0.0954, "step": 9125 }, { "epoch": 0.20109405212447734, "grad_norm": 0.9276770353317261, "learning_rate": 2.7755543908358522e-05, "loss": 0.0874, "step": 9126 }, { "epoch": 0.2011160874139935, "grad_norm": 0.7032469511032104, "learning_rate": 2.7754980575384307e-05, "loss": 0.0789, "step": 9127 }, { "epoch": 0.20113812270350967, "grad_norm": 0.7787439823150635, "learning_rate": 2.7754417177442133e-05, "loss": 0.1205, "step": 9128 }, { "epoch": 0.20116015799302583, "grad_norm": 0.7167564034461975, "learning_rate": 2.7753853714534873e-05, "loss": 0.0807, "step": 9129 }, { "epoch": 0.201182193282542, "grad_norm": 0.694769024848938, "learning_rate": 2.77532901866654e-05, "loss": 0.1012, "step": 9130 }, { "epoch": 0.20120422857205816, "grad_norm": 0.8414686322212219, "learning_rate": 2.775272659383658e-05, "loss": 0.116, "step": 9131 }, { "epoch": 0.2012262638615743, "grad_norm": 0.9680564999580383, "learning_rate": 2.7752162936051286e-05, "loss": 0.0916, "step": 9132 }, { "epoch": 0.20124829915109047, "grad_norm": 0.7058871388435364, "learning_rate": 2.7751599213312384e-05, "loss": 0.1306, "step": 9133 }, { "epoch": 0.20127033444060663, "grad_norm": 1.0382353067398071, "learning_rate": 2.7751035425622755e-05, "loss": 0.0942, "step": 9134 }, { "epoch": 0.2012923697301228, "grad_norm": 1.1504796743392944, "learning_rate": 2.7750471572985265e-05, "loss": 0.1173, "step": 9135 }, { "epoch": 0.20131440501963896, "grad_norm": 1.0943273305892944, "learning_rate": 2.774990765540278e-05, "loss": 0.0858, "step": 9136 }, { "epoch": 0.20133644030915512, "grad_norm": 1.0281261205673218, "learning_rate": 2.774934367287818e-05, "loss": 0.1022, "step": 9137 }, { "epoch": 0.20135847559867126, "grad_norm": 0.7040899991989136, "learning_rate": 2.7748779625414336e-05, "loss": 0.122, "step": 9138 }, { "epoch": 0.20138051088818743, "grad_norm": 0.6579122543334961, "learning_rate": 2.7748215513014125e-05, "loss": 0.1084, "step": 9139 }, { "epoch": 0.2014025461777036, "grad_norm": 0.7670077681541443, "learning_rate": 2.774765133568041e-05, "loss": 0.1227, "step": 9140 }, { "epoch": 0.20142458146721975, "grad_norm": 0.8863052725791931, "learning_rate": 2.7747087093416074e-05, "loss": 0.0721, "step": 9141 }, { "epoch": 0.20144661675673592, "grad_norm": 0.8237957954406738, "learning_rate": 2.7746522786223987e-05, "loss": 0.1092, "step": 9142 }, { "epoch": 0.20146865204625208, "grad_norm": 0.6789111495018005, "learning_rate": 2.7745958414107025e-05, "loss": 0.1069, "step": 9143 }, { "epoch": 0.20149068733576822, "grad_norm": 1.2714389562606812, "learning_rate": 2.774539397706806e-05, "loss": 0.1357, "step": 9144 }, { "epoch": 0.20151272262528439, "grad_norm": 1.1280461549758911, "learning_rate": 2.7744829475109974e-05, "loss": 0.0901, "step": 9145 }, { "epoch": 0.20153475791480055, "grad_norm": 1.0816330909729004, "learning_rate": 2.7744264908235634e-05, "loss": 0.0934, "step": 9146 }, { "epoch": 0.20155679320431671, "grad_norm": 0.9017579555511475, "learning_rate": 2.774370027644792e-05, "loss": 0.129, "step": 9147 }, { "epoch": 0.20157882849383288, "grad_norm": 1.0567214488983154, "learning_rate": 2.7743135579749703e-05, "loss": 0.1285, "step": 9148 }, { "epoch": 0.20160086378334904, "grad_norm": 0.946719229221344, "learning_rate": 2.7742570818143868e-05, "loss": 0.115, "step": 9149 }, { "epoch": 0.20162289907286518, "grad_norm": 1.1380218267440796, "learning_rate": 2.7742005991633282e-05, "loss": 0.1263, "step": 9150 }, { "epoch": 0.20164493436238135, "grad_norm": 0.9704753756523132, "learning_rate": 2.774144110022083e-05, "loss": 0.1084, "step": 9151 }, { "epoch": 0.2016669696518975, "grad_norm": 1.2672985792160034, "learning_rate": 2.7740876143909385e-05, "loss": 0.1359, "step": 9152 }, { "epoch": 0.20168900494141367, "grad_norm": 0.777155876159668, "learning_rate": 2.7740311122701826e-05, "loss": 0.1365, "step": 9153 }, { "epoch": 0.20171104023092984, "grad_norm": 1.232815146446228, "learning_rate": 2.773974603660103e-05, "loss": 0.0957, "step": 9154 }, { "epoch": 0.201733075520446, "grad_norm": 0.9242773652076721, "learning_rate": 2.7739180885609873e-05, "loss": 0.0923, "step": 9155 }, { "epoch": 0.20175511080996217, "grad_norm": 0.7688427567481995, "learning_rate": 2.7738615669731238e-05, "loss": 0.115, "step": 9156 }, { "epoch": 0.2017771460994783, "grad_norm": 0.9858619570732117, "learning_rate": 2.7738050388968007e-05, "loss": 0.1339, "step": 9157 }, { "epoch": 0.20179918138899447, "grad_norm": 1.109772801399231, "learning_rate": 2.7737485043323054e-05, "loss": 0.1457, "step": 9158 }, { "epoch": 0.20182121667851063, "grad_norm": 1.0709199905395508, "learning_rate": 2.7736919632799257e-05, "loss": 0.1115, "step": 9159 }, { "epoch": 0.2018432519680268, "grad_norm": 1.094095230102539, "learning_rate": 2.7736354157399496e-05, "loss": 0.1384, "step": 9160 }, { "epoch": 0.20186528725754296, "grad_norm": 1.0563766956329346, "learning_rate": 2.7735788617126654e-05, "loss": 0.1373, "step": 9161 }, { "epoch": 0.20188732254705913, "grad_norm": 0.8059423565864563, "learning_rate": 2.7735223011983613e-05, "loss": 0.087, "step": 9162 }, { "epoch": 0.20190935783657527, "grad_norm": 1.2802973985671997, "learning_rate": 2.7734657341973252e-05, "loss": 0.1229, "step": 9163 }, { "epoch": 0.20193139312609143, "grad_norm": 1.0399556159973145, "learning_rate": 2.7734091607098452e-05, "loss": 0.1244, "step": 9164 }, { "epoch": 0.2019534284156076, "grad_norm": 0.8618602752685547, "learning_rate": 2.7733525807362095e-05, "loss": 0.1481, "step": 9165 }, { "epoch": 0.20197546370512376, "grad_norm": 0.9757124781608582, "learning_rate": 2.7732959942767062e-05, "loss": 0.15, "step": 9166 }, { "epoch": 0.20199749899463992, "grad_norm": 0.9617423415184021, "learning_rate": 2.7732394013316238e-05, "loss": 0.1015, "step": 9167 }, { "epoch": 0.2020195342841561, "grad_norm": 0.9721381068229675, "learning_rate": 2.7731828019012502e-05, "loss": 0.1028, "step": 9168 }, { "epoch": 0.20204156957367223, "grad_norm": 0.5761193633079529, "learning_rate": 2.7731261959858742e-05, "loss": 0.0709, "step": 9169 }, { "epoch": 0.2020636048631884, "grad_norm": 0.8224055767059326, "learning_rate": 2.773069583585783e-05, "loss": 0.1804, "step": 9170 }, { "epoch": 0.20208564015270455, "grad_norm": 1.2185076475143433, "learning_rate": 2.7730129647012663e-05, "loss": 0.0959, "step": 9171 }, { "epoch": 0.20210767544222072, "grad_norm": 0.6844977736473083, "learning_rate": 2.772956339332612e-05, "loss": 0.0793, "step": 9172 }, { "epoch": 0.20212971073173688, "grad_norm": 0.5323242545127869, "learning_rate": 2.772899707480108e-05, "loss": 0.048, "step": 9173 }, { "epoch": 0.20215174602125305, "grad_norm": 0.905967652797699, "learning_rate": 2.7728430691440438e-05, "loss": 0.0804, "step": 9174 }, { "epoch": 0.20217378131076919, "grad_norm": 0.9174564480781555, "learning_rate": 2.772786424324707e-05, "loss": 0.1112, "step": 9175 }, { "epoch": 0.20219581660028535, "grad_norm": 1.0090726613998413, "learning_rate": 2.772729773022386e-05, "loss": 0.1005, "step": 9176 }, { "epoch": 0.20221785188980151, "grad_norm": 0.8832210898399353, "learning_rate": 2.7726731152373707e-05, "loss": 0.1101, "step": 9177 }, { "epoch": 0.20223988717931768, "grad_norm": 0.8561690449714661, "learning_rate": 2.772616450969948e-05, "loss": 0.0996, "step": 9178 }, { "epoch": 0.20226192246883384, "grad_norm": 0.6806350350379944, "learning_rate": 2.7725597802204077e-05, "loss": 0.0961, "step": 9179 }, { "epoch": 0.20228395775835, "grad_norm": 0.9015605449676514, "learning_rate": 2.7725031029890378e-05, "loss": 0.1109, "step": 9180 }, { "epoch": 0.20230599304786615, "grad_norm": 0.7710138559341431, "learning_rate": 2.7724464192761273e-05, "loss": 0.0907, "step": 9181 }, { "epoch": 0.2023280283373823, "grad_norm": 1.2296648025512695, "learning_rate": 2.7723897290819647e-05, "loss": 0.0934, "step": 9182 }, { "epoch": 0.20235006362689847, "grad_norm": 0.8105817437171936, "learning_rate": 2.772333032406839e-05, "loss": 0.1209, "step": 9183 }, { "epoch": 0.20237209891641464, "grad_norm": 1.441179871559143, "learning_rate": 2.7722763292510388e-05, "loss": 0.1384, "step": 9184 }, { "epoch": 0.2023941342059308, "grad_norm": 1.0203914642333984, "learning_rate": 2.7722196196148534e-05, "loss": 0.1209, "step": 9185 }, { "epoch": 0.20241616949544697, "grad_norm": 0.9220729470252991, "learning_rate": 2.772162903498571e-05, "loss": 0.1755, "step": 9186 }, { "epoch": 0.2024382047849631, "grad_norm": 1.009452223777771, "learning_rate": 2.7721061809024808e-05, "loss": 0.1217, "step": 9187 }, { "epoch": 0.20246024007447927, "grad_norm": 1.213861346244812, "learning_rate": 2.7720494518268713e-05, "loss": 0.1393, "step": 9188 }, { "epoch": 0.20248227536399543, "grad_norm": 1.1758850812911987, "learning_rate": 2.7719927162720324e-05, "loss": 0.1367, "step": 9189 }, { "epoch": 0.2025043106535116, "grad_norm": 1.0990960597991943, "learning_rate": 2.7719359742382523e-05, "loss": 0.0791, "step": 9190 }, { "epoch": 0.20252634594302776, "grad_norm": 1.3151425123214722, "learning_rate": 2.77187922572582e-05, "loss": 0.1343, "step": 9191 }, { "epoch": 0.20254838123254393, "grad_norm": 0.6566488146781921, "learning_rate": 2.771822470735025e-05, "loss": 0.085, "step": 9192 }, { "epoch": 0.2025704165220601, "grad_norm": 0.8099175095558167, "learning_rate": 2.7717657092661562e-05, "loss": 0.0928, "step": 9193 }, { "epoch": 0.20259245181157623, "grad_norm": 0.8089494705200195, "learning_rate": 2.7717089413195024e-05, "loss": 0.1455, "step": 9194 }, { "epoch": 0.2026144871010924, "grad_norm": 1.0099256038665771, "learning_rate": 2.7716521668953532e-05, "loss": 0.1287, "step": 9195 }, { "epoch": 0.20263652239060856, "grad_norm": 0.8775032758712769, "learning_rate": 2.7715953859939973e-05, "loss": 0.097, "step": 9196 }, { "epoch": 0.20265855768012472, "grad_norm": 0.5548396110534668, "learning_rate": 2.7715385986157246e-05, "loss": 0.1072, "step": 9197 }, { "epoch": 0.2026805929696409, "grad_norm": 1.0447618961334229, "learning_rate": 2.771481804760824e-05, "loss": 0.0949, "step": 9198 }, { "epoch": 0.20270262825915705, "grad_norm": 0.9969208240509033, "learning_rate": 2.7714250044295842e-05, "loss": 0.1438, "step": 9199 }, { "epoch": 0.2027246635486732, "grad_norm": 1.1378575563430786, "learning_rate": 2.7713681976222954e-05, "loss": 0.1087, "step": 9200 }, { "epoch": 0.20274669883818935, "grad_norm": 0.848800778388977, "learning_rate": 2.771311384339247e-05, "loss": 0.1042, "step": 9201 }, { "epoch": 0.20276873412770552, "grad_norm": 0.9012178182601929, "learning_rate": 2.7712545645807276e-05, "loss": 0.1288, "step": 9202 }, { "epoch": 0.20279076941722168, "grad_norm": 1.0322179794311523, "learning_rate": 2.771197738347027e-05, "loss": 0.0874, "step": 9203 }, { "epoch": 0.20281280470673785, "grad_norm": 0.9527548551559448, "learning_rate": 2.7711409056384346e-05, "loss": 0.1082, "step": 9204 }, { "epoch": 0.202834839996254, "grad_norm": 1.3373968601226807, "learning_rate": 2.7710840664552396e-05, "loss": 0.159, "step": 9205 }, { "epoch": 0.20285687528577015, "grad_norm": 0.7283115983009338, "learning_rate": 2.7710272207977324e-05, "loss": 0.1322, "step": 9206 }, { "epoch": 0.20287891057528631, "grad_norm": 0.829627275466919, "learning_rate": 2.770970368666202e-05, "loss": 0.0781, "step": 9207 }, { "epoch": 0.20290094586480248, "grad_norm": 1.7138885259628296, "learning_rate": 2.7709135100609375e-05, "loss": 0.1418, "step": 9208 }, { "epoch": 0.20292298115431864, "grad_norm": 1.320062518119812, "learning_rate": 2.7708566449822296e-05, "loss": 0.0933, "step": 9209 }, { "epoch": 0.2029450164438348, "grad_norm": 0.8680279850959778, "learning_rate": 2.7707997734303668e-05, "loss": 0.1176, "step": 9210 }, { "epoch": 0.20296705173335097, "grad_norm": 0.7926251292228699, "learning_rate": 2.7707428954056395e-05, "loss": 0.1472, "step": 9211 }, { "epoch": 0.2029890870228671, "grad_norm": 0.8472136855125427, "learning_rate": 2.7706860109083374e-05, "loss": 0.099, "step": 9212 }, { "epoch": 0.20301112231238327, "grad_norm": 0.8821789026260376, "learning_rate": 2.7706291199387498e-05, "loss": 0.1489, "step": 9213 }, { "epoch": 0.20303315760189944, "grad_norm": 0.4344218373298645, "learning_rate": 2.7705722224971668e-05, "loss": 0.0802, "step": 9214 }, { "epoch": 0.2030551928914156, "grad_norm": 0.8981002569198608, "learning_rate": 2.770515318583878e-05, "loss": 0.1095, "step": 9215 }, { "epoch": 0.20307722818093177, "grad_norm": 0.8701351881027222, "learning_rate": 2.7704584081991734e-05, "loss": 0.1264, "step": 9216 }, { "epoch": 0.20309926347044793, "grad_norm": 0.9792489409446716, "learning_rate": 2.7704014913433428e-05, "loss": 0.1249, "step": 9217 }, { "epoch": 0.20312129875996407, "grad_norm": 0.7914222478866577, "learning_rate": 2.7703445680166765e-05, "loss": 0.1196, "step": 9218 }, { "epoch": 0.20314333404948023, "grad_norm": 0.8690603375434875, "learning_rate": 2.770287638219464e-05, "loss": 0.092, "step": 9219 }, { "epoch": 0.2031653693389964, "grad_norm": 0.7234814167022705, "learning_rate": 2.7702307019519952e-05, "loss": 0.1192, "step": 9220 }, { "epoch": 0.20318740462851256, "grad_norm": 0.7959800362586975, "learning_rate": 2.7701737592145603e-05, "loss": 0.1143, "step": 9221 }, { "epoch": 0.20320943991802873, "grad_norm": 0.9634459614753723, "learning_rate": 2.7701168100074494e-05, "loss": 0.1528, "step": 9222 }, { "epoch": 0.2032314752075449, "grad_norm": 1.0074886083602905, "learning_rate": 2.7700598543309523e-05, "loss": 0.0873, "step": 9223 }, { "epoch": 0.20325351049706103, "grad_norm": 0.41170933842658997, "learning_rate": 2.7700028921853596e-05, "loss": 0.0862, "step": 9224 }, { "epoch": 0.2032755457865772, "grad_norm": 1.0522960424423218, "learning_rate": 2.769945923570961e-05, "loss": 0.0986, "step": 9225 }, { "epoch": 0.20329758107609336, "grad_norm": 1.0612773895263672, "learning_rate": 2.769888948488047e-05, "loss": 0.1156, "step": 9226 }, { "epoch": 0.20331961636560952, "grad_norm": 0.7552604675292969, "learning_rate": 2.7698319669369075e-05, "loss": 0.1258, "step": 9227 }, { "epoch": 0.2033416516551257, "grad_norm": 0.8736036419868469, "learning_rate": 2.7697749789178325e-05, "loss": 0.0762, "step": 9228 }, { "epoch": 0.20336368694464185, "grad_norm": 0.886728823184967, "learning_rate": 2.769717984431113e-05, "loss": 0.066, "step": 9229 }, { "epoch": 0.20338572223415802, "grad_norm": 0.889845609664917, "learning_rate": 2.769660983477039e-05, "loss": 0.1079, "step": 9230 }, { "epoch": 0.20340775752367415, "grad_norm": 1.0458568334579468, "learning_rate": 2.7696039760559005e-05, "loss": 0.0997, "step": 9231 }, { "epoch": 0.20342979281319032, "grad_norm": 0.8260747194290161, "learning_rate": 2.7695469621679884e-05, "loss": 0.1238, "step": 9232 }, { "epoch": 0.20345182810270648, "grad_norm": 1.625696063041687, "learning_rate": 2.7694899418135922e-05, "loss": 0.1185, "step": 9233 }, { "epoch": 0.20347386339222265, "grad_norm": 0.4665241539478302, "learning_rate": 2.769432914993004e-05, "loss": 0.1196, "step": 9234 }, { "epoch": 0.2034958986817388, "grad_norm": 0.8177447319030762, "learning_rate": 2.7693758817065126e-05, "loss": 0.1255, "step": 9235 }, { "epoch": 0.20351793397125498, "grad_norm": 0.6327870488166809, "learning_rate": 2.7693188419544087e-05, "loss": 0.0776, "step": 9236 }, { "epoch": 0.20353996926077111, "grad_norm": 0.9486212730407715, "learning_rate": 2.7692617957369844e-05, "loss": 0.1638, "step": 9237 }, { "epoch": 0.20356200455028728, "grad_norm": 0.8595067262649536, "learning_rate": 2.7692047430545286e-05, "loss": 0.0737, "step": 9238 }, { "epoch": 0.20358403983980344, "grad_norm": 1.2014830112457275, "learning_rate": 2.7691476839073323e-05, "loss": 0.1604, "step": 9239 }, { "epoch": 0.2036060751293196, "grad_norm": 1.2037025690078735, "learning_rate": 2.7690906182956863e-05, "loss": 0.1681, "step": 9240 }, { "epoch": 0.20362811041883577, "grad_norm": 1.0569629669189453, "learning_rate": 2.7690335462198815e-05, "loss": 0.1157, "step": 9241 }, { "epoch": 0.20365014570835194, "grad_norm": 1.1957579851150513, "learning_rate": 2.768976467680208e-05, "loss": 0.1561, "step": 9242 }, { "epoch": 0.20367218099786807, "grad_norm": 0.8793075084686279, "learning_rate": 2.768919382676957e-05, "loss": 0.1438, "step": 9243 }, { "epoch": 0.20369421628738424, "grad_norm": 0.8796051144599915, "learning_rate": 2.7688622912104195e-05, "loss": 0.1143, "step": 9244 }, { "epoch": 0.2037162515769004, "grad_norm": 1.220298409461975, "learning_rate": 2.7688051932808856e-05, "loss": 0.1342, "step": 9245 }, { "epoch": 0.20373828686641657, "grad_norm": 1.0872275829315186, "learning_rate": 2.7687480888886462e-05, "loss": 0.1372, "step": 9246 }, { "epoch": 0.20376032215593273, "grad_norm": 1.1300420761108398, "learning_rate": 2.7686909780339926e-05, "loss": 0.144, "step": 9247 }, { "epoch": 0.2037823574454489, "grad_norm": 0.9815576672554016, "learning_rate": 2.7686338607172156e-05, "loss": 0.1191, "step": 9248 }, { "epoch": 0.20380439273496503, "grad_norm": 0.8347902297973633, "learning_rate": 2.768576736938606e-05, "loss": 0.1189, "step": 9249 }, { "epoch": 0.2038264280244812, "grad_norm": 0.903710663318634, "learning_rate": 2.768519606698455e-05, "loss": 0.0856, "step": 9250 }, { "epoch": 0.20384846331399736, "grad_norm": 0.7293544411659241, "learning_rate": 2.7684624699970533e-05, "loss": 0.0726, "step": 9251 }, { "epoch": 0.20387049860351353, "grad_norm": 0.9073483943939209, "learning_rate": 2.768405326834692e-05, "loss": 0.1304, "step": 9252 }, { "epoch": 0.2038925338930297, "grad_norm": 0.4805199205875397, "learning_rate": 2.7683481772116622e-05, "loss": 0.081, "step": 9253 }, { "epoch": 0.20391456918254586, "grad_norm": 1.1045011281967163, "learning_rate": 2.768291021128255e-05, "loss": 0.1223, "step": 9254 }, { "epoch": 0.203936604472062, "grad_norm": 0.9086152911186218, "learning_rate": 2.7682338585847615e-05, "loss": 0.0822, "step": 9255 }, { "epoch": 0.20395863976157816, "grad_norm": 0.832552969455719, "learning_rate": 2.768176689581473e-05, "loss": 0.1476, "step": 9256 }, { "epoch": 0.20398067505109432, "grad_norm": 1.0328670740127563, "learning_rate": 2.7681195141186807e-05, "loss": 0.1002, "step": 9257 }, { "epoch": 0.2040027103406105, "grad_norm": 0.6821430325508118, "learning_rate": 2.768062332196675e-05, "loss": 0.1241, "step": 9258 }, { "epoch": 0.20402474563012665, "grad_norm": 0.9071939587593079, "learning_rate": 2.7680051438157484e-05, "loss": 0.0938, "step": 9259 }, { "epoch": 0.20404678091964282, "grad_norm": 1.1982142925262451, "learning_rate": 2.7679479489761918e-05, "loss": 0.134, "step": 9260 }, { "epoch": 0.20406881620915898, "grad_norm": 0.8436968326568604, "learning_rate": 2.7678907476782963e-05, "loss": 0.0986, "step": 9261 }, { "epoch": 0.20409085149867512, "grad_norm": 0.6423991918563843, "learning_rate": 2.767833539922353e-05, "loss": 0.0915, "step": 9262 }, { "epoch": 0.20411288678819128, "grad_norm": 0.8309521079063416, "learning_rate": 2.7677763257086537e-05, "loss": 0.1113, "step": 9263 }, { "epoch": 0.20413492207770745, "grad_norm": 1.2969586849212646, "learning_rate": 2.7677191050374898e-05, "loss": 0.1312, "step": 9264 }, { "epoch": 0.2041569573672236, "grad_norm": 0.7586893439292908, "learning_rate": 2.7676618779091524e-05, "loss": 0.1091, "step": 9265 }, { "epoch": 0.20417899265673978, "grad_norm": 1.4698721170425415, "learning_rate": 2.7676046443239337e-05, "loss": 0.1335, "step": 9266 }, { "epoch": 0.20420102794625594, "grad_norm": 1.0351004600524902, "learning_rate": 2.7675474042821248e-05, "loss": 0.0785, "step": 9267 }, { "epoch": 0.20422306323577208, "grad_norm": 0.7608059644699097, "learning_rate": 2.7674901577840172e-05, "loss": 0.0838, "step": 9268 }, { "epoch": 0.20424509852528824, "grad_norm": 0.9222431778907776, "learning_rate": 2.7674329048299024e-05, "loss": 0.0777, "step": 9269 }, { "epoch": 0.2042671338148044, "grad_norm": 1.572605013847351, "learning_rate": 2.7673756454200722e-05, "loss": 0.1396, "step": 9270 }, { "epoch": 0.20428916910432057, "grad_norm": 0.7519240975379944, "learning_rate": 2.7673183795548186e-05, "loss": 0.1464, "step": 9271 }, { "epoch": 0.20431120439383674, "grad_norm": 0.9640136957168579, "learning_rate": 2.7672611072344322e-05, "loss": 0.1176, "step": 9272 }, { "epoch": 0.2043332396833529, "grad_norm": 1.2532979249954224, "learning_rate": 2.767203828459206e-05, "loss": 0.1107, "step": 9273 }, { "epoch": 0.20435527497286904, "grad_norm": 0.6658013463020325, "learning_rate": 2.767146543229431e-05, "loss": 0.1064, "step": 9274 }, { "epoch": 0.2043773102623852, "grad_norm": 0.8753812909126282, "learning_rate": 2.767089251545399e-05, "loss": 0.1091, "step": 9275 }, { "epoch": 0.20439934555190137, "grad_norm": 1.242933750152588, "learning_rate": 2.767031953407402e-05, "loss": 0.1119, "step": 9276 }, { "epoch": 0.20442138084141753, "grad_norm": 1.1695317029953003, "learning_rate": 2.7669746488157318e-05, "loss": 0.1351, "step": 9277 }, { "epoch": 0.2044434161309337, "grad_norm": 1.1713213920593262, "learning_rate": 2.766917337770681e-05, "loss": 0.1031, "step": 9278 }, { "epoch": 0.20446545142044986, "grad_norm": 1.2483537197113037, "learning_rate": 2.7668600202725398e-05, "loss": 0.0881, "step": 9279 }, { "epoch": 0.204487486709966, "grad_norm": 1.0682722330093384, "learning_rate": 2.7668026963216016e-05, "loss": 0.1101, "step": 9280 }, { "epoch": 0.20450952199948216, "grad_norm": 0.9349268078804016, "learning_rate": 2.7667453659181582e-05, "loss": 0.1375, "step": 9281 }, { "epoch": 0.20453155728899833, "grad_norm": 1.229665994644165, "learning_rate": 2.7666880290625013e-05, "loss": 0.0963, "step": 9282 }, { "epoch": 0.2045535925785145, "grad_norm": 0.6136758327484131, "learning_rate": 2.766630685754923e-05, "loss": 0.0937, "step": 9283 }, { "epoch": 0.20457562786803066, "grad_norm": 0.9287968873977661, "learning_rate": 2.7665733359957153e-05, "loss": 0.0973, "step": 9284 }, { "epoch": 0.20459766315754682, "grad_norm": 1.0024770498275757, "learning_rate": 2.7665159797851702e-05, "loss": 0.0966, "step": 9285 }, { "epoch": 0.20461969844706296, "grad_norm": 1.1322158575057983, "learning_rate": 2.7664586171235802e-05, "loss": 0.192, "step": 9286 }, { "epoch": 0.20464173373657912, "grad_norm": 0.9312000870704651, "learning_rate": 2.766401248011237e-05, "loss": 0.1093, "step": 9287 }, { "epoch": 0.2046637690260953, "grad_norm": 0.7335143685340881, "learning_rate": 2.766343872448434e-05, "loss": 0.0696, "step": 9288 }, { "epoch": 0.20468580431561145, "grad_norm": 0.9182259440422058, "learning_rate": 2.7662864904354622e-05, "loss": 0.1192, "step": 9289 }, { "epoch": 0.20470783960512762, "grad_norm": 0.848330557346344, "learning_rate": 2.766229101972614e-05, "loss": 0.1035, "step": 9290 }, { "epoch": 0.20472987489464378, "grad_norm": 1.0088063478469849, "learning_rate": 2.7661717070601822e-05, "loss": 0.1105, "step": 9291 }, { "epoch": 0.20475191018415992, "grad_norm": 0.8051267266273499, "learning_rate": 2.7661143056984588e-05, "loss": 0.1266, "step": 9292 }, { "epoch": 0.20477394547367608, "grad_norm": 0.8477528691291809, "learning_rate": 2.7660568978877362e-05, "loss": 0.1317, "step": 9293 }, { "epoch": 0.20479598076319225, "grad_norm": 1.1167941093444824, "learning_rate": 2.7659994836283072e-05, "loss": 0.1023, "step": 9294 }, { "epoch": 0.2048180160527084, "grad_norm": 0.6762244701385498, "learning_rate": 2.7659420629204635e-05, "loss": 0.143, "step": 9295 }, { "epoch": 0.20484005134222458, "grad_norm": 0.6790413856506348, "learning_rate": 2.7658846357644987e-05, "loss": 0.1093, "step": 9296 }, { "epoch": 0.20486208663174074, "grad_norm": 0.82863849401474, "learning_rate": 2.7658272021607037e-05, "loss": 0.0856, "step": 9297 }, { "epoch": 0.2048841219212569, "grad_norm": 1.2666809558868408, "learning_rate": 2.765769762109373e-05, "loss": 0.1268, "step": 9298 }, { "epoch": 0.20490615721077304, "grad_norm": 0.6181236505508423, "learning_rate": 2.765712315610798e-05, "loss": 0.1041, "step": 9299 }, { "epoch": 0.2049281925002892, "grad_norm": 0.8671543598175049, "learning_rate": 2.7656548626652708e-05, "loss": 0.102, "step": 9300 }, { "epoch": 0.20495022778980537, "grad_norm": 0.6806018352508545, "learning_rate": 2.765597403273085e-05, "loss": 0.089, "step": 9301 }, { "epoch": 0.20497226307932154, "grad_norm": 0.9379857182502747, "learning_rate": 2.7655399374345327e-05, "loss": 0.1559, "step": 9302 }, { "epoch": 0.2049942983688377, "grad_norm": 0.8114489912986755, "learning_rate": 2.7654824651499075e-05, "loss": 0.0906, "step": 9303 }, { "epoch": 0.20501633365835387, "grad_norm": 0.9493926763534546, "learning_rate": 2.765424986419501e-05, "loss": 0.1297, "step": 9304 }, { "epoch": 0.20503836894787, "grad_norm": 0.666530191898346, "learning_rate": 2.7653675012436067e-05, "loss": 0.1345, "step": 9305 }, { "epoch": 0.20506040423738617, "grad_norm": 0.9463397264480591, "learning_rate": 2.7653100096225167e-05, "loss": 0.1099, "step": 9306 }, { "epoch": 0.20508243952690233, "grad_norm": 0.834291398525238, "learning_rate": 2.765252511556525e-05, "loss": 0.1179, "step": 9307 }, { "epoch": 0.2051044748164185, "grad_norm": 0.6922436952590942, "learning_rate": 2.7651950070459235e-05, "loss": 0.1097, "step": 9308 }, { "epoch": 0.20512651010593466, "grad_norm": 1.530332326889038, "learning_rate": 2.7651374960910055e-05, "loss": 0.148, "step": 9309 }, { "epoch": 0.20514854539545083, "grad_norm": 0.9469811320304871, "learning_rate": 2.7650799786920635e-05, "loss": 0.1141, "step": 9310 }, { "epoch": 0.20517058068496696, "grad_norm": 0.8920313715934753, "learning_rate": 2.765022454849391e-05, "loss": 0.147, "step": 9311 }, { "epoch": 0.20519261597448313, "grad_norm": 1.0203174352645874, "learning_rate": 2.7649649245632807e-05, "loss": 0.1202, "step": 9312 }, { "epoch": 0.2052146512639993, "grad_norm": 0.8094485998153687, "learning_rate": 2.7649073878340256e-05, "loss": 0.1269, "step": 9313 }, { "epoch": 0.20523668655351546, "grad_norm": 1.0144156217575073, "learning_rate": 2.7648498446619188e-05, "loss": 0.1126, "step": 9314 }, { "epoch": 0.20525872184303162, "grad_norm": 1.1749106645584106, "learning_rate": 2.764792295047254e-05, "loss": 0.0905, "step": 9315 }, { "epoch": 0.2052807571325478, "grad_norm": 0.6962859034538269, "learning_rate": 2.764734738990323e-05, "loss": 0.1412, "step": 9316 }, { "epoch": 0.20530279242206392, "grad_norm": 0.8609103560447693, "learning_rate": 2.7646771764914202e-05, "loss": 0.083, "step": 9317 }, { "epoch": 0.2053248277115801, "grad_norm": 0.7071546316146851, "learning_rate": 2.7646196075508385e-05, "loss": 0.0923, "step": 9318 }, { "epoch": 0.20534686300109625, "grad_norm": 0.68337482213974, "learning_rate": 2.764562032168871e-05, "loss": 0.1212, "step": 9319 }, { "epoch": 0.20536889829061242, "grad_norm": 1.193709135055542, "learning_rate": 2.7645044503458112e-05, "loss": 0.097, "step": 9320 }, { "epoch": 0.20539093358012858, "grad_norm": 1.1455057859420776, "learning_rate": 2.7644468620819516e-05, "loss": 0.1203, "step": 9321 }, { "epoch": 0.20541296886964475, "grad_norm": 0.7283661961555481, "learning_rate": 2.764389267377586e-05, "loss": 0.1544, "step": 9322 }, { "epoch": 0.20543500415916088, "grad_norm": 1.116706371307373, "learning_rate": 2.7643316662330085e-05, "loss": 0.0947, "step": 9323 }, { "epoch": 0.20545703944867705, "grad_norm": 0.8325636982917786, "learning_rate": 2.7642740586485116e-05, "loss": 0.1018, "step": 9324 }, { "epoch": 0.2054790747381932, "grad_norm": 0.7706156969070435, "learning_rate": 2.7642164446243885e-05, "loss": 0.1023, "step": 9325 }, { "epoch": 0.20550111002770938, "grad_norm": 0.8691095113754272, "learning_rate": 2.764158824160934e-05, "loss": 0.1268, "step": 9326 }, { "epoch": 0.20552314531722554, "grad_norm": 0.7583446502685547, "learning_rate": 2.7641011972584398e-05, "loss": 0.0963, "step": 9327 }, { "epoch": 0.2055451806067417, "grad_norm": 0.82455974817276, "learning_rate": 2.7640435639172007e-05, "loss": 0.1309, "step": 9328 }, { "epoch": 0.20556721589625784, "grad_norm": 1.0159962177276611, "learning_rate": 2.7639859241375097e-05, "loss": 0.1149, "step": 9329 }, { "epoch": 0.205589251185774, "grad_norm": 0.6963873505592346, "learning_rate": 2.763928277919661e-05, "loss": 0.1644, "step": 9330 }, { "epoch": 0.20561128647529017, "grad_norm": 0.7993451356887817, "learning_rate": 2.7638706252639476e-05, "loss": 0.1303, "step": 9331 }, { "epoch": 0.20563332176480634, "grad_norm": 0.6178909540176392, "learning_rate": 2.7638129661706633e-05, "loss": 0.0688, "step": 9332 }, { "epoch": 0.2056553570543225, "grad_norm": 0.6897450089454651, "learning_rate": 2.763755300640102e-05, "loss": 0.1006, "step": 9333 }, { "epoch": 0.20567739234383867, "grad_norm": 0.7334285974502563, "learning_rate": 2.763697628672557e-05, "loss": 0.0994, "step": 9334 }, { "epoch": 0.20569942763335483, "grad_norm": 1.0704542398452759, "learning_rate": 2.7636399502683225e-05, "loss": 0.0998, "step": 9335 }, { "epoch": 0.20572146292287097, "grad_norm": 1.2246540784835815, "learning_rate": 2.7635822654276923e-05, "loss": 0.1121, "step": 9336 }, { "epoch": 0.20574349821238713, "grad_norm": 1.0387519598007202, "learning_rate": 2.76352457415096e-05, "loss": 0.1531, "step": 9337 }, { "epoch": 0.2057655335019033, "grad_norm": 1.4891809225082397, "learning_rate": 2.7634668764384193e-05, "loss": 0.1204, "step": 9338 }, { "epoch": 0.20578756879141946, "grad_norm": 0.9392635822296143, "learning_rate": 2.763409172290364e-05, "loss": 0.1114, "step": 9339 }, { "epoch": 0.20580960408093563, "grad_norm": 0.9015285968780518, "learning_rate": 2.763351461707089e-05, "loss": 0.1219, "step": 9340 }, { "epoch": 0.2058316393704518, "grad_norm": 0.7672760486602783, "learning_rate": 2.763293744688887e-05, "loss": 0.1424, "step": 9341 }, { "epoch": 0.20585367465996793, "grad_norm": 0.9042097926139832, "learning_rate": 2.763236021236053e-05, "loss": 0.0998, "step": 9342 }, { "epoch": 0.2058757099494841, "grad_norm": 0.5327138304710388, "learning_rate": 2.76317829134888e-05, "loss": 0.0714, "step": 9343 }, { "epoch": 0.20589774523900026, "grad_norm": 0.6643821001052856, "learning_rate": 2.763120555027663e-05, "loss": 0.0993, "step": 9344 }, { "epoch": 0.20591978052851642, "grad_norm": 1.246469259262085, "learning_rate": 2.7630628122726952e-05, "loss": 0.1215, "step": 9345 }, { "epoch": 0.2059418158180326, "grad_norm": 0.5822333693504333, "learning_rate": 2.7630050630842714e-05, "loss": 0.0785, "step": 9346 }, { "epoch": 0.20596385110754875, "grad_norm": 1.2110991477966309, "learning_rate": 2.762947307462686e-05, "loss": 0.1383, "step": 9347 }, { "epoch": 0.2059858863970649, "grad_norm": 1.1105117797851562, "learning_rate": 2.7628895454082324e-05, "loss": 0.102, "step": 9348 }, { "epoch": 0.20600792168658105, "grad_norm": 0.8277856111526489, "learning_rate": 2.762831776921205e-05, "loss": 0.1013, "step": 9349 }, { "epoch": 0.20602995697609722, "grad_norm": 0.7996873259544373, "learning_rate": 2.762774002001898e-05, "loss": 0.0891, "step": 9350 }, { "epoch": 0.20605199226561338, "grad_norm": 1.333871603012085, "learning_rate": 2.762716220650606e-05, "loss": 0.1534, "step": 9351 }, { "epoch": 0.20607402755512955, "grad_norm": 1.1043003797531128, "learning_rate": 2.7626584328676234e-05, "loss": 0.1044, "step": 9352 }, { "epoch": 0.2060960628446457, "grad_norm": 1.4644793272018433, "learning_rate": 2.7626006386532446e-05, "loss": 0.124, "step": 9353 }, { "epoch": 0.20611809813416185, "grad_norm": 0.7341408133506775, "learning_rate": 2.762542838007763e-05, "loss": 0.0823, "step": 9354 }, { "epoch": 0.206140133423678, "grad_norm": 0.9931955337524414, "learning_rate": 2.762485030931474e-05, "loss": 0.1159, "step": 9355 }, { "epoch": 0.20616216871319418, "grad_norm": 0.7360921502113342, "learning_rate": 2.7624272174246718e-05, "loss": 0.1615, "step": 9356 }, { "epoch": 0.20618420400271034, "grad_norm": 1.1603742837905884, "learning_rate": 2.7623693974876506e-05, "loss": 0.0978, "step": 9357 }, { "epoch": 0.2062062392922265, "grad_norm": 1.0041941404342651, "learning_rate": 2.762311571120705e-05, "loss": 0.1154, "step": 9358 }, { "epoch": 0.20622827458174267, "grad_norm": 0.8524743318557739, "learning_rate": 2.7622537383241304e-05, "loss": 0.105, "step": 9359 }, { "epoch": 0.2062503098712588, "grad_norm": 0.993766725063324, "learning_rate": 2.76219589909822e-05, "loss": 0.1058, "step": 9360 }, { "epoch": 0.20627234516077497, "grad_norm": 0.769841730594635, "learning_rate": 2.762138053443269e-05, "loss": 0.0738, "step": 9361 }, { "epoch": 0.20629438045029114, "grad_norm": 0.8055963516235352, "learning_rate": 2.7620802013595723e-05, "loss": 0.1294, "step": 9362 }, { "epoch": 0.2063164157398073, "grad_norm": 1.9182095527648926, "learning_rate": 2.7620223428474245e-05, "loss": 0.1141, "step": 9363 }, { "epoch": 0.20633845102932347, "grad_norm": 1.041597843170166, "learning_rate": 2.7619644779071197e-05, "loss": 0.1401, "step": 9364 }, { "epoch": 0.20636048631883963, "grad_norm": 0.7614874839782715, "learning_rate": 2.7619066065389536e-05, "loss": 0.1174, "step": 9365 }, { "epoch": 0.2063825216083558, "grad_norm": 1.1176775693893433, "learning_rate": 2.7618487287432202e-05, "loss": 0.0865, "step": 9366 }, { "epoch": 0.20640455689787193, "grad_norm": 0.998505175113678, "learning_rate": 2.7617908445202147e-05, "loss": 0.1101, "step": 9367 }, { "epoch": 0.2064265921873881, "grad_norm": 0.910099983215332, "learning_rate": 2.7617329538702316e-05, "loss": 0.1105, "step": 9368 }, { "epoch": 0.20644862747690426, "grad_norm": 0.950186550617218, "learning_rate": 2.7616750567935662e-05, "loss": 0.1157, "step": 9369 }, { "epoch": 0.20647066276642043, "grad_norm": 0.5416138172149658, "learning_rate": 2.761617153290513e-05, "loss": 0.0814, "step": 9370 }, { "epoch": 0.2064926980559366, "grad_norm": 0.8191086053848267, "learning_rate": 2.761559243361367e-05, "loss": 0.1304, "step": 9371 }, { "epoch": 0.20651473334545276, "grad_norm": 0.7935423851013184, "learning_rate": 2.7615013270064236e-05, "loss": 0.1003, "step": 9372 }, { "epoch": 0.2065367686349689, "grad_norm": 1.0344767570495605, "learning_rate": 2.7614434042259772e-05, "loss": 0.1256, "step": 9373 }, { "epoch": 0.20655880392448506, "grad_norm": 0.9389232993125916, "learning_rate": 2.7613854750203235e-05, "loss": 0.1081, "step": 9374 }, { "epoch": 0.20658083921400122, "grad_norm": 0.9164100885391235, "learning_rate": 2.7613275393897565e-05, "loss": 0.109, "step": 9375 }, { "epoch": 0.2066028745035174, "grad_norm": 0.8922126889228821, "learning_rate": 2.7612695973345726e-05, "loss": 0.1175, "step": 9376 }, { "epoch": 0.20662490979303355, "grad_norm": 0.9092586636543274, "learning_rate": 2.7612116488550657e-05, "loss": 0.063, "step": 9377 }, { "epoch": 0.20664694508254972, "grad_norm": 1.1898518800735474, "learning_rate": 2.761153693951532e-05, "loss": 0.0785, "step": 9378 }, { "epoch": 0.20666898037206585, "grad_norm": 0.7595199346542358, "learning_rate": 2.7610957326242664e-05, "loss": 0.089, "step": 9379 }, { "epoch": 0.20669101566158202, "grad_norm": 1.112353801727295, "learning_rate": 2.7610377648735636e-05, "loss": 0.1305, "step": 9380 }, { "epoch": 0.20671305095109818, "grad_norm": 1.4093424081802368, "learning_rate": 2.760979790699719e-05, "loss": 0.1509, "step": 9381 }, { "epoch": 0.20673508624061435, "grad_norm": 1.6833792924880981, "learning_rate": 2.7609218101030288e-05, "loss": 0.1142, "step": 9382 }, { "epoch": 0.2067571215301305, "grad_norm": 1.160085916519165, "learning_rate": 2.760863823083787e-05, "loss": 0.1279, "step": 9383 }, { "epoch": 0.20677915681964668, "grad_norm": 0.9651698470115662, "learning_rate": 2.76080582964229e-05, "loss": 0.103, "step": 9384 }, { "epoch": 0.2068011921091628, "grad_norm": 0.742437481880188, "learning_rate": 2.760747829778833e-05, "loss": 0.1176, "step": 9385 }, { "epoch": 0.20682322739867898, "grad_norm": 0.8832831978797913, "learning_rate": 2.7606898234937106e-05, "loss": 0.1054, "step": 9386 }, { "epoch": 0.20684526268819514, "grad_norm": 0.6553797125816345, "learning_rate": 2.7606318107872193e-05, "loss": 0.1011, "step": 9387 }, { "epoch": 0.2068672979777113, "grad_norm": 1.027026891708374, "learning_rate": 2.7605737916596545e-05, "loss": 0.0946, "step": 9388 }, { "epoch": 0.20688933326722747, "grad_norm": 1.4200323820114136, "learning_rate": 2.760515766111311e-05, "loss": 0.1591, "step": 9389 }, { "epoch": 0.20691136855674364, "grad_norm": 0.6994435787200928, "learning_rate": 2.7604577341424853e-05, "loss": 0.0602, "step": 9390 }, { "epoch": 0.20693340384625977, "grad_norm": 0.934648871421814, "learning_rate": 2.760399695753472e-05, "loss": 0.1082, "step": 9391 }, { "epoch": 0.20695543913577594, "grad_norm": 1.2822041511535645, "learning_rate": 2.760341650944567e-05, "loss": 0.0947, "step": 9392 }, { "epoch": 0.2069774744252921, "grad_norm": 0.8948583006858826, "learning_rate": 2.7602835997160662e-05, "loss": 0.1361, "step": 9393 }, { "epoch": 0.20699950971480827, "grad_norm": 0.8930867910385132, "learning_rate": 2.7602255420682656e-05, "loss": 0.1072, "step": 9394 }, { "epoch": 0.20702154500432443, "grad_norm": 1.3632620573043823, "learning_rate": 2.7601674780014602e-05, "loss": 0.1289, "step": 9395 }, { "epoch": 0.2070435802938406, "grad_norm": 1.1870718002319336, "learning_rate": 2.7601094075159462e-05, "loss": 0.1463, "step": 9396 }, { "epoch": 0.20706561558335673, "grad_norm": 0.7236281037330627, "learning_rate": 2.7600513306120187e-05, "loss": 0.0938, "step": 9397 }, { "epoch": 0.2070876508728729, "grad_norm": 0.8633131384849548, "learning_rate": 2.759993247289975e-05, "loss": 0.1483, "step": 9398 }, { "epoch": 0.20710968616238906, "grad_norm": 0.6856895685195923, "learning_rate": 2.7599351575501094e-05, "loss": 0.0917, "step": 9399 }, { "epoch": 0.20713172145190523, "grad_norm": 0.661914587020874, "learning_rate": 2.759877061392719e-05, "loss": 0.1322, "step": 9400 }, { "epoch": 0.2071537567414214, "grad_norm": 0.8573197722434998, "learning_rate": 2.7598189588180986e-05, "loss": 0.1283, "step": 9401 }, { "epoch": 0.20717579203093756, "grad_norm": 1.1642701625823975, "learning_rate": 2.7597608498265447e-05, "loss": 0.1421, "step": 9402 }, { "epoch": 0.20719782732045372, "grad_norm": 1.2143369913101196, "learning_rate": 2.7597027344183535e-05, "loss": 0.1237, "step": 9403 }, { "epoch": 0.20721986260996986, "grad_norm": 0.7786707282066345, "learning_rate": 2.7596446125938205e-05, "loss": 0.0846, "step": 9404 }, { "epoch": 0.20724189789948602, "grad_norm": 0.7375503182411194, "learning_rate": 2.759586484353242e-05, "loss": 0.0797, "step": 9405 }, { "epoch": 0.2072639331890022, "grad_norm": 0.7793048024177551, "learning_rate": 2.759528349696914e-05, "loss": 0.0662, "step": 9406 }, { "epoch": 0.20728596847851835, "grad_norm": 0.9668035507202148, "learning_rate": 2.7594702086251333e-05, "loss": 0.0975, "step": 9407 }, { "epoch": 0.20730800376803452, "grad_norm": 0.5190058350563049, "learning_rate": 2.759412061138195e-05, "loss": 0.0885, "step": 9408 }, { "epoch": 0.20733003905755068, "grad_norm": 0.7733794450759888, "learning_rate": 2.759353907236396e-05, "loss": 0.1148, "step": 9409 }, { "epoch": 0.20735207434706682, "grad_norm": 0.7504174113273621, "learning_rate": 2.759295746920032e-05, "loss": 0.0994, "step": 9410 }, { "epoch": 0.20737410963658298, "grad_norm": 1.4288936853408813, "learning_rate": 2.7592375801893994e-05, "loss": 0.1487, "step": 9411 }, { "epoch": 0.20739614492609915, "grad_norm": 0.820326566696167, "learning_rate": 2.7591794070447946e-05, "loss": 0.1137, "step": 9412 }, { "epoch": 0.2074181802156153, "grad_norm": 0.6200536489486694, "learning_rate": 2.759121227486514e-05, "loss": 0.0975, "step": 9413 }, { "epoch": 0.20744021550513148, "grad_norm": 1.0655087232589722, "learning_rate": 2.7590630415148535e-05, "loss": 0.1123, "step": 9414 }, { "epoch": 0.20746225079464764, "grad_norm": 0.7221612334251404, "learning_rate": 2.75900484913011e-05, "loss": 0.0948, "step": 9415 }, { "epoch": 0.20748428608416378, "grad_norm": 0.9289413690567017, "learning_rate": 2.7589466503325796e-05, "loss": 0.0872, "step": 9416 }, { "epoch": 0.20750632137367994, "grad_norm": 0.8096078634262085, "learning_rate": 2.758888445122559e-05, "loss": 0.103, "step": 9417 }, { "epoch": 0.2075283566631961, "grad_norm": 0.6549517512321472, "learning_rate": 2.7588302335003445e-05, "loss": 0.1161, "step": 9418 }, { "epoch": 0.20755039195271227, "grad_norm": 1.116187572479248, "learning_rate": 2.7587720154662322e-05, "loss": 0.11, "step": 9419 }, { "epoch": 0.20757242724222844, "grad_norm": 0.5916738510131836, "learning_rate": 2.7587137910205196e-05, "loss": 0.0928, "step": 9420 }, { "epoch": 0.2075944625317446, "grad_norm": 1.1150026321411133, "learning_rate": 2.758655560163502e-05, "loss": 0.1108, "step": 9421 }, { "epoch": 0.20761649782126074, "grad_norm": 1.174324870109558, "learning_rate": 2.7585973228954774e-05, "loss": 0.1332, "step": 9422 }, { "epoch": 0.2076385331107769, "grad_norm": 1.18239164352417, "learning_rate": 2.758539079216741e-05, "loss": 0.1015, "step": 9423 }, { "epoch": 0.20766056840029307, "grad_norm": 1.740654468536377, "learning_rate": 2.7584808291275903e-05, "loss": 0.1581, "step": 9424 }, { "epoch": 0.20768260368980923, "grad_norm": 0.9597906470298767, "learning_rate": 2.7584225726283225e-05, "loss": 0.1211, "step": 9425 }, { "epoch": 0.2077046389793254, "grad_norm": 0.9222613573074341, "learning_rate": 2.7583643097192334e-05, "loss": 0.1194, "step": 9426 }, { "epoch": 0.20772667426884156, "grad_norm": 0.8566294312477112, "learning_rate": 2.75830604040062e-05, "loss": 0.0933, "step": 9427 }, { "epoch": 0.2077487095583577, "grad_norm": 1.1248717308044434, "learning_rate": 2.758247764672779e-05, "loss": 0.1568, "step": 9428 }, { "epoch": 0.20777074484787386, "grad_norm": 1.0741316080093384, "learning_rate": 2.7581894825360076e-05, "loss": 0.1099, "step": 9429 }, { "epoch": 0.20779278013739003, "grad_norm": 0.9624758362770081, "learning_rate": 2.7581311939906024e-05, "loss": 0.121, "step": 9430 }, { "epoch": 0.2078148154269062, "grad_norm": 1.1232603788375854, "learning_rate": 2.7580728990368603e-05, "loss": 0.13, "step": 9431 }, { "epoch": 0.20783685071642236, "grad_norm": 0.8758746385574341, "learning_rate": 2.7580145976750785e-05, "loss": 0.0905, "step": 9432 }, { "epoch": 0.20785888600593852, "grad_norm": 0.8317629098892212, "learning_rate": 2.7579562899055534e-05, "loss": 0.1121, "step": 9433 }, { "epoch": 0.20788092129545466, "grad_norm": 1.033937931060791, "learning_rate": 2.757897975728583e-05, "loss": 0.1432, "step": 9434 }, { "epoch": 0.20790295658497082, "grad_norm": 1.3909679651260376, "learning_rate": 2.757839655144463e-05, "loss": 0.1248, "step": 9435 }, { "epoch": 0.207924991874487, "grad_norm": 1.020462155342102, "learning_rate": 2.7577813281534918e-05, "loss": 0.1357, "step": 9436 }, { "epoch": 0.20794702716400315, "grad_norm": 1.3218791484832764, "learning_rate": 2.7577229947559654e-05, "loss": 0.1293, "step": 9437 }, { "epoch": 0.20796906245351932, "grad_norm": 0.9900200963020325, "learning_rate": 2.7576646549521812e-05, "loss": 0.1108, "step": 9438 }, { "epoch": 0.20799109774303548, "grad_norm": 0.8952530026435852, "learning_rate": 2.7576063087424365e-05, "loss": 0.0692, "step": 9439 }, { "epoch": 0.20801313303255164, "grad_norm": 0.937129557132721, "learning_rate": 2.757547956127029e-05, "loss": 0.1302, "step": 9440 }, { "epoch": 0.20803516832206778, "grad_norm": 0.7179258465766907, "learning_rate": 2.757489597106255e-05, "loss": 0.1034, "step": 9441 }, { "epoch": 0.20805720361158395, "grad_norm": 1.1627850532531738, "learning_rate": 2.757431231680412e-05, "loss": 0.1146, "step": 9442 }, { "epoch": 0.2080792389011001, "grad_norm": 1.2910186052322388, "learning_rate": 2.757372859849798e-05, "loss": 0.1124, "step": 9443 }, { "epoch": 0.20810127419061628, "grad_norm": 1.1514984369277954, "learning_rate": 2.7573144816147096e-05, "loss": 0.1076, "step": 9444 }, { "epoch": 0.20812330948013244, "grad_norm": 2.1069164276123047, "learning_rate": 2.757256096975444e-05, "loss": 0.0916, "step": 9445 }, { "epoch": 0.2081453447696486, "grad_norm": 1.060973882675171, "learning_rate": 2.757197705932299e-05, "loss": 0.1257, "step": 9446 }, { "epoch": 0.20816738005916474, "grad_norm": 1.0822300910949707, "learning_rate": 2.757139308485572e-05, "loss": 0.0726, "step": 9447 }, { "epoch": 0.2081894153486809, "grad_norm": 1.2347657680511475, "learning_rate": 2.7570809046355604e-05, "loss": 0.1665, "step": 9448 }, { "epoch": 0.20821145063819707, "grad_norm": 0.9853793382644653, "learning_rate": 2.7570224943825616e-05, "loss": 0.1126, "step": 9449 }, { "epoch": 0.20823348592771324, "grad_norm": 0.917035698890686, "learning_rate": 2.7569640777268736e-05, "loss": 0.1681, "step": 9450 }, { "epoch": 0.2082555212172294, "grad_norm": 1.5999023914337158, "learning_rate": 2.756905654668793e-05, "loss": 0.1306, "step": 9451 }, { "epoch": 0.20827755650674556, "grad_norm": 1.1077183485031128, "learning_rate": 2.756847225208618e-05, "loss": 0.1603, "step": 9452 }, { "epoch": 0.2082995917962617, "grad_norm": 0.702277660369873, "learning_rate": 2.756788789346646e-05, "loss": 0.1205, "step": 9453 }, { "epoch": 0.20832162708577787, "grad_norm": 1.1787068843841553, "learning_rate": 2.7567303470831745e-05, "loss": 0.1302, "step": 9454 }, { "epoch": 0.20834366237529403, "grad_norm": 1.1886435747146606, "learning_rate": 2.7566718984185018e-05, "loss": 0.1206, "step": 9455 }, { "epoch": 0.2083656976648102, "grad_norm": 0.9499973654747009, "learning_rate": 2.7566134433529253e-05, "loss": 0.0931, "step": 9456 }, { "epoch": 0.20838773295432636, "grad_norm": 1.0019091367721558, "learning_rate": 2.7565549818867428e-05, "loss": 0.0744, "step": 9457 }, { "epoch": 0.20840976824384252, "grad_norm": 1.021523356437683, "learning_rate": 2.756496514020252e-05, "loss": 0.1242, "step": 9458 }, { "epoch": 0.20843180353335866, "grad_norm": 0.9981280565261841, "learning_rate": 2.75643803975375e-05, "loss": 0.1127, "step": 9459 }, { "epoch": 0.20845383882287483, "grad_norm": 0.684693455696106, "learning_rate": 2.756379559087536e-05, "loss": 0.1261, "step": 9460 }, { "epoch": 0.208475874112391, "grad_norm": 0.874529242515564, "learning_rate": 2.756321072021907e-05, "loss": 0.1146, "step": 9461 }, { "epoch": 0.20849790940190716, "grad_norm": 0.8345451951026917, "learning_rate": 2.7562625785571607e-05, "loss": 0.0918, "step": 9462 }, { "epoch": 0.20851994469142332, "grad_norm": 1.2836506366729736, "learning_rate": 2.7562040786935957e-05, "loss": 0.1138, "step": 9463 }, { "epoch": 0.20854197998093948, "grad_norm": 0.8550941944122314, "learning_rate": 2.7561455724315102e-05, "loss": 0.1022, "step": 9464 }, { "epoch": 0.20856401527045562, "grad_norm": 0.6184864640235901, "learning_rate": 2.756087059771201e-05, "loss": 0.0788, "step": 9465 }, { "epoch": 0.2085860505599718, "grad_norm": 0.6422236561775208, "learning_rate": 2.756028540712967e-05, "loss": 0.0827, "step": 9466 }, { "epoch": 0.20860808584948795, "grad_norm": 0.715080738067627, "learning_rate": 2.7559700152571064e-05, "loss": 0.1013, "step": 9467 }, { "epoch": 0.20863012113900412, "grad_norm": 0.7059758901596069, "learning_rate": 2.7559114834039167e-05, "loss": 0.1244, "step": 9468 }, { "epoch": 0.20865215642852028, "grad_norm": 0.7056425213813782, "learning_rate": 2.7558529451536964e-05, "loss": 0.0776, "step": 9469 }, { "epoch": 0.20867419171803644, "grad_norm": 0.44975799322128296, "learning_rate": 2.7557944005067435e-05, "loss": 0.0784, "step": 9470 }, { "epoch": 0.2086962270075526, "grad_norm": 0.9612434506416321, "learning_rate": 2.7557358494633566e-05, "loss": 0.1464, "step": 9471 }, { "epoch": 0.20871826229706875, "grad_norm": 0.719810962677002, "learning_rate": 2.7556772920238336e-05, "loss": 0.0996, "step": 9472 }, { "epoch": 0.2087402975865849, "grad_norm": 1.0586614608764648, "learning_rate": 2.7556187281884725e-05, "loss": 0.118, "step": 9473 }, { "epoch": 0.20876233287610108, "grad_norm": 0.6260471940040588, "learning_rate": 2.755560157957572e-05, "loss": 0.1096, "step": 9474 }, { "epoch": 0.20878436816561724, "grad_norm": 1.1036655902862549, "learning_rate": 2.7555015813314304e-05, "loss": 0.1268, "step": 9475 }, { "epoch": 0.2088064034551334, "grad_norm": 0.9308378100395203, "learning_rate": 2.755442998310346e-05, "loss": 0.1052, "step": 9476 }, { "epoch": 0.20882843874464957, "grad_norm": 0.759578287601471, "learning_rate": 2.755384408894617e-05, "loss": 0.1126, "step": 9477 }, { "epoch": 0.2088504740341657, "grad_norm": 1.4586395025253296, "learning_rate": 2.7553258130845426e-05, "loss": 0.1256, "step": 9478 }, { "epoch": 0.20887250932368187, "grad_norm": 1.0112391710281372, "learning_rate": 2.7552672108804202e-05, "loss": 0.1076, "step": 9479 }, { "epoch": 0.20889454461319804, "grad_norm": 1.0817574262619019, "learning_rate": 2.755208602282549e-05, "loss": 0.1305, "step": 9480 }, { "epoch": 0.2089165799027142, "grad_norm": 0.8324152231216431, "learning_rate": 2.755149987291227e-05, "loss": 0.1478, "step": 9481 }, { "epoch": 0.20893861519223036, "grad_norm": 0.7189725041389465, "learning_rate": 2.7550913659067534e-05, "loss": 0.1299, "step": 9482 }, { "epoch": 0.20896065048174653, "grad_norm": 0.9773246645927429, "learning_rate": 2.755032738129426e-05, "loss": 0.111, "step": 9483 }, { "epoch": 0.20898268577126267, "grad_norm": 1.202492356300354, "learning_rate": 2.7549741039595443e-05, "loss": 0.1152, "step": 9484 }, { "epoch": 0.20900472106077883, "grad_norm": 0.7439968585968018, "learning_rate": 2.7549154633974062e-05, "loss": 0.0856, "step": 9485 }, { "epoch": 0.209026756350295, "grad_norm": 0.8121330738067627, "learning_rate": 2.754856816443311e-05, "loss": 0.0962, "step": 9486 }, { "epoch": 0.20904879163981116, "grad_norm": 1.033241868019104, "learning_rate": 2.754798163097557e-05, "loss": 0.1074, "step": 9487 }, { "epoch": 0.20907082692932732, "grad_norm": 0.9616760015487671, "learning_rate": 2.754739503360443e-05, "loss": 0.1218, "step": 9488 }, { "epoch": 0.2090928622188435, "grad_norm": 0.5873150825500488, "learning_rate": 2.7546808372322677e-05, "loss": 0.0743, "step": 9489 }, { "epoch": 0.20911489750835963, "grad_norm": 0.9487913846969604, "learning_rate": 2.7546221647133305e-05, "loss": 0.0964, "step": 9490 }, { "epoch": 0.2091369327978758, "grad_norm": 0.8171796202659607, "learning_rate": 2.7545634858039296e-05, "loss": 0.1076, "step": 9491 }, { "epoch": 0.20915896808739196, "grad_norm": 0.9712464213371277, "learning_rate": 2.754504800504364e-05, "loss": 0.1231, "step": 9492 }, { "epoch": 0.20918100337690812, "grad_norm": 0.8176759481430054, "learning_rate": 2.754446108814933e-05, "loss": 0.1337, "step": 9493 }, { "epoch": 0.20920303866642428, "grad_norm": 0.719488263130188, "learning_rate": 2.7543874107359354e-05, "loss": 0.091, "step": 9494 }, { "epoch": 0.20922507395594045, "grad_norm": 0.6649525761604309, "learning_rate": 2.7543287062676693e-05, "loss": 0.0864, "step": 9495 }, { "epoch": 0.2092471092454566, "grad_norm": 0.9149569869041443, "learning_rate": 2.7542699954104353e-05, "loss": 0.0937, "step": 9496 }, { "epoch": 0.20926914453497275, "grad_norm": 0.7913311123847961, "learning_rate": 2.7542112781645312e-05, "loss": 0.1076, "step": 9497 }, { "epoch": 0.20929117982448892, "grad_norm": 0.6130743622779846, "learning_rate": 2.7541525545302568e-05, "loss": 0.0949, "step": 9498 }, { "epoch": 0.20931321511400508, "grad_norm": 0.9090756773948669, "learning_rate": 2.7540938245079103e-05, "loss": 0.1397, "step": 9499 }, { "epoch": 0.20933525040352124, "grad_norm": 0.8608099222183228, "learning_rate": 2.7540350880977923e-05, "loss": 0.0806, "step": 9500 }, { "epoch": 0.2093572856930374, "grad_norm": 1.0295428037643433, "learning_rate": 2.7539763453002e-05, "loss": 0.0957, "step": 9501 }, { "epoch": 0.20937932098255355, "grad_norm": 0.9089780449867249, "learning_rate": 2.753917596115435e-05, "loss": 0.1436, "step": 9502 }, { "epoch": 0.2094013562720697, "grad_norm": 1.399708867073059, "learning_rate": 2.7538588405437946e-05, "loss": 0.1236, "step": 9503 }, { "epoch": 0.20942339156158588, "grad_norm": 1.1690599918365479, "learning_rate": 2.753800078585579e-05, "loss": 0.1041, "step": 9504 }, { "epoch": 0.20944542685110204, "grad_norm": 0.46034958958625793, "learning_rate": 2.753741310241087e-05, "loss": 0.0774, "step": 9505 }, { "epoch": 0.2094674621406182, "grad_norm": 1.2918155193328857, "learning_rate": 2.7536825355106185e-05, "loss": 0.1291, "step": 9506 }, { "epoch": 0.20948949743013437, "grad_norm": 0.7301418781280518, "learning_rate": 2.7536237543944725e-05, "loss": 0.0746, "step": 9507 }, { "epoch": 0.20951153271965053, "grad_norm": 0.7647320628166199, "learning_rate": 2.753564966892948e-05, "loss": 0.1261, "step": 9508 }, { "epoch": 0.20953356800916667, "grad_norm": 0.9129912257194519, "learning_rate": 2.7535061730063454e-05, "loss": 0.1403, "step": 9509 }, { "epoch": 0.20955560329868284, "grad_norm": 0.739506721496582, "learning_rate": 2.7534473727349633e-05, "loss": 0.1535, "step": 9510 }, { "epoch": 0.209577638588199, "grad_norm": 0.733069121837616, "learning_rate": 2.7533885660791022e-05, "loss": 0.1053, "step": 9511 }, { "epoch": 0.20959967387771516, "grad_norm": 1.3726614713668823, "learning_rate": 2.7533297530390603e-05, "loss": 0.1296, "step": 9512 }, { "epoch": 0.20962170916723133, "grad_norm": 0.8927154541015625, "learning_rate": 2.7532709336151384e-05, "loss": 0.1028, "step": 9513 }, { "epoch": 0.2096437444567475, "grad_norm": 0.7924230694770813, "learning_rate": 2.7532121078076354e-05, "loss": 0.0981, "step": 9514 }, { "epoch": 0.20966577974626363, "grad_norm": 0.8632528781890869, "learning_rate": 2.7531532756168514e-05, "loss": 0.1347, "step": 9515 }, { "epoch": 0.2096878150357798, "grad_norm": 0.977155327796936, "learning_rate": 2.7530944370430853e-05, "loss": 0.0964, "step": 9516 }, { "epoch": 0.20970985032529596, "grad_norm": 0.6851212382316589, "learning_rate": 2.7530355920866373e-05, "loss": 0.0956, "step": 9517 }, { "epoch": 0.20973188561481212, "grad_norm": 1.103827714920044, "learning_rate": 2.7529767407478072e-05, "loss": 0.1717, "step": 9518 }, { "epoch": 0.2097539209043283, "grad_norm": 0.7130390405654907, "learning_rate": 2.7529178830268946e-05, "loss": 0.1217, "step": 9519 }, { "epoch": 0.20977595619384445, "grad_norm": 0.7208288908004761, "learning_rate": 2.7528590189242e-05, "loss": 0.0701, "step": 9520 }, { "epoch": 0.2097979914833606, "grad_norm": 1.550302505493164, "learning_rate": 2.7528001484400217e-05, "loss": 0.1505, "step": 9521 }, { "epoch": 0.20982002677287676, "grad_norm": 1.0147572755813599, "learning_rate": 2.7527412715746608e-05, "loss": 0.121, "step": 9522 }, { "epoch": 0.20984206206239292, "grad_norm": 0.9762120246887207, "learning_rate": 2.752682388328417e-05, "loss": 0.1594, "step": 9523 }, { "epoch": 0.20986409735190908, "grad_norm": 1.4010249376296997, "learning_rate": 2.75262349870159e-05, "loss": 0.1158, "step": 9524 }, { "epoch": 0.20988613264142525, "grad_norm": 0.7363265752792358, "learning_rate": 2.7525646026944794e-05, "loss": 0.0928, "step": 9525 }, { "epoch": 0.20990816793094141, "grad_norm": 0.3927961587905884, "learning_rate": 2.752505700307386e-05, "loss": 0.129, "step": 9526 }, { "epoch": 0.20993020322045755, "grad_norm": 0.6672350168228149, "learning_rate": 2.752446791540609e-05, "loss": 0.1316, "step": 9527 }, { "epoch": 0.20995223850997372, "grad_norm": 0.8909263014793396, "learning_rate": 2.7523878763944493e-05, "loss": 0.1124, "step": 9528 }, { "epoch": 0.20997427379948988, "grad_norm": 0.7532713413238525, "learning_rate": 2.7523289548692064e-05, "loss": 0.1285, "step": 9529 }, { "epoch": 0.20999630908900604, "grad_norm": 0.9118532538414001, "learning_rate": 2.7522700269651807e-05, "loss": 0.1066, "step": 9530 }, { "epoch": 0.2100183443785222, "grad_norm": 1.043329119682312, "learning_rate": 2.752211092682672e-05, "loss": 0.1522, "step": 9531 }, { "epoch": 0.21004037966803837, "grad_norm": 0.702423632144928, "learning_rate": 2.752152152021981e-05, "loss": 0.1029, "step": 9532 }, { "epoch": 0.2100624149575545, "grad_norm": 0.7827533483505249, "learning_rate": 2.7520932049834072e-05, "loss": 0.1089, "step": 9533 }, { "epoch": 0.21008445024707068, "grad_norm": 0.8319678902626038, "learning_rate": 2.752034251567252e-05, "loss": 0.1014, "step": 9534 }, { "epoch": 0.21010648553658684, "grad_norm": 0.9124559760093689, "learning_rate": 2.7519752917738144e-05, "loss": 0.1224, "step": 9535 }, { "epoch": 0.210128520826103, "grad_norm": 0.8791576027870178, "learning_rate": 2.7519163256033955e-05, "loss": 0.1225, "step": 9536 }, { "epoch": 0.21015055611561917, "grad_norm": 1.1103156805038452, "learning_rate": 2.7518573530562953e-05, "loss": 0.0878, "step": 9537 }, { "epoch": 0.21017259140513533, "grad_norm": 1.4813899993896484, "learning_rate": 2.7517983741328145e-05, "loss": 0.1241, "step": 9538 }, { "epoch": 0.21019462669465147, "grad_norm": 0.8261683583259583, "learning_rate": 2.751739388833253e-05, "loss": 0.1403, "step": 9539 }, { "epoch": 0.21021666198416764, "grad_norm": 1.0007140636444092, "learning_rate": 2.751680397157912e-05, "loss": 0.1154, "step": 9540 }, { "epoch": 0.2102386972736838, "grad_norm": 1.758321762084961, "learning_rate": 2.751621399107091e-05, "loss": 0.1475, "step": 9541 }, { "epoch": 0.21026073256319996, "grad_norm": 0.8227629065513611, "learning_rate": 2.7515623946810912e-05, "loss": 0.1204, "step": 9542 }, { "epoch": 0.21028276785271613, "grad_norm": 1.07012140750885, "learning_rate": 2.7515033838802132e-05, "loss": 0.137, "step": 9543 }, { "epoch": 0.2103048031422323, "grad_norm": 1.1461741924285889, "learning_rate": 2.7514443667047573e-05, "loss": 0.079, "step": 9544 }, { "epoch": 0.21032683843174846, "grad_norm": 0.7484453320503235, "learning_rate": 2.7513853431550238e-05, "loss": 0.0881, "step": 9545 }, { "epoch": 0.2103488737212646, "grad_norm": 0.6473890542984009, "learning_rate": 2.751326313231314e-05, "loss": 0.0998, "step": 9546 }, { "epoch": 0.21037090901078076, "grad_norm": 1.3410927057266235, "learning_rate": 2.7512672769339286e-05, "loss": 0.1466, "step": 9547 }, { "epoch": 0.21039294430029692, "grad_norm": 0.9275954365730286, "learning_rate": 2.7512082342631676e-05, "loss": 0.1301, "step": 9548 }, { "epoch": 0.2104149795898131, "grad_norm": 0.9861015677452087, "learning_rate": 2.7511491852193324e-05, "loss": 0.1699, "step": 9549 }, { "epoch": 0.21043701487932925, "grad_norm": 1.928267002105713, "learning_rate": 2.7510901298027225e-05, "loss": 0.1217, "step": 9550 }, { "epoch": 0.21045905016884542, "grad_norm": 0.9403424859046936, "learning_rate": 2.7510310680136405e-05, "loss": 0.0982, "step": 9551 }, { "epoch": 0.21048108545836156, "grad_norm": 0.845876157283783, "learning_rate": 2.7509719998523865e-05, "loss": 0.1269, "step": 9552 }, { "epoch": 0.21050312074787772, "grad_norm": 0.9987785220146179, "learning_rate": 2.7509129253192612e-05, "loss": 0.0893, "step": 9553 }, { "epoch": 0.21052515603739388, "grad_norm": 0.7228416204452515, "learning_rate": 2.7508538444145656e-05, "loss": 0.0925, "step": 9554 }, { "epoch": 0.21054719132691005, "grad_norm": 1.412286400794983, "learning_rate": 2.7507947571386003e-05, "loss": 0.1217, "step": 9555 }, { "epoch": 0.2105692266164262, "grad_norm": 1.1366053819656372, "learning_rate": 2.7507356634916666e-05, "loss": 0.1099, "step": 9556 }, { "epoch": 0.21059126190594238, "grad_norm": 1.196070671081543, "learning_rate": 2.750676563474066e-05, "loss": 0.1287, "step": 9557 }, { "epoch": 0.21061329719545852, "grad_norm": 0.9819999933242798, "learning_rate": 2.7506174570860985e-05, "loss": 0.0872, "step": 9558 }, { "epoch": 0.21063533248497468, "grad_norm": 1.0811009407043457, "learning_rate": 2.7505583443280656e-05, "loss": 0.1495, "step": 9559 }, { "epoch": 0.21065736777449084, "grad_norm": 0.5119748711585999, "learning_rate": 2.750499225200269e-05, "loss": 0.0883, "step": 9560 }, { "epoch": 0.210679403064007, "grad_norm": 0.7947272658348083, "learning_rate": 2.750440099703009e-05, "loss": 0.1466, "step": 9561 }, { "epoch": 0.21070143835352317, "grad_norm": 0.899817705154419, "learning_rate": 2.750380967836587e-05, "loss": 0.1071, "step": 9562 }, { "epoch": 0.21072347364303934, "grad_norm": 0.8216478824615479, "learning_rate": 2.7503218296013038e-05, "loss": 0.1066, "step": 9563 }, { "epoch": 0.21074550893255548, "grad_norm": 1.14667809009552, "learning_rate": 2.7502626849974616e-05, "loss": 0.1203, "step": 9564 }, { "epoch": 0.21076754422207164, "grad_norm": 0.9630961418151855, "learning_rate": 2.7502035340253607e-05, "loss": 0.0805, "step": 9565 }, { "epoch": 0.2107895795115878, "grad_norm": 1.1367900371551514, "learning_rate": 2.7501443766853034e-05, "loss": 0.1391, "step": 9566 }, { "epoch": 0.21081161480110397, "grad_norm": 1.028194546699524, "learning_rate": 2.7500852129775898e-05, "loss": 0.115, "step": 9567 }, { "epoch": 0.21083365009062013, "grad_norm": 1.0806689262390137, "learning_rate": 2.7500260429025222e-05, "loss": 0.0962, "step": 9568 }, { "epoch": 0.2108556853801363, "grad_norm": 1.701483130455017, "learning_rate": 2.7499668664604012e-05, "loss": 0.1176, "step": 9569 }, { "epoch": 0.21087772066965244, "grad_norm": 0.9813047051429749, "learning_rate": 2.7499076836515292e-05, "loss": 0.1362, "step": 9570 }, { "epoch": 0.2108997559591686, "grad_norm": 0.9074527025222778, "learning_rate": 2.749848494476207e-05, "loss": 0.0758, "step": 9571 }, { "epoch": 0.21092179124868476, "grad_norm": 0.6421617269515991, "learning_rate": 2.7497892989347362e-05, "loss": 0.105, "step": 9572 }, { "epoch": 0.21094382653820093, "grad_norm": 1.0461639165878296, "learning_rate": 2.749730097027418e-05, "loss": 0.1236, "step": 9573 }, { "epoch": 0.2109658618277171, "grad_norm": 0.9637064933776855, "learning_rate": 2.7496708887545546e-05, "loss": 0.0717, "step": 9574 }, { "epoch": 0.21098789711723326, "grad_norm": 0.5204156041145325, "learning_rate": 2.749611674116447e-05, "loss": 0.0981, "step": 9575 }, { "epoch": 0.2110099324067494, "grad_norm": 0.8958938121795654, "learning_rate": 2.749552453113397e-05, "loss": 0.1023, "step": 9576 }, { "epoch": 0.21103196769626556, "grad_norm": 0.7332285642623901, "learning_rate": 2.7494932257457064e-05, "loss": 0.0968, "step": 9577 }, { "epoch": 0.21105400298578172, "grad_norm": 0.7407975196838379, "learning_rate": 2.7494339920136768e-05, "loss": 0.1409, "step": 9578 }, { "epoch": 0.2110760382752979, "grad_norm": 1.05000901222229, "learning_rate": 2.74937475191761e-05, "loss": 0.1172, "step": 9579 }, { "epoch": 0.21109807356481405, "grad_norm": 0.4522228240966797, "learning_rate": 2.7493155054578073e-05, "loss": 0.1505, "step": 9580 }, { "epoch": 0.21112010885433022, "grad_norm": 0.6940401792526245, "learning_rate": 2.7492562526345715e-05, "loss": 0.1234, "step": 9581 }, { "epoch": 0.21114214414384638, "grad_norm": 1.0198485851287842, "learning_rate": 2.749196993448203e-05, "loss": 0.0847, "step": 9582 }, { "epoch": 0.21116417943336252, "grad_norm": 0.6861733198165894, "learning_rate": 2.7491377278990047e-05, "loss": 0.0908, "step": 9583 }, { "epoch": 0.21118621472287868, "grad_norm": 0.815743625164032, "learning_rate": 2.749078455987278e-05, "loss": 0.0807, "step": 9584 }, { "epoch": 0.21120825001239485, "grad_norm": 0.9144197106361389, "learning_rate": 2.749019177713325e-05, "loss": 0.0962, "step": 9585 }, { "epoch": 0.211230285301911, "grad_norm": 0.884756326675415, "learning_rate": 2.7489598930774473e-05, "loss": 0.1078, "step": 9586 }, { "epoch": 0.21125232059142718, "grad_norm": 0.7405425906181335, "learning_rate": 2.748900602079947e-05, "loss": 0.1473, "step": 9587 }, { "epoch": 0.21127435588094334, "grad_norm": 0.8536518216133118, "learning_rate": 2.7488413047211263e-05, "loss": 0.1126, "step": 9588 }, { "epoch": 0.21129639117045948, "grad_norm": 1.1921648979187012, "learning_rate": 2.7487820010012877e-05, "loss": 0.1111, "step": 9589 }, { "epoch": 0.21131842645997564, "grad_norm": 0.989563524723053, "learning_rate": 2.748722690920732e-05, "loss": 0.0839, "step": 9590 }, { "epoch": 0.2113404617494918, "grad_norm": 1.014656662940979, "learning_rate": 2.7486633744797625e-05, "loss": 0.0922, "step": 9591 }, { "epoch": 0.21136249703900797, "grad_norm": 0.8718709945678711, "learning_rate": 2.7486040516786805e-05, "loss": 0.0881, "step": 9592 }, { "epoch": 0.21138453232852414, "grad_norm": 1.6399030685424805, "learning_rate": 2.748544722517789e-05, "loss": 0.1301, "step": 9593 }, { "epoch": 0.2114065676180403, "grad_norm": 1.3414584398269653, "learning_rate": 2.7484853869973896e-05, "loss": 0.1074, "step": 9594 }, { "epoch": 0.21142860290755644, "grad_norm": 0.9070578217506409, "learning_rate": 2.7484260451177843e-05, "loss": 0.0987, "step": 9595 }, { "epoch": 0.2114506381970726, "grad_norm": 1.0350326299667358, "learning_rate": 2.748366696879276e-05, "loss": 0.0924, "step": 9596 }, { "epoch": 0.21147267348658877, "grad_norm": 0.5838679075241089, "learning_rate": 2.748307342282166e-05, "loss": 0.0654, "step": 9597 }, { "epoch": 0.21149470877610493, "grad_norm": 0.9094191193580627, "learning_rate": 2.7482479813267584e-05, "loss": 0.0889, "step": 9598 }, { "epoch": 0.2115167440656211, "grad_norm": 0.6579936742782593, "learning_rate": 2.7481886140133538e-05, "loss": 0.1266, "step": 9599 }, { "epoch": 0.21153877935513726, "grad_norm": 0.7267175316810608, "learning_rate": 2.7481292403422554e-05, "loss": 0.0906, "step": 9600 }, { "epoch": 0.2115608146446534, "grad_norm": 0.6867476105690002, "learning_rate": 2.748069860313765e-05, "loss": 0.1059, "step": 9601 }, { "epoch": 0.21158284993416956, "grad_norm": 0.49464529752731323, "learning_rate": 2.748010473928186e-05, "loss": 0.1108, "step": 9602 }, { "epoch": 0.21160488522368573, "grad_norm": 0.7427147626876831, "learning_rate": 2.7479510811858207e-05, "loss": 0.0993, "step": 9603 }, { "epoch": 0.2116269205132019, "grad_norm": 0.8840469717979431, "learning_rate": 2.7478916820869708e-05, "loss": 0.0825, "step": 9604 }, { "epoch": 0.21164895580271806, "grad_norm": 1.0920355319976807, "learning_rate": 2.7478322766319396e-05, "loss": 0.1274, "step": 9605 }, { "epoch": 0.21167099109223422, "grad_norm": 0.8909595012664795, "learning_rate": 2.74777286482103e-05, "loss": 0.0835, "step": 9606 }, { "epoch": 0.21169302638175036, "grad_norm": 1.0996824502944946, "learning_rate": 2.747713446654543e-05, "loss": 0.1354, "step": 9607 }, { "epoch": 0.21171506167126652, "grad_norm": 0.7091004848480225, "learning_rate": 2.7476540221327832e-05, "loss": 0.1332, "step": 9608 }, { "epoch": 0.2117370969607827, "grad_norm": 0.6624748110771179, "learning_rate": 2.7475945912560523e-05, "loss": 0.0871, "step": 9609 }, { "epoch": 0.21175913225029885, "grad_norm": 1.2398308515548706, "learning_rate": 2.747535154024653e-05, "loss": 0.1173, "step": 9610 }, { "epoch": 0.21178116753981502, "grad_norm": 0.7948720455169678, "learning_rate": 2.747475710438888e-05, "loss": 0.0838, "step": 9611 }, { "epoch": 0.21180320282933118, "grad_norm": 1.2391152381896973, "learning_rate": 2.7474162604990604e-05, "loss": 0.1305, "step": 9612 }, { "epoch": 0.21182523811884735, "grad_norm": 0.9639773964881897, "learning_rate": 2.7473568042054728e-05, "loss": 0.1282, "step": 9613 }, { "epoch": 0.21184727340836348, "grad_norm": 1.015613317489624, "learning_rate": 2.7472973415584283e-05, "loss": 0.1023, "step": 9614 }, { "epoch": 0.21186930869787965, "grad_norm": 1.1830288171768188, "learning_rate": 2.7472378725582295e-05, "loss": 0.0722, "step": 9615 }, { "epoch": 0.2118913439873958, "grad_norm": 1.1004729270935059, "learning_rate": 2.7471783972051795e-05, "loss": 0.1358, "step": 9616 }, { "epoch": 0.21191337927691198, "grad_norm": 0.5980182886123657, "learning_rate": 2.747118915499581e-05, "loss": 0.0854, "step": 9617 }, { "epoch": 0.21193541456642814, "grad_norm": 0.5400837659835815, "learning_rate": 2.7470594274417368e-05, "loss": 0.0667, "step": 9618 }, { "epoch": 0.2119574498559443, "grad_norm": 0.7679840922355652, "learning_rate": 2.746999933031951e-05, "loss": 0.1123, "step": 9619 }, { "epoch": 0.21197948514546044, "grad_norm": 0.9085874557495117, "learning_rate": 2.746940432270525e-05, "loss": 0.0959, "step": 9620 }, { "epoch": 0.2120015204349766, "grad_norm": 1.1350879669189453, "learning_rate": 2.7468809251577633e-05, "loss": 0.1371, "step": 9621 }, { "epoch": 0.21202355572449277, "grad_norm": 0.707844078540802, "learning_rate": 2.7468214116939682e-05, "loss": 0.1125, "step": 9622 }, { "epoch": 0.21204559101400894, "grad_norm": 0.6240555047988892, "learning_rate": 2.746761891879443e-05, "loss": 0.0909, "step": 9623 }, { "epoch": 0.2120676263035251, "grad_norm": 0.7553934454917908, "learning_rate": 2.746702365714491e-05, "loss": 0.1064, "step": 9624 }, { "epoch": 0.21208966159304127, "grad_norm": 0.5724050998687744, "learning_rate": 2.7466428331994152e-05, "loss": 0.1254, "step": 9625 }, { "epoch": 0.2121116968825574, "grad_norm": 0.9186209440231323, "learning_rate": 2.746583294334519e-05, "loss": 0.1045, "step": 9626 }, { "epoch": 0.21213373217207357, "grad_norm": 0.7946040630340576, "learning_rate": 2.7465237491201057e-05, "loss": 0.1079, "step": 9627 }, { "epoch": 0.21215576746158973, "grad_norm": 1.0974299907684326, "learning_rate": 2.7464641975564786e-05, "loss": 0.1487, "step": 9628 }, { "epoch": 0.2121778027511059, "grad_norm": 0.8070678114891052, "learning_rate": 2.7464046396439408e-05, "loss": 0.1047, "step": 9629 }, { "epoch": 0.21219983804062206, "grad_norm": 0.8562759757041931, "learning_rate": 2.7463450753827958e-05, "loss": 0.1271, "step": 9630 }, { "epoch": 0.21222187333013823, "grad_norm": 0.9073349237442017, "learning_rate": 2.746285504773347e-05, "loss": 0.0939, "step": 9631 }, { "epoch": 0.21224390861965436, "grad_norm": 0.7894222736358643, "learning_rate": 2.7462259278158977e-05, "loss": 0.112, "step": 9632 }, { "epoch": 0.21226594390917053, "grad_norm": 0.7256762385368347, "learning_rate": 2.7461663445107517e-05, "loss": 0.1127, "step": 9633 }, { "epoch": 0.2122879791986867, "grad_norm": 0.852218508720398, "learning_rate": 2.746106754858212e-05, "loss": 0.0954, "step": 9634 }, { "epoch": 0.21231001448820286, "grad_norm": 0.9615163207054138, "learning_rate": 2.7460471588585824e-05, "loss": 0.0941, "step": 9635 }, { "epoch": 0.21233204977771902, "grad_norm": 1.183384895324707, "learning_rate": 2.7459875565121666e-05, "loss": 0.1005, "step": 9636 }, { "epoch": 0.2123540850672352, "grad_norm": 0.8721834421157837, "learning_rate": 2.745927947819268e-05, "loss": 0.0965, "step": 9637 }, { "epoch": 0.21237612035675132, "grad_norm": 0.9358057379722595, "learning_rate": 2.7458683327801902e-05, "loss": 0.0856, "step": 9638 }, { "epoch": 0.2123981556462675, "grad_norm": 1.0696560144424438, "learning_rate": 2.7458087113952367e-05, "loss": 0.098, "step": 9639 }, { "epoch": 0.21242019093578365, "grad_norm": 0.8437453508377075, "learning_rate": 2.7457490836647116e-05, "loss": 0.1082, "step": 9640 }, { "epoch": 0.21244222622529982, "grad_norm": 0.9198430776596069, "learning_rate": 2.7456894495889186e-05, "loss": 0.0872, "step": 9641 }, { "epoch": 0.21246426151481598, "grad_norm": 0.7524867653846741, "learning_rate": 2.745629809168161e-05, "loss": 0.1083, "step": 9642 }, { "epoch": 0.21248629680433215, "grad_norm": 0.8255652189254761, "learning_rate": 2.7455701624027426e-05, "loss": 0.1277, "step": 9643 }, { "epoch": 0.21250833209384828, "grad_norm": 0.6448826193809509, "learning_rate": 2.745510509292968e-05, "loss": 0.1373, "step": 9644 }, { "epoch": 0.21253036738336445, "grad_norm": 0.881388247013092, "learning_rate": 2.74545084983914e-05, "loss": 0.1056, "step": 9645 }, { "epoch": 0.2125524026728806, "grad_norm": 0.6193966269493103, "learning_rate": 2.745391184041563e-05, "loss": 0.0707, "step": 9646 }, { "epoch": 0.21257443796239678, "grad_norm": 1.4886760711669922, "learning_rate": 2.745331511900541e-05, "loss": 0.1183, "step": 9647 }, { "epoch": 0.21259647325191294, "grad_norm": 0.9308347105979919, "learning_rate": 2.745271833416378e-05, "loss": 0.1252, "step": 9648 }, { "epoch": 0.2126185085414291, "grad_norm": 0.9186280369758606, "learning_rate": 2.7452121485893774e-05, "loss": 0.1103, "step": 9649 }, { "epoch": 0.21264054383094527, "grad_norm": 1.4241435527801514, "learning_rate": 2.7451524574198436e-05, "loss": 0.155, "step": 9650 }, { "epoch": 0.2126625791204614, "grad_norm": 0.6409652233123779, "learning_rate": 2.745092759908081e-05, "loss": 0.0619, "step": 9651 }, { "epoch": 0.21268461440997757, "grad_norm": 1.5654466152191162, "learning_rate": 2.745033056054393e-05, "loss": 0.163, "step": 9652 }, { "epoch": 0.21270664969949374, "grad_norm": 1.096545934677124, "learning_rate": 2.744973345859084e-05, "loss": 0.1124, "step": 9653 }, { "epoch": 0.2127286849890099, "grad_norm": 0.5669367909431458, "learning_rate": 2.744913629322458e-05, "loss": 0.1451, "step": 9654 }, { "epoch": 0.21275072027852607, "grad_norm": 1.2273027896881104, "learning_rate": 2.74485390644482e-05, "loss": 0.0876, "step": 9655 }, { "epoch": 0.21277275556804223, "grad_norm": 0.7399181127548218, "learning_rate": 2.744794177226473e-05, "loss": 0.1163, "step": 9656 }, { "epoch": 0.21279479085755837, "grad_norm": 1.1725246906280518, "learning_rate": 2.7447344416677216e-05, "loss": 0.1116, "step": 9657 }, { "epoch": 0.21281682614707453, "grad_norm": 0.849331796169281, "learning_rate": 2.744674699768871e-05, "loss": 0.1145, "step": 9658 }, { "epoch": 0.2128388614365907, "grad_norm": 0.6965487599372864, "learning_rate": 2.7446149515302233e-05, "loss": 0.1073, "step": 9659 }, { "epoch": 0.21286089672610686, "grad_norm": 1.1055755615234375, "learning_rate": 2.7445551969520853e-05, "loss": 0.108, "step": 9660 }, { "epoch": 0.21288293201562303, "grad_norm": 0.6397094130516052, "learning_rate": 2.7444954360347602e-05, "loss": 0.0904, "step": 9661 }, { "epoch": 0.2129049673051392, "grad_norm": 0.9406716823577881, "learning_rate": 2.7444356687785523e-05, "loss": 0.1094, "step": 9662 }, { "epoch": 0.21292700259465533, "grad_norm": 0.8964734077453613, "learning_rate": 2.7443758951837663e-05, "loss": 0.0949, "step": 9663 }, { "epoch": 0.2129490378841715, "grad_norm": 0.8933403491973877, "learning_rate": 2.7443161152507064e-05, "loss": 0.1002, "step": 9664 }, { "epoch": 0.21297107317368766, "grad_norm": 0.9388737678527832, "learning_rate": 2.7442563289796777e-05, "loss": 0.1611, "step": 9665 }, { "epoch": 0.21299310846320382, "grad_norm": 0.5410265326499939, "learning_rate": 2.744196536370984e-05, "loss": 0.0956, "step": 9666 }, { "epoch": 0.21301514375272, "grad_norm": 0.8174242377281189, "learning_rate": 2.7441367374249298e-05, "loss": 0.1142, "step": 9667 }, { "epoch": 0.21303717904223615, "grad_norm": 0.9469037652015686, "learning_rate": 2.7440769321418207e-05, "loss": 0.0902, "step": 9668 }, { "epoch": 0.2130592143317523, "grad_norm": 0.9543617367744446, "learning_rate": 2.7440171205219603e-05, "loss": 0.1114, "step": 9669 }, { "epoch": 0.21308124962126845, "grad_norm": 0.6936478614807129, "learning_rate": 2.7439573025656533e-05, "loss": 0.0993, "step": 9670 }, { "epoch": 0.21310328491078462, "grad_norm": 1.2412117719650269, "learning_rate": 2.7438974782732055e-05, "loss": 0.0922, "step": 9671 }, { "epoch": 0.21312532020030078, "grad_norm": 1.1238564252853394, "learning_rate": 2.74383764764492e-05, "loss": 0.1225, "step": 9672 }, { "epoch": 0.21314735548981695, "grad_norm": 0.6426645517349243, "learning_rate": 2.743777810681103e-05, "loss": 0.1034, "step": 9673 }, { "epoch": 0.2131693907793331, "grad_norm": 1.13162362575531, "learning_rate": 2.7437179673820586e-05, "loss": 0.1072, "step": 9674 }, { "epoch": 0.21319142606884925, "grad_norm": 0.8448649048805237, "learning_rate": 2.743658117748091e-05, "loss": 0.0967, "step": 9675 }, { "epoch": 0.2132134613583654, "grad_norm": 1.4336931705474854, "learning_rate": 2.743598261779506e-05, "loss": 0.1752, "step": 9676 }, { "epoch": 0.21323549664788158, "grad_norm": 1.1881788969039917, "learning_rate": 2.7435383994766086e-05, "loss": 0.1264, "step": 9677 }, { "epoch": 0.21325753193739774, "grad_norm": 0.4967601001262665, "learning_rate": 2.7434785308397028e-05, "loss": 0.1066, "step": 9678 }, { "epoch": 0.2132795672269139, "grad_norm": 1.5295358896255493, "learning_rate": 2.7434186558690946e-05, "loss": 0.1204, "step": 9679 }, { "epoch": 0.21330160251643007, "grad_norm": 0.5441892743110657, "learning_rate": 2.7433587745650878e-05, "loss": 0.1419, "step": 9680 }, { "epoch": 0.2133236378059462, "grad_norm": 1.122509241104126, "learning_rate": 2.7432988869279885e-05, "loss": 0.0945, "step": 9681 }, { "epoch": 0.21334567309546237, "grad_norm": 0.7909618020057678, "learning_rate": 2.743238992958101e-05, "loss": 0.081, "step": 9682 }, { "epoch": 0.21336770838497854, "grad_norm": 0.6620793342590332, "learning_rate": 2.7431790926557306e-05, "loss": 0.1077, "step": 9683 }, { "epoch": 0.2133897436744947, "grad_norm": 0.6230141520500183, "learning_rate": 2.7431191860211826e-05, "loss": 0.0923, "step": 9684 }, { "epoch": 0.21341177896401087, "grad_norm": 0.5631279349327087, "learning_rate": 2.7430592730547617e-05, "loss": 0.0733, "step": 9685 }, { "epoch": 0.21343381425352703, "grad_norm": 0.7121277451515198, "learning_rate": 2.7429993537567736e-05, "loss": 0.132, "step": 9686 }, { "epoch": 0.2134558495430432, "grad_norm": 0.7668827176094055, "learning_rate": 2.7429394281275233e-05, "loss": 0.1009, "step": 9687 }, { "epoch": 0.21347788483255933, "grad_norm": 0.8763332962989807, "learning_rate": 2.7428794961673155e-05, "loss": 0.1015, "step": 9688 }, { "epoch": 0.2134999201220755, "grad_norm": 1.0700912475585938, "learning_rate": 2.7428195578764568e-05, "loss": 0.0907, "step": 9689 }, { "epoch": 0.21352195541159166, "grad_norm": 0.9473975896835327, "learning_rate": 2.7427596132552508e-05, "loss": 0.1306, "step": 9690 }, { "epoch": 0.21354399070110783, "grad_norm": 0.8052263259887695, "learning_rate": 2.742699662304004e-05, "loss": 0.1274, "step": 9691 }, { "epoch": 0.213566025990624, "grad_norm": 2.517289638519287, "learning_rate": 2.7426397050230213e-05, "loss": 0.116, "step": 9692 }, { "epoch": 0.21358806128014016, "grad_norm": 0.7088700532913208, "learning_rate": 2.7425797414126086e-05, "loss": 0.0914, "step": 9693 }, { "epoch": 0.2136100965696563, "grad_norm": 1.9248440265655518, "learning_rate": 2.7425197714730705e-05, "loss": 0.1191, "step": 9694 }, { "epoch": 0.21363213185917246, "grad_norm": 1.4681044816970825, "learning_rate": 2.7424597952047133e-05, "loss": 0.1309, "step": 9695 }, { "epoch": 0.21365416714868862, "grad_norm": 0.9597435593605042, "learning_rate": 2.7423998126078416e-05, "loss": 0.1314, "step": 9696 }, { "epoch": 0.2136762024382048, "grad_norm": 0.9064372181892395, "learning_rate": 2.742339823682762e-05, "loss": 0.091, "step": 9697 }, { "epoch": 0.21369823772772095, "grad_norm": 0.599298357963562, "learning_rate": 2.742279828429779e-05, "loss": 0.0745, "step": 9698 }, { "epoch": 0.21372027301723712, "grad_norm": 0.8400723338127136, "learning_rate": 2.742219826849199e-05, "loss": 0.1412, "step": 9699 }, { "epoch": 0.21374230830675325, "grad_norm": 1.1578258275985718, "learning_rate": 2.742159818941327e-05, "loss": 0.1473, "step": 9700 }, { "epoch": 0.21376434359626942, "grad_norm": 0.5525045394897461, "learning_rate": 2.7420998047064693e-05, "loss": 0.1031, "step": 9701 }, { "epoch": 0.21378637888578558, "grad_norm": 0.9559522867202759, "learning_rate": 2.742039784144931e-05, "loss": 0.1425, "step": 9702 }, { "epoch": 0.21380841417530175, "grad_norm": 1.236151099205017, "learning_rate": 2.741979757257018e-05, "loss": 0.1304, "step": 9703 }, { "epoch": 0.2138304494648179, "grad_norm": 0.8911829590797424, "learning_rate": 2.7419197240430365e-05, "loss": 0.1237, "step": 9704 }, { "epoch": 0.21385248475433408, "grad_norm": 0.6426092982292175, "learning_rate": 2.7418596845032914e-05, "loss": 0.1281, "step": 9705 }, { "epoch": 0.2138745200438502, "grad_norm": 0.7868907451629639, "learning_rate": 2.7417996386380896e-05, "loss": 0.122, "step": 9706 }, { "epoch": 0.21389655533336638, "grad_norm": 0.6242086887359619, "learning_rate": 2.7417395864477356e-05, "loss": 0.0696, "step": 9707 }, { "epoch": 0.21391859062288254, "grad_norm": 0.9197854399681091, "learning_rate": 2.7416795279325358e-05, "loss": 0.0959, "step": 9708 }, { "epoch": 0.2139406259123987, "grad_norm": 0.48794126510620117, "learning_rate": 2.741619463092797e-05, "loss": 0.0847, "step": 9709 }, { "epoch": 0.21396266120191487, "grad_norm": 1.4019372463226318, "learning_rate": 2.7415593919288246e-05, "loss": 0.1436, "step": 9710 }, { "epoch": 0.21398469649143104, "grad_norm": 0.9563698172569275, "learning_rate": 2.741499314440924e-05, "loss": 0.0828, "step": 9711 }, { "epoch": 0.21400673178094717, "grad_norm": 0.7814763784408569, "learning_rate": 2.7414392306294016e-05, "loss": 0.1237, "step": 9712 }, { "epoch": 0.21402876707046334, "grad_norm": 0.8309527635574341, "learning_rate": 2.741379140494564e-05, "loss": 0.1123, "step": 9713 }, { "epoch": 0.2140508023599795, "grad_norm": 0.5558580756187439, "learning_rate": 2.741319044036716e-05, "loss": 0.1053, "step": 9714 }, { "epoch": 0.21407283764949567, "grad_norm": 0.8606625199317932, "learning_rate": 2.741258941256165e-05, "loss": 0.1049, "step": 9715 }, { "epoch": 0.21409487293901183, "grad_norm": 0.901614785194397, "learning_rate": 2.7411988321532167e-05, "loss": 0.1122, "step": 9716 }, { "epoch": 0.214116908228528, "grad_norm": 0.8148550987243652, "learning_rate": 2.741138716728177e-05, "loss": 0.1117, "step": 9717 }, { "epoch": 0.21413894351804416, "grad_norm": 0.7775790095329285, "learning_rate": 2.7410785949813522e-05, "loss": 0.0894, "step": 9718 }, { "epoch": 0.2141609788075603, "grad_norm": 0.6436282992362976, "learning_rate": 2.7410184669130485e-05, "loss": 0.1633, "step": 9719 }, { "epoch": 0.21418301409707646, "grad_norm": 0.9028128981590271, "learning_rate": 2.7409583325235725e-05, "loss": 0.1028, "step": 9720 }, { "epoch": 0.21420504938659263, "grad_norm": 1.0935004949569702, "learning_rate": 2.74089819181323e-05, "loss": 0.1448, "step": 9721 }, { "epoch": 0.2142270846761088, "grad_norm": 0.7448315024375916, "learning_rate": 2.740838044782328e-05, "loss": 0.1045, "step": 9722 }, { "epoch": 0.21424911996562496, "grad_norm": 0.9233422875404358, "learning_rate": 2.7407778914311716e-05, "loss": 0.1257, "step": 9723 }, { "epoch": 0.21427115525514112, "grad_norm": 0.867538571357727, "learning_rate": 2.740717731760069e-05, "loss": 0.1293, "step": 9724 }, { "epoch": 0.21429319054465726, "grad_norm": 1.1599148511886597, "learning_rate": 2.7406575657693255e-05, "loss": 0.1037, "step": 9725 }, { "epoch": 0.21431522583417342, "grad_norm": 1.1660281419754028, "learning_rate": 2.7405973934592472e-05, "loss": 0.102, "step": 9726 }, { "epoch": 0.2143372611236896, "grad_norm": 0.9564726948738098, "learning_rate": 2.7405372148301415e-05, "loss": 0.1464, "step": 9727 }, { "epoch": 0.21435929641320575, "grad_norm": 0.5857319831848145, "learning_rate": 2.740477029882314e-05, "loss": 0.1556, "step": 9728 }, { "epoch": 0.21438133170272192, "grad_norm": 1.2680320739746094, "learning_rate": 2.7404168386160725e-05, "loss": 0.1051, "step": 9729 }, { "epoch": 0.21440336699223808, "grad_norm": 1.2307931184768677, "learning_rate": 2.7403566410317227e-05, "loss": 0.1034, "step": 9730 }, { "epoch": 0.21442540228175422, "grad_norm": 1.3568028211593628, "learning_rate": 2.7402964371295713e-05, "loss": 0.1479, "step": 9731 }, { "epoch": 0.21444743757127038, "grad_norm": 2.472790241241455, "learning_rate": 2.740236226909925e-05, "loss": 0.1008, "step": 9732 }, { "epoch": 0.21446947286078655, "grad_norm": 1.2543524503707886, "learning_rate": 2.7401760103730904e-05, "loss": 0.1128, "step": 9733 }, { "epoch": 0.2144915081503027, "grad_norm": 0.9151908159255981, "learning_rate": 2.7401157875193744e-05, "loss": 0.1202, "step": 9734 }, { "epoch": 0.21451354343981888, "grad_norm": 1.1401047706604004, "learning_rate": 2.740055558349084e-05, "loss": 0.1289, "step": 9735 }, { "epoch": 0.21453557872933504, "grad_norm": 0.9123346209526062, "learning_rate": 2.7399953228625255e-05, "loss": 0.1166, "step": 9736 }, { "epoch": 0.21455761401885118, "grad_norm": 1.1503993272781372, "learning_rate": 2.7399350810600062e-05, "loss": 0.1427, "step": 9737 }, { "epoch": 0.21457964930836734, "grad_norm": 0.6227632761001587, "learning_rate": 2.739874832941832e-05, "loss": 0.0786, "step": 9738 }, { "epoch": 0.2146016845978835, "grad_norm": 0.8829655051231384, "learning_rate": 2.7398145785083106e-05, "loss": 0.1441, "step": 9739 }, { "epoch": 0.21462371988739967, "grad_norm": 1.1160508394241333, "learning_rate": 2.7397543177597487e-05, "loss": 0.1158, "step": 9740 }, { "epoch": 0.21464575517691584, "grad_norm": 0.5854774117469788, "learning_rate": 2.7396940506964535e-05, "loss": 0.0733, "step": 9741 }, { "epoch": 0.214667790466432, "grad_norm": 0.8819620013237, "learning_rate": 2.7396337773187316e-05, "loss": 0.1197, "step": 9742 }, { "epoch": 0.21468982575594814, "grad_norm": 1.0639832019805908, "learning_rate": 2.73957349762689e-05, "loss": 0.1193, "step": 9743 }, { "epoch": 0.2147118610454643, "grad_norm": 0.7828847765922546, "learning_rate": 2.739513211621236e-05, "loss": 0.0845, "step": 9744 }, { "epoch": 0.21473389633498047, "grad_norm": 1.146138310432434, "learning_rate": 2.7394529193020766e-05, "loss": 0.144, "step": 9745 }, { "epoch": 0.21475593162449663, "grad_norm": 1.0762146711349487, "learning_rate": 2.7393926206697188e-05, "loss": 0.1055, "step": 9746 }, { "epoch": 0.2147779669140128, "grad_norm": 0.8457525968551636, "learning_rate": 2.7393323157244698e-05, "loss": 0.0951, "step": 9747 }, { "epoch": 0.21480000220352896, "grad_norm": 0.6843161582946777, "learning_rate": 2.7392720044666366e-05, "loss": 0.0846, "step": 9748 }, { "epoch": 0.2148220374930451, "grad_norm": 0.9669232368469238, "learning_rate": 2.7392116868965268e-05, "loss": 0.1314, "step": 9749 }, { "epoch": 0.21484407278256126, "grad_norm": 1.2063714265823364, "learning_rate": 2.7391513630144468e-05, "loss": 0.1048, "step": 9750 }, { "epoch": 0.21486610807207743, "grad_norm": 0.9027712941169739, "learning_rate": 2.739091032820705e-05, "loss": 0.1188, "step": 9751 }, { "epoch": 0.2148881433615936, "grad_norm": 0.9344396591186523, "learning_rate": 2.7390306963156076e-05, "loss": 0.0771, "step": 9752 }, { "epoch": 0.21491017865110976, "grad_norm": 0.5784066319465637, "learning_rate": 2.738970353499463e-05, "loss": 0.0998, "step": 9753 }, { "epoch": 0.21493221394062592, "grad_norm": 1.199633240699768, "learning_rate": 2.7389100043725777e-05, "loss": 0.1152, "step": 9754 }, { "epoch": 0.21495424923014209, "grad_norm": 1.0550358295440674, "learning_rate": 2.7388496489352593e-05, "loss": 0.0945, "step": 9755 }, { "epoch": 0.21497628451965822, "grad_norm": 1.0403286218643188, "learning_rate": 2.7387892871878156e-05, "loss": 0.1307, "step": 9756 }, { "epoch": 0.2149983198091744, "grad_norm": 0.95399409532547, "learning_rate": 2.738728919130553e-05, "loss": 0.1548, "step": 9757 }, { "epoch": 0.21502035509869055, "grad_norm": 1.4766464233398438, "learning_rate": 2.7386685447637806e-05, "loss": 0.1222, "step": 9758 }, { "epoch": 0.21504239038820672, "grad_norm": 1.41071355342865, "learning_rate": 2.7386081640878047e-05, "loss": 0.1474, "step": 9759 }, { "epoch": 0.21506442567772288, "grad_norm": 0.7814160585403442, "learning_rate": 2.7385477771029336e-05, "loss": 0.1389, "step": 9760 }, { "epoch": 0.21508646096723905, "grad_norm": 1.1778223514556885, "learning_rate": 2.7384873838094743e-05, "loss": 0.1341, "step": 9761 }, { "epoch": 0.21510849625675518, "grad_norm": 0.8989056944847107, "learning_rate": 2.7384269842077344e-05, "loss": 0.1131, "step": 9762 }, { "epoch": 0.21513053154627135, "grad_norm": 1.2804651260375977, "learning_rate": 2.738366578298022e-05, "loss": 0.0591, "step": 9763 }, { "epoch": 0.2151525668357875, "grad_norm": 0.642869234085083, "learning_rate": 2.7383061660806444e-05, "loss": 0.1141, "step": 9764 }, { "epoch": 0.21517460212530368, "grad_norm": 1.0478814840316772, "learning_rate": 2.7382457475559096e-05, "loss": 0.0568, "step": 9765 }, { "epoch": 0.21519663741481984, "grad_norm": 0.6668543219566345, "learning_rate": 2.738185322724125e-05, "loss": 0.1032, "step": 9766 }, { "epoch": 0.215218672704336, "grad_norm": 1.080010175704956, "learning_rate": 2.738124891585599e-05, "loss": 0.1356, "step": 9767 }, { "epoch": 0.21524070799385214, "grad_norm": 1.046826720237732, "learning_rate": 2.7380644541406388e-05, "loss": 0.113, "step": 9768 }, { "epoch": 0.2152627432833683, "grad_norm": 1.1378860473632812, "learning_rate": 2.738004010389552e-05, "loss": 0.108, "step": 9769 }, { "epoch": 0.21528477857288447, "grad_norm": 0.9027636647224426, "learning_rate": 2.7379435603326477e-05, "loss": 0.0887, "step": 9770 }, { "epoch": 0.21530681386240064, "grad_norm": 0.959364116191864, "learning_rate": 2.7378831039702322e-05, "loss": 0.1154, "step": 9771 }, { "epoch": 0.2153288491519168, "grad_norm": 0.8312862515449524, "learning_rate": 2.737822641302615e-05, "loss": 0.0866, "step": 9772 }, { "epoch": 0.21535088444143297, "grad_norm": 0.7215378284454346, "learning_rate": 2.7377621723301028e-05, "loss": 0.1007, "step": 9773 }, { "epoch": 0.2153729197309491, "grad_norm": 0.8688461184501648, "learning_rate": 2.7377016970530045e-05, "loss": 0.1081, "step": 9774 }, { "epoch": 0.21539495502046527, "grad_norm": 0.8747122287750244, "learning_rate": 2.7376412154716274e-05, "loss": 0.1027, "step": 9775 }, { "epoch": 0.21541699030998143, "grad_norm": 0.6224718689918518, "learning_rate": 2.73758072758628e-05, "loss": 0.1392, "step": 9776 }, { "epoch": 0.2154390255994976, "grad_norm": 0.9358782172203064, "learning_rate": 2.7375202333972705e-05, "loss": 0.0936, "step": 9777 }, { "epoch": 0.21546106088901376, "grad_norm": 0.6334565877914429, "learning_rate": 2.7374597329049066e-05, "loss": 0.0912, "step": 9778 }, { "epoch": 0.21548309617852993, "grad_norm": 1.0556724071502686, "learning_rate": 2.7373992261094966e-05, "loss": 0.1058, "step": 9779 }, { "epoch": 0.21550513146804606, "grad_norm": 0.7988684773445129, "learning_rate": 2.737338713011349e-05, "loss": 0.1156, "step": 9780 }, { "epoch": 0.21552716675756223, "grad_norm": 1.1203720569610596, "learning_rate": 2.737278193610772e-05, "loss": 0.0764, "step": 9781 }, { "epoch": 0.2155492020470784, "grad_norm": 1.1953790187835693, "learning_rate": 2.7372176679080737e-05, "loss": 0.0996, "step": 9782 }, { "epoch": 0.21557123733659456, "grad_norm": 0.5913764238357544, "learning_rate": 2.737157135903562e-05, "loss": 0.087, "step": 9783 }, { "epoch": 0.21559327262611072, "grad_norm": 0.9508056640625, "learning_rate": 2.7370965975975455e-05, "loss": 0.1549, "step": 9784 }, { "epoch": 0.21561530791562689, "grad_norm": 0.7063152194023132, "learning_rate": 2.737036052990333e-05, "loss": 0.0961, "step": 9785 }, { "epoch": 0.21563734320514302, "grad_norm": 0.900494396686554, "learning_rate": 2.7369755020822323e-05, "loss": 0.0992, "step": 9786 }, { "epoch": 0.2156593784946592, "grad_norm": 1.3963426351547241, "learning_rate": 2.7369149448735524e-05, "loss": 0.1444, "step": 9787 }, { "epoch": 0.21568141378417535, "grad_norm": 0.9756178259849548, "learning_rate": 2.7368543813646014e-05, "loss": 0.0828, "step": 9788 }, { "epoch": 0.21570344907369152, "grad_norm": 1.030173659324646, "learning_rate": 2.736793811555687e-05, "loss": 0.1678, "step": 9789 }, { "epoch": 0.21572548436320768, "grad_norm": 1.5417068004608154, "learning_rate": 2.736733235447119e-05, "loss": 0.0898, "step": 9790 }, { "epoch": 0.21574751965272385, "grad_norm": 0.8240540623664856, "learning_rate": 2.736672653039206e-05, "loss": 0.107, "step": 9791 }, { "epoch": 0.21576955494224, "grad_norm": 1.1119858026504517, "learning_rate": 2.7366120643322554e-05, "loss": 0.1006, "step": 9792 }, { "epoch": 0.21579159023175615, "grad_norm": 1.2928746938705444, "learning_rate": 2.7365514693265767e-05, "loss": 0.1158, "step": 9793 }, { "epoch": 0.2158136255212723, "grad_norm": 1.18155038356781, "learning_rate": 2.7364908680224775e-05, "loss": 0.1206, "step": 9794 }, { "epoch": 0.21583566081078848, "grad_norm": 0.8862105011940002, "learning_rate": 2.736430260420268e-05, "loss": 0.1506, "step": 9795 }, { "epoch": 0.21585769610030464, "grad_norm": 0.8000038862228394, "learning_rate": 2.7363696465202562e-05, "loss": 0.1168, "step": 9796 }, { "epoch": 0.2158797313898208, "grad_norm": 0.8652304410934448, "learning_rate": 2.7363090263227504e-05, "loss": 0.1129, "step": 9797 }, { "epoch": 0.21590176667933697, "grad_norm": 1.0162304639816284, "learning_rate": 2.73624839982806e-05, "loss": 0.1239, "step": 9798 }, { "epoch": 0.2159238019688531, "grad_norm": 0.6378148198127747, "learning_rate": 2.7361877670364938e-05, "loss": 0.0947, "step": 9799 }, { "epoch": 0.21594583725836927, "grad_norm": 0.7951072454452515, "learning_rate": 2.7361271279483602e-05, "loss": 0.1397, "step": 9800 }, { "epoch": 0.21596787254788544, "grad_norm": 0.8214629292488098, "learning_rate": 2.7360664825639682e-05, "loss": 0.1101, "step": 9801 }, { "epoch": 0.2159899078374016, "grad_norm": 0.9750916361808777, "learning_rate": 2.7360058308836267e-05, "loss": 0.1066, "step": 9802 }, { "epoch": 0.21601194312691777, "grad_norm": 0.6971189379692078, "learning_rate": 2.735945172907645e-05, "loss": 0.0937, "step": 9803 }, { "epoch": 0.21603397841643393, "grad_norm": 0.7641711831092834, "learning_rate": 2.7358845086363313e-05, "loss": 0.1406, "step": 9804 }, { "epoch": 0.21605601370595007, "grad_norm": 0.8567407727241516, "learning_rate": 2.7358238380699953e-05, "loss": 0.1016, "step": 9805 }, { "epoch": 0.21607804899546623, "grad_norm": 0.9185589551925659, "learning_rate": 2.7357631612089462e-05, "loss": 0.0837, "step": 9806 }, { "epoch": 0.2161000842849824, "grad_norm": 1.4076991081237793, "learning_rate": 2.735702478053492e-05, "loss": 0.1381, "step": 9807 }, { "epoch": 0.21612211957449856, "grad_norm": 1.2011343240737915, "learning_rate": 2.735641788603943e-05, "loss": 0.1428, "step": 9808 }, { "epoch": 0.21614415486401473, "grad_norm": 0.7421225905418396, "learning_rate": 2.735581092860607e-05, "loss": 0.0842, "step": 9809 }, { "epoch": 0.2161661901535309, "grad_norm": 0.7277806997299194, "learning_rate": 2.7355203908237946e-05, "loss": 0.0974, "step": 9810 }, { "epoch": 0.21618822544304703, "grad_norm": 0.7278497219085693, "learning_rate": 2.7354596824938143e-05, "loss": 0.1461, "step": 9811 }, { "epoch": 0.2162102607325632, "grad_norm": 1.1707720756530762, "learning_rate": 2.735398967870975e-05, "loss": 0.1219, "step": 9812 }, { "epoch": 0.21623229602207936, "grad_norm": 1.116533875465393, "learning_rate": 2.7353382469555864e-05, "loss": 0.1302, "step": 9813 }, { "epoch": 0.21625433131159552, "grad_norm": 0.8669387698173523, "learning_rate": 2.735277519747958e-05, "loss": 0.1018, "step": 9814 }, { "epoch": 0.21627636660111169, "grad_norm": 0.9520685076713562, "learning_rate": 2.7352167862483985e-05, "loss": 0.1145, "step": 9815 }, { "epoch": 0.21629840189062785, "grad_norm": 1.3589340448379517, "learning_rate": 2.7351560464572176e-05, "loss": 0.1759, "step": 9816 }, { "epoch": 0.216320437180144, "grad_norm": 1.0061787366867065, "learning_rate": 2.7350953003747244e-05, "loss": 0.1127, "step": 9817 }, { "epoch": 0.21634247246966015, "grad_norm": 1.026029348373413, "learning_rate": 2.7350345480012285e-05, "loss": 0.1054, "step": 9818 }, { "epoch": 0.21636450775917632, "grad_norm": 1.129684567451477, "learning_rate": 2.7349737893370394e-05, "loss": 0.1207, "step": 9819 }, { "epoch": 0.21638654304869248, "grad_norm": 0.9086986780166626, "learning_rate": 2.7349130243824672e-05, "loss": 0.1073, "step": 9820 }, { "epoch": 0.21640857833820865, "grad_norm": 0.8048388957977295, "learning_rate": 2.73485225313782e-05, "loss": 0.1393, "step": 9821 }, { "epoch": 0.2164306136277248, "grad_norm": 0.7916024923324585, "learning_rate": 2.7347914756034085e-05, "loss": 0.05, "step": 9822 }, { "epoch": 0.21645264891724098, "grad_norm": 0.690180242061615, "learning_rate": 2.7347306917795416e-05, "loss": 0.1145, "step": 9823 }, { "epoch": 0.2164746842067571, "grad_norm": 0.8854736089706421, "learning_rate": 2.734669901666529e-05, "loss": 0.1197, "step": 9824 }, { "epoch": 0.21649671949627328, "grad_norm": 0.8534532785415649, "learning_rate": 2.734609105264681e-05, "loss": 0.1218, "step": 9825 }, { "epoch": 0.21651875478578944, "grad_norm": 1.074884057044983, "learning_rate": 2.7345483025743064e-05, "loss": 0.129, "step": 9826 }, { "epoch": 0.2165407900753056, "grad_norm": 0.8766551613807678, "learning_rate": 2.7344874935957155e-05, "loss": 0.0982, "step": 9827 }, { "epoch": 0.21656282536482177, "grad_norm": 1.7811329364776611, "learning_rate": 2.734426678329218e-05, "loss": 0.094, "step": 9828 }, { "epoch": 0.21658486065433794, "grad_norm": 1.0333130359649658, "learning_rate": 2.7343658567751228e-05, "loss": 0.1057, "step": 9829 }, { "epoch": 0.21660689594385407, "grad_norm": 0.868908703327179, "learning_rate": 2.7343050289337408e-05, "loss": 0.0778, "step": 9830 }, { "epoch": 0.21662893123337024, "grad_norm": 0.9739840626716614, "learning_rate": 2.7342441948053817e-05, "loss": 0.1085, "step": 9831 }, { "epoch": 0.2166509665228864, "grad_norm": 0.8347100019454956, "learning_rate": 2.734183354390355e-05, "loss": 0.0905, "step": 9832 }, { "epoch": 0.21667300181240257, "grad_norm": 0.9931015968322754, "learning_rate": 2.73412250768897e-05, "loss": 0.1027, "step": 9833 }, { "epoch": 0.21669503710191873, "grad_norm": 1.1724013090133667, "learning_rate": 2.7340616547015376e-05, "loss": 0.1636, "step": 9834 }, { "epoch": 0.2167170723914349, "grad_norm": 0.9097615480422974, "learning_rate": 2.7340007954283675e-05, "loss": 0.1078, "step": 9835 }, { "epoch": 0.21673910768095103, "grad_norm": 0.9369729161262512, "learning_rate": 2.73393992986977e-05, "loss": 0.0916, "step": 9836 }, { "epoch": 0.2167611429704672, "grad_norm": 0.5136308073997498, "learning_rate": 2.7338790580260543e-05, "loss": 0.0861, "step": 9837 }, { "epoch": 0.21678317825998336, "grad_norm": 1.1030443906784058, "learning_rate": 2.7338181798975306e-05, "loss": 0.1556, "step": 9838 }, { "epoch": 0.21680521354949953, "grad_norm": 1.4842149019241333, "learning_rate": 2.73375729548451e-05, "loss": 0.157, "step": 9839 }, { "epoch": 0.2168272488390157, "grad_norm": 0.8555013537406921, "learning_rate": 2.7336964047873016e-05, "loss": 0.1556, "step": 9840 }, { "epoch": 0.21684928412853186, "grad_norm": 1.0976954698562622, "learning_rate": 2.7336355078062158e-05, "loss": 0.1336, "step": 9841 }, { "epoch": 0.216871319418048, "grad_norm": 0.8771147131919861, "learning_rate": 2.7335746045415625e-05, "loss": 0.0933, "step": 9842 }, { "epoch": 0.21689335470756416, "grad_norm": 0.9394304156303406, "learning_rate": 2.7335136949936527e-05, "loss": 0.1344, "step": 9843 }, { "epoch": 0.21691538999708032, "grad_norm": 0.6048985719680786, "learning_rate": 2.7334527791627964e-05, "loss": 0.1038, "step": 9844 }, { "epoch": 0.21693742528659649, "grad_norm": 0.8115314245223999, "learning_rate": 2.733391857049303e-05, "loss": 0.0429, "step": 9845 }, { "epoch": 0.21695946057611265, "grad_norm": 0.85149085521698, "learning_rate": 2.733330928653484e-05, "loss": 0.0944, "step": 9846 }, { "epoch": 0.21698149586562882, "grad_norm": 0.8368645310401917, "learning_rate": 2.733269993975649e-05, "loss": 0.1366, "step": 9847 }, { "epoch": 0.21700353115514495, "grad_norm": 0.9651333093643188, "learning_rate": 2.7332090530161088e-05, "loss": 0.0824, "step": 9848 }, { "epoch": 0.21702556644466112, "grad_norm": 0.7595599293708801, "learning_rate": 2.7331481057751735e-05, "loss": 0.1165, "step": 9849 }, { "epoch": 0.21704760173417728, "grad_norm": 0.826263427734375, "learning_rate": 2.7330871522531535e-05, "loss": 0.1011, "step": 9850 }, { "epoch": 0.21706963702369345, "grad_norm": 0.8625915050506592, "learning_rate": 2.7330261924503595e-05, "loss": 0.1187, "step": 9851 }, { "epoch": 0.2170916723132096, "grad_norm": 1.1219658851623535, "learning_rate": 2.7329652263671018e-05, "loss": 0.1151, "step": 9852 }, { "epoch": 0.21711370760272578, "grad_norm": 0.9458510279655457, "learning_rate": 2.7329042540036908e-05, "loss": 0.1063, "step": 9853 }, { "epoch": 0.2171357428922419, "grad_norm": 1.108649492263794, "learning_rate": 2.732843275360438e-05, "loss": 0.1084, "step": 9854 }, { "epoch": 0.21715777818175808, "grad_norm": 0.8856021761894226, "learning_rate": 2.732782290437653e-05, "loss": 0.1105, "step": 9855 }, { "epoch": 0.21717981347127424, "grad_norm": 1.0287024974822998, "learning_rate": 2.7327212992356463e-05, "loss": 0.1383, "step": 9856 }, { "epoch": 0.2172018487607904, "grad_norm": 1.3856239318847656, "learning_rate": 2.7326603017547292e-05, "loss": 0.1149, "step": 9857 }, { "epoch": 0.21722388405030657, "grad_norm": 0.5847941040992737, "learning_rate": 2.7325992979952126e-05, "loss": 0.0887, "step": 9858 }, { "epoch": 0.21724591933982274, "grad_norm": 0.9686013460159302, "learning_rate": 2.732538287957406e-05, "loss": 0.0808, "step": 9859 }, { "epoch": 0.2172679546293389, "grad_norm": 1.065737009048462, "learning_rate": 2.7324772716416214e-05, "loss": 0.1379, "step": 9860 }, { "epoch": 0.21728998991885504, "grad_norm": 0.5393158197402954, "learning_rate": 2.7324162490481697e-05, "loss": 0.1, "step": 9861 }, { "epoch": 0.2173120252083712, "grad_norm": 1.3041695356369019, "learning_rate": 2.73235522017736e-05, "loss": 0.0808, "step": 9862 }, { "epoch": 0.21733406049788737, "grad_norm": 0.9755109548568726, "learning_rate": 2.7322941850295055e-05, "loss": 0.1066, "step": 9863 }, { "epoch": 0.21735609578740353, "grad_norm": 0.7958221435546875, "learning_rate": 2.7322331436049155e-05, "loss": 0.117, "step": 9864 }, { "epoch": 0.2173781310769197, "grad_norm": 0.5999990701675415, "learning_rate": 2.7321720959039012e-05, "loss": 0.0903, "step": 9865 }, { "epoch": 0.21740016636643586, "grad_norm": 0.7676544785499573, "learning_rate": 2.7321110419267735e-05, "loss": 0.0666, "step": 9866 }, { "epoch": 0.217422201655952, "grad_norm": 0.9129335880279541, "learning_rate": 2.732049981673844e-05, "loss": 0.1198, "step": 9867 }, { "epoch": 0.21744423694546816, "grad_norm": 0.6778915524482727, "learning_rate": 2.7319889151454227e-05, "loss": 0.1157, "step": 9868 }, { "epoch": 0.21746627223498433, "grad_norm": 0.9336807131767273, "learning_rate": 2.7319278423418215e-05, "loss": 0.0975, "step": 9869 }, { "epoch": 0.2174883075245005, "grad_norm": 0.7869174480438232, "learning_rate": 2.7318667632633512e-05, "loss": 0.0938, "step": 9870 }, { "epoch": 0.21751034281401666, "grad_norm": 0.6915240287780762, "learning_rate": 2.7318056779103227e-05, "loss": 0.1173, "step": 9871 }, { "epoch": 0.21753237810353282, "grad_norm": 0.9697345495223999, "learning_rate": 2.7317445862830478e-05, "loss": 0.0941, "step": 9872 }, { "epoch": 0.21755441339304896, "grad_norm": 0.898285984992981, "learning_rate": 2.731683488381837e-05, "loss": 0.0747, "step": 9873 }, { "epoch": 0.21757644868256512, "grad_norm": 1.0473361015319824, "learning_rate": 2.7316223842070016e-05, "loss": 0.1395, "step": 9874 }, { "epoch": 0.21759848397208129, "grad_norm": 0.9636662602424622, "learning_rate": 2.731561273758853e-05, "loss": 0.1297, "step": 9875 }, { "epoch": 0.21762051926159745, "grad_norm": 0.6312209963798523, "learning_rate": 2.7315001570377026e-05, "loss": 0.0695, "step": 9876 }, { "epoch": 0.21764255455111362, "grad_norm": 0.7425031065940857, "learning_rate": 2.731439034043861e-05, "loss": 0.0673, "step": 9877 }, { "epoch": 0.21766458984062978, "grad_norm": 0.7001942992210388, "learning_rate": 2.7313779047776407e-05, "loss": 0.0987, "step": 9878 }, { "epoch": 0.21768662513014592, "grad_norm": 0.41735896468162537, "learning_rate": 2.731316769239352e-05, "loss": 0.0496, "step": 9879 }, { "epoch": 0.21770866041966208, "grad_norm": 1.0146104097366333, "learning_rate": 2.7312556274293066e-05, "loss": 0.0815, "step": 9880 }, { "epoch": 0.21773069570917825, "grad_norm": 0.8412766456604004, "learning_rate": 2.7311944793478163e-05, "loss": 0.1139, "step": 9881 }, { "epoch": 0.2177527309986944, "grad_norm": 1.0347460508346558, "learning_rate": 2.7311333249951926e-05, "loss": 0.1019, "step": 9882 }, { "epoch": 0.21777476628821058, "grad_norm": 1.037291169166565, "learning_rate": 2.731072164371746e-05, "loss": 0.1461, "step": 9883 }, { "epoch": 0.21779680157772674, "grad_norm": 1.3272712230682373, "learning_rate": 2.7310109974777892e-05, "loss": 0.1342, "step": 9884 }, { "epoch": 0.21781883686724288, "grad_norm": 0.9531673192977905, "learning_rate": 2.7309498243136333e-05, "loss": 0.1013, "step": 9885 }, { "epoch": 0.21784087215675904, "grad_norm": 0.9516242146492004, "learning_rate": 2.7308886448795895e-05, "loss": 0.1143, "step": 9886 }, { "epoch": 0.2178629074462752, "grad_norm": 0.7445951104164124, "learning_rate": 2.73082745917597e-05, "loss": 0.1219, "step": 9887 }, { "epoch": 0.21788494273579137, "grad_norm": 5.743669033050537, "learning_rate": 2.730766267203086e-05, "loss": 0.152, "step": 9888 }, { "epoch": 0.21790697802530754, "grad_norm": 2.3578944206237793, "learning_rate": 2.73070506896125e-05, "loss": 0.1179, "step": 9889 }, { "epoch": 0.2179290133148237, "grad_norm": 0.8401825428009033, "learning_rate": 2.7306438644507723e-05, "loss": 0.0736, "step": 9890 }, { "epoch": 0.21795104860433984, "grad_norm": 0.9541088342666626, "learning_rate": 2.730582653671966e-05, "loss": 0.1079, "step": 9891 }, { "epoch": 0.217973083893856, "grad_norm": 1.8081151247024536, "learning_rate": 2.730521436625142e-05, "loss": 0.1386, "step": 9892 }, { "epoch": 0.21799511918337217, "grad_norm": 1.0659035444259644, "learning_rate": 2.730460213310613e-05, "loss": 0.1353, "step": 9893 }, { "epoch": 0.21801715447288833, "grad_norm": 0.7439138889312744, "learning_rate": 2.73039898372869e-05, "loss": 0.0955, "step": 9894 }, { "epoch": 0.2180391897624045, "grad_norm": 2.2746479511260986, "learning_rate": 2.7303377478796853e-05, "loss": 0.1643, "step": 9895 }, { "epoch": 0.21806122505192066, "grad_norm": 0.6089339852333069, "learning_rate": 2.7302765057639104e-05, "loss": 0.1176, "step": 9896 }, { "epoch": 0.21808326034143682, "grad_norm": 1.1431047916412354, "learning_rate": 2.7302152573816778e-05, "loss": 0.1013, "step": 9897 }, { "epoch": 0.21810529563095296, "grad_norm": 1.1714239120483398, "learning_rate": 2.730154002733299e-05, "loss": 0.1425, "step": 9898 }, { "epoch": 0.21812733092046913, "grad_norm": 1.1291598081588745, "learning_rate": 2.7300927418190864e-05, "loss": 0.101, "step": 9899 }, { "epoch": 0.2181493662099853, "grad_norm": 1.348883032798767, "learning_rate": 2.730031474639352e-05, "loss": 0.1305, "step": 9900 }, { "epoch": 0.21817140149950146, "grad_norm": 1.1533794403076172, "learning_rate": 2.7299702011944074e-05, "loss": 0.1249, "step": 9901 }, { "epoch": 0.21819343678901762, "grad_norm": 0.6662187576293945, "learning_rate": 2.729908921484565e-05, "loss": 0.1027, "step": 9902 }, { "epoch": 0.21821547207853378, "grad_norm": 1.1784839630126953, "learning_rate": 2.729847635510137e-05, "loss": 0.1139, "step": 9903 }, { "epoch": 0.21823750736804992, "grad_norm": 1.1149256229400635, "learning_rate": 2.7297863432714354e-05, "loss": 0.1009, "step": 9904 }, { "epoch": 0.21825954265756609, "grad_norm": 0.690186619758606, "learning_rate": 2.7297250447687728e-05, "loss": 0.1153, "step": 9905 }, { "epoch": 0.21828157794708225, "grad_norm": 1.4511055946350098, "learning_rate": 2.7296637400024608e-05, "loss": 0.0975, "step": 9906 }, { "epoch": 0.21830361323659841, "grad_norm": 0.8727253675460815, "learning_rate": 2.729602428972812e-05, "loss": 0.1002, "step": 9907 }, { "epoch": 0.21832564852611458, "grad_norm": 1.3645743131637573, "learning_rate": 2.7295411116801388e-05, "loss": 0.1355, "step": 9908 }, { "epoch": 0.21834768381563074, "grad_norm": 0.7125402688980103, "learning_rate": 2.7294797881247535e-05, "loss": 0.1136, "step": 9909 }, { "epoch": 0.21836971910514688, "grad_norm": 1.1614059209823608, "learning_rate": 2.729418458306968e-05, "loss": 0.1148, "step": 9910 }, { "epoch": 0.21839175439466305, "grad_norm": 1.1846494674682617, "learning_rate": 2.7293571222270952e-05, "loss": 0.092, "step": 9911 }, { "epoch": 0.2184137896841792, "grad_norm": 0.7993575930595398, "learning_rate": 2.7292957798854476e-05, "loss": 0.0886, "step": 9912 }, { "epoch": 0.21843582497369537, "grad_norm": 0.8417266607284546, "learning_rate": 2.729234431282337e-05, "loss": 0.1041, "step": 9913 }, { "epoch": 0.21845786026321154, "grad_norm": 0.8326360583305359, "learning_rate": 2.7291730764180762e-05, "loss": 0.1448, "step": 9914 }, { "epoch": 0.2184798955527277, "grad_norm": 0.8106403350830078, "learning_rate": 2.7291117152929783e-05, "loss": 0.0958, "step": 9915 }, { "epoch": 0.21850193084224384, "grad_norm": 1.0881333351135254, "learning_rate": 2.7290503479073548e-05, "loss": 0.1131, "step": 9916 }, { "epoch": 0.21852396613176, "grad_norm": 1.0948801040649414, "learning_rate": 2.7289889742615192e-05, "loss": 0.1095, "step": 9917 }, { "epoch": 0.21854600142127617, "grad_norm": 0.6932938694953918, "learning_rate": 2.7289275943557835e-05, "loss": 0.1248, "step": 9918 }, { "epoch": 0.21856803671079233, "grad_norm": 0.5736254453659058, "learning_rate": 2.7288662081904608e-05, "loss": 0.0749, "step": 9919 }, { "epoch": 0.2185900720003085, "grad_norm": 1.0352212190628052, "learning_rate": 2.728804815765863e-05, "loss": 0.1178, "step": 9920 }, { "epoch": 0.21861210728982466, "grad_norm": 1.1213696002960205, "learning_rate": 2.7287434170823037e-05, "loss": 0.0842, "step": 9921 }, { "epoch": 0.2186341425793408, "grad_norm": 1.5144697427749634, "learning_rate": 2.7286820121400952e-05, "loss": 0.1316, "step": 9922 }, { "epoch": 0.21865617786885697, "grad_norm": 1.1596416234970093, "learning_rate": 2.7286206009395506e-05, "loss": 0.1457, "step": 9923 }, { "epoch": 0.21867821315837313, "grad_norm": 0.8120702505111694, "learning_rate": 2.728559183480982e-05, "loss": 0.1084, "step": 9924 }, { "epoch": 0.2187002484478893, "grad_norm": 1.2884482145309448, "learning_rate": 2.728497759764703e-05, "loss": 0.1025, "step": 9925 }, { "epoch": 0.21872228373740546, "grad_norm": 0.7360664010047913, "learning_rate": 2.728436329791026e-05, "loss": 0.1327, "step": 9926 }, { "epoch": 0.21874431902692162, "grad_norm": 1.1739948987960815, "learning_rate": 2.728374893560264e-05, "loss": 0.1388, "step": 9927 }, { "epoch": 0.2187663543164378, "grad_norm": 1.1280500888824463, "learning_rate": 2.7283134510727304e-05, "loss": 0.1562, "step": 9928 }, { "epoch": 0.21878838960595393, "grad_norm": 1.1087864637374878, "learning_rate": 2.728252002328737e-05, "loss": 0.1212, "step": 9929 }, { "epoch": 0.2188104248954701, "grad_norm": 0.8904322981834412, "learning_rate": 2.728190547328598e-05, "loss": 0.1117, "step": 9930 }, { "epoch": 0.21883246018498625, "grad_norm": 0.768980085849762, "learning_rate": 2.7281290860726257e-05, "loss": 0.0712, "step": 9931 }, { "epoch": 0.21885449547450242, "grad_norm": 0.801201343536377, "learning_rate": 2.7280676185611338e-05, "loss": 0.1096, "step": 9932 }, { "epoch": 0.21887653076401858, "grad_norm": 1.327208161354065, "learning_rate": 2.7280061447944348e-05, "loss": 0.1287, "step": 9933 }, { "epoch": 0.21889856605353475, "grad_norm": 0.8003602027893066, "learning_rate": 2.7279446647728417e-05, "loss": 0.1866, "step": 9934 }, { "epoch": 0.21892060134305089, "grad_norm": 0.709285318851471, "learning_rate": 2.7278831784966678e-05, "loss": 0.074, "step": 9935 }, { "epoch": 0.21894263663256705, "grad_norm": 0.9968032836914062, "learning_rate": 2.727821685966227e-05, "loss": 0.1339, "step": 9936 }, { "epoch": 0.21896467192208321, "grad_norm": 1.432634949684143, "learning_rate": 2.7277601871818314e-05, "loss": 0.1117, "step": 9937 }, { "epoch": 0.21898670721159938, "grad_norm": 0.7429400682449341, "learning_rate": 2.727698682143795e-05, "loss": 0.1037, "step": 9938 }, { "epoch": 0.21900874250111554, "grad_norm": 0.9018116593360901, "learning_rate": 2.7276371708524313e-05, "loss": 0.1257, "step": 9939 }, { "epoch": 0.2190307777906317, "grad_norm": 1.447251319885254, "learning_rate": 2.7275756533080528e-05, "loss": 0.1469, "step": 9940 }, { "epoch": 0.21905281308014785, "grad_norm": 0.9532185196876526, "learning_rate": 2.727514129510973e-05, "loss": 0.1154, "step": 9941 }, { "epoch": 0.219074848369664, "grad_norm": 1.018237590789795, "learning_rate": 2.7274525994615058e-05, "loss": 0.1386, "step": 9942 }, { "epoch": 0.21909688365918017, "grad_norm": 0.6323473453521729, "learning_rate": 2.7273910631599642e-05, "loss": 0.069, "step": 9943 }, { "epoch": 0.21911891894869634, "grad_norm": 1.0913084745407104, "learning_rate": 2.727329520606662e-05, "loss": 0.1563, "step": 9944 }, { "epoch": 0.2191409542382125, "grad_norm": 1.1391606330871582, "learning_rate": 2.727267971801912e-05, "loss": 0.0799, "step": 9945 }, { "epoch": 0.21916298952772867, "grad_norm": 0.930403470993042, "learning_rate": 2.727206416746028e-05, "loss": 0.1125, "step": 9946 }, { "epoch": 0.2191850248172448, "grad_norm": 1.1188459396362305, "learning_rate": 2.7271448554393244e-05, "loss": 0.1055, "step": 9947 }, { "epoch": 0.21920706010676097, "grad_norm": 1.0799719095230103, "learning_rate": 2.7270832878821132e-05, "loss": 0.1102, "step": 9948 }, { "epoch": 0.21922909539627713, "grad_norm": 0.6703499555587769, "learning_rate": 2.7270217140747093e-05, "loss": 0.1045, "step": 9949 }, { "epoch": 0.2192511306857933, "grad_norm": 0.9742268323898315, "learning_rate": 2.7269601340174256e-05, "loss": 0.1395, "step": 9950 }, { "epoch": 0.21927316597530946, "grad_norm": 1.1528940200805664, "learning_rate": 2.726898547710576e-05, "loss": 0.126, "step": 9951 }, { "epoch": 0.21929520126482563, "grad_norm": 0.8977015018463135, "learning_rate": 2.726836955154474e-05, "loss": 0.1193, "step": 9952 }, { "epoch": 0.21931723655434177, "grad_norm": 0.8058996796607971, "learning_rate": 2.726775356349434e-05, "loss": 0.1127, "step": 9953 }, { "epoch": 0.21933927184385793, "grad_norm": 1.138666033744812, "learning_rate": 2.726713751295769e-05, "loss": 0.1381, "step": 9954 }, { "epoch": 0.2193613071333741, "grad_norm": 0.621137261390686, "learning_rate": 2.726652139993793e-05, "loss": 0.0849, "step": 9955 }, { "epoch": 0.21938334242289026, "grad_norm": 0.8133714199066162, "learning_rate": 2.7265905224438198e-05, "loss": 0.0987, "step": 9956 }, { "epoch": 0.21940537771240642, "grad_norm": 0.7086314558982849, "learning_rate": 2.7265288986461636e-05, "loss": 0.0788, "step": 9957 }, { "epoch": 0.2194274130019226, "grad_norm": 1.01987886428833, "learning_rate": 2.7264672686011378e-05, "loss": 0.1099, "step": 9958 }, { "epoch": 0.21944944829143873, "grad_norm": 0.7582898736000061, "learning_rate": 2.726405632309057e-05, "loss": 0.1074, "step": 9959 }, { "epoch": 0.2194714835809549, "grad_norm": 1.234008550643921, "learning_rate": 2.7263439897702337e-05, "loss": 0.1198, "step": 9960 }, { "epoch": 0.21949351887047105, "grad_norm": 0.788784384727478, "learning_rate": 2.7262823409849833e-05, "loss": 0.1127, "step": 9961 }, { "epoch": 0.21951555415998722, "grad_norm": 1.0988487005233765, "learning_rate": 2.7262206859536195e-05, "loss": 0.0959, "step": 9962 }, { "epoch": 0.21953758944950338, "grad_norm": 0.5128484964370728, "learning_rate": 2.726159024676456e-05, "loss": 0.1089, "step": 9963 }, { "epoch": 0.21955962473901955, "grad_norm": 0.777970016002655, "learning_rate": 2.726097357153807e-05, "loss": 0.1182, "step": 9964 }, { "epoch": 0.2195816600285357, "grad_norm": 1.5082201957702637, "learning_rate": 2.726035683385987e-05, "loss": 0.0983, "step": 9965 }, { "epoch": 0.21960369531805185, "grad_norm": 0.8555256724357605, "learning_rate": 2.7259740033733096e-05, "loss": 0.11, "step": 9966 }, { "epoch": 0.21962573060756801, "grad_norm": 0.854417622089386, "learning_rate": 2.7259123171160894e-05, "loss": 0.0963, "step": 9967 }, { "epoch": 0.21964776589708418, "grad_norm": 0.9186294674873352, "learning_rate": 2.72585062461464e-05, "loss": 0.1153, "step": 9968 }, { "epoch": 0.21966980118660034, "grad_norm": 0.5285221934318542, "learning_rate": 2.725788925869276e-05, "loss": 0.0636, "step": 9969 }, { "epoch": 0.2196918364761165, "grad_norm": 0.7702758312225342, "learning_rate": 2.7257272208803123e-05, "loss": 0.1127, "step": 9970 }, { "epoch": 0.21971387176563267, "grad_norm": 1.337301254272461, "learning_rate": 2.725665509648062e-05, "loss": 0.1715, "step": 9971 }, { "epoch": 0.2197359070551488, "grad_norm": 0.9862313270568848, "learning_rate": 2.72560379217284e-05, "loss": 0.1231, "step": 9972 }, { "epoch": 0.21975794234466497, "grad_norm": 1.0416511297225952, "learning_rate": 2.7255420684549613e-05, "loss": 0.0891, "step": 9973 }, { "epoch": 0.21977997763418114, "grad_norm": 0.7161097526550293, "learning_rate": 2.725480338494739e-05, "loss": 0.0893, "step": 9974 }, { "epoch": 0.2198020129236973, "grad_norm": 1.0325288772583008, "learning_rate": 2.7254186022924888e-05, "loss": 0.1159, "step": 9975 }, { "epoch": 0.21982404821321347, "grad_norm": 0.8953006267547607, "learning_rate": 2.725356859848524e-05, "loss": 0.1208, "step": 9976 }, { "epoch": 0.21984608350272963, "grad_norm": 1.0890164375305176, "learning_rate": 2.7252951111631598e-05, "loss": 0.1005, "step": 9977 }, { "epoch": 0.21986811879224577, "grad_norm": 0.9051802754402161, "learning_rate": 2.7252333562367105e-05, "loss": 0.0996, "step": 9978 }, { "epoch": 0.21989015408176193, "grad_norm": 1.325790286064148, "learning_rate": 2.7251715950694906e-05, "loss": 0.0899, "step": 9979 }, { "epoch": 0.2199121893712781, "grad_norm": 0.9305921792984009, "learning_rate": 2.7251098276618147e-05, "loss": 0.1348, "step": 9980 }, { "epoch": 0.21993422466079426, "grad_norm": 0.4954572021961212, "learning_rate": 2.7250480540139976e-05, "loss": 0.1266, "step": 9981 }, { "epoch": 0.21995625995031043, "grad_norm": 0.97369784116745, "learning_rate": 2.724986274126354e-05, "loss": 0.0813, "step": 9982 }, { "epoch": 0.2199782952398266, "grad_norm": 0.8789435029029846, "learning_rate": 2.7249244879991982e-05, "loss": 0.0982, "step": 9983 }, { "epoch": 0.22000033052934273, "grad_norm": 0.8482926487922668, "learning_rate": 2.7248626956328454e-05, "loss": 0.1251, "step": 9984 }, { "epoch": 0.2200223658188589, "grad_norm": 0.988722562789917, "learning_rate": 2.7248008970276098e-05, "loss": 0.111, "step": 9985 }, { "epoch": 0.22004440110837506, "grad_norm": 0.6862898468971252, "learning_rate": 2.7247390921838064e-05, "loss": 0.0825, "step": 9986 }, { "epoch": 0.22006643639789122, "grad_norm": 0.7978460192680359, "learning_rate": 2.7246772811017504e-05, "loss": 0.0915, "step": 9987 }, { "epoch": 0.2200884716874074, "grad_norm": 0.9982849359512329, "learning_rate": 2.7246154637817562e-05, "loss": 0.1046, "step": 9988 }, { "epoch": 0.22011050697692355, "grad_norm": 1.3253566026687622, "learning_rate": 2.724553640224138e-05, "loss": 0.1332, "step": 9989 }, { "epoch": 0.2201325422664397, "grad_norm": 0.6635584831237793, "learning_rate": 2.7244918104292124e-05, "loss": 0.0824, "step": 9990 }, { "epoch": 0.22015457755595585, "grad_norm": 1.2561343908309937, "learning_rate": 2.7244299743972934e-05, "loss": 0.12, "step": 9991 }, { "epoch": 0.22017661284547202, "grad_norm": 0.6619030237197876, "learning_rate": 2.7243681321286952e-05, "loss": 0.1115, "step": 9992 }, { "epoch": 0.22019864813498818, "grad_norm": 0.8209976553916931, "learning_rate": 2.7243062836237346e-05, "loss": 0.1762, "step": 9993 }, { "epoch": 0.22022068342450435, "grad_norm": 0.7849910259246826, "learning_rate": 2.7242444288827246e-05, "loss": 0.1351, "step": 9994 }, { "epoch": 0.2202427187140205, "grad_norm": 0.8517194986343384, "learning_rate": 2.7241825679059814e-05, "loss": 0.113, "step": 9995 }, { "epoch": 0.22026475400353665, "grad_norm": 0.7186002731323242, "learning_rate": 2.72412070069382e-05, "loss": 0.1094, "step": 9996 }, { "epoch": 0.22028678929305281, "grad_norm": 1.0186516046524048, "learning_rate": 2.7240588272465558e-05, "loss": 0.1129, "step": 9997 }, { "epoch": 0.22030882458256898, "grad_norm": 1.0003764629364014, "learning_rate": 2.7239969475645035e-05, "loss": 0.156, "step": 9998 }, { "epoch": 0.22033085987208514, "grad_norm": 1.2438358068466187, "learning_rate": 2.723935061647978e-05, "loss": 0.1145, "step": 9999 }, { "epoch": 0.2203528951616013, "grad_norm": 1.0268765687942505, "learning_rate": 2.723873169497295e-05, "loss": 0.1058, "step": 10000 }, { "epoch": 0.22037493045111747, "grad_norm": 0.7889515161514282, "learning_rate": 2.72381127111277e-05, "loss": 0.0988, "step": 10001 }, { "epoch": 0.22039696574063364, "grad_norm": 0.8499554991722107, "learning_rate": 2.7237493664947175e-05, "loss": 0.1419, "step": 10002 }, { "epoch": 0.22041900103014977, "grad_norm": 0.6791795492172241, "learning_rate": 2.7236874556434536e-05, "loss": 0.0923, "step": 10003 }, { "epoch": 0.22044103631966594, "grad_norm": 1.3988656997680664, "learning_rate": 2.723625538559293e-05, "loss": 0.138, "step": 10004 }, { "epoch": 0.2204630716091821, "grad_norm": 1.1192121505737305, "learning_rate": 2.7235636152425516e-05, "loss": 0.0889, "step": 10005 }, { "epoch": 0.22048510689869827, "grad_norm": 1.0737602710723877, "learning_rate": 2.7235016856935447e-05, "loss": 0.1149, "step": 10006 }, { "epoch": 0.22050714218821443, "grad_norm": 0.9917152523994446, "learning_rate": 2.7234397499125873e-05, "loss": 0.1345, "step": 10007 }, { "epoch": 0.2205291774777306, "grad_norm": 0.42686548829078674, "learning_rate": 2.7233778078999954e-05, "loss": 0.0872, "step": 10008 }, { "epoch": 0.22055121276724673, "grad_norm": 1.1675158739089966, "learning_rate": 2.7233158596560843e-05, "loss": 0.1017, "step": 10009 }, { "epoch": 0.2205732480567629, "grad_norm": 0.5909465551376343, "learning_rate": 2.7232539051811695e-05, "loss": 0.1142, "step": 10010 }, { "epoch": 0.22059528334627906, "grad_norm": 0.7344589829444885, "learning_rate": 2.7231919444755663e-05, "loss": 0.0483, "step": 10011 }, { "epoch": 0.22061731863579523, "grad_norm": 0.6089965105056763, "learning_rate": 2.7231299775395914e-05, "loss": 0.0875, "step": 10012 }, { "epoch": 0.2206393539253114, "grad_norm": 1.1043410301208496, "learning_rate": 2.723068004373559e-05, "loss": 0.1357, "step": 10013 }, { "epoch": 0.22066138921482756, "grad_norm": 0.7385397553443909, "learning_rate": 2.7230060249777852e-05, "loss": 0.0905, "step": 10014 }, { "epoch": 0.2206834245043437, "grad_norm": 1.0420044660568237, "learning_rate": 2.7229440393525863e-05, "loss": 0.0909, "step": 10015 }, { "epoch": 0.22070545979385986, "grad_norm": 0.9796206951141357, "learning_rate": 2.7228820474982776e-05, "loss": 0.1055, "step": 10016 }, { "epoch": 0.22072749508337602, "grad_norm": 0.8933034539222717, "learning_rate": 2.722820049415175e-05, "loss": 0.0896, "step": 10017 }, { "epoch": 0.2207495303728922, "grad_norm": 0.6115058660507202, "learning_rate": 2.7227580451035936e-05, "loss": 0.081, "step": 10018 }, { "epoch": 0.22077156566240835, "grad_norm": 1.7287797927856445, "learning_rate": 2.72269603456385e-05, "loss": 0.1956, "step": 10019 }, { "epoch": 0.22079360095192452, "grad_norm": 0.4928208589553833, "learning_rate": 2.7226340177962598e-05, "loss": 0.0957, "step": 10020 }, { "epoch": 0.22081563624144065, "grad_norm": 0.936122715473175, "learning_rate": 2.7225719948011395e-05, "loss": 0.1284, "step": 10021 }, { "epoch": 0.22083767153095682, "grad_norm": 0.830355167388916, "learning_rate": 2.7225099655788034e-05, "loss": 0.1435, "step": 10022 }, { "epoch": 0.22085970682047298, "grad_norm": 0.9485803842544556, "learning_rate": 2.7224479301295693e-05, "loss": 0.0802, "step": 10023 }, { "epoch": 0.22088174210998915, "grad_norm": 1.0664958953857422, "learning_rate": 2.722385888453752e-05, "loss": 0.1576, "step": 10024 }, { "epoch": 0.2209037773995053, "grad_norm": 0.9357352256774902, "learning_rate": 2.7223238405516682e-05, "loss": 0.1045, "step": 10025 }, { "epoch": 0.22092581268902148, "grad_norm": 0.6105978488922119, "learning_rate": 2.7222617864236335e-05, "loss": 0.1101, "step": 10026 }, { "epoch": 0.22094784797853761, "grad_norm": 0.751507580280304, "learning_rate": 2.722199726069964e-05, "loss": 0.0934, "step": 10027 }, { "epoch": 0.22096988326805378, "grad_norm": 0.9199213981628418, "learning_rate": 2.7221376594909756e-05, "loss": 0.125, "step": 10028 }, { "epoch": 0.22099191855756994, "grad_norm": 1.0530768632888794, "learning_rate": 2.7220755866869855e-05, "loss": 0.0886, "step": 10029 }, { "epoch": 0.2210139538470861, "grad_norm": 0.5483476519584656, "learning_rate": 2.7220135076583084e-05, "loss": 0.1277, "step": 10030 }, { "epoch": 0.22103598913660227, "grad_norm": 1.3002068996429443, "learning_rate": 2.7219514224052613e-05, "loss": 0.105, "step": 10031 }, { "epoch": 0.22105802442611844, "grad_norm": 0.6197077035903931, "learning_rate": 2.7218893309281608e-05, "loss": 0.0879, "step": 10032 }, { "epoch": 0.2210800597156346, "grad_norm": 0.7786886692047119, "learning_rate": 2.7218272332273224e-05, "loss": 0.1163, "step": 10033 }, { "epoch": 0.22110209500515074, "grad_norm": 0.8922103643417358, "learning_rate": 2.721765129303063e-05, "loss": 0.0812, "step": 10034 }, { "epoch": 0.2211241302946669, "grad_norm": 0.634300708770752, "learning_rate": 2.721703019155698e-05, "loss": 0.0803, "step": 10035 }, { "epoch": 0.22114616558418307, "grad_norm": 1.4138649702072144, "learning_rate": 2.7216409027855448e-05, "loss": 0.193, "step": 10036 }, { "epoch": 0.22116820087369923, "grad_norm": 0.588144838809967, "learning_rate": 2.7215787801929194e-05, "loss": 0.1018, "step": 10037 }, { "epoch": 0.2211902361632154, "grad_norm": 1.3717951774597168, "learning_rate": 2.721516651378138e-05, "loss": 0.086, "step": 10038 }, { "epoch": 0.22121227145273156, "grad_norm": 1.261015772819519, "learning_rate": 2.7214545163415175e-05, "loss": 0.1008, "step": 10039 }, { "epoch": 0.2212343067422477, "grad_norm": 0.9522586464881897, "learning_rate": 2.7213923750833742e-05, "loss": 0.1043, "step": 10040 }, { "epoch": 0.22125634203176386, "grad_norm": 0.8294019103050232, "learning_rate": 2.7213302276040243e-05, "loss": 0.1034, "step": 10041 }, { "epoch": 0.22127837732128003, "grad_norm": 1.0707975625991821, "learning_rate": 2.721268073903785e-05, "loss": 0.1193, "step": 10042 }, { "epoch": 0.2213004126107962, "grad_norm": 1.3951222896575928, "learning_rate": 2.721205913982972e-05, "loss": 0.1048, "step": 10043 }, { "epoch": 0.22132244790031236, "grad_norm": 1.020912766456604, "learning_rate": 2.7211437478419028e-05, "loss": 0.1341, "step": 10044 }, { "epoch": 0.22134448318982852, "grad_norm": 0.9786034822463989, "learning_rate": 2.7210815754808937e-05, "loss": 0.1231, "step": 10045 }, { "epoch": 0.22136651847934466, "grad_norm": 1.0493836402893066, "learning_rate": 2.7210193969002612e-05, "loss": 0.1251, "step": 10046 }, { "epoch": 0.22138855376886082, "grad_norm": 0.8558642864227295, "learning_rate": 2.720957212100322e-05, "loss": 0.1215, "step": 10047 }, { "epoch": 0.221410589058377, "grad_norm": 0.7661131024360657, "learning_rate": 2.720895021081393e-05, "loss": 0.0809, "step": 10048 }, { "epoch": 0.22143262434789315, "grad_norm": 0.7177752256393433, "learning_rate": 2.7208328238437913e-05, "loss": 0.1456, "step": 10049 }, { "epoch": 0.22145465963740932, "grad_norm": 1.334648847579956, "learning_rate": 2.720770620387833e-05, "loss": 0.1332, "step": 10050 }, { "epoch": 0.22147669492692548, "grad_norm": 0.7921330332756042, "learning_rate": 2.7207084107138357e-05, "loss": 0.1525, "step": 10051 }, { "epoch": 0.22149873021644162, "grad_norm": 1.111025094985962, "learning_rate": 2.7206461948221158e-05, "loss": 0.1328, "step": 10052 }, { "epoch": 0.22152076550595778, "grad_norm": 1.369758129119873, "learning_rate": 2.7205839727129897e-05, "loss": 0.1448, "step": 10053 }, { "epoch": 0.22154280079547395, "grad_norm": 0.7155562043190002, "learning_rate": 2.7205217443867753e-05, "loss": 0.1241, "step": 10054 }, { "epoch": 0.2215648360849901, "grad_norm": 0.9094608426094055, "learning_rate": 2.720459509843789e-05, "loss": 0.0987, "step": 10055 }, { "epoch": 0.22158687137450628, "grad_norm": 0.795226514339447, "learning_rate": 2.7203972690843483e-05, "loss": 0.0868, "step": 10056 }, { "epoch": 0.22160890666402244, "grad_norm": 0.9970377683639526, "learning_rate": 2.7203350221087696e-05, "loss": 0.1265, "step": 10057 }, { "epoch": 0.22163094195353858, "grad_norm": 0.894763171672821, "learning_rate": 2.7202727689173704e-05, "loss": 0.1031, "step": 10058 }, { "epoch": 0.22165297724305474, "grad_norm": 0.7258543968200684, "learning_rate": 2.7202105095104675e-05, "loss": 0.1333, "step": 10059 }, { "epoch": 0.2216750125325709, "grad_norm": 0.6062623262405396, "learning_rate": 2.7201482438883784e-05, "loss": 0.0639, "step": 10060 }, { "epoch": 0.22169704782208707, "grad_norm": 1.0468330383300781, "learning_rate": 2.7200859720514194e-05, "loss": 0.0867, "step": 10061 }, { "epoch": 0.22171908311160324, "grad_norm": 0.6209940314292908, "learning_rate": 2.7200236939999092e-05, "loss": 0.0838, "step": 10062 }, { "epoch": 0.2217411184011194, "grad_norm": 0.5300602316856384, "learning_rate": 2.7199614097341634e-05, "loss": 0.0987, "step": 10063 }, { "epoch": 0.22176315369063554, "grad_norm": 0.5049350261688232, "learning_rate": 2.7198991192545002e-05, "loss": 0.0827, "step": 10064 }, { "epoch": 0.2217851889801517, "grad_norm": 0.726148247718811, "learning_rate": 2.7198368225612365e-05, "loss": 0.1203, "step": 10065 }, { "epoch": 0.22180722426966787, "grad_norm": 1.0201352834701538, "learning_rate": 2.71977451965469e-05, "loss": 0.0864, "step": 10066 }, { "epoch": 0.22182925955918403, "grad_norm": 0.9311366081237793, "learning_rate": 2.7197122105351774e-05, "loss": 0.1136, "step": 10067 }, { "epoch": 0.2218512948487002, "grad_norm": 1.410618782043457, "learning_rate": 2.7196498952030163e-05, "loss": 0.121, "step": 10068 }, { "epoch": 0.22187333013821636, "grad_norm": 0.7525171637535095, "learning_rate": 2.7195875736585245e-05, "loss": 0.0933, "step": 10069 }, { "epoch": 0.22189536542773253, "grad_norm": 0.8732406497001648, "learning_rate": 2.7195252459020194e-05, "loss": 0.099, "step": 10070 }, { "epoch": 0.22191740071724866, "grad_norm": 0.8062119483947754, "learning_rate": 2.7194629119338182e-05, "loss": 0.082, "step": 10071 }, { "epoch": 0.22193943600676483, "grad_norm": 0.5086005926132202, "learning_rate": 2.7194005717542383e-05, "loss": 0.1283, "step": 10072 }, { "epoch": 0.221961471296281, "grad_norm": 1.1760821342468262, "learning_rate": 2.7193382253635973e-05, "loss": 0.1691, "step": 10073 }, { "epoch": 0.22198350658579716, "grad_norm": 0.7771771550178528, "learning_rate": 2.7192758727622132e-05, "loss": 0.1255, "step": 10074 }, { "epoch": 0.22200554187531332, "grad_norm": 0.8903673887252808, "learning_rate": 2.7192135139504032e-05, "loss": 0.0942, "step": 10075 }, { "epoch": 0.2220275771648295, "grad_norm": 0.6541823148727417, "learning_rate": 2.7191511489284847e-05, "loss": 0.1229, "step": 10076 }, { "epoch": 0.22204961245434562, "grad_norm": 1.4601619243621826, "learning_rate": 2.7190887776967756e-05, "loss": 0.1199, "step": 10077 }, { "epoch": 0.2220716477438618, "grad_norm": 0.6072689294815063, "learning_rate": 2.719026400255594e-05, "loss": 0.0893, "step": 10078 }, { "epoch": 0.22209368303337795, "grad_norm": 0.38899707794189453, "learning_rate": 2.718964016605257e-05, "loss": 0.1339, "step": 10079 }, { "epoch": 0.22211571832289412, "grad_norm": 1.0658752918243408, "learning_rate": 2.7189016267460828e-05, "loss": 0.0953, "step": 10080 }, { "epoch": 0.22213775361241028, "grad_norm": 0.8468160629272461, "learning_rate": 2.718839230678389e-05, "loss": 0.1177, "step": 10081 }, { "epoch": 0.22215978890192645, "grad_norm": 1.0605018138885498, "learning_rate": 2.718776828402493e-05, "loss": 0.107, "step": 10082 }, { "epoch": 0.22218182419144258, "grad_norm": 0.7629113793373108, "learning_rate": 2.7187144199187136e-05, "loss": 0.0925, "step": 10083 }, { "epoch": 0.22220385948095875, "grad_norm": 0.8855711817741394, "learning_rate": 2.7186520052273675e-05, "loss": 0.082, "step": 10084 }, { "epoch": 0.2222258947704749, "grad_norm": 0.7445241808891296, "learning_rate": 2.7185895843287737e-05, "loss": 0.097, "step": 10085 }, { "epoch": 0.22224793005999108, "grad_norm": 1.3707458972930908, "learning_rate": 2.71852715722325e-05, "loss": 0.126, "step": 10086 }, { "epoch": 0.22226996534950724, "grad_norm": 0.6302245259284973, "learning_rate": 2.7184647239111135e-05, "loss": 0.0986, "step": 10087 }, { "epoch": 0.2222920006390234, "grad_norm": 1.1061837673187256, "learning_rate": 2.718402284392683e-05, "loss": 0.1092, "step": 10088 }, { "epoch": 0.22231403592853954, "grad_norm": 1.0989302396774292, "learning_rate": 2.718339838668276e-05, "loss": 0.1012, "step": 10089 }, { "epoch": 0.2223360712180557, "grad_norm": 0.8820054531097412, "learning_rate": 2.7182773867382114e-05, "loss": 0.1348, "step": 10090 }, { "epoch": 0.22235810650757187, "grad_norm": 0.90816730260849, "learning_rate": 2.7182149286028063e-05, "loss": 0.1485, "step": 10091 }, { "epoch": 0.22238014179708804, "grad_norm": 0.5965984463691711, "learning_rate": 2.7181524642623795e-05, "loss": 0.0752, "step": 10092 }, { "epoch": 0.2224021770866042, "grad_norm": 0.9628579616546631, "learning_rate": 2.718089993717249e-05, "loss": 0.1243, "step": 10093 }, { "epoch": 0.22242421237612037, "grad_norm": 0.7623855471611023, "learning_rate": 2.7180275169677328e-05, "loss": 0.095, "step": 10094 }, { "epoch": 0.2224462476656365, "grad_norm": 0.8552988171577454, "learning_rate": 2.7179650340141496e-05, "loss": 0.0917, "step": 10095 }, { "epoch": 0.22246828295515267, "grad_norm": 0.6414490938186646, "learning_rate": 2.717902544856817e-05, "loss": 0.1918, "step": 10096 }, { "epoch": 0.22249031824466883, "grad_norm": 0.8444130420684814, "learning_rate": 2.717840049496054e-05, "loss": 0.1142, "step": 10097 }, { "epoch": 0.222512353534185, "grad_norm": 0.8902589678764343, "learning_rate": 2.7177775479321783e-05, "loss": 0.1173, "step": 10098 }, { "epoch": 0.22253438882370116, "grad_norm": 0.9496859908103943, "learning_rate": 2.7177150401655086e-05, "loss": 0.1164, "step": 10099 }, { "epoch": 0.22255642411321733, "grad_norm": 0.9084880352020264, "learning_rate": 2.717652526196363e-05, "loss": 0.0955, "step": 10100 }, { "epoch": 0.22257845940273346, "grad_norm": 1.0181070566177368, "learning_rate": 2.7175900060250607e-05, "loss": 0.0837, "step": 10101 }, { "epoch": 0.22260049469224963, "grad_norm": 1.1774654388427734, "learning_rate": 2.717527479651919e-05, "loss": 0.1073, "step": 10102 }, { "epoch": 0.2226225299817658, "grad_norm": 0.6182176470756531, "learning_rate": 2.7174649470772574e-05, "loss": 0.0727, "step": 10103 }, { "epoch": 0.22264456527128196, "grad_norm": 0.746043860912323, "learning_rate": 2.7174024083013935e-05, "loss": 0.1151, "step": 10104 }, { "epoch": 0.22266660056079812, "grad_norm": 0.7812341451644897, "learning_rate": 2.7173398633246467e-05, "loss": 0.1046, "step": 10105 }, { "epoch": 0.2226886358503143, "grad_norm": 1.0718311071395874, "learning_rate": 2.7172773121473352e-05, "loss": 0.1261, "step": 10106 }, { "epoch": 0.22271067113983045, "grad_norm": 0.9802188277244568, "learning_rate": 2.717214754769777e-05, "loss": 0.0754, "step": 10107 }, { "epoch": 0.2227327064293466, "grad_norm": 0.9777213931083679, "learning_rate": 2.717152191192292e-05, "loss": 0.136, "step": 10108 }, { "epoch": 0.22275474171886275, "grad_norm": 0.8169599175453186, "learning_rate": 2.7170896214151982e-05, "loss": 0.0939, "step": 10109 }, { "epoch": 0.22277677700837892, "grad_norm": 0.6302915215492249, "learning_rate": 2.7170270454388142e-05, "loss": 0.1033, "step": 10110 }, { "epoch": 0.22279881229789508, "grad_norm": 1.100840449333191, "learning_rate": 2.7169644632634585e-05, "loss": 0.1151, "step": 10111 }, { "epoch": 0.22282084758741125, "grad_norm": 1.0025339126586914, "learning_rate": 2.7169018748894503e-05, "loss": 0.1046, "step": 10112 }, { "epoch": 0.2228428828769274, "grad_norm": 1.2815768718719482, "learning_rate": 2.7168392803171085e-05, "loss": 0.0947, "step": 10113 }, { "epoch": 0.22286491816644355, "grad_norm": 0.8571373820304871, "learning_rate": 2.7167766795467517e-05, "loss": 0.1789, "step": 10114 }, { "epoch": 0.2228869534559597, "grad_norm": 1.0417851209640503, "learning_rate": 2.716714072578699e-05, "loss": 0.1378, "step": 10115 }, { "epoch": 0.22290898874547588, "grad_norm": 1.942679524421692, "learning_rate": 2.7166514594132687e-05, "loss": 0.1226, "step": 10116 }, { "epoch": 0.22293102403499204, "grad_norm": 1.0400151014328003, "learning_rate": 2.7165888400507804e-05, "loss": 0.0975, "step": 10117 }, { "epoch": 0.2229530593245082, "grad_norm": 0.7102256417274475, "learning_rate": 2.7165262144915525e-05, "loss": 0.145, "step": 10118 }, { "epoch": 0.22297509461402437, "grad_norm": 0.7840312719345093, "learning_rate": 2.716463582735904e-05, "loss": 0.0891, "step": 10119 }, { "epoch": 0.2229971299035405, "grad_norm": 1.4957590103149414, "learning_rate": 2.7164009447841546e-05, "loss": 0.0997, "step": 10120 }, { "epoch": 0.22301916519305667, "grad_norm": 1.2004361152648926, "learning_rate": 2.716338300636623e-05, "loss": 0.1113, "step": 10121 }, { "epoch": 0.22304120048257284, "grad_norm": 0.9901188015937805, "learning_rate": 2.7162756502936275e-05, "loss": 0.1055, "step": 10122 }, { "epoch": 0.223063235772089, "grad_norm": 1.1390050649642944, "learning_rate": 2.7162129937554882e-05, "loss": 0.1543, "step": 10123 }, { "epoch": 0.22308527106160517, "grad_norm": 0.6494156718254089, "learning_rate": 2.7161503310225243e-05, "loss": 0.1389, "step": 10124 }, { "epoch": 0.22310730635112133, "grad_norm": 1.2261446714401245, "learning_rate": 2.7160876620950544e-05, "loss": 0.119, "step": 10125 }, { "epoch": 0.22312934164063747, "grad_norm": 1.1791348457336426, "learning_rate": 2.7160249869733978e-05, "loss": 0.1225, "step": 10126 }, { "epoch": 0.22315137693015363, "grad_norm": 1.2718849182128906, "learning_rate": 2.715962305657874e-05, "loss": 0.1365, "step": 10127 }, { "epoch": 0.2231734122196698, "grad_norm": 1.0973457098007202, "learning_rate": 2.715899618148802e-05, "loss": 0.1158, "step": 10128 }, { "epoch": 0.22319544750918596, "grad_norm": 1.0075867176055908, "learning_rate": 2.7158369244465014e-05, "loss": 0.1399, "step": 10129 }, { "epoch": 0.22321748279870213, "grad_norm": 0.9967958331108093, "learning_rate": 2.7157742245512914e-05, "loss": 0.0932, "step": 10130 }, { "epoch": 0.2232395180882183, "grad_norm": 1.0810497999191284, "learning_rate": 2.715711518463491e-05, "loss": 0.0873, "step": 10131 }, { "epoch": 0.22326155337773443, "grad_norm": 0.6085215210914612, "learning_rate": 2.7156488061834202e-05, "loss": 0.0722, "step": 10132 }, { "epoch": 0.2232835886672506, "grad_norm": 0.6569129824638367, "learning_rate": 2.7155860877113983e-05, "loss": 0.0657, "step": 10133 }, { "epoch": 0.22330562395676676, "grad_norm": 0.9495887160301208, "learning_rate": 2.7155233630477442e-05, "loss": 0.086, "step": 10134 }, { "epoch": 0.22332765924628292, "grad_norm": 0.9747902154922485, "learning_rate": 2.715460632192778e-05, "loss": 0.1149, "step": 10135 }, { "epoch": 0.2233496945357991, "grad_norm": 0.7109290361404419, "learning_rate": 2.715397895146819e-05, "loss": 0.1488, "step": 10136 }, { "epoch": 0.22337172982531525, "grad_norm": 0.8837416768074036, "learning_rate": 2.7153351519101867e-05, "loss": 0.1025, "step": 10137 }, { "epoch": 0.22339376511483142, "grad_norm": 0.6934148669242859, "learning_rate": 2.7152724024832008e-05, "loss": 0.1209, "step": 10138 }, { "epoch": 0.22341580040434755, "grad_norm": 0.9938518404960632, "learning_rate": 2.7152096468661808e-05, "loss": 0.0951, "step": 10139 }, { "epoch": 0.22343783569386372, "grad_norm": 0.7556613683700562, "learning_rate": 2.7151468850594465e-05, "loss": 0.1299, "step": 10140 }, { "epoch": 0.22345987098337988, "grad_norm": 1.2238924503326416, "learning_rate": 2.7150841170633178e-05, "loss": 0.1039, "step": 10141 }, { "epoch": 0.22348190627289605, "grad_norm": 0.7037465572357178, "learning_rate": 2.7150213428781138e-05, "loss": 0.0836, "step": 10142 }, { "epoch": 0.2235039415624122, "grad_norm": 0.8595729470252991, "learning_rate": 2.7149585625041543e-05, "loss": 0.0782, "step": 10143 }, { "epoch": 0.22352597685192838, "grad_norm": 0.7317312955856323, "learning_rate": 2.7148957759417597e-05, "loss": 0.1352, "step": 10144 }, { "epoch": 0.2235480121414445, "grad_norm": 0.5540996789932251, "learning_rate": 2.714832983191249e-05, "loss": 0.0958, "step": 10145 }, { "epoch": 0.22357004743096068, "grad_norm": 0.5210787057876587, "learning_rate": 2.714770184252943e-05, "loss": 0.0633, "step": 10146 }, { "epoch": 0.22359208272047684, "grad_norm": 2.2733871936798096, "learning_rate": 2.714707379127161e-05, "loss": 0.1175, "step": 10147 }, { "epoch": 0.223614118009993, "grad_norm": 0.9768264293670654, "learning_rate": 2.7146445678142226e-05, "loss": 0.143, "step": 10148 }, { "epoch": 0.22363615329950917, "grad_norm": 1.109914779663086, "learning_rate": 2.7145817503144486e-05, "loss": 0.1443, "step": 10149 }, { "epoch": 0.22365818858902534, "grad_norm": 0.9918004870414734, "learning_rate": 2.714518926628158e-05, "loss": 0.1242, "step": 10150 }, { "epoch": 0.22368022387854147, "grad_norm": 0.7165598273277283, "learning_rate": 2.714456096755671e-05, "loss": 0.0837, "step": 10151 }, { "epoch": 0.22370225916805764, "grad_norm": 1.6940433979034424, "learning_rate": 2.7143932606973083e-05, "loss": 0.1046, "step": 10152 }, { "epoch": 0.2237242944575738, "grad_norm": 1.2888755798339844, "learning_rate": 2.7143304184533896e-05, "loss": 0.1362, "step": 10153 }, { "epoch": 0.22374632974708997, "grad_norm": 0.8569178581237793, "learning_rate": 2.7142675700242346e-05, "loss": 0.1231, "step": 10154 }, { "epoch": 0.22376836503660613, "grad_norm": 0.8981164693832397, "learning_rate": 2.714204715410164e-05, "loss": 0.1005, "step": 10155 }, { "epoch": 0.2237904003261223, "grad_norm": 0.8472853899002075, "learning_rate": 2.7141418546114975e-05, "loss": 0.129, "step": 10156 }, { "epoch": 0.22381243561563843, "grad_norm": 0.9912520051002502, "learning_rate": 2.7140789876285555e-05, "loss": 0.0799, "step": 10157 }, { "epoch": 0.2238344709051546, "grad_norm": 1.1705033779144287, "learning_rate": 2.714016114461658e-05, "loss": 0.1044, "step": 10158 }, { "epoch": 0.22385650619467076, "grad_norm": 0.8347854018211365, "learning_rate": 2.7139532351111254e-05, "loss": 0.0874, "step": 10159 }, { "epoch": 0.22387854148418693, "grad_norm": 0.6697324514389038, "learning_rate": 2.713890349577278e-05, "loss": 0.0853, "step": 10160 }, { "epoch": 0.2239005767737031, "grad_norm": 0.9246054291725159, "learning_rate": 2.7138274578604367e-05, "loss": 0.0959, "step": 10161 }, { "epoch": 0.22392261206321926, "grad_norm": 1.0290721654891968, "learning_rate": 2.7137645599609208e-05, "loss": 0.1111, "step": 10162 }, { "epoch": 0.2239446473527354, "grad_norm": 0.9830859899520874, "learning_rate": 2.7137016558790507e-05, "loss": 0.107, "step": 10163 }, { "epoch": 0.22396668264225156, "grad_norm": 0.8827304840087891, "learning_rate": 2.713638745615148e-05, "loss": 0.1191, "step": 10164 }, { "epoch": 0.22398871793176772, "grad_norm": 1.222222924232483, "learning_rate": 2.7135758291695318e-05, "loss": 0.1346, "step": 10165 }, { "epoch": 0.2240107532212839, "grad_norm": 1.107548713684082, "learning_rate": 2.7135129065425238e-05, "loss": 0.0902, "step": 10166 }, { "epoch": 0.22403278851080005, "grad_norm": 1.0197120904922485, "learning_rate": 2.7134499777344433e-05, "loss": 0.1144, "step": 10167 }, { "epoch": 0.22405482380031622, "grad_norm": 1.082517147064209, "learning_rate": 2.713387042745611e-05, "loss": 0.1533, "step": 10168 }, { "epoch": 0.22407685908983235, "grad_norm": 0.7766857743263245, "learning_rate": 2.7133241015763485e-05, "loss": 0.0993, "step": 10169 }, { "epoch": 0.22409889437934852, "grad_norm": 0.5145701766014099, "learning_rate": 2.7132611542269753e-05, "loss": 0.1108, "step": 10170 }, { "epoch": 0.22412092966886468, "grad_norm": 0.7057518362998962, "learning_rate": 2.7131982006978126e-05, "loss": 0.0817, "step": 10171 }, { "epoch": 0.22414296495838085, "grad_norm": 1.032608985900879, "learning_rate": 2.713135240989181e-05, "loss": 0.1025, "step": 10172 }, { "epoch": 0.224165000247897, "grad_norm": 0.5948395729064941, "learning_rate": 2.7130722751014015e-05, "loss": 0.079, "step": 10173 }, { "epoch": 0.22418703553741318, "grad_norm": 1.042546272277832, "learning_rate": 2.7130093030347934e-05, "loss": 0.0988, "step": 10174 }, { "epoch": 0.22420907082692934, "grad_norm": 1.504455327987671, "learning_rate": 2.7129463247896793e-05, "loss": 0.1474, "step": 10175 }, { "epoch": 0.22423110611644548, "grad_norm": 0.7161321043968201, "learning_rate": 2.7128833403663784e-05, "loss": 0.1204, "step": 10176 }, { "epoch": 0.22425314140596164, "grad_norm": 0.5892266631126404, "learning_rate": 2.7128203497652128e-05, "loss": 0.0857, "step": 10177 }, { "epoch": 0.2242751766954778, "grad_norm": 0.7100671529769897, "learning_rate": 2.7127573529865028e-05, "loss": 0.0821, "step": 10178 }, { "epoch": 0.22429721198499397, "grad_norm": 0.9400622844696045, "learning_rate": 2.7126943500305693e-05, "loss": 0.1182, "step": 10179 }, { "epoch": 0.22431924727451014, "grad_norm": 1.0355212688446045, "learning_rate": 2.7126313408977326e-05, "loss": 0.0976, "step": 10180 }, { "epoch": 0.2243412825640263, "grad_norm": 0.629266083240509, "learning_rate": 2.7125683255883148e-05, "loss": 0.11, "step": 10181 }, { "epoch": 0.22436331785354244, "grad_norm": 1.1941354274749756, "learning_rate": 2.7125053041026363e-05, "loss": 0.1268, "step": 10182 }, { "epoch": 0.2243853531430586, "grad_norm": 0.8211716413497925, "learning_rate": 2.7124422764410176e-05, "loss": 0.0785, "step": 10183 }, { "epoch": 0.22440738843257477, "grad_norm": 1.632789134979248, "learning_rate": 2.7123792426037807e-05, "loss": 0.1331, "step": 10184 }, { "epoch": 0.22442942372209093, "grad_norm": 1.2515381574630737, "learning_rate": 2.7123162025912456e-05, "loss": 0.1525, "step": 10185 }, { "epoch": 0.2244514590116071, "grad_norm": 1.0054856538772583, "learning_rate": 2.7122531564037344e-05, "loss": 0.0894, "step": 10186 }, { "epoch": 0.22447349430112326, "grad_norm": 1.320602297782898, "learning_rate": 2.7121901040415677e-05, "loss": 0.0909, "step": 10187 }, { "epoch": 0.2244955295906394, "grad_norm": 1.020653486251831, "learning_rate": 2.712127045505067e-05, "loss": 0.112, "step": 10188 }, { "epoch": 0.22451756488015556, "grad_norm": 1.1238363981246948, "learning_rate": 2.7120639807945534e-05, "loss": 0.1209, "step": 10189 }, { "epoch": 0.22453960016967173, "grad_norm": 0.9094500541687012, "learning_rate": 2.7120009099103474e-05, "loss": 0.1121, "step": 10190 }, { "epoch": 0.2245616354591879, "grad_norm": 0.5400019884109497, "learning_rate": 2.7119378328527712e-05, "loss": 0.0842, "step": 10191 }, { "epoch": 0.22458367074870406, "grad_norm": 0.6581376194953918, "learning_rate": 2.711874749622146e-05, "loss": 0.0608, "step": 10192 }, { "epoch": 0.22460570603822022, "grad_norm": 0.8240603804588318, "learning_rate": 2.7118116602187924e-05, "loss": 0.1069, "step": 10193 }, { "epoch": 0.22462774132773636, "grad_norm": 1.0481394529342651, "learning_rate": 2.7117485646430322e-05, "loss": 0.1187, "step": 10194 }, { "epoch": 0.22464977661725252, "grad_norm": 0.927980899810791, "learning_rate": 2.7116854628951866e-05, "loss": 0.1501, "step": 10195 }, { "epoch": 0.2246718119067687, "grad_norm": 0.9997285604476929, "learning_rate": 2.711622354975578e-05, "loss": 0.1317, "step": 10196 }, { "epoch": 0.22469384719628485, "grad_norm": 1.2146557569503784, "learning_rate": 2.7115592408845263e-05, "loss": 0.1138, "step": 10197 }, { "epoch": 0.22471588248580102, "grad_norm": 0.6568275690078735, "learning_rate": 2.7114961206223538e-05, "loss": 0.1105, "step": 10198 }, { "epoch": 0.22473791777531718, "grad_norm": 0.5577822923660278, "learning_rate": 2.711432994189382e-05, "loss": 0.1101, "step": 10199 }, { "epoch": 0.22475995306483332, "grad_norm": 1.0252712965011597, "learning_rate": 2.711369861585932e-05, "loss": 0.0872, "step": 10200 }, { "epoch": 0.22478198835434948, "grad_norm": 0.8914593458175659, "learning_rate": 2.7113067228123262e-05, "loss": 0.1509, "step": 10201 }, { "epoch": 0.22480402364386565, "grad_norm": 0.2617640197277069, "learning_rate": 2.7112435778688858e-05, "loss": 0.0916, "step": 10202 }, { "epoch": 0.2248260589333818, "grad_norm": 1.1249854564666748, "learning_rate": 2.711180426755932e-05, "loss": 0.1379, "step": 10203 }, { "epoch": 0.22484809422289798, "grad_norm": 0.8253135681152344, "learning_rate": 2.711117269473787e-05, "loss": 0.0783, "step": 10204 }, { "epoch": 0.22487012951241414, "grad_norm": 0.7957121133804321, "learning_rate": 2.711054106022772e-05, "loss": 0.0965, "step": 10205 }, { "epoch": 0.22489216480193028, "grad_norm": 1.018815040588379, "learning_rate": 2.7109909364032088e-05, "loss": 0.1165, "step": 10206 }, { "epoch": 0.22491420009144644, "grad_norm": 0.8092893362045288, "learning_rate": 2.71092776061542e-05, "loss": 0.0871, "step": 10207 }, { "epoch": 0.2249362353809626, "grad_norm": 0.7170670032501221, "learning_rate": 2.7108645786597265e-05, "loss": 0.1141, "step": 10208 }, { "epoch": 0.22495827067047877, "grad_norm": 0.5370727181434631, "learning_rate": 2.7108013905364506e-05, "loss": 0.0773, "step": 10209 }, { "epoch": 0.22498030595999494, "grad_norm": 1.0338095426559448, "learning_rate": 2.7107381962459135e-05, "loss": 0.1188, "step": 10210 }, { "epoch": 0.2250023412495111, "grad_norm": 1.1081690788269043, "learning_rate": 2.7106749957884376e-05, "loss": 0.0883, "step": 10211 }, { "epoch": 0.22502437653902727, "grad_norm": 1.2513864040374756, "learning_rate": 2.710611789164345e-05, "loss": 0.0838, "step": 10212 }, { "epoch": 0.2250464118285434, "grad_norm": 0.838656485080719, "learning_rate": 2.710548576373957e-05, "loss": 0.122, "step": 10213 }, { "epoch": 0.22506844711805957, "grad_norm": 1.108212947845459, "learning_rate": 2.7104853574175968e-05, "loss": 0.0906, "step": 10214 }, { "epoch": 0.22509048240757573, "grad_norm": 2.3314969539642334, "learning_rate": 2.710422132295585e-05, "loss": 0.0907, "step": 10215 }, { "epoch": 0.2251125176970919, "grad_norm": 1.458380937576294, "learning_rate": 2.710358901008244e-05, "loss": 0.0912, "step": 10216 }, { "epoch": 0.22513455298660806, "grad_norm": 1.1426507234573364, "learning_rate": 2.7102956635558962e-05, "loss": 0.1123, "step": 10217 }, { "epoch": 0.22515658827612423, "grad_norm": 0.7126551866531372, "learning_rate": 2.7102324199388634e-05, "loss": 0.0764, "step": 10218 }, { "epoch": 0.22517862356564036, "grad_norm": 0.8071787357330322, "learning_rate": 2.7101691701574683e-05, "loss": 0.068, "step": 10219 }, { "epoch": 0.22520065885515653, "grad_norm": 1.106026291847229, "learning_rate": 2.7101059142120326e-05, "loss": 0.1293, "step": 10220 }, { "epoch": 0.2252226941446727, "grad_norm": 1.4150400161743164, "learning_rate": 2.7100426521028784e-05, "loss": 0.1296, "step": 10221 }, { "epoch": 0.22524472943418886, "grad_norm": 1.846495270729065, "learning_rate": 2.709979383830328e-05, "loss": 0.1303, "step": 10222 }, { "epoch": 0.22526676472370502, "grad_norm": 0.7818790674209595, "learning_rate": 2.709916109394704e-05, "loss": 0.0946, "step": 10223 }, { "epoch": 0.22528880001322119, "grad_norm": 1.157050609588623, "learning_rate": 2.7098528287963282e-05, "loss": 0.1264, "step": 10224 }, { "epoch": 0.22531083530273732, "grad_norm": 0.7962803840637207, "learning_rate": 2.7097895420355238e-05, "loss": 0.1237, "step": 10225 }, { "epoch": 0.2253328705922535, "grad_norm": 1.3572793006896973, "learning_rate": 2.7097262491126117e-05, "loss": 0.1434, "step": 10226 }, { "epoch": 0.22535490588176965, "grad_norm": 1.253920078277588, "learning_rate": 2.7096629500279156e-05, "loss": 0.1247, "step": 10227 }, { "epoch": 0.22537694117128582, "grad_norm": 0.9984431266784668, "learning_rate": 2.7095996447817573e-05, "loss": 0.0984, "step": 10228 }, { "epoch": 0.22539897646080198, "grad_norm": 1.1816385984420776, "learning_rate": 2.709536333374459e-05, "loss": 0.1259, "step": 10229 }, { "epoch": 0.22542101175031815, "grad_norm": 0.909227728843689, "learning_rate": 2.709473015806344e-05, "loss": 0.1362, "step": 10230 }, { "epoch": 0.22544304703983428, "grad_norm": 0.9210830926895142, "learning_rate": 2.7094096920777344e-05, "loss": 0.1051, "step": 10231 }, { "epoch": 0.22546508232935045, "grad_norm": 0.8871368169784546, "learning_rate": 2.7093463621889526e-05, "loss": 0.1281, "step": 10232 }, { "epoch": 0.2254871176188666, "grad_norm": 0.6731590032577515, "learning_rate": 2.709283026140321e-05, "loss": 0.0601, "step": 10233 }, { "epoch": 0.22550915290838278, "grad_norm": 0.9486973285675049, "learning_rate": 2.709219683932163e-05, "loss": 0.133, "step": 10234 }, { "epoch": 0.22553118819789894, "grad_norm": 0.740180492401123, "learning_rate": 2.7091563355648e-05, "loss": 0.1367, "step": 10235 }, { "epoch": 0.2255532234874151, "grad_norm": 0.7307168841362, "learning_rate": 2.709092981038556e-05, "loss": 0.084, "step": 10236 }, { "epoch": 0.22557525877693124, "grad_norm": 0.9119473695755005, "learning_rate": 2.709029620353753e-05, "loss": 0.1007, "step": 10237 }, { "epoch": 0.2255972940664474, "grad_norm": 0.8282085061073303, "learning_rate": 2.7089662535107137e-05, "loss": 0.1115, "step": 10238 }, { "epoch": 0.22561932935596357, "grad_norm": 0.7004229426383972, "learning_rate": 2.7089028805097607e-05, "loss": 0.1024, "step": 10239 }, { "epoch": 0.22564136464547974, "grad_norm": 0.6387943029403687, "learning_rate": 2.7088395013512174e-05, "loss": 0.1037, "step": 10240 }, { "epoch": 0.2256633999349959, "grad_norm": 0.5912377238273621, "learning_rate": 2.708776116035407e-05, "loss": 0.1144, "step": 10241 }, { "epoch": 0.22568543522451207, "grad_norm": 1.1128743886947632, "learning_rate": 2.7087127245626504e-05, "loss": 0.094, "step": 10242 }, { "epoch": 0.22570747051402823, "grad_norm": 1.5725409984588623, "learning_rate": 2.7086493269332723e-05, "loss": 0.084, "step": 10243 }, { "epoch": 0.22572950580354437, "grad_norm": 0.8620442152023315, "learning_rate": 2.7085859231475946e-05, "loss": 0.0929, "step": 10244 }, { "epoch": 0.22575154109306053, "grad_norm": 0.9207369089126587, "learning_rate": 2.7085225132059414e-05, "loss": 0.0941, "step": 10245 }, { "epoch": 0.2257735763825767, "grad_norm": 0.8184155821800232, "learning_rate": 2.7084590971086348e-05, "loss": 0.0996, "step": 10246 }, { "epoch": 0.22579561167209286, "grad_norm": 1.2158864736557007, "learning_rate": 2.7083956748559976e-05, "loss": 0.1259, "step": 10247 }, { "epoch": 0.22581764696160903, "grad_norm": 0.9643034338951111, "learning_rate": 2.7083322464483533e-05, "loss": 0.1401, "step": 10248 }, { "epoch": 0.2258396822511252, "grad_norm": 0.855439305305481, "learning_rate": 2.7082688118860252e-05, "loss": 0.1288, "step": 10249 }, { "epoch": 0.22586171754064133, "grad_norm": 1.1317622661590576, "learning_rate": 2.7082053711693363e-05, "loss": 0.1241, "step": 10250 }, { "epoch": 0.2258837528301575, "grad_norm": 1.2437485456466675, "learning_rate": 2.708141924298609e-05, "loss": 0.117, "step": 10251 }, { "epoch": 0.22590578811967366, "grad_norm": 0.8220329284667969, "learning_rate": 2.7080784712741673e-05, "loss": 0.0993, "step": 10252 }, { "epoch": 0.22592782340918982, "grad_norm": 0.7787992358207703, "learning_rate": 2.708015012096334e-05, "loss": 0.1385, "step": 10253 }, { "epoch": 0.22594985869870599, "grad_norm": 1.1892222166061401, "learning_rate": 2.7079515467654322e-05, "loss": 0.1077, "step": 10254 }, { "epoch": 0.22597189398822215, "grad_norm": 1.246899127960205, "learning_rate": 2.707888075281786e-05, "loss": 0.1126, "step": 10255 }, { "epoch": 0.2259939292777383, "grad_norm": 1.1882072687149048, "learning_rate": 2.707824597645718e-05, "loss": 0.1229, "step": 10256 }, { "epoch": 0.22601596456725445, "grad_norm": 0.9699028730392456, "learning_rate": 2.7077611138575515e-05, "loss": 0.137, "step": 10257 }, { "epoch": 0.22603799985677062, "grad_norm": 0.7171992063522339, "learning_rate": 2.7076976239176096e-05, "loss": 0.0584, "step": 10258 }, { "epoch": 0.22606003514628678, "grad_norm": 0.8790439963340759, "learning_rate": 2.7076341278262167e-05, "loss": 0.1062, "step": 10259 }, { "epoch": 0.22608207043580295, "grad_norm": 1.0849062204360962, "learning_rate": 2.7075706255836956e-05, "loss": 0.0816, "step": 10260 }, { "epoch": 0.2261041057253191, "grad_norm": 1.058165192604065, "learning_rate": 2.7075071171903695e-05, "loss": 0.1319, "step": 10261 }, { "epoch": 0.22612614101483525, "grad_norm": 0.8806741237640381, "learning_rate": 2.707443602646562e-05, "loss": 0.1026, "step": 10262 }, { "epoch": 0.2261481763043514, "grad_norm": 1.1216877698898315, "learning_rate": 2.707380081952597e-05, "loss": 0.0994, "step": 10263 }, { "epoch": 0.22617021159386758, "grad_norm": 0.6037587523460388, "learning_rate": 2.7073165551087975e-05, "loss": 0.1077, "step": 10264 }, { "epoch": 0.22619224688338374, "grad_norm": 0.9148160815238953, "learning_rate": 2.707253022115488e-05, "loss": 0.1019, "step": 10265 }, { "epoch": 0.2262142821728999, "grad_norm": 1.313250184059143, "learning_rate": 2.707189482972991e-05, "loss": 0.0964, "step": 10266 }, { "epoch": 0.22623631746241607, "grad_norm": 1.0662096738815308, "learning_rate": 2.7071259376816305e-05, "loss": 0.1311, "step": 10267 }, { "epoch": 0.2262583527519322, "grad_norm": 0.7484006285667419, "learning_rate": 2.7070623862417303e-05, "loss": 0.0982, "step": 10268 }, { "epoch": 0.22628038804144837, "grad_norm": 0.9623584151268005, "learning_rate": 2.7069988286536146e-05, "loss": 0.122, "step": 10269 }, { "epoch": 0.22630242333096454, "grad_norm": 1.0096275806427002, "learning_rate": 2.706935264917606e-05, "loss": 0.093, "step": 10270 }, { "epoch": 0.2263244586204807, "grad_norm": 0.7766446471214294, "learning_rate": 2.706871695034029e-05, "loss": 0.1054, "step": 10271 }, { "epoch": 0.22634649390999687, "grad_norm": 0.6534724235534668, "learning_rate": 2.7068081190032076e-05, "loss": 0.1055, "step": 10272 }, { "epoch": 0.22636852919951303, "grad_norm": 0.9519628286361694, "learning_rate": 2.7067445368254652e-05, "loss": 0.0798, "step": 10273 }, { "epoch": 0.22639056448902917, "grad_norm": 0.7512866258621216, "learning_rate": 2.706680948501126e-05, "loss": 0.0897, "step": 10274 }, { "epoch": 0.22641259977854533, "grad_norm": 1.037196397781372, "learning_rate": 2.706617354030513e-05, "loss": 0.1189, "step": 10275 }, { "epoch": 0.2264346350680615, "grad_norm": 1.1370776891708374, "learning_rate": 2.706553753413951e-05, "loss": 0.1028, "step": 10276 }, { "epoch": 0.22645667035757766, "grad_norm": 0.6817894577980042, "learning_rate": 2.706490146651764e-05, "loss": 0.1242, "step": 10277 }, { "epoch": 0.22647870564709383, "grad_norm": 0.7245444655418396, "learning_rate": 2.7064265337442756e-05, "loss": 0.1362, "step": 10278 }, { "epoch": 0.22650074093661, "grad_norm": 1.2569389343261719, "learning_rate": 2.70636291469181e-05, "loss": 0.0968, "step": 10279 }, { "epoch": 0.22652277622612615, "grad_norm": 0.7717910408973694, "learning_rate": 2.706299289494691e-05, "loss": 0.0933, "step": 10280 }, { "epoch": 0.2265448115156423, "grad_norm": 0.9234269857406616, "learning_rate": 2.706235658153243e-05, "loss": 0.1449, "step": 10281 }, { "epoch": 0.22656684680515846, "grad_norm": 0.8531665802001953, "learning_rate": 2.70617202066779e-05, "loss": 0.1198, "step": 10282 }, { "epoch": 0.22658888209467462, "grad_norm": 0.932704508304596, "learning_rate": 2.706108377038656e-05, "loss": 0.0989, "step": 10283 }, { "epoch": 0.22661091738419079, "grad_norm": 1.2743452787399292, "learning_rate": 2.7060447272661652e-05, "loss": 0.1615, "step": 10284 }, { "epoch": 0.22663295267370695, "grad_norm": 0.9236071109771729, "learning_rate": 2.7059810713506418e-05, "loss": 0.0999, "step": 10285 }, { "epoch": 0.22665498796322311, "grad_norm": 0.7425352334976196, "learning_rate": 2.70591740929241e-05, "loss": 0.1249, "step": 10286 }, { "epoch": 0.22667702325273925, "grad_norm": 1.0334452390670776, "learning_rate": 2.705853741091795e-05, "loss": 0.0954, "step": 10287 }, { "epoch": 0.22669905854225542, "grad_norm": 1.3853516578674316, "learning_rate": 2.7057900667491192e-05, "loss": 0.1179, "step": 10288 }, { "epoch": 0.22672109383177158, "grad_norm": 1.0122402906417847, "learning_rate": 2.7057263862647086e-05, "loss": 0.1031, "step": 10289 }, { "epoch": 0.22674312912128775, "grad_norm": 1.144805908203125, "learning_rate": 2.705662699638887e-05, "loss": 0.099, "step": 10290 }, { "epoch": 0.2267651644108039, "grad_norm": 0.9141274690628052, "learning_rate": 2.7055990068719786e-05, "loss": 0.1145, "step": 10291 }, { "epoch": 0.22678719970032007, "grad_norm": 0.5323312282562256, "learning_rate": 2.7055353079643083e-05, "loss": 0.0824, "step": 10292 }, { "epoch": 0.2268092349898362, "grad_norm": 0.8930016160011292, "learning_rate": 2.7054716029161993e-05, "loss": 0.0945, "step": 10293 }, { "epoch": 0.22683127027935238, "grad_norm": 0.7512515187263489, "learning_rate": 2.7054078917279777e-05, "loss": 0.0915, "step": 10294 }, { "epoch": 0.22685330556886854, "grad_norm": 1.2277841567993164, "learning_rate": 2.7053441743999673e-05, "loss": 0.1314, "step": 10295 }, { "epoch": 0.2268753408583847, "grad_norm": 0.7054110765457153, "learning_rate": 2.7052804509324923e-05, "loss": 0.0841, "step": 10296 }, { "epoch": 0.22689737614790087, "grad_norm": 0.6809034943580627, "learning_rate": 2.7052167213258777e-05, "loss": 0.1348, "step": 10297 }, { "epoch": 0.22691941143741703, "grad_norm": 0.9728965759277344, "learning_rate": 2.7051529855804484e-05, "loss": 0.087, "step": 10298 }, { "epoch": 0.22694144672693317, "grad_norm": 0.843486487865448, "learning_rate": 2.7050892436965287e-05, "loss": 0.0849, "step": 10299 }, { "epoch": 0.22696348201644934, "grad_norm": 0.9784231185913086, "learning_rate": 2.705025495674443e-05, "loss": 0.0928, "step": 10300 }, { "epoch": 0.2269855173059655, "grad_norm": 1.4666532278060913, "learning_rate": 2.704961741514516e-05, "loss": 0.0905, "step": 10301 }, { "epoch": 0.22700755259548167, "grad_norm": 0.7968837022781372, "learning_rate": 2.7048979812170732e-05, "loss": 0.0851, "step": 10302 }, { "epoch": 0.22702958788499783, "grad_norm": 0.9832261204719543, "learning_rate": 2.7048342147824385e-05, "loss": 0.1034, "step": 10303 }, { "epoch": 0.227051623174514, "grad_norm": 0.7781362533569336, "learning_rate": 2.7047704422109375e-05, "loss": 0.147, "step": 10304 }, { "epoch": 0.22707365846403013, "grad_norm": 0.7980002164840698, "learning_rate": 2.7047066635028943e-05, "loss": 0.1097, "step": 10305 }, { "epoch": 0.2270956937535463, "grad_norm": 0.8555769920349121, "learning_rate": 2.7046428786586344e-05, "loss": 0.1047, "step": 10306 }, { "epoch": 0.22711772904306246, "grad_norm": 1.7590352296829224, "learning_rate": 2.7045790876784817e-05, "loss": 0.1375, "step": 10307 }, { "epoch": 0.22713976433257863, "grad_norm": 0.890997052192688, "learning_rate": 2.7045152905627622e-05, "loss": 0.103, "step": 10308 }, { "epoch": 0.2271617996220948, "grad_norm": 0.6534250378608704, "learning_rate": 2.7044514873118004e-05, "loss": 0.1172, "step": 10309 }, { "epoch": 0.22718383491161095, "grad_norm": 1.0300238132476807, "learning_rate": 2.704387677925921e-05, "loss": 0.0908, "step": 10310 }, { "epoch": 0.2272058702011271, "grad_norm": 0.7537365555763245, "learning_rate": 2.7043238624054495e-05, "loss": 0.1187, "step": 10311 }, { "epoch": 0.22722790549064326, "grad_norm": 0.7982177734375, "learning_rate": 2.704260040750711e-05, "loss": 0.1168, "step": 10312 }, { "epoch": 0.22724994078015942, "grad_norm": 1.0539788007736206, "learning_rate": 2.7041962129620302e-05, "loss": 0.1345, "step": 10313 }, { "epoch": 0.22727197606967559, "grad_norm": 0.8294234871864319, "learning_rate": 2.704132379039732e-05, "loss": 0.1177, "step": 10314 }, { "epoch": 0.22729401135919175, "grad_norm": 1.0263874530792236, "learning_rate": 2.704068538984142e-05, "loss": 0.136, "step": 10315 }, { "epoch": 0.22731604664870791, "grad_norm": 0.7302790284156799, "learning_rate": 2.7040046927955854e-05, "loss": 0.1003, "step": 10316 }, { "epoch": 0.22733808193822408, "grad_norm": 0.5742692947387695, "learning_rate": 2.7039408404743878e-05, "loss": 0.1338, "step": 10317 }, { "epoch": 0.22736011722774022, "grad_norm": 0.8104633092880249, "learning_rate": 2.7038769820208733e-05, "loss": 0.1351, "step": 10318 }, { "epoch": 0.22738215251725638, "grad_norm": 0.7665953636169434, "learning_rate": 2.7038131174353678e-05, "loss": 0.1025, "step": 10319 }, { "epoch": 0.22740418780677255, "grad_norm": 0.7151505351066589, "learning_rate": 2.7037492467181962e-05, "loss": 0.1045, "step": 10320 }, { "epoch": 0.2274262230962887, "grad_norm": 0.8889245390892029, "learning_rate": 2.7036853698696848e-05, "loss": 0.1305, "step": 10321 }, { "epoch": 0.22744825838580487, "grad_norm": 0.8834128975868225, "learning_rate": 2.7036214868901578e-05, "loss": 0.0998, "step": 10322 }, { "epoch": 0.22747029367532104, "grad_norm": 0.8620421886444092, "learning_rate": 2.7035575977799416e-05, "loss": 0.1186, "step": 10323 }, { "epoch": 0.22749232896483718, "grad_norm": 0.8619876503944397, "learning_rate": 2.7034937025393613e-05, "loss": 0.0629, "step": 10324 }, { "epoch": 0.22751436425435334, "grad_norm": 0.671180009841919, "learning_rate": 2.7034298011687416e-05, "loss": 0.1022, "step": 10325 }, { "epoch": 0.2275363995438695, "grad_norm": 1.259181261062622, "learning_rate": 2.703365893668409e-05, "loss": 0.1264, "step": 10326 }, { "epoch": 0.22755843483338567, "grad_norm": 1.074053168296814, "learning_rate": 2.7033019800386886e-05, "loss": 0.1045, "step": 10327 }, { "epoch": 0.22758047012290183, "grad_norm": 0.9536052942276001, "learning_rate": 2.7032380602799058e-05, "loss": 0.0868, "step": 10328 }, { "epoch": 0.227602505412418, "grad_norm": 1.037571668624878, "learning_rate": 2.7031741343923864e-05, "loss": 0.1224, "step": 10329 }, { "epoch": 0.22762454070193414, "grad_norm": 0.7227060794830322, "learning_rate": 2.7031102023764558e-05, "loss": 0.0911, "step": 10330 }, { "epoch": 0.2276465759914503, "grad_norm": 0.8929519057273865, "learning_rate": 2.7030462642324397e-05, "loss": 0.1084, "step": 10331 }, { "epoch": 0.22766861128096647, "grad_norm": 0.7365241646766663, "learning_rate": 2.7029823199606644e-05, "loss": 0.1042, "step": 10332 }, { "epoch": 0.22769064657048263, "grad_norm": 0.7072571516036987, "learning_rate": 2.7029183695614548e-05, "loss": 0.0844, "step": 10333 }, { "epoch": 0.2277126818599988, "grad_norm": 1.044795036315918, "learning_rate": 2.7028544130351365e-05, "loss": 0.125, "step": 10334 }, { "epoch": 0.22773471714951496, "grad_norm": 1.3392913341522217, "learning_rate": 2.702790450382036e-05, "loss": 0.1691, "step": 10335 }, { "epoch": 0.2277567524390311, "grad_norm": 0.7652268409729004, "learning_rate": 2.702726481602479e-05, "loss": 0.0963, "step": 10336 }, { "epoch": 0.22777878772854726, "grad_norm": 0.7364884614944458, "learning_rate": 2.7026625066967907e-05, "loss": 0.1011, "step": 10337 }, { "epoch": 0.22780082301806343, "grad_norm": 0.849291205406189, "learning_rate": 2.7025985256652973e-05, "loss": 0.1117, "step": 10338 }, { "epoch": 0.2278228583075796, "grad_norm": 1.0637351274490356, "learning_rate": 2.7025345385083248e-05, "loss": 0.1066, "step": 10339 }, { "epoch": 0.22784489359709575, "grad_norm": 0.6999943256378174, "learning_rate": 2.7024705452261993e-05, "loss": 0.081, "step": 10340 }, { "epoch": 0.22786692888661192, "grad_norm": 1.0828752517700195, "learning_rate": 2.7024065458192463e-05, "loss": 0.1313, "step": 10341 }, { "epoch": 0.22788896417612806, "grad_norm": 1.2412959337234497, "learning_rate": 2.702342540287792e-05, "loss": 0.0783, "step": 10342 }, { "epoch": 0.22791099946564422, "grad_norm": 0.989596426486969, "learning_rate": 2.7022785286321623e-05, "loss": 0.0983, "step": 10343 }, { "epoch": 0.22793303475516039, "grad_norm": 0.810224175453186, "learning_rate": 2.702214510852683e-05, "loss": 0.1149, "step": 10344 }, { "epoch": 0.22795507004467655, "grad_norm": 0.7425218224525452, "learning_rate": 2.7021504869496813e-05, "loss": 0.1211, "step": 10345 }, { "epoch": 0.22797710533419271, "grad_norm": 0.8291780948638916, "learning_rate": 2.702086456923482e-05, "loss": 0.1096, "step": 10346 }, { "epoch": 0.22799914062370888, "grad_norm": 0.798744261264801, "learning_rate": 2.702022420774412e-05, "loss": 0.0639, "step": 10347 }, { "epoch": 0.22802117591322504, "grad_norm": 0.9068384766578674, "learning_rate": 2.701958378502797e-05, "loss": 0.108, "step": 10348 }, { "epoch": 0.22804321120274118, "grad_norm": 0.8416250348091125, "learning_rate": 2.7018943301089636e-05, "loss": 0.0995, "step": 10349 }, { "epoch": 0.22806524649225735, "grad_norm": 0.926177978515625, "learning_rate": 2.701830275593238e-05, "loss": 0.0651, "step": 10350 }, { "epoch": 0.2280872817817735, "grad_norm": 0.7684730887413025, "learning_rate": 2.701766214955946e-05, "loss": 0.1262, "step": 10351 }, { "epoch": 0.22810931707128967, "grad_norm": 1.0442053079605103, "learning_rate": 2.7017021481974147e-05, "loss": 0.0971, "step": 10352 }, { "epoch": 0.22813135236080584, "grad_norm": 0.6461008191108704, "learning_rate": 2.7016380753179694e-05, "loss": 0.0747, "step": 10353 }, { "epoch": 0.228153387650322, "grad_norm": 1.0277992486953735, "learning_rate": 2.7015739963179372e-05, "loss": 0.1094, "step": 10354 }, { "epoch": 0.22817542293983814, "grad_norm": 0.909326434135437, "learning_rate": 2.7015099111976443e-05, "loss": 0.1079, "step": 10355 }, { "epoch": 0.2281974582293543, "grad_norm": 0.5404334664344788, "learning_rate": 2.701445819957417e-05, "loss": 0.08, "step": 10356 }, { "epoch": 0.22821949351887047, "grad_norm": 0.9989080429077148, "learning_rate": 2.7013817225975826e-05, "loss": 0.077, "step": 10357 }, { "epoch": 0.22824152880838663, "grad_norm": 2.025747537612915, "learning_rate": 2.7013176191184662e-05, "loss": 0.1183, "step": 10358 }, { "epoch": 0.2282635640979028, "grad_norm": 0.7212291359901428, "learning_rate": 2.701253509520395e-05, "loss": 0.0931, "step": 10359 }, { "epoch": 0.22828559938741896, "grad_norm": 0.8245453238487244, "learning_rate": 2.7011893938036956e-05, "loss": 0.1133, "step": 10360 }, { "epoch": 0.2283076346769351, "grad_norm": 0.6517316699028015, "learning_rate": 2.7011252719686946e-05, "loss": 0.1358, "step": 10361 }, { "epoch": 0.22832966996645127, "grad_norm": 0.7378641963005066, "learning_rate": 2.7010611440157183e-05, "loss": 0.0941, "step": 10362 }, { "epoch": 0.22835170525596743, "grad_norm": 0.6646902561187744, "learning_rate": 2.7009970099450936e-05, "loss": 0.0727, "step": 10363 }, { "epoch": 0.2283737405454836, "grad_norm": 0.7927955389022827, "learning_rate": 2.7009328697571472e-05, "loss": 0.1078, "step": 10364 }, { "epoch": 0.22839577583499976, "grad_norm": 1.0168769359588623, "learning_rate": 2.7008687234522056e-05, "loss": 0.0854, "step": 10365 }, { "epoch": 0.22841781112451592, "grad_norm": 0.741169273853302, "learning_rate": 2.700804571030596e-05, "loss": 0.0896, "step": 10366 }, { "epoch": 0.22843984641403206, "grad_norm": 0.35212022066116333, "learning_rate": 2.7007404124926442e-05, "loss": 0.1029, "step": 10367 }, { "epoch": 0.22846188170354823, "grad_norm": 0.6774041652679443, "learning_rate": 2.700676247838678e-05, "loss": 0.0979, "step": 10368 }, { "epoch": 0.2284839169930644, "grad_norm": 1.345170259475708, "learning_rate": 2.7006120770690237e-05, "loss": 0.0979, "step": 10369 }, { "epoch": 0.22850595228258055, "grad_norm": 0.6368944048881531, "learning_rate": 2.7005479001840084e-05, "loss": 0.1303, "step": 10370 }, { "epoch": 0.22852798757209672, "grad_norm": 0.7621022462844849, "learning_rate": 2.7004837171839584e-05, "loss": 0.0748, "step": 10371 }, { "epoch": 0.22855002286161288, "grad_norm": 0.6642983555793762, "learning_rate": 2.7004195280692015e-05, "loss": 0.0889, "step": 10372 }, { "epoch": 0.22857205815112902, "grad_norm": 0.7834219932556152, "learning_rate": 2.700355332840064e-05, "loss": 0.0837, "step": 10373 }, { "epoch": 0.22859409344064519, "grad_norm": 1.1526519060134888, "learning_rate": 2.7002911314968733e-05, "loss": 0.1261, "step": 10374 }, { "epoch": 0.22861612873016135, "grad_norm": 1.1826965808868408, "learning_rate": 2.700226924039956e-05, "loss": 0.1085, "step": 10375 }, { "epoch": 0.22863816401967751, "grad_norm": 0.8337389230728149, "learning_rate": 2.7001627104696396e-05, "loss": 0.1049, "step": 10376 }, { "epoch": 0.22866019930919368, "grad_norm": 0.9047576785087585, "learning_rate": 2.7000984907862507e-05, "loss": 0.117, "step": 10377 }, { "epoch": 0.22868223459870984, "grad_norm": 0.89473956823349, "learning_rate": 2.700034264990117e-05, "loss": 0.0702, "step": 10378 }, { "epoch": 0.22870426988822598, "grad_norm": 0.8796499967575073, "learning_rate": 2.699970033081565e-05, "loss": 0.1467, "step": 10379 }, { "epoch": 0.22872630517774215, "grad_norm": 1.0405981540679932, "learning_rate": 2.6999057950609223e-05, "loss": 0.1208, "step": 10380 }, { "epoch": 0.2287483404672583, "grad_norm": 0.45229655504226685, "learning_rate": 2.6998415509285156e-05, "loss": 0.0773, "step": 10381 }, { "epoch": 0.22877037575677447, "grad_norm": 0.5300114750862122, "learning_rate": 2.699777300684673e-05, "loss": 0.082, "step": 10382 }, { "epoch": 0.22879241104629064, "grad_norm": 1.1195052862167358, "learning_rate": 2.699713044329721e-05, "loss": 0.1056, "step": 10383 }, { "epoch": 0.2288144463358068, "grad_norm": 0.9699375629425049, "learning_rate": 2.699648781863987e-05, "loss": 0.1051, "step": 10384 }, { "epoch": 0.22883648162532297, "grad_norm": 1.147188663482666, "learning_rate": 2.6995845132877985e-05, "loss": 0.1209, "step": 10385 }, { "epoch": 0.2288585169148391, "grad_norm": 0.8413276076316833, "learning_rate": 2.699520238601483e-05, "loss": 0.0876, "step": 10386 }, { "epoch": 0.22888055220435527, "grad_norm": 0.5336809158325195, "learning_rate": 2.6994559578053673e-05, "loss": 0.0865, "step": 10387 }, { "epoch": 0.22890258749387143, "grad_norm": 0.9362546801567078, "learning_rate": 2.6993916708997798e-05, "loss": 0.097, "step": 10388 }, { "epoch": 0.2289246227833876, "grad_norm": 0.7852234840393066, "learning_rate": 2.699327377885047e-05, "loss": 0.1349, "step": 10389 }, { "epoch": 0.22894665807290376, "grad_norm": 0.8075113296508789, "learning_rate": 2.6992630787614963e-05, "loss": 0.0999, "step": 10390 }, { "epoch": 0.22896869336241993, "grad_norm": 0.8467054963111877, "learning_rate": 2.6991987735294563e-05, "loss": 0.083, "step": 10391 }, { "epoch": 0.22899072865193607, "grad_norm": 0.9637097120285034, "learning_rate": 2.6991344621892536e-05, "loss": 0.1076, "step": 10392 }, { "epoch": 0.22901276394145223, "grad_norm": 0.848686933517456, "learning_rate": 2.699070144741216e-05, "loss": 0.0863, "step": 10393 }, { "epoch": 0.2290347992309684, "grad_norm": 0.73139888048172, "learning_rate": 2.6990058211856716e-05, "loss": 0.1247, "step": 10394 }, { "epoch": 0.22905683452048456, "grad_norm": 0.8170140385627747, "learning_rate": 2.6989414915229472e-05, "loss": 0.0931, "step": 10395 }, { "epoch": 0.22907886981000072, "grad_norm": 0.8238321542739868, "learning_rate": 2.698877155753371e-05, "loss": 0.1085, "step": 10396 }, { "epoch": 0.2291009050995169, "grad_norm": 0.8967533111572266, "learning_rate": 2.6988128138772704e-05, "loss": 0.1205, "step": 10397 }, { "epoch": 0.22912294038903303, "grad_norm": 0.5123867392539978, "learning_rate": 2.6987484658949734e-05, "loss": 0.0839, "step": 10398 }, { "epoch": 0.2291449756785492, "grad_norm": 0.7689908146858215, "learning_rate": 2.6986841118068074e-05, "loss": 0.1524, "step": 10399 }, { "epoch": 0.22916701096806535, "grad_norm": 0.8648446798324585, "learning_rate": 2.698619751613101e-05, "loss": 0.1009, "step": 10400 }, { "epoch": 0.22918904625758152, "grad_norm": 0.5227159261703491, "learning_rate": 2.698555385314181e-05, "loss": 0.0677, "step": 10401 }, { "epoch": 0.22921108154709768, "grad_norm": 0.6963269710540771, "learning_rate": 2.698491012910376e-05, "loss": 0.1023, "step": 10402 }, { "epoch": 0.22923311683661385, "grad_norm": 0.9175900220870972, "learning_rate": 2.6984266344020133e-05, "loss": 0.1231, "step": 10403 }, { "epoch": 0.22925515212612999, "grad_norm": 1.105217456817627, "learning_rate": 2.698362249789421e-05, "loss": 0.0774, "step": 10404 }, { "epoch": 0.22927718741564615, "grad_norm": 0.7159206867218018, "learning_rate": 2.6982978590729277e-05, "loss": 0.0971, "step": 10405 }, { "epoch": 0.22929922270516231, "grad_norm": 0.5723243355751038, "learning_rate": 2.6982334622528603e-05, "loss": 0.1085, "step": 10406 }, { "epoch": 0.22932125799467848, "grad_norm": 1.0755949020385742, "learning_rate": 2.6981690593295475e-05, "loss": 0.1436, "step": 10407 }, { "epoch": 0.22934329328419464, "grad_norm": 1.1230911016464233, "learning_rate": 2.6981046503033173e-05, "loss": 0.1244, "step": 10408 }, { "epoch": 0.2293653285737108, "grad_norm": 0.7834658026695251, "learning_rate": 2.6980402351744975e-05, "loss": 0.0993, "step": 10409 }, { "epoch": 0.22938736386322695, "grad_norm": 1.1123991012573242, "learning_rate": 2.6979758139434163e-05, "loss": 0.0883, "step": 10410 }, { "epoch": 0.2294093991527431, "grad_norm": 0.939657986164093, "learning_rate": 2.697911386610402e-05, "loss": 0.0825, "step": 10411 }, { "epoch": 0.22943143444225927, "grad_norm": 0.6529988646507263, "learning_rate": 2.6978469531757826e-05, "loss": 0.1304, "step": 10412 }, { "epoch": 0.22945346973177544, "grad_norm": 0.577881395816803, "learning_rate": 2.697782513639886e-05, "loss": 0.1381, "step": 10413 }, { "epoch": 0.2294755050212916, "grad_norm": 1.1652824878692627, "learning_rate": 2.6977180680030412e-05, "loss": 0.1179, "step": 10414 }, { "epoch": 0.22949754031080777, "grad_norm": 0.8728120923042297, "learning_rate": 2.697653616265576e-05, "loss": 0.1057, "step": 10415 }, { "epoch": 0.2295195756003239, "grad_norm": 0.6569268107414246, "learning_rate": 2.6975891584278186e-05, "loss": 0.1012, "step": 10416 }, { "epoch": 0.22954161088984007, "grad_norm": 0.5907149314880371, "learning_rate": 2.6975246944900975e-05, "loss": 0.1237, "step": 10417 }, { "epoch": 0.22956364617935623, "grad_norm": 0.7720644474029541, "learning_rate": 2.697460224452741e-05, "loss": 0.102, "step": 10418 }, { "epoch": 0.2295856814688724, "grad_norm": 0.9196961522102356, "learning_rate": 2.697395748316077e-05, "loss": 0.0913, "step": 10419 }, { "epoch": 0.22960771675838856, "grad_norm": 0.6028608679771423, "learning_rate": 2.697331266080435e-05, "loss": 0.0851, "step": 10420 }, { "epoch": 0.22962975204790473, "grad_norm": 0.5836058259010315, "learning_rate": 2.697266777746142e-05, "loss": 0.0966, "step": 10421 }, { "epoch": 0.2296517873374209, "grad_norm": 1.464144229888916, "learning_rate": 2.6972022833135277e-05, "loss": 0.0989, "step": 10422 }, { "epoch": 0.22967382262693703, "grad_norm": 1.3832226991653442, "learning_rate": 2.6971377827829204e-05, "loss": 0.141, "step": 10423 }, { "epoch": 0.2296958579164532, "grad_norm": 0.8594551682472229, "learning_rate": 2.6970732761546482e-05, "loss": 0.1622, "step": 10424 }, { "epoch": 0.22971789320596936, "grad_norm": 1.1686408519744873, "learning_rate": 2.69700876342904e-05, "loss": 0.1041, "step": 10425 }, { "epoch": 0.22973992849548552, "grad_norm": 0.7502267956733704, "learning_rate": 2.696944244606424e-05, "loss": 0.0986, "step": 10426 }, { "epoch": 0.2297619637850017, "grad_norm": 0.6548405289649963, "learning_rate": 2.6968797196871298e-05, "loss": 0.0903, "step": 10427 }, { "epoch": 0.22978399907451785, "grad_norm": 0.9059584736824036, "learning_rate": 2.6968151886714848e-05, "loss": 0.0937, "step": 10428 }, { "epoch": 0.229806034364034, "grad_norm": 0.578377902507782, "learning_rate": 2.6967506515598183e-05, "loss": 0.115, "step": 10429 }, { "epoch": 0.22982806965355015, "grad_norm": 1.2000482082366943, "learning_rate": 2.6966861083524592e-05, "loss": 0.1158, "step": 10430 }, { "epoch": 0.22985010494306632, "grad_norm": 0.8348286747932434, "learning_rate": 2.6966215590497358e-05, "loss": 0.0917, "step": 10431 }, { "epoch": 0.22987214023258248, "grad_norm": 0.8095489740371704, "learning_rate": 2.6965570036519767e-05, "loss": 0.0869, "step": 10432 }, { "epoch": 0.22989417552209865, "grad_norm": 0.6085199117660522, "learning_rate": 2.696492442159512e-05, "loss": 0.0936, "step": 10433 }, { "epoch": 0.2299162108116148, "grad_norm": 1.05519437789917, "learning_rate": 2.6964278745726694e-05, "loss": 0.111, "step": 10434 }, { "epoch": 0.22993824610113095, "grad_norm": 0.944007933139801, "learning_rate": 2.6963633008917777e-05, "loss": 0.1171, "step": 10435 }, { "epoch": 0.22996028139064711, "grad_norm": 0.9984035491943359, "learning_rate": 2.6962987211171667e-05, "loss": 0.0828, "step": 10436 }, { "epoch": 0.22998231668016328, "grad_norm": 0.8015859723091125, "learning_rate": 2.6962341352491646e-05, "loss": 0.0774, "step": 10437 }, { "epoch": 0.23000435196967944, "grad_norm": 0.8372088074684143, "learning_rate": 2.6961695432881005e-05, "loss": 0.1056, "step": 10438 }, { "epoch": 0.2300263872591956, "grad_norm": 0.7496451735496521, "learning_rate": 2.6961049452343037e-05, "loss": 0.0955, "step": 10439 }, { "epoch": 0.23004842254871177, "grad_norm": 1.0023046731948853, "learning_rate": 2.6960403410881027e-05, "loss": 0.1196, "step": 10440 }, { "epoch": 0.2300704578382279, "grad_norm": 1.0439223051071167, "learning_rate": 2.6959757308498267e-05, "loss": 0.0979, "step": 10441 }, { "epoch": 0.23009249312774407, "grad_norm": 1.0810469388961792, "learning_rate": 2.6959111145198053e-05, "loss": 0.095, "step": 10442 }, { "epoch": 0.23011452841726024, "grad_norm": 0.5603166818618774, "learning_rate": 2.6958464920983675e-05, "loss": 0.1087, "step": 10443 }, { "epoch": 0.2301365637067764, "grad_norm": 0.6468489766120911, "learning_rate": 2.695781863585842e-05, "loss": 0.1085, "step": 10444 }, { "epoch": 0.23015859899629257, "grad_norm": 0.6883202195167542, "learning_rate": 2.695717228982558e-05, "loss": 0.1252, "step": 10445 }, { "epoch": 0.23018063428580873, "grad_norm": 0.7964684963226318, "learning_rate": 2.6956525882888452e-05, "loss": 0.0889, "step": 10446 }, { "epoch": 0.23020266957532487, "grad_norm": 1.037009835243225, "learning_rate": 2.6955879415050328e-05, "loss": 0.1014, "step": 10447 }, { "epoch": 0.23022470486484103, "grad_norm": 0.842102587223053, "learning_rate": 2.6955232886314496e-05, "loss": 0.1362, "step": 10448 }, { "epoch": 0.2302467401543572, "grad_norm": 0.9447268843650818, "learning_rate": 2.6954586296684253e-05, "loss": 0.104, "step": 10449 }, { "epoch": 0.23026877544387336, "grad_norm": 0.7437647581100464, "learning_rate": 2.695393964616289e-05, "loss": 0.0957, "step": 10450 }, { "epoch": 0.23029081073338953, "grad_norm": 0.9859029650688171, "learning_rate": 2.6953292934753705e-05, "loss": 0.0883, "step": 10451 }, { "epoch": 0.2303128460229057, "grad_norm": 0.9339360594749451, "learning_rate": 2.6952646162459987e-05, "loss": 0.0789, "step": 10452 }, { "epoch": 0.23033488131242183, "grad_norm": 0.8862185478210449, "learning_rate": 2.6951999329285033e-05, "loss": 0.1101, "step": 10453 }, { "epoch": 0.230356916601938, "grad_norm": 0.9485715627670288, "learning_rate": 2.695135243523214e-05, "loss": 0.1366, "step": 10454 }, { "epoch": 0.23037895189145416, "grad_norm": 0.7563159465789795, "learning_rate": 2.6950705480304598e-05, "loss": 0.0923, "step": 10455 }, { "epoch": 0.23040098718097032, "grad_norm": 1.07610285282135, "learning_rate": 2.6950058464505702e-05, "loss": 0.12, "step": 10456 }, { "epoch": 0.2304230224704865, "grad_norm": 0.706142246723175, "learning_rate": 2.694941138783875e-05, "loss": 0.1052, "step": 10457 }, { "epoch": 0.23044505776000265, "grad_norm": 0.5937598347663879, "learning_rate": 2.6948764250307042e-05, "loss": 0.1265, "step": 10458 }, { "epoch": 0.23046709304951882, "grad_norm": 0.8833543658256531, "learning_rate": 2.6948117051913874e-05, "loss": 0.1051, "step": 10459 }, { "epoch": 0.23048912833903495, "grad_norm": 0.8778257369995117, "learning_rate": 2.694746979266253e-05, "loss": 0.0958, "step": 10460 }, { "epoch": 0.23051116362855112, "grad_norm": 1.3744752407073975, "learning_rate": 2.6946822472556318e-05, "loss": 0.0977, "step": 10461 }, { "epoch": 0.23053319891806728, "grad_norm": 0.860678493976593, "learning_rate": 2.6946175091598537e-05, "loss": 0.1526, "step": 10462 }, { "epoch": 0.23055523420758345, "grad_norm": 0.6480454206466675, "learning_rate": 2.6945527649792478e-05, "loss": 0.0839, "step": 10463 }, { "epoch": 0.2305772694970996, "grad_norm": 1.0571517944335938, "learning_rate": 2.694488014714144e-05, "loss": 0.0808, "step": 10464 }, { "epoch": 0.23059930478661578, "grad_norm": 1.1165696382522583, "learning_rate": 2.694423258364872e-05, "loss": 0.1213, "step": 10465 }, { "epoch": 0.23062134007613191, "grad_norm": 0.9144888520240784, "learning_rate": 2.694358495931762e-05, "loss": 0.1328, "step": 10466 }, { "epoch": 0.23064337536564808, "grad_norm": 0.9083656668663025, "learning_rate": 2.694293727415144e-05, "loss": 0.1396, "step": 10467 }, { "epoch": 0.23066541065516424, "grad_norm": 0.9120715260505676, "learning_rate": 2.6942289528153474e-05, "loss": 0.1335, "step": 10468 }, { "epoch": 0.2306874459446804, "grad_norm": 0.9973872900009155, "learning_rate": 2.6941641721327022e-05, "loss": 0.1227, "step": 10469 }, { "epoch": 0.23070948123419657, "grad_norm": 0.7810676097869873, "learning_rate": 2.6940993853675386e-05, "loss": 0.113, "step": 10470 }, { "epoch": 0.23073151652371274, "grad_norm": 0.7381459474563599, "learning_rate": 2.6940345925201867e-05, "loss": 0.0931, "step": 10471 }, { "epoch": 0.23075355181322887, "grad_norm": 1.0315476655960083, "learning_rate": 2.693969793590976e-05, "loss": 0.0617, "step": 10472 }, { "epoch": 0.23077558710274504, "grad_norm": 0.7837905287742615, "learning_rate": 2.6939049885802372e-05, "loss": 0.1018, "step": 10473 }, { "epoch": 0.2307976223922612, "grad_norm": 0.8686456084251404, "learning_rate": 2.6938401774882998e-05, "loss": 0.1206, "step": 10474 }, { "epoch": 0.23081965768177737, "grad_norm": 1.2245213985443115, "learning_rate": 2.6937753603154943e-05, "loss": 0.1038, "step": 10475 }, { "epoch": 0.23084169297129353, "grad_norm": 0.6726422309875488, "learning_rate": 2.6937105370621506e-05, "loss": 0.1138, "step": 10476 }, { "epoch": 0.2308637282608097, "grad_norm": 0.9908841252326965, "learning_rate": 2.693645707728599e-05, "loss": 0.1317, "step": 10477 }, { "epoch": 0.23088576355032583, "grad_norm": 1.0202690362930298, "learning_rate": 2.6935808723151705e-05, "loss": 0.1213, "step": 10478 }, { "epoch": 0.230907798839842, "grad_norm": 0.8991684913635254, "learning_rate": 2.6935160308221937e-05, "loss": 0.0931, "step": 10479 }, { "epoch": 0.23092983412935816, "grad_norm": 1.5767625570297241, "learning_rate": 2.69345118325e-05, "loss": 0.1388, "step": 10480 }, { "epoch": 0.23095186941887433, "grad_norm": 0.7006779313087463, "learning_rate": 2.6933863295989194e-05, "loss": 0.0843, "step": 10481 }, { "epoch": 0.2309739047083905, "grad_norm": 1.0994302034378052, "learning_rate": 2.6933214698692823e-05, "loss": 0.1419, "step": 10482 }, { "epoch": 0.23099593999790666, "grad_norm": 0.6490026116371155, "learning_rate": 2.6932566040614194e-05, "loss": 0.1285, "step": 10483 }, { "epoch": 0.2310179752874228, "grad_norm": 0.8039844632148743, "learning_rate": 2.69319173217566e-05, "loss": 0.1165, "step": 10484 }, { "epoch": 0.23104001057693896, "grad_norm": 1.018937349319458, "learning_rate": 2.6931268542123356e-05, "loss": 0.0823, "step": 10485 }, { "epoch": 0.23106204586645512, "grad_norm": 0.9564973711967468, "learning_rate": 2.693061970171777e-05, "loss": 0.1542, "step": 10486 }, { "epoch": 0.2310840811559713, "grad_norm": 0.9962475299835205, "learning_rate": 2.6929970800543133e-05, "loss": 0.1238, "step": 10487 }, { "epoch": 0.23110611644548745, "grad_norm": 1.3179676532745361, "learning_rate": 2.6929321838602758e-05, "loss": 0.1356, "step": 10488 }, { "epoch": 0.23112815173500362, "grad_norm": 0.6826973557472229, "learning_rate": 2.692867281589995e-05, "loss": 0.0733, "step": 10489 }, { "epoch": 0.23115018702451978, "grad_norm": 0.8056627511978149, "learning_rate": 2.6928023732438016e-05, "loss": 0.1467, "step": 10490 }, { "epoch": 0.23117222231403592, "grad_norm": 0.7751525044441223, "learning_rate": 2.6927374588220263e-05, "loss": 0.086, "step": 10491 }, { "epoch": 0.23119425760355208, "grad_norm": 0.8570393919944763, "learning_rate": 2.6926725383249987e-05, "loss": 0.1014, "step": 10492 }, { "epoch": 0.23121629289306825, "grad_norm": 0.6889019012451172, "learning_rate": 2.692607611753051e-05, "loss": 0.0818, "step": 10493 }, { "epoch": 0.2312383281825844, "grad_norm": 1.0450338125228882, "learning_rate": 2.692542679106513e-05, "loss": 0.1094, "step": 10494 }, { "epoch": 0.23126036347210058, "grad_norm": 1.0571506023406982, "learning_rate": 2.6924777403857155e-05, "loss": 0.1332, "step": 10495 }, { "epoch": 0.23128239876161674, "grad_norm": 0.9658380150794983, "learning_rate": 2.6924127955909897e-05, "loss": 0.0691, "step": 10496 }, { "epoch": 0.23130443405113288, "grad_norm": 2.2010393142700195, "learning_rate": 2.692347844722666e-05, "loss": 0.0687, "step": 10497 }, { "epoch": 0.23132646934064904, "grad_norm": 0.5339142084121704, "learning_rate": 2.6922828877810756e-05, "loss": 0.0911, "step": 10498 }, { "epoch": 0.2313485046301652, "grad_norm": 0.8367758393287659, "learning_rate": 2.6922179247665485e-05, "loss": 0.1023, "step": 10499 }, { "epoch": 0.23137053991968137, "grad_norm": 0.6434641480445862, "learning_rate": 2.6921529556794166e-05, "loss": 0.11, "step": 10500 }, { "epoch": 0.23139257520919754, "grad_norm": 0.9434415102005005, "learning_rate": 2.6920879805200105e-05, "loss": 0.1247, "step": 10501 }, { "epoch": 0.2314146104987137, "grad_norm": 1.0592375993728638, "learning_rate": 2.692022999288661e-05, "loss": 0.1297, "step": 10502 }, { "epoch": 0.23143664578822984, "grad_norm": 0.7502210736274719, "learning_rate": 2.6919580119856986e-05, "loss": 0.1397, "step": 10503 }, { "epoch": 0.231458681077746, "grad_norm": 0.8396193385124207, "learning_rate": 2.6918930186114554e-05, "loss": 0.1357, "step": 10504 }, { "epoch": 0.23148071636726217, "grad_norm": 0.95634526014328, "learning_rate": 2.691828019166262e-05, "loss": 0.1231, "step": 10505 }, { "epoch": 0.23150275165677833, "grad_norm": 0.856609582901001, "learning_rate": 2.6917630136504492e-05, "loss": 0.0977, "step": 10506 }, { "epoch": 0.2315247869462945, "grad_norm": 1.037764310836792, "learning_rate": 2.6916980020643486e-05, "loss": 0.0783, "step": 10507 }, { "epoch": 0.23154682223581066, "grad_norm": 1.080040693283081, "learning_rate": 2.6916329844082905e-05, "loss": 0.0975, "step": 10508 }, { "epoch": 0.2315688575253268, "grad_norm": 0.6867284178733826, "learning_rate": 2.691567960682607e-05, "loss": 0.1087, "step": 10509 }, { "epoch": 0.23159089281484296, "grad_norm": 0.8238505125045776, "learning_rate": 2.6915029308876285e-05, "loss": 0.1295, "step": 10510 }, { "epoch": 0.23161292810435913, "grad_norm": 0.9425132274627686, "learning_rate": 2.6914378950236872e-05, "loss": 0.0943, "step": 10511 }, { "epoch": 0.2316349633938753, "grad_norm": 0.6422354578971863, "learning_rate": 2.6913728530911133e-05, "loss": 0.0819, "step": 10512 }, { "epoch": 0.23165699868339146, "grad_norm": 0.9750787615776062, "learning_rate": 2.691307805090239e-05, "loss": 0.0722, "step": 10513 }, { "epoch": 0.23167903397290762, "grad_norm": 0.5594758987426758, "learning_rate": 2.6912427510213952e-05, "loss": 0.079, "step": 10514 }, { "epoch": 0.23170106926242376, "grad_norm": 0.8159303069114685, "learning_rate": 2.6911776908849127e-05, "loss": 0.0972, "step": 10515 }, { "epoch": 0.23172310455193992, "grad_norm": 1.1221511363983154, "learning_rate": 2.691112624681124e-05, "loss": 0.1127, "step": 10516 }, { "epoch": 0.2317451398414561, "grad_norm": 0.9708904027938843, "learning_rate": 2.69104755241036e-05, "loss": 0.0593, "step": 10517 }, { "epoch": 0.23176717513097225, "grad_norm": 1.0211267471313477, "learning_rate": 2.690982474072952e-05, "loss": 0.1058, "step": 10518 }, { "epoch": 0.23178921042048842, "grad_norm": 1.3152161836624146, "learning_rate": 2.6909173896692313e-05, "loss": 0.1353, "step": 10519 }, { "epoch": 0.23181124571000458, "grad_norm": 1.1648303270339966, "learning_rate": 2.69085229919953e-05, "loss": 0.0839, "step": 10520 }, { "epoch": 0.23183328099952072, "grad_norm": 0.911554753780365, "learning_rate": 2.6907872026641792e-05, "loss": 0.09, "step": 10521 }, { "epoch": 0.23185531628903688, "grad_norm": 1.2646372318267822, "learning_rate": 2.6907221000635108e-05, "loss": 0.1359, "step": 10522 }, { "epoch": 0.23187735157855305, "grad_norm": 1.3111988306045532, "learning_rate": 2.6906569913978565e-05, "loss": 0.1334, "step": 10523 }, { "epoch": 0.2318993868680692, "grad_norm": 0.9185410737991333, "learning_rate": 2.6905918766675472e-05, "loss": 0.1049, "step": 10524 }, { "epoch": 0.23192142215758538, "grad_norm": 1.01088547706604, "learning_rate": 2.690526755872915e-05, "loss": 0.1136, "step": 10525 }, { "epoch": 0.23194345744710154, "grad_norm": 0.49696049094200134, "learning_rate": 2.690461629014292e-05, "loss": 0.0765, "step": 10526 }, { "epoch": 0.2319654927366177, "grad_norm": 1.092236042022705, "learning_rate": 2.690396496092009e-05, "loss": 0.1344, "step": 10527 }, { "epoch": 0.23198752802613384, "grad_norm": 1.46549654006958, "learning_rate": 2.690331357106399e-05, "loss": 0.119, "step": 10528 }, { "epoch": 0.23200956331565, "grad_norm": 1.0314947366714478, "learning_rate": 2.6902662120577927e-05, "loss": 0.1028, "step": 10529 }, { "epoch": 0.23203159860516617, "grad_norm": 1.1034209728240967, "learning_rate": 2.6902010609465222e-05, "loss": 0.0867, "step": 10530 }, { "epoch": 0.23205363389468234, "grad_norm": 0.4863007366657257, "learning_rate": 2.6901359037729196e-05, "loss": 0.0733, "step": 10531 }, { "epoch": 0.2320756691841985, "grad_norm": 0.842776358127594, "learning_rate": 2.6900707405373167e-05, "loss": 0.0913, "step": 10532 }, { "epoch": 0.23209770447371467, "grad_norm": 0.8937697410583496, "learning_rate": 2.6900055712400456e-05, "loss": 0.0962, "step": 10533 }, { "epoch": 0.2321197397632308, "grad_norm": 1.0068367719650269, "learning_rate": 2.6899403958814374e-05, "loss": 0.0984, "step": 10534 }, { "epoch": 0.23214177505274697, "grad_norm": 1.4770219326019287, "learning_rate": 2.689875214461825e-05, "loss": 0.1364, "step": 10535 }, { "epoch": 0.23216381034226313, "grad_norm": 1.7013804912567139, "learning_rate": 2.68981002698154e-05, "loss": 0.1134, "step": 10536 }, { "epoch": 0.2321858456317793, "grad_norm": 0.7874033451080322, "learning_rate": 2.689744833440915e-05, "loss": 0.1172, "step": 10537 }, { "epoch": 0.23220788092129546, "grad_norm": 0.8331979513168335, "learning_rate": 2.689679633840281e-05, "loss": 0.0961, "step": 10538 }, { "epoch": 0.23222991621081163, "grad_norm": 1.0605769157409668, "learning_rate": 2.6896144281799705e-05, "loss": 0.1412, "step": 10539 }, { "epoch": 0.23225195150032776, "grad_norm": 1.3901995420455933, "learning_rate": 2.6895492164603162e-05, "loss": 0.1417, "step": 10540 }, { "epoch": 0.23227398678984393, "grad_norm": 1.0885968208312988, "learning_rate": 2.6894839986816496e-05, "loss": 0.111, "step": 10541 }, { "epoch": 0.2322960220793601, "grad_norm": 0.7894212007522583, "learning_rate": 2.6894187748443034e-05, "loss": 0.1342, "step": 10542 }, { "epoch": 0.23231805736887626, "grad_norm": 0.9652902483940125, "learning_rate": 2.6893535449486092e-05, "loss": 0.1035, "step": 10543 }, { "epoch": 0.23234009265839242, "grad_norm": 0.8242504000663757, "learning_rate": 2.6892883089949e-05, "loss": 0.1144, "step": 10544 }, { "epoch": 0.2323621279479086, "grad_norm": 0.934527575969696, "learning_rate": 2.6892230669835076e-05, "loss": 0.1175, "step": 10545 }, { "epoch": 0.23238416323742472, "grad_norm": 0.44507765769958496, "learning_rate": 2.6891578189147643e-05, "loss": 0.1088, "step": 10546 }, { "epoch": 0.2324061985269409, "grad_norm": 1.0261924266815186, "learning_rate": 2.6890925647890028e-05, "loss": 0.1495, "step": 10547 }, { "epoch": 0.23242823381645705, "grad_norm": 1.2481918334960938, "learning_rate": 2.6890273046065546e-05, "loss": 0.1585, "step": 10548 }, { "epoch": 0.23245026910597322, "grad_norm": 1.2364158630371094, "learning_rate": 2.6889620383677535e-05, "loss": 0.1187, "step": 10549 }, { "epoch": 0.23247230439548938, "grad_norm": 0.5974692106246948, "learning_rate": 2.688896766072931e-05, "loss": 0.1021, "step": 10550 }, { "epoch": 0.23249433968500555, "grad_norm": 0.9661501049995422, "learning_rate": 2.6888314877224193e-05, "loss": 0.1182, "step": 10551 }, { "epoch": 0.23251637497452168, "grad_norm": 0.9595692753791809, "learning_rate": 2.6887662033165517e-05, "loss": 0.118, "step": 10552 }, { "epoch": 0.23253841026403785, "grad_norm": 0.9166494607925415, "learning_rate": 2.6887009128556603e-05, "loss": 0.1313, "step": 10553 }, { "epoch": 0.232560445553554, "grad_norm": 0.6908222436904907, "learning_rate": 2.6886356163400777e-05, "loss": 0.1136, "step": 10554 }, { "epoch": 0.23258248084307018, "grad_norm": 0.2474755495786667, "learning_rate": 2.688570313770136e-05, "loss": 0.0939, "step": 10555 }, { "epoch": 0.23260451613258634, "grad_norm": 0.7054906487464905, "learning_rate": 2.688505005146169e-05, "loss": 0.1091, "step": 10556 }, { "epoch": 0.2326265514221025, "grad_norm": 0.9730948805809021, "learning_rate": 2.6884396904685085e-05, "loss": 0.1116, "step": 10557 }, { "epoch": 0.23264858671161864, "grad_norm": 0.9407144784927368, "learning_rate": 2.6883743697374872e-05, "loss": 0.1036, "step": 10558 }, { "epoch": 0.2326706220011348, "grad_norm": 0.8181325197219849, "learning_rate": 2.6883090429534385e-05, "loss": 0.0878, "step": 10559 }, { "epoch": 0.23269265729065097, "grad_norm": 0.901100218296051, "learning_rate": 2.688243710116694e-05, "loss": 0.0989, "step": 10560 }, { "epoch": 0.23271469258016714, "grad_norm": 0.8794867992401123, "learning_rate": 2.6881783712275874e-05, "loss": 0.1208, "step": 10561 }, { "epoch": 0.2327367278696833, "grad_norm": 0.8103960752487183, "learning_rate": 2.6881130262864513e-05, "loss": 0.0951, "step": 10562 }, { "epoch": 0.23275876315919947, "grad_norm": 0.9468051791191101, "learning_rate": 2.688047675293618e-05, "loss": 0.0983, "step": 10563 }, { "epoch": 0.23278079844871563, "grad_norm": 1.175745964050293, "learning_rate": 2.6879823182494213e-05, "loss": 0.1507, "step": 10564 }, { "epoch": 0.23280283373823177, "grad_norm": 0.6130149960517883, "learning_rate": 2.6879169551541933e-05, "loss": 0.0572, "step": 10565 }, { "epoch": 0.23282486902774793, "grad_norm": 0.5665243864059448, "learning_rate": 2.6878515860082677e-05, "loss": 0.0786, "step": 10566 }, { "epoch": 0.2328469043172641, "grad_norm": 1.0063015222549438, "learning_rate": 2.6877862108119763e-05, "loss": 0.1125, "step": 10567 }, { "epoch": 0.23286893960678026, "grad_norm": 0.8186681866645813, "learning_rate": 2.6877208295656534e-05, "loss": 0.1076, "step": 10568 }, { "epoch": 0.23289097489629643, "grad_norm": 1.568628191947937, "learning_rate": 2.6876554422696312e-05, "loss": 0.1441, "step": 10569 }, { "epoch": 0.2329130101858126, "grad_norm": 2.92563533782959, "learning_rate": 2.6875900489242427e-05, "loss": 0.147, "step": 10570 }, { "epoch": 0.23293504547532873, "grad_norm": 0.7867268919944763, "learning_rate": 2.6875246495298215e-05, "loss": 0.1252, "step": 10571 }, { "epoch": 0.2329570807648449, "grad_norm": 0.5029639601707458, "learning_rate": 2.6874592440867e-05, "loss": 0.0861, "step": 10572 }, { "epoch": 0.23297911605436106, "grad_norm": 1.6157113313674927, "learning_rate": 2.6873938325952123e-05, "loss": 0.1245, "step": 10573 }, { "epoch": 0.23300115134387722, "grad_norm": 1.193426251411438, "learning_rate": 2.6873284150556913e-05, "loss": 0.1428, "step": 10574 }, { "epoch": 0.2330231866333934, "grad_norm": 1.0092161893844604, "learning_rate": 2.68726299146847e-05, "loss": 0.1136, "step": 10575 }, { "epoch": 0.23304522192290955, "grad_norm": 0.7686318755149841, "learning_rate": 2.687197561833881e-05, "loss": 0.103, "step": 10576 }, { "epoch": 0.2330672572124257, "grad_norm": 0.70069819688797, "learning_rate": 2.6871321261522583e-05, "loss": 0.0968, "step": 10577 }, { "epoch": 0.23308929250194185, "grad_norm": 0.8097330331802368, "learning_rate": 2.687066684423935e-05, "loss": 0.1747, "step": 10578 }, { "epoch": 0.23311132779145802, "grad_norm": 0.7986325025558472, "learning_rate": 2.6870012366492452e-05, "loss": 0.1005, "step": 10579 }, { "epoch": 0.23313336308097418, "grad_norm": 0.7479994893074036, "learning_rate": 2.686935782828521e-05, "loss": 0.1011, "step": 10580 }, { "epoch": 0.23315539837049035, "grad_norm": 0.9867478609085083, "learning_rate": 2.6868703229620966e-05, "loss": 0.1122, "step": 10581 }, { "epoch": 0.2331774336600065, "grad_norm": 1.1780656576156616, "learning_rate": 2.6868048570503052e-05, "loss": 0.1317, "step": 10582 }, { "epoch": 0.23319946894952265, "grad_norm": 0.9321885704994202, "learning_rate": 2.6867393850934805e-05, "loss": 0.1117, "step": 10583 }, { "epoch": 0.2332215042390388, "grad_norm": 0.7685291767120361, "learning_rate": 2.686673907091955e-05, "loss": 0.1028, "step": 10584 }, { "epoch": 0.23324353952855498, "grad_norm": 1.212493658065796, "learning_rate": 2.6866084230460636e-05, "loss": 0.1085, "step": 10585 }, { "epoch": 0.23326557481807114, "grad_norm": 0.8518898487091064, "learning_rate": 2.686542932956139e-05, "loss": 0.0805, "step": 10586 }, { "epoch": 0.2332876101075873, "grad_norm": 1.0590990781784058, "learning_rate": 2.6864774368225147e-05, "loss": 0.1003, "step": 10587 }, { "epoch": 0.23330964539710347, "grad_norm": 0.8995510935783386, "learning_rate": 2.686411934645525e-05, "loss": 0.1222, "step": 10588 }, { "epoch": 0.2333316806866196, "grad_norm": 0.7258545160293579, "learning_rate": 2.686346426425503e-05, "loss": 0.1017, "step": 10589 }, { "epoch": 0.23335371597613577, "grad_norm": 0.7766638994216919, "learning_rate": 2.6862809121627823e-05, "loss": 0.1127, "step": 10590 }, { "epoch": 0.23337575126565194, "grad_norm": 0.6793697476387024, "learning_rate": 2.6862153918576972e-05, "loss": 0.116, "step": 10591 }, { "epoch": 0.2333977865551681, "grad_norm": 0.9632986187934875, "learning_rate": 2.6861498655105803e-05, "loss": 0.1207, "step": 10592 }, { "epoch": 0.23341982184468427, "grad_norm": 0.8310210108757019, "learning_rate": 2.6860843331217665e-05, "loss": 0.0879, "step": 10593 }, { "epoch": 0.23344185713420043, "grad_norm": 0.4978277385234833, "learning_rate": 2.686018794691589e-05, "loss": 0.0929, "step": 10594 }, { "epoch": 0.2334638924237166, "grad_norm": 0.9244274497032166, "learning_rate": 2.685953250220382e-05, "loss": 0.1006, "step": 10595 }, { "epoch": 0.23348592771323273, "grad_norm": 0.9141243100166321, "learning_rate": 2.685887699708479e-05, "loss": 0.1088, "step": 10596 }, { "epoch": 0.2335079630027489, "grad_norm": 0.5509089231491089, "learning_rate": 2.685822143156214e-05, "loss": 0.124, "step": 10597 }, { "epoch": 0.23352999829226506, "grad_norm": 0.748187243938446, "learning_rate": 2.6857565805639216e-05, "loss": 0.0855, "step": 10598 }, { "epoch": 0.23355203358178123, "grad_norm": 0.5711525082588196, "learning_rate": 2.6856910119319344e-05, "loss": 0.131, "step": 10599 }, { "epoch": 0.2335740688712974, "grad_norm": 0.9208478331565857, "learning_rate": 2.6856254372605866e-05, "loss": 0.1361, "step": 10600 }, { "epoch": 0.23359610416081356, "grad_norm": 0.6576024889945984, "learning_rate": 2.6855598565502135e-05, "loss": 0.1287, "step": 10601 }, { "epoch": 0.2336181394503297, "grad_norm": 1.455107569694519, "learning_rate": 2.685494269801148e-05, "loss": 0.1309, "step": 10602 }, { "epoch": 0.23364017473984586, "grad_norm": 0.6894576549530029, "learning_rate": 2.6854286770137243e-05, "loss": 0.1132, "step": 10603 }, { "epoch": 0.23366221002936202, "grad_norm": 0.8690942525863647, "learning_rate": 2.685363078188277e-05, "loss": 0.0898, "step": 10604 }, { "epoch": 0.2336842453188782, "grad_norm": 2.052852153778076, "learning_rate": 2.68529747332514e-05, "loss": 0.093, "step": 10605 }, { "epoch": 0.23370628060839435, "grad_norm": 1.0452935695648193, "learning_rate": 2.6852318624246466e-05, "loss": 0.134, "step": 10606 }, { "epoch": 0.23372831589791052, "grad_norm": 0.7323994636535645, "learning_rate": 2.6851662454871328e-05, "loss": 0.1052, "step": 10607 }, { "epoch": 0.23375035118742665, "grad_norm": 0.52667635679245, "learning_rate": 2.685100622512931e-05, "loss": 0.0585, "step": 10608 }, { "epoch": 0.23377238647694282, "grad_norm": 0.8624679446220398, "learning_rate": 2.6850349935023767e-05, "loss": 0.0882, "step": 10609 }, { "epoch": 0.23379442176645898, "grad_norm": 0.8074585795402527, "learning_rate": 2.6849693584558036e-05, "loss": 0.0839, "step": 10610 }, { "epoch": 0.23381645705597515, "grad_norm": 0.653802752494812, "learning_rate": 2.6849037173735458e-05, "loss": 0.1114, "step": 10611 }, { "epoch": 0.2338384923454913, "grad_norm": 1.1546379327774048, "learning_rate": 2.684838070255938e-05, "loss": 0.1236, "step": 10612 }, { "epoch": 0.23386052763500748, "grad_norm": 0.800040066242218, "learning_rate": 2.6847724171033146e-05, "loss": 0.0634, "step": 10613 }, { "epoch": 0.2338825629245236, "grad_norm": 1.3951607942581177, "learning_rate": 2.68470675791601e-05, "loss": 0.1704, "step": 10614 }, { "epoch": 0.23390459821403978, "grad_norm": 0.753707230091095, "learning_rate": 2.684641092694359e-05, "loss": 0.0998, "step": 10615 }, { "epoch": 0.23392663350355594, "grad_norm": 0.677006721496582, "learning_rate": 2.6845754214386953e-05, "loss": 0.1177, "step": 10616 }, { "epoch": 0.2339486687930721, "grad_norm": 0.4563770890235901, "learning_rate": 2.684509744149354e-05, "loss": 0.0787, "step": 10617 }, { "epoch": 0.23397070408258827, "grad_norm": 0.6513388156890869, "learning_rate": 2.684444060826669e-05, "loss": 0.0828, "step": 10618 }, { "epoch": 0.23399273937210444, "grad_norm": 0.9055618047714233, "learning_rate": 2.6843783714709754e-05, "loss": 0.0859, "step": 10619 }, { "epoch": 0.23401477466162057, "grad_norm": 0.91741544008255, "learning_rate": 2.684312676082608e-05, "loss": 0.1297, "step": 10620 }, { "epoch": 0.23403680995113674, "grad_norm": 0.2870784401893616, "learning_rate": 2.6842469746619005e-05, "loss": 0.1029, "step": 10621 }, { "epoch": 0.2340588452406529, "grad_norm": 0.8924667835235596, "learning_rate": 2.6841812672091882e-05, "loss": 0.0592, "step": 10622 }, { "epoch": 0.23408088053016907, "grad_norm": 0.8215535879135132, "learning_rate": 2.684115553724806e-05, "loss": 0.0839, "step": 10623 }, { "epoch": 0.23410291581968523, "grad_norm": 0.8255791068077087, "learning_rate": 2.684049834209088e-05, "loss": 0.076, "step": 10624 }, { "epoch": 0.2341249511092014, "grad_norm": 0.899302065372467, "learning_rate": 2.6839841086623693e-05, "loss": 0.1039, "step": 10625 }, { "epoch": 0.23414698639871753, "grad_norm": 1.2712563276290894, "learning_rate": 2.683918377084985e-05, "loss": 0.1308, "step": 10626 }, { "epoch": 0.2341690216882337, "grad_norm": 0.6591161489486694, "learning_rate": 2.6838526394772694e-05, "loss": 0.0727, "step": 10627 }, { "epoch": 0.23419105697774986, "grad_norm": 1.8174165487289429, "learning_rate": 2.6837868958395576e-05, "loss": 0.1433, "step": 10628 }, { "epoch": 0.23421309226726603, "grad_norm": 1.710228443145752, "learning_rate": 2.6837211461721835e-05, "loss": 0.1171, "step": 10629 }, { "epoch": 0.2342351275567822, "grad_norm": 0.9561527371406555, "learning_rate": 2.683655390475484e-05, "loss": 0.0834, "step": 10630 }, { "epoch": 0.23425716284629836, "grad_norm": 0.5985528826713562, "learning_rate": 2.6835896287497925e-05, "loss": 0.0984, "step": 10631 }, { "epoch": 0.23427919813581452, "grad_norm": 0.8021051287651062, "learning_rate": 2.6835238609954437e-05, "loss": 0.1119, "step": 10632 }, { "epoch": 0.23430123342533066, "grad_norm": 0.8312442898750305, "learning_rate": 2.6834580872127736e-05, "loss": 0.0859, "step": 10633 }, { "epoch": 0.23432326871484682, "grad_norm": 0.965106189250946, "learning_rate": 2.683392307402117e-05, "loss": 0.1613, "step": 10634 }, { "epoch": 0.234345304004363, "grad_norm": 0.8172258734703064, "learning_rate": 2.683326521563809e-05, "loss": 0.0812, "step": 10635 }, { "epoch": 0.23436733929387915, "grad_norm": 0.9985284805297852, "learning_rate": 2.683260729698184e-05, "loss": 0.1085, "step": 10636 }, { "epoch": 0.23438937458339532, "grad_norm": 0.8013941049575806, "learning_rate": 2.6831949318055776e-05, "loss": 0.0657, "step": 10637 }, { "epoch": 0.23441140987291148, "grad_norm": 0.48875489830970764, "learning_rate": 2.6831291278863254e-05, "loss": 0.1339, "step": 10638 }, { "epoch": 0.23443344516242762, "grad_norm": 0.8128037452697754, "learning_rate": 2.6830633179407616e-05, "loss": 0.1099, "step": 10639 }, { "epoch": 0.23445548045194378, "grad_norm": 1.039552927017212, "learning_rate": 2.6829975019692222e-05, "loss": 0.1158, "step": 10640 }, { "epoch": 0.23447751574145995, "grad_norm": 1.1880359649658203, "learning_rate": 2.682931679972042e-05, "loss": 0.1141, "step": 10641 }, { "epoch": 0.2344995510309761, "grad_norm": 1.5029288530349731, "learning_rate": 2.6828658519495564e-05, "loss": 0.1285, "step": 10642 }, { "epoch": 0.23452158632049228, "grad_norm": 1.1280877590179443, "learning_rate": 2.6828000179021008e-05, "loss": 0.0916, "step": 10643 }, { "epoch": 0.23454362161000844, "grad_norm": 0.7577630877494812, "learning_rate": 2.6827341778300105e-05, "loss": 0.1071, "step": 10644 }, { "epoch": 0.23456565689952458, "grad_norm": 0.7053540349006653, "learning_rate": 2.6826683317336203e-05, "loss": 0.1075, "step": 10645 }, { "epoch": 0.23458769218904074, "grad_norm": 1.1281236410140991, "learning_rate": 2.682602479613267e-05, "loss": 0.1231, "step": 10646 }, { "epoch": 0.2346097274785569, "grad_norm": 0.6379018425941467, "learning_rate": 2.6825366214692846e-05, "loss": 0.0943, "step": 10647 }, { "epoch": 0.23463176276807307, "grad_norm": 0.5669849514961243, "learning_rate": 2.682470757302009e-05, "loss": 0.1096, "step": 10648 }, { "epoch": 0.23465379805758924, "grad_norm": 0.998412549495697, "learning_rate": 2.6824048871117757e-05, "loss": 0.1048, "step": 10649 }, { "epoch": 0.2346758333471054, "grad_norm": 1.2084907293319702, "learning_rate": 2.6823390108989207e-05, "loss": 0.1521, "step": 10650 }, { "epoch": 0.23469786863662154, "grad_norm": 0.5343531966209412, "learning_rate": 2.6822731286637787e-05, "loss": 0.0976, "step": 10651 }, { "epoch": 0.2347199039261377, "grad_norm": 0.9684785008430481, "learning_rate": 2.6822072404066858e-05, "loss": 0.1139, "step": 10652 }, { "epoch": 0.23474193921565387, "grad_norm": 0.9128878712654114, "learning_rate": 2.682141346127977e-05, "loss": 0.1334, "step": 10653 }, { "epoch": 0.23476397450517003, "grad_norm": 1.6896904706954956, "learning_rate": 2.6820754458279895e-05, "loss": 0.1024, "step": 10654 }, { "epoch": 0.2347860097946862, "grad_norm": 1.1318435668945312, "learning_rate": 2.6820095395070567e-05, "loss": 0.0992, "step": 10655 }, { "epoch": 0.23480804508420236, "grad_norm": 0.8397793769836426, "learning_rate": 2.681943627165516e-05, "loss": 0.106, "step": 10656 }, { "epoch": 0.2348300803737185, "grad_norm": 0.8060338497161865, "learning_rate": 2.681877708803703e-05, "loss": 0.112, "step": 10657 }, { "epoch": 0.23485211566323466, "grad_norm": 0.7369317412376404, "learning_rate": 2.6818117844219524e-05, "loss": 0.0811, "step": 10658 }, { "epoch": 0.23487415095275083, "grad_norm": 0.9458054900169373, "learning_rate": 2.681745854020601e-05, "loss": 0.1129, "step": 10659 }, { "epoch": 0.234896186242267, "grad_norm": 0.991948127746582, "learning_rate": 2.681679917599984e-05, "loss": 0.1594, "step": 10660 }, { "epoch": 0.23491822153178316, "grad_norm": 0.5481213331222534, "learning_rate": 2.6816139751604378e-05, "loss": 0.0972, "step": 10661 }, { "epoch": 0.23494025682129932, "grad_norm": 0.7911333441734314, "learning_rate": 2.6815480267022976e-05, "loss": 0.1215, "step": 10662 }, { "epoch": 0.23496229211081546, "grad_norm": 1.3578014373779297, "learning_rate": 2.6814820722258998e-05, "loss": 0.1267, "step": 10663 }, { "epoch": 0.23498432740033162, "grad_norm": 0.9735850691795349, "learning_rate": 2.6814161117315806e-05, "loss": 0.1115, "step": 10664 }, { "epoch": 0.2350063626898478, "grad_norm": 2.967439889907837, "learning_rate": 2.6813501452196758e-05, "loss": 0.0818, "step": 10665 }, { "epoch": 0.23502839797936395, "grad_norm": 1.6941564083099365, "learning_rate": 2.6812841726905207e-05, "loss": 0.1186, "step": 10666 }, { "epoch": 0.23505043326888012, "grad_norm": 0.7760751843452454, "learning_rate": 2.681218194144452e-05, "loss": 0.1166, "step": 10667 }, { "epoch": 0.23507246855839628, "grad_norm": 0.9745524525642395, "learning_rate": 2.6811522095818057e-05, "loss": 0.0951, "step": 10668 }, { "epoch": 0.23509450384791244, "grad_norm": 0.9391095042228699, "learning_rate": 2.6810862190029176e-05, "loss": 0.0932, "step": 10669 }, { "epoch": 0.23511653913742858, "grad_norm": 1.1529661417007446, "learning_rate": 2.681020222408124e-05, "loss": 0.0977, "step": 10670 }, { "epoch": 0.23513857442694475, "grad_norm": 0.6089633703231812, "learning_rate": 2.6809542197977613e-05, "loss": 0.1047, "step": 10671 }, { "epoch": 0.2351606097164609, "grad_norm": 1.2195146083831787, "learning_rate": 2.6808882111721654e-05, "loss": 0.1367, "step": 10672 }, { "epoch": 0.23518264500597708, "grad_norm": 0.8064125776290894, "learning_rate": 2.680822196531673e-05, "loss": 0.131, "step": 10673 }, { "epoch": 0.23520468029549324, "grad_norm": 1.034319519996643, "learning_rate": 2.6807561758766192e-05, "loss": 0.1273, "step": 10674 }, { "epoch": 0.2352267155850094, "grad_norm": 0.9408929347991943, "learning_rate": 2.6806901492073418e-05, "loss": 0.1197, "step": 10675 }, { "epoch": 0.23524875087452554, "grad_norm": 0.3621422052383423, "learning_rate": 2.6806241165241755e-05, "loss": 0.0728, "step": 10676 }, { "epoch": 0.2352707861640417, "grad_norm": 1.1741881370544434, "learning_rate": 2.6805580778274582e-05, "loss": 0.1436, "step": 10677 }, { "epoch": 0.23529282145355787, "grad_norm": 0.8665832877159119, "learning_rate": 2.6804920331175252e-05, "loss": 0.1213, "step": 10678 }, { "epoch": 0.23531485674307404, "grad_norm": 1.050108790397644, "learning_rate": 2.680425982394713e-05, "loss": 0.0998, "step": 10679 }, { "epoch": 0.2353368920325902, "grad_norm": 1.0257750749588013, "learning_rate": 2.6803599256593587e-05, "loss": 0.1519, "step": 10680 }, { "epoch": 0.23535892732210636, "grad_norm": 0.8775188326835632, "learning_rate": 2.6802938629117985e-05, "loss": 0.0996, "step": 10681 }, { "epoch": 0.2353809626116225, "grad_norm": 0.7238379716873169, "learning_rate": 2.6802277941523684e-05, "loss": 0.0842, "step": 10682 }, { "epoch": 0.23540299790113867, "grad_norm": 0.7199122905731201, "learning_rate": 2.6801617193814055e-05, "loss": 0.093, "step": 10683 }, { "epoch": 0.23542503319065483, "grad_norm": 0.6197943091392517, "learning_rate": 2.6800956385992456e-05, "loss": 0.075, "step": 10684 }, { "epoch": 0.235447068480171, "grad_norm": 1.0929927825927734, "learning_rate": 2.6800295518062262e-05, "loss": 0.1099, "step": 10685 }, { "epoch": 0.23546910376968716, "grad_norm": 0.840121865272522, "learning_rate": 2.6799634590026834e-05, "loss": 0.1027, "step": 10686 }, { "epoch": 0.23549113905920332, "grad_norm": 0.9317470192909241, "learning_rate": 2.6798973601889542e-05, "loss": 0.0981, "step": 10687 }, { "epoch": 0.23551317434871946, "grad_norm": 1.0584660768508911, "learning_rate": 2.6798312553653745e-05, "loss": 0.1056, "step": 10688 }, { "epoch": 0.23553520963823563, "grad_norm": 1.3802289962768555, "learning_rate": 2.679765144532282e-05, "loss": 0.1067, "step": 10689 }, { "epoch": 0.2355572449277518, "grad_norm": 1.0256288051605225, "learning_rate": 2.679699027690013e-05, "loss": 0.1212, "step": 10690 }, { "epoch": 0.23557928021726796, "grad_norm": 0.7297626733779907, "learning_rate": 2.679632904838904e-05, "loss": 0.1167, "step": 10691 }, { "epoch": 0.23560131550678412, "grad_norm": 0.6352612376213074, "learning_rate": 2.6795667759792922e-05, "loss": 0.0708, "step": 10692 }, { "epoch": 0.23562335079630028, "grad_norm": 0.80258709192276, "learning_rate": 2.679500641111514e-05, "loss": 0.1359, "step": 10693 }, { "epoch": 0.23564538608581642, "grad_norm": 1.4687920808792114, "learning_rate": 2.679434500235907e-05, "loss": 0.1321, "step": 10694 }, { "epoch": 0.2356674213753326, "grad_norm": 0.49106407165527344, "learning_rate": 2.6793683533528073e-05, "loss": 0.1121, "step": 10695 }, { "epoch": 0.23568945666484875, "grad_norm": 0.8860142230987549, "learning_rate": 2.679302200462552e-05, "loss": 0.14, "step": 10696 }, { "epoch": 0.23571149195436492, "grad_norm": 0.9594093561172485, "learning_rate": 2.679236041565479e-05, "loss": 0.0868, "step": 10697 }, { "epoch": 0.23573352724388108, "grad_norm": 0.8517051935195923, "learning_rate": 2.6791698766619235e-05, "loss": 0.1006, "step": 10698 }, { "epoch": 0.23575556253339724, "grad_norm": 1.0277196168899536, "learning_rate": 2.6791037057522237e-05, "loss": 0.1373, "step": 10699 }, { "epoch": 0.2357775978229134, "grad_norm": 0.7854055166244507, "learning_rate": 2.679037528836717e-05, "loss": 0.0668, "step": 10700 }, { "epoch": 0.23579963311242955, "grad_norm": 0.5927215218544006, "learning_rate": 2.6789713459157395e-05, "loss": 0.1015, "step": 10701 }, { "epoch": 0.2358216684019457, "grad_norm": 0.9507991671562195, "learning_rate": 2.678905156989629e-05, "loss": 0.1188, "step": 10702 }, { "epoch": 0.23584370369146188, "grad_norm": 0.784814715385437, "learning_rate": 2.678838962058722e-05, "loss": 0.079, "step": 10703 }, { "epoch": 0.23586573898097804, "grad_norm": 0.764214277267456, "learning_rate": 2.678772761123356e-05, "loss": 0.152, "step": 10704 }, { "epoch": 0.2358877742704942, "grad_norm": 0.8812594413757324, "learning_rate": 2.6787065541838683e-05, "loss": 0.1076, "step": 10705 }, { "epoch": 0.23590980956001037, "grad_norm": 1.0851426124572754, "learning_rate": 2.6786403412405963e-05, "loss": 0.1167, "step": 10706 }, { "epoch": 0.2359318448495265, "grad_norm": 0.8127404451370239, "learning_rate": 2.678574122293877e-05, "loss": 0.1093, "step": 10707 }, { "epoch": 0.23595388013904267, "grad_norm": 0.7461082935333252, "learning_rate": 2.6785078973440473e-05, "loss": 0.098, "step": 10708 }, { "epoch": 0.23597591542855884, "grad_norm": 0.6431834697723389, "learning_rate": 2.6784416663914454e-05, "loss": 0.1105, "step": 10709 }, { "epoch": 0.235997950718075, "grad_norm": 0.9954922795295715, "learning_rate": 2.6783754294364078e-05, "loss": 0.1187, "step": 10710 }, { "epoch": 0.23601998600759116, "grad_norm": 1.9929125308990479, "learning_rate": 2.678309186479272e-05, "loss": 0.0941, "step": 10711 }, { "epoch": 0.23604202129710733, "grad_norm": 1.2193244695663452, "learning_rate": 2.678242937520376e-05, "loss": 0.0828, "step": 10712 }, { "epoch": 0.23606405658662347, "grad_norm": 0.7878785729408264, "learning_rate": 2.678176682560057e-05, "loss": 0.0951, "step": 10713 }, { "epoch": 0.23608609187613963, "grad_norm": 2.4848175048828125, "learning_rate": 2.678110421598652e-05, "loss": 0.0852, "step": 10714 }, { "epoch": 0.2361081271656558, "grad_norm": 0.9355248808860779, "learning_rate": 2.6780441546364993e-05, "loss": 0.0819, "step": 10715 }, { "epoch": 0.23613016245517196, "grad_norm": 1.0254137516021729, "learning_rate": 2.677977881673936e-05, "loss": 0.1312, "step": 10716 }, { "epoch": 0.23615219774468812, "grad_norm": 1.0338454246520996, "learning_rate": 2.6779116027112992e-05, "loss": 0.1167, "step": 10717 }, { "epoch": 0.2361742330342043, "grad_norm": 0.7755290269851685, "learning_rate": 2.677845317748928e-05, "loss": 0.0928, "step": 10718 }, { "epoch": 0.23619626832372043, "grad_norm": 0.8778491616249084, "learning_rate": 2.6777790267871577e-05, "loss": 0.0768, "step": 10719 }, { "epoch": 0.2362183036132366, "grad_norm": 0.8149794340133667, "learning_rate": 2.677712729826328e-05, "loss": 0.0912, "step": 10720 }, { "epoch": 0.23624033890275276, "grad_norm": 1.16832435131073, "learning_rate": 2.6776464268667753e-05, "loss": 0.0938, "step": 10721 }, { "epoch": 0.23626237419226892, "grad_norm": 0.5585132837295532, "learning_rate": 2.677580117908838e-05, "loss": 0.0952, "step": 10722 }, { "epoch": 0.23628440948178508, "grad_norm": 0.7614457607269287, "learning_rate": 2.677513802952854e-05, "loss": 0.0724, "step": 10723 }, { "epoch": 0.23630644477130125, "grad_norm": 1.3747650384902954, "learning_rate": 2.6774474819991608e-05, "loss": 0.1209, "step": 10724 }, { "epoch": 0.2363284800608174, "grad_norm": 1.248213529586792, "learning_rate": 2.677381155048096e-05, "loss": 0.1035, "step": 10725 }, { "epoch": 0.23635051535033355, "grad_norm": 1.0000602006912231, "learning_rate": 2.6773148220999977e-05, "loss": 0.1261, "step": 10726 }, { "epoch": 0.23637255063984972, "grad_norm": 1.0707706212997437, "learning_rate": 2.6772484831552036e-05, "loss": 0.1403, "step": 10727 }, { "epoch": 0.23639458592936588, "grad_norm": 0.9384280443191528, "learning_rate": 2.6771821382140514e-05, "loss": 0.1237, "step": 10728 }, { "epoch": 0.23641662121888204, "grad_norm": 0.7848973870277405, "learning_rate": 2.6771157872768798e-05, "loss": 0.1287, "step": 10729 }, { "epoch": 0.2364386565083982, "grad_norm": 0.9561944007873535, "learning_rate": 2.677049430344026e-05, "loss": 0.1109, "step": 10730 }, { "epoch": 0.23646069179791435, "grad_norm": 0.8375077843666077, "learning_rate": 2.676983067415828e-05, "loss": 0.1163, "step": 10731 }, { "epoch": 0.2364827270874305, "grad_norm": 0.8049051761627197, "learning_rate": 2.676916698492625e-05, "loss": 0.1304, "step": 10732 }, { "epoch": 0.23650476237694668, "grad_norm": 0.8817855715751648, "learning_rate": 2.6768503235747532e-05, "loss": 0.0883, "step": 10733 }, { "epoch": 0.23652679766646284, "grad_norm": 0.7053149342536926, "learning_rate": 2.676783942662552e-05, "loss": 0.0978, "step": 10734 }, { "epoch": 0.236548832955979, "grad_norm": 1.85872220993042, "learning_rate": 2.676717555756359e-05, "loss": 0.1072, "step": 10735 }, { "epoch": 0.23657086824549517, "grad_norm": 0.9232239127159119, "learning_rate": 2.6766511628565127e-05, "loss": 0.0882, "step": 10736 }, { "epoch": 0.23659290353501133, "grad_norm": 1.1186646223068237, "learning_rate": 2.6765847639633514e-05, "loss": 0.1177, "step": 10737 }, { "epoch": 0.23661493882452747, "grad_norm": 0.7026121020317078, "learning_rate": 2.676518359077212e-05, "loss": 0.1296, "step": 10738 }, { "epoch": 0.23663697411404364, "grad_norm": 3.1848132610321045, "learning_rate": 2.6764519481984345e-05, "loss": 0.0976, "step": 10739 }, { "epoch": 0.2366590094035598, "grad_norm": 1.0628941059112549, "learning_rate": 2.676385531327356e-05, "loss": 0.1247, "step": 10740 }, { "epoch": 0.23668104469307596, "grad_norm": 0.7637186050415039, "learning_rate": 2.6763191084643152e-05, "loss": 0.1061, "step": 10741 }, { "epoch": 0.23670307998259213, "grad_norm": 0.7435016632080078, "learning_rate": 2.6762526796096507e-05, "loss": 0.0744, "step": 10742 }, { "epoch": 0.2367251152721083, "grad_norm": 1.396461844444275, "learning_rate": 2.6761862447637002e-05, "loss": 0.0977, "step": 10743 }, { "epoch": 0.23674715056162443, "grad_norm": 0.5916118025779724, "learning_rate": 2.6761198039268026e-05, "loss": 0.075, "step": 10744 }, { "epoch": 0.2367691858511406, "grad_norm": 0.6941571831703186, "learning_rate": 2.676053357099296e-05, "loss": 0.1062, "step": 10745 }, { "epoch": 0.23679122114065676, "grad_norm": 0.9106974005699158, "learning_rate": 2.6759869042815187e-05, "loss": 0.1144, "step": 10746 }, { "epoch": 0.23681325643017292, "grad_norm": 0.694072425365448, "learning_rate": 2.6759204454738102e-05, "loss": 0.078, "step": 10747 }, { "epoch": 0.2368352917196891, "grad_norm": 0.9722704291343689, "learning_rate": 2.675853980676508e-05, "loss": 0.1074, "step": 10748 }, { "epoch": 0.23685732700920525, "grad_norm": 1.053952693939209, "learning_rate": 2.6757875098899508e-05, "loss": 0.1101, "step": 10749 }, { "epoch": 0.2368793622987214, "grad_norm": 0.9174346923828125, "learning_rate": 2.6757210331144772e-05, "loss": 0.0845, "step": 10750 }, { "epoch": 0.23690139758823756, "grad_norm": 0.6965460777282715, "learning_rate": 2.675654550350426e-05, "loss": 0.0738, "step": 10751 }, { "epoch": 0.23692343287775372, "grad_norm": 0.80517578125, "learning_rate": 2.675588061598136e-05, "loss": 0.1072, "step": 10752 }, { "epoch": 0.23694546816726988, "grad_norm": 0.7571510672569275, "learning_rate": 2.675521566857946e-05, "loss": 0.147, "step": 10753 }, { "epoch": 0.23696750345678605, "grad_norm": 1.0812140703201294, "learning_rate": 2.6754550661301935e-05, "loss": 0.112, "step": 10754 }, { "epoch": 0.23698953874630221, "grad_norm": 0.7206763625144958, "learning_rate": 2.6753885594152183e-05, "loss": 0.0999, "step": 10755 }, { "epoch": 0.23701157403581835, "grad_norm": 0.8206133246421814, "learning_rate": 2.6753220467133586e-05, "loss": 0.1223, "step": 10756 }, { "epoch": 0.23703360932533452, "grad_norm": 0.841189980506897, "learning_rate": 2.675255528024954e-05, "loss": 0.1051, "step": 10757 }, { "epoch": 0.23705564461485068, "grad_norm": 0.7850548028945923, "learning_rate": 2.6751890033503422e-05, "loss": 0.0955, "step": 10758 }, { "epoch": 0.23707767990436684, "grad_norm": 1.0747730731964111, "learning_rate": 2.6751224726898628e-05, "loss": 0.1311, "step": 10759 }, { "epoch": 0.237099715193883, "grad_norm": 0.729228675365448, "learning_rate": 2.6750559360438545e-05, "loss": 0.1264, "step": 10760 }, { "epoch": 0.23712175048339917, "grad_norm": 0.8142217397689819, "learning_rate": 2.6749893934126566e-05, "loss": 0.0958, "step": 10761 }, { "epoch": 0.2371437857729153, "grad_norm": 0.5716840028762817, "learning_rate": 2.674922844796607e-05, "loss": 0.117, "step": 10762 }, { "epoch": 0.23716582106243148, "grad_norm": 0.9551562666893005, "learning_rate": 2.6748562901960455e-05, "loss": 0.1264, "step": 10763 }, { "epoch": 0.23718785635194764, "grad_norm": 0.7180691957473755, "learning_rate": 2.674789729611311e-05, "loss": 0.0872, "step": 10764 }, { "epoch": 0.2372098916414638, "grad_norm": 1.0284594297409058, "learning_rate": 2.674723163042743e-05, "loss": 0.1276, "step": 10765 }, { "epoch": 0.23723192693097997, "grad_norm": 1.2842047214508057, "learning_rate": 2.6746565904906795e-05, "loss": 0.1272, "step": 10766 }, { "epoch": 0.23725396222049613, "grad_norm": 1.8720670938491821, "learning_rate": 2.67459001195546e-05, "loss": 0.0873, "step": 10767 }, { "epoch": 0.23727599751001227, "grad_norm": 1.207825779914856, "learning_rate": 2.6745234274374235e-05, "loss": 0.1264, "step": 10768 }, { "epoch": 0.23729803279952844, "grad_norm": 0.9086560010910034, "learning_rate": 2.6744568369369097e-05, "loss": 0.1381, "step": 10769 }, { "epoch": 0.2373200680890446, "grad_norm": 0.9994848370552063, "learning_rate": 2.6743902404542572e-05, "loss": 0.1234, "step": 10770 }, { "epoch": 0.23734210337856076, "grad_norm": 0.956691563129425, "learning_rate": 2.6743236379898055e-05, "loss": 0.1188, "step": 10771 }, { "epoch": 0.23736413866807693, "grad_norm": 0.88001948595047, "learning_rate": 2.674257029543894e-05, "loss": 0.1265, "step": 10772 }, { "epoch": 0.2373861739575931, "grad_norm": 0.9923413991928101, "learning_rate": 2.6741904151168618e-05, "loss": 0.1241, "step": 10773 }, { "epoch": 0.23740820924710926, "grad_norm": 1.5197688341140747, "learning_rate": 2.6741237947090478e-05, "loss": 0.18, "step": 10774 }, { "epoch": 0.2374302445366254, "grad_norm": 0.9510436654090881, "learning_rate": 2.674057168320792e-05, "loss": 0.1042, "step": 10775 }, { "epoch": 0.23745227982614156, "grad_norm": 0.9208258390426636, "learning_rate": 2.6739905359524332e-05, "loss": 0.1121, "step": 10776 }, { "epoch": 0.23747431511565772, "grad_norm": 0.9161277413368225, "learning_rate": 2.6739238976043113e-05, "loss": 0.0951, "step": 10777 }, { "epoch": 0.2374963504051739, "grad_norm": 0.876591145992279, "learning_rate": 2.6738572532767656e-05, "loss": 0.0922, "step": 10778 }, { "epoch": 0.23751838569469005, "grad_norm": 0.9818403720855713, "learning_rate": 2.6737906029701353e-05, "loss": 0.1621, "step": 10779 }, { "epoch": 0.23754042098420622, "grad_norm": 0.7158412337303162, "learning_rate": 2.6737239466847598e-05, "loss": 0.0731, "step": 10780 }, { "epoch": 0.23756245627372236, "grad_norm": 0.7635856866836548, "learning_rate": 2.673657284420979e-05, "loss": 0.0733, "step": 10781 }, { "epoch": 0.23758449156323852, "grad_norm": 0.6732383966445923, "learning_rate": 2.6735906161791325e-05, "loss": 0.1221, "step": 10782 }, { "epoch": 0.23760652685275468, "grad_norm": 0.8159315586090088, "learning_rate": 2.6735239419595593e-05, "loss": 0.1065, "step": 10783 }, { "epoch": 0.23762856214227085, "grad_norm": 0.5997750163078308, "learning_rate": 2.6734572617625996e-05, "loss": 0.1295, "step": 10784 }, { "epoch": 0.23765059743178701, "grad_norm": 0.4938642680644989, "learning_rate": 2.673390575588593e-05, "loss": 0.1235, "step": 10785 }, { "epoch": 0.23767263272130318, "grad_norm": 0.7714682817459106, "learning_rate": 2.673323883437878e-05, "loss": 0.0949, "step": 10786 }, { "epoch": 0.23769466801081932, "grad_norm": 1.1557022333145142, "learning_rate": 2.6732571853107962e-05, "loss": 0.1078, "step": 10787 }, { "epoch": 0.23771670330033548, "grad_norm": 0.785401463508606, "learning_rate": 2.6731904812076865e-05, "loss": 0.1434, "step": 10788 }, { "epoch": 0.23773873858985164, "grad_norm": 0.9032447338104248, "learning_rate": 2.673123771128888e-05, "loss": 0.1074, "step": 10789 }, { "epoch": 0.2377607738793678, "grad_norm": 1.2243931293487549, "learning_rate": 2.6730570550747415e-05, "loss": 0.1319, "step": 10790 }, { "epoch": 0.23778280916888397, "grad_norm": 0.9365447163581848, "learning_rate": 2.672990333045586e-05, "loss": 0.1129, "step": 10791 }, { "epoch": 0.23780484445840014, "grad_norm": 0.7221978306770325, "learning_rate": 2.672923605041762e-05, "loss": 0.0866, "step": 10792 }, { "epoch": 0.23782687974791628, "grad_norm": 1.1689895391464233, "learning_rate": 2.6728568710636088e-05, "loss": 0.1494, "step": 10793 }, { "epoch": 0.23784891503743244, "grad_norm": 0.777386486530304, "learning_rate": 2.672790131111467e-05, "loss": 0.0843, "step": 10794 }, { "epoch": 0.2378709503269486, "grad_norm": 1.0444952249526978, "learning_rate": 2.672723385185676e-05, "loss": 0.1379, "step": 10795 }, { "epoch": 0.23789298561646477, "grad_norm": 0.4437461197376251, "learning_rate": 2.6726566332865757e-05, "loss": 0.0827, "step": 10796 }, { "epoch": 0.23791502090598093, "grad_norm": 0.7018908858299255, "learning_rate": 2.6725898754145067e-05, "loss": 0.0801, "step": 10797 }, { "epoch": 0.2379370561954971, "grad_norm": 0.9189902544021606, "learning_rate": 2.6725231115698088e-05, "loss": 0.1169, "step": 10798 }, { "epoch": 0.23795909148501324, "grad_norm": 0.9619276523590088, "learning_rate": 2.6724563417528215e-05, "loss": 0.1061, "step": 10799 }, { "epoch": 0.2379811267745294, "grad_norm": 0.5627219080924988, "learning_rate": 2.6723895659638857e-05, "loss": 0.1053, "step": 10800 }, { "epoch": 0.23800316206404556, "grad_norm": 0.6330054402351379, "learning_rate": 2.672322784203341e-05, "loss": 0.0902, "step": 10801 }, { "epoch": 0.23802519735356173, "grad_norm": 0.6655842661857605, "learning_rate": 2.6722559964715275e-05, "loss": 0.0835, "step": 10802 }, { "epoch": 0.2380472326430779, "grad_norm": 1.308526873588562, "learning_rate": 2.6721892027687858e-05, "loss": 0.1196, "step": 10803 }, { "epoch": 0.23806926793259406, "grad_norm": 1.8431439399719238, "learning_rate": 2.6721224030954558e-05, "loss": 0.1341, "step": 10804 }, { "epoch": 0.23809130322211022, "grad_norm": 1.243535041809082, "learning_rate": 2.6720555974518778e-05, "loss": 0.1404, "step": 10805 }, { "epoch": 0.23811333851162636, "grad_norm": 0.9437042474746704, "learning_rate": 2.6719887858383927e-05, "loss": 0.1354, "step": 10806 }, { "epoch": 0.23813537380114252, "grad_norm": 1.1715153455734253, "learning_rate": 2.6719219682553398e-05, "loss": 0.1157, "step": 10807 }, { "epoch": 0.2381574090906587, "grad_norm": 0.7876611351966858, "learning_rate": 2.6718551447030597e-05, "loss": 0.092, "step": 10808 }, { "epoch": 0.23817944438017485, "grad_norm": 0.9219115376472473, "learning_rate": 2.671788315181893e-05, "loss": 0.0761, "step": 10809 }, { "epoch": 0.23820147966969102, "grad_norm": 0.8936131000518799, "learning_rate": 2.6717214796921804e-05, "loss": 0.0658, "step": 10810 }, { "epoch": 0.23822351495920718, "grad_norm": 0.6561617851257324, "learning_rate": 2.671654638234262e-05, "loss": 0.0971, "step": 10811 }, { "epoch": 0.23824555024872332, "grad_norm": 1.3872971534729004, "learning_rate": 2.6715877908084778e-05, "loss": 0.1269, "step": 10812 }, { "epoch": 0.23826758553823948, "grad_norm": 0.5908021330833435, "learning_rate": 2.6715209374151687e-05, "loss": 0.0758, "step": 10813 }, { "epoch": 0.23828962082775565, "grad_norm": 1.0474278926849365, "learning_rate": 2.6714540780546755e-05, "loss": 0.1461, "step": 10814 }, { "epoch": 0.23831165611727181, "grad_norm": 0.8939008712768555, "learning_rate": 2.6713872127273387e-05, "loss": 0.1154, "step": 10815 }, { "epoch": 0.23833369140678798, "grad_norm": 1.1758090257644653, "learning_rate": 2.6713203414334987e-05, "loss": 0.1032, "step": 10816 }, { "epoch": 0.23835572669630414, "grad_norm": 0.8766790628433228, "learning_rate": 2.6712534641734955e-05, "loss": 0.115, "step": 10817 }, { "epoch": 0.23837776198582028, "grad_norm": 0.979709267616272, "learning_rate": 2.6711865809476708e-05, "loss": 0.0926, "step": 10818 }, { "epoch": 0.23839979727533644, "grad_norm": 0.9434762001037598, "learning_rate": 2.6711196917563644e-05, "loss": 0.1178, "step": 10819 }, { "epoch": 0.2384218325648526, "grad_norm": 1.0589573383331299, "learning_rate": 2.671052796599918e-05, "loss": 0.1164, "step": 10820 }, { "epoch": 0.23844386785436877, "grad_norm": 1.0133998394012451, "learning_rate": 2.6709858954786714e-05, "loss": 0.1187, "step": 10821 }, { "epoch": 0.23846590314388494, "grad_norm": 1.1049222946166992, "learning_rate": 2.6709189883929657e-05, "loss": 0.1, "step": 10822 }, { "epoch": 0.2384879384334011, "grad_norm": 1.0082371234893799, "learning_rate": 2.6708520753431417e-05, "loss": 0.1217, "step": 10823 }, { "epoch": 0.23850997372291724, "grad_norm": 1.1138139963150024, "learning_rate": 2.6707851563295404e-05, "loss": 0.1294, "step": 10824 }, { "epoch": 0.2385320090124334, "grad_norm": 0.8217549920082092, "learning_rate": 2.6707182313525022e-05, "loss": 0.0838, "step": 10825 }, { "epoch": 0.23855404430194957, "grad_norm": 0.691870927810669, "learning_rate": 2.6706513004123686e-05, "loss": 0.117, "step": 10826 }, { "epoch": 0.23857607959146573, "grad_norm": 0.862627387046814, "learning_rate": 2.6705843635094796e-05, "loss": 0.1, "step": 10827 }, { "epoch": 0.2385981148809819, "grad_norm": 1.4351128339767456, "learning_rate": 2.6705174206441772e-05, "loss": 0.1414, "step": 10828 }, { "epoch": 0.23862015017049806, "grad_norm": 1.3186988830566406, "learning_rate": 2.6704504718168017e-05, "loss": 0.1188, "step": 10829 }, { "epoch": 0.2386421854600142, "grad_norm": 0.7023579478263855, "learning_rate": 2.6703835170276945e-05, "loss": 0.1379, "step": 10830 }, { "epoch": 0.23866422074953036, "grad_norm": 1.2012721300125122, "learning_rate": 2.6703165562771963e-05, "loss": 0.1367, "step": 10831 }, { "epoch": 0.23868625603904653, "grad_norm": 0.8589473366737366, "learning_rate": 2.670249589565648e-05, "loss": 0.122, "step": 10832 }, { "epoch": 0.2387082913285627, "grad_norm": 1.123012661933899, "learning_rate": 2.6701826168933915e-05, "loss": 0.1061, "step": 10833 }, { "epoch": 0.23873032661807886, "grad_norm": 0.9639894366264343, "learning_rate": 2.6701156382607673e-05, "loss": 0.0958, "step": 10834 }, { "epoch": 0.23875236190759502, "grad_norm": 0.9722513556480408, "learning_rate": 2.6700486536681168e-05, "loss": 0.1102, "step": 10835 }, { "epoch": 0.23877439719711116, "grad_norm": 0.9325042366981506, "learning_rate": 2.6699816631157807e-05, "loss": 0.102, "step": 10836 }, { "epoch": 0.23879643248662732, "grad_norm": 0.7397155165672302, "learning_rate": 2.669914666604101e-05, "loss": 0.1097, "step": 10837 }, { "epoch": 0.2388184677761435, "grad_norm": 1.0819714069366455, "learning_rate": 2.6698476641334184e-05, "loss": 0.1062, "step": 10838 }, { "epoch": 0.23884050306565965, "grad_norm": 0.88669753074646, "learning_rate": 2.6697806557040742e-05, "loss": 0.0791, "step": 10839 }, { "epoch": 0.23886253835517582, "grad_norm": 0.7531558275222778, "learning_rate": 2.66971364131641e-05, "loss": 0.0885, "step": 10840 }, { "epoch": 0.23888457364469198, "grad_norm": 0.7343122363090515, "learning_rate": 2.6696466209707666e-05, "loss": 0.1009, "step": 10841 }, { "epoch": 0.23890660893420815, "grad_norm": 1.112074375152588, "learning_rate": 2.669579594667486e-05, "loss": 0.1133, "step": 10842 }, { "epoch": 0.23892864422372428, "grad_norm": 0.7094982266426086, "learning_rate": 2.6695125624069094e-05, "loss": 0.124, "step": 10843 }, { "epoch": 0.23895067951324045, "grad_norm": 1.0241526365280151, "learning_rate": 2.6694455241893782e-05, "loss": 0.1156, "step": 10844 }, { "epoch": 0.23897271480275661, "grad_norm": 1.1272087097167969, "learning_rate": 2.669378480015234e-05, "loss": 0.1149, "step": 10845 }, { "epoch": 0.23899475009227278, "grad_norm": 1.0238168239593506, "learning_rate": 2.6693114298848178e-05, "loss": 0.0987, "step": 10846 }, { "epoch": 0.23901678538178894, "grad_norm": 0.9655992388725281, "learning_rate": 2.6692443737984716e-05, "loss": 0.1196, "step": 10847 }, { "epoch": 0.2390388206713051, "grad_norm": 0.945704996585846, "learning_rate": 2.6691773117565368e-05, "loss": 0.1243, "step": 10848 }, { "epoch": 0.23906085596082124, "grad_norm": 0.7549975514411926, "learning_rate": 2.669110243759355e-05, "loss": 0.1029, "step": 10849 }, { "epoch": 0.2390828912503374, "grad_norm": 0.8541207909584045, "learning_rate": 2.669043169807268e-05, "loss": 0.0974, "step": 10850 }, { "epoch": 0.23910492653985357, "grad_norm": 0.7260382175445557, "learning_rate": 2.6689760899006168e-05, "loss": 0.0682, "step": 10851 }, { "epoch": 0.23912696182936974, "grad_norm": 0.8559780716896057, "learning_rate": 2.6689090040397438e-05, "loss": 0.0897, "step": 10852 }, { "epoch": 0.2391489971188859, "grad_norm": 0.8351947665214539, "learning_rate": 2.6688419122249906e-05, "loss": 0.0941, "step": 10853 }, { "epoch": 0.23917103240840207, "grad_norm": 0.9514232873916626, "learning_rate": 2.6687748144566982e-05, "loss": 0.116, "step": 10854 }, { "epoch": 0.2391930676979182, "grad_norm": 0.5765991806983948, "learning_rate": 2.6687077107352093e-05, "loss": 0.0804, "step": 10855 }, { "epoch": 0.23921510298743437, "grad_norm": 1.7451814413070679, "learning_rate": 2.6686406010608653e-05, "loss": 0.1375, "step": 10856 }, { "epoch": 0.23923713827695053, "grad_norm": 0.6349593997001648, "learning_rate": 2.6685734854340078e-05, "loss": 0.0799, "step": 10857 }, { "epoch": 0.2392591735664667, "grad_norm": 1.597484827041626, "learning_rate": 2.6685063638549792e-05, "loss": 0.1325, "step": 10858 }, { "epoch": 0.23928120885598286, "grad_norm": 0.764003336429596, "learning_rate": 2.668439236324121e-05, "loss": 0.0944, "step": 10859 }, { "epoch": 0.23930324414549903, "grad_norm": 0.5849332809448242, "learning_rate": 2.6683721028417753e-05, "loss": 0.0775, "step": 10860 }, { "epoch": 0.23932527943501516, "grad_norm": 0.8482457995414734, "learning_rate": 2.6683049634082836e-05, "loss": 0.1101, "step": 10861 }, { "epoch": 0.23934731472453133, "grad_norm": 0.7548465728759766, "learning_rate": 2.6682378180239887e-05, "loss": 0.0783, "step": 10862 }, { "epoch": 0.2393693500140475, "grad_norm": 0.6993907690048218, "learning_rate": 2.6681706666892318e-05, "loss": 0.0902, "step": 10863 }, { "epoch": 0.23939138530356366, "grad_norm": 1.0307307243347168, "learning_rate": 2.6681035094043554e-05, "loss": 0.106, "step": 10864 }, { "epoch": 0.23941342059307982, "grad_norm": 1.1337038278579712, "learning_rate": 2.6680363461697014e-05, "loss": 0.1235, "step": 10865 }, { "epoch": 0.239435455882596, "grad_norm": 0.7858988642692566, "learning_rate": 2.667969176985612e-05, "loss": 0.0935, "step": 10866 }, { "epoch": 0.23945749117211212, "grad_norm": 0.5281973481178284, "learning_rate": 2.667902001852429e-05, "loss": 0.1065, "step": 10867 }, { "epoch": 0.2394795264616283, "grad_norm": 0.8414812088012695, "learning_rate": 2.6678348207704953e-05, "loss": 0.0815, "step": 10868 }, { "epoch": 0.23950156175114445, "grad_norm": 0.8868131041526794, "learning_rate": 2.6677676337401525e-05, "loss": 0.0684, "step": 10869 }, { "epoch": 0.23952359704066062, "grad_norm": 0.7949684858322144, "learning_rate": 2.6677004407617424e-05, "loss": 0.0747, "step": 10870 }, { "epoch": 0.23954563233017678, "grad_norm": 0.7631525993347168, "learning_rate": 2.6676332418356083e-05, "loss": 0.1125, "step": 10871 }, { "epoch": 0.23956766761969295, "grad_norm": 1.8024338483810425, "learning_rate": 2.667566036962092e-05, "loss": 0.1179, "step": 10872 }, { "epoch": 0.23958970290920908, "grad_norm": 1.4080684185028076, "learning_rate": 2.6674988261415355e-05, "loss": 0.079, "step": 10873 }, { "epoch": 0.23961173819872525, "grad_norm": 0.7167962193489075, "learning_rate": 2.6674316093742814e-05, "loss": 0.0496, "step": 10874 }, { "epoch": 0.23963377348824141, "grad_norm": 0.9560477137565613, "learning_rate": 2.6673643866606726e-05, "loss": 0.0917, "step": 10875 }, { "epoch": 0.23965580877775758, "grad_norm": 1.0354607105255127, "learning_rate": 2.6672971580010504e-05, "loss": 0.0976, "step": 10876 }, { "epoch": 0.23967784406727374, "grad_norm": 0.8933561444282532, "learning_rate": 2.667229923395758e-05, "loss": 0.1138, "step": 10877 }, { "epoch": 0.2396998793567899, "grad_norm": 0.706952691078186, "learning_rate": 2.6671626828451377e-05, "loss": 0.0712, "step": 10878 }, { "epoch": 0.23972191464630607, "grad_norm": 0.9821396470069885, "learning_rate": 2.6670954363495323e-05, "loss": 0.121, "step": 10879 }, { "epoch": 0.2397439499358222, "grad_norm": 0.8465863466262817, "learning_rate": 2.6670281839092836e-05, "loss": 0.1182, "step": 10880 }, { "epoch": 0.23976598522533837, "grad_norm": 1.0026682615280151, "learning_rate": 2.6669609255247345e-05, "loss": 0.1299, "step": 10881 }, { "epoch": 0.23978802051485454, "grad_norm": 1.011897325515747, "learning_rate": 2.6668936611962277e-05, "loss": 0.0785, "step": 10882 }, { "epoch": 0.2398100558043707, "grad_norm": 0.8506794571876526, "learning_rate": 2.6668263909241062e-05, "loss": 0.1269, "step": 10883 }, { "epoch": 0.23983209109388687, "grad_norm": 1.3085817098617554, "learning_rate": 2.6667591147087116e-05, "loss": 0.1324, "step": 10884 }, { "epoch": 0.23985412638340303, "grad_norm": 0.769871711730957, "learning_rate": 2.6666918325503875e-05, "loss": 0.0826, "step": 10885 }, { "epoch": 0.23987616167291917, "grad_norm": 0.9230899214744568, "learning_rate": 2.666624544449476e-05, "loss": 0.0919, "step": 10886 }, { "epoch": 0.23989819696243533, "grad_norm": 0.964910626411438, "learning_rate": 2.6665572504063206e-05, "loss": 0.0949, "step": 10887 }, { "epoch": 0.2399202322519515, "grad_norm": 0.8115866780281067, "learning_rate": 2.666489950421263e-05, "loss": 0.1011, "step": 10888 }, { "epoch": 0.23994226754146766, "grad_norm": 0.961232602596283, "learning_rate": 2.6664226444946467e-05, "loss": 0.1171, "step": 10889 }, { "epoch": 0.23996430283098383, "grad_norm": 0.7670515179634094, "learning_rate": 2.6663553326268145e-05, "loss": 0.1139, "step": 10890 }, { "epoch": 0.2399863381205, "grad_norm": 1.0058410167694092, "learning_rate": 2.666288014818109e-05, "loss": 0.1175, "step": 10891 }, { "epoch": 0.24000837341001613, "grad_norm": 0.8044697046279907, "learning_rate": 2.666220691068873e-05, "loss": 0.1097, "step": 10892 }, { "epoch": 0.2400304086995323, "grad_norm": 1.0555704832077026, "learning_rate": 2.66615336137945e-05, "loss": 0.0985, "step": 10893 }, { "epoch": 0.24005244398904846, "grad_norm": 0.5416188836097717, "learning_rate": 2.666086025750183e-05, "loss": 0.107, "step": 10894 }, { "epoch": 0.24007447927856462, "grad_norm": 0.8179457783699036, "learning_rate": 2.6660186841814138e-05, "loss": 0.0977, "step": 10895 }, { "epoch": 0.2400965145680808, "grad_norm": 1.3076854944229126, "learning_rate": 2.6659513366734864e-05, "loss": 0.0941, "step": 10896 }, { "epoch": 0.24011854985759695, "grad_norm": 1.326458215713501, "learning_rate": 2.6658839832267438e-05, "loss": 0.1551, "step": 10897 }, { "epoch": 0.2401405851471131, "grad_norm": 0.8296149373054504, "learning_rate": 2.665816623841529e-05, "loss": 0.1385, "step": 10898 }, { "epoch": 0.24016262043662925, "grad_norm": 0.9815095067024231, "learning_rate": 2.6657492585181848e-05, "loss": 0.1403, "step": 10899 }, { "epoch": 0.24018465572614542, "grad_norm": 0.49809232354164124, "learning_rate": 2.665681887257054e-05, "loss": 0.0546, "step": 10900 }, { "epoch": 0.24020669101566158, "grad_norm": 1.191328525543213, "learning_rate": 2.665614510058481e-05, "loss": 0.0964, "step": 10901 }, { "epoch": 0.24022872630517775, "grad_norm": 1.186340093612671, "learning_rate": 2.6655471269228077e-05, "loss": 0.1254, "step": 10902 }, { "epoch": 0.2402507615946939, "grad_norm": 1.1057132482528687, "learning_rate": 2.665479737850378e-05, "loss": 0.098, "step": 10903 }, { "epoch": 0.24027279688421005, "grad_norm": 1.931105136871338, "learning_rate": 2.6654123428415355e-05, "loss": 0.1096, "step": 10904 }, { "epoch": 0.2402948321737262, "grad_norm": 1.0416151285171509, "learning_rate": 2.6653449418966225e-05, "loss": 0.0955, "step": 10905 }, { "epoch": 0.24031686746324238, "grad_norm": 0.5206298828125, "learning_rate": 2.665277535015983e-05, "loss": 0.1185, "step": 10906 }, { "epoch": 0.24033890275275854, "grad_norm": 0.9323496222496033, "learning_rate": 2.66521012219996e-05, "loss": 0.16, "step": 10907 }, { "epoch": 0.2403609380422747, "grad_norm": 0.8631995320320129, "learning_rate": 2.6651427034488973e-05, "loss": 0.1188, "step": 10908 }, { "epoch": 0.24038297333179087, "grad_norm": 0.810885488986969, "learning_rate": 2.6650752787631378e-05, "loss": 0.1121, "step": 10909 }, { "epoch": 0.24040500862130704, "grad_norm": 0.6783493161201477, "learning_rate": 2.665007848143025e-05, "loss": 0.0614, "step": 10910 }, { "epoch": 0.24042704391082317, "grad_norm": 0.9045076966285706, "learning_rate": 2.664940411588903e-05, "loss": 0.1069, "step": 10911 }, { "epoch": 0.24044907920033934, "grad_norm": 0.9508938789367676, "learning_rate": 2.6648729691011142e-05, "loss": 0.1214, "step": 10912 }, { "epoch": 0.2404711144898555, "grad_norm": 0.7850440144538879, "learning_rate": 2.6648055206800028e-05, "loss": 0.0892, "step": 10913 }, { "epoch": 0.24049314977937167, "grad_norm": 0.6138636469841003, "learning_rate": 2.6647380663259124e-05, "loss": 0.133, "step": 10914 }, { "epoch": 0.24051518506888783, "grad_norm": 0.8913254737854004, "learning_rate": 2.6646706060391867e-05, "loss": 0.1179, "step": 10915 }, { "epoch": 0.240537220358404, "grad_norm": 1.9252227544784546, "learning_rate": 2.6646031398201688e-05, "loss": 0.1206, "step": 10916 }, { "epoch": 0.24055925564792013, "grad_norm": 0.6594871878623962, "learning_rate": 2.6645356676692027e-05, "loss": 0.0941, "step": 10917 }, { "epoch": 0.2405812909374363, "grad_norm": 1.0895920991897583, "learning_rate": 2.6644681895866315e-05, "loss": 0.0814, "step": 10918 }, { "epoch": 0.24060332622695246, "grad_norm": 1.0371694564819336, "learning_rate": 2.6644007055728e-05, "loss": 0.136, "step": 10919 }, { "epoch": 0.24062536151646863, "grad_norm": 0.974341094493866, "learning_rate": 2.664333215628051e-05, "loss": 0.0987, "step": 10920 }, { "epoch": 0.2406473968059848, "grad_norm": 0.8683251142501831, "learning_rate": 2.6642657197527288e-05, "loss": 0.115, "step": 10921 }, { "epoch": 0.24066943209550096, "grad_norm": 0.6905533671379089, "learning_rate": 2.6641982179471768e-05, "loss": 0.0882, "step": 10922 }, { "epoch": 0.2406914673850171, "grad_norm": 0.8488445281982422, "learning_rate": 2.664130710211739e-05, "loss": 0.1029, "step": 10923 }, { "epoch": 0.24071350267453326, "grad_norm": 0.9641841053962708, "learning_rate": 2.6640631965467593e-05, "loss": 0.107, "step": 10924 }, { "epoch": 0.24073553796404942, "grad_norm": 0.7557926774024963, "learning_rate": 2.663995676952581e-05, "loss": 0.1129, "step": 10925 }, { "epoch": 0.2407575732535656, "grad_norm": 1.6097382307052612, "learning_rate": 2.6639281514295494e-05, "loss": 0.1276, "step": 10926 }, { "epoch": 0.24077960854308175, "grad_norm": 1.824418067932129, "learning_rate": 2.6638606199780072e-05, "loss": 0.1107, "step": 10927 }, { "epoch": 0.24080164383259792, "grad_norm": 0.3886568546295166, "learning_rate": 2.6637930825982982e-05, "loss": 0.0956, "step": 10928 }, { "epoch": 0.24082367912211405, "grad_norm": 0.5779144167900085, "learning_rate": 2.663725539290768e-05, "loss": 0.1436, "step": 10929 }, { "epoch": 0.24084571441163022, "grad_norm": 0.7626128792762756, "learning_rate": 2.6636579900557587e-05, "loss": 0.1038, "step": 10930 }, { "epoch": 0.24086774970114638, "grad_norm": 1.1408145427703857, "learning_rate": 2.663590434893616e-05, "loss": 0.0923, "step": 10931 }, { "epoch": 0.24088978499066255, "grad_norm": 1.0055477619171143, "learning_rate": 2.663522873804683e-05, "loss": 0.0956, "step": 10932 }, { "epoch": 0.2409118202801787, "grad_norm": 0.9577937722206116, "learning_rate": 2.6634553067893034e-05, "loss": 0.1408, "step": 10933 }, { "epoch": 0.24093385556969488, "grad_norm": 0.8552321195602417, "learning_rate": 2.6633877338478224e-05, "loss": 0.1168, "step": 10934 }, { "epoch": 0.240955890859211, "grad_norm": 0.7630558609962463, "learning_rate": 2.6633201549805844e-05, "loss": 0.1049, "step": 10935 }, { "epoch": 0.24097792614872718, "grad_norm": 0.8994885087013245, "learning_rate": 2.6632525701879328e-05, "loss": 0.1119, "step": 10936 }, { "epoch": 0.24099996143824334, "grad_norm": 0.7935569882392883, "learning_rate": 2.6631849794702115e-05, "loss": 0.1262, "step": 10937 }, { "epoch": 0.2410219967277595, "grad_norm": 1.0185319185256958, "learning_rate": 2.663117382827766e-05, "loss": 0.1203, "step": 10938 }, { "epoch": 0.24104403201727567, "grad_norm": 0.6993281245231628, "learning_rate": 2.6630497802609393e-05, "loss": 0.1053, "step": 10939 }, { "epoch": 0.24106606730679184, "grad_norm": 0.9147133827209473, "learning_rate": 2.662982171770077e-05, "loss": 0.1088, "step": 10940 }, { "epoch": 0.24108810259630797, "grad_norm": 0.7694564461708069, "learning_rate": 2.6629145573555226e-05, "loss": 0.1116, "step": 10941 }, { "epoch": 0.24111013788582414, "grad_norm": 0.7234802842140198, "learning_rate": 2.6628469370176206e-05, "loss": 0.084, "step": 10942 }, { "epoch": 0.2411321731753403, "grad_norm": 0.7804599404335022, "learning_rate": 2.662779310756716e-05, "loss": 0.1894, "step": 10943 }, { "epoch": 0.24115420846485647, "grad_norm": 0.7229787707328796, "learning_rate": 2.6627116785731525e-05, "loss": 0.1177, "step": 10944 }, { "epoch": 0.24117624375437263, "grad_norm": 0.768434464931488, "learning_rate": 2.662644040467275e-05, "loss": 0.1162, "step": 10945 }, { "epoch": 0.2411982790438888, "grad_norm": 0.9041771292686462, "learning_rate": 2.662576396439428e-05, "loss": 0.0842, "step": 10946 }, { "epoch": 0.24122031433340496, "grad_norm": 0.8730602264404297, "learning_rate": 2.662508746489956e-05, "loss": 0.1011, "step": 10947 }, { "epoch": 0.2412423496229211, "grad_norm": 0.31570011377334595, "learning_rate": 2.6624410906192035e-05, "loss": 0.083, "step": 10948 }, { "epoch": 0.24126438491243726, "grad_norm": 0.7802384495735168, "learning_rate": 2.662373428827515e-05, "loss": 0.0752, "step": 10949 }, { "epoch": 0.24128642020195343, "grad_norm": 0.940241277217865, "learning_rate": 2.6623057611152356e-05, "loss": 0.1201, "step": 10950 }, { "epoch": 0.2413084554914696, "grad_norm": 0.7700000405311584, "learning_rate": 2.6622380874827096e-05, "loss": 0.1126, "step": 10951 }, { "epoch": 0.24133049078098576, "grad_norm": 0.754233181476593, "learning_rate": 2.662170407930282e-05, "loss": 0.0751, "step": 10952 }, { "epoch": 0.24135252607050192, "grad_norm": 0.6749497652053833, "learning_rate": 2.6621027224582968e-05, "loss": 0.0754, "step": 10953 }, { "epoch": 0.24137456136001806, "grad_norm": 0.5543703436851501, "learning_rate": 2.6620350310670997e-05, "loss": 0.1324, "step": 10954 }, { "epoch": 0.24139659664953422, "grad_norm": 0.8448367714881897, "learning_rate": 2.6619673337570346e-05, "loss": 0.137, "step": 10955 }, { "epoch": 0.2414186319390504, "grad_norm": 0.7732837796211243, "learning_rate": 2.6618996305284474e-05, "loss": 0.0968, "step": 10956 }, { "epoch": 0.24144066722856655, "grad_norm": 0.7377433180809021, "learning_rate": 2.661831921381682e-05, "loss": 0.1151, "step": 10957 }, { "epoch": 0.24146270251808272, "grad_norm": 0.7440695762634277, "learning_rate": 2.661764206317084e-05, "loss": 0.0965, "step": 10958 }, { "epoch": 0.24148473780759888, "grad_norm": 0.7074894309043884, "learning_rate": 2.661696485334997e-05, "loss": 0.0877, "step": 10959 }, { "epoch": 0.24150677309711502, "grad_norm": 0.6143946647644043, "learning_rate": 2.6616287584357675e-05, "loss": 0.0762, "step": 10960 }, { "epoch": 0.24152880838663118, "grad_norm": 0.7181845307350159, "learning_rate": 2.6615610256197397e-05, "loss": 0.0997, "step": 10961 }, { "epoch": 0.24155084367614735, "grad_norm": 0.7592646479606628, "learning_rate": 2.6614932868872583e-05, "loss": 0.1053, "step": 10962 }, { "epoch": 0.2415728789656635, "grad_norm": 0.9685811996459961, "learning_rate": 2.6614255422386695e-05, "loss": 0.114, "step": 10963 }, { "epoch": 0.24159491425517968, "grad_norm": 0.9948508739471436, "learning_rate": 2.6613577916743175e-05, "loss": 0.1071, "step": 10964 }, { "epoch": 0.24161694954469584, "grad_norm": 1.068223476409912, "learning_rate": 2.661290035194547e-05, "loss": 0.0747, "step": 10965 }, { "epoch": 0.24163898483421198, "grad_norm": 1.1913185119628906, "learning_rate": 2.6612222727997038e-05, "loss": 0.0902, "step": 10966 }, { "epoch": 0.24166102012372814, "grad_norm": 1.1743041276931763, "learning_rate": 2.661154504490133e-05, "loss": 0.1147, "step": 10967 }, { "epoch": 0.2416830554132443, "grad_norm": 1.0355138778686523, "learning_rate": 2.6610867302661795e-05, "loss": 0.0883, "step": 10968 }, { "epoch": 0.24170509070276047, "grad_norm": 0.5967842936515808, "learning_rate": 2.661018950128189e-05, "loss": 0.102, "step": 10969 }, { "epoch": 0.24172712599227664, "grad_norm": 0.683929979801178, "learning_rate": 2.660951164076506e-05, "loss": 0.0924, "step": 10970 }, { "epoch": 0.2417491612817928, "grad_norm": 0.6675096750259399, "learning_rate": 2.660883372111476e-05, "loss": 0.1318, "step": 10971 }, { "epoch": 0.24177119657130894, "grad_norm": 0.8712495565414429, "learning_rate": 2.660815574233445e-05, "loss": 0.1025, "step": 10972 }, { "epoch": 0.2417932318608251, "grad_norm": 0.9239082932472229, "learning_rate": 2.6607477704427577e-05, "loss": 0.0862, "step": 10973 }, { "epoch": 0.24181526715034127, "grad_norm": 0.6476742029190063, "learning_rate": 2.6606799607397597e-05, "loss": 0.0666, "step": 10974 }, { "epoch": 0.24183730243985743, "grad_norm": 1.0636060237884521, "learning_rate": 2.6606121451247956e-05, "loss": 0.1234, "step": 10975 }, { "epoch": 0.2418593377293736, "grad_norm": 0.6697700619697571, "learning_rate": 2.6605443235982117e-05, "loss": 0.0675, "step": 10976 }, { "epoch": 0.24188137301888976, "grad_norm": 0.8754305839538574, "learning_rate": 2.6604764961603536e-05, "loss": 0.12, "step": 10977 }, { "epoch": 0.2419034083084059, "grad_norm": 1.2038630247116089, "learning_rate": 2.6604086628115662e-05, "loss": 0.097, "step": 10978 }, { "epoch": 0.24192544359792206, "grad_norm": 0.7795751690864563, "learning_rate": 2.660340823552195e-05, "loss": 0.0963, "step": 10979 }, { "epoch": 0.24194747888743823, "grad_norm": 0.7718006372451782, "learning_rate": 2.6602729783825863e-05, "loss": 0.0944, "step": 10980 }, { "epoch": 0.2419695141769544, "grad_norm": 0.9319835901260376, "learning_rate": 2.660205127303085e-05, "loss": 0.1055, "step": 10981 }, { "epoch": 0.24199154946647056, "grad_norm": 0.5631774067878723, "learning_rate": 2.6601372703140365e-05, "loss": 0.1072, "step": 10982 }, { "epoch": 0.24201358475598672, "grad_norm": 0.8265522718429565, "learning_rate": 2.660069407415787e-05, "loss": 0.126, "step": 10983 }, { "epoch": 0.24203562004550289, "grad_norm": 0.6304964423179626, "learning_rate": 2.6600015386086817e-05, "loss": 0.1187, "step": 10984 }, { "epoch": 0.24205765533501902, "grad_norm": 1.833927035331726, "learning_rate": 2.6599336638930667e-05, "loss": 0.1409, "step": 10985 }, { "epoch": 0.2420796906245352, "grad_norm": 0.47725313901901245, "learning_rate": 2.6598657832692875e-05, "loss": 0.1079, "step": 10986 }, { "epoch": 0.24210172591405135, "grad_norm": 1.0597426891326904, "learning_rate": 2.65979789673769e-05, "loss": 0.1019, "step": 10987 }, { "epoch": 0.24212376120356752, "grad_norm": 1.0595251321792603, "learning_rate": 2.6597300042986197e-05, "loss": 0.1123, "step": 10988 }, { "epoch": 0.24214579649308368, "grad_norm": 0.9353421330451965, "learning_rate": 2.6596621059524227e-05, "loss": 0.0918, "step": 10989 }, { "epoch": 0.24216783178259985, "grad_norm": 0.9640823602676392, "learning_rate": 2.6595942016994444e-05, "loss": 0.1028, "step": 10990 }, { "epoch": 0.24218986707211598, "grad_norm": 0.8772297501564026, "learning_rate": 2.6595262915400316e-05, "loss": 0.0762, "step": 10991 }, { "epoch": 0.24221190236163215, "grad_norm": 0.8724722862243652, "learning_rate": 2.659458375474529e-05, "loss": 0.0941, "step": 10992 }, { "epoch": 0.2422339376511483, "grad_norm": 0.8683575987815857, "learning_rate": 2.6593904535032837e-05, "loss": 0.0948, "step": 10993 }, { "epoch": 0.24225597294066448, "grad_norm": 1.041685938835144, "learning_rate": 2.659322525626641e-05, "loss": 0.1214, "step": 10994 }, { "epoch": 0.24227800823018064, "grad_norm": 1.3076634407043457, "learning_rate": 2.659254591844947e-05, "loss": 0.0775, "step": 10995 }, { "epoch": 0.2423000435196968, "grad_norm": 0.6902965903282166, "learning_rate": 2.6591866521585476e-05, "loss": 0.1097, "step": 10996 }, { "epoch": 0.24232207880921294, "grad_norm": 0.7921469211578369, "learning_rate": 2.659118706567789e-05, "loss": 0.0846, "step": 10997 }, { "epoch": 0.2423441140987291, "grad_norm": 0.9001747965812683, "learning_rate": 2.6590507550730175e-05, "loss": 0.1028, "step": 10998 }, { "epoch": 0.24236614938824527, "grad_norm": 1.3909801244735718, "learning_rate": 2.6589827976745782e-05, "loss": 0.1196, "step": 10999 }, { "epoch": 0.24238818467776144, "grad_norm": 0.7083394527435303, "learning_rate": 2.6589148343728186e-05, "loss": 0.0733, "step": 11000 }, { "epoch": 0.2424102199672776, "grad_norm": 0.5581210851669312, "learning_rate": 2.658846865168084e-05, "loss": 0.1105, "step": 11001 }, { "epoch": 0.24243225525679377, "grad_norm": 0.6525811553001404, "learning_rate": 2.6587788900607216e-05, "loss": 0.1246, "step": 11002 }, { "epoch": 0.2424542905463099, "grad_norm": 0.783898651599884, "learning_rate": 2.6587109090510762e-05, "loss": 0.0985, "step": 11003 }, { "epoch": 0.24247632583582607, "grad_norm": 1.7525619268417358, "learning_rate": 2.658642922139495e-05, "loss": 0.1037, "step": 11004 }, { "epoch": 0.24249836112534223, "grad_norm": 0.8167681694030762, "learning_rate": 2.6585749293263243e-05, "loss": 0.1258, "step": 11005 }, { "epoch": 0.2425203964148584, "grad_norm": 1.5585368871688843, "learning_rate": 2.6585069306119102e-05, "loss": 0.1686, "step": 11006 }, { "epoch": 0.24254243170437456, "grad_norm": 1.0030726194381714, "learning_rate": 2.6584389259965988e-05, "loss": 0.0728, "step": 11007 }, { "epoch": 0.24256446699389073, "grad_norm": 0.8680692911148071, "learning_rate": 2.658370915480737e-05, "loss": 0.1409, "step": 11008 }, { "epoch": 0.24258650228340686, "grad_norm": 0.9490454792976379, "learning_rate": 2.658302899064671e-05, "loss": 0.1235, "step": 11009 }, { "epoch": 0.24260853757292303, "grad_norm": 0.6822746396064758, "learning_rate": 2.6582348767487466e-05, "loss": 0.1161, "step": 11010 }, { "epoch": 0.2426305728624392, "grad_norm": 0.8961812853813171, "learning_rate": 2.6581668485333113e-05, "loss": 0.0983, "step": 11011 }, { "epoch": 0.24265260815195536, "grad_norm": 0.8360594511032104, "learning_rate": 2.658098814418711e-05, "loss": 0.1103, "step": 11012 }, { "epoch": 0.24267464344147152, "grad_norm": 0.5992461442947388, "learning_rate": 2.6580307744052924e-05, "loss": 0.1129, "step": 11013 }, { "epoch": 0.24269667873098769, "grad_norm": 0.8545689582824707, "learning_rate": 2.6579627284934022e-05, "loss": 0.1109, "step": 11014 }, { "epoch": 0.24271871402050385, "grad_norm": 0.7136977910995483, "learning_rate": 2.657894676683387e-05, "loss": 0.0721, "step": 11015 }, { "epoch": 0.24274074931002, "grad_norm": 1.3663148880004883, "learning_rate": 2.6578266189755936e-05, "loss": 0.1044, "step": 11016 }, { "epoch": 0.24276278459953615, "grad_norm": 1.1716712713241577, "learning_rate": 2.6577585553703676e-05, "loss": 0.1196, "step": 11017 }, { "epoch": 0.24278481988905232, "grad_norm": 0.7404365539550781, "learning_rate": 2.657690485868057e-05, "loss": 0.07, "step": 11018 }, { "epoch": 0.24280685517856848, "grad_norm": 0.6647052764892578, "learning_rate": 2.6576224104690078e-05, "loss": 0.1077, "step": 11019 }, { "epoch": 0.24282889046808465, "grad_norm": 1.0370553731918335, "learning_rate": 2.6575543291735668e-05, "loss": 0.11, "step": 11020 }, { "epoch": 0.2428509257576008, "grad_norm": 0.6076189279556274, "learning_rate": 2.657486241982081e-05, "loss": 0.0711, "step": 11021 }, { "epoch": 0.24287296104711695, "grad_norm": 1.4087142944335938, "learning_rate": 2.6574181488948968e-05, "loss": 0.1753, "step": 11022 }, { "epoch": 0.2428949963366331, "grad_norm": 1.0857653617858887, "learning_rate": 2.6573500499123616e-05, "loss": 0.1409, "step": 11023 }, { "epoch": 0.24291703162614928, "grad_norm": 0.8615322709083557, "learning_rate": 2.657281945034822e-05, "loss": 0.1056, "step": 11024 }, { "epoch": 0.24293906691566544, "grad_norm": 1.0987038612365723, "learning_rate": 2.6572138342626247e-05, "loss": 0.0937, "step": 11025 }, { "epoch": 0.2429611022051816, "grad_norm": 1.3591749668121338, "learning_rate": 2.657145717596117e-05, "loss": 0.1127, "step": 11026 }, { "epoch": 0.24298313749469777, "grad_norm": 0.7955777049064636, "learning_rate": 2.6570775950356453e-05, "loss": 0.0915, "step": 11027 }, { "epoch": 0.2430051727842139, "grad_norm": 0.7519497871398926, "learning_rate": 2.6570094665815573e-05, "loss": 0.112, "step": 11028 }, { "epoch": 0.24302720807373007, "grad_norm": 1.1800212860107422, "learning_rate": 2.6569413322341995e-05, "loss": 0.1268, "step": 11029 }, { "epoch": 0.24304924336324624, "grad_norm": 0.48332345485687256, "learning_rate": 2.6568731919939187e-05, "loss": 0.1276, "step": 11030 }, { "epoch": 0.2430712786527624, "grad_norm": 1.9557305574417114, "learning_rate": 2.656805045861063e-05, "loss": 0.1425, "step": 11031 }, { "epoch": 0.24309331394227857, "grad_norm": 0.5734055638313293, "learning_rate": 2.6567368938359787e-05, "loss": 0.0951, "step": 11032 }, { "epoch": 0.24311534923179473, "grad_norm": 1.3442044258117676, "learning_rate": 2.656668735919013e-05, "loss": 0.1378, "step": 11033 }, { "epoch": 0.24313738452131087, "grad_norm": 0.9838977456092834, "learning_rate": 2.656600572110513e-05, "loss": 0.1136, "step": 11034 }, { "epoch": 0.24315941981082703, "grad_norm": 0.8070023655891418, "learning_rate": 2.6565324024108266e-05, "loss": 0.1361, "step": 11035 }, { "epoch": 0.2431814551003432, "grad_norm": 0.8567844033241272, "learning_rate": 2.6564642268203003e-05, "loss": 0.1412, "step": 11036 }, { "epoch": 0.24320349038985936, "grad_norm": 0.8251155018806458, "learning_rate": 2.6563960453392812e-05, "loss": 0.117, "step": 11037 }, { "epoch": 0.24322552567937553, "grad_norm": 1.2604364156723022, "learning_rate": 2.6563278579681173e-05, "loss": 0.1362, "step": 11038 }, { "epoch": 0.2432475609688917, "grad_norm": 0.6241073608398438, "learning_rate": 2.6562596647071556e-05, "loss": 0.0804, "step": 11039 }, { "epoch": 0.24326959625840783, "grad_norm": 0.7829256653785706, "learning_rate": 2.656191465556743e-05, "loss": 0.0948, "step": 11040 }, { "epoch": 0.243291631547924, "grad_norm": 1.0509902238845825, "learning_rate": 2.6561232605172274e-05, "loss": 0.1211, "step": 11041 }, { "epoch": 0.24331366683744016, "grad_norm": 0.646852433681488, "learning_rate": 2.6560550495889558e-05, "loss": 0.0832, "step": 11042 }, { "epoch": 0.24333570212695632, "grad_norm": 0.8152239918708801, "learning_rate": 2.6559868327722765e-05, "loss": 0.129, "step": 11043 }, { "epoch": 0.24335773741647249, "grad_norm": 1.2598004341125488, "learning_rate": 2.6559186100675365e-05, "loss": 0.1016, "step": 11044 }, { "epoch": 0.24337977270598865, "grad_norm": 0.9425714612007141, "learning_rate": 2.6558503814750826e-05, "loss": 0.0743, "step": 11045 }, { "epoch": 0.2434018079955048, "grad_norm": 0.9106046557426453, "learning_rate": 2.6557821469952628e-05, "loss": 0.1167, "step": 11046 }, { "epoch": 0.24342384328502095, "grad_norm": 0.6672477722167969, "learning_rate": 2.6557139066284252e-05, "loss": 0.0976, "step": 11047 }, { "epoch": 0.24344587857453712, "grad_norm": 0.7524117231369019, "learning_rate": 2.6556456603749163e-05, "loss": 0.108, "step": 11048 }, { "epoch": 0.24346791386405328, "grad_norm": 0.7157117128372192, "learning_rate": 2.6555774082350853e-05, "loss": 0.1264, "step": 11049 }, { "epoch": 0.24348994915356945, "grad_norm": 0.7934949994087219, "learning_rate": 2.6555091502092784e-05, "loss": 0.1341, "step": 11050 }, { "epoch": 0.2435119844430856, "grad_norm": 0.7045218348503113, "learning_rate": 2.6554408862978434e-05, "loss": 0.1124, "step": 11051 }, { "epoch": 0.24353401973260178, "grad_norm": 0.6730120182037354, "learning_rate": 2.655372616501129e-05, "loss": 0.1242, "step": 11052 }, { "epoch": 0.2435560550221179, "grad_norm": 1.5680978298187256, "learning_rate": 2.655304340819482e-05, "loss": 0.0696, "step": 11053 }, { "epoch": 0.24357809031163408, "grad_norm": 1.1326032876968384, "learning_rate": 2.6552360592532504e-05, "loss": 0.1309, "step": 11054 }, { "epoch": 0.24360012560115024, "grad_norm": 0.8860864043235779, "learning_rate": 2.6551677718027823e-05, "loss": 0.126, "step": 11055 }, { "epoch": 0.2436221608906664, "grad_norm": 0.686008632183075, "learning_rate": 2.655099478468425e-05, "loss": 0.0961, "step": 11056 }, { "epoch": 0.24364419618018257, "grad_norm": 0.615670919418335, "learning_rate": 2.6550311792505268e-05, "loss": 0.0962, "step": 11057 }, { "epoch": 0.24366623146969874, "grad_norm": 0.896279513835907, "learning_rate": 2.6549628741494354e-05, "loss": 0.1071, "step": 11058 }, { "epoch": 0.24368826675921487, "grad_norm": 1.0958269834518433, "learning_rate": 2.654894563165499e-05, "loss": 0.134, "step": 11059 }, { "epoch": 0.24371030204873104, "grad_norm": 1.0032427310943604, "learning_rate": 2.654826246299065e-05, "loss": 0.1191, "step": 11060 }, { "epoch": 0.2437323373382472, "grad_norm": 0.6249209642410278, "learning_rate": 2.6547579235504815e-05, "loss": 0.0914, "step": 11061 }, { "epoch": 0.24375437262776337, "grad_norm": 0.9134203195571899, "learning_rate": 2.6546895949200968e-05, "loss": 0.0934, "step": 11062 }, { "epoch": 0.24377640791727953, "grad_norm": 0.8746656775474548, "learning_rate": 2.6546212604082586e-05, "loss": 0.1184, "step": 11063 }, { "epoch": 0.2437984432067957, "grad_norm": 1.2047430276870728, "learning_rate": 2.6545529200153158e-05, "loss": 0.1155, "step": 11064 }, { "epoch": 0.24382047849631183, "grad_norm": 1.2462821006774902, "learning_rate": 2.6544845737416154e-05, "loss": 0.108, "step": 11065 }, { "epoch": 0.243842513785828, "grad_norm": 1.4440075159072876, "learning_rate": 2.654416221587506e-05, "loss": 0.1162, "step": 11066 }, { "epoch": 0.24386454907534416, "grad_norm": 0.850156843662262, "learning_rate": 2.6543478635533354e-05, "loss": 0.1351, "step": 11067 }, { "epoch": 0.24388658436486033, "grad_norm": 0.9447088241577148, "learning_rate": 2.6542794996394532e-05, "loss": 0.0997, "step": 11068 }, { "epoch": 0.2439086196543765, "grad_norm": 1.3324167728424072, "learning_rate": 2.6542111298462055e-05, "loss": 0.1071, "step": 11069 }, { "epoch": 0.24393065494389266, "grad_norm": 0.9139233827590942, "learning_rate": 2.654142754173942e-05, "loss": 0.0796, "step": 11070 }, { "epoch": 0.2439526902334088, "grad_norm": 0.6913141012191772, "learning_rate": 2.6540743726230104e-05, "loss": 0.1115, "step": 11071 }, { "epoch": 0.24397472552292496, "grad_norm": 1.0161809921264648, "learning_rate": 2.6540059851937595e-05, "loss": 0.0842, "step": 11072 }, { "epoch": 0.24399676081244112, "grad_norm": 1.2707643508911133, "learning_rate": 2.653937591886537e-05, "loss": 0.0939, "step": 11073 }, { "epoch": 0.24401879610195729, "grad_norm": 0.6992540955543518, "learning_rate": 2.6538691927016916e-05, "loss": 0.0912, "step": 11074 }, { "epoch": 0.24404083139147345, "grad_norm": 1.0515029430389404, "learning_rate": 2.653800787639572e-05, "loss": 0.093, "step": 11075 }, { "epoch": 0.24406286668098962, "grad_norm": 1.0813028812408447, "learning_rate": 2.6537323767005255e-05, "loss": 0.1104, "step": 11076 }, { "epoch": 0.24408490197050575, "grad_norm": 1.047873854637146, "learning_rate": 2.6536639598849017e-05, "loss": 0.0852, "step": 11077 }, { "epoch": 0.24410693726002192, "grad_norm": 0.8777153491973877, "learning_rate": 2.6535955371930488e-05, "loss": 0.1126, "step": 11078 }, { "epoch": 0.24412897254953808, "grad_norm": 0.9367717504501343, "learning_rate": 2.6535271086253152e-05, "loss": 0.1121, "step": 11079 }, { "epoch": 0.24415100783905425, "grad_norm": 1.1350094079971313, "learning_rate": 2.653458674182049e-05, "loss": 0.1231, "step": 11080 }, { "epoch": 0.2441730431285704, "grad_norm": 0.7410030364990234, "learning_rate": 2.6533902338636e-05, "loss": 0.0895, "step": 11081 }, { "epoch": 0.24419507841808658, "grad_norm": 0.7041188478469849, "learning_rate": 2.6533217876703155e-05, "loss": 0.0925, "step": 11082 }, { "epoch": 0.2442171137076027, "grad_norm": 1.6687192916870117, "learning_rate": 2.6532533356025448e-05, "loss": 0.1063, "step": 11083 }, { "epoch": 0.24423914899711888, "grad_norm": 0.8740396499633789, "learning_rate": 2.653184877660637e-05, "loss": 0.1117, "step": 11084 }, { "epoch": 0.24426118428663504, "grad_norm": 1.0563899278640747, "learning_rate": 2.6531164138449395e-05, "loss": 0.0971, "step": 11085 }, { "epoch": 0.2442832195761512, "grad_norm": 0.7240185141563416, "learning_rate": 2.653047944155802e-05, "loss": 0.0919, "step": 11086 }, { "epoch": 0.24430525486566737, "grad_norm": 1.0703797340393066, "learning_rate": 2.652979468593573e-05, "loss": 0.0951, "step": 11087 }, { "epoch": 0.24432729015518354, "grad_norm": 0.999052107334137, "learning_rate": 2.652910987158601e-05, "loss": 0.1324, "step": 11088 }, { "epoch": 0.2443493254446997, "grad_norm": 0.5133037567138672, "learning_rate": 2.652842499851236e-05, "loss": 0.0824, "step": 11089 }, { "epoch": 0.24437136073421584, "grad_norm": 0.5898476243019104, "learning_rate": 2.652774006671825e-05, "loss": 0.1027, "step": 11090 }, { "epoch": 0.244393396023732, "grad_norm": 1.2961632013320923, "learning_rate": 2.6527055076207184e-05, "loss": 0.1027, "step": 11091 }, { "epoch": 0.24441543131324817, "grad_norm": 0.6049692034721375, "learning_rate": 2.6526370026982643e-05, "loss": 0.059, "step": 11092 }, { "epoch": 0.24443746660276433, "grad_norm": 0.743836522102356, "learning_rate": 2.6525684919048117e-05, "loss": 0.1054, "step": 11093 }, { "epoch": 0.2444595018922805, "grad_norm": 0.8345068693161011, "learning_rate": 2.6524999752407097e-05, "loss": 0.0847, "step": 11094 }, { "epoch": 0.24448153718179666, "grad_norm": 0.5250245928764343, "learning_rate": 2.6524314527063078e-05, "loss": 0.0779, "step": 11095 }, { "epoch": 0.2445035724713128, "grad_norm": 0.8176496028900146, "learning_rate": 2.652362924301954e-05, "loss": 0.0886, "step": 11096 }, { "epoch": 0.24452560776082896, "grad_norm": 1.3397024869918823, "learning_rate": 2.652294390027998e-05, "loss": 0.1245, "step": 11097 }, { "epoch": 0.24454764305034513, "grad_norm": 0.8162847757339478, "learning_rate": 2.652225849884789e-05, "loss": 0.1004, "step": 11098 }, { "epoch": 0.2445696783398613, "grad_norm": 1.1495662927627563, "learning_rate": 2.6521573038726756e-05, "loss": 0.08, "step": 11099 }, { "epoch": 0.24459171362937746, "grad_norm": 1.2949634790420532, "learning_rate": 2.6520887519920075e-05, "loss": 0.113, "step": 11100 }, { "epoch": 0.24461374891889362, "grad_norm": 0.9847965836524963, "learning_rate": 2.6520201942431334e-05, "loss": 0.1672, "step": 11101 }, { "epoch": 0.24463578420840976, "grad_norm": 1.07689368724823, "learning_rate": 2.6519516306264027e-05, "loss": 0.113, "step": 11102 }, { "epoch": 0.24465781949792592, "grad_norm": 0.8094796538352966, "learning_rate": 2.6518830611421646e-05, "loss": 0.1103, "step": 11103 }, { "epoch": 0.24467985478744209, "grad_norm": 1.2108104228973389, "learning_rate": 2.6518144857907685e-05, "loss": 0.1261, "step": 11104 }, { "epoch": 0.24470189007695825, "grad_norm": 0.7933679819107056, "learning_rate": 2.6517459045725633e-05, "loss": 0.0877, "step": 11105 }, { "epoch": 0.24472392536647442, "grad_norm": 0.6667004823684692, "learning_rate": 2.6516773174878984e-05, "loss": 0.1065, "step": 11106 }, { "epoch": 0.24474596065599058, "grad_norm": 0.9046193361282349, "learning_rate": 2.6516087245371238e-05, "loss": 0.1003, "step": 11107 }, { "epoch": 0.24476799594550672, "grad_norm": 0.8313437104225159, "learning_rate": 2.6515401257205888e-05, "loss": 0.1103, "step": 11108 }, { "epoch": 0.24479003123502288, "grad_norm": 0.65899258852005, "learning_rate": 2.651471521038642e-05, "loss": 0.0945, "step": 11109 }, { "epoch": 0.24481206652453905, "grad_norm": 0.9208388924598694, "learning_rate": 2.651402910491633e-05, "loss": 0.1071, "step": 11110 }, { "epoch": 0.2448341018140552, "grad_norm": 0.7039466500282288, "learning_rate": 2.6513342940799116e-05, "loss": 0.0836, "step": 11111 }, { "epoch": 0.24485613710357138, "grad_norm": 0.7825613617897034, "learning_rate": 2.6512656718038273e-05, "loss": 0.0676, "step": 11112 }, { "epoch": 0.24487817239308754, "grad_norm": 0.6457536220550537, "learning_rate": 2.65119704366373e-05, "loss": 0.0827, "step": 11113 }, { "epoch": 0.24490020768260368, "grad_norm": 0.8512476682662964, "learning_rate": 2.6511284096599687e-05, "loss": 0.1063, "step": 11114 }, { "epoch": 0.24492224297211984, "grad_norm": 1.1775261163711548, "learning_rate": 2.651059769792893e-05, "loss": 0.1289, "step": 11115 }, { "epoch": 0.244944278261636, "grad_norm": 0.9585078954696655, "learning_rate": 2.6509911240628527e-05, "loss": 0.1038, "step": 11116 }, { "epoch": 0.24496631355115217, "grad_norm": 0.7825642228126526, "learning_rate": 2.650922472470197e-05, "loss": 0.1333, "step": 11117 }, { "epoch": 0.24498834884066834, "grad_norm": 0.6823897957801819, "learning_rate": 2.6508538150152764e-05, "loss": 0.1023, "step": 11118 }, { "epoch": 0.2450103841301845, "grad_norm": 0.3414914906024933, "learning_rate": 2.65078515169844e-05, "loss": 0.0682, "step": 11119 }, { "epoch": 0.24503241941970066, "grad_norm": 0.5210508704185486, "learning_rate": 2.650716482520038e-05, "loss": 0.1047, "step": 11120 }, { "epoch": 0.2450544547092168, "grad_norm": 0.8333777785301208, "learning_rate": 2.6506478074804198e-05, "loss": 0.1167, "step": 11121 }, { "epoch": 0.24507648999873297, "grad_norm": 0.7469133734703064, "learning_rate": 2.6505791265799347e-05, "loss": 0.0901, "step": 11122 }, { "epoch": 0.24509852528824913, "grad_norm": 0.5944724678993225, "learning_rate": 2.650510439818934e-05, "loss": 0.1026, "step": 11123 }, { "epoch": 0.2451205605777653, "grad_norm": 0.8601832985877991, "learning_rate": 2.650441747197766e-05, "loss": 0.0774, "step": 11124 }, { "epoch": 0.24514259586728146, "grad_norm": 0.530182957649231, "learning_rate": 2.6503730487167815e-05, "loss": 0.0938, "step": 11125 }, { "epoch": 0.24516463115679762, "grad_norm": 0.7355627417564392, "learning_rate": 2.6503043443763303e-05, "loss": 0.1128, "step": 11126 }, { "epoch": 0.24518666644631376, "grad_norm": 0.6512256860733032, "learning_rate": 2.6502356341767618e-05, "loss": 0.1213, "step": 11127 }, { "epoch": 0.24520870173582993, "grad_norm": 0.6019582152366638, "learning_rate": 2.650166918118427e-05, "loss": 0.0796, "step": 11128 }, { "epoch": 0.2452307370253461, "grad_norm": 0.6783086061477661, "learning_rate": 2.6500981962016752e-05, "loss": 0.0951, "step": 11129 }, { "epoch": 0.24525277231486226, "grad_norm": 0.4975524842739105, "learning_rate": 2.6500294684268568e-05, "loss": 0.0904, "step": 11130 }, { "epoch": 0.24527480760437842, "grad_norm": 0.5709240436553955, "learning_rate": 2.649960734794321e-05, "loss": 0.0962, "step": 11131 }, { "epoch": 0.24529684289389458, "grad_norm": 0.6438294053077698, "learning_rate": 2.649891995304419e-05, "loss": 0.1402, "step": 11132 }, { "epoch": 0.24531887818341072, "grad_norm": 1.3293325901031494, "learning_rate": 2.6498232499574998e-05, "loss": 0.1615, "step": 11133 }, { "epoch": 0.24534091347292689, "grad_norm": 1.0609443187713623, "learning_rate": 2.6497544987539147e-05, "loss": 0.0924, "step": 11134 }, { "epoch": 0.24536294876244305, "grad_norm": 0.7089268565177917, "learning_rate": 2.6496857416940133e-05, "loss": 0.0698, "step": 11135 }, { "epoch": 0.24538498405195922, "grad_norm": 1.0275816917419434, "learning_rate": 2.6496169787781462e-05, "loss": 0.1355, "step": 11136 }, { "epoch": 0.24540701934147538, "grad_norm": 1.3093129396438599, "learning_rate": 2.6495482100066627e-05, "loss": 0.1011, "step": 11137 }, { "epoch": 0.24542905463099154, "grad_norm": 0.6090683937072754, "learning_rate": 2.6494794353799146e-05, "loss": 0.1118, "step": 11138 }, { "epoch": 0.24545108992050768, "grad_norm": 0.7185223698616028, "learning_rate": 2.6494106548982505e-05, "loss": 0.1045, "step": 11139 }, { "epoch": 0.24547312521002385, "grad_norm": 0.7929478883743286, "learning_rate": 2.649341868562022e-05, "loss": 0.0893, "step": 11140 }, { "epoch": 0.24549516049954, "grad_norm": 1.0211799144744873, "learning_rate": 2.649273076371579e-05, "loss": 0.1095, "step": 11141 }, { "epoch": 0.24551719578905618, "grad_norm": 0.7677479386329651, "learning_rate": 2.649204278327272e-05, "loss": 0.102, "step": 11142 }, { "epoch": 0.24553923107857234, "grad_norm": 0.8100216388702393, "learning_rate": 2.6491354744294515e-05, "loss": 0.0865, "step": 11143 }, { "epoch": 0.2455612663680885, "grad_norm": 0.6750974655151367, "learning_rate": 2.649066664678467e-05, "loss": 0.1025, "step": 11144 }, { "epoch": 0.24558330165760464, "grad_norm": 0.7628803253173828, "learning_rate": 2.6489978490746707e-05, "loss": 0.1102, "step": 11145 }, { "epoch": 0.2456053369471208, "grad_norm": 0.8664891719818115, "learning_rate": 2.648929027618412e-05, "loss": 0.1071, "step": 11146 }, { "epoch": 0.24562737223663697, "grad_norm": 0.6571367383003235, "learning_rate": 2.648860200310041e-05, "loss": 0.0992, "step": 11147 }, { "epoch": 0.24564940752615314, "grad_norm": 0.8074290156364441, "learning_rate": 2.6487913671499097e-05, "loss": 0.0819, "step": 11148 }, { "epoch": 0.2456714428156693, "grad_norm": 0.527056097984314, "learning_rate": 2.6487225281383675e-05, "loss": 0.0888, "step": 11149 }, { "epoch": 0.24569347810518546, "grad_norm": 0.5566209554672241, "learning_rate": 2.6486536832757657e-05, "loss": 0.0933, "step": 11150 }, { "epoch": 0.2457155133947016, "grad_norm": 0.9292789101600647, "learning_rate": 2.648584832562455e-05, "loss": 0.092, "step": 11151 }, { "epoch": 0.24573754868421777, "grad_norm": 0.7426461577415466, "learning_rate": 2.6485159759987853e-05, "loss": 0.1137, "step": 11152 }, { "epoch": 0.24575958397373393, "grad_norm": 1.2290825843811035, "learning_rate": 2.6484471135851082e-05, "loss": 0.093, "step": 11153 }, { "epoch": 0.2457816192632501, "grad_norm": 0.9234660863876343, "learning_rate": 2.648378245321774e-05, "loss": 0.1062, "step": 11154 }, { "epoch": 0.24580365455276626, "grad_norm": 0.899285614490509, "learning_rate": 2.6483093712091334e-05, "loss": 0.1599, "step": 11155 }, { "epoch": 0.24582568984228242, "grad_norm": 0.7688789367675781, "learning_rate": 2.6482404912475377e-05, "loss": 0.1404, "step": 11156 }, { "epoch": 0.2458477251317986, "grad_norm": 0.8372324705123901, "learning_rate": 2.6481716054373374e-05, "loss": 0.0852, "step": 11157 }, { "epoch": 0.24586976042131473, "grad_norm": 0.6226063370704651, "learning_rate": 2.648102713778883e-05, "loss": 0.105, "step": 11158 }, { "epoch": 0.2458917957108309, "grad_norm": 1.0219049453735352, "learning_rate": 2.6480338162725263e-05, "loss": 0.1065, "step": 11159 }, { "epoch": 0.24591383100034706, "grad_norm": 0.46352308988571167, "learning_rate": 2.647964912918618e-05, "loss": 0.0916, "step": 11160 }, { "epoch": 0.24593586628986322, "grad_norm": 0.7566937208175659, "learning_rate": 2.647896003717508e-05, "loss": 0.118, "step": 11161 }, { "epoch": 0.24595790157937938, "grad_norm": 0.6660405397415161, "learning_rate": 2.6478270886695487e-05, "loss": 0.0908, "step": 11162 }, { "epoch": 0.24597993686889555, "grad_norm": 0.8087984919548035, "learning_rate": 2.6477581677750902e-05, "loss": 0.1165, "step": 11163 }, { "epoch": 0.24600197215841169, "grad_norm": 1.5274633169174194, "learning_rate": 2.6476892410344842e-05, "loss": 0.0997, "step": 11164 }, { "epoch": 0.24602400744792785, "grad_norm": 0.7898973822593689, "learning_rate": 2.647620308448081e-05, "loss": 0.0977, "step": 11165 }, { "epoch": 0.24604604273744402, "grad_norm": 1.9151734113693237, "learning_rate": 2.6475513700162323e-05, "loss": 0.0906, "step": 11166 }, { "epoch": 0.24606807802696018, "grad_norm": 0.8785910606384277, "learning_rate": 2.6474824257392894e-05, "loss": 0.0822, "step": 11167 }, { "epoch": 0.24609011331647634, "grad_norm": 1.1400399208068848, "learning_rate": 2.647413475617603e-05, "loss": 0.0997, "step": 11168 }, { "epoch": 0.2461121486059925, "grad_norm": 0.7295728921890259, "learning_rate": 2.6473445196515245e-05, "loss": 0.0806, "step": 11169 }, { "epoch": 0.24613418389550865, "grad_norm": 0.8471229672431946, "learning_rate": 2.647275557841405e-05, "loss": 0.1195, "step": 11170 }, { "epoch": 0.2461562191850248, "grad_norm": 1.1932058334350586, "learning_rate": 2.647206590187596e-05, "loss": 0.1078, "step": 11171 }, { "epoch": 0.24617825447454098, "grad_norm": 0.935890793800354, "learning_rate": 2.6471376166904488e-05, "loss": 0.1265, "step": 11172 }, { "epoch": 0.24620028976405714, "grad_norm": 0.5623357892036438, "learning_rate": 2.6470686373503144e-05, "loss": 0.0779, "step": 11173 }, { "epoch": 0.2462223250535733, "grad_norm": 0.7211048007011414, "learning_rate": 2.646999652167544e-05, "loss": 0.0886, "step": 11174 }, { "epoch": 0.24624436034308947, "grad_norm": 0.75112384557724, "learning_rate": 2.64693066114249e-05, "loss": 0.1043, "step": 11175 }, { "epoch": 0.2462663956326056, "grad_norm": 0.5298912525177002, "learning_rate": 2.6468616642755024e-05, "loss": 0.1153, "step": 11176 }, { "epoch": 0.24628843092212177, "grad_norm": 2.9626898765563965, "learning_rate": 2.646792661566934e-05, "loss": 0.1067, "step": 11177 }, { "epoch": 0.24631046621163794, "grad_norm": 0.5747419595718384, "learning_rate": 2.6467236530171354e-05, "loss": 0.0864, "step": 11178 }, { "epoch": 0.2463325015011541, "grad_norm": 2.0474390983581543, "learning_rate": 2.646654638626458e-05, "loss": 0.1347, "step": 11179 }, { "epoch": 0.24635453679067026, "grad_norm": 0.7577977180480957, "learning_rate": 2.646585618395254e-05, "loss": 0.084, "step": 11180 }, { "epoch": 0.24637657208018643, "grad_norm": 0.6715741753578186, "learning_rate": 2.6465165923238743e-05, "loss": 0.1005, "step": 11181 }, { "epoch": 0.24639860736970257, "grad_norm": 0.7981688976287842, "learning_rate": 2.646447560412671e-05, "loss": 0.0847, "step": 11182 }, { "epoch": 0.24642064265921873, "grad_norm": 0.6181614398956299, "learning_rate": 2.6463785226619953e-05, "loss": 0.0941, "step": 11183 }, { "epoch": 0.2464426779487349, "grad_norm": 0.5533742308616638, "learning_rate": 2.646309479072199e-05, "loss": 0.1063, "step": 11184 }, { "epoch": 0.24646471323825106, "grad_norm": 0.9322394132614136, "learning_rate": 2.646240429643634e-05, "loss": 0.1184, "step": 11185 }, { "epoch": 0.24648674852776722, "grad_norm": 0.5169748663902283, "learning_rate": 2.646171374376652e-05, "loss": 0.082, "step": 11186 }, { "epoch": 0.2465087838172834, "grad_norm": 0.7665998935699463, "learning_rate": 2.6461023132716042e-05, "loss": 0.1219, "step": 11187 }, { "epoch": 0.24653081910679953, "grad_norm": 0.9644864797592163, "learning_rate": 2.646033246328843e-05, "loss": 0.1001, "step": 11188 }, { "epoch": 0.2465528543963157, "grad_norm": 0.6012171506881714, "learning_rate": 2.64596417354872e-05, "loss": 0.0947, "step": 11189 }, { "epoch": 0.24657488968583186, "grad_norm": 0.7940882444381714, "learning_rate": 2.6458950949315867e-05, "loss": 0.1019, "step": 11190 }, { "epoch": 0.24659692497534802, "grad_norm": 0.6231250166893005, "learning_rate": 2.6458260104777953e-05, "loss": 0.1054, "step": 11191 }, { "epoch": 0.24661896026486418, "grad_norm": 0.4588167369365692, "learning_rate": 2.6457569201876978e-05, "loss": 0.072, "step": 11192 }, { "epoch": 0.24664099555438035, "grad_norm": 0.6710396409034729, "learning_rate": 2.6456878240616457e-05, "loss": 0.0741, "step": 11193 }, { "epoch": 0.2466630308438965, "grad_norm": 0.7495868802070618, "learning_rate": 2.645618722099991e-05, "loss": 0.0819, "step": 11194 }, { "epoch": 0.24668506613341265, "grad_norm": 0.8135561943054199, "learning_rate": 2.6455496143030858e-05, "loss": 0.136, "step": 11195 }, { "epoch": 0.24670710142292882, "grad_norm": 0.9040391445159912, "learning_rate": 2.645480500671282e-05, "loss": 0.0699, "step": 11196 }, { "epoch": 0.24672913671244498, "grad_norm": 0.7067610621452332, "learning_rate": 2.6454113812049325e-05, "loss": 0.0841, "step": 11197 }, { "epoch": 0.24675117200196114, "grad_norm": 1.0282799005508423, "learning_rate": 2.645342255904388e-05, "loss": 0.1315, "step": 11198 }, { "epoch": 0.2467732072914773, "grad_norm": 1.131545066833496, "learning_rate": 2.6452731247700013e-05, "loss": 0.0998, "step": 11199 }, { "epoch": 0.24679524258099347, "grad_norm": 1.0077996253967285, "learning_rate": 2.6452039878021245e-05, "loss": 0.1014, "step": 11200 }, { "epoch": 0.2468172778705096, "grad_norm": 0.7966886758804321, "learning_rate": 2.64513484500111e-05, "loss": 0.0809, "step": 11201 }, { "epoch": 0.24683931316002578, "grad_norm": 0.7152053117752075, "learning_rate": 2.645065696367309e-05, "loss": 0.1075, "step": 11202 }, { "epoch": 0.24686134844954194, "grad_norm": 0.5058673620223999, "learning_rate": 2.6449965419010746e-05, "loss": 0.0854, "step": 11203 }, { "epoch": 0.2468833837390581, "grad_norm": 1.1338411569595337, "learning_rate": 2.644927381602759e-05, "loss": 0.1084, "step": 11204 }, { "epoch": 0.24690541902857427, "grad_norm": 0.5023582577705383, "learning_rate": 2.6448582154727143e-05, "loss": 0.1074, "step": 11205 }, { "epoch": 0.24692745431809043, "grad_norm": 0.8039440512657166, "learning_rate": 2.6447890435112928e-05, "loss": 0.1002, "step": 11206 }, { "epoch": 0.24694948960760657, "grad_norm": 1.2730897665023804, "learning_rate": 2.6447198657188463e-05, "loss": 0.126, "step": 11207 }, { "epoch": 0.24697152489712274, "grad_norm": 1.062237024307251, "learning_rate": 2.6446506820957283e-05, "loss": 0.0954, "step": 11208 }, { "epoch": 0.2469935601866389, "grad_norm": 0.6321877241134644, "learning_rate": 2.6445814926422904e-05, "loss": 0.1104, "step": 11209 }, { "epoch": 0.24701559547615506, "grad_norm": 0.8664748668670654, "learning_rate": 2.644512297358885e-05, "loss": 0.0868, "step": 11210 }, { "epoch": 0.24703763076567123, "grad_norm": 1.0967544317245483, "learning_rate": 2.644443096245865e-05, "loss": 0.1251, "step": 11211 }, { "epoch": 0.2470596660551874, "grad_norm": 0.6099457144737244, "learning_rate": 2.6443738893035825e-05, "loss": 0.0884, "step": 11212 }, { "epoch": 0.24708170134470353, "grad_norm": 0.7305456399917603, "learning_rate": 2.6443046765323904e-05, "loss": 0.0934, "step": 11213 }, { "epoch": 0.2471037366342197, "grad_norm": 0.9963683485984802, "learning_rate": 2.6442354579326406e-05, "loss": 0.1023, "step": 11214 }, { "epoch": 0.24712577192373586, "grad_norm": 1.1117435693740845, "learning_rate": 2.644166233504686e-05, "loss": 0.1012, "step": 11215 }, { "epoch": 0.24714780721325202, "grad_norm": 0.9741190671920776, "learning_rate": 2.6440970032488793e-05, "loss": 0.1244, "step": 11216 }, { "epoch": 0.2471698425027682, "grad_norm": 0.8016980290412903, "learning_rate": 2.644027767165573e-05, "loss": 0.1068, "step": 11217 }, { "epoch": 0.24719187779228435, "grad_norm": 0.6001740097999573, "learning_rate": 2.6439585252551197e-05, "loss": 0.1075, "step": 11218 }, { "epoch": 0.2472139130818005, "grad_norm": 0.43252110481262207, "learning_rate": 2.6438892775178726e-05, "loss": 0.0979, "step": 11219 }, { "epoch": 0.24723594837131666, "grad_norm": 0.7330437898635864, "learning_rate": 2.6438200239541836e-05, "loss": 0.0994, "step": 11220 }, { "epoch": 0.24725798366083282, "grad_norm": 0.7751214504241943, "learning_rate": 2.6437507645644062e-05, "loss": 0.0855, "step": 11221 }, { "epoch": 0.24728001895034898, "grad_norm": 1.2177191972732544, "learning_rate": 2.6436814993488927e-05, "loss": 0.0963, "step": 11222 }, { "epoch": 0.24730205423986515, "grad_norm": 1.4131014347076416, "learning_rate": 2.643612228307996e-05, "loss": 0.1014, "step": 11223 }, { "epoch": 0.2473240895293813, "grad_norm": 0.5882245898246765, "learning_rate": 2.643542951442069e-05, "loss": 0.1736, "step": 11224 }, { "epoch": 0.24734612481889748, "grad_norm": 0.9899246692657471, "learning_rate": 2.6434736687514645e-05, "loss": 0.101, "step": 11225 }, { "epoch": 0.24736816010841362, "grad_norm": 0.6656920909881592, "learning_rate": 2.643404380236535e-05, "loss": 0.0938, "step": 11226 }, { "epoch": 0.24739019539792978, "grad_norm": 0.8730238676071167, "learning_rate": 2.6433350858976346e-05, "loss": 0.1004, "step": 11227 }, { "epoch": 0.24741223068744594, "grad_norm": 0.7000529170036316, "learning_rate": 2.6432657857351148e-05, "loss": 0.1546, "step": 11228 }, { "epoch": 0.2474342659769621, "grad_norm": 0.9229430556297302, "learning_rate": 2.64319647974933e-05, "loss": 0.1436, "step": 11229 }, { "epoch": 0.24745630126647827, "grad_norm": 0.7615268230438232, "learning_rate": 2.643127167940632e-05, "loss": 0.1038, "step": 11230 }, { "epoch": 0.24747833655599444, "grad_norm": 0.6349321007728577, "learning_rate": 2.6430578503093742e-05, "loss": 0.0979, "step": 11231 }, { "epoch": 0.24750037184551058, "grad_norm": 0.6417069435119629, "learning_rate": 2.6429885268559106e-05, "loss": 0.0995, "step": 11232 }, { "epoch": 0.24752240713502674, "grad_norm": 1.0942875146865845, "learning_rate": 2.642919197580593e-05, "loss": 0.1204, "step": 11233 }, { "epoch": 0.2475444424245429, "grad_norm": 0.7137454152107239, "learning_rate": 2.642849862483775e-05, "loss": 0.1022, "step": 11234 }, { "epoch": 0.24756647771405907, "grad_norm": 0.5721895694732666, "learning_rate": 2.64278052156581e-05, "loss": 0.094, "step": 11235 }, { "epoch": 0.24758851300357523, "grad_norm": 1.2916005849838257, "learning_rate": 2.6427111748270505e-05, "loss": 0.1172, "step": 11236 }, { "epoch": 0.2476105482930914, "grad_norm": 0.7226446866989136, "learning_rate": 2.6426418222678508e-05, "loss": 0.1307, "step": 11237 }, { "epoch": 0.24763258358260753, "grad_norm": 0.7042778134346008, "learning_rate": 2.642572463888563e-05, "loss": 0.1368, "step": 11238 }, { "epoch": 0.2476546188721237, "grad_norm": 0.6583191752433777, "learning_rate": 2.6425030996895414e-05, "loss": 0.149, "step": 11239 }, { "epoch": 0.24767665416163986, "grad_norm": 1.0473097562789917, "learning_rate": 2.642433729671139e-05, "loss": 0.0834, "step": 11240 }, { "epoch": 0.24769868945115603, "grad_norm": 1.114424228668213, "learning_rate": 2.642364353833709e-05, "loss": 0.1742, "step": 11241 }, { "epoch": 0.2477207247406722, "grad_norm": 1.0868138074874878, "learning_rate": 2.642294972177604e-05, "loss": 0.125, "step": 11242 }, { "epoch": 0.24774276003018836, "grad_norm": 0.8809233903884888, "learning_rate": 2.6422255847031788e-05, "loss": 0.0817, "step": 11243 }, { "epoch": 0.2477647953197045, "grad_norm": 0.9595130085945129, "learning_rate": 2.6421561914107858e-05, "loss": 0.1051, "step": 11244 }, { "epoch": 0.24778683060922066, "grad_norm": 1.1838051080703735, "learning_rate": 2.642086792300779e-05, "loss": 0.1175, "step": 11245 }, { "epoch": 0.24780886589873682, "grad_norm": 1.2487003803253174, "learning_rate": 2.642017387373512e-05, "loss": 0.1381, "step": 11246 }, { "epoch": 0.247830901188253, "grad_norm": 0.5671594738960266, "learning_rate": 2.641947976629338e-05, "loss": 0.0799, "step": 11247 }, { "epoch": 0.24785293647776915, "grad_norm": 0.7280190587043762, "learning_rate": 2.6418785600686107e-05, "loss": 0.0689, "step": 11248 }, { "epoch": 0.24787497176728532, "grad_norm": 1.2922011613845825, "learning_rate": 2.641809137691683e-05, "loss": 0.121, "step": 11249 }, { "epoch": 0.24789700705680145, "grad_norm": 0.8345125317573547, "learning_rate": 2.6417397094989094e-05, "loss": 0.0737, "step": 11250 }, { "epoch": 0.24791904234631762, "grad_norm": 0.6087179780006409, "learning_rate": 2.641670275490643e-05, "loss": 0.0891, "step": 11251 }, { "epoch": 0.24794107763583378, "grad_norm": 0.900104284286499, "learning_rate": 2.6416008356672382e-05, "loss": 0.1044, "step": 11252 }, { "epoch": 0.24796311292534995, "grad_norm": 0.6682727932929993, "learning_rate": 2.6415313900290476e-05, "loss": 0.1236, "step": 11253 }, { "epoch": 0.2479851482148661, "grad_norm": 0.8270304203033447, "learning_rate": 2.641461938576426e-05, "loss": 0.1072, "step": 11254 }, { "epoch": 0.24800718350438228, "grad_norm": 0.4864620268344879, "learning_rate": 2.6413924813097267e-05, "loss": 0.0953, "step": 11255 }, { "epoch": 0.24802921879389841, "grad_norm": 0.9283967018127441, "learning_rate": 2.641323018229303e-05, "loss": 0.0498, "step": 11256 }, { "epoch": 0.24805125408341458, "grad_norm": 0.8768844604492188, "learning_rate": 2.6412535493355086e-05, "loss": 0.1004, "step": 11257 }, { "epoch": 0.24807328937293074, "grad_norm": 1.0106537342071533, "learning_rate": 2.641184074628699e-05, "loss": 0.085, "step": 11258 }, { "epoch": 0.2480953246624469, "grad_norm": 0.6891992092132568, "learning_rate": 2.6411145941092265e-05, "loss": 0.1064, "step": 11259 }, { "epoch": 0.24811735995196307, "grad_norm": 1.5646029710769653, "learning_rate": 2.6410451077774457e-05, "loss": 0.0979, "step": 11260 }, { "epoch": 0.24813939524147924, "grad_norm": 0.7400856614112854, "learning_rate": 2.6409756156337097e-05, "loss": 0.1107, "step": 11261 }, { "epoch": 0.2481614305309954, "grad_norm": 0.7846018075942993, "learning_rate": 2.6409061176783736e-05, "loss": 0.1234, "step": 11262 }, { "epoch": 0.24818346582051154, "grad_norm": 0.5376316905021667, "learning_rate": 2.6408366139117904e-05, "loss": 0.0756, "step": 11263 }, { "epoch": 0.2482055011100277, "grad_norm": 0.4466281533241272, "learning_rate": 2.640767104334315e-05, "loss": 0.0815, "step": 11264 }, { "epoch": 0.24822753639954387, "grad_norm": 0.6151078343391418, "learning_rate": 2.640697588946301e-05, "loss": 0.0994, "step": 11265 }, { "epoch": 0.24824957168906003, "grad_norm": 1.1093388795852661, "learning_rate": 2.640628067748102e-05, "loss": 0.1025, "step": 11266 }, { "epoch": 0.2482716069785762, "grad_norm": 0.6802314519882202, "learning_rate": 2.640558540740073e-05, "loss": 0.072, "step": 11267 }, { "epoch": 0.24829364226809236, "grad_norm": 0.8540846109390259, "learning_rate": 2.6404890079225673e-05, "loss": 0.1014, "step": 11268 }, { "epoch": 0.2483156775576085, "grad_norm": 0.7824400663375854, "learning_rate": 2.64041946929594e-05, "loss": 0.0952, "step": 11269 }, { "epoch": 0.24833771284712466, "grad_norm": 0.8022063374519348, "learning_rate": 2.6403499248605447e-05, "loss": 0.1047, "step": 11270 }, { "epoch": 0.24835974813664083, "grad_norm": 0.7321067452430725, "learning_rate": 2.6402803746167358e-05, "loss": 0.1081, "step": 11271 }, { "epoch": 0.248381783426157, "grad_norm": 1.4289308786392212, "learning_rate": 2.6402108185648674e-05, "loss": 0.1146, "step": 11272 }, { "epoch": 0.24840381871567316, "grad_norm": 0.6052084565162659, "learning_rate": 2.640141256705294e-05, "loss": 0.1094, "step": 11273 }, { "epoch": 0.24842585400518932, "grad_norm": 0.7301265001296997, "learning_rate": 2.6400716890383694e-05, "loss": 0.1013, "step": 11274 }, { "epoch": 0.24844788929470546, "grad_norm": 1.039745569229126, "learning_rate": 2.6400021155644488e-05, "loss": 0.0956, "step": 11275 }, { "epoch": 0.24846992458422162, "grad_norm": 1.1664130687713623, "learning_rate": 2.6399325362838857e-05, "loss": 0.1032, "step": 11276 }, { "epoch": 0.2484919598737378, "grad_norm": 0.8249202966690063, "learning_rate": 2.639862951197035e-05, "loss": 0.1006, "step": 11277 }, { "epoch": 0.24851399516325395, "grad_norm": 1.1220940351486206, "learning_rate": 2.6397933603042512e-05, "loss": 0.1225, "step": 11278 }, { "epoch": 0.24853603045277012, "grad_norm": 1.2888615131378174, "learning_rate": 2.6397237636058883e-05, "loss": 0.1376, "step": 11279 }, { "epoch": 0.24855806574228628, "grad_norm": 0.798991858959198, "learning_rate": 2.6396541611023015e-05, "loss": 0.1012, "step": 11280 }, { "epoch": 0.24858010103180242, "grad_norm": 1.0461983680725098, "learning_rate": 2.6395845527938447e-05, "loss": 0.1206, "step": 11281 }, { "epoch": 0.24860213632131858, "grad_norm": 1.2180595397949219, "learning_rate": 2.6395149386808727e-05, "loss": 0.1703, "step": 11282 }, { "epoch": 0.24862417161083475, "grad_norm": 0.8079507350921631, "learning_rate": 2.6394453187637402e-05, "loss": 0.1181, "step": 11283 }, { "epoch": 0.2486462069003509, "grad_norm": 1.0542535781860352, "learning_rate": 2.6393756930428014e-05, "loss": 0.1146, "step": 11284 }, { "epoch": 0.24866824218986708, "grad_norm": 0.7860861420631409, "learning_rate": 2.6393060615184113e-05, "loss": 0.123, "step": 11285 }, { "epoch": 0.24869027747938324, "grad_norm": 0.4576812982559204, "learning_rate": 2.6392364241909245e-05, "loss": 0.0743, "step": 11286 }, { "epoch": 0.24871231276889938, "grad_norm": 0.6471774578094482, "learning_rate": 2.639166781060696e-05, "loss": 0.1107, "step": 11287 }, { "epoch": 0.24873434805841554, "grad_norm": 0.8147375583648682, "learning_rate": 2.63909713212808e-05, "loss": 0.0987, "step": 11288 }, { "epoch": 0.2487563833479317, "grad_norm": 0.605029821395874, "learning_rate": 2.6390274773934315e-05, "loss": 0.0718, "step": 11289 }, { "epoch": 0.24877841863744787, "grad_norm": 0.8533700704574585, "learning_rate": 2.638957816857105e-05, "loss": 0.0901, "step": 11290 }, { "epoch": 0.24880045392696404, "grad_norm": 0.9022535681724548, "learning_rate": 2.638888150519456e-05, "loss": 0.1053, "step": 11291 }, { "epoch": 0.2488224892164802, "grad_norm": 0.9690940976142883, "learning_rate": 2.6388184783808387e-05, "loss": 0.0543, "step": 11292 }, { "epoch": 0.24884452450599634, "grad_norm": 0.7212982177734375, "learning_rate": 2.6387488004416082e-05, "loss": 0.0929, "step": 11293 }, { "epoch": 0.2488665597955125, "grad_norm": 0.7815033793449402, "learning_rate": 2.6386791167021194e-05, "loss": 0.1227, "step": 11294 }, { "epoch": 0.24888859508502867, "grad_norm": 0.3143764138221741, "learning_rate": 2.638609427162727e-05, "loss": 0.0661, "step": 11295 }, { "epoch": 0.24891063037454483, "grad_norm": 0.44405055046081543, "learning_rate": 2.638539731823787e-05, "loss": 0.077, "step": 11296 }, { "epoch": 0.248932665664061, "grad_norm": 1.5185340642929077, "learning_rate": 2.6384700306856528e-05, "loss": 0.1387, "step": 11297 }, { "epoch": 0.24895470095357716, "grad_norm": 0.7609834671020508, "learning_rate": 2.6384003237486803e-05, "loss": 0.1228, "step": 11298 }, { "epoch": 0.24897673624309333, "grad_norm": 0.8493061661720276, "learning_rate": 2.638330611013225e-05, "loss": 0.1217, "step": 11299 }, { "epoch": 0.24899877153260946, "grad_norm": 0.7080199718475342, "learning_rate": 2.6382608924796412e-05, "loss": 0.1118, "step": 11300 }, { "epoch": 0.24902080682212563, "grad_norm": 0.4886351227760315, "learning_rate": 2.6381911681482844e-05, "loss": 0.0991, "step": 11301 }, { "epoch": 0.2490428421116418, "grad_norm": 1.0615829229354858, "learning_rate": 2.6381214380195095e-05, "loss": 0.0846, "step": 11302 }, { "epoch": 0.24906487740115796, "grad_norm": 0.886533796787262, "learning_rate": 2.6380517020936716e-05, "loss": 0.125, "step": 11303 }, { "epoch": 0.24908691269067412, "grad_norm": 0.9572031497955322, "learning_rate": 2.6379819603711267e-05, "loss": 0.092, "step": 11304 }, { "epoch": 0.2491089479801903, "grad_norm": 0.49551019072532654, "learning_rate": 2.6379122128522288e-05, "loss": 0.1207, "step": 11305 }, { "epoch": 0.24913098326970642, "grad_norm": 1.2020487785339355, "learning_rate": 2.637842459537334e-05, "loss": 0.1329, "step": 11306 }, { "epoch": 0.2491530185592226, "grad_norm": 0.614549458026886, "learning_rate": 2.6377727004267976e-05, "loss": 0.0876, "step": 11307 }, { "epoch": 0.24917505384873875, "grad_norm": 1.4117785692214966, "learning_rate": 2.637702935520975e-05, "loss": 0.1142, "step": 11308 }, { "epoch": 0.24919708913825492, "grad_norm": 0.9657573699951172, "learning_rate": 2.6376331648202207e-05, "loss": 0.105, "step": 11309 }, { "epoch": 0.24921912442777108, "grad_norm": 1.006050944328308, "learning_rate": 2.6375633883248907e-05, "loss": 0.1379, "step": 11310 }, { "epoch": 0.24924115971728725, "grad_norm": 0.7465881705284119, "learning_rate": 2.6374936060353404e-05, "loss": 0.1783, "step": 11311 }, { "epoch": 0.24926319500680338, "grad_norm": 1.178389072418213, "learning_rate": 2.6374238179519254e-05, "loss": 0.1385, "step": 11312 }, { "epoch": 0.24928523029631955, "grad_norm": 1.2708381414413452, "learning_rate": 2.637354024075001e-05, "loss": 0.0865, "step": 11313 }, { "epoch": 0.2493072655858357, "grad_norm": 0.6536512970924377, "learning_rate": 2.6372842244049224e-05, "loss": 0.1138, "step": 11314 }, { "epoch": 0.24932930087535188, "grad_norm": 1.0139870643615723, "learning_rate": 2.6372144189420455e-05, "loss": 0.0972, "step": 11315 }, { "epoch": 0.24935133616486804, "grad_norm": 1.101012110710144, "learning_rate": 2.637144607686726e-05, "loss": 0.1007, "step": 11316 }, { "epoch": 0.2493733714543842, "grad_norm": 0.7399787306785583, "learning_rate": 2.6370747906393186e-05, "loss": 0.1233, "step": 11317 }, { "epoch": 0.24939540674390034, "grad_norm": 1.147584080696106, "learning_rate": 2.63700496780018e-05, "loss": 0.1131, "step": 11318 }, { "epoch": 0.2494174420334165, "grad_norm": 0.7102985382080078, "learning_rate": 2.6369351391696652e-05, "loss": 0.1126, "step": 11319 }, { "epoch": 0.24943947732293267, "grad_norm": 0.6653561592102051, "learning_rate": 2.6368653047481298e-05, "loss": 0.111, "step": 11320 }, { "epoch": 0.24946151261244884, "grad_norm": 0.9075579047203064, "learning_rate": 2.6367954645359305e-05, "loss": 0.0938, "step": 11321 }, { "epoch": 0.249483547901965, "grad_norm": 1.0786594152450562, "learning_rate": 2.6367256185334218e-05, "loss": 0.1248, "step": 11322 }, { "epoch": 0.24950558319148117, "grad_norm": 1.0901848077774048, "learning_rate": 2.6366557667409597e-05, "loss": 0.0915, "step": 11323 }, { "epoch": 0.2495276184809973, "grad_norm": 1.0810285806655884, "learning_rate": 2.6365859091589007e-05, "loss": 0.0923, "step": 11324 }, { "epoch": 0.24954965377051347, "grad_norm": 1.247827172279358, "learning_rate": 2.6365160457876e-05, "loss": 0.1287, "step": 11325 }, { "epoch": 0.24957168906002963, "grad_norm": 1.1984835863113403, "learning_rate": 2.636446176627414e-05, "loss": 0.1341, "step": 11326 }, { "epoch": 0.2495937243495458, "grad_norm": 0.7887274026870728, "learning_rate": 2.6363763016786982e-05, "loss": 0.1287, "step": 11327 }, { "epoch": 0.24961575963906196, "grad_norm": 1.5912998914718628, "learning_rate": 2.636306420941808e-05, "loss": 0.1189, "step": 11328 }, { "epoch": 0.24963779492857813, "grad_norm": 0.6530058979988098, "learning_rate": 2.6362365344171002e-05, "loss": 0.1122, "step": 11329 }, { "epoch": 0.24965983021809426, "grad_norm": 1.3544467687606812, "learning_rate": 2.6361666421049303e-05, "loss": 0.16, "step": 11330 }, { "epoch": 0.24968186550761043, "grad_norm": 0.7066870927810669, "learning_rate": 2.6360967440056547e-05, "loss": 0.1117, "step": 11331 }, { "epoch": 0.2497039007971266, "grad_norm": 0.7729730606079102, "learning_rate": 2.636026840119629e-05, "loss": 0.0851, "step": 11332 }, { "epoch": 0.24972593608664276, "grad_norm": 0.7381036877632141, "learning_rate": 2.6359569304472094e-05, "loss": 0.0932, "step": 11333 }, { "epoch": 0.24974797137615892, "grad_norm": 0.9795123338699341, "learning_rate": 2.635887014988752e-05, "loss": 0.1043, "step": 11334 }, { "epoch": 0.2497700066656751, "grad_norm": 0.6714910268783569, "learning_rate": 2.6358170937446127e-05, "loss": 0.0947, "step": 11335 }, { "epoch": 0.24979204195519125, "grad_norm": 0.5465897917747498, "learning_rate": 2.6357471667151478e-05, "loss": 0.0845, "step": 11336 }, { "epoch": 0.2498140772447074, "grad_norm": 0.9933611154556274, "learning_rate": 2.6356772339007137e-05, "loss": 0.1115, "step": 11337 }, { "epoch": 0.24983611253422355, "grad_norm": 0.8686003088951111, "learning_rate": 2.635607295301667e-05, "loss": 0.1228, "step": 11338 }, { "epoch": 0.24985814782373972, "grad_norm": 0.4654780328273773, "learning_rate": 2.635537350918363e-05, "loss": 0.0616, "step": 11339 }, { "epoch": 0.24988018311325588, "grad_norm": 0.6301536560058594, "learning_rate": 2.6354674007511582e-05, "loss": 0.0785, "step": 11340 }, { "epoch": 0.24990221840277205, "grad_norm": 0.8320651054382324, "learning_rate": 2.635397444800409e-05, "loss": 0.1062, "step": 11341 }, { "epoch": 0.2499242536922882, "grad_norm": 1.1151996850967407, "learning_rate": 2.6353274830664723e-05, "loss": 0.128, "step": 11342 }, { "epoch": 0.24994628898180435, "grad_norm": 1.1268588304519653, "learning_rate": 2.6352575155497032e-05, "loss": 0.1472, "step": 11343 }, { "epoch": 0.2499683242713205, "grad_norm": 1.1915093660354614, "learning_rate": 2.635187542250459e-05, "loss": 0.0831, "step": 11344 }, { "epoch": 0.24999035956083668, "grad_norm": 0.6000493168830872, "learning_rate": 2.635117563169096e-05, "loss": 0.0838, "step": 11345 }, { "epoch": 0.2500123948503528, "grad_norm": 0.5483796000480652, "learning_rate": 2.6350475783059706e-05, "loss": 0.1004, "step": 11346 }, { "epoch": 0.250034430139869, "grad_norm": 0.9713258147239685, "learning_rate": 2.6349775876614392e-05, "loss": 0.1162, "step": 11347 }, { "epoch": 0.25005646542938514, "grad_norm": 1.105763554573059, "learning_rate": 2.634907591235858e-05, "loss": 0.1217, "step": 11348 }, { "epoch": 0.2500785007189013, "grad_norm": 0.8041632175445557, "learning_rate": 2.6348375890295844e-05, "loss": 0.1257, "step": 11349 }, { "epoch": 0.2501005360084175, "grad_norm": 0.724236786365509, "learning_rate": 2.634767581042974e-05, "loss": 0.0831, "step": 11350 }, { "epoch": 0.25012257129793364, "grad_norm": 1.128684401512146, "learning_rate": 2.6346975672763845e-05, "loss": 0.1113, "step": 11351 }, { "epoch": 0.2501446065874498, "grad_norm": 0.794823169708252, "learning_rate": 2.6346275477301714e-05, "loss": 0.0882, "step": 11352 }, { "epoch": 0.25016664187696597, "grad_norm": 0.9176578521728516, "learning_rate": 2.6345575224046912e-05, "loss": 0.119, "step": 11353 }, { "epoch": 0.25018867716648213, "grad_norm": 1.378374695777893, "learning_rate": 2.6344874913003015e-05, "loss": 0.1092, "step": 11354 }, { "epoch": 0.2502107124559983, "grad_norm": 0.6814283132553101, "learning_rate": 2.6344174544173593e-05, "loss": 0.1256, "step": 11355 }, { "epoch": 0.25023274774551446, "grad_norm": 0.7531617879867554, "learning_rate": 2.63434741175622e-05, "loss": 0.1084, "step": 11356 }, { "epoch": 0.2502547830350306, "grad_norm": 0.9479734897613525, "learning_rate": 2.6342773633172413e-05, "loss": 0.099, "step": 11357 }, { "epoch": 0.25027681832454673, "grad_norm": 1.2743242979049683, "learning_rate": 2.6342073091007794e-05, "loss": 0.1153, "step": 11358 }, { "epoch": 0.2502988536140629, "grad_norm": 0.8918540477752686, "learning_rate": 2.6341372491071917e-05, "loss": 0.1113, "step": 11359 }, { "epoch": 0.25032088890357906, "grad_norm": 0.9049042463302612, "learning_rate": 2.6340671833368348e-05, "loss": 0.082, "step": 11360 }, { "epoch": 0.25034292419309523, "grad_norm": 0.8522199392318726, "learning_rate": 2.633997111790066e-05, "loss": 0.1063, "step": 11361 }, { "epoch": 0.2503649594826114, "grad_norm": 1.1292016506195068, "learning_rate": 2.6339270344672412e-05, "loss": 0.1169, "step": 11362 }, { "epoch": 0.25038699477212756, "grad_norm": 1.1196233034133911, "learning_rate": 2.633856951368718e-05, "loss": 0.1186, "step": 11363 }, { "epoch": 0.2504090300616437, "grad_norm": 0.7231181263923645, "learning_rate": 2.6337868624948535e-05, "loss": 0.1033, "step": 11364 }, { "epoch": 0.2504310653511599, "grad_norm": 0.9615692496299744, "learning_rate": 2.6337167678460047e-05, "loss": 0.1286, "step": 11365 }, { "epoch": 0.25045310064067605, "grad_norm": 0.47687068581581116, "learning_rate": 2.633646667422528e-05, "loss": 0.1148, "step": 11366 }, { "epoch": 0.2504751359301922, "grad_norm": 0.7724563479423523, "learning_rate": 2.6335765612247812e-05, "loss": 0.1019, "step": 11367 }, { "epoch": 0.2504971712197084, "grad_norm": 1.4171252250671387, "learning_rate": 2.6335064492531213e-05, "loss": 0.1143, "step": 11368 }, { "epoch": 0.25051920650922455, "grad_norm": 0.7938097715377808, "learning_rate": 2.6334363315079047e-05, "loss": 0.0795, "step": 11369 }, { "epoch": 0.2505412417987407, "grad_norm": 0.7815518975257874, "learning_rate": 2.6333662079894893e-05, "loss": 0.0891, "step": 11370 }, { "epoch": 0.2505632770882568, "grad_norm": 0.7408329844474792, "learning_rate": 2.6332960786982324e-05, "loss": 0.0823, "step": 11371 }, { "epoch": 0.250585312377773, "grad_norm": 0.6960488557815552, "learning_rate": 2.6332259436344906e-05, "loss": 0.0997, "step": 11372 }, { "epoch": 0.25060734766728915, "grad_norm": 0.8666942715644836, "learning_rate": 2.633155802798621e-05, "loss": 0.0622, "step": 11373 }, { "epoch": 0.2506293829568053, "grad_norm": 1.1589939594268799, "learning_rate": 2.633085656190982e-05, "loss": 0.1689, "step": 11374 }, { "epoch": 0.2506514182463215, "grad_norm": 1.2589348554611206, "learning_rate": 2.63301550381193e-05, "loss": 0.1592, "step": 11375 }, { "epoch": 0.25067345353583764, "grad_norm": 0.7223392724990845, "learning_rate": 2.632945345661822e-05, "loss": 0.0967, "step": 11376 }, { "epoch": 0.2506954888253538, "grad_norm": 1.0633777379989624, "learning_rate": 2.632875181741016e-05, "loss": 0.1089, "step": 11377 }, { "epoch": 0.25071752411486997, "grad_norm": 0.7689091563224792, "learning_rate": 2.632805012049869e-05, "loss": 0.1206, "step": 11378 }, { "epoch": 0.25073955940438614, "grad_norm": 0.6367802023887634, "learning_rate": 2.632734836588739e-05, "loss": 0.0959, "step": 11379 }, { "epoch": 0.2507615946939023, "grad_norm": 1.0764397382736206, "learning_rate": 2.6326646553579828e-05, "loss": 0.0987, "step": 11380 }, { "epoch": 0.25078362998341847, "grad_norm": 1.1366642713546753, "learning_rate": 2.6325944683579585e-05, "loss": 0.1015, "step": 11381 }, { "epoch": 0.25080566527293463, "grad_norm": 0.698366641998291, "learning_rate": 2.632524275589023e-05, "loss": 0.1189, "step": 11382 }, { "epoch": 0.25082770056245074, "grad_norm": 0.8972684741020203, "learning_rate": 2.632454077051534e-05, "loss": 0.1005, "step": 11383 }, { "epoch": 0.2508497358519669, "grad_norm": 1.2592921257019043, "learning_rate": 2.6323838727458493e-05, "loss": 0.1078, "step": 11384 }, { "epoch": 0.25087177114148307, "grad_norm": 0.7425583600997925, "learning_rate": 2.632313662672326e-05, "loss": 0.0985, "step": 11385 }, { "epoch": 0.25089380643099923, "grad_norm": 0.8470374345779419, "learning_rate": 2.6322434468313217e-05, "loss": 0.1133, "step": 11386 }, { "epoch": 0.2509158417205154, "grad_norm": 0.9758893251419067, "learning_rate": 2.632173225223195e-05, "loss": 0.0972, "step": 11387 }, { "epoch": 0.25093787701003156, "grad_norm": 0.8096317052841187, "learning_rate": 2.632102997848303e-05, "loss": 0.1188, "step": 11388 }, { "epoch": 0.2509599122995477, "grad_norm": 1.1083298921585083, "learning_rate": 2.632032764707003e-05, "loss": 0.0973, "step": 11389 }, { "epoch": 0.2509819475890639, "grad_norm": 0.5875276327133179, "learning_rate": 2.6319625257996532e-05, "loss": 0.1028, "step": 11390 }, { "epoch": 0.25100398287858006, "grad_norm": 0.762715220451355, "learning_rate": 2.6318922811266114e-05, "loss": 0.0758, "step": 11391 }, { "epoch": 0.2510260181680962, "grad_norm": 0.8238139152526855, "learning_rate": 2.631822030688235e-05, "loss": 0.0741, "step": 11392 }, { "epoch": 0.2510480534576124, "grad_norm": 0.9846736192703247, "learning_rate": 2.631751774484882e-05, "loss": 0.1156, "step": 11393 }, { "epoch": 0.25107008874712855, "grad_norm": 1.5349302291870117, "learning_rate": 2.6316815125169107e-05, "loss": 0.1203, "step": 11394 }, { "epoch": 0.25109212403664466, "grad_norm": 0.8364424705505371, "learning_rate": 2.6316112447846783e-05, "loss": 0.0922, "step": 11395 }, { "epoch": 0.2511141593261608, "grad_norm": 0.60049968957901, "learning_rate": 2.6315409712885432e-05, "loss": 0.0891, "step": 11396 }, { "epoch": 0.251136194615677, "grad_norm": 0.8900742530822754, "learning_rate": 2.631470692028863e-05, "loss": 0.102, "step": 11397 }, { "epoch": 0.25115822990519315, "grad_norm": 1.1552402973175049, "learning_rate": 2.631400407005996e-05, "loss": 0.1165, "step": 11398 }, { "epoch": 0.2511802651947093, "grad_norm": 1.265060544013977, "learning_rate": 2.6313301162202998e-05, "loss": 0.084, "step": 11399 }, { "epoch": 0.2512023004842255, "grad_norm": 0.6871063709259033, "learning_rate": 2.631259819672133e-05, "loss": 0.147, "step": 11400 }, { "epoch": 0.25122433577374165, "grad_norm": 0.5992005467414856, "learning_rate": 2.631189517361853e-05, "loss": 0.0959, "step": 11401 }, { "epoch": 0.2512463710632578, "grad_norm": 1.2173821926116943, "learning_rate": 2.631119209289818e-05, "loss": 0.1156, "step": 11402 }, { "epoch": 0.251268406352774, "grad_norm": 0.6692683696746826, "learning_rate": 2.6310488954563867e-05, "loss": 0.0612, "step": 11403 }, { "epoch": 0.25129044164229014, "grad_norm": 0.8855447769165039, "learning_rate": 2.630978575861917e-05, "loss": 0.096, "step": 11404 }, { "epoch": 0.2513124769318063, "grad_norm": 1.1982898712158203, "learning_rate": 2.6309082505067667e-05, "loss": 0.0986, "step": 11405 }, { "epoch": 0.25133451222132247, "grad_norm": 0.7164506912231445, "learning_rate": 2.6308379193912936e-05, "loss": 0.1152, "step": 11406 }, { "epoch": 0.25135654751083863, "grad_norm": 0.8055424094200134, "learning_rate": 2.6307675825158576e-05, "loss": 0.1344, "step": 11407 }, { "epoch": 0.25137858280035474, "grad_norm": 1.3067867755889893, "learning_rate": 2.6306972398808154e-05, "loss": 0.1216, "step": 11408 }, { "epoch": 0.2514006180898709, "grad_norm": 0.5971179604530334, "learning_rate": 2.630626891486526e-05, "loss": 0.0918, "step": 11409 }, { "epoch": 0.2514226533793871, "grad_norm": 1.0712871551513672, "learning_rate": 2.6305565373333473e-05, "loss": 0.1836, "step": 11410 }, { "epoch": 0.25144468866890324, "grad_norm": 0.6667041778564453, "learning_rate": 2.6304861774216384e-05, "loss": 0.1051, "step": 11411 }, { "epoch": 0.2514667239584194, "grad_norm": 0.5287922620773315, "learning_rate": 2.6304158117517568e-05, "loss": 0.0633, "step": 11412 }, { "epoch": 0.25148875924793557, "grad_norm": 0.9339732527732849, "learning_rate": 2.630345440324061e-05, "loss": 0.1366, "step": 11413 }, { "epoch": 0.25151079453745173, "grad_norm": 0.8901476263999939, "learning_rate": 2.63027506313891e-05, "loss": 0.0958, "step": 11414 }, { "epoch": 0.2515328298269679, "grad_norm": 0.49826082587242126, "learning_rate": 2.6302046801966623e-05, "loss": 0.1089, "step": 11415 }, { "epoch": 0.25155486511648406, "grad_norm": 0.99006187915802, "learning_rate": 2.6301342914976763e-05, "loss": 0.1261, "step": 11416 }, { "epoch": 0.2515769004060002, "grad_norm": 0.6735267639160156, "learning_rate": 2.6300638970423097e-05, "loss": 0.1085, "step": 11417 }, { "epoch": 0.2515989356955164, "grad_norm": 0.9137939214706421, "learning_rate": 2.629993496830922e-05, "loss": 0.0898, "step": 11418 }, { "epoch": 0.25162097098503255, "grad_norm": 0.6901825070381165, "learning_rate": 2.6299230908638713e-05, "loss": 0.0895, "step": 11419 }, { "epoch": 0.25164300627454866, "grad_norm": 0.6798363924026489, "learning_rate": 2.6298526791415165e-05, "loss": 0.1078, "step": 11420 }, { "epoch": 0.25166504156406483, "grad_norm": 0.8716544508934021, "learning_rate": 2.629782261664216e-05, "loss": 0.1172, "step": 11421 }, { "epoch": 0.251687076853581, "grad_norm": 0.6519979238510132, "learning_rate": 2.6297118384323285e-05, "loss": 0.0984, "step": 11422 }, { "epoch": 0.25170911214309716, "grad_norm": 0.8954731822013855, "learning_rate": 2.6296414094462137e-05, "loss": 0.1331, "step": 11423 }, { "epoch": 0.2517311474326133, "grad_norm": 0.7017858028411865, "learning_rate": 2.6295709747062284e-05, "loss": 0.0757, "step": 11424 }, { "epoch": 0.2517531827221295, "grad_norm": 1.1421294212341309, "learning_rate": 2.629500534212733e-05, "loss": 0.0821, "step": 11425 }, { "epoch": 0.25177521801164565, "grad_norm": 4.69108772277832, "learning_rate": 2.6294300879660853e-05, "loss": 0.0686, "step": 11426 }, { "epoch": 0.2517972533011618, "grad_norm": 7.216239929199219, "learning_rate": 2.6293596359666447e-05, "loss": 0.118, "step": 11427 }, { "epoch": 0.251819288590678, "grad_norm": 1.4516547918319702, "learning_rate": 2.6292891782147695e-05, "loss": 0.0909, "step": 11428 }, { "epoch": 0.25184132388019415, "grad_norm": 2.2170569896698, "learning_rate": 2.6292187147108193e-05, "loss": 0.1304, "step": 11429 }, { "epoch": 0.2518633591697103, "grad_norm": 14.055523872375488, "learning_rate": 2.629148245455153e-05, "loss": 0.112, "step": 11430 }, { "epoch": 0.2518853944592265, "grad_norm": 3.198610782623291, "learning_rate": 2.6290777704481286e-05, "loss": 0.0658, "step": 11431 }, { "epoch": 0.2519074297487426, "grad_norm": 1.5549057722091675, "learning_rate": 2.6290072896901058e-05, "loss": 0.1243, "step": 11432 }, { "epoch": 0.25192946503825875, "grad_norm": 1.1513723134994507, "learning_rate": 2.6289368031814437e-05, "loss": 0.0943, "step": 11433 }, { "epoch": 0.2519515003277749, "grad_norm": 0.7824363112449646, "learning_rate": 2.6288663109225007e-05, "loss": 0.1237, "step": 11434 }, { "epoch": 0.2519735356172911, "grad_norm": 0.9596419930458069, "learning_rate": 2.6287958129136368e-05, "loss": 0.1663, "step": 11435 }, { "epoch": 0.25199557090680724, "grad_norm": 1.8304826021194458, "learning_rate": 2.6287253091552098e-05, "loss": 0.131, "step": 11436 }, { "epoch": 0.2520176061963234, "grad_norm": 1.153978943824768, "learning_rate": 2.6286547996475802e-05, "loss": 0.1356, "step": 11437 }, { "epoch": 0.25203964148583957, "grad_norm": 0.8210124969482422, "learning_rate": 2.628584284391106e-05, "loss": 0.0605, "step": 11438 }, { "epoch": 0.25206167677535574, "grad_norm": 1.0199843645095825, "learning_rate": 2.628513763386147e-05, "loss": 0.1002, "step": 11439 }, { "epoch": 0.2520837120648719, "grad_norm": 0.9090614914894104, "learning_rate": 2.6284432366330623e-05, "loss": 0.1238, "step": 11440 }, { "epoch": 0.25210574735438807, "grad_norm": 0.9481558799743652, "learning_rate": 2.628372704132211e-05, "loss": 0.1105, "step": 11441 }, { "epoch": 0.25212778264390423, "grad_norm": 0.7327084541320801, "learning_rate": 2.6283021658839522e-05, "loss": 0.101, "step": 11442 }, { "epoch": 0.2521498179334204, "grad_norm": 0.6087390780448914, "learning_rate": 2.628231621888646e-05, "loss": 0.0936, "step": 11443 }, { "epoch": 0.25217185322293656, "grad_norm": 0.5592333674430847, "learning_rate": 2.6281610721466506e-05, "loss": 0.077, "step": 11444 }, { "epoch": 0.25219388851245267, "grad_norm": 1.6719692945480347, "learning_rate": 2.628090516658326e-05, "loss": 0.0841, "step": 11445 }, { "epoch": 0.25221592380196883, "grad_norm": 0.605931282043457, "learning_rate": 2.6280199554240315e-05, "loss": 0.1133, "step": 11446 }, { "epoch": 0.252237959091485, "grad_norm": 0.7643113732337952, "learning_rate": 2.627949388444127e-05, "loss": 0.0932, "step": 11447 }, { "epoch": 0.25225999438100116, "grad_norm": 0.7066969275474548, "learning_rate": 2.6278788157189708e-05, "loss": 0.0781, "step": 11448 }, { "epoch": 0.2522820296705173, "grad_norm": 0.7974853515625, "learning_rate": 2.627808237248923e-05, "loss": 0.1302, "step": 11449 }, { "epoch": 0.2523040649600335, "grad_norm": 0.7444814443588257, "learning_rate": 2.6277376530343433e-05, "loss": 0.1285, "step": 11450 }, { "epoch": 0.25232610024954966, "grad_norm": 1.0379570722579956, "learning_rate": 2.6276670630755908e-05, "loss": 0.1096, "step": 11451 }, { "epoch": 0.2523481355390658, "grad_norm": 0.5217885971069336, "learning_rate": 2.6275964673730256e-05, "loss": 0.0703, "step": 11452 }, { "epoch": 0.252370170828582, "grad_norm": 0.7640361785888672, "learning_rate": 2.6275258659270067e-05, "loss": 0.0481, "step": 11453 }, { "epoch": 0.25239220611809815, "grad_norm": 0.7870277166366577, "learning_rate": 2.627455258737894e-05, "loss": 0.1076, "step": 11454 }, { "epoch": 0.2524142414076143, "grad_norm": 1.035750389099121, "learning_rate": 2.6273846458060468e-05, "loss": 0.1248, "step": 11455 }, { "epoch": 0.2524362766971305, "grad_norm": 0.8831973671913147, "learning_rate": 2.6273140271318253e-05, "loss": 0.186, "step": 11456 }, { "epoch": 0.2524583119866466, "grad_norm": 0.7131795287132263, "learning_rate": 2.6272434027155886e-05, "loss": 0.0876, "step": 11457 }, { "epoch": 0.25248034727616275, "grad_norm": 1.8896336555480957, "learning_rate": 2.627172772557697e-05, "loss": 0.0812, "step": 11458 }, { "epoch": 0.2525023825656789, "grad_norm": 1.2878156900405884, "learning_rate": 2.6271021366585103e-05, "loss": 0.1217, "step": 11459 }, { "epoch": 0.2525244178551951, "grad_norm": 1.2207175493240356, "learning_rate": 2.6270314950183877e-05, "loss": 0.0974, "step": 11460 }, { "epoch": 0.25254645314471125, "grad_norm": 0.7905603051185608, "learning_rate": 2.6269608476376895e-05, "loss": 0.1381, "step": 11461 }, { "epoch": 0.2525684884342274, "grad_norm": 0.8802785277366638, "learning_rate": 2.6268901945167753e-05, "loss": 0.1547, "step": 11462 }, { "epoch": 0.2525905237237436, "grad_norm": 1.1242998838424683, "learning_rate": 2.626819535656005e-05, "loss": 0.1124, "step": 11463 }, { "epoch": 0.25261255901325974, "grad_norm": 0.9236152768135071, "learning_rate": 2.626748871055739e-05, "loss": 0.0943, "step": 11464 }, { "epoch": 0.2526345943027759, "grad_norm": 0.5679746270179749, "learning_rate": 2.6266782007163362e-05, "loss": 0.0854, "step": 11465 }, { "epoch": 0.25265662959229207, "grad_norm": 0.6069711446762085, "learning_rate": 2.6266075246381573e-05, "loss": 0.1314, "step": 11466 }, { "epoch": 0.25267866488180823, "grad_norm": 0.7206109762191772, "learning_rate": 2.6265368428215623e-05, "loss": 0.084, "step": 11467 }, { "epoch": 0.2527007001713244, "grad_norm": 1.1413885354995728, "learning_rate": 2.6264661552669113e-05, "loss": 0.0944, "step": 11468 }, { "epoch": 0.2527227354608405, "grad_norm": 1.1707552671432495, "learning_rate": 2.626395461974563e-05, "loss": 0.137, "step": 11469 }, { "epoch": 0.2527447707503567, "grad_norm": 0.7324046492576599, "learning_rate": 2.62632476294488e-05, "loss": 0.0994, "step": 11470 }, { "epoch": 0.25276680603987284, "grad_norm": 0.8220298886299133, "learning_rate": 2.6262540581782206e-05, "loss": 0.081, "step": 11471 }, { "epoch": 0.252788841329389, "grad_norm": 1.0288413763046265, "learning_rate": 2.626183347674945e-05, "loss": 0.0936, "step": 11472 }, { "epoch": 0.25281087661890517, "grad_norm": 0.46167728304862976, "learning_rate": 2.6261126314354136e-05, "loss": 0.1033, "step": 11473 }, { "epoch": 0.25283291190842133, "grad_norm": 1.0596938133239746, "learning_rate": 2.626041909459987e-05, "loss": 0.0955, "step": 11474 }, { "epoch": 0.2528549471979375, "grad_norm": 0.7082998156547546, "learning_rate": 2.625971181749025e-05, "loss": 0.0948, "step": 11475 }, { "epoch": 0.25287698248745366, "grad_norm": 0.7606220841407776, "learning_rate": 2.625900448302888e-05, "loss": 0.1179, "step": 11476 }, { "epoch": 0.2528990177769698, "grad_norm": 0.809337317943573, "learning_rate": 2.625829709121936e-05, "loss": 0.1216, "step": 11477 }, { "epoch": 0.252921053066486, "grad_norm": 0.4989163875579834, "learning_rate": 2.62575896420653e-05, "loss": 0.0853, "step": 11478 }, { "epoch": 0.25294308835600215, "grad_norm": 0.6081591248512268, "learning_rate": 2.6256882135570296e-05, "loss": 0.1094, "step": 11479 }, { "epoch": 0.2529651236455183, "grad_norm": 0.7597969174385071, "learning_rate": 2.625617457173796e-05, "loss": 0.0946, "step": 11480 }, { "epoch": 0.2529871589350345, "grad_norm": 0.6858100295066833, "learning_rate": 2.6255466950571883e-05, "loss": 0.1315, "step": 11481 }, { "epoch": 0.2530091942245506, "grad_norm": 1.268563151359558, "learning_rate": 2.6254759272075686e-05, "loss": 0.1327, "step": 11482 }, { "epoch": 0.25303122951406676, "grad_norm": 0.9751344323158264, "learning_rate": 2.625405153625296e-05, "loss": 0.0822, "step": 11483 }, { "epoch": 0.2530532648035829, "grad_norm": 0.8757258653640747, "learning_rate": 2.6253343743107313e-05, "loss": 0.1045, "step": 11484 }, { "epoch": 0.2530753000930991, "grad_norm": 0.8392258882522583, "learning_rate": 2.6252635892642354e-05, "loss": 0.0789, "step": 11485 }, { "epoch": 0.25309733538261525, "grad_norm": 0.8248998522758484, "learning_rate": 2.6251927984861687e-05, "loss": 0.0942, "step": 11486 }, { "epoch": 0.2531193706721314, "grad_norm": 1.1292861700057983, "learning_rate": 2.6251220019768914e-05, "loss": 0.1211, "step": 11487 }, { "epoch": 0.2531414059616476, "grad_norm": 0.8259032964706421, "learning_rate": 2.6250511997367647e-05, "loss": 0.1036, "step": 11488 }, { "epoch": 0.25316344125116375, "grad_norm": 1.2484445571899414, "learning_rate": 2.6249803917661487e-05, "loss": 0.1263, "step": 11489 }, { "epoch": 0.2531854765406799, "grad_norm": 1.02527916431427, "learning_rate": 2.6249095780654047e-05, "loss": 0.0913, "step": 11490 }, { "epoch": 0.2532075118301961, "grad_norm": 0.8099772334098816, "learning_rate": 2.6248387586348927e-05, "loss": 0.0744, "step": 11491 }, { "epoch": 0.25322954711971224, "grad_norm": 0.8493970036506653, "learning_rate": 2.6247679334749733e-05, "loss": 0.0879, "step": 11492 }, { "epoch": 0.2532515824092284, "grad_norm": 1.1030595302581787, "learning_rate": 2.6246971025860084e-05, "loss": 0.1366, "step": 11493 }, { "epoch": 0.2532736176987445, "grad_norm": 0.5932806134223938, "learning_rate": 2.6246262659683577e-05, "loss": 0.0881, "step": 11494 }, { "epoch": 0.2532956529882607, "grad_norm": 0.8494893312454224, "learning_rate": 2.6245554236223827e-05, "loss": 0.0886, "step": 11495 }, { "epoch": 0.25331768827777684, "grad_norm": 1.1120107173919678, "learning_rate": 2.6244845755484433e-05, "loss": 0.0665, "step": 11496 }, { "epoch": 0.253339723567293, "grad_norm": 0.7880803942680359, "learning_rate": 2.6244137217469014e-05, "loss": 0.1048, "step": 11497 }, { "epoch": 0.25336175885680917, "grad_norm": 0.5772904753684998, "learning_rate": 2.6243428622181177e-05, "loss": 0.0654, "step": 11498 }, { "epoch": 0.25338379414632534, "grad_norm": 0.9813472032546997, "learning_rate": 2.624271996962452e-05, "loss": 0.086, "step": 11499 }, { "epoch": 0.2534058294358415, "grad_norm": 1.283948540687561, "learning_rate": 2.6242011259802668e-05, "loss": 0.1098, "step": 11500 }, { "epoch": 0.25342786472535767, "grad_norm": 0.8294695615768433, "learning_rate": 2.6241302492719222e-05, "loss": 0.1065, "step": 11501 }, { "epoch": 0.25344990001487383, "grad_norm": 0.852048397064209, "learning_rate": 2.6240593668377796e-05, "loss": 0.103, "step": 11502 }, { "epoch": 0.25347193530439, "grad_norm": 0.8254604935646057, "learning_rate": 2.6239884786781995e-05, "loss": 0.1492, "step": 11503 }, { "epoch": 0.25349397059390616, "grad_norm": 1.1550076007843018, "learning_rate": 2.623917584793544e-05, "loss": 0.0889, "step": 11504 }, { "epoch": 0.2535160058834223, "grad_norm": 0.6433809399604797, "learning_rate": 2.623846685184173e-05, "loss": 0.0666, "step": 11505 }, { "epoch": 0.2535380411729385, "grad_norm": 0.4938584268093109, "learning_rate": 2.6237757798504482e-05, "loss": 0.088, "step": 11506 }, { "epoch": 0.2535600764624546, "grad_norm": 0.9442456960678101, "learning_rate": 2.6237048687927313e-05, "loss": 0.091, "step": 11507 }, { "epoch": 0.25358211175197076, "grad_norm": 0.6464394927024841, "learning_rate": 2.6236339520113827e-05, "loss": 0.0737, "step": 11508 }, { "epoch": 0.2536041470414869, "grad_norm": 0.9528955221176147, "learning_rate": 2.6235630295067633e-05, "loss": 0.1004, "step": 11509 }, { "epoch": 0.2536261823310031, "grad_norm": 0.9005370140075684, "learning_rate": 2.6234921012792355e-05, "loss": 0.0979, "step": 11510 }, { "epoch": 0.25364821762051926, "grad_norm": 0.8946318626403809, "learning_rate": 2.6234211673291596e-05, "loss": 0.0959, "step": 11511 }, { "epoch": 0.2536702529100354, "grad_norm": 0.7226933240890503, "learning_rate": 2.6233502276568974e-05, "loss": 0.1024, "step": 11512 }, { "epoch": 0.2536922881995516, "grad_norm": 0.9331442713737488, "learning_rate": 2.6232792822628097e-05, "loss": 0.1003, "step": 11513 }, { "epoch": 0.25371432348906775, "grad_norm": 0.5414844155311584, "learning_rate": 2.623208331147259e-05, "loss": 0.1294, "step": 11514 }, { "epoch": 0.2537363587785839, "grad_norm": 0.8748544454574585, "learning_rate": 2.6231373743106057e-05, "loss": 0.0977, "step": 11515 }, { "epoch": 0.2537583940681001, "grad_norm": 0.8158866167068481, "learning_rate": 2.623066411753211e-05, "loss": 0.0751, "step": 11516 }, { "epoch": 0.25378042935761624, "grad_norm": 1.047279953956604, "learning_rate": 2.6229954434754374e-05, "loss": 0.0985, "step": 11517 }, { "epoch": 0.2538024646471324, "grad_norm": 0.8160390257835388, "learning_rate": 2.6229244694776457e-05, "loss": 0.0876, "step": 11518 }, { "epoch": 0.2538244999366485, "grad_norm": 0.842927098274231, "learning_rate": 2.6228534897601974e-05, "loss": 0.1142, "step": 11519 }, { "epoch": 0.2538465352261647, "grad_norm": 1.37497079372406, "learning_rate": 2.6227825043234542e-05, "loss": 0.0897, "step": 11520 }, { "epoch": 0.25386857051568085, "grad_norm": 0.5636454820632935, "learning_rate": 2.6227115131677776e-05, "loss": 0.0931, "step": 11521 }, { "epoch": 0.253890605805197, "grad_norm": 0.8767473697662354, "learning_rate": 2.6226405162935292e-05, "loss": 0.0875, "step": 11522 }, { "epoch": 0.2539126410947132, "grad_norm": 0.8614013195037842, "learning_rate": 2.622569513701071e-05, "loss": 0.1428, "step": 11523 }, { "epoch": 0.25393467638422934, "grad_norm": 1.9887065887451172, "learning_rate": 2.6224985053907636e-05, "loss": 0.1421, "step": 11524 }, { "epoch": 0.2539567116737455, "grad_norm": 0.6547281742095947, "learning_rate": 2.6224274913629698e-05, "loss": 0.0637, "step": 11525 }, { "epoch": 0.25397874696326167, "grad_norm": 0.9464898705482483, "learning_rate": 2.6223564716180508e-05, "loss": 0.0895, "step": 11526 }, { "epoch": 0.25400078225277783, "grad_norm": 0.6099291443824768, "learning_rate": 2.622285446156369e-05, "loss": 0.0906, "step": 11527 }, { "epoch": 0.254022817542294, "grad_norm": 0.5795163512229919, "learning_rate": 2.6222144149782846e-05, "loss": 0.1035, "step": 11528 }, { "epoch": 0.25404485283181016, "grad_norm": 0.9988101720809937, "learning_rate": 2.622143378084161e-05, "loss": 0.1198, "step": 11529 }, { "epoch": 0.25406688812132633, "grad_norm": 1.0972014665603638, "learning_rate": 2.6220723354743593e-05, "loss": 0.1182, "step": 11530 }, { "epoch": 0.25408892341084244, "grad_norm": 0.8824039697647095, "learning_rate": 2.6220012871492415e-05, "loss": 0.1097, "step": 11531 }, { "epoch": 0.2541109587003586, "grad_norm": 0.8769010305404663, "learning_rate": 2.621930233109169e-05, "loss": 0.0852, "step": 11532 }, { "epoch": 0.25413299398987477, "grad_norm": 1.0276764631271362, "learning_rate": 2.6218591733545053e-05, "loss": 0.1516, "step": 11533 }, { "epoch": 0.25415502927939093, "grad_norm": 0.7284246683120728, "learning_rate": 2.6217881078856104e-05, "loss": 0.0856, "step": 11534 }, { "epoch": 0.2541770645689071, "grad_norm": 1.160042643547058, "learning_rate": 2.6217170367028475e-05, "loss": 0.1208, "step": 11535 }, { "epoch": 0.25419909985842326, "grad_norm": 0.7709147334098816, "learning_rate": 2.6216459598065783e-05, "loss": 0.098, "step": 11536 }, { "epoch": 0.2542211351479394, "grad_norm": 1.6300952434539795, "learning_rate": 2.6215748771971644e-05, "loss": 0.1272, "step": 11537 }, { "epoch": 0.2542431704374556, "grad_norm": 0.6776608824729919, "learning_rate": 2.6215037888749683e-05, "loss": 0.075, "step": 11538 }, { "epoch": 0.25426520572697175, "grad_norm": 0.7861534357070923, "learning_rate": 2.621432694840352e-05, "loss": 0.0876, "step": 11539 }, { "epoch": 0.2542872410164879, "grad_norm": 0.9857446551322937, "learning_rate": 2.6213615950936778e-05, "loss": 0.064, "step": 11540 }, { "epoch": 0.2543092763060041, "grad_norm": 0.6995904445648193, "learning_rate": 2.6212904896353072e-05, "loss": 0.108, "step": 11541 }, { "epoch": 0.25433131159552025, "grad_norm": 0.7699483036994934, "learning_rate": 2.6212193784656032e-05, "loss": 0.1214, "step": 11542 }, { "epoch": 0.2543533468850364, "grad_norm": 0.7576216459274292, "learning_rate": 2.621148261584928e-05, "loss": 0.1063, "step": 11543 }, { "epoch": 0.2543753821745525, "grad_norm": 0.6058982610702515, "learning_rate": 2.6210771389936432e-05, "loss": 0.0825, "step": 11544 }, { "epoch": 0.2543974174640687, "grad_norm": 1.168296217918396, "learning_rate": 2.6210060106921114e-05, "loss": 0.116, "step": 11545 }, { "epoch": 0.25441945275358485, "grad_norm": 0.5416131615638733, "learning_rate": 2.6209348766806948e-05, "loss": 0.0691, "step": 11546 }, { "epoch": 0.254441488043101, "grad_norm": 0.8505477905273438, "learning_rate": 2.620863736959755e-05, "loss": 0.0994, "step": 11547 }, { "epoch": 0.2544635233326172, "grad_norm": 1.095505952835083, "learning_rate": 2.620792591529656e-05, "loss": 0.1121, "step": 11548 }, { "epoch": 0.25448555862213335, "grad_norm": 1.4639415740966797, "learning_rate": 2.6207214403907594e-05, "loss": 0.1302, "step": 11549 }, { "epoch": 0.2545075939116495, "grad_norm": 0.7456328272819519, "learning_rate": 2.6206502835434277e-05, "loss": 0.0743, "step": 11550 }, { "epoch": 0.2545296292011657, "grad_norm": 0.6905485987663269, "learning_rate": 2.6205791209880227e-05, "loss": 0.0869, "step": 11551 }, { "epoch": 0.25455166449068184, "grad_norm": 0.8870568871498108, "learning_rate": 2.6205079527249072e-05, "loss": 0.0703, "step": 11552 }, { "epoch": 0.254573699780198, "grad_norm": 0.8774862289428711, "learning_rate": 2.620436778754444e-05, "loss": 0.0788, "step": 11553 }, { "epoch": 0.25459573506971417, "grad_norm": 0.7098369598388672, "learning_rate": 2.6203655990769957e-05, "loss": 0.1026, "step": 11554 }, { "epoch": 0.25461777035923033, "grad_norm": 0.8238643407821655, "learning_rate": 2.6202944136929242e-05, "loss": 0.1088, "step": 11555 }, { "epoch": 0.25463980564874644, "grad_norm": 1.0250322818756104, "learning_rate": 2.620223222602593e-05, "loss": 0.0711, "step": 11556 }, { "epoch": 0.2546618409382626, "grad_norm": 0.8417701721191406, "learning_rate": 2.6201520258063635e-05, "loss": 0.1251, "step": 11557 }, { "epoch": 0.25468387622777877, "grad_norm": 0.5040130019187927, "learning_rate": 2.6200808233046e-05, "loss": 0.074, "step": 11558 }, { "epoch": 0.25470591151729494, "grad_norm": 0.7220971584320068, "learning_rate": 2.6200096150976636e-05, "loss": 0.1218, "step": 11559 }, { "epoch": 0.2547279468068111, "grad_norm": 1.745930552482605, "learning_rate": 2.6199384011859178e-05, "loss": 0.1196, "step": 11560 }, { "epoch": 0.25474998209632727, "grad_norm": 0.9266997575759888, "learning_rate": 2.6198671815697252e-05, "loss": 0.1233, "step": 11561 }, { "epoch": 0.25477201738584343, "grad_norm": 0.7329087853431702, "learning_rate": 2.6197959562494482e-05, "loss": 0.0991, "step": 11562 }, { "epoch": 0.2547940526753596, "grad_norm": 0.6999072432518005, "learning_rate": 2.6197247252254505e-05, "loss": 0.1241, "step": 11563 }, { "epoch": 0.25481608796487576, "grad_norm": 0.8473231196403503, "learning_rate": 2.619653488498094e-05, "loss": 0.1238, "step": 11564 }, { "epoch": 0.2548381232543919, "grad_norm": 0.7751977443695068, "learning_rate": 2.6195822460677416e-05, "loss": 0.1266, "step": 11565 }, { "epoch": 0.2548601585439081, "grad_norm": 0.6024946570396423, "learning_rate": 2.6195109979347568e-05, "loss": 0.0951, "step": 11566 }, { "epoch": 0.25488219383342425, "grad_norm": 0.644664466381073, "learning_rate": 2.6194397440995022e-05, "loss": 0.0677, "step": 11567 }, { "epoch": 0.25490422912294036, "grad_norm": 0.7518036961555481, "learning_rate": 2.6193684845623406e-05, "loss": 0.0778, "step": 11568 }, { "epoch": 0.2549262644124565, "grad_norm": 0.6117691993713379, "learning_rate": 2.619297219323635e-05, "loss": 0.096, "step": 11569 }, { "epoch": 0.2549482997019727, "grad_norm": 1.0491256713867188, "learning_rate": 2.6192259483837486e-05, "loss": 0.1422, "step": 11570 }, { "epoch": 0.25497033499148886, "grad_norm": 1.5206047296524048, "learning_rate": 2.6191546717430442e-05, "loss": 0.0812, "step": 11571 }, { "epoch": 0.254992370281005, "grad_norm": 0.8251475095748901, "learning_rate": 2.6190833894018848e-05, "loss": 0.1157, "step": 11572 }, { "epoch": 0.2550144055705212, "grad_norm": 1.2301042079925537, "learning_rate": 2.6190121013606337e-05, "loss": 0.1015, "step": 11573 }, { "epoch": 0.25503644086003735, "grad_norm": 1.3767781257629395, "learning_rate": 2.618940807619654e-05, "loss": 0.106, "step": 11574 }, { "epoch": 0.2550584761495535, "grad_norm": 1.028105616569519, "learning_rate": 2.6188695081793084e-05, "loss": 0.1326, "step": 11575 }, { "epoch": 0.2550805114390697, "grad_norm": 1.3472062349319458, "learning_rate": 2.6187982030399607e-05, "loss": 0.1235, "step": 11576 }, { "epoch": 0.25510254672858584, "grad_norm": 0.9132612943649292, "learning_rate": 2.6187268922019736e-05, "loss": 0.1135, "step": 11577 }, { "epoch": 0.255124582018102, "grad_norm": 0.9719254374504089, "learning_rate": 2.6186555756657108e-05, "loss": 0.1215, "step": 11578 }, { "epoch": 0.2551466173076182, "grad_norm": 0.9156538844108582, "learning_rate": 2.618584253431535e-05, "loss": 0.1098, "step": 11579 }, { "epoch": 0.25516865259713434, "grad_norm": 0.7705077528953552, "learning_rate": 2.61851292549981e-05, "loss": 0.1113, "step": 11580 }, { "epoch": 0.25519068788665045, "grad_norm": 1.4209345579147339, "learning_rate": 2.6184415918708988e-05, "loss": 0.0849, "step": 11581 }, { "epoch": 0.2552127231761666, "grad_norm": 0.9315113425254822, "learning_rate": 2.6183702525451648e-05, "loss": 0.07, "step": 11582 }, { "epoch": 0.2552347584656828, "grad_norm": 0.5376349091529846, "learning_rate": 2.6182989075229712e-05, "loss": 0.0945, "step": 11583 }, { "epoch": 0.25525679375519894, "grad_norm": 0.8492876291275024, "learning_rate": 2.618227556804682e-05, "loss": 0.0808, "step": 11584 }, { "epoch": 0.2552788290447151, "grad_norm": 0.9478516578674316, "learning_rate": 2.61815620039066e-05, "loss": 0.0932, "step": 11585 }, { "epoch": 0.25530086433423127, "grad_norm": 1.007867455482483, "learning_rate": 2.618084838281269e-05, "loss": 0.1273, "step": 11586 }, { "epoch": 0.25532289962374743, "grad_norm": 0.6992642879486084, "learning_rate": 2.6180134704768722e-05, "loss": 0.0774, "step": 11587 }, { "epoch": 0.2553449349132636, "grad_norm": 0.8534828424453735, "learning_rate": 2.617942096977833e-05, "loss": 0.1169, "step": 11588 }, { "epoch": 0.25536697020277976, "grad_norm": 0.8628910779953003, "learning_rate": 2.617870717784516e-05, "loss": 0.1443, "step": 11589 }, { "epoch": 0.25538900549229593, "grad_norm": 0.6767701506614685, "learning_rate": 2.617799332897283e-05, "loss": 0.1372, "step": 11590 }, { "epoch": 0.2554110407818121, "grad_norm": 0.6460846066474915, "learning_rate": 2.6177279423164996e-05, "loss": 0.1108, "step": 11591 }, { "epoch": 0.25543307607132826, "grad_norm": 0.9610159397125244, "learning_rate": 2.6176565460425274e-05, "loss": 0.0975, "step": 11592 }, { "epoch": 0.25545511136084437, "grad_norm": 0.598814070224762, "learning_rate": 2.6175851440757315e-05, "loss": 0.1116, "step": 11593 }, { "epoch": 0.25547714665036053, "grad_norm": 0.7136005163192749, "learning_rate": 2.617513736416475e-05, "loss": 0.0723, "step": 11594 }, { "epoch": 0.2554991819398767, "grad_norm": 0.7184354662895203, "learning_rate": 2.617442323065122e-05, "loss": 0.0799, "step": 11595 }, { "epoch": 0.25552121722939286, "grad_norm": 0.8160622119903564, "learning_rate": 2.617370904022036e-05, "loss": 0.0823, "step": 11596 }, { "epoch": 0.255543252518909, "grad_norm": 1.1991626024246216, "learning_rate": 2.617299479287581e-05, "loss": 0.1142, "step": 11597 }, { "epoch": 0.2555652878084252, "grad_norm": 0.8805636167526245, "learning_rate": 2.6172280488621207e-05, "loss": 0.1001, "step": 11598 }, { "epoch": 0.25558732309794135, "grad_norm": 0.2649672329425812, "learning_rate": 2.6171566127460185e-05, "loss": 0.0639, "step": 11599 }, { "epoch": 0.2556093583874575, "grad_norm": 0.970029890537262, "learning_rate": 2.6170851709396385e-05, "loss": 0.1286, "step": 11600 }, { "epoch": 0.2556313936769737, "grad_norm": 0.9402976632118225, "learning_rate": 2.6170137234433454e-05, "loss": 0.1089, "step": 11601 }, { "epoch": 0.25565342896648985, "grad_norm": 0.8028994798660278, "learning_rate": 2.616942270257502e-05, "loss": 0.1178, "step": 11602 }, { "epoch": 0.255675464256006, "grad_norm": 0.5947684049606323, "learning_rate": 2.616870811382472e-05, "loss": 0.0656, "step": 11603 }, { "epoch": 0.2556974995455222, "grad_norm": 0.7501023411750793, "learning_rate": 2.6167993468186208e-05, "loss": 0.1133, "step": 11604 }, { "epoch": 0.2557195348350383, "grad_norm": 0.9659567475318909, "learning_rate": 2.6167278765663116e-05, "loss": 0.1251, "step": 11605 }, { "epoch": 0.25574157012455445, "grad_norm": 0.9419936537742615, "learning_rate": 2.616656400625908e-05, "loss": 0.1182, "step": 11606 }, { "epoch": 0.2557636054140706, "grad_norm": 0.831392228603363, "learning_rate": 2.6165849189977754e-05, "loss": 0.1699, "step": 11607 }, { "epoch": 0.2557856407035868, "grad_norm": 1.0174713134765625, "learning_rate": 2.616513431682276e-05, "loss": 0.1345, "step": 11608 }, { "epoch": 0.25580767599310295, "grad_norm": 1.18983793258667, "learning_rate": 2.616441938679776e-05, "loss": 0.1383, "step": 11609 }, { "epoch": 0.2558297112826191, "grad_norm": 0.5864644050598145, "learning_rate": 2.616370439990638e-05, "loss": 0.128, "step": 11610 }, { "epoch": 0.2558517465721353, "grad_norm": 1.2822142839431763, "learning_rate": 2.616298935615227e-05, "loss": 0.1368, "step": 11611 }, { "epoch": 0.25587378186165144, "grad_norm": 0.6051946878433228, "learning_rate": 2.6162274255539063e-05, "loss": 0.0996, "step": 11612 }, { "epoch": 0.2558958171511676, "grad_norm": 0.7384427189826965, "learning_rate": 2.616155909807041e-05, "loss": 0.0966, "step": 11613 }, { "epoch": 0.25591785244068377, "grad_norm": 0.806198239326477, "learning_rate": 2.616084388374995e-05, "loss": 0.0874, "step": 11614 }, { "epoch": 0.25593988773019993, "grad_norm": 0.8811898827552795, "learning_rate": 2.6160128612581333e-05, "loss": 0.1386, "step": 11615 }, { "epoch": 0.2559619230197161, "grad_norm": 0.8578298091888428, "learning_rate": 2.615941328456819e-05, "loss": 0.1042, "step": 11616 }, { "epoch": 0.25598395830923226, "grad_norm": 0.6305529475212097, "learning_rate": 2.6158697899714178e-05, "loss": 0.08, "step": 11617 }, { "epoch": 0.25600599359874837, "grad_norm": 0.9806171655654907, "learning_rate": 2.6157982458022926e-05, "loss": 0.0807, "step": 11618 }, { "epoch": 0.25602802888826454, "grad_norm": 1.0234007835388184, "learning_rate": 2.615726695949809e-05, "loss": 0.1092, "step": 11619 }, { "epoch": 0.2560500641777807, "grad_norm": 0.842793881893158, "learning_rate": 2.615655140414331e-05, "loss": 0.1082, "step": 11620 }, { "epoch": 0.25607209946729687, "grad_norm": 0.81782466173172, "learning_rate": 2.6155835791962233e-05, "loss": 0.1431, "step": 11621 }, { "epoch": 0.25609413475681303, "grad_norm": 0.9428004026412964, "learning_rate": 2.6155120122958502e-05, "loss": 0.1262, "step": 11622 }, { "epoch": 0.2561161700463292, "grad_norm": 0.644365131855011, "learning_rate": 2.615440439713576e-05, "loss": 0.07, "step": 11623 }, { "epoch": 0.25613820533584536, "grad_norm": 0.7610803246498108, "learning_rate": 2.6153688614497653e-05, "loss": 0.1135, "step": 11624 }, { "epoch": 0.2561602406253615, "grad_norm": 0.7954703569412231, "learning_rate": 2.615297277504783e-05, "loss": 0.0794, "step": 11625 }, { "epoch": 0.2561822759148777, "grad_norm": 0.6932584643363953, "learning_rate": 2.615225687878994e-05, "loss": 0.1055, "step": 11626 }, { "epoch": 0.25620431120439385, "grad_norm": 0.8567997813224792, "learning_rate": 2.6151540925727622e-05, "loss": 0.095, "step": 11627 }, { "epoch": 0.25622634649391, "grad_norm": 1.2773741483688354, "learning_rate": 2.6150824915864528e-05, "loss": 0.151, "step": 11628 }, { "epoch": 0.2562483817834262, "grad_norm": 0.8175346851348877, "learning_rate": 2.61501088492043e-05, "loss": 0.0965, "step": 11629 }, { "epoch": 0.2562704170729423, "grad_norm": 1.0830007791519165, "learning_rate": 2.614939272575059e-05, "loss": 0.1091, "step": 11630 }, { "epoch": 0.25629245236245846, "grad_norm": 1.107418179512024, "learning_rate": 2.614867654550705e-05, "loss": 0.1153, "step": 11631 }, { "epoch": 0.2563144876519746, "grad_norm": 1.469272494316101, "learning_rate": 2.6147960308477316e-05, "loss": 0.1455, "step": 11632 }, { "epoch": 0.2563365229414908, "grad_norm": 0.791635274887085, "learning_rate": 2.614724401466504e-05, "loss": 0.1065, "step": 11633 }, { "epoch": 0.25635855823100695, "grad_norm": 1.1939160823822021, "learning_rate": 2.6146527664073876e-05, "loss": 0.0909, "step": 11634 }, { "epoch": 0.2563805935205231, "grad_norm": 0.836018443107605, "learning_rate": 2.614581125670747e-05, "loss": 0.092, "step": 11635 }, { "epoch": 0.2564026288100393, "grad_norm": 0.7035007476806641, "learning_rate": 2.6145094792569466e-05, "loss": 0.1019, "step": 11636 }, { "epoch": 0.25642466409955544, "grad_norm": 0.8278286457061768, "learning_rate": 2.6144378271663524e-05, "loss": 0.0877, "step": 11637 }, { "epoch": 0.2564466993890716, "grad_norm": 0.6032010316848755, "learning_rate": 2.6143661693993282e-05, "loss": 0.0792, "step": 11638 }, { "epoch": 0.2564687346785878, "grad_norm": 0.5484336018562317, "learning_rate": 2.61429450595624e-05, "loss": 0.1348, "step": 11639 }, { "epoch": 0.25649076996810394, "grad_norm": 1.2749378681182861, "learning_rate": 2.6142228368374515e-05, "loss": 0.0947, "step": 11640 }, { "epoch": 0.2565128052576201, "grad_norm": 0.5982073545455933, "learning_rate": 2.6141511620433298e-05, "loss": 0.0885, "step": 11641 }, { "epoch": 0.2565348405471362, "grad_norm": 1.1668128967285156, "learning_rate": 2.614079481574238e-05, "loss": 0.1255, "step": 11642 }, { "epoch": 0.2565568758366524, "grad_norm": 1.2652199268341064, "learning_rate": 2.6140077954305422e-05, "loss": 0.1182, "step": 11643 }, { "epoch": 0.25657891112616854, "grad_norm": 0.9853755235671997, "learning_rate": 2.6139361036126073e-05, "loss": 0.1396, "step": 11644 }, { "epoch": 0.2566009464156847, "grad_norm": 0.8047568798065186, "learning_rate": 2.6138644061207984e-05, "loss": 0.1241, "step": 11645 }, { "epoch": 0.25662298170520087, "grad_norm": 0.7675716280937195, "learning_rate": 2.6137927029554805e-05, "loss": 0.0788, "step": 11646 }, { "epoch": 0.25664501699471703, "grad_norm": 1.0131444931030273, "learning_rate": 2.61372099411702e-05, "loss": 0.1252, "step": 11647 }, { "epoch": 0.2566670522842332, "grad_norm": 0.9477890729904175, "learning_rate": 2.6136492796057804e-05, "loss": 0.111, "step": 11648 }, { "epoch": 0.25668908757374936, "grad_norm": 0.8350419402122498, "learning_rate": 2.613577559422128e-05, "loss": 0.1095, "step": 11649 }, { "epoch": 0.25671112286326553, "grad_norm": 0.8468961715698242, "learning_rate": 2.613505833566428e-05, "loss": 0.104, "step": 11650 }, { "epoch": 0.2567331581527817, "grad_norm": 0.6482775211334229, "learning_rate": 2.613434102039046e-05, "loss": 0.0823, "step": 11651 }, { "epoch": 0.25675519344229786, "grad_norm": 0.6932728886604309, "learning_rate": 2.6133623648403467e-05, "loss": 0.1336, "step": 11652 }, { "epoch": 0.256777228731814, "grad_norm": 0.628667414188385, "learning_rate": 2.613290621970696e-05, "loss": 0.1015, "step": 11653 }, { "epoch": 0.2567992640213302, "grad_norm": 1.2755674123764038, "learning_rate": 2.6132188734304592e-05, "loss": 0.1506, "step": 11654 }, { "epoch": 0.2568212993108463, "grad_norm": 0.46262627840042114, "learning_rate": 2.613147119220002e-05, "loss": 0.0932, "step": 11655 }, { "epoch": 0.25684333460036246, "grad_norm": 0.8972775340080261, "learning_rate": 2.613075359339689e-05, "loss": 0.109, "step": 11656 }, { "epoch": 0.2568653698898786, "grad_norm": 0.4830021858215332, "learning_rate": 2.6130035937898865e-05, "loss": 0.0608, "step": 11657 }, { "epoch": 0.2568874051793948, "grad_norm": 0.7898946404457092, "learning_rate": 2.6129318225709598e-05, "loss": 0.1048, "step": 11658 }, { "epoch": 0.25690944046891095, "grad_norm": 0.44359099864959717, "learning_rate": 2.6128600456832742e-05, "loss": 0.0803, "step": 11659 }, { "epoch": 0.2569314757584271, "grad_norm": 0.7214512228965759, "learning_rate": 2.6127882631271963e-05, "loss": 0.0969, "step": 11660 }, { "epoch": 0.2569535110479433, "grad_norm": 0.9080845713615417, "learning_rate": 2.612716474903091e-05, "loss": 0.0929, "step": 11661 }, { "epoch": 0.25697554633745945, "grad_norm": 1.086249828338623, "learning_rate": 2.6126446810113234e-05, "loss": 0.0754, "step": 11662 }, { "epoch": 0.2569975816269756, "grad_norm": 0.6936947107315063, "learning_rate": 2.6125728814522606e-05, "loss": 0.1019, "step": 11663 }, { "epoch": 0.2570196169164918, "grad_norm": 0.7039819359779358, "learning_rate": 2.6125010762262668e-05, "loss": 0.0681, "step": 11664 }, { "epoch": 0.25704165220600794, "grad_norm": 0.5043895244598389, "learning_rate": 2.6124292653337093e-05, "loss": 0.0899, "step": 11665 }, { "epoch": 0.2570636874955241, "grad_norm": 0.9292150735855103, "learning_rate": 2.6123574487749524e-05, "loss": 0.1055, "step": 11666 }, { "epoch": 0.2570857227850402, "grad_norm": 1.0405999422073364, "learning_rate": 2.6122856265503622e-05, "loss": 0.0903, "step": 11667 }, { "epoch": 0.2571077580745564, "grad_norm": 0.7861732244491577, "learning_rate": 2.6122137986603055e-05, "loss": 0.1247, "step": 11668 }, { "epoch": 0.25712979336407255, "grad_norm": 0.5475175976753235, "learning_rate": 2.6121419651051474e-05, "loss": 0.1068, "step": 11669 }, { "epoch": 0.2571518286535887, "grad_norm": 0.5859092473983765, "learning_rate": 2.6120701258852535e-05, "loss": 0.1071, "step": 11670 }, { "epoch": 0.2571738639431049, "grad_norm": 0.7014479041099548, "learning_rate": 2.6119982810009904e-05, "loss": 0.1056, "step": 11671 }, { "epoch": 0.25719589923262104, "grad_norm": 0.6030300259590149, "learning_rate": 2.611926430452724e-05, "loss": 0.1091, "step": 11672 }, { "epoch": 0.2572179345221372, "grad_norm": 0.5702686905860901, "learning_rate": 2.6118545742408197e-05, "loss": 0.0755, "step": 11673 }, { "epoch": 0.25723996981165337, "grad_norm": 0.9512666463851929, "learning_rate": 2.611782712365644e-05, "loss": 0.087, "step": 11674 }, { "epoch": 0.25726200510116953, "grad_norm": 1.1759811639785767, "learning_rate": 2.6117108448275622e-05, "loss": 0.1515, "step": 11675 }, { "epoch": 0.2572840403906857, "grad_norm": 0.8341869711875916, "learning_rate": 2.6116389716269413e-05, "loss": 0.0761, "step": 11676 }, { "epoch": 0.25730607568020186, "grad_norm": 0.7405261397361755, "learning_rate": 2.6115670927641474e-05, "loss": 0.1111, "step": 11677 }, { "epoch": 0.257328110969718, "grad_norm": 0.8647644519805908, "learning_rate": 2.6114952082395457e-05, "loss": 0.0871, "step": 11678 }, { "epoch": 0.25735014625923414, "grad_norm": 1.0490323305130005, "learning_rate": 2.611423318053503e-05, "loss": 0.0947, "step": 11679 }, { "epoch": 0.2573721815487503, "grad_norm": 0.76164311170578, "learning_rate": 2.6113514222063854e-05, "loss": 0.1028, "step": 11680 }, { "epoch": 0.25739421683826647, "grad_norm": 0.9289867281913757, "learning_rate": 2.6112795206985594e-05, "loss": 0.0968, "step": 11681 }, { "epoch": 0.25741625212778263, "grad_norm": 0.7432454228401184, "learning_rate": 2.6112076135303902e-05, "loss": 0.1032, "step": 11682 }, { "epoch": 0.2574382874172988, "grad_norm": 0.6257258653640747, "learning_rate": 2.6111357007022453e-05, "loss": 0.1084, "step": 11683 }, { "epoch": 0.25746032270681496, "grad_norm": 1.1299779415130615, "learning_rate": 2.61106378221449e-05, "loss": 0.1132, "step": 11684 }, { "epoch": 0.2574823579963311, "grad_norm": 0.9604451060295105, "learning_rate": 2.6109918580674914e-05, "loss": 0.1188, "step": 11685 }, { "epoch": 0.2575043932858473, "grad_norm": 0.8665562272071838, "learning_rate": 2.6109199282616156e-05, "loss": 0.103, "step": 11686 }, { "epoch": 0.25752642857536345, "grad_norm": 0.5043057799339294, "learning_rate": 2.6108479927972287e-05, "loss": 0.11, "step": 11687 }, { "epoch": 0.2575484638648796, "grad_norm": 1.6959903240203857, "learning_rate": 2.6107760516746973e-05, "loss": 0.103, "step": 11688 }, { "epoch": 0.2575704991543958, "grad_norm": 1.1530789136886597, "learning_rate": 2.6107041048943876e-05, "loss": 0.1234, "step": 11689 }, { "epoch": 0.25759253444391195, "grad_norm": 1.257663607597351, "learning_rate": 2.6106321524566666e-05, "loss": 0.095, "step": 11690 }, { "epoch": 0.2576145697334281, "grad_norm": 0.7800691723823547, "learning_rate": 2.6105601943619002e-05, "loss": 0.1082, "step": 11691 }, { "epoch": 0.2576366050229442, "grad_norm": 0.689877450466156, "learning_rate": 2.6104882306104553e-05, "loss": 0.0958, "step": 11692 }, { "epoch": 0.2576586403124604, "grad_norm": 0.8799811005592346, "learning_rate": 2.6104162612026986e-05, "loss": 0.068, "step": 11693 }, { "epoch": 0.25768067560197655, "grad_norm": 0.5379952788352966, "learning_rate": 2.6103442861389962e-05, "loss": 0.0691, "step": 11694 }, { "epoch": 0.2577027108914927, "grad_norm": 1.0978546142578125, "learning_rate": 2.610272305419715e-05, "loss": 0.077, "step": 11695 }, { "epoch": 0.2577247461810089, "grad_norm": 0.7881747484207153, "learning_rate": 2.6102003190452214e-05, "loss": 0.1328, "step": 11696 }, { "epoch": 0.25774678147052504, "grad_norm": 1.077227234840393, "learning_rate": 2.6101283270158828e-05, "loss": 0.0727, "step": 11697 }, { "epoch": 0.2577688167600412, "grad_norm": 0.599337637424469, "learning_rate": 2.6100563293320646e-05, "loss": 0.0947, "step": 11698 }, { "epoch": 0.2577908520495574, "grad_norm": 0.5235782861709595, "learning_rate": 2.6099843259941345e-05, "loss": 0.1149, "step": 11699 }, { "epoch": 0.25781288733907354, "grad_norm": 0.7211378812789917, "learning_rate": 2.609912317002459e-05, "loss": 0.0991, "step": 11700 }, { "epoch": 0.2578349226285897, "grad_norm": 1.0110845565795898, "learning_rate": 2.609840302357405e-05, "loss": 0.0998, "step": 11701 }, { "epoch": 0.25785695791810587, "grad_norm": 0.954599142074585, "learning_rate": 2.6097682820593397e-05, "loss": 0.1031, "step": 11702 }, { "epoch": 0.25787899320762203, "grad_norm": 1.17822265625, "learning_rate": 2.6096962561086287e-05, "loss": 0.1111, "step": 11703 }, { "epoch": 0.25790102849713814, "grad_norm": 0.5286830067634583, "learning_rate": 2.6096242245056397e-05, "loss": 0.1063, "step": 11704 }, { "epoch": 0.2579230637866543, "grad_norm": 0.5691343545913696, "learning_rate": 2.6095521872507397e-05, "loss": 0.0917, "step": 11705 }, { "epoch": 0.25794509907617047, "grad_norm": 0.528200626373291, "learning_rate": 2.6094801443442953e-05, "loss": 0.1268, "step": 11706 }, { "epoch": 0.25796713436568663, "grad_norm": 1.0522472858428955, "learning_rate": 2.6094080957866734e-05, "loss": 0.1321, "step": 11707 }, { "epoch": 0.2579891696552028, "grad_norm": 0.7301629781723022, "learning_rate": 2.6093360415782416e-05, "loss": 0.0864, "step": 11708 }, { "epoch": 0.25801120494471896, "grad_norm": 1.4197717905044556, "learning_rate": 2.6092639817193662e-05, "loss": 0.1015, "step": 11709 }, { "epoch": 0.25803324023423513, "grad_norm": 0.8270143866539001, "learning_rate": 2.6091919162104144e-05, "loss": 0.105, "step": 11710 }, { "epoch": 0.2580552755237513, "grad_norm": 0.8562472462654114, "learning_rate": 2.6091198450517536e-05, "loss": 0.0722, "step": 11711 }, { "epoch": 0.25807731081326746, "grad_norm": 0.9813911318778992, "learning_rate": 2.6090477682437502e-05, "loss": 0.097, "step": 11712 }, { "epoch": 0.2580993461027836, "grad_norm": 1.2815395593643188, "learning_rate": 2.6089756857867724e-05, "loss": 0.1489, "step": 11713 }, { "epoch": 0.2581213813922998, "grad_norm": 1.044779896736145, "learning_rate": 2.608903597681187e-05, "loss": 0.1379, "step": 11714 }, { "epoch": 0.25814341668181595, "grad_norm": 1.1138590574264526, "learning_rate": 2.60883150392736e-05, "loss": 0.0985, "step": 11715 }, { "epoch": 0.2581654519713321, "grad_norm": 0.9854695200920105, "learning_rate": 2.6087594045256602e-05, "loss": 0.1454, "step": 11716 }, { "epoch": 0.2581874872608482, "grad_norm": 0.7612738013267517, "learning_rate": 2.6086872994764535e-05, "loss": 0.0669, "step": 11717 }, { "epoch": 0.2582095225503644, "grad_norm": 1.0364515781402588, "learning_rate": 2.6086151887801087e-05, "loss": 0.0962, "step": 11718 }, { "epoch": 0.25823155783988055, "grad_norm": 0.6253119111061096, "learning_rate": 2.608543072436992e-05, "loss": 0.0816, "step": 11719 }, { "epoch": 0.2582535931293967, "grad_norm": 1.124003291130066, "learning_rate": 2.6084709504474706e-05, "loss": 0.1156, "step": 11720 }, { "epoch": 0.2582756284189129, "grad_norm": 1.0387684106826782, "learning_rate": 2.608398822811913e-05, "loss": 0.1015, "step": 11721 }, { "epoch": 0.25829766370842905, "grad_norm": 1.496266484260559, "learning_rate": 2.608326689530685e-05, "loss": 0.0982, "step": 11722 }, { "epoch": 0.2583196989979452, "grad_norm": 1.0047541856765747, "learning_rate": 2.6082545506041555e-05, "loss": 0.1068, "step": 11723 }, { "epoch": 0.2583417342874614, "grad_norm": 0.8807810544967651, "learning_rate": 2.608182406032691e-05, "loss": 0.121, "step": 11724 }, { "epoch": 0.25836376957697754, "grad_norm": 0.7314437031745911, "learning_rate": 2.6081102558166593e-05, "loss": 0.1098, "step": 11725 }, { "epoch": 0.2583858048664937, "grad_norm": 1.2178739309310913, "learning_rate": 2.6080380999564273e-05, "loss": 0.143, "step": 11726 }, { "epoch": 0.25840784015600987, "grad_norm": 0.7545002698898315, "learning_rate": 2.6079659384523635e-05, "loss": 0.1037, "step": 11727 }, { "epoch": 0.25842987544552604, "grad_norm": 1.3034754991531372, "learning_rate": 2.6078937713048357e-05, "loss": 0.1189, "step": 11728 }, { "epoch": 0.25845191073504215, "grad_norm": 0.9721773862838745, "learning_rate": 2.6078215985142098e-05, "loss": 0.0874, "step": 11729 }, { "epoch": 0.2584739460245583, "grad_norm": 0.6229824423789978, "learning_rate": 2.607749420080855e-05, "loss": 0.0755, "step": 11730 }, { "epoch": 0.2584959813140745, "grad_norm": 0.8883801102638245, "learning_rate": 2.6076772360051382e-05, "loss": 0.0989, "step": 11731 }, { "epoch": 0.25851801660359064, "grad_norm": 0.7851659059524536, "learning_rate": 2.6076050462874272e-05, "loss": 0.0966, "step": 11732 }, { "epoch": 0.2585400518931068, "grad_norm": 0.7679064869880676, "learning_rate": 2.6075328509280898e-05, "loss": 0.0951, "step": 11733 }, { "epoch": 0.25856208718262297, "grad_norm": 0.8115931749343872, "learning_rate": 2.607460649927494e-05, "loss": 0.1126, "step": 11734 }, { "epoch": 0.25858412247213913, "grad_norm": 1.0294221639633179, "learning_rate": 2.6073884432860068e-05, "loss": 0.1103, "step": 11735 }, { "epoch": 0.2586061577616553, "grad_norm": 0.5808262228965759, "learning_rate": 2.607316231003997e-05, "loss": 0.1016, "step": 11736 }, { "epoch": 0.25862819305117146, "grad_norm": 1.0235520601272583, "learning_rate": 2.607244013081831e-05, "loss": 0.0949, "step": 11737 }, { "epoch": 0.2586502283406876, "grad_norm": 1.1718066930770874, "learning_rate": 2.607171789519878e-05, "loss": 0.1096, "step": 11738 }, { "epoch": 0.2586722636302038, "grad_norm": 1.0446604490280151, "learning_rate": 2.6070995603185055e-05, "loss": 0.1371, "step": 11739 }, { "epoch": 0.25869429891971996, "grad_norm": 1.3079123497009277, "learning_rate": 2.607027325478081e-05, "loss": 0.1027, "step": 11740 }, { "epoch": 0.25871633420923607, "grad_norm": 0.751072883605957, "learning_rate": 2.606955084998973e-05, "loss": 0.0762, "step": 11741 }, { "epoch": 0.25873836949875223, "grad_norm": 0.6254918575286865, "learning_rate": 2.6068828388815485e-05, "loss": 0.0852, "step": 11742 }, { "epoch": 0.2587604047882684, "grad_norm": 0.6838716268539429, "learning_rate": 2.6068105871261765e-05, "loss": 0.0856, "step": 11743 }, { "epoch": 0.25878244007778456, "grad_norm": 0.6935866475105286, "learning_rate": 2.6067383297332248e-05, "loss": 0.1167, "step": 11744 }, { "epoch": 0.2588044753673007, "grad_norm": 0.8926147818565369, "learning_rate": 2.6066660667030616e-05, "loss": 0.1065, "step": 11745 }, { "epoch": 0.2588265106568169, "grad_norm": 0.7565850615501404, "learning_rate": 2.6065937980360543e-05, "loss": 0.0853, "step": 11746 }, { "epoch": 0.25884854594633305, "grad_norm": 0.7932235598564148, "learning_rate": 2.606521523732571e-05, "loss": 0.1134, "step": 11747 }, { "epoch": 0.2588705812358492, "grad_norm": 0.7336727380752563, "learning_rate": 2.606449243792981e-05, "loss": 0.0823, "step": 11748 }, { "epoch": 0.2588926165253654, "grad_norm": 0.6445533633232117, "learning_rate": 2.606376958217651e-05, "loss": 0.0712, "step": 11749 }, { "epoch": 0.25891465181488155, "grad_norm": 0.7270956039428711, "learning_rate": 2.6063046670069503e-05, "loss": 0.0776, "step": 11750 }, { "epoch": 0.2589366871043977, "grad_norm": 0.6959579586982727, "learning_rate": 2.6062323701612463e-05, "loss": 0.1047, "step": 11751 }, { "epoch": 0.2589587223939139, "grad_norm": 0.6698840260505676, "learning_rate": 2.606160067680908e-05, "loss": 0.0812, "step": 11752 }, { "epoch": 0.25898075768343004, "grad_norm": 0.8832581043243408, "learning_rate": 2.6060877595663032e-05, "loss": 0.1107, "step": 11753 }, { "epoch": 0.25900279297294615, "grad_norm": 0.6771389842033386, "learning_rate": 2.6060154458178003e-05, "loss": 0.0855, "step": 11754 }, { "epoch": 0.2590248282624623, "grad_norm": 0.7596575617790222, "learning_rate": 2.6059431264357673e-05, "loss": 0.0824, "step": 11755 }, { "epoch": 0.2590468635519785, "grad_norm": 0.7212809324264526, "learning_rate": 2.6058708014205736e-05, "loss": 0.0799, "step": 11756 }, { "epoch": 0.25906889884149464, "grad_norm": 0.7745775580406189, "learning_rate": 2.6057984707725866e-05, "loss": 0.1285, "step": 11757 }, { "epoch": 0.2590909341310108, "grad_norm": 0.6282169222831726, "learning_rate": 2.6057261344921745e-05, "loss": 0.0791, "step": 11758 }, { "epoch": 0.259112969420527, "grad_norm": 0.6317362189292908, "learning_rate": 2.605653792579707e-05, "loss": 0.1146, "step": 11759 }, { "epoch": 0.25913500471004314, "grad_norm": 0.615676760673523, "learning_rate": 2.6055814450355516e-05, "loss": 0.1102, "step": 11760 }, { "epoch": 0.2591570399995593, "grad_norm": 0.5959252715110779, "learning_rate": 2.605509091860077e-05, "loss": 0.0864, "step": 11761 }, { "epoch": 0.25917907528907547, "grad_norm": 0.8767427206039429, "learning_rate": 2.6054367330536517e-05, "loss": 0.0726, "step": 11762 }, { "epoch": 0.25920111057859163, "grad_norm": 0.7145603895187378, "learning_rate": 2.6053643686166444e-05, "loss": 0.092, "step": 11763 }, { "epoch": 0.2592231458681078, "grad_norm": 0.7622098922729492, "learning_rate": 2.6052919985494236e-05, "loss": 0.0882, "step": 11764 }, { "epoch": 0.25924518115762396, "grad_norm": 0.6955431699752808, "learning_rate": 2.605219622852358e-05, "loss": 0.091, "step": 11765 }, { "epoch": 0.25926721644714007, "grad_norm": 1.169324517250061, "learning_rate": 2.6051472415258163e-05, "loss": 0.1422, "step": 11766 }, { "epoch": 0.25928925173665623, "grad_norm": 0.7755885720252991, "learning_rate": 2.6050748545701667e-05, "loss": 0.0895, "step": 11767 }, { "epoch": 0.2593112870261724, "grad_norm": 0.9253634214401245, "learning_rate": 2.6050024619857784e-05, "loss": 0.1015, "step": 11768 }, { "epoch": 0.25933332231568856, "grad_norm": 0.9336172342300415, "learning_rate": 2.60493006377302e-05, "loss": 0.0859, "step": 11769 }, { "epoch": 0.25935535760520473, "grad_norm": 0.8975705504417419, "learning_rate": 2.6048576599322605e-05, "loss": 0.1001, "step": 11770 }, { "epoch": 0.2593773928947209, "grad_norm": 0.6747732162475586, "learning_rate": 2.6047852504638686e-05, "loss": 0.1133, "step": 11771 }, { "epoch": 0.25939942818423706, "grad_norm": 0.4429384768009186, "learning_rate": 2.6047128353682127e-05, "loss": 0.0728, "step": 11772 }, { "epoch": 0.2594214634737532, "grad_norm": 0.7091642618179321, "learning_rate": 2.6046404146456617e-05, "loss": 0.0949, "step": 11773 }, { "epoch": 0.2594434987632694, "grad_norm": 2.177119731903076, "learning_rate": 2.604567988296585e-05, "loss": 0.0833, "step": 11774 }, { "epoch": 0.25946553405278555, "grad_norm": 0.9129478931427002, "learning_rate": 2.6044955563213514e-05, "loss": 0.0768, "step": 11775 }, { "epoch": 0.2594875693423017, "grad_norm": 0.820943295955658, "learning_rate": 2.6044231187203296e-05, "loss": 0.0961, "step": 11776 }, { "epoch": 0.2595096046318179, "grad_norm": 1.05446457862854, "learning_rate": 2.6043506754938882e-05, "loss": 0.1546, "step": 11777 }, { "epoch": 0.259531639921334, "grad_norm": 0.5090555548667908, "learning_rate": 2.604278226642397e-05, "loss": 0.0862, "step": 11778 }, { "epoch": 0.25955367521085015, "grad_norm": 0.6289262175559998, "learning_rate": 2.6042057721662244e-05, "loss": 0.0693, "step": 11779 }, { "epoch": 0.2595757105003663, "grad_norm": 0.7079680562019348, "learning_rate": 2.6041333120657398e-05, "loss": 0.1217, "step": 11780 }, { "epoch": 0.2595977457898825, "grad_norm": 0.9246115684509277, "learning_rate": 2.6040608463413126e-05, "loss": 0.0721, "step": 11781 }, { "epoch": 0.25961978107939865, "grad_norm": 0.770209014415741, "learning_rate": 2.6039883749933103e-05, "loss": 0.1288, "step": 11782 }, { "epoch": 0.2596418163689148, "grad_norm": 0.9274845719337463, "learning_rate": 2.6039158980221043e-05, "loss": 0.1586, "step": 11783 }, { "epoch": 0.259663851658431, "grad_norm": 1.1524665355682373, "learning_rate": 2.6038434154280625e-05, "loss": 0.0722, "step": 11784 }, { "epoch": 0.25968588694794714, "grad_norm": 0.8385589718818665, "learning_rate": 2.603770927211554e-05, "loss": 0.0912, "step": 11785 }, { "epoch": 0.2597079222374633, "grad_norm": 0.8271170258522034, "learning_rate": 2.6036984333729485e-05, "loss": 0.1012, "step": 11786 }, { "epoch": 0.25972995752697947, "grad_norm": 0.6687385439872742, "learning_rate": 2.6036259339126145e-05, "loss": 0.1137, "step": 11787 }, { "epoch": 0.25975199281649564, "grad_norm": 1.330383539199829, "learning_rate": 2.6035534288309224e-05, "loss": 0.1719, "step": 11788 }, { "epoch": 0.2597740281060118, "grad_norm": 1.0883997678756714, "learning_rate": 2.6034809181282403e-05, "loss": 0.1051, "step": 11789 }, { "epoch": 0.25979606339552797, "grad_norm": 0.7361628413200378, "learning_rate": 2.603408401804939e-05, "loss": 0.0771, "step": 11790 }, { "epoch": 0.2598180986850441, "grad_norm": 1.1452912092208862, "learning_rate": 2.6033358798613863e-05, "loss": 0.1315, "step": 11791 }, { "epoch": 0.25984013397456024, "grad_norm": 0.7910416722297668, "learning_rate": 2.6032633522979527e-05, "loss": 0.0858, "step": 11792 }, { "epoch": 0.2598621692640764, "grad_norm": 0.513554573059082, "learning_rate": 2.603190819115007e-05, "loss": 0.0892, "step": 11793 }, { "epoch": 0.25988420455359257, "grad_norm": 1.57514226436615, "learning_rate": 2.6031182803129186e-05, "loss": 0.0987, "step": 11794 }, { "epoch": 0.25990623984310873, "grad_norm": 0.9932149052619934, "learning_rate": 2.603045735892058e-05, "loss": 0.1166, "step": 11795 }, { "epoch": 0.2599282751326249, "grad_norm": 1.0767334699630737, "learning_rate": 2.602973185852793e-05, "loss": 0.105, "step": 11796 }, { "epoch": 0.25995031042214106, "grad_norm": 0.5922936797142029, "learning_rate": 2.602900630195495e-05, "loss": 0.0989, "step": 11797 }, { "epoch": 0.2599723457116572, "grad_norm": 0.7962903380393982, "learning_rate": 2.6028280689205323e-05, "loss": 0.0979, "step": 11798 }, { "epoch": 0.2599943810011734, "grad_norm": 1.2502546310424805, "learning_rate": 2.602755502028275e-05, "loss": 0.1159, "step": 11799 }, { "epoch": 0.26001641629068956, "grad_norm": 1.065558910369873, "learning_rate": 2.6026829295190923e-05, "loss": 0.1451, "step": 11800 }, { "epoch": 0.2600384515802057, "grad_norm": 0.7356336116790771, "learning_rate": 2.6026103513933544e-05, "loss": 0.0933, "step": 11801 }, { "epoch": 0.2600604868697219, "grad_norm": 0.7035050988197327, "learning_rate": 2.602537767651431e-05, "loss": 0.0759, "step": 11802 }, { "epoch": 0.260082522159238, "grad_norm": 0.7183796167373657, "learning_rate": 2.6024651782936908e-05, "loss": 0.0843, "step": 11803 }, { "epoch": 0.26010455744875416, "grad_norm": 0.7678345441818237, "learning_rate": 2.6023925833205048e-05, "loss": 0.1193, "step": 11804 }, { "epoch": 0.2601265927382703, "grad_norm": 0.6886864304542542, "learning_rate": 2.6023199827322422e-05, "loss": 0.1243, "step": 11805 }, { "epoch": 0.2601486280277865, "grad_norm": 0.5655127763748169, "learning_rate": 2.6022473765292726e-05, "loss": 0.1049, "step": 11806 }, { "epoch": 0.26017066331730265, "grad_norm": 0.9327511191368103, "learning_rate": 2.602174764711966e-05, "loss": 0.1337, "step": 11807 }, { "epoch": 0.2601926986068188, "grad_norm": 0.6729631423950195, "learning_rate": 2.6021021472806925e-05, "loss": 0.0837, "step": 11808 }, { "epoch": 0.260214733896335, "grad_norm": 0.685494601726532, "learning_rate": 2.602029524235822e-05, "loss": 0.0652, "step": 11809 }, { "epoch": 0.26023676918585115, "grad_norm": 0.8294466733932495, "learning_rate": 2.601956895577724e-05, "loss": 0.1068, "step": 11810 }, { "epoch": 0.2602588044753673, "grad_norm": 0.5000391602516174, "learning_rate": 2.601884261306768e-05, "loss": 0.092, "step": 11811 }, { "epoch": 0.2602808397648835, "grad_norm": 0.8742332458496094, "learning_rate": 2.6018116214233248e-05, "loss": 0.0841, "step": 11812 }, { "epoch": 0.26030287505439964, "grad_norm": 0.5264390110969543, "learning_rate": 2.6017389759277646e-05, "loss": 0.0601, "step": 11813 }, { "epoch": 0.2603249103439158, "grad_norm": 0.8435434103012085, "learning_rate": 2.601666324820457e-05, "loss": 0.1676, "step": 11814 }, { "epoch": 0.2603469456334319, "grad_norm": 0.36600595712661743, "learning_rate": 2.6015936681017716e-05, "loss": 0.0957, "step": 11815 }, { "epoch": 0.2603689809229481, "grad_norm": 0.8570663928985596, "learning_rate": 2.601521005772079e-05, "loss": 0.0993, "step": 11816 }, { "epoch": 0.26039101621246424, "grad_norm": 0.653941810131073, "learning_rate": 2.6014483378317498e-05, "loss": 0.069, "step": 11817 }, { "epoch": 0.2604130515019804, "grad_norm": 0.8389836549758911, "learning_rate": 2.6013756642811533e-05, "loss": 0.1206, "step": 11818 }, { "epoch": 0.2604350867914966, "grad_norm": 0.7537224888801575, "learning_rate": 2.6013029851206596e-05, "loss": 0.0924, "step": 11819 }, { "epoch": 0.26045712208101274, "grad_norm": 0.9608502984046936, "learning_rate": 2.6012303003506394e-05, "loss": 0.109, "step": 11820 }, { "epoch": 0.2604791573705289, "grad_norm": 1.1438658237457275, "learning_rate": 2.6011576099714627e-05, "loss": 0.1595, "step": 11821 }, { "epoch": 0.26050119266004507, "grad_norm": 0.5333040356636047, "learning_rate": 2.6010849139835e-05, "loss": 0.0954, "step": 11822 }, { "epoch": 0.26052322794956123, "grad_norm": 0.7547497749328613, "learning_rate": 2.6010122123871217e-05, "loss": 0.1036, "step": 11823 }, { "epoch": 0.2605452632390774, "grad_norm": 0.9422279000282288, "learning_rate": 2.6009395051826973e-05, "loss": 0.1231, "step": 11824 }, { "epoch": 0.26056729852859356, "grad_norm": 1.9294034242630005, "learning_rate": 2.6008667923705977e-05, "loss": 0.1709, "step": 11825 }, { "epoch": 0.2605893338181097, "grad_norm": 1.6782639026641846, "learning_rate": 2.600794073951193e-05, "loss": 0.0971, "step": 11826 }, { "epoch": 0.2606113691076259, "grad_norm": 0.9797788262367249, "learning_rate": 2.600721349924854e-05, "loss": 0.1334, "step": 11827 }, { "epoch": 0.260633404397142, "grad_norm": 1.0957478284835815, "learning_rate": 2.6006486202919513e-05, "loss": 0.0914, "step": 11828 }, { "epoch": 0.26065543968665816, "grad_norm": 0.8374493718147278, "learning_rate": 2.6005758850528543e-05, "loss": 0.0904, "step": 11829 }, { "epoch": 0.26067747497617433, "grad_norm": 0.656049907207489, "learning_rate": 2.6005031442079344e-05, "loss": 0.0984, "step": 11830 }, { "epoch": 0.2606995102656905, "grad_norm": 0.871108889579773, "learning_rate": 2.6004303977575618e-05, "loss": 0.0944, "step": 11831 }, { "epoch": 0.26072154555520666, "grad_norm": 0.9394197463989258, "learning_rate": 2.6003576457021072e-05, "loss": 0.0957, "step": 11832 }, { "epoch": 0.2607435808447228, "grad_norm": 1.0253074169158936, "learning_rate": 2.600284888041941e-05, "loss": 0.0841, "step": 11833 }, { "epoch": 0.260765616134239, "grad_norm": 0.9545996785163879, "learning_rate": 2.6002121247774342e-05, "loss": 0.1194, "step": 11834 }, { "epoch": 0.26078765142375515, "grad_norm": 0.7496140599250793, "learning_rate": 2.6001393559089564e-05, "loss": 0.0854, "step": 11835 }, { "epoch": 0.2608096867132713, "grad_norm": 0.815304696559906, "learning_rate": 2.6000665814368793e-05, "loss": 0.0892, "step": 11836 }, { "epoch": 0.2608317220027875, "grad_norm": 0.4744715690612793, "learning_rate": 2.5999938013615728e-05, "loss": 0.0947, "step": 11837 }, { "epoch": 0.26085375729230365, "grad_norm": 0.8782065510749817, "learning_rate": 2.5999210156834087e-05, "loss": 0.1046, "step": 11838 }, { "epoch": 0.2608757925818198, "grad_norm": 0.7123908996582031, "learning_rate": 2.5998482244027566e-05, "loss": 0.0768, "step": 11839 }, { "epoch": 0.2608978278713359, "grad_norm": 0.7950890064239502, "learning_rate": 2.599775427519988e-05, "loss": 0.0974, "step": 11840 }, { "epoch": 0.2609198631608521, "grad_norm": 1.5134861469268799, "learning_rate": 2.5997026250354733e-05, "loss": 0.1073, "step": 11841 }, { "epoch": 0.26094189845036825, "grad_norm": 0.6855721473693848, "learning_rate": 2.599629816949583e-05, "loss": 0.1131, "step": 11842 }, { "epoch": 0.2609639337398844, "grad_norm": 0.769782304763794, "learning_rate": 2.599557003262689e-05, "loss": 0.1225, "step": 11843 }, { "epoch": 0.2609859690294006, "grad_norm": 0.5386178493499756, "learning_rate": 2.599484183975161e-05, "loss": 0.0991, "step": 11844 }, { "epoch": 0.26100800431891674, "grad_norm": 1.1502914428710938, "learning_rate": 2.599411359087371e-05, "loss": 0.1222, "step": 11845 }, { "epoch": 0.2610300396084329, "grad_norm": 0.6218864321708679, "learning_rate": 2.599338528599689e-05, "loss": 0.0952, "step": 11846 }, { "epoch": 0.26105207489794907, "grad_norm": 0.7779783606529236, "learning_rate": 2.5992656925124868e-05, "loss": 0.1271, "step": 11847 }, { "epoch": 0.26107411018746524, "grad_norm": 0.7815370559692383, "learning_rate": 2.5991928508261345e-05, "loss": 0.1055, "step": 11848 }, { "epoch": 0.2610961454769814, "grad_norm": 0.6591810584068298, "learning_rate": 2.5991200035410036e-05, "loss": 0.1121, "step": 11849 }, { "epoch": 0.26111818076649757, "grad_norm": 0.923292875289917, "learning_rate": 2.5990471506574653e-05, "loss": 0.0978, "step": 11850 }, { "epoch": 0.26114021605601373, "grad_norm": 0.8787731528282166, "learning_rate": 2.59897429217589e-05, "loss": 0.137, "step": 11851 }, { "epoch": 0.26116225134552984, "grad_norm": 0.9224716424942017, "learning_rate": 2.59890142809665e-05, "loss": 0.1036, "step": 11852 }, { "epoch": 0.261184286635046, "grad_norm": 0.7194644212722778, "learning_rate": 2.5988285584201157e-05, "loss": 0.1153, "step": 11853 }, { "epoch": 0.26120632192456217, "grad_norm": 0.7669853568077087, "learning_rate": 2.598755683146658e-05, "loss": 0.1191, "step": 11854 }, { "epoch": 0.26122835721407833, "grad_norm": 0.8980799913406372, "learning_rate": 2.5986828022766487e-05, "loss": 0.1066, "step": 11855 }, { "epoch": 0.2612503925035945, "grad_norm": 0.6731573939323425, "learning_rate": 2.5986099158104584e-05, "loss": 0.1098, "step": 11856 }, { "epoch": 0.26127242779311066, "grad_norm": 0.8805309534072876, "learning_rate": 2.5985370237484587e-05, "loss": 0.1107, "step": 11857 }, { "epoch": 0.2612944630826268, "grad_norm": 0.9557692408561707, "learning_rate": 2.5984641260910208e-05, "loss": 0.0894, "step": 11858 }, { "epoch": 0.261316498372143, "grad_norm": 0.7474671602249146, "learning_rate": 2.5983912228385168e-05, "loss": 0.0866, "step": 11859 }, { "epoch": 0.26133853366165916, "grad_norm": 0.6046609282493591, "learning_rate": 2.5983183139913165e-05, "loss": 0.0926, "step": 11860 }, { "epoch": 0.2613605689511753, "grad_norm": 0.9794211387634277, "learning_rate": 2.5982453995497923e-05, "loss": 0.1048, "step": 11861 }, { "epoch": 0.2613826042406915, "grad_norm": 0.7054710388183594, "learning_rate": 2.5981724795143155e-05, "loss": 0.0996, "step": 11862 }, { "epoch": 0.26140463953020765, "grad_norm": 0.798542857170105, "learning_rate": 2.5980995538852573e-05, "loss": 0.107, "step": 11863 }, { "epoch": 0.2614266748197238, "grad_norm": 0.9557386636734009, "learning_rate": 2.598026622662989e-05, "loss": 0.0968, "step": 11864 }, { "epoch": 0.2614487101092399, "grad_norm": 0.6905498504638672, "learning_rate": 2.597953685847882e-05, "loss": 0.1084, "step": 11865 }, { "epoch": 0.2614707453987561, "grad_norm": 0.9946619272232056, "learning_rate": 2.597880743440309e-05, "loss": 0.1141, "step": 11866 }, { "epoch": 0.26149278068827225, "grad_norm": 0.9756636023521423, "learning_rate": 2.59780779544064e-05, "loss": 0.1227, "step": 11867 }, { "epoch": 0.2615148159777884, "grad_norm": 0.6306465268135071, "learning_rate": 2.5977348418492475e-05, "loss": 0.1113, "step": 11868 }, { "epoch": 0.2615368512673046, "grad_norm": 0.8813285231590271, "learning_rate": 2.5976618826665028e-05, "loss": 0.1118, "step": 11869 }, { "epoch": 0.26155888655682075, "grad_norm": 1.005852460861206, "learning_rate": 2.597588917892777e-05, "loss": 0.0819, "step": 11870 }, { "epoch": 0.2615809218463369, "grad_norm": 0.5913669466972351, "learning_rate": 2.5975159475284433e-05, "loss": 0.1134, "step": 11871 }, { "epoch": 0.2616029571358531, "grad_norm": 0.5726468563079834, "learning_rate": 2.5974429715738716e-05, "loss": 0.1255, "step": 11872 }, { "epoch": 0.26162499242536924, "grad_norm": 0.5882366299629211, "learning_rate": 2.5973699900294344e-05, "loss": 0.1221, "step": 11873 }, { "epoch": 0.2616470277148854, "grad_norm": 0.967544436454773, "learning_rate": 2.5972970028955037e-05, "loss": 0.1356, "step": 11874 }, { "epoch": 0.26166906300440157, "grad_norm": 0.7693871855735779, "learning_rate": 2.5972240101724508e-05, "loss": 0.1111, "step": 11875 }, { "epoch": 0.26169109829391773, "grad_norm": 0.547186017036438, "learning_rate": 2.5971510118606474e-05, "loss": 0.1169, "step": 11876 }, { "epoch": 0.26171313358343384, "grad_norm": 1.0794650316238403, "learning_rate": 2.5970780079604655e-05, "loss": 0.0706, "step": 11877 }, { "epoch": 0.26173516887295, "grad_norm": 0.8117381930351257, "learning_rate": 2.5970049984722772e-05, "loss": 0.0962, "step": 11878 }, { "epoch": 0.2617572041624662, "grad_norm": 0.48408061265945435, "learning_rate": 2.5969319833964544e-05, "loss": 0.0909, "step": 11879 }, { "epoch": 0.26177923945198234, "grad_norm": 0.8540915250778198, "learning_rate": 2.5968589627333686e-05, "loss": 0.0877, "step": 11880 }, { "epoch": 0.2618012747414985, "grad_norm": 0.8580380082130432, "learning_rate": 2.596785936483392e-05, "loss": 0.0701, "step": 11881 }, { "epoch": 0.26182331003101467, "grad_norm": 1.1656739711761475, "learning_rate": 2.5967129046468963e-05, "loss": 0.1414, "step": 11882 }, { "epoch": 0.26184534532053083, "grad_norm": 0.982781708240509, "learning_rate": 2.596639867224254e-05, "loss": 0.1267, "step": 11883 }, { "epoch": 0.261867380610047, "grad_norm": 0.8783485293388367, "learning_rate": 2.5965668242158365e-05, "loss": 0.0948, "step": 11884 }, { "epoch": 0.26188941589956316, "grad_norm": 0.8762171864509583, "learning_rate": 2.596493775622016e-05, "loss": 0.1077, "step": 11885 }, { "epoch": 0.2619114511890793, "grad_norm": 0.6146574020385742, "learning_rate": 2.5964207214431653e-05, "loss": 0.0838, "step": 11886 }, { "epoch": 0.2619334864785955, "grad_norm": 0.6775349974632263, "learning_rate": 2.5963476616796555e-05, "loss": 0.093, "step": 11887 }, { "epoch": 0.26195552176811165, "grad_norm": 0.8145143985748291, "learning_rate": 2.5962745963318588e-05, "loss": 0.1087, "step": 11888 }, { "epoch": 0.26197755705762776, "grad_norm": 0.7580755352973938, "learning_rate": 2.5962015254001482e-05, "loss": 0.1015, "step": 11889 }, { "epoch": 0.26199959234714393, "grad_norm": 0.8028398156166077, "learning_rate": 2.596128448884895e-05, "loss": 0.0858, "step": 11890 }, { "epoch": 0.2620216276366601, "grad_norm": 0.6749716401100159, "learning_rate": 2.5960553667864725e-05, "loss": 0.0829, "step": 11891 }, { "epoch": 0.26204366292617626, "grad_norm": 0.6970502734184265, "learning_rate": 2.5959822791052518e-05, "loss": 0.1074, "step": 11892 }, { "epoch": 0.2620656982156924, "grad_norm": 0.8995144367218018, "learning_rate": 2.595909185841606e-05, "loss": 0.0906, "step": 11893 }, { "epoch": 0.2620877335052086, "grad_norm": 0.640907347202301, "learning_rate": 2.595836086995907e-05, "loss": 0.1266, "step": 11894 }, { "epoch": 0.26210976879472475, "grad_norm": 1.0346065759658813, "learning_rate": 2.5957629825685264e-05, "loss": 0.1019, "step": 11895 }, { "epoch": 0.2621318040842409, "grad_norm": 0.6942477822303772, "learning_rate": 2.595689872559838e-05, "loss": 0.1008, "step": 11896 }, { "epoch": 0.2621538393737571, "grad_norm": 0.7103541493415833, "learning_rate": 2.5956167569702133e-05, "loss": 0.0856, "step": 11897 }, { "epoch": 0.26217587466327325, "grad_norm": 0.6206691861152649, "learning_rate": 2.5955436358000255e-05, "loss": 0.1218, "step": 11898 }, { "epoch": 0.2621979099527894, "grad_norm": 1.3052719831466675, "learning_rate": 2.5954705090496456e-05, "loss": 0.1089, "step": 11899 }, { "epoch": 0.2622199452423056, "grad_norm": 0.7616313099861145, "learning_rate": 2.5953973767194476e-05, "loss": 0.0849, "step": 11900 }, { "epoch": 0.26224198053182174, "grad_norm": 0.8247581720352173, "learning_rate": 2.5953242388098034e-05, "loss": 0.103, "step": 11901 }, { "epoch": 0.26226401582133785, "grad_norm": 0.7964766621589661, "learning_rate": 2.5952510953210848e-05, "loss": 0.1076, "step": 11902 }, { "epoch": 0.262286051110854, "grad_norm": 1.1021077632904053, "learning_rate": 2.5951779462536656e-05, "loss": 0.145, "step": 11903 }, { "epoch": 0.2623080864003702, "grad_norm": 0.8007513880729675, "learning_rate": 2.5951047916079178e-05, "loss": 0.1124, "step": 11904 }, { "epoch": 0.26233012168988634, "grad_norm": 0.7775412201881409, "learning_rate": 2.595031631384214e-05, "loss": 0.0966, "step": 11905 }, { "epoch": 0.2623521569794025, "grad_norm": 0.5932471752166748, "learning_rate": 2.594958465582927e-05, "loss": 0.098, "step": 11906 }, { "epoch": 0.26237419226891867, "grad_norm": 0.8140376210212708, "learning_rate": 2.594885294204429e-05, "loss": 0.0945, "step": 11907 }, { "epoch": 0.26239622755843484, "grad_norm": 0.9054332971572876, "learning_rate": 2.5948121172490935e-05, "loss": 0.1463, "step": 11908 }, { "epoch": 0.262418262847951, "grad_norm": 0.6422691941261292, "learning_rate": 2.594738934717292e-05, "loss": 0.0975, "step": 11909 }, { "epoch": 0.26244029813746717, "grad_norm": 0.943150520324707, "learning_rate": 2.594665746609399e-05, "loss": 0.0918, "step": 11910 }, { "epoch": 0.26246233342698333, "grad_norm": 0.964283287525177, "learning_rate": 2.594592552925786e-05, "loss": 0.0936, "step": 11911 }, { "epoch": 0.2624843687164995, "grad_norm": 1.017332911491394, "learning_rate": 2.5945193536668265e-05, "loss": 0.0922, "step": 11912 }, { "epoch": 0.26250640400601566, "grad_norm": 0.7929391860961914, "learning_rate": 2.5944461488328926e-05, "loss": 0.1017, "step": 11913 }, { "epoch": 0.26252843929553177, "grad_norm": 0.613131582736969, "learning_rate": 2.5943729384243577e-05, "loss": 0.127, "step": 11914 }, { "epoch": 0.26255047458504793, "grad_norm": 0.8939883708953857, "learning_rate": 2.5942997224415945e-05, "loss": 0.0991, "step": 11915 }, { "epoch": 0.2625725098745641, "grad_norm": 0.6595969796180725, "learning_rate": 2.5942265008849758e-05, "loss": 0.0844, "step": 11916 }, { "epoch": 0.26259454516408026, "grad_norm": 0.8376533389091492, "learning_rate": 2.5941532737548755e-05, "loss": 0.096, "step": 11917 }, { "epoch": 0.2626165804535964, "grad_norm": 0.7535460591316223, "learning_rate": 2.5940800410516653e-05, "loss": 0.1064, "step": 11918 }, { "epoch": 0.2626386157431126, "grad_norm": 0.9612013697624207, "learning_rate": 2.594006802775719e-05, "loss": 0.0839, "step": 11919 }, { "epoch": 0.26266065103262876, "grad_norm": 0.8601572513580322, "learning_rate": 2.5939335589274092e-05, "loss": 0.0927, "step": 11920 }, { "epoch": 0.2626826863221449, "grad_norm": 0.5962080955505371, "learning_rate": 2.593860309507109e-05, "loss": 0.0783, "step": 11921 }, { "epoch": 0.2627047216116611, "grad_norm": 1.0634952783584595, "learning_rate": 2.5937870545151922e-05, "loss": 0.1037, "step": 11922 }, { "epoch": 0.26272675690117725, "grad_norm": 0.934167206287384, "learning_rate": 2.593713793952031e-05, "loss": 0.0641, "step": 11923 }, { "epoch": 0.2627487921906934, "grad_norm": 0.7918167114257812, "learning_rate": 2.5936405278179987e-05, "loss": 0.105, "step": 11924 }, { "epoch": 0.2627708274802096, "grad_norm": 0.7481971979141235, "learning_rate": 2.593567256113469e-05, "loss": 0.1128, "step": 11925 }, { "epoch": 0.2627928627697257, "grad_norm": 0.6070044040679932, "learning_rate": 2.5934939788388148e-05, "loss": 0.1172, "step": 11926 }, { "epoch": 0.26281489805924185, "grad_norm": 1.010031819343567, "learning_rate": 2.59342069599441e-05, "loss": 0.1166, "step": 11927 }, { "epoch": 0.262836933348758, "grad_norm": 0.8105018138885498, "learning_rate": 2.5933474075806264e-05, "loss": 0.0743, "step": 11928 }, { "epoch": 0.2628589686382742, "grad_norm": 0.7994173765182495, "learning_rate": 2.5932741135978385e-05, "loss": 0.1188, "step": 11929 }, { "epoch": 0.26288100392779035, "grad_norm": 0.7454172372817993, "learning_rate": 2.5932008140464192e-05, "loss": 0.0615, "step": 11930 }, { "epoch": 0.2629030392173065, "grad_norm": 0.7256911993026733, "learning_rate": 2.5931275089267422e-05, "loss": 0.1163, "step": 11931 }, { "epoch": 0.2629250745068227, "grad_norm": 0.9220466613769531, "learning_rate": 2.59305419823918e-05, "loss": 0.1026, "step": 11932 }, { "epoch": 0.26294710979633884, "grad_norm": 0.7335436940193176, "learning_rate": 2.592980881984107e-05, "loss": 0.1118, "step": 11933 }, { "epoch": 0.262969145085855, "grad_norm": 0.8061366677284241, "learning_rate": 2.5929075601618965e-05, "loss": 0.0635, "step": 11934 }, { "epoch": 0.26299118037537117, "grad_norm": 0.6789199709892273, "learning_rate": 2.5928342327729216e-05, "loss": 0.0906, "step": 11935 }, { "epoch": 0.26301321566488733, "grad_norm": 0.37552523612976074, "learning_rate": 2.592760899817556e-05, "loss": 0.0932, "step": 11936 }, { "epoch": 0.2630352509544035, "grad_norm": 1.219990611076355, "learning_rate": 2.5926875612961727e-05, "loss": 0.1154, "step": 11937 }, { "epoch": 0.26305728624391966, "grad_norm": 1.264313817024231, "learning_rate": 2.5926142172091464e-05, "loss": 0.0915, "step": 11938 }, { "epoch": 0.2630793215334358, "grad_norm": 1.3783698081970215, "learning_rate": 2.5925408675568497e-05, "loss": 0.1182, "step": 11939 }, { "epoch": 0.26310135682295194, "grad_norm": 1.440433144569397, "learning_rate": 2.5924675123396564e-05, "loss": 0.1313, "step": 11940 }, { "epoch": 0.2631233921124681, "grad_norm": 1.44037926197052, "learning_rate": 2.59239415155794e-05, "loss": 0.1502, "step": 11941 }, { "epoch": 0.26314542740198427, "grad_norm": 0.8926549553871155, "learning_rate": 2.5923207852120744e-05, "loss": 0.1018, "step": 11942 }, { "epoch": 0.26316746269150043, "grad_norm": 1.5676794052124023, "learning_rate": 2.592247413302434e-05, "loss": 0.1638, "step": 11943 }, { "epoch": 0.2631894979810166, "grad_norm": 0.934148371219635, "learning_rate": 2.5921740358293912e-05, "loss": 0.0868, "step": 11944 }, { "epoch": 0.26321153327053276, "grad_norm": 0.8023200631141663, "learning_rate": 2.5921006527933204e-05, "loss": 0.0836, "step": 11945 }, { "epoch": 0.2632335685600489, "grad_norm": 0.6662608981132507, "learning_rate": 2.5920272641945955e-05, "loss": 0.1224, "step": 11946 }, { "epoch": 0.2632556038495651, "grad_norm": 0.7121772170066833, "learning_rate": 2.5919538700335903e-05, "loss": 0.1326, "step": 11947 }, { "epoch": 0.26327763913908125, "grad_norm": 0.8539063334465027, "learning_rate": 2.591880470310678e-05, "loss": 0.0986, "step": 11948 }, { "epoch": 0.2632996744285974, "grad_norm": 0.9666037559509277, "learning_rate": 2.591807065026233e-05, "loss": 0.1001, "step": 11949 }, { "epoch": 0.2633217097181136, "grad_norm": 0.7335166335105896, "learning_rate": 2.5917336541806297e-05, "loss": 0.0961, "step": 11950 }, { "epoch": 0.2633437450076297, "grad_norm": 0.7618456482887268, "learning_rate": 2.591660237774241e-05, "loss": 0.1329, "step": 11951 }, { "epoch": 0.26336578029714586, "grad_norm": 0.93010014295578, "learning_rate": 2.5915868158074416e-05, "loss": 0.1247, "step": 11952 }, { "epoch": 0.263387815586662, "grad_norm": 0.9077962040901184, "learning_rate": 2.591513388280605e-05, "loss": 0.1107, "step": 11953 }, { "epoch": 0.2634098508761782, "grad_norm": 0.7538526058197021, "learning_rate": 2.5914399551941055e-05, "loss": 0.0776, "step": 11954 }, { "epoch": 0.26343188616569435, "grad_norm": 0.9276170134544373, "learning_rate": 2.591366516548317e-05, "loss": 0.0827, "step": 11955 }, { "epoch": 0.2634539214552105, "grad_norm": 0.9416835308074951, "learning_rate": 2.591293072343614e-05, "loss": 0.1246, "step": 11956 }, { "epoch": 0.2634759567447267, "grad_norm": 0.8054460287094116, "learning_rate": 2.5912196225803695e-05, "loss": 0.0709, "step": 11957 }, { "epoch": 0.26349799203424284, "grad_norm": 0.7255118489265442, "learning_rate": 2.5911461672589582e-05, "loss": 0.1003, "step": 11958 }, { "epoch": 0.263520027323759, "grad_norm": 0.6268274784088135, "learning_rate": 2.5910727063797552e-05, "loss": 0.0874, "step": 11959 }, { "epoch": 0.2635420626132752, "grad_norm": 0.9535412192344666, "learning_rate": 2.590999239943133e-05, "loss": 0.1069, "step": 11960 }, { "epoch": 0.26356409790279134, "grad_norm": 1.103004813194275, "learning_rate": 2.5909257679494674e-05, "loss": 0.1407, "step": 11961 }, { "epoch": 0.2635861331923075, "grad_norm": 0.4719831645488739, "learning_rate": 2.5908522903991313e-05, "loss": 0.0765, "step": 11962 }, { "epoch": 0.26360816848182367, "grad_norm": 0.6992096900939941, "learning_rate": 2.5907788072924997e-05, "loss": 0.1068, "step": 11963 }, { "epoch": 0.2636302037713398, "grad_norm": 0.710821807384491, "learning_rate": 2.5907053186299465e-05, "loss": 0.1099, "step": 11964 }, { "epoch": 0.26365223906085594, "grad_norm": 0.5953431725502014, "learning_rate": 2.5906318244118462e-05, "loss": 0.0963, "step": 11965 }, { "epoch": 0.2636742743503721, "grad_norm": 1.2109841108322144, "learning_rate": 2.5905583246385738e-05, "loss": 0.1149, "step": 11966 }, { "epoch": 0.26369630963988827, "grad_norm": 0.9622512459754944, "learning_rate": 2.5904848193105024e-05, "loss": 0.1048, "step": 11967 }, { "epoch": 0.26371834492940444, "grad_norm": 0.7210782766342163, "learning_rate": 2.5904113084280073e-05, "loss": 0.0683, "step": 11968 }, { "epoch": 0.2637403802189206, "grad_norm": 0.6847290396690369, "learning_rate": 2.5903377919914627e-05, "loss": 0.106, "step": 11969 }, { "epoch": 0.26376241550843676, "grad_norm": 0.6559644341468811, "learning_rate": 2.590264270001243e-05, "loss": 0.0995, "step": 11970 }, { "epoch": 0.26378445079795293, "grad_norm": 0.9471180438995361, "learning_rate": 2.590190742457723e-05, "loss": 0.0856, "step": 11971 }, { "epoch": 0.2638064860874691, "grad_norm": 1.0176323652267456, "learning_rate": 2.5901172093612765e-05, "loss": 0.1119, "step": 11972 }, { "epoch": 0.26382852137698526, "grad_norm": 0.6440777778625488, "learning_rate": 2.5900436707122788e-05, "loss": 0.148, "step": 11973 }, { "epoch": 0.2638505566665014, "grad_norm": 0.4518507719039917, "learning_rate": 2.589970126511104e-05, "loss": 0.079, "step": 11974 }, { "epoch": 0.2638725919560176, "grad_norm": 0.8835290670394897, "learning_rate": 2.5898965767581265e-05, "loss": 0.1272, "step": 11975 }, { "epoch": 0.2638946272455337, "grad_norm": 0.840248167514801, "learning_rate": 2.589823021453722e-05, "loss": 0.113, "step": 11976 }, { "epoch": 0.26391666253504986, "grad_norm": 0.7360749244689941, "learning_rate": 2.589749460598264e-05, "loss": 0.1181, "step": 11977 }, { "epoch": 0.263938697824566, "grad_norm": 0.6112350225448608, "learning_rate": 2.589675894192128e-05, "loss": 0.0754, "step": 11978 }, { "epoch": 0.2639607331140822, "grad_norm": 0.8584751486778259, "learning_rate": 2.5896023222356876e-05, "loss": 0.094, "step": 11979 }, { "epoch": 0.26398276840359836, "grad_norm": 0.7700524926185608, "learning_rate": 2.5895287447293187e-05, "loss": 0.0957, "step": 11980 }, { "epoch": 0.2640048036931145, "grad_norm": 0.8070178031921387, "learning_rate": 2.589455161673396e-05, "loss": 0.1032, "step": 11981 }, { "epoch": 0.2640268389826307, "grad_norm": 0.5762604475021362, "learning_rate": 2.5893815730682933e-05, "loss": 0.0545, "step": 11982 }, { "epoch": 0.26404887427214685, "grad_norm": 0.8575537204742432, "learning_rate": 2.589307978914387e-05, "loss": 0.0785, "step": 11983 }, { "epoch": 0.264070909561663, "grad_norm": 0.9371415972709656, "learning_rate": 2.5892343792120503e-05, "loss": 0.1127, "step": 11984 }, { "epoch": 0.2640929448511792, "grad_norm": 0.7487246990203857, "learning_rate": 2.589160773961659e-05, "loss": 0.0805, "step": 11985 }, { "epoch": 0.26411498014069534, "grad_norm": 1.2445470094680786, "learning_rate": 2.5890871631635878e-05, "loss": 0.1057, "step": 11986 }, { "epoch": 0.2641370154302115, "grad_norm": 1.3089313507080078, "learning_rate": 2.589013546818212e-05, "loss": 0.1229, "step": 11987 }, { "epoch": 0.2641590507197276, "grad_norm": 0.9605346918106079, "learning_rate": 2.5889399249259052e-05, "loss": 0.0876, "step": 11988 }, { "epoch": 0.2641810860092438, "grad_norm": 0.7991679310798645, "learning_rate": 2.5888662974870444e-05, "loss": 0.1237, "step": 11989 }, { "epoch": 0.26420312129875995, "grad_norm": 0.772511899471283, "learning_rate": 2.5887926645020032e-05, "loss": 0.1182, "step": 11990 }, { "epoch": 0.2642251565882761, "grad_norm": 0.513014554977417, "learning_rate": 2.588719025971157e-05, "loss": 0.0963, "step": 11991 }, { "epoch": 0.2642471918777923, "grad_norm": 0.9814366698265076, "learning_rate": 2.5886453818948816e-05, "loss": 0.11, "step": 11992 }, { "epoch": 0.26426922716730844, "grad_norm": 0.7100709676742554, "learning_rate": 2.5885717322735513e-05, "loss": 0.102, "step": 11993 }, { "epoch": 0.2642912624568246, "grad_norm": 0.8972681760787964, "learning_rate": 2.5884980771075412e-05, "loss": 0.1138, "step": 11994 }, { "epoch": 0.26431329774634077, "grad_norm": 0.6069985032081604, "learning_rate": 2.588424416397227e-05, "loss": 0.1105, "step": 11995 }, { "epoch": 0.26433533303585693, "grad_norm": 0.5640164613723755, "learning_rate": 2.5883507501429833e-05, "loss": 0.0705, "step": 11996 }, { "epoch": 0.2643573683253731, "grad_norm": 1.0846391916275024, "learning_rate": 2.5882770783451852e-05, "loss": 0.1233, "step": 11997 }, { "epoch": 0.26437940361488926, "grad_norm": 0.975610613822937, "learning_rate": 2.588203401004209e-05, "loss": 0.1261, "step": 11998 }, { "epoch": 0.26440143890440543, "grad_norm": 0.7215328812599182, "learning_rate": 2.5881297181204292e-05, "loss": 0.1116, "step": 11999 }, { "epoch": 0.2644234741939216, "grad_norm": 1.1492282152175903, "learning_rate": 2.588056029694221e-05, "loss": 0.1236, "step": 12000 }, { "epoch": 0.2644455094834377, "grad_norm": 0.7036541700363159, "learning_rate": 2.5879823357259607e-05, "loss": 0.0783, "step": 12001 }, { "epoch": 0.26446754477295387, "grad_norm": 0.782825231552124, "learning_rate": 2.5879086362160225e-05, "loss": 0.1219, "step": 12002 }, { "epoch": 0.26448958006247003, "grad_norm": 1.0036243200302124, "learning_rate": 2.587834931164782e-05, "loss": 0.1143, "step": 12003 }, { "epoch": 0.2645116153519862, "grad_norm": 0.658306896686554, "learning_rate": 2.587761220572615e-05, "loss": 0.1164, "step": 12004 }, { "epoch": 0.26453365064150236, "grad_norm": 0.8564976453781128, "learning_rate": 2.5876875044398968e-05, "loss": 0.0869, "step": 12005 }, { "epoch": 0.2645556859310185, "grad_norm": 0.7607794404029846, "learning_rate": 2.587613782767003e-05, "loss": 0.0935, "step": 12006 }, { "epoch": 0.2645777212205347, "grad_norm": 1.0641710758209229, "learning_rate": 2.587540055554309e-05, "loss": 0.105, "step": 12007 }, { "epoch": 0.26459975651005085, "grad_norm": 1.0790342092514038, "learning_rate": 2.5874663228021896e-05, "loss": 0.1164, "step": 12008 }, { "epoch": 0.264621791799567, "grad_norm": 0.5838444828987122, "learning_rate": 2.587392584511022e-05, "loss": 0.0971, "step": 12009 }, { "epoch": 0.2646438270890832, "grad_norm": 0.6504402160644531, "learning_rate": 2.5873188406811807e-05, "loss": 0.1002, "step": 12010 }, { "epoch": 0.26466586237859935, "grad_norm": 0.6113528609275818, "learning_rate": 2.587245091313041e-05, "loss": 0.0576, "step": 12011 }, { "epoch": 0.2646878976681155, "grad_norm": 0.6504359841346741, "learning_rate": 2.5871713364069794e-05, "loss": 0.1018, "step": 12012 }, { "epoch": 0.2647099329576316, "grad_norm": 0.8611806035041809, "learning_rate": 2.587097575963371e-05, "loss": 0.0999, "step": 12013 }, { "epoch": 0.2647319682471478, "grad_norm": 0.48905619978904724, "learning_rate": 2.587023809982592e-05, "loss": 0.0879, "step": 12014 }, { "epoch": 0.26475400353666395, "grad_norm": 0.9059696793556213, "learning_rate": 2.5869500384650177e-05, "loss": 0.0995, "step": 12015 }, { "epoch": 0.2647760388261801, "grad_norm": 0.975483238697052, "learning_rate": 2.586876261411024e-05, "loss": 0.1088, "step": 12016 }, { "epoch": 0.2647980741156963, "grad_norm": 1.1348435878753662, "learning_rate": 2.586802478820986e-05, "loss": 0.1495, "step": 12017 }, { "epoch": 0.26482010940521244, "grad_norm": 0.8839218020439148, "learning_rate": 2.586728690695281e-05, "loss": 0.0898, "step": 12018 }, { "epoch": 0.2648421446947286, "grad_norm": 0.954833447933197, "learning_rate": 2.586654897034284e-05, "loss": 0.1248, "step": 12019 }, { "epoch": 0.2648641799842448, "grad_norm": 0.6613649129867554, "learning_rate": 2.5865810978383705e-05, "loss": 0.075, "step": 12020 }, { "epoch": 0.26488621527376094, "grad_norm": 0.7886371612548828, "learning_rate": 2.5865072931079172e-05, "loss": 0.0945, "step": 12021 }, { "epoch": 0.2649082505632771, "grad_norm": 0.6982367038726807, "learning_rate": 2.5864334828432997e-05, "loss": 0.1027, "step": 12022 }, { "epoch": 0.26493028585279327, "grad_norm": 0.47885987162590027, "learning_rate": 2.5863596670448933e-05, "loss": 0.0985, "step": 12023 }, { "epoch": 0.26495232114230943, "grad_norm": 0.5533818602561951, "learning_rate": 2.5862858457130747e-05, "loss": 0.0815, "step": 12024 }, { "epoch": 0.26497435643182554, "grad_norm": 0.8621973991394043, "learning_rate": 2.58621201884822e-05, "loss": 0.1507, "step": 12025 }, { "epoch": 0.2649963917213417, "grad_norm": 1.6237196922302246, "learning_rate": 2.586138186450705e-05, "loss": 0.1241, "step": 12026 }, { "epoch": 0.26501842701085787, "grad_norm": 0.8761934638023376, "learning_rate": 2.5860643485209056e-05, "loss": 0.078, "step": 12027 }, { "epoch": 0.26504046230037404, "grad_norm": 1.1383484601974487, "learning_rate": 2.5859905050591984e-05, "loss": 0.1192, "step": 12028 }, { "epoch": 0.2650624975898902, "grad_norm": 1.3477895259857178, "learning_rate": 2.585916656065959e-05, "loss": 0.164, "step": 12029 }, { "epoch": 0.26508453287940636, "grad_norm": 0.9481208920478821, "learning_rate": 2.585842801541564e-05, "loss": 0.0891, "step": 12030 }, { "epoch": 0.26510656816892253, "grad_norm": 0.5808650851249695, "learning_rate": 2.5857689414863888e-05, "loss": 0.0913, "step": 12031 }, { "epoch": 0.2651286034584387, "grad_norm": 0.7908633351325989, "learning_rate": 2.5856950759008108e-05, "loss": 0.0987, "step": 12032 }, { "epoch": 0.26515063874795486, "grad_norm": 0.9116761088371277, "learning_rate": 2.585621204785205e-05, "loss": 0.1141, "step": 12033 }, { "epoch": 0.265172674037471, "grad_norm": 1.0338894128799438, "learning_rate": 2.585547328139949e-05, "loss": 0.1121, "step": 12034 }, { "epoch": 0.2651947093269872, "grad_norm": 1.1199328899383545, "learning_rate": 2.5854734459654173e-05, "loss": 0.0721, "step": 12035 }, { "epoch": 0.26521674461650335, "grad_norm": 0.8081387877464294, "learning_rate": 2.585399558261988e-05, "loss": 0.1064, "step": 12036 }, { "epoch": 0.2652387799060195, "grad_norm": 0.8826192021369934, "learning_rate": 2.585325665030036e-05, "loss": 0.1218, "step": 12037 }, { "epoch": 0.2652608151955356, "grad_norm": 0.9636754989624023, "learning_rate": 2.585251766269939e-05, "loss": 0.0672, "step": 12038 }, { "epoch": 0.2652828504850518, "grad_norm": 0.8206183910369873, "learning_rate": 2.585177861982073e-05, "loss": 0.0712, "step": 12039 }, { "epoch": 0.26530488577456796, "grad_norm": 0.4849896728992462, "learning_rate": 2.5851039521668138e-05, "loss": 0.0897, "step": 12040 }, { "epoch": 0.2653269210640841, "grad_norm": 0.8292975425720215, "learning_rate": 2.5850300368245384e-05, "loss": 0.1152, "step": 12041 }, { "epoch": 0.2653489563536003, "grad_norm": 0.7335988879203796, "learning_rate": 2.5849561159556234e-05, "loss": 0.1061, "step": 12042 }, { "epoch": 0.26537099164311645, "grad_norm": 0.9924055933952332, "learning_rate": 2.5848821895604445e-05, "loss": 0.0807, "step": 12043 }, { "epoch": 0.2653930269326326, "grad_norm": 0.5834545493125916, "learning_rate": 2.5848082576393796e-05, "loss": 0.1007, "step": 12044 }, { "epoch": 0.2654150622221488, "grad_norm": 0.5466098785400391, "learning_rate": 2.5847343201928043e-05, "loss": 0.0603, "step": 12045 }, { "epoch": 0.26543709751166494, "grad_norm": 0.4099442660808563, "learning_rate": 2.584660377221095e-05, "loss": 0.0659, "step": 12046 }, { "epoch": 0.2654591328011811, "grad_norm": 0.5140356421470642, "learning_rate": 2.5845864287246295e-05, "loss": 0.1047, "step": 12047 }, { "epoch": 0.2654811680906973, "grad_norm": 0.5848177671432495, "learning_rate": 2.584512474703783e-05, "loss": 0.0661, "step": 12048 }, { "epoch": 0.26550320338021344, "grad_norm": 0.854716956615448, "learning_rate": 2.5844385151589334e-05, "loss": 0.1415, "step": 12049 }, { "epoch": 0.26552523866972955, "grad_norm": 0.5122157335281372, "learning_rate": 2.5843645500904566e-05, "loss": 0.1002, "step": 12050 }, { "epoch": 0.2655472739592457, "grad_norm": 0.5873503088951111, "learning_rate": 2.58429057949873e-05, "loss": 0.1111, "step": 12051 }, { "epoch": 0.2655693092487619, "grad_norm": 0.6257814168930054, "learning_rate": 2.5842166033841296e-05, "loss": 0.0879, "step": 12052 }, { "epoch": 0.26559134453827804, "grad_norm": 0.6659302711486816, "learning_rate": 2.584142621747033e-05, "loss": 0.1024, "step": 12053 }, { "epoch": 0.2656133798277942, "grad_norm": 0.9029160737991333, "learning_rate": 2.5840686345878165e-05, "loss": 0.0968, "step": 12054 }, { "epoch": 0.26563541511731037, "grad_norm": 0.7535518407821655, "learning_rate": 2.5839946419068567e-05, "loss": 0.1082, "step": 12055 }, { "epoch": 0.26565745040682653, "grad_norm": 1.0739858150482178, "learning_rate": 2.583920643704532e-05, "loss": 0.0819, "step": 12056 }, { "epoch": 0.2656794856963427, "grad_norm": 0.7432816028594971, "learning_rate": 2.5838466399812172e-05, "loss": 0.116, "step": 12057 }, { "epoch": 0.26570152098585886, "grad_norm": 0.7619708180427551, "learning_rate": 2.5837726307372904e-05, "loss": 0.1187, "step": 12058 }, { "epoch": 0.26572355627537503, "grad_norm": 0.5607980489730835, "learning_rate": 2.583698615973129e-05, "loss": 0.0872, "step": 12059 }, { "epoch": 0.2657455915648912, "grad_norm": 0.9069575667381287, "learning_rate": 2.5836245956891086e-05, "loss": 0.0993, "step": 12060 }, { "epoch": 0.26576762685440736, "grad_norm": 0.7802935838699341, "learning_rate": 2.5835505698856077e-05, "loss": 0.1017, "step": 12061 }, { "epoch": 0.26578966214392347, "grad_norm": 1.0202735662460327, "learning_rate": 2.5834765385630024e-05, "loss": 0.0946, "step": 12062 }, { "epoch": 0.26581169743343963, "grad_norm": 0.7260958552360535, "learning_rate": 2.5834025017216706e-05, "loss": 0.1017, "step": 12063 }, { "epoch": 0.2658337327229558, "grad_norm": 0.6065691113471985, "learning_rate": 2.5833284593619885e-05, "loss": 0.0881, "step": 12064 }, { "epoch": 0.26585576801247196, "grad_norm": 1.1664535999298096, "learning_rate": 2.5832544114843337e-05, "loss": 0.1357, "step": 12065 }, { "epoch": 0.2658778033019881, "grad_norm": 1.013744592666626, "learning_rate": 2.583180358089083e-05, "loss": 0.1234, "step": 12066 }, { "epoch": 0.2658998385915043, "grad_norm": 0.7444263100624084, "learning_rate": 2.5831062991766138e-05, "loss": 0.0994, "step": 12067 }, { "epoch": 0.26592187388102045, "grad_norm": 0.5331867337226868, "learning_rate": 2.5830322347473036e-05, "loss": 0.1005, "step": 12068 }, { "epoch": 0.2659439091705366, "grad_norm": 0.7527098059654236, "learning_rate": 2.5829581648015298e-05, "loss": 0.1007, "step": 12069 }, { "epoch": 0.2659659444600528, "grad_norm": 0.8401585221290588, "learning_rate": 2.582884089339669e-05, "loss": 0.1016, "step": 12070 }, { "epoch": 0.26598797974956895, "grad_norm": 0.8075473308563232, "learning_rate": 2.582810008362099e-05, "loss": 0.0946, "step": 12071 }, { "epoch": 0.2660100150390851, "grad_norm": 0.7351547479629517, "learning_rate": 2.5827359218691968e-05, "loss": 0.0823, "step": 12072 }, { "epoch": 0.2660320503286013, "grad_norm": 0.6921597123146057, "learning_rate": 2.5826618298613402e-05, "loss": 0.1031, "step": 12073 }, { "epoch": 0.26605408561811744, "grad_norm": 1.116492748260498, "learning_rate": 2.5825877323389065e-05, "loss": 0.1348, "step": 12074 }, { "epoch": 0.26607612090763355, "grad_norm": 0.6249370574951172, "learning_rate": 2.5825136293022723e-05, "loss": 0.1207, "step": 12075 }, { "epoch": 0.2660981561971497, "grad_norm": 0.716829776763916, "learning_rate": 2.582439520751816e-05, "loss": 0.0804, "step": 12076 }, { "epoch": 0.2661201914866659, "grad_norm": 0.6998902559280396, "learning_rate": 2.582365406687915e-05, "loss": 0.1432, "step": 12077 }, { "epoch": 0.26614222677618204, "grad_norm": 0.4630562961101532, "learning_rate": 2.5822912871109465e-05, "loss": 0.0847, "step": 12078 }, { "epoch": 0.2661642620656982, "grad_norm": 1.741930603981018, "learning_rate": 2.5822171620212878e-05, "loss": 0.1323, "step": 12079 }, { "epoch": 0.2661862973552144, "grad_norm": 1.013906478881836, "learning_rate": 2.582143031419317e-05, "loss": 0.1323, "step": 12080 }, { "epoch": 0.26620833264473054, "grad_norm": 0.7572870850563049, "learning_rate": 2.5820688953054115e-05, "loss": 0.0869, "step": 12081 }, { "epoch": 0.2662303679342467, "grad_norm": 0.5469037294387817, "learning_rate": 2.581994753679949e-05, "loss": 0.0878, "step": 12082 }, { "epoch": 0.26625240322376287, "grad_norm": 0.7215595841407776, "learning_rate": 2.5819206065433066e-05, "loss": 0.0689, "step": 12083 }, { "epoch": 0.26627443851327903, "grad_norm": 1.1207129955291748, "learning_rate": 2.5818464538958623e-05, "loss": 0.0817, "step": 12084 }, { "epoch": 0.2662964738027952, "grad_norm": 0.49460697174072266, "learning_rate": 2.5817722957379943e-05, "loss": 0.1113, "step": 12085 }, { "epoch": 0.26631850909231136, "grad_norm": 0.9605579376220703, "learning_rate": 2.5816981320700794e-05, "loss": 0.144, "step": 12086 }, { "epoch": 0.26634054438182747, "grad_norm": 0.9355981349945068, "learning_rate": 2.5816239628924964e-05, "loss": 0.1226, "step": 12087 }, { "epoch": 0.26636257967134364, "grad_norm": 0.8512994647026062, "learning_rate": 2.5815497882056224e-05, "loss": 0.1082, "step": 12088 }, { "epoch": 0.2663846149608598, "grad_norm": 0.7037882208824158, "learning_rate": 2.581475608009835e-05, "loss": 0.0703, "step": 12089 }, { "epoch": 0.26640665025037596, "grad_norm": 0.9010145664215088, "learning_rate": 2.5814014223055128e-05, "loss": 0.0845, "step": 12090 }, { "epoch": 0.26642868553989213, "grad_norm": 0.6279622316360474, "learning_rate": 2.5813272310930334e-05, "loss": 0.0968, "step": 12091 }, { "epoch": 0.2664507208294083, "grad_norm": 0.876832902431488, "learning_rate": 2.581253034372774e-05, "loss": 0.0816, "step": 12092 }, { "epoch": 0.26647275611892446, "grad_norm": 0.7834048271179199, "learning_rate": 2.581178832145114e-05, "loss": 0.1126, "step": 12093 }, { "epoch": 0.2664947914084406, "grad_norm": 0.936011552810669, "learning_rate": 2.5811046244104294e-05, "loss": 0.0971, "step": 12094 }, { "epoch": 0.2665168266979568, "grad_norm": 0.706713855266571, "learning_rate": 2.5810304111690998e-05, "loss": 0.0881, "step": 12095 }, { "epoch": 0.26653886198747295, "grad_norm": 0.557262122631073, "learning_rate": 2.5809561924215024e-05, "loss": 0.1027, "step": 12096 }, { "epoch": 0.2665608972769891, "grad_norm": 0.7118154764175415, "learning_rate": 2.5808819681680154e-05, "loss": 0.0904, "step": 12097 }, { "epoch": 0.2665829325665053, "grad_norm": 0.8696944117546082, "learning_rate": 2.580807738409017e-05, "loss": 0.1233, "step": 12098 }, { "epoch": 0.2666049678560214, "grad_norm": 0.585147500038147, "learning_rate": 2.5807335031448855e-05, "loss": 0.0737, "step": 12099 }, { "epoch": 0.26662700314553756, "grad_norm": 1.1120985746383667, "learning_rate": 2.5806592623759985e-05, "loss": 0.1043, "step": 12100 }, { "epoch": 0.2666490384350537, "grad_norm": 0.4809424877166748, "learning_rate": 2.5805850161027343e-05, "loss": 0.0873, "step": 12101 }, { "epoch": 0.2666710737245699, "grad_norm": 0.7884101867675781, "learning_rate": 2.5805107643254713e-05, "loss": 0.0692, "step": 12102 }, { "epoch": 0.26669310901408605, "grad_norm": 0.925598680973053, "learning_rate": 2.5804365070445878e-05, "loss": 0.1765, "step": 12103 }, { "epoch": 0.2667151443036022, "grad_norm": 0.9056702852249146, "learning_rate": 2.5803622442604613e-05, "loss": 0.125, "step": 12104 }, { "epoch": 0.2667371795931184, "grad_norm": 0.4409959614276886, "learning_rate": 2.5802879759734705e-05, "loss": 0.0744, "step": 12105 }, { "epoch": 0.26675921488263454, "grad_norm": 0.9834291934967041, "learning_rate": 2.580213702183994e-05, "loss": 0.1107, "step": 12106 }, { "epoch": 0.2667812501721507, "grad_norm": 1.1934669017791748, "learning_rate": 2.58013942289241e-05, "loss": 0.1026, "step": 12107 }, { "epoch": 0.2668032854616669, "grad_norm": 0.5874598026275635, "learning_rate": 2.5800651380990962e-05, "loss": 0.1077, "step": 12108 }, { "epoch": 0.26682532075118304, "grad_norm": 0.9690940976142883, "learning_rate": 2.5799908478044323e-05, "loss": 0.1263, "step": 12109 }, { "epoch": 0.2668473560406992, "grad_norm": 0.7511412501335144, "learning_rate": 2.5799165520087953e-05, "loss": 0.0848, "step": 12110 }, { "epoch": 0.26686939133021537, "grad_norm": 1.1389256715774536, "learning_rate": 2.5798422507125643e-05, "loss": 0.0722, "step": 12111 }, { "epoch": 0.2668914266197315, "grad_norm": 0.9512912631034851, "learning_rate": 2.5797679439161177e-05, "loss": 0.1474, "step": 12112 }, { "epoch": 0.26691346190924764, "grad_norm": 1.103614091873169, "learning_rate": 2.579693631619834e-05, "loss": 0.1092, "step": 12113 }, { "epoch": 0.2669354971987638, "grad_norm": 0.9782264828681946, "learning_rate": 2.579619313824091e-05, "loss": 0.1491, "step": 12114 }, { "epoch": 0.26695753248827997, "grad_norm": 1.1275653839111328, "learning_rate": 2.5795449905292688e-05, "loss": 0.0913, "step": 12115 }, { "epoch": 0.26697956777779613, "grad_norm": 0.8803486227989197, "learning_rate": 2.5794706617357448e-05, "loss": 0.1051, "step": 12116 }, { "epoch": 0.2670016030673123, "grad_norm": 0.6259292960166931, "learning_rate": 2.579396327443898e-05, "loss": 0.0757, "step": 12117 }, { "epoch": 0.26702363835682846, "grad_norm": 1.414162039756775, "learning_rate": 2.5793219876541065e-05, "loss": 0.1024, "step": 12118 }, { "epoch": 0.26704567364634463, "grad_norm": 1.1242039203643799, "learning_rate": 2.5792476423667495e-05, "loss": 0.1256, "step": 12119 }, { "epoch": 0.2670677089358608, "grad_norm": 0.8585948348045349, "learning_rate": 2.5791732915822055e-05, "loss": 0.1098, "step": 12120 }, { "epoch": 0.26708974422537696, "grad_norm": 0.44181203842163086, "learning_rate": 2.5790989353008534e-05, "loss": 0.0558, "step": 12121 }, { "epoch": 0.2671117795148931, "grad_norm": 1.2795298099517822, "learning_rate": 2.5790245735230716e-05, "loss": 0.089, "step": 12122 }, { "epoch": 0.2671338148044093, "grad_norm": 0.7734789848327637, "learning_rate": 2.578950206249239e-05, "loss": 0.0708, "step": 12123 }, { "epoch": 0.2671558500939254, "grad_norm": 0.8411494493484497, "learning_rate": 2.5788758334797343e-05, "loss": 0.0957, "step": 12124 }, { "epoch": 0.26717788538344156, "grad_norm": 0.6153130531311035, "learning_rate": 2.5788014552149363e-05, "loss": 0.0949, "step": 12125 }, { "epoch": 0.2671999206729577, "grad_norm": 0.6312311887741089, "learning_rate": 2.578727071455225e-05, "loss": 0.1069, "step": 12126 }, { "epoch": 0.2672219559624739, "grad_norm": 0.6999503374099731, "learning_rate": 2.578652682200977e-05, "loss": 0.1152, "step": 12127 }, { "epoch": 0.26724399125199005, "grad_norm": 0.7452511787414551, "learning_rate": 2.5785782874525733e-05, "loss": 0.0896, "step": 12128 }, { "epoch": 0.2672660265415062, "grad_norm": 0.9466675519943237, "learning_rate": 2.5785038872103915e-05, "loss": 0.1293, "step": 12129 }, { "epoch": 0.2672880618310224, "grad_norm": 0.8326014876365662, "learning_rate": 2.5784294814748114e-05, "loss": 0.1245, "step": 12130 }, { "epoch": 0.26731009712053855, "grad_norm": 0.8808538913726807, "learning_rate": 2.5783550702462117e-05, "loss": 0.1219, "step": 12131 }, { "epoch": 0.2673321324100547, "grad_norm": 0.7486104369163513, "learning_rate": 2.578280653524971e-05, "loss": 0.1278, "step": 12132 }, { "epoch": 0.2673541676995709, "grad_norm": 0.6733526587486267, "learning_rate": 2.578206231311469e-05, "loss": 0.1344, "step": 12133 }, { "epoch": 0.26737620298908704, "grad_norm": 0.8141165375709534, "learning_rate": 2.5781318036060845e-05, "loss": 0.0822, "step": 12134 }, { "epoch": 0.2673982382786032, "grad_norm": 0.6574951410293579, "learning_rate": 2.5780573704091965e-05, "loss": 0.0757, "step": 12135 }, { "epoch": 0.2674202735681193, "grad_norm": 0.7749332785606384, "learning_rate": 2.5779829317211843e-05, "loss": 0.0944, "step": 12136 }, { "epoch": 0.2674423088576355, "grad_norm": 0.9129390716552734, "learning_rate": 2.5779084875424267e-05, "loss": 0.0794, "step": 12137 }, { "epoch": 0.26746434414715164, "grad_norm": 0.7767294645309448, "learning_rate": 2.577834037873303e-05, "loss": 0.0957, "step": 12138 }, { "epoch": 0.2674863794366678, "grad_norm": 0.8809773921966553, "learning_rate": 2.5777595827141932e-05, "loss": 0.1196, "step": 12139 }, { "epoch": 0.267508414726184, "grad_norm": 0.9628845453262329, "learning_rate": 2.5776851220654756e-05, "loss": 0.1008, "step": 12140 }, { "epoch": 0.26753045001570014, "grad_norm": 1.6378587484359741, "learning_rate": 2.5776106559275298e-05, "loss": 0.0595, "step": 12141 }, { "epoch": 0.2675524853052163, "grad_norm": 0.816979169845581, "learning_rate": 2.577536184300735e-05, "loss": 0.1093, "step": 12142 }, { "epoch": 0.26757452059473247, "grad_norm": 0.9752210378646851, "learning_rate": 2.5774617071854708e-05, "loss": 0.0834, "step": 12143 }, { "epoch": 0.26759655588424863, "grad_norm": 0.8717776536941528, "learning_rate": 2.577387224582116e-05, "loss": 0.1299, "step": 12144 }, { "epoch": 0.2676185911737648, "grad_norm": 1.169797420501709, "learning_rate": 2.5773127364910504e-05, "loss": 0.1178, "step": 12145 }, { "epoch": 0.26764062646328096, "grad_norm": 0.630246102809906, "learning_rate": 2.5772382429126537e-05, "loss": 0.0945, "step": 12146 }, { "epoch": 0.2676626617527971, "grad_norm": 0.6686426997184753, "learning_rate": 2.5771637438473046e-05, "loss": 0.0867, "step": 12147 }, { "epoch": 0.2676846970423133, "grad_norm": 0.8697509169578552, "learning_rate": 2.577089239295383e-05, "loss": 0.1045, "step": 12148 }, { "epoch": 0.2677067323318294, "grad_norm": 0.9842004179954529, "learning_rate": 2.5770147292572686e-05, "loss": 0.1187, "step": 12149 }, { "epoch": 0.26772876762134556, "grad_norm": 0.7848141193389893, "learning_rate": 2.57694021373334e-05, "loss": 0.1215, "step": 12150 }, { "epoch": 0.26775080291086173, "grad_norm": 0.744809091091156, "learning_rate": 2.576865692723978e-05, "loss": 0.1482, "step": 12151 }, { "epoch": 0.2677728382003779, "grad_norm": 0.7178104519844055, "learning_rate": 2.576791166229561e-05, "loss": 0.1237, "step": 12152 }, { "epoch": 0.26779487348989406, "grad_norm": 0.6767235398292542, "learning_rate": 2.5767166342504697e-05, "loss": 0.0788, "step": 12153 }, { "epoch": 0.2678169087794102, "grad_norm": 0.8615298271179199, "learning_rate": 2.576642096787083e-05, "loss": 0.1331, "step": 12154 }, { "epoch": 0.2678389440689264, "grad_norm": 0.8302696943283081, "learning_rate": 2.576567553839781e-05, "loss": 0.1079, "step": 12155 }, { "epoch": 0.26786097935844255, "grad_norm": 0.8278189897537231, "learning_rate": 2.576493005408943e-05, "loss": 0.1197, "step": 12156 }, { "epoch": 0.2678830146479587, "grad_norm": 0.8944408297538757, "learning_rate": 2.576418451494949e-05, "loss": 0.102, "step": 12157 }, { "epoch": 0.2679050499374749, "grad_norm": 0.6599105000495911, "learning_rate": 2.5763438920981782e-05, "loss": 0.1008, "step": 12158 }, { "epoch": 0.26792708522699105, "grad_norm": 0.42147523164749146, "learning_rate": 2.576269327219011e-05, "loss": 0.1049, "step": 12159 }, { "epoch": 0.2679491205165072, "grad_norm": 0.9182615876197815, "learning_rate": 2.576194756857827e-05, "loss": 0.1026, "step": 12160 }, { "epoch": 0.2679711558060233, "grad_norm": 1.011615514755249, "learning_rate": 2.576120181015006e-05, "loss": 0.1032, "step": 12161 }, { "epoch": 0.2679931910955395, "grad_norm": 0.4765244722366333, "learning_rate": 2.5760455996909276e-05, "loss": 0.0663, "step": 12162 }, { "epoch": 0.26801522638505565, "grad_norm": 0.7478194832801819, "learning_rate": 2.5759710128859723e-05, "loss": 0.0916, "step": 12163 }, { "epoch": 0.2680372616745718, "grad_norm": 0.5805611610412598, "learning_rate": 2.5758964206005192e-05, "loss": 0.106, "step": 12164 }, { "epoch": 0.268059296964088, "grad_norm": 0.7113179564476013, "learning_rate": 2.5758218228349494e-05, "loss": 0.1107, "step": 12165 }, { "epoch": 0.26808133225360414, "grad_norm": 1.0485693216323853, "learning_rate": 2.5757472195896415e-05, "loss": 0.1097, "step": 12166 }, { "epoch": 0.2681033675431203, "grad_norm": 1.3642574548721313, "learning_rate": 2.575672610864976e-05, "loss": 0.129, "step": 12167 }, { "epoch": 0.2681254028326365, "grad_norm": 0.7830303907394409, "learning_rate": 2.575597996661334e-05, "loss": 0.1108, "step": 12168 }, { "epoch": 0.26814743812215264, "grad_norm": 1.0619884729385376, "learning_rate": 2.575523376979094e-05, "loss": 0.1091, "step": 12169 }, { "epoch": 0.2681694734116688, "grad_norm": 1.249678611755371, "learning_rate": 2.5754487518186368e-05, "loss": 0.094, "step": 12170 }, { "epoch": 0.26819150870118497, "grad_norm": 0.911916196346283, "learning_rate": 2.5753741211803424e-05, "loss": 0.0752, "step": 12171 }, { "epoch": 0.26821354399070113, "grad_norm": 0.5725697875022888, "learning_rate": 2.5752994850645904e-05, "loss": 0.0769, "step": 12172 }, { "epoch": 0.2682355792802173, "grad_norm": 0.8076935410499573, "learning_rate": 2.5752248434717622e-05, "loss": 0.0617, "step": 12173 }, { "epoch": 0.2682576145697334, "grad_norm": 0.9082599878311157, "learning_rate": 2.5751501964022375e-05, "loss": 0.0758, "step": 12174 }, { "epoch": 0.26827964985924957, "grad_norm": 1.0998095273971558, "learning_rate": 2.5750755438563957e-05, "loss": 0.1208, "step": 12175 }, { "epoch": 0.26830168514876573, "grad_norm": 0.9605517983436584, "learning_rate": 2.5750008858346178e-05, "loss": 0.0937, "step": 12176 }, { "epoch": 0.2683237204382819, "grad_norm": 0.893071174621582, "learning_rate": 2.574926222337284e-05, "loss": 0.0962, "step": 12177 }, { "epoch": 0.26834575572779806, "grad_norm": 0.8173933029174805, "learning_rate": 2.574851553364775e-05, "loss": 0.114, "step": 12178 }, { "epoch": 0.26836779101731423, "grad_norm": 0.4706989824771881, "learning_rate": 2.57477687891747e-05, "loss": 0.0716, "step": 12179 }, { "epoch": 0.2683898263068304, "grad_norm": 0.8439398407936096, "learning_rate": 2.5747021989957502e-05, "loss": 0.0686, "step": 12180 }, { "epoch": 0.26841186159634656, "grad_norm": 0.7938633561134338, "learning_rate": 2.574627513599996e-05, "loss": 0.0906, "step": 12181 }, { "epoch": 0.2684338968858627, "grad_norm": 0.7963730692863464, "learning_rate": 2.574552822730587e-05, "loss": 0.101, "step": 12182 }, { "epoch": 0.2684559321753789, "grad_norm": 1.0517287254333496, "learning_rate": 2.5744781263879046e-05, "loss": 0.0854, "step": 12183 }, { "epoch": 0.26847796746489505, "grad_norm": 1.0016191005706787, "learning_rate": 2.5744034245723295e-05, "loss": 0.0824, "step": 12184 }, { "epoch": 0.2685000027544112, "grad_norm": 0.6443257927894592, "learning_rate": 2.5743287172842408e-05, "loss": 0.0751, "step": 12185 }, { "epoch": 0.2685220380439273, "grad_norm": 0.5854745507240295, "learning_rate": 2.5742540045240205e-05, "loss": 0.0661, "step": 12186 }, { "epoch": 0.2685440733334435, "grad_norm": 0.7853354811668396, "learning_rate": 2.574179286292048e-05, "loss": 0.1076, "step": 12187 }, { "epoch": 0.26856610862295965, "grad_norm": 0.6894584894180298, "learning_rate": 2.5741045625887044e-05, "loss": 0.1186, "step": 12188 }, { "epoch": 0.2685881439124758, "grad_norm": 0.6707054376602173, "learning_rate": 2.5740298334143706e-05, "loss": 0.0767, "step": 12189 }, { "epoch": 0.268610179201992, "grad_norm": 0.9369455575942993, "learning_rate": 2.5739550987694267e-05, "loss": 0.0918, "step": 12190 }, { "epoch": 0.26863221449150815, "grad_norm": 0.6068819761276245, "learning_rate": 2.5738803586542533e-05, "loss": 0.0668, "step": 12191 }, { "epoch": 0.2686542497810243, "grad_norm": 0.783329427242279, "learning_rate": 2.5738056130692318e-05, "loss": 0.1027, "step": 12192 }, { "epoch": 0.2686762850705405, "grad_norm": 0.9464202523231506, "learning_rate": 2.5737308620147424e-05, "loss": 0.099, "step": 12193 }, { "epoch": 0.26869832036005664, "grad_norm": 0.9632286429405212, "learning_rate": 2.573656105491166e-05, "loss": 0.1271, "step": 12194 }, { "epoch": 0.2687203556495728, "grad_norm": 0.5790697336196899, "learning_rate": 2.573581343498883e-05, "loss": 0.0836, "step": 12195 }, { "epoch": 0.26874239093908897, "grad_norm": 0.9452844262123108, "learning_rate": 2.573506576038275e-05, "loss": 0.0961, "step": 12196 }, { "epoch": 0.26876442622860514, "grad_norm": 0.7214882969856262, "learning_rate": 2.573431803109722e-05, "loss": 0.1128, "step": 12197 }, { "epoch": 0.26878646151812124, "grad_norm": 0.5075340867042542, "learning_rate": 2.5733570247136054e-05, "loss": 0.0621, "step": 12198 }, { "epoch": 0.2688084968076374, "grad_norm": 1.0115880966186523, "learning_rate": 2.5732822408503057e-05, "loss": 0.1022, "step": 12199 }, { "epoch": 0.2688305320971536, "grad_norm": 0.7547104954719543, "learning_rate": 2.573207451520204e-05, "loss": 0.1118, "step": 12200 }, { "epoch": 0.26885256738666974, "grad_norm": 1.1490975618362427, "learning_rate": 2.5731326567236813e-05, "loss": 0.1141, "step": 12201 }, { "epoch": 0.2688746026761859, "grad_norm": 0.7716127038002014, "learning_rate": 2.5730578564611184e-05, "loss": 0.0954, "step": 12202 }, { "epoch": 0.26889663796570207, "grad_norm": 0.8506773710250854, "learning_rate": 2.5729830507328965e-05, "loss": 0.1169, "step": 12203 }, { "epoch": 0.26891867325521823, "grad_norm": 0.6772934198379517, "learning_rate": 2.5729082395393968e-05, "loss": 0.0832, "step": 12204 }, { "epoch": 0.2689407085447344, "grad_norm": 0.7240467071533203, "learning_rate": 2.572833422881e-05, "loss": 0.0799, "step": 12205 }, { "epoch": 0.26896274383425056, "grad_norm": 1.0448158979415894, "learning_rate": 2.5727586007580873e-05, "loss": 0.1132, "step": 12206 }, { "epoch": 0.2689847791237667, "grad_norm": 0.96946781873703, "learning_rate": 2.5726837731710398e-05, "loss": 0.0857, "step": 12207 }, { "epoch": 0.2690068144132829, "grad_norm": 1.6739579439163208, "learning_rate": 2.5726089401202388e-05, "loss": 0.0999, "step": 12208 }, { "epoch": 0.26902884970279906, "grad_norm": 0.6574826836585999, "learning_rate": 2.5725341016060647e-05, "loss": 0.0984, "step": 12209 }, { "epoch": 0.2690508849923152, "grad_norm": 0.5546188950538635, "learning_rate": 2.5724592576288995e-05, "loss": 0.1051, "step": 12210 }, { "epoch": 0.26907292028183133, "grad_norm": 0.6680443286895752, "learning_rate": 2.572384408189124e-05, "loss": 0.1107, "step": 12211 }, { "epoch": 0.2690949555713475, "grad_norm": 1.008316993713379, "learning_rate": 2.5723095532871203e-05, "loss": 0.1345, "step": 12212 }, { "epoch": 0.26911699086086366, "grad_norm": 0.9934218525886536, "learning_rate": 2.572234692923269e-05, "loss": 0.0925, "step": 12213 }, { "epoch": 0.2691390261503798, "grad_norm": 0.6833614706993103, "learning_rate": 2.572159827097951e-05, "loss": 0.1267, "step": 12214 }, { "epoch": 0.269161061439896, "grad_norm": 0.701620876789093, "learning_rate": 2.5720849558115478e-05, "loss": 0.1165, "step": 12215 }, { "epoch": 0.26918309672941215, "grad_norm": 1.0431363582611084, "learning_rate": 2.5720100790644417e-05, "loss": 0.1101, "step": 12216 }, { "epoch": 0.2692051320189283, "grad_norm": 1.3609182834625244, "learning_rate": 2.571935196857013e-05, "loss": 0.1287, "step": 12217 }, { "epoch": 0.2692271673084445, "grad_norm": 0.7753946781158447, "learning_rate": 2.5718603091896437e-05, "loss": 0.0917, "step": 12218 }, { "epoch": 0.26924920259796065, "grad_norm": 0.5790748596191406, "learning_rate": 2.5717854160627148e-05, "loss": 0.1017, "step": 12219 }, { "epoch": 0.2692712378874768, "grad_norm": 1.0426665544509888, "learning_rate": 2.5717105174766084e-05, "loss": 0.1263, "step": 12220 }, { "epoch": 0.269293273176993, "grad_norm": 1.2335789203643799, "learning_rate": 2.5716356134317053e-05, "loss": 0.1068, "step": 12221 }, { "epoch": 0.26931530846650914, "grad_norm": 0.9201109409332275, "learning_rate": 2.5715607039283874e-05, "loss": 0.0973, "step": 12222 }, { "epoch": 0.26933734375602525, "grad_norm": 0.9409505724906921, "learning_rate": 2.5714857889670362e-05, "loss": 0.1345, "step": 12223 }, { "epoch": 0.2693593790455414, "grad_norm": 0.80380779504776, "learning_rate": 2.571410868548034e-05, "loss": 0.0713, "step": 12224 }, { "epoch": 0.2693814143350576, "grad_norm": 0.567791223526001, "learning_rate": 2.571335942671761e-05, "loss": 0.0573, "step": 12225 }, { "epoch": 0.26940344962457374, "grad_norm": 0.5616557002067566, "learning_rate": 2.5712610113385994e-05, "loss": 0.1095, "step": 12226 }, { "epoch": 0.2694254849140899, "grad_norm": 0.6010384559631348, "learning_rate": 2.5711860745489317e-05, "loss": 0.0669, "step": 12227 }, { "epoch": 0.2694475202036061, "grad_norm": 0.6128642559051514, "learning_rate": 2.5711111323031385e-05, "loss": 0.1121, "step": 12228 }, { "epoch": 0.26946955549312224, "grad_norm": 0.6293120384216309, "learning_rate": 2.5710361846016017e-05, "loss": 0.0846, "step": 12229 }, { "epoch": 0.2694915907826384, "grad_norm": 0.6418121457099915, "learning_rate": 2.570961231444704e-05, "loss": 0.1043, "step": 12230 }, { "epoch": 0.26951362607215457, "grad_norm": 0.49137258529663086, "learning_rate": 2.5708862728328256e-05, "loss": 0.0574, "step": 12231 }, { "epoch": 0.26953566136167073, "grad_norm": 0.8329362273216248, "learning_rate": 2.5708113087663493e-05, "loss": 0.0826, "step": 12232 }, { "epoch": 0.2695576966511869, "grad_norm": 0.6442931294441223, "learning_rate": 2.5707363392456574e-05, "loss": 0.0974, "step": 12233 }, { "epoch": 0.26957973194070306, "grad_norm": 0.9504063129425049, "learning_rate": 2.570661364271131e-05, "loss": 0.102, "step": 12234 }, { "epoch": 0.26960176723021917, "grad_norm": 1.1301507949829102, "learning_rate": 2.5705863838431517e-05, "loss": 0.1055, "step": 12235 }, { "epoch": 0.26962380251973533, "grad_norm": 0.711790144443512, "learning_rate": 2.570511397962102e-05, "loss": 0.0883, "step": 12236 }, { "epoch": 0.2696458378092515, "grad_norm": 1.4717247486114502, "learning_rate": 2.570436406628364e-05, "loss": 0.1291, "step": 12237 }, { "epoch": 0.26966787309876766, "grad_norm": 0.6240182518959045, "learning_rate": 2.570361409842319e-05, "loss": 0.0969, "step": 12238 }, { "epoch": 0.2696899083882838, "grad_norm": 0.7961947917938232, "learning_rate": 2.5702864076043493e-05, "loss": 0.0637, "step": 12239 }, { "epoch": 0.2697119436778, "grad_norm": 0.8526113033294678, "learning_rate": 2.5702113999148377e-05, "loss": 0.1051, "step": 12240 }, { "epoch": 0.26973397896731616, "grad_norm": 0.7491725087165833, "learning_rate": 2.5701363867741646e-05, "loss": 0.1411, "step": 12241 }, { "epoch": 0.2697560142568323, "grad_norm": 0.7811357975006104, "learning_rate": 2.5700613681827138e-05, "loss": 0.0842, "step": 12242 }, { "epoch": 0.2697780495463485, "grad_norm": 0.7139568328857422, "learning_rate": 2.569986344140866e-05, "loss": 0.0868, "step": 12243 }, { "epoch": 0.26980008483586465, "grad_norm": 0.9479608535766602, "learning_rate": 2.5699113146490047e-05, "loss": 0.1307, "step": 12244 }, { "epoch": 0.2698221201253808, "grad_norm": 0.7827329039573669, "learning_rate": 2.5698362797075107e-05, "loss": 0.086, "step": 12245 }, { "epoch": 0.269844155414897, "grad_norm": 0.8257478475570679, "learning_rate": 2.569761239316767e-05, "loss": 0.0574, "step": 12246 }, { "epoch": 0.26986619070441314, "grad_norm": 1.090747594833374, "learning_rate": 2.569686193477156e-05, "loss": 0.1064, "step": 12247 }, { "epoch": 0.26988822599392925, "grad_norm": 1.230989694595337, "learning_rate": 2.569611142189059e-05, "loss": 0.1434, "step": 12248 }, { "epoch": 0.2699102612834454, "grad_norm": 0.7955703735351562, "learning_rate": 2.5695360854528594e-05, "loss": 0.0986, "step": 12249 }, { "epoch": 0.2699322965729616, "grad_norm": 1.088780403137207, "learning_rate": 2.5694610232689387e-05, "loss": 0.1496, "step": 12250 }, { "epoch": 0.26995433186247775, "grad_norm": 1.4866937398910522, "learning_rate": 2.5693859556376796e-05, "loss": 0.1805, "step": 12251 }, { "epoch": 0.2699763671519939, "grad_norm": 0.49801790714263916, "learning_rate": 2.5693108825594643e-05, "loss": 0.0804, "step": 12252 }, { "epoch": 0.2699984024415101, "grad_norm": 1.4470773935317993, "learning_rate": 2.5692358040346758e-05, "loss": 0.184, "step": 12253 }, { "epoch": 0.27002043773102624, "grad_norm": 0.903823733329773, "learning_rate": 2.5691607200636954e-05, "loss": 0.094, "step": 12254 }, { "epoch": 0.2700424730205424, "grad_norm": 1.4529823064804077, "learning_rate": 2.569085630646906e-05, "loss": 0.1059, "step": 12255 }, { "epoch": 0.27006450831005857, "grad_norm": 1.3909776210784912, "learning_rate": 2.5690105357846907e-05, "loss": 0.1415, "step": 12256 }, { "epoch": 0.27008654359957474, "grad_norm": 1.2823246717453003, "learning_rate": 2.568935435477431e-05, "loss": 0.1143, "step": 12257 }, { "epoch": 0.2701085788890909, "grad_norm": 0.8904117941856384, "learning_rate": 2.56886032972551e-05, "loss": 0.1149, "step": 12258 }, { "epoch": 0.27013061417860706, "grad_norm": 0.5508522391319275, "learning_rate": 2.568785218529311e-05, "loss": 0.118, "step": 12259 }, { "epoch": 0.2701526494681232, "grad_norm": 0.4703342020511627, "learning_rate": 2.5687101018892148e-05, "loss": 0.0646, "step": 12260 }, { "epoch": 0.27017468475763934, "grad_norm": 0.8654890060424805, "learning_rate": 2.568634979805605e-05, "loss": 0.1273, "step": 12261 }, { "epoch": 0.2701967200471555, "grad_norm": 0.627736508846283, "learning_rate": 2.568559852278865e-05, "loss": 0.0911, "step": 12262 }, { "epoch": 0.27021875533667167, "grad_norm": 1.0307821035385132, "learning_rate": 2.5684847193093754e-05, "loss": 0.1331, "step": 12263 }, { "epoch": 0.27024079062618783, "grad_norm": 0.8332681059837341, "learning_rate": 2.5684095808975215e-05, "loss": 0.0648, "step": 12264 }, { "epoch": 0.270262825915704, "grad_norm": 1.0137656927108765, "learning_rate": 2.5683344370436838e-05, "loss": 0.106, "step": 12265 }, { "epoch": 0.27028486120522016, "grad_norm": 0.7307497262954712, "learning_rate": 2.5682592877482462e-05, "loss": 0.1317, "step": 12266 }, { "epoch": 0.2703068964947363, "grad_norm": 1.0451306104660034, "learning_rate": 2.5681841330115913e-05, "loss": 0.1057, "step": 12267 }, { "epoch": 0.2703289317842525, "grad_norm": 0.9986295104026794, "learning_rate": 2.5681089728341013e-05, "loss": 0.1041, "step": 12268 }, { "epoch": 0.27035096707376866, "grad_norm": 0.6394060850143433, "learning_rate": 2.5680338072161602e-05, "loss": 0.0809, "step": 12269 }, { "epoch": 0.2703730023632848, "grad_norm": 1.1853982210159302, "learning_rate": 2.5679586361581498e-05, "loss": 0.1318, "step": 12270 }, { "epoch": 0.270395037652801, "grad_norm": 0.9348951578140259, "learning_rate": 2.5678834596604538e-05, "loss": 0.0852, "step": 12271 }, { "epoch": 0.2704170729423171, "grad_norm": 0.6431326866149902, "learning_rate": 2.5678082777234544e-05, "loss": 0.0988, "step": 12272 }, { "epoch": 0.27043910823183326, "grad_norm": 1.1940875053405762, "learning_rate": 2.5677330903475348e-05, "loss": 0.1497, "step": 12273 }, { "epoch": 0.2704611435213494, "grad_norm": 0.5024474859237671, "learning_rate": 2.567657897533078e-05, "loss": 0.1083, "step": 12274 }, { "epoch": 0.2704831788108656, "grad_norm": 0.6293361186981201, "learning_rate": 2.5675826992804668e-05, "loss": 0.0942, "step": 12275 }, { "epoch": 0.27050521410038175, "grad_norm": 0.8757293820381165, "learning_rate": 2.567507495590085e-05, "loss": 0.0828, "step": 12276 }, { "epoch": 0.2705272493898979, "grad_norm": 0.6519703269004822, "learning_rate": 2.5674322864623148e-05, "loss": 0.0859, "step": 12277 }, { "epoch": 0.2705492846794141, "grad_norm": 0.7018254399299622, "learning_rate": 2.5673570718975395e-05, "loss": 0.0923, "step": 12278 }, { "epoch": 0.27057131996893025, "grad_norm": 0.771246612071991, "learning_rate": 2.5672818518961422e-05, "loss": 0.1063, "step": 12279 }, { "epoch": 0.2705933552584464, "grad_norm": 2.055652379989624, "learning_rate": 2.5672066264585064e-05, "loss": 0.1051, "step": 12280 }, { "epoch": 0.2706153905479626, "grad_norm": 0.8134242296218872, "learning_rate": 2.5671313955850147e-05, "loss": 0.1256, "step": 12281 }, { "epoch": 0.27063742583747874, "grad_norm": 0.9217394590377808, "learning_rate": 2.5670561592760505e-05, "loss": 0.109, "step": 12282 }, { "epoch": 0.2706594611269949, "grad_norm": 0.896670401096344, "learning_rate": 2.5669809175319975e-05, "loss": 0.1145, "step": 12283 }, { "epoch": 0.27068149641651107, "grad_norm": 0.8844426274299622, "learning_rate": 2.566905670353238e-05, "loss": 0.1035, "step": 12284 }, { "epoch": 0.2707035317060272, "grad_norm": 0.6693241000175476, "learning_rate": 2.5668304177401558e-05, "loss": 0.0704, "step": 12285 }, { "epoch": 0.27072556699554334, "grad_norm": 0.5700424313545227, "learning_rate": 2.5667551596931346e-05, "loss": 0.0806, "step": 12286 }, { "epoch": 0.2707476022850595, "grad_norm": 0.5902991890907288, "learning_rate": 2.566679896212557e-05, "loss": 0.0931, "step": 12287 }, { "epoch": 0.2707696375745757, "grad_norm": 0.9512274265289307, "learning_rate": 2.566604627298807e-05, "loss": 0.1036, "step": 12288 }, { "epoch": 0.27079167286409184, "grad_norm": 0.8846476674079895, "learning_rate": 2.5665293529522675e-05, "loss": 0.1394, "step": 12289 }, { "epoch": 0.270813708153608, "grad_norm": 0.9997893571853638, "learning_rate": 2.566454073173322e-05, "loss": 0.0846, "step": 12290 }, { "epoch": 0.27083574344312417, "grad_norm": 0.6652158498764038, "learning_rate": 2.566378787962354e-05, "loss": 0.104, "step": 12291 }, { "epoch": 0.27085777873264033, "grad_norm": 0.7549585103988647, "learning_rate": 2.5663034973197476e-05, "loss": 0.0734, "step": 12292 }, { "epoch": 0.2708798140221565, "grad_norm": 0.743675947189331, "learning_rate": 2.5662282012458852e-05, "loss": 0.1047, "step": 12293 }, { "epoch": 0.27090184931167266, "grad_norm": 1.3090699911117554, "learning_rate": 2.566152899741151e-05, "loss": 0.0593, "step": 12294 }, { "epoch": 0.2709238846011888, "grad_norm": 0.7027400732040405, "learning_rate": 2.566077592805928e-05, "loss": 0.1024, "step": 12295 }, { "epoch": 0.270945919890705, "grad_norm": 0.8267204165458679, "learning_rate": 2.5660022804406002e-05, "loss": 0.1105, "step": 12296 }, { "epoch": 0.2709679551802211, "grad_norm": 0.8527529835700989, "learning_rate": 2.5659269626455514e-05, "loss": 0.1227, "step": 12297 }, { "epoch": 0.27098999046973726, "grad_norm": 1.005031943321228, "learning_rate": 2.565851639421165e-05, "loss": 0.1165, "step": 12298 }, { "epoch": 0.2710120257592534, "grad_norm": 0.7379010915756226, "learning_rate": 2.5657763107678246e-05, "loss": 0.0946, "step": 12299 }, { "epoch": 0.2710340610487696, "grad_norm": 0.6680765748023987, "learning_rate": 2.565700976685914e-05, "loss": 0.0906, "step": 12300 }, { "epoch": 0.27105609633828576, "grad_norm": 0.6776323914527893, "learning_rate": 2.5656256371758166e-05, "loss": 0.1227, "step": 12301 }, { "epoch": 0.2710781316278019, "grad_norm": 0.8523286581039429, "learning_rate": 2.5655502922379165e-05, "loss": 0.1287, "step": 12302 }, { "epoch": 0.2711001669173181, "grad_norm": 0.7259578704833984, "learning_rate": 2.565474941872597e-05, "loss": 0.1146, "step": 12303 }, { "epoch": 0.27112220220683425, "grad_norm": 0.7506229877471924, "learning_rate": 2.565399586080243e-05, "loss": 0.1002, "step": 12304 }, { "epoch": 0.2711442374963504, "grad_norm": 0.2812569737434387, "learning_rate": 2.5653242248612375e-05, "loss": 0.0575, "step": 12305 }, { "epoch": 0.2711662727858666, "grad_norm": 0.7207456231117249, "learning_rate": 2.5652488582159635e-05, "loss": 0.1118, "step": 12306 }, { "epoch": 0.27118830807538274, "grad_norm": 0.735379695892334, "learning_rate": 2.565173486144807e-05, "loss": 0.073, "step": 12307 }, { "epoch": 0.2712103433648989, "grad_norm": 1.1608316898345947, "learning_rate": 2.5650981086481502e-05, "loss": 0.1208, "step": 12308 }, { "epoch": 0.271232378654415, "grad_norm": 1.0537598133087158, "learning_rate": 2.565022725726378e-05, "loss": 0.1511, "step": 12309 }, { "epoch": 0.2712544139439312, "grad_norm": 1.2676175832748413, "learning_rate": 2.564947337379873e-05, "loss": 0.1314, "step": 12310 }, { "epoch": 0.27127644923344735, "grad_norm": 0.4730457067489624, "learning_rate": 2.564871943609021e-05, "loss": 0.0641, "step": 12311 }, { "epoch": 0.2712984845229635, "grad_norm": 0.6626254320144653, "learning_rate": 2.564796544414205e-05, "loss": 0.1162, "step": 12312 }, { "epoch": 0.2713205198124797, "grad_norm": 0.5785676836967468, "learning_rate": 2.5647211397958092e-05, "loss": 0.0897, "step": 12313 }, { "epoch": 0.27134255510199584, "grad_norm": 0.655579686164856, "learning_rate": 2.5646457297542174e-05, "loss": 0.1017, "step": 12314 }, { "epoch": 0.271364590391512, "grad_norm": 0.9013640284538269, "learning_rate": 2.564570314289814e-05, "loss": 0.095, "step": 12315 }, { "epoch": 0.27138662568102817, "grad_norm": 0.7325394749641418, "learning_rate": 2.5644948934029835e-05, "loss": 0.0874, "step": 12316 }, { "epoch": 0.27140866097054434, "grad_norm": 0.8461979627609253, "learning_rate": 2.5644194670941094e-05, "loss": 0.1234, "step": 12317 }, { "epoch": 0.2714306962600605, "grad_norm": 0.7131909132003784, "learning_rate": 2.5643440353635763e-05, "loss": 0.0768, "step": 12318 }, { "epoch": 0.27145273154957666, "grad_norm": 1.1402109861373901, "learning_rate": 2.5642685982117684e-05, "loss": 0.1044, "step": 12319 }, { "epoch": 0.27147476683909283, "grad_norm": 0.4654819667339325, "learning_rate": 2.56419315563907e-05, "loss": 0.1364, "step": 12320 }, { "epoch": 0.271496802128609, "grad_norm": 0.9141680598258972, "learning_rate": 2.5641177076458643e-05, "loss": 0.1245, "step": 12321 }, { "epoch": 0.2715188374181251, "grad_norm": 0.8188886642456055, "learning_rate": 2.5640422542325373e-05, "loss": 0.0538, "step": 12322 }, { "epoch": 0.27154087270764127, "grad_norm": 0.6904820799827576, "learning_rate": 2.5639667953994724e-05, "loss": 0.1069, "step": 12323 }, { "epoch": 0.27156290799715743, "grad_norm": 0.5853683352470398, "learning_rate": 2.563891331147054e-05, "loss": 0.1472, "step": 12324 }, { "epoch": 0.2715849432866736, "grad_norm": 0.6770089268684387, "learning_rate": 2.5638158614756665e-05, "loss": 0.1009, "step": 12325 }, { "epoch": 0.27160697857618976, "grad_norm": 0.9124160408973694, "learning_rate": 2.563740386385694e-05, "loss": 0.1442, "step": 12326 }, { "epoch": 0.2716290138657059, "grad_norm": 0.9306375980377197, "learning_rate": 2.5636649058775218e-05, "loss": 0.0993, "step": 12327 }, { "epoch": 0.2716510491552221, "grad_norm": 0.6905512809753418, "learning_rate": 2.5635894199515334e-05, "loss": 0.0731, "step": 12328 }, { "epoch": 0.27167308444473826, "grad_norm": 0.5323866605758667, "learning_rate": 2.5635139286081144e-05, "loss": 0.0758, "step": 12329 }, { "epoch": 0.2716951197342544, "grad_norm": 0.6673007607460022, "learning_rate": 2.5634384318476483e-05, "loss": 0.1069, "step": 12330 }, { "epoch": 0.2717171550237706, "grad_norm": 0.6082770228385925, "learning_rate": 2.56336292967052e-05, "loss": 0.0777, "step": 12331 }, { "epoch": 0.27173919031328675, "grad_norm": 0.768129825592041, "learning_rate": 2.563287422077114e-05, "loss": 0.1018, "step": 12332 }, { "epoch": 0.2717612256028029, "grad_norm": 0.8950440883636475, "learning_rate": 2.5632119090678147e-05, "loss": 0.0886, "step": 12333 }, { "epoch": 0.271783260892319, "grad_norm": 0.8724617958068848, "learning_rate": 2.5631363906430074e-05, "loss": 0.1358, "step": 12334 }, { "epoch": 0.2718052961818352, "grad_norm": 1.1004911661148071, "learning_rate": 2.5630608668030765e-05, "loss": 0.1169, "step": 12335 }, { "epoch": 0.27182733147135135, "grad_norm": 0.7467524409294128, "learning_rate": 2.5629853375484062e-05, "loss": 0.1026, "step": 12336 }, { "epoch": 0.2718493667608675, "grad_norm": 0.789730966091156, "learning_rate": 2.5629098028793817e-05, "loss": 0.1449, "step": 12337 }, { "epoch": 0.2718714020503837, "grad_norm": 0.8468882441520691, "learning_rate": 2.5628342627963876e-05, "loss": 0.1233, "step": 12338 }, { "epoch": 0.27189343733989985, "grad_norm": 0.8970775604248047, "learning_rate": 2.562758717299809e-05, "loss": 0.095, "step": 12339 }, { "epoch": 0.271915472629416, "grad_norm": 0.7623366117477417, "learning_rate": 2.56268316639003e-05, "loss": 0.0732, "step": 12340 }, { "epoch": 0.2719375079189322, "grad_norm": 0.7431120276451111, "learning_rate": 2.5626076100674358e-05, "loss": 0.0832, "step": 12341 }, { "epoch": 0.27195954320844834, "grad_norm": 0.7817902565002441, "learning_rate": 2.5625320483324113e-05, "loss": 0.1055, "step": 12342 }, { "epoch": 0.2719815784979645, "grad_norm": 0.8738980889320374, "learning_rate": 2.562456481185341e-05, "loss": 0.1046, "step": 12343 }, { "epoch": 0.27200361378748067, "grad_norm": 0.7382611036300659, "learning_rate": 2.562380908626611e-05, "loss": 0.1027, "step": 12344 }, { "epoch": 0.27202564907699683, "grad_norm": 0.7376950979232788, "learning_rate": 2.5623053306566045e-05, "loss": 0.1067, "step": 12345 }, { "epoch": 0.27204768436651294, "grad_norm": 0.8360311388969421, "learning_rate": 2.562229747275707e-05, "loss": 0.0949, "step": 12346 }, { "epoch": 0.2720697196560291, "grad_norm": 0.6524226069450378, "learning_rate": 2.5621541584843044e-05, "loss": 0.0956, "step": 12347 }, { "epoch": 0.27209175494554527, "grad_norm": 0.49191564321517944, "learning_rate": 2.562078564282781e-05, "loss": 0.0791, "step": 12348 }, { "epoch": 0.27211379023506144, "grad_norm": 0.8457415103912354, "learning_rate": 2.562002964671522e-05, "loss": 0.0969, "step": 12349 }, { "epoch": 0.2721358255245776, "grad_norm": 0.7308583855628967, "learning_rate": 2.5619273596509126e-05, "loss": 0.1067, "step": 12350 }, { "epoch": 0.27215786081409377, "grad_norm": 0.8081404566764832, "learning_rate": 2.5618517492213372e-05, "loss": 0.0974, "step": 12351 }, { "epoch": 0.27217989610360993, "grad_norm": 0.8192729353904724, "learning_rate": 2.5617761333831816e-05, "loss": 0.1183, "step": 12352 }, { "epoch": 0.2722019313931261, "grad_norm": 0.7076566815376282, "learning_rate": 2.5617005121368308e-05, "loss": 0.1324, "step": 12353 }, { "epoch": 0.27222396668264226, "grad_norm": 0.7015531063079834, "learning_rate": 2.56162488548267e-05, "loss": 0.0848, "step": 12354 }, { "epoch": 0.2722460019721584, "grad_norm": 1.1022332906723022, "learning_rate": 2.5615492534210843e-05, "loss": 0.0942, "step": 12355 }, { "epoch": 0.2722680372616746, "grad_norm": 1.014668345451355, "learning_rate": 2.561473615952459e-05, "loss": 0.0977, "step": 12356 }, { "epoch": 0.27229007255119075, "grad_norm": 0.8710637092590332, "learning_rate": 2.5613979730771795e-05, "loss": 0.0913, "step": 12357 }, { "epoch": 0.2723121078407069, "grad_norm": 0.7725839018821716, "learning_rate": 2.561322324795631e-05, "loss": 0.1011, "step": 12358 }, { "epoch": 0.272334143130223, "grad_norm": 0.6362655758857727, "learning_rate": 2.5612466711081987e-05, "loss": 0.0526, "step": 12359 }, { "epoch": 0.2723561784197392, "grad_norm": 1.2358124256134033, "learning_rate": 2.561171012015268e-05, "loss": 0.1252, "step": 12360 }, { "epoch": 0.27237821370925536, "grad_norm": 0.8853369355201721, "learning_rate": 2.561095347517224e-05, "loss": 0.0826, "step": 12361 }, { "epoch": 0.2724002489987715, "grad_norm": 0.8766467571258545, "learning_rate": 2.561019677614453e-05, "loss": 0.1273, "step": 12362 }, { "epoch": 0.2724222842882877, "grad_norm": 0.9167178273200989, "learning_rate": 2.5609440023073396e-05, "loss": 0.1049, "step": 12363 }, { "epoch": 0.27244431957780385, "grad_norm": 0.7251109480857849, "learning_rate": 2.5608683215962693e-05, "loss": 0.0955, "step": 12364 }, { "epoch": 0.27246635486732, "grad_norm": 0.9151336550712585, "learning_rate": 2.5607926354816278e-05, "loss": 0.0913, "step": 12365 }, { "epoch": 0.2724883901568362, "grad_norm": 0.5953317284584045, "learning_rate": 2.5607169439638004e-05, "loss": 0.0954, "step": 12366 }, { "epoch": 0.27251042544635234, "grad_norm": 0.9944020509719849, "learning_rate": 2.560641247043173e-05, "loss": 0.0871, "step": 12367 }, { "epoch": 0.2725324607358685, "grad_norm": 0.9472521543502808, "learning_rate": 2.560565544720131e-05, "loss": 0.1246, "step": 12368 }, { "epoch": 0.2725544960253847, "grad_norm": 1.2315467596054077, "learning_rate": 2.5604898369950602e-05, "loss": 0.1272, "step": 12369 }, { "epoch": 0.27257653131490084, "grad_norm": 0.8383703231811523, "learning_rate": 2.5604141238683452e-05, "loss": 0.0854, "step": 12370 }, { "epoch": 0.27259856660441695, "grad_norm": 0.9178429245948792, "learning_rate": 2.5603384053403732e-05, "loss": 0.1105, "step": 12371 }, { "epoch": 0.2726206018939331, "grad_norm": 0.8049831390380859, "learning_rate": 2.5602626814115287e-05, "loss": 0.0846, "step": 12372 }, { "epoch": 0.2726426371834493, "grad_norm": 1.051829218864441, "learning_rate": 2.5601869520821984e-05, "loss": 0.0935, "step": 12373 }, { "epoch": 0.27266467247296544, "grad_norm": 1.3643771409988403, "learning_rate": 2.5601112173527666e-05, "loss": 0.1165, "step": 12374 }, { "epoch": 0.2726867077624816, "grad_norm": 0.6122249364852905, "learning_rate": 2.56003547722362e-05, "loss": 0.1349, "step": 12375 }, { "epoch": 0.27270874305199777, "grad_norm": 0.895010769367218, "learning_rate": 2.5599597316951445e-05, "loss": 0.0877, "step": 12376 }, { "epoch": 0.27273077834151394, "grad_norm": 0.6255603432655334, "learning_rate": 2.5598839807677262e-05, "loss": 0.1174, "step": 12377 }, { "epoch": 0.2727528136310301, "grad_norm": 0.5460942983627319, "learning_rate": 2.5598082244417495e-05, "loss": 0.093, "step": 12378 }, { "epoch": 0.27277484892054626, "grad_norm": 0.549185574054718, "learning_rate": 2.559732462717602e-05, "loss": 0.1022, "step": 12379 }, { "epoch": 0.27279688421006243, "grad_norm": 1.0383659601211548, "learning_rate": 2.5596566955956683e-05, "loss": 0.102, "step": 12380 }, { "epoch": 0.2728189194995786, "grad_norm": 0.8748606443405151, "learning_rate": 2.5595809230763353e-05, "loss": 0.0823, "step": 12381 }, { "epoch": 0.27284095478909476, "grad_norm": 0.8629230260848999, "learning_rate": 2.559505145159988e-05, "loss": 0.0995, "step": 12382 }, { "epoch": 0.2728629900786109, "grad_norm": 0.831593930721283, "learning_rate": 2.559429361847013e-05, "loss": 0.1179, "step": 12383 }, { "epoch": 0.27288502536812703, "grad_norm": 1.7662159204483032, "learning_rate": 2.5593535731377958e-05, "loss": 0.0817, "step": 12384 }, { "epoch": 0.2729070606576432, "grad_norm": 0.9415401220321655, "learning_rate": 2.559277779032723e-05, "loss": 0.1128, "step": 12385 }, { "epoch": 0.27292909594715936, "grad_norm": 0.6062628030776978, "learning_rate": 2.559201979532181e-05, "loss": 0.1002, "step": 12386 }, { "epoch": 0.2729511312366755, "grad_norm": 0.9222928881645203, "learning_rate": 2.5591261746365548e-05, "loss": 0.1074, "step": 12387 }, { "epoch": 0.2729731665261917, "grad_norm": 1.0524495840072632, "learning_rate": 2.559050364346231e-05, "loss": 0.1023, "step": 12388 }, { "epoch": 0.27299520181570786, "grad_norm": 0.7299909591674805, "learning_rate": 2.558974548661596e-05, "loss": 0.1154, "step": 12389 }, { "epoch": 0.273017237105224, "grad_norm": 0.9003955125808716, "learning_rate": 2.5588987275830356e-05, "loss": 0.1064, "step": 12390 }, { "epoch": 0.2730392723947402, "grad_norm": 2.0920250415802, "learning_rate": 2.558822901110936e-05, "loss": 0.1066, "step": 12391 }, { "epoch": 0.27306130768425635, "grad_norm": 1.0563117265701294, "learning_rate": 2.5587470692456844e-05, "loss": 0.1053, "step": 12392 }, { "epoch": 0.2730833429737725, "grad_norm": 0.5255282521247864, "learning_rate": 2.5586712319876652e-05, "loss": 0.0617, "step": 12393 }, { "epoch": 0.2731053782632887, "grad_norm": 0.7866507172584534, "learning_rate": 2.5585953893372663e-05, "loss": 0.1018, "step": 12394 }, { "epoch": 0.27312741355280484, "grad_norm": 1.3897705078125, "learning_rate": 2.5585195412948735e-05, "loss": 0.1396, "step": 12395 }, { "epoch": 0.27314944884232095, "grad_norm": 0.8318622708320618, "learning_rate": 2.5584436878608727e-05, "loss": 0.1019, "step": 12396 }, { "epoch": 0.2731714841318371, "grad_norm": 0.8884698152542114, "learning_rate": 2.5583678290356504e-05, "loss": 0.11, "step": 12397 }, { "epoch": 0.2731935194213533, "grad_norm": 0.9989001154899597, "learning_rate": 2.5582919648195938e-05, "loss": 0.1149, "step": 12398 }, { "epoch": 0.27321555471086945, "grad_norm": 1.0356395244598389, "learning_rate": 2.5582160952130886e-05, "loss": 0.1404, "step": 12399 }, { "epoch": 0.2732375900003856, "grad_norm": 0.8035906553268433, "learning_rate": 2.5581402202165213e-05, "loss": 0.1567, "step": 12400 }, { "epoch": 0.2732596252899018, "grad_norm": 1.45783269405365, "learning_rate": 2.5580643398302784e-05, "loss": 0.1133, "step": 12401 }, { "epoch": 0.27328166057941794, "grad_norm": 0.6301375031471252, "learning_rate": 2.5579884540547464e-05, "loss": 0.1103, "step": 12402 }, { "epoch": 0.2733036958689341, "grad_norm": 0.5701642632484436, "learning_rate": 2.5579125628903114e-05, "loss": 0.0907, "step": 12403 }, { "epoch": 0.27332573115845027, "grad_norm": 0.6882928013801575, "learning_rate": 2.557836666337361e-05, "loss": 0.0802, "step": 12404 }, { "epoch": 0.27334776644796643, "grad_norm": 0.6421954035758972, "learning_rate": 2.5577607643962812e-05, "loss": 0.0609, "step": 12405 }, { "epoch": 0.2733698017374826, "grad_norm": 0.5055539608001709, "learning_rate": 2.5576848570674584e-05, "loss": 0.0763, "step": 12406 }, { "epoch": 0.27339183702699876, "grad_norm": 0.5418874025344849, "learning_rate": 2.5576089443512798e-05, "loss": 0.0785, "step": 12407 }, { "epoch": 0.27341387231651487, "grad_norm": 0.9160182476043701, "learning_rate": 2.557533026248131e-05, "loss": 0.0969, "step": 12408 }, { "epoch": 0.27343590760603104, "grad_norm": 0.8016926646232605, "learning_rate": 2.5574571027584002e-05, "loss": 0.0776, "step": 12409 }, { "epoch": 0.2734579428955472, "grad_norm": 0.8680117726325989, "learning_rate": 2.5573811738824728e-05, "loss": 0.1078, "step": 12410 }, { "epoch": 0.27347997818506337, "grad_norm": 0.8411986827850342, "learning_rate": 2.5573052396207365e-05, "loss": 0.1047, "step": 12411 }, { "epoch": 0.27350201347457953, "grad_norm": 0.5981715321540833, "learning_rate": 2.5572292999735773e-05, "loss": 0.0723, "step": 12412 }, { "epoch": 0.2735240487640957, "grad_norm": 0.7713413834571838, "learning_rate": 2.5571533549413824e-05, "loss": 0.1056, "step": 12413 }, { "epoch": 0.27354608405361186, "grad_norm": 0.7249821424484253, "learning_rate": 2.5570774045245388e-05, "loss": 0.0992, "step": 12414 }, { "epoch": 0.273568119343128, "grad_norm": 0.9800817966461182, "learning_rate": 2.5570014487234326e-05, "loss": 0.1008, "step": 12415 }, { "epoch": 0.2735901546326442, "grad_norm": 0.9697549939155579, "learning_rate": 2.5569254875384516e-05, "loss": 0.1015, "step": 12416 }, { "epoch": 0.27361218992216035, "grad_norm": 0.857779860496521, "learning_rate": 2.5568495209699822e-05, "loss": 0.0996, "step": 12417 }, { "epoch": 0.2736342252116765, "grad_norm": 0.7898589968681335, "learning_rate": 2.5567735490184118e-05, "loss": 0.0877, "step": 12418 }, { "epoch": 0.2736562605011927, "grad_norm": 0.8772608041763306, "learning_rate": 2.556697571684127e-05, "loss": 0.0801, "step": 12419 }, { "epoch": 0.27367829579070885, "grad_norm": 0.8956435322761536, "learning_rate": 2.5566215889675144e-05, "loss": 0.0834, "step": 12420 }, { "epoch": 0.27370033108022496, "grad_norm": 0.6768117547035217, "learning_rate": 2.5565456008689617e-05, "loss": 0.1026, "step": 12421 }, { "epoch": 0.2737223663697411, "grad_norm": 0.5945755243301392, "learning_rate": 2.5564696073888556e-05, "loss": 0.081, "step": 12422 }, { "epoch": 0.2737444016592573, "grad_norm": 0.64774489402771, "learning_rate": 2.5563936085275834e-05, "loss": 0.0502, "step": 12423 }, { "epoch": 0.27376643694877345, "grad_norm": 0.7759250998497009, "learning_rate": 2.5563176042855317e-05, "loss": 0.0865, "step": 12424 }, { "epoch": 0.2737884722382896, "grad_norm": 0.8825563788414001, "learning_rate": 2.556241594663088e-05, "loss": 0.1116, "step": 12425 }, { "epoch": 0.2738105075278058, "grad_norm": 0.6431601047515869, "learning_rate": 2.55616557966064e-05, "loss": 0.059, "step": 12426 }, { "epoch": 0.27383254281732194, "grad_norm": 0.6250145435333252, "learning_rate": 2.5560895592785743e-05, "loss": 0.088, "step": 12427 }, { "epoch": 0.2738545781068381, "grad_norm": 0.42213550209999084, "learning_rate": 2.556013533517278e-05, "loss": 0.0817, "step": 12428 }, { "epoch": 0.2738766133963543, "grad_norm": 1.303040623664856, "learning_rate": 2.555937502377138e-05, "loss": 0.1055, "step": 12429 }, { "epoch": 0.27389864868587044, "grad_norm": 0.7259240746498108, "learning_rate": 2.5558614658585428e-05, "loss": 0.0694, "step": 12430 }, { "epoch": 0.2739206839753866, "grad_norm": 0.8379858136177063, "learning_rate": 2.5557854239618782e-05, "loss": 0.1282, "step": 12431 }, { "epoch": 0.27394271926490277, "grad_norm": 0.8921361565589905, "learning_rate": 2.5557093766875326e-05, "loss": 0.0835, "step": 12432 }, { "epoch": 0.2739647545544189, "grad_norm": 0.466297447681427, "learning_rate": 2.5556333240358933e-05, "loss": 0.0941, "step": 12433 }, { "epoch": 0.27398678984393504, "grad_norm": 0.5264007449150085, "learning_rate": 2.5555572660073473e-05, "loss": 0.1403, "step": 12434 }, { "epoch": 0.2740088251334512, "grad_norm": 0.47567155957221985, "learning_rate": 2.555481202602282e-05, "loss": 0.0567, "step": 12435 }, { "epoch": 0.27403086042296737, "grad_norm": 0.717728316783905, "learning_rate": 2.5554051338210853e-05, "loss": 0.1458, "step": 12436 }, { "epoch": 0.27405289571248354, "grad_norm": 1.0399247407913208, "learning_rate": 2.5553290596641433e-05, "loss": 0.1232, "step": 12437 }, { "epoch": 0.2740749310019997, "grad_norm": 0.5457565188407898, "learning_rate": 2.5552529801318453e-05, "loss": 0.0839, "step": 12438 }, { "epoch": 0.27409696629151586, "grad_norm": 0.8205020427703857, "learning_rate": 2.5551768952245783e-05, "loss": 0.0879, "step": 12439 }, { "epoch": 0.27411900158103203, "grad_norm": 1.0151784420013428, "learning_rate": 2.5551008049427288e-05, "loss": 0.112, "step": 12440 }, { "epoch": 0.2741410368705482, "grad_norm": 1.0700032711029053, "learning_rate": 2.5550247092866856e-05, "loss": 0.1146, "step": 12441 }, { "epoch": 0.27416307216006436, "grad_norm": 0.6462833881378174, "learning_rate": 2.5549486082568353e-05, "loss": 0.1102, "step": 12442 }, { "epoch": 0.2741851074495805, "grad_norm": 0.4181552231311798, "learning_rate": 2.5548725018535666e-05, "loss": 0.102, "step": 12443 }, { "epoch": 0.2742071427390967, "grad_norm": 0.7122339606285095, "learning_rate": 2.5547963900772663e-05, "loss": 0.0802, "step": 12444 }, { "epoch": 0.2742291780286128, "grad_norm": 0.9220147728919983, "learning_rate": 2.5547202729283225e-05, "loss": 0.0925, "step": 12445 }, { "epoch": 0.27425121331812896, "grad_norm": 0.7796754240989685, "learning_rate": 2.5546441504071227e-05, "loss": 0.1185, "step": 12446 }, { "epoch": 0.2742732486076451, "grad_norm": 0.7592527866363525, "learning_rate": 2.5545680225140548e-05, "loss": 0.1096, "step": 12447 }, { "epoch": 0.2742952838971613, "grad_norm": 0.5825967192649841, "learning_rate": 2.554491889249506e-05, "loss": 0.1203, "step": 12448 }, { "epoch": 0.27431731918667746, "grad_norm": 0.8628989458084106, "learning_rate": 2.554415750613865e-05, "loss": 0.1004, "step": 12449 }, { "epoch": 0.2743393544761936, "grad_norm": 0.7878066897392273, "learning_rate": 2.554339606607519e-05, "loss": 0.1209, "step": 12450 }, { "epoch": 0.2743613897657098, "grad_norm": 0.9551987648010254, "learning_rate": 2.554263457230856e-05, "loss": 0.1216, "step": 12451 }, { "epoch": 0.27438342505522595, "grad_norm": 0.629035234451294, "learning_rate": 2.5541873024842636e-05, "loss": 0.0858, "step": 12452 }, { "epoch": 0.2744054603447421, "grad_norm": 0.8496714234352112, "learning_rate": 2.5541111423681308e-05, "loss": 0.1098, "step": 12453 }, { "epoch": 0.2744274956342583, "grad_norm": 0.7071658968925476, "learning_rate": 2.554034976882844e-05, "loss": 0.0982, "step": 12454 }, { "epoch": 0.27444953092377444, "grad_norm": 1.9967244863510132, "learning_rate": 2.553958806028792e-05, "loss": 0.1203, "step": 12455 }, { "epoch": 0.2744715662132906, "grad_norm": 0.7137174010276794, "learning_rate": 2.5538826298063625e-05, "loss": 0.1331, "step": 12456 }, { "epoch": 0.27449360150280677, "grad_norm": 0.6655632257461548, "learning_rate": 2.5538064482159438e-05, "loss": 0.1144, "step": 12457 }, { "epoch": 0.2745156367923229, "grad_norm": 0.8587616682052612, "learning_rate": 2.5537302612579238e-05, "loss": 0.0849, "step": 12458 }, { "epoch": 0.27453767208183905, "grad_norm": 1.2980010509490967, "learning_rate": 2.5536540689326903e-05, "loss": 0.103, "step": 12459 }, { "epoch": 0.2745597073713552, "grad_norm": 0.7223560214042664, "learning_rate": 2.5535778712406317e-05, "loss": 0.1165, "step": 12460 }, { "epoch": 0.2745817426608714, "grad_norm": 0.7663379907608032, "learning_rate": 2.5535016681821362e-05, "loss": 0.1231, "step": 12461 }, { "epoch": 0.27460377795038754, "grad_norm": 0.7278308272361755, "learning_rate": 2.5534254597575915e-05, "loss": 0.1223, "step": 12462 }, { "epoch": 0.2746258132399037, "grad_norm": 0.8828038573265076, "learning_rate": 2.5533492459673867e-05, "loss": 0.1063, "step": 12463 }, { "epoch": 0.27464784852941987, "grad_norm": 0.6502294540405273, "learning_rate": 2.5532730268119083e-05, "loss": 0.0892, "step": 12464 }, { "epoch": 0.27466988381893603, "grad_norm": 0.8640602231025696, "learning_rate": 2.553196802291546e-05, "loss": 0.1182, "step": 12465 }, { "epoch": 0.2746919191084522, "grad_norm": 0.663076639175415, "learning_rate": 2.553120572406688e-05, "loss": 0.0849, "step": 12466 }, { "epoch": 0.27471395439796836, "grad_norm": 1.1870485544204712, "learning_rate": 2.553044337157722e-05, "loss": 0.1137, "step": 12467 }, { "epoch": 0.2747359896874845, "grad_norm": 0.9818976521492004, "learning_rate": 2.552968096545036e-05, "loss": 0.1027, "step": 12468 }, { "epoch": 0.2747580249770007, "grad_norm": 0.7355647087097168, "learning_rate": 2.5528918505690197e-05, "loss": 0.0943, "step": 12469 }, { "epoch": 0.2747800602665168, "grad_norm": 0.6765120029449463, "learning_rate": 2.55281559923006e-05, "loss": 0.096, "step": 12470 }, { "epoch": 0.27480209555603297, "grad_norm": 1.1823616027832031, "learning_rate": 2.5527393425285464e-05, "loss": 0.1131, "step": 12471 }, { "epoch": 0.27482413084554913, "grad_norm": 0.26023048162460327, "learning_rate": 2.552663080464866e-05, "loss": 0.0743, "step": 12472 }, { "epoch": 0.2748461661350653, "grad_norm": 0.8232349753379822, "learning_rate": 2.5525868130394086e-05, "loss": 0.0766, "step": 12473 }, { "epoch": 0.27486820142458146, "grad_norm": 1.3229668140411377, "learning_rate": 2.5525105402525623e-05, "loss": 0.1028, "step": 12474 }, { "epoch": 0.2748902367140976, "grad_norm": 0.5450406074523926, "learning_rate": 2.5524342621047146e-05, "loss": 0.11, "step": 12475 }, { "epoch": 0.2749122720036138, "grad_norm": 1.0596848726272583, "learning_rate": 2.5523579785962556e-05, "loss": 0.0862, "step": 12476 }, { "epoch": 0.27493430729312995, "grad_norm": 0.7953414916992188, "learning_rate": 2.552281689727573e-05, "loss": 0.114, "step": 12477 }, { "epoch": 0.2749563425826461, "grad_norm": 0.552783727645874, "learning_rate": 2.552205395499055e-05, "loss": 0.0789, "step": 12478 }, { "epoch": 0.2749783778721623, "grad_norm": 0.768190324306488, "learning_rate": 2.5521290959110912e-05, "loss": 0.1024, "step": 12479 }, { "epoch": 0.27500041316167845, "grad_norm": 0.6485816836357117, "learning_rate": 2.5520527909640698e-05, "loss": 0.0902, "step": 12480 }, { "epoch": 0.2750224484511946, "grad_norm": 0.6652829647064209, "learning_rate": 2.5519764806583787e-05, "loss": 0.1047, "step": 12481 }, { "epoch": 0.2750444837407107, "grad_norm": 0.7436160445213318, "learning_rate": 2.5519001649944078e-05, "loss": 0.0861, "step": 12482 }, { "epoch": 0.2750665190302269, "grad_norm": 0.6847408413887024, "learning_rate": 2.551823843972545e-05, "loss": 0.0711, "step": 12483 }, { "epoch": 0.27508855431974305, "grad_norm": 0.8848552107810974, "learning_rate": 2.5517475175931802e-05, "loss": 0.1073, "step": 12484 }, { "epoch": 0.2751105896092592, "grad_norm": 1.6363939046859741, "learning_rate": 2.5516711858567e-05, "loss": 0.1408, "step": 12485 }, { "epoch": 0.2751326248987754, "grad_norm": 0.6009572744369507, "learning_rate": 2.5515948487634956e-05, "loss": 0.0943, "step": 12486 }, { "epoch": 0.27515466018829154, "grad_norm": 1.3251394033432007, "learning_rate": 2.5515185063139542e-05, "loss": 0.1286, "step": 12487 }, { "epoch": 0.2751766954778077, "grad_norm": 0.893646240234375, "learning_rate": 2.5514421585084652e-05, "loss": 0.1083, "step": 12488 }, { "epoch": 0.2751987307673239, "grad_norm": 1.3138388395309448, "learning_rate": 2.5513658053474174e-05, "loss": 0.0895, "step": 12489 }, { "epoch": 0.27522076605684004, "grad_norm": 0.789580225944519, "learning_rate": 2.5512894468312e-05, "loss": 0.1028, "step": 12490 }, { "epoch": 0.2752428013463562, "grad_norm": 0.9854081273078918, "learning_rate": 2.5512130829602013e-05, "loss": 0.0986, "step": 12491 }, { "epoch": 0.27526483663587237, "grad_norm": 0.7409067153930664, "learning_rate": 2.551136713734811e-05, "loss": 0.105, "step": 12492 }, { "epoch": 0.27528687192538853, "grad_norm": 1.4754469394683838, "learning_rate": 2.5510603391554178e-05, "loss": 0.1208, "step": 12493 }, { "epoch": 0.2753089072149047, "grad_norm": 1.0254871845245361, "learning_rate": 2.5509839592224103e-05, "loss": 0.1238, "step": 12494 }, { "epoch": 0.2753309425044208, "grad_norm": 0.9665399789810181, "learning_rate": 2.5509075739361786e-05, "loss": 0.091, "step": 12495 }, { "epoch": 0.27535297779393697, "grad_norm": 0.6093981862068176, "learning_rate": 2.55083118329711e-05, "loss": 0.0766, "step": 12496 }, { "epoch": 0.27537501308345314, "grad_norm": 0.5178428888320923, "learning_rate": 2.5507547873055955e-05, "loss": 0.0971, "step": 12497 }, { "epoch": 0.2753970483729693, "grad_norm": 0.7618358731269836, "learning_rate": 2.550678385962023e-05, "loss": 0.1075, "step": 12498 }, { "epoch": 0.27541908366248546, "grad_norm": 0.8577805161476135, "learning_rate": 2.5506019792667826e-05, "loss": 0.0795, "step": 12499 }, { "epoch": 0.27544111895200163, "grad_norm": 0.966547429561615, "learning_rate": 2.5505255672202624e-05, "loss": 0.1087, "step": 12500 }, { "epoch": 0.2754631542415178, "grad_norm": 0.9887728691101074, "learning_rate": 2.5504491498228525e-05, "loss": 0.0752, "step": 12501 }, { "epoch": 0.27548518953103396, "grad_norm": 0.8085424304008484, "learning_rate": 2.5503727270749418e-05, "loss": 0.0966, "step": 12502 }, { "epoch": 0.2755072248205501, "grad_norm": 0.7673003077507019, "learning_rate": 2.5502962989769196e-05, "loss": 0.1057, "step": 12503 }, { "epoch": 0.2755292601100663, "grad_norm": 1.108567476272583, "learning_rate": 2.5502198655291745e-05, "loss": 0.0969, "step": 12504 }, { "epoch": 0.27555129539958245, "grad_norm": 0.7971130013465881, "learning_rate": 2.550143426732097e-05, "loss": 0.116, "step": 12505 }, { "epoch": 0.2755733306890986, "grad_norm": 0.44234102964401245, "learning_rate": 2.5500669825860762e-05, "loss": 0.0951, "step": 12506 }, { "epoch": 0.2755953659786147, "grad_norm": 1.0729018449783325, "learning_rate": 2.5499905330915007e-05, "loss": 0.1222, "step": 12507 }, { "epoch": 0.2756174012681309, "grad_norm": 0.5535479187965393, "learning_rate": 2.54991407824876e-05, "loss": 0.1152, "step": 12508 }, { "epoch": 0.27563943655764706, "grad_norm": 0.8839995861053467, "learning_rate": 2.5498376180582445e-05, "loss": 0.1258, "step": 12509 }, { "epoch": 0.2756614718471632, "grad_norm": 0.4108794629573822, "learning_rate": 2.5497611525203427e-05, "loss": 0.0752, "step": 12510 }, { "epoch": 0.2756835071366794, "grad_norm": 0.7348236441612244, "learning_rate": 2.549684681635445e-05, "loss": 0.0929, "step": 12511 }, { "epoch": 0.27570554242619555, "grad_norm": 1.0433542728424072, "learning_rate": 2.5496082054039393e-05, "loss": 0.1233, "step": 12512 }, { "epoch": 0.2757275777157117, "grad_norm": 1.0070961713790894, "learning_rate": 2.549531723826217e-05, "loss": 0.1134, "step": 12513 }, { "epoch": 0.2757496130052279, "grad_norm": 0.5567077994346619, "learning_rate": 2.5494552369026668e-05, "loss": 0.0783, "step": 12514 }, { "epoch": 0.27577164829474404, "grad_norm": 0.9470841884613037, "learning_rate": 2.549378744633678e-05, "loss": 0.1032, "step": 12515 }, { "epoch": 0.2757936835842602, "grad_norm": 0.6106857061386108, "learning_rate": 2.5493022470196407e-05, "loss": 0.1183, "step": 12516 }, { "epoch": 0.27581571887377637, "grad_norm": 0.8223024010658264, "learning_rate": 2.549225744060944e-05, "loss": 0.1004, "step": 12517 }, { "epoch": 0.27583775416329254, "grad_norm": 0.8885534405708313, "learning_rate": 2.5491492357579783e-05, "loss": 0.1183, "step": 12518 }, { "epoch": 0.27585978945280865, "grad_norm": 1.2828235626220703, "learning_rate": 2.549072722111133e-05, "loss": 0.1509, "step": 12519 }, { "epoch": 0.2758818247423248, "grad_norm": 0.7765795588493347, "learning_rate": 2.548996203120798e-05, "loss": 0.1318, "step": 12520 }, { "epoch": 0.275903860031841, "grad_norm": 0.7466689348220825, "learning_rate": 2.548919678787362e-05, "loss": 0.0808, "step": 12521 }, { "epoch": 0.27592589532135714, "grad_norm": 0.9740027785301208, "learning_rate": 2.5488431491112166e-05, "loss": 0.0743, "step": 12522 }, { "epoch": 0.2759479306108733, "grad_norm": 0.7169694304466248, "learning_rate": 2.5487666140927495e-05, "loss": 0.0737, "step": 12523 }, { "epoch": 0.27596996590038947, "grad_norm": 0.6492582559585571, "learning_rate": 2.5486900737323527e-05, "loss": 0.0618, "step": 12524 }, { "epoch": 0.27599200118990563, "grad_norm": 1.349126935005188, "learning_rate": 2.5486135280304143e-05, "loss": 0.1702, "step": 12525 }, { "epoch": 0.2760140364794218, "grad_norm": 0.774368941783905, "learning_rate": 2.5485369769873257e-05, "loss": 0.1057, "step": 12526 }, { "epoch": 0.27603607176893796, "grad_norm": 0.6696154475212097, "learning_rate": 2.5484604206034748e-05, "loss": 0.0685, "step": 12527 }, { "epoch": 0.2760581070584541, "grad_norm": 0.5501030683517456, "learning_rate": 2.548383858879254e-05, "loss": 0.075, "step": 12528 }, { "epoch": 0.2760801423479703, "grad_norm": 0.9082679748535156, "learning_rate": 2.548307291815051e-05, "loss": 0.0914, "step": 12529 }, { "epoch": 0.27610217763748646, "grad_norm": 0.7829393744468689, "learning_rate": 2.5482307194112576e-05, "loss": 0.0727, "step": 12530 }, { "epoch": 0.2761242129270026, "grad_norm": 0.5591388940811157, "learning_rate": 2.5481541416682624e-05, "loss": 0.0922, "step": 12531 }, { "epoch": 0.27614624821651873, "grad_norm": 0.6574352383613586, "learning_rate": 2.5480775585864565e-05, "loss": 0.0706, "step": 12532 }, { "epoch": 0.2761682835060349, "grad_norm": 0.7654784917831421, "learning_rate": 2.548000970166229e-05, "loss": 0.1255, "step": 12533 }, { "epoch": 0.27619031879555106, "grad_norm": 1.2611411809921265, "learning_rate": 2.5479243764079713e-05, "loss": 0.0997, "step": 12534 }, { "epoch": 0.2762123540850672, "grad_norm": 0.5405052304267883, "learning_rate": 2.5478477773120724e-05, "loss": 0.0711, "step": 12535 }, { "epoch": 0.2762343893745834, "grad_norm": 0.7117370963096619, "learning_rate": 2.5477711728789233e-05, "loss": 0.1085, "step": 12536 }, { "epoch": 0.27625642466409955, "grad_norm": 1.089408040046692, "learning_rate": 2.5476945631089134e-05, "loss": 0.0873, "step": 12537 }, { "epoch": 0.2762784599536157, "grad_norm": 0.8012107014656067, "learning_rate": 2.5476179480024336e-05, "loss": 0.0994, "step": 12538 }, { "epoch": 0.2763004952431319, "grad_norm": 0.9307128190994263, "learning_rate": 2.5475413275598736e-05, "loss": 0.1051, "step": 12539 }, { "epoch": 0.27632253053264805, "grad_norm": 0.729250967502594, "learning_rate": 2.5474647017816234e-05, "loss": 0.0976, "step": 12540 }, { "epoch": 0.2763445658221642, "grad_norm": 0.563130795955658, "learning_rate": 2.5473880706680742e-05, "loss": 0.1081, "step": 12541 }, { "epoch": 0.2763666011116804, "grad_norm": 0.495882511138916, "learning_rate": 2.5473114342196157e-05, "loss": 0.0894, "step": 12542 }, { "epoch": 0.27638863640119654, "grad_norm": 0.6798633337020874, "learning_rate": 2.547234792436639e-05, "loss": 0.0953, "step": 12543 }, { "epoch": 0.27641067169071265, "grad_norm": 0.6098551154136658, "learning_rate": 2.5471581453195336e-05, "loss": 0.069, "step": 12544 }, { "epoch": 0.2764327069802288, "grad_norm": 0.5677162408828735, "learning_rate": 2.54708149286869e-05, "loss": 0.0542, "step": 12545 }, { "epoch": 0.276454742269745, "grad_norm": 0.8860449194908142, "learning_rate": 2.5470048350844994e-05, "loss": 0.119, "step": 12546 }, { "epoch": 0.27647677755926114, "grad_norm": 0.9168340563774109, "learning_rate": 2.5469281719673512e-05, "loss": 0.1173, "step": 12547 }, { "epoch": 0.2764988128487773, "grad_norm": 1.2552744150161743, "learning_rate": 2.5468515035176366e-05, "loss": 0.0879, "step": 12548 }, { "epoch": 0.2765208481382935, "grad_norm": 1.5137232542037964, "learning_rate": 2.546774829735746e-05, "loss": 0.0931, "step": 12549 }, { "epoch": 0.27654288342780964, "grad_norm": 1.085375189781189, "learning_rate": 2.54669815062207e-05, "loss": 0.0855, "step": 12550 }, { "epoch": 0.2765649187173258, "grad_norm": 0.6906949877738953, "learning_rate": 2.5466214661769988e-05, "loss": 0.096, "step": 12551 }, { "epoch": 0.27658695400684197, "grad_norm": 0.9647223949432373, "learning_rate": 2.546544776400923e-05, "loss": 0.0751, "step": 12552 }, { "epoch": 0.27660898929635813, "grad_norm": 0.7862838506698608, "learning_rate": 2.546468081294234e-05, "loss": 0.1213, "step": 12553 }, { "epoch": 0.2766310245858743, "grad_norm": 0.9670507907867432, "learning_rate": 2.5463913808573217e-05, "loss": 0.1103, "step": 12554 }, { "epoch": 0.27665305987539046, "grad_norm": 2.413090229034424, "learning_rate": 2.546314675090577e-05, "loss": 0.1021, "step": 12555 }, { "epoch": 0.27667509516490657, "grad_norm": 0.6888179779052734, "learning_rate": 2.5462379639943903e-05, "loss": 0.0869, "step": 12556 }, { "epoch": 0.27669713045442274, "grad_norm": 0.5823664665222168, "learning_rate": 2.5461612475691528e-05, "loss": 0.075, "step": 12557 }, { "epoch": 0.2767191657439389, "grad_norm": 1.2391685247421265, "learning_rate": 2.546084525815255e-05, "loss": 0.1187, "step": 12558 }, { "epoch": 0.27674120103345506, "grad_norm": 0.969419538974762, "learning_rate": 2.546007798733088e-05, "loss": 0.0941, "step": 12559 }, { "epoch": 0.27676323632297123, "grad_norm": 0.6927028894424438, "learning_rate": 2.5459310663230422e-05, "loss": 0.0768, "step": 12560 }, { "epoch": 0.2767852716124874, "grad_norm": 1.1764758825302124, "learning_rate": 2.5458543285855084e-05, "loss": 0.0875, "step": 12561 }, { "epoch": 0.27680730690200356, "grad_norm": 0.7099035382270813, "learning_rate": 2.545777585520878e-05, "loss": 0.0723, "step": 12562 }, { "epoch": 0.2768293421915197, "grad_norm": 1.2645317316055298, "learning_rate": 2.545700837129541e-05, "loss": 0.1318, "step": 12563 }, { "epoch": 0.2768513774810359, "grad_norm": 0.6527200937271118, "learning_rate": 2.5456240834118895e-05, "loss": 0.0758, "step": 12564 }, { "epoch": 0.27687341277055205, "grad_norm": 0.5352064371109009, "learning_rate": 2.545547324368314e-05, "loss": 0.0892, "step": 12565 }, { "epoch": 0.2768954480600682, "grad_norm": 0.3725481927394867, "learning_rate": 2.545470559999205e-05, "loss": 0.0921, "step": 12566 }, { "epoch": 0.2769174833495844, "grad_norm": 1.0307135581970215, "learning_rate": 2.5453937903049538e-05, "loss": 0.1088, "step": 12567 }, { "epoch": 0.27693951863910055, "grad_norm": 0.9784126281738281, "learning_rate": 2.5453170152859513e-05, "loss": 0.1185, "step": 12568 }, { "epoch": 0.27696155392861666, "grad_norm": 0.6798046231269836, "learning_rate": 2.5452402349425888e-05, "loss": 0.1054, "step": 12569 }, { "epoch": 0.2769835892181328, "grad_norm": 0.5749251246452332, "learning_rate": 2.5451634492752572e-05, "loss": 0.1082, "step": 12570 }, { "epoch": 0.277005624507649, "grad_norm": 1.0197087526321411, "learning_rate": 2.545086658284348e-05, "loss": 0.096, "step": 12571 }, { "epoch": 0.27702765979716515, "grad_norm": 0.6377576589584351, "learning_rate": 2.545009861970252e-05, "loss": 0.1038, "step": 12572 }, { "epoch": 0.2770496950866813, "grad_norm": 0.8325440287590027, "learning_rate": 2.5449330603333602e-05, "loss": 0.1149, "step": 12573 }, { "epoch": 0.2770717303761975, "grad_norm": 0.9062284231185913, "learning_rate": 2.544856253374064e-05, "loss": 0.1296, "step": 12574 }, { "epoch": 0.27709376566571364, "grad_norm": 0.6717791557312012, "learning_rate": 2.5447794410927542e-05, "loss": 0.0597, "step": 12575 }, { "epoch": 0.2771158009552298, "grad_norm": 0.968708872795105, "learning_rate": 2.544702623489823e-05, "loss": 0.1252, "step": 12576 }, { "epoch": 0.27713783624474597, "grad_norm": 0.4636629819869995, "learning_rate": 2.544625800565661e-05, "loss": 0.11, "step": 12577 }, { "epoch": 0.27715987153426214, "grad_norm": 0.7086687684059143, "learning_rate": 2.54454897232066e-05, "loss": 0.1138, "step": 12578 }, { "epoch": 0.2771819068237783, "grad_norm": 0.8553387522697449, "learning_rate": 2.5444721387552103e-05, "loss": 0.1043, "step": 12579 }, { "epoch": 0.27720394211329447, "grad_norm": 1.177484393119812, "learning_rate": 2.5443952998697044e-05, "loss": 0.1003, "step": 12580 }, { "epoch": 0.2772259774028106, "grad_norm": 0.9888073801994324, "learning_rate": 2.544318455664533e-05, "loss": 0.1257, "step": 12581 }, { "epoch": 0.27724801269232674, "grad_norm": 0.6947562098503113, "learning_rate": 2.5442416061400876e-05, "loss": 0.0617, "step": 12582 }, { "epoch": 0.2772700479818429, "grad_norm": 0.9141046404838562, "learning_rate": 2.5441647512967597e-05, "loss": 0.1248, "step": 12583 }, { "epoch": 0.27729208327135907, "grad_norm": 0.8156901001930237, "learning_rate": 2.544087891134941e-05, "loss": 0.0999, "step": 12584 }, { "epoch": 0.27731411856087523, "grad_norm": 1.055747151374817, "learning_rate": 2.5440110256550228e-05, "loss": 0.0872, "step": 12585 }, { "epoch": 0.2773361538503914, "grad_norm": 0.6816742420196533, "learning_rate": 2.543934154857396e-05, "loss": 0.0783, "step": 12586 }, { "epoch": 0.27735818913990756, "grad_norm": 0.9456264972686768, "learning_rate": 2.5438572787424536e-05, "loss": 0.1253, "step": 12587 }, { "epoch": 0.2773802244294237, "grad_norm": 1.064600944519043, "learning_rate": 2.5437803973105857e-05, "loss": 0.1285, "step": 12588 }, { "epoch": 0.2774022597189399, "grad_norm": 0.9518530964851379, "learning_rate": 2.5437035105621847e-05, "loss": 0.1586, "step": 12589 }, { "epoch": 0.27742429500845606, "grad_norm": 0.749724268913269, "learning_rate": 2.5436266184976423e-05, "loss": 0.082, "step": 12590 }, { "epoch": 0.2774463302979722, "grad_norm": 0.9942233562469482, "learning_rate": 2.54354972111735e-05, "loss": 0.1083, "step": 12591 }, { "epoch": 0.2774683655874884, "grad_norm": 0.7895948886871338, "learning_rate": 2.543472818421699e-05, "loss": 0.1434, "step": 12592 }, { "epoch": 0.27749040087700455, "grad_norm": 1.016524076461792, "learning_rate": 2.5433959104110814e-05, "loss": 0.1609, "step": 12593 }, { "epoch": 0.27751243616652066, "grad_norm": 0.8653306365013123, "learning_rate": 2.5433189970858887e-05, "loss": 0.0868, "step": 12594 }, { "epoch": 0.2775344714560368, "grad_norm": 0.6293532848358154, "learning_rate": 2.5432420784465133e-05, "loss": 0.098, "step": 12595 }, { "epoch": 0.277556506745553, "grad_norm": 0.4172539710998535, "learning_rate": 2.5431651544933465e-05, "loss": 0.0804, "step": 12596 }, { "epoch": 0.27757854203506915, "grad_norm": 1.473176121711731, "learning_rate": 2.54308822522678e-05, "loss": 0.0941, "step": 12597 }, { "epoch": 0.2776005773245853, "grad_norm": 0.8296829462051392, "learning_rate": 2.5430112906472056e-05, "loss": 0.0886, "step": 12598 }, { "epoch": 0.2776226126141015, "grad_norm": 0.6341140270233154, "learning_rate": 2.5429343507550163e-05, "loss": 0.1187, "step": 12599 }, { "epoch": 0.27764464790361765, "grad_norm": 0.6873235702514648, "learning_rate": 2.5428574055506023e-05, "loss": 0.0685, "step": 12600 }, { "epoch": 0.2776666831931338, "grad_norm": 0.6303568482398987, "learning_rate": 2.5427804550343566e-05, "loss": 0.0856, "step": 12601 }, { "epoch": 0.27768871848265, "grad_norm": 1.0363774299621582, "learning_rate": 2.5427034992066704e-05, "loss": 0.0851, "step": 12602 }, { "epoch": 0.27771075377216614, "grad_norm": 1.3971161842346191, "learning_rate": 2.5426265380679366e-05, "loss": 0.1489, "step": 12603 }, { "epoch": 0.2777327890616823, "grad_norm": 0.7358406782150269, "learning_rate": 2.5425495716185465e-05, "loss": 0.1405, "step": 12604 }, { "epoch": 0.27775482435119847, "grad_norm": 0.7507364153862, "learning_rate": 2.5424725998588926e-05, "loss": 0.1062, "step": 12605 }, { "epoch": 0.2777768596407146, "grad_norm": 0.7016592025756836, "learning_rate": 2.5423956227893664e-05, "loss": 0.0721, "step": 12606 }, { "epoch": 0.27779889493023074, "grad_norm": 0.7233899235725403, "learning_rate": 2.542318640410361e-05, "loss": 0.1163, "step": 12607 }, { "epoch": 0.2778209302197469, "grad_norm": 0.8126707077026367, "learning_rate": 2.542241652722267e-05, "loss": 0.1443, "step": 12608 }, { "epoch": 0.2778429655092631, "grad_norm": 0.9336262345314026, "learning_rate": 2.5421646597254776e-05, "loss": 0.0742, "step": 12609 }, { "epoch": 0.27786500079877924, "grad_norm": 0.8396621942520142, "learning_rate": 2.542087661420385e-05, "loss": 0.0795, "step": 12610 }, { "epoch": 0.2778870360882954, "grad_norm": 0.7154679894447327, "learning_rate": 2.542010657807381e-05, "loss": 0.0826, "step": 12611 }, { "epoch": 0.27790907137781157, "grad_norm": 0.9773206114768982, "learning_rate": 2.541933648886858e-05, "loss": 0.1058, "step": 12612 }, { "epoch": 0.27793110666732773, "grad_norm": 0.7179941534996033, "learning_rate": 2.5418566346592083e-05, "loss": 0.1065, "step": 12613 }, { "epoch": 0.2779531419568439, "grad_norm": 0.804043173789978, "learning_rate": 2.541779615124824e-05, "loss": 0.1281, "step": 12614 }, { "epoch": 0.27797517724636006, "grad_norm": 1.035513997077942, "learning_rate": 2.5417025902840972e-05, "loss": 0.1154, "step": 12615 }, { "epoch": 0.2779972125358762, "grad_norm": 0.7998390197753906, "learning_rate": 2.5416255601374212e-05, "loss": 0.106, "step": 12616 }, { "epoch": 0.2780192478253924, "grad_norm": 1.112963318824768, "learning_rate": 2.541548524685187e-05, "loss": 0.1066, "step": 12617 }, { "epoch": 0.2780412831149085, "grad_norm": 0.8479104042053223, "learning_rate": 2.5414714839277877e-05, "loss": 0.0843, "step": 12618 }, { "epoch": 0.27806331840442466, "grad_norm": 0.927975058555603, "learning_rate": 2.5413944378656156e-05, "loss": 0.0696, "step": 12619 }, { "epoch": 0.27808535369394083, "grad_norm": 0.8458218574523926, "learning_rate": 2.5413173864990637e-05, "loss": 0.1088, "step": 12620 }, { "epoch": 0.278107388983457, "grad_norm": 0.491738498210907, "learning_rate": 2.5412403298285233e-05, "loss": 0.0856, "step": 12621 }, { "epoch": 0.27812942427297316, "grad_norm": 0.8543512225151062, "learning_rate": 2.541163267854388e-05, "loss": 0.115, "step": 12622 }, { "epoch": 0.2781514595624893, "grad_norm": 0.6364610195159912, "learning_rate": 2.5410862005770497e-05, "loss": 0.1079, "step": 12623 }, { "epoch": 0.2781734948520055, "grad_norm": 0.932876706123352, "learning_rate": 2.541009127996901e-05, "loss": 0.1324, "step": 12624 }, { "epoch": 0.27819553014152165, "grad_norm": 0.9920560717582703, "learning_rate": 2.540932050114335e-05, "loss": 0.1222, "step": 12625 }, { "epoch": 0.2782175654310378, "grad_norm": 0.8032773733139038, "learning_rate": 2.5408549669297435e-05, "loss": 0.1274, "step": 12626 }, { "epoch": 0.278239600720554, "grad_norm": 1.1650174856185913, "learning_rate": 2.5407778784435194e-05, "loss": 0.0737, "step": 12627 }, { "epoch": 0.27826163601007015, "grad_norm": 0.9599568247795105, "learning_rate": 2.5407007846560555e-05, "loss": 0.0908, "step": 12628 }, { "epoch": 0.2782836712995863, "grad_norm": 0.6995635628700256, "learning_rate": 2.540623685567745e-05, "loss": 0.1239, "step": 12629 }, { "epoch": 0.2783057065891025, "grad_norm": 0.8321220278739929, "learning_rate": 2.5405465811789794e-05, "loss": 0.103, "step": 12630 }, { "epoch": 0.2783277418786186, "grad_norm": 0.6556292176246643, "learning_rate": 2.540469471490152e-05, "loss": 0.0952, "step": 12631 }, { "epoch": 0.27834977716813475, "grad_norm": 0.6758642196655273, "learning_rate": 2.5403923565016562e-05, "loss": 0.0914, "step": 12632 }, { "epoch": 0.2783718124576509, "grad_norm": 0.8317709565162659, "learning_rate": 2.540315236213884e-05, "loss": 0.0881, "step": 12633 }, { "epoch": 0.2783938477471671, "grad_norm": 0.6378376483917236, "learning_rate": 2.540238110627228e-05, "loss": 0.0471, "step": 12634 }, { "epoch": 0.27841588303668324, "grad_norm": 0.7009780406951904, "learning_rate": 2.540160979742082e-05, "loss": 0.1431, "step": 12635 }, { "epoch": 0.2784379183261994, "grad_norm": 0.7038636803627014, "learning_rate": 2.5400838435588384e-05, "loss": 0.081, "step": 12636 }, { "epoch": 0.27845995361571557, "grad_norm": 0.8783656358718872, "learning_rate": 2.5400067020778898e-05, "loss": 0.1078, "step": 12637 }, { "epoch": 0.27848198890523174, "grad_norm": 0.7991598844528198, "learning_rate": 2.539929555299629e-05, "loss": 0.1054, "step": 12638 }, { "epoch": 0.2785040241947479, "grad_norm": 1.3046112060546875, "learning_rate": 2.53985240322445e-05, "loss": 0.1193, "step": 12639 }, { "epoch": 0.27852605948426407, "grad_norm": 0.6813960075378418, "learning_rate": 2.5397752458527447e-05, "loss": 0.0951, "step": 12640 }, { "epoch": 0.27854809477378023, "grad_norm": 0.6423971056938171, "learning_rate": 2.5396980831849068e-05, "loss": 0.1168, "step": 12641 }, { "epoch": 0.2785701300632964, "grad_norm": 0.7840006351470947, "learning_rate": 2.539620915221329e-05, "loss": 0.1042, "step": 12642 }, { "epoch": 0.2785921653528125, "grad_norm": 0.6085389256477356, "learning_rate": 2.5395437419624044e-05, "loss": 0.0805, "step": 12643 }, { "epoch": 0.27861420064232867, "grad_norm": 0.940654993057251, "learning_rate": 2.539466563408526e-05, "loss": 0.0952, "step": 12644 }, { "epoch": 0.27863623593184483, "grad_norm": 0.6825973391532898, "learning_rate": 2.5393893795600868e-05, "loss": 0.1444, "step": 12645 }, { "epoch": 0.278658271221361, "grad_norm": 0.566374659538269, "learning_rate": 2.5393121904174803e-05, "loss": 0.0894, "step": 12646 }, { "epoch": 0.27868030651087716, "grad_norm": 0.5175046920776367, "learning_rate": 2.5392349959810995e-05, "loss": 0.1267, "step": 12647 }, { "epoch": 0.2787023418003933, "grad_norm": 0.6722692847251892, "learning_rate": 2.5391577962513374e-05, "loss": 0.1034, "step": 12648 }, { "epoch": 0.2787243770899095, "grad_norm": 0.7706562280654907, "learning_rate": 2.5390805912285876e-05, "loss": 0.1074, "step": 12649 }, { "epoch": 0.27874641237942566, "grad_norm": 0.722162127494812, "learning_rate": 2.539003380913243e-05, "loss": 0.1091, "step": 12650 }, { "epoch": 0.2787684476689418, "grad_norm": 0.8526418209075928, "learning_rate": 2.5389261653056972e-05, "loss": 0.1253, "step": 12651 }, { "epoch": 0.278790482958458, "grad_norm": 0.8992982506752014, "learning_rate": 2.5388489444063435e-05, "loss": 0.1039, "step": 12652 }, { "epoch": 0.27881251824797415, "grad_norm": 0.633976399898529, "learning_rate": 2.5387717182155744e-05, "loss": 0.0938, "step": 12653 }, { "epoch": 0.2788345535374903, "grad_norm": 1.041029453277588, "learning_rate": 2.5386944867337844e-05, "loss": 0.1234, "step": 12654 }, { "epoch": 0.2788565888270064, "grad_norm": 0.7625554800033569, "learning_rate": 2.5386172499613664e-05, "loss": 0.0845, "step": 12655 }, { "epoch": 0.2788786241165226, "grad_norm": 0.7827449440956116, "learning_rate": 2.5385400078987136e-05, "loss": 0.0701, "step": 12656 }, { "epoch": 0.27890065940603875, "grad_norm": 1.2047717571258545, "learning_rate": 2.5384627605462195e-05, "loss": 0.0913, "step": 12657 }, { "epoch": 0.2789226946955549, "grad_norm": 1.1021206378936768, "learning_rate": 2.538385507904278e-05, "loss": 0.1572, "step": 12658 }, { "epoch": 0.2789447299850711, "grad_norm": 0.5714015960693359, "learning_rate": 2.5383082499732822e-05, "loss": 0.1178, "step": 12659 }, { "epoch": 0.27896676527458725, "grad_norm": 2.072899103164673, "learning_rate": 2.5382309867536258e-05, "loss": 0.115, "step": 12660 }, { "epoch": 0.2789888005641034, "grad_norm": 0.9234856367111206, "learning_rate": 2.538153718245702e-05, "loss": 0.0777, "step": 12661 }, { "epoch": 0.2790108358536196, "grad_norm": 0.7509884834289551, "learning_rate": 2.538076444449905e-05, "loss": 0.1074, "step": 12662 }, { "epoch": 0.27903287114313574, "grad_norm": 1.8093479871749878, "learning_rate": 2.5379991653666272e-05, "loss": 0.1007, "step": 12663 }, { "epoch": 0.2790549064326519, "grad_norm": 0.914109468460083, "learning_rate": 2.537921880996264e-05, "loss": 0.1051, "step": 12664 }, { "epoch": 0.27907694172216807, "grad_norm": 1.0678482055664062, "learning_rate": 2.5378445913392074e-05, "loss": 0.1319, "step": 12665 }, { "epoch": 0.27909897701168424, "grad_norm": 0.720863938331604, "learning_rate": 2.5377672963958523e-05, "loss": 0.1044, "step": 12666 }, { "epoch": 0.2791210123012004, "grad_norm": 0.81981360912323, "learning_rate": 2.5376899961665912e-05, "loss": 0.1161, "step": 12667 }, { "epoch": 0.2791430475907165, "grad_norm": 0.9724950194358826, "learning_rate": 2.537612690651819e-05, "loss": 0.1497, "step": 12668 }, { "epoch": 0.2791650828802327, "grad_norm": 0.9885824918746948, "learning_rate": 2.537535379851929e-05, "loss": 0.1086, "step": 12669 }, { "epoch": 0.27918711816974884, "grad_norm": 0.6387411952018738, "learning_rate": 2.5374580637673146e-05, "loss": 0.1099, "step": 12670 }, { "epoch": 0.279209153459265, "grad_norm": 0.8261083364486694, "learning_rate": 2.53738074239837e-05, "loss": 0.1116, "step": 12671 }, { "epoch": 0.27923118874878117, "grad_norm": 0.6848369836807251, "learning_rate": 2.5373034157454896e-05, "loss": 0.0918, "step": 12672 }, { "epoch": 0.27925322403829733, "grad_norm": 0.8822840452194214, "learning_rate": 2.5372260838090656e-05, "loss": 0.1395, "step": 12673 }, { "epoch": 0.2792752593278135, "grad_norm": 1.0776588916778564, "learning_rate": 2.537148746589494e-05, "loss": 0.139, "step": 12674 }, { "epoch": 0.27929729461732966, "grad_norm": 0.8145841360092163, "learning_rate": 2.5370714040871667e-05, "loss": 0.1207, "step": 12675 }, { "epoch": 0.2793193299068458, "grad_norm": 0.9105604290962219, "learning_rate": 2.5369940563024794e-05, "loss": 0.13, "step": 12676 }, { "epoch": 0.279341365196362, "grad_norm": 0.808802604675293, "learning_rate": 2.536916703235825e-05, "loss": 0.0826, "step": 12677 }, { "epoch": 0.27936340048587815, "grad_norm": 0.8537940979003906, "learning_rate": 2.5368393448875976e-05, "loss": 0.0906, "step": 12678 }, { "epoch": 0.2793854357753943, "grad_norm": 0.7048264145851135, "learning_rate": 2.5367619812581918e-05, "loss": 0.1024, "step": 12679 }, { "epoch": 0.27940747106491043, "grad_norm": 0.5966774225234985, "learning_rate": 2.536684612348001e-05, "loss": 0.0755, "step": 12680 }, { "epoch": 0.2794295063544266, "grad_norm": 0.4820796549320221, "learning_rate": 2.53660723815742e-05, "loss": 0.0991, "step": 12681 }, { "epoch": 0.27945154164394276, "grad_norm": 0.3797272741794586, "learning_rate": 2.5365298586868414e-05, "loss": 0.0961, "step": 12682 }, { "epoch": 0.2794735769334589, "grad_norm": 0.7232567667961121, "learning_rate": 2.5364524739366613e-05, "loss": 0.0935, "step": 12683 }, { "epoch": 0.2794956122229751, "grad_norm": 0.8279575109481812, "learning_rate": 2.5363750839072727e-05, "loss": 0.0917, "step": 12684 }, { "epoch": 0.27951764751249125, "grad_norm": 0.7921099662780762, "learning_rate": 2.5362976885990705e-05, "loss": 0.1013, "step": 12685 }, { "epoch": 0.2795396828020074, "grad_norm": 0.9840735793113708, "learning_rate": 2.536220288012448e-05, "loss": 0.0903, "step": 12686 }, { "epoch": 0.2795617180915236, "grad_norm": 0.774459958076477, "learning_rate": 2.5361428821477998e-05, "loss": 0.0911, "step": 12687 }, { "epoch": 0.27958375338103975, "grad_norm": 0.5682671666145325, "learning_rate": 2.5360654710055204e-05, "loss": 0.1276, "step": 12688 }, { "epoch": 0.2796057886705559, "grad_norm": 0.6850298047065735, "learning_rate": 2.5359880545860042e-05, "loss": 0.0707, "step": 12689 }, { "epoch": 0.2796278239600721, "grad_norm": 0.6646577715873718, "learning_rate": 2.5359106328896448e-05, "loss": 0.0747, "step": 12690 }, { "epoch": 0.27964985924958824, "grad_norm": 0.9460844397544861, "learning_rate": 2.5358332059168374e-05, "loss": 0.1131, "step": 12691 }, { "epoch": 0.27967189453910435, "grad_norm": 0.9342751502990723, "learning_rate": 2.5357557736679762e-05, "loss": 0.0914, "step": 12692 }, { "epoch": 0.2796939298286205, "grad_norm": 1.235883355140686, "learning_rate": 2.5356783361434553e-05, "loss": 0.0699, "step": 12693 }, { "epoch": 0.2797159651181367, "grad_norm": 0.8612986207008362, "learning_rate": 2.535600893343669e-05, "loss": 0.1155, "step": 12694 }, { "epoch": 0.27973800040765284, "grad_norm": 0.7597205638885498, "learning_rate": 2.5355234452690122e-05, "loss": 0.0951, "step": 12695 }, { "epoch": 0.279760035697169, "grad_norm": 0.8414492607116699, "learning_rate": 2.5354459919198793e-05, "loss": 0.1106, "step": 12696 }, { "epoch": 0.27978207098668517, "grad_norm": 0.7164528369903564, "learning_rate": 2.5353685332966642e-05, "loss": 0.1012, "step": 12697 }, { "epoch": 0.27980410627620134, "grad_norm": 0.5922816395759583, "learning_rate": 2.5352910693997627e-05, "loss": 0.0884, "step": 12698 }, { "epoch": 0.2798261415657175, "grad_norm": 1.3912941217422485, "learning_rate": 2.5352136002295682e-05, "loss": 0.1018, "step": 12699 }, { "epoch": 0.27984817685523367, "grad_norm": 1.237789511680603, "learning_rate": 2.535136125786476e-05, "loss": 0.065, "step": 12700 }, { "epoch": 0.27987021214474983, "grad_norm": 1.4057797193527222, "learning_rate": 2.5350586460708802e-05, "loss": 0.0966, "step": 12701 }, { "epoch": 0.279892247434266, "grad_norm": 0.8978190422058105, "learning_rate": 2.5349811610831755e-05, "loss": 0.132, "step": 12702 }, { "epoch": 0.27991428272378216, "grad_norm": 1.0014402866363525, "learning_rate": 2.534903670823757e-05, "loss": 0.1051, "step": 12703 }, { "epoch": 0.2799363180132983, "grad_norm": 0.9436091780662537, "learning_rate": 2.534826175293019e-05, "loss": 0.1195, "step": 12704 }, { "epoch": 0.27995835330281443, "grad_norm": 0.9166111350059509, "learning_rate": 2.5347486744913567e-05, "loss": 0.1368, "step": 12705 }, { "epoch": 0.2799803885923306, "grad_norm": 0.6279478073120117, "learning_rate": 2.534671168419164e-05, "loss": 0.098, "step": 12706 }, { "epoch": 0.28000242388184676, "grad_norm": 0.9179578423500061, "learning_rate": 2.5345936570768367e-05, "loss": 0.0746, "step": 12707 }, { "epoch": 0.2800244591713629, "grad_norm": 0.5086060762405396, "learning_rate": 2.534516140464769e-05, "loss": 0.0766, "step": 12708 }, { "epoch": 0.2800464944608791, "grad_norm": 0.9462670683860779, "learning_rate": 2.5344386185833556e-05, "loss": 0.0914, "step": 12709 }, { "epoch": 0.28006852975039526, "grad_norm": 0.6276516318321228, "learning_rate": 2.5343610914329918e-05, "loss": 0.0989, "step": 12710 }, { "epoch": 0.2800905650399114, "grad_norm": 0.47776126861572266, "learning_rate": 2.534283559014072e-05, "loss": 0.1072, "step": 12711 }, { "epoch": 0.2801126003294276, "grad_norm": 0.6060800552368164, "learning_rate": 2.5342060213269916e-05, "loss": 0.091, "step": 12712 }, { "epoch": 0.28013463561894375, "grad_norm": 0.5762593150138855, "learning_rate": 2.534128478372146e-05, "loss": 0.1071, "step": 12713 }, { "epoch": 0.2801566709084599, "grad_norm": 0.7197985053062439, "learning_rate": 2.5340509301499287e-05, "loss": 0.1142, "step": 12714 }, { "epoch": 0.2801787061979761, "grad_norm": 1.0117238759994507, "learning_rate": 2.5339733766607356e-05, "loss": 0.1061, "step": 12715 }, { "epoch": 0.28020074148749224, "grad_norm": 0.7416653633117676, "learning_rate": 2.5338958179049617e-05, "loss": 0.0792, "step": 12716 }, { "epoch": 0.28022277677700835, "grad_norm": 1.012060523033142, "learning_rate": 2.533818253883002e-05, "loss": 0.0951, "step": 12717 }, { "epoch": 0.2802448120665245, "grad_norm": 1.0230072736740112, "learning_rate": 2.5337406845952515e-05, "loss": 0.0888, "step": 12718 }, { "epoch": 0.2802668473560407, "grad_norm": 0.5766304731369019, "learning_rate": 2.5336631100421057e-05, "loss": 0.0816, "step": 12719 }, { "epoch": 0.28028888264555685, "grad_norm": 2.1722805500030518, "learning_rate": 2.5335855302239586e-05, "loss": 0.106, "step": 12720 }, { "epoch": 0.280310917935073, "grad_norm": 0.9112134575843811, "learning_rate": 2.533507945141207e-05, "loss": 0.0986, "step": 12721 }, { "epoch": 0.2803329532245892, "grad_norm": 0.828615128993988, "learning_rate": 2.533430354794245e-05, "loss": 0.0878, "step": 12722 }, { "epoch": 0.28035498851410534, "grad_norm": 0.8126205205917358, "learning_rate": 2.5333527591834678e-05, "loss": 0.1397, "step": 12723 }, { "epoch": 0.2803770238036215, "grad_norm": 1.1782763004302979, "learning_rate": 2.533275158309271e-05, "loss": 0.1029, "step": 12724 }, { "epoch": 0.28039905909313767, "grad_norm": 0.6463024020195007, "learning_rate": 2.53319755217205e-05, "loss": 0.0746, "step": 12725 }, { "epoch": 0.28042109438265383, "grad_norm": 0.6195653080940247, "learning_rate": 2.5331199407721992e-05, "loss": 0.0975, "step": 12726 }, { "epoch": 0.28044312967217, "grad_norm": 0.7380874156951904, "learning_rate": 2.5330423241101152e-05, "loss": 0.1059, "step": 12727 }, { "epoch": 0.28046516496168616, "grad_norm": 0.6000586152076721, "learning_rate": 2.5329647021861925e-05, "loss": 0.0992, "step": 12728 }, { "epoch": 0.2804872002512023, "grad_norm": 0.7291808724403381, "learning_rate": 2.5328870750008266e-05, "loss": 0.1158, "step": 12729 }, { "epoch": 0.28050923554071844, "grad_norm": 0.6105293035507202, "learning_rate": 2.5328094425544127e-05, "loss": 0.1002, "step": 12730 }, { "epoch": 0.2805312708302346, "grad_norm": 0.8013273477554321, "learning_rate": 2.5327318048473466e-05, "loss": 0.0936, "step": 12731 }, { "epoch": 0.28055330611975077, "grad_norm": 1.0084835290908813, "learning_rate": 2.532654161880024e-05, "loss": 0.1373, "step": 12732 }, { "epoch": 0.28057534140926693, "grad_norm": 0.9402202367782593, "learning_rate": 2.5325765136528394e-05, "loss": 0.0866, "step": 12733 }, { "epoch": 0.2805973766987831, "grad_norm": 0.8852420449256897, "learning_rate": 2.5324988601661888e-05, "loss": 0.1088, "step": 12734 }, { "epoch": 0.28061941198829926, "grad_norm": 0.8627429008483887, "learning_rate": 2.5324212014204685e-05, "loss": 0.0923, "step": 12735 }, { "epoch": 0.2806414472778154, "grad_norm": 1.0026915073394775, "learning_rate": 2.5323435374160732e-05, "loss": 0.1028, "step": 12736 }, { "epoch": 0.2806634825673316, "grad_norm": 1.1399376392364502, "learning_rate": 2.5322658681533984e-05, "loss": 0.1052, "step": 12737 }, { "epoch": 0.28068551785684775, "grad_norm": 0.6630401611328125, "learning_rate": 2.5321881936328404e-05, "loss": 0.0807, "step": 12738 }, { "epoch": 0.2807075531463639, "grad_norm": 0.9164959788322449, "learning_rate": 2.5321105138547943e-05, "loss": 0.0935, "step": 12739 }, { "epoch": 0.2807295884358801, "grad_norm": 0.4592340290546417, "learning_rate": 2.5320328288196555e-05, "loss": 0.1307, "step": 12740 }, { "epoch": 0.28075162372539625, "grad_norm": 0.6369075179100037, "learning_rate": 2.5319551385278205e-05, "loss": 0.0883, "step": 12741 }, { "epoch": 0.28077365901491236, "grad_norm": 0.805528461933136, "learning_rate": 2.5318774429796845e-05, "loss": 0.0887, "step": 12742 }, { "epoch": 0.2807956943044285, "grad_norm": 0.7471737861633301, "learning_rate": 2.5317997421756433e-05, "loss": 0.1092, "step": 12743 }, { "epoch": 0.2808177295939447, "grad_norm": 1.0848255157470703, "learning_rate": 2.531722036116093e-05, "loss": 0.1287, "step": 12744 }, { "epoch": 0.28083976488346085, "grad_norm": 0.8315449357032776, "learning_rate": 2.5316443248014284e-05, "loss": 0.0847, "step": 12745 }, { "epoch": 0.280861800172977, "grad_norm": 1.0802924633026123, "learning_rate": 2.5315666082320468e-05, "loss": 0.1299, "step": 12746 }, { "epoch": 0.2808838354624932, "grad_norm": 0.7849578857421875, "learning_rate": 2.5314888864083427e-05, "loss": 0.1122, "step": 12747 }, { "epoch": 0.28090587075200935, "grad_norm": 0.7221167087554932, "learning_rate": 2.5314111593307134e-05, "loss": 0.0948, "step": 12748 }, { "epoch": 0.2809279060415255, "grad_norm": 1.3933467864990234, "learning_rate": 2.531333426999553e-05, "loss": 0.0967, "step": 12749 }, { "epoch": 0.2809499413310417, "grad_norm": 0.5904006361961365, "learning_rate": 2.531255689415259e-05, "loss": 0.1045, "step": 12750 }, { "epoch": 0.28097197662055784, "grad_norm": 0.6255635023117065, "learning_rate": 2.5311779465782262e-05, "loss": 0.0926, "step": 12751 }, { "epoch": 0.280994011910074, "grad_norm": 0.6831848621368408, "learning_rate": 2.5311001984888516e-05, "loss": 0.0558, "step": 12752 }, { "epoch": 0.28101604719959017, "grad_norm": 0.8831283450126648, "learning_rate": 2.531022445147531e-05, "loss": 0.0989, "step": 12753 }, { "epoch": 0.2810380824891063, "grad_norm": 0.7810507416725159, "learning_rate": 2.5309446865546605e-05, "loss": 0.0996, "step": 12754 }, { "epoch": 0.28106011777862244, "grad_norm": 1.038498878479004, "learning_rate": 2.5308669227106347e-05, "loss": 0.1217, "step": 12755 }, { "epoch": 0.2810821530681386, "grad_norm": 0.8974339962005615, "learning_rate": 2.5307891536158517e-05, "loss": 0.085, "step": 12756 }, { "epoch": 0.28110418835765477, "grad_norm": 1.318733811378479, "learning_rate": 2.530711379270707e-05, "loss": 0.0905, "step": 12757 }, { "epoch": 0.28112622364717094, "grad_norm": 0.9189193248748779, "learning_rate": 2.5306335996755956e-05, "loss": 0.1382, "step": 12758 }, { "epoch": 0.2811482589366871, "grad_norm": 0.7520992159843445, "learning_rate": 2.5305558148309152e-05, "loss": 0.1009, "step": 12759 }, { "epoch": 0.28117029422620327, "grad_norm": 0.6557063460350037, "learning_rate": 2.5304780247370616e-05, "loss": 0.0991, "step": 12760 }, { "epoch": 0.28119232951571943, "grad_norm": 0.8317679166793823, "learning_rate": 2.5304002293944305e-05, "loss": 0.0752, "step": 12761 }, { "epoch": 0.2812143648052356, "grad_norm": 0.7717105746269226, "learning_rate": 2.5303224288034185e-05, "loss": 0.1161, "step": 12762 }, { "epoch": 0.28123640009475176, "grad_norm": 1.2029826641082764, "learning_rate": 2.530244622964422e-05, "loss": 0.1245, "step": 12763 }, { "epoch": 0.2812584353842679, "grad_norm": 0.9799685478210449, "learning_rate": 2.5301668118778367e-05, "loss": 0.0851, "step": 12764 }, { "epoch": 0.2812804706737841, "grad_norm": 0.368319034576416, "learning_rate": 2.53008899554406e-05, "loss": 0.0655, "step": 12765 }, { "epoch": 0.2813025059633002, "grad_norm": 1.3011606931686401, "learning_rate": 2.5300111739634877e-05, "loss": 0.1259, "step": 12766 }, { "epoch": 0.28132454125281636, "grad_norm": 0.5171195864677429, "learning_rate": 2.529933347136516e-05, "loss": 0.1039, "step": 12767 }, { "epoch": 0.2813465765423325, "grad_norm": 0.7350301742553711, "learning_rate": 2.5298555150635413e-05, "loss": 0.1166, "step": 12768 }, { "epoch": 0.2813686118318487, "grad_norm": 0.9018659591674805, "learning_rate": 2.5297776777449602e-05, "loss": 0.0829, "step": 12769 }, { "epoch": 0.28139064712136486, "grad_norm": 0.4298113286495209, "learning_rate": 2.5296998351811692e-05, "loss": 0.0846, "step": 12770 }, { "epoch": 0.281412682410881, "grad_norm": 0.9251612424850464, "learning_rate": 2.529621987372565e-05, "loss": 0.1055, "step": 12771 }, { "epoch": 0.2814347177003972, "grad_norm": 0.8859205842018127, "learning_rate": 2.5295441343195437e-05, "loss": 0.1098, "step": 12772 }, { "epoch": 0.28145675298991335, "grad_norm": 0.8153672814369202, "learning_rate": 2.529466276022502e-05, "loss": 0.1103, "step": 12773 }, { "epoch": 0.2814787882794295, "grad_norm": 0.6683886647224426, "learning_rate": 2.529388412481837e-05, "loss": 0.1041, "step": 12774 }, { "epoch": 0.2815008235689457, "grad_norm": 0.8148091435432434, "learning_rate": 2.529310543697944e-05, "loss": 0.1077, "step": 12775 }, { "epoch": 0.28152285885846184, "grad_norm": 0.7850407361984253, "learning_rate": 2.529232669671221e-05, "loss": 0.079, "step": 12776 }, { "epoch": 0.281544894147978, "grad_norm": 1.2283382415771484, "learning_rate": 2.529154790402064e-05, "loss": 0.1504, "step": 12777 }, { "epoch": 0.2815669294374942, "grad_norm": 0.7025816440582275, "learning_rate": 2.5290769058908696e-05, "loss": 0.1195, "step": 12778 }, { "epoch": 0.2815889647270103, "grad_norm": 0.8089442253112793, "learning_rate": 2.5289990161380352e-05, "loss": 0.0512, "step": 12779 }, { "epoch": 0.28161100001652645, "grad_norm": 0.9828896522521973, "learning_rate": 2.5289211211439565e-05, "loss": 0.0967, "step": 12780 }, { "epoch": 0.2816330353060426, "grad_norm": 0.5084933638572693, "learning_rate": 2.528843220909031e-05, "loss": 0.1078, "step": 12781 }, { "epoch": 0.2816550705955588, "grad_norm": 0.816511869430542, "learning_rate": 2.528765315433655e-05, "loss": 0.1292, "step": 12782 }, { "epoch": 0.28167710588507494, "grad_norm": 0.5776845812797546, "learning_rate": 2.5286874047182258e-05, "loss": 0.0897, "step": 12783 }, { "epoch": 0.2816991411745911, "grad_norm": 0.8514245748519897, "learning_rate": 2.5286094887631398e-05, "loss": 0.1043, "step": 12784 }, { "epoch": 0.28172117646410727, "grad_norm": 1.0372503995895386, "learning_rate": 2.5285315675687944e-05, "loss": 0.1105, "step": 12785 }, { "epoch": 0.28174321175362343, "grad_norm": 0.5357585549354553, "learning_rate": 2.5284536411355862e-05, "loss": 0.1003, "step": 12786 }, { "epoch": 0.2817652470431396, "grad_norm": 1.0175048112869263, "learning_rate": 2.528375709463912e-05, "loss": 0.0995, "step": 12787 }, { "epoch": 0.28178728233265576, "grad_norm": 0.867796778678894, "learning_rate": 2.5282977725541686e-05, "loss": 0.1042, "step": 12788 }, { "epoch": 0.28180931762217193, "grad_norm": 0.5741987824440002, "learning_rate": 2.5282198304067533e-05, "loss": 0.0978, "step": 12789 }, { "epoch": 0.2818313529116881, "grad_norm": 1.1493843793869019, "learning_rate": 2.528141883022063e-05, "loss": 0.0791, "step": 12790 }, { "epoch": 0.2818533882012042, "grad_norm": 0.804430365562439, "learning_rate": 2.5280639304004953e-05, "loss": 0.0997, "step": 12791 }, { "epoch": 0.28187542349072037, "grad_norm": 0.7656916975975037, "learning_rate": 2.5279859725424467e-05, "loss": 0.1249, "step": 12792 }, { "epoch": 0.28189745878023653, "grad_norm": 0.6734591126441956, "learning_rate": 2.5279080094483132e-05, "loss": 0.0833, "step": 12793 }, { "epoch": 0.2819194940697527, "grad_norm": 0.4070731997489929, "learning_rate": 2.5278300411184943e-05, "loss": 0.1203, "step": 12794 }, { "epoch": 0.28194152935926886, "grad_norm": 0.8022586703300476, "learning_rate": 2.527752067553385e-05, "loss": 0.0997, "step": 12795 }, { "epoch": 0.281963564648785, "grad_norm": 1.1259528398513794, "learning_rate": 2.527674088753384e-05, "loss": 0.1189, "step": 12796 }, { "epoch": 0.2819855999383012, "grad_norm": 0.7424703240394592, "learning_rate": 2.527596104718887e-05, "loss": 0.0914, "step": 12797 }, { "epoch": 0.28200763522781735, "grad_norm": 0.8192082643508911, "learning_rate": 2.5275181154502922e-05, "loss": 0.0932, "step": 12798 }, { "epoch": 0.2820296705173335, "grad_norm": 0.8737223744392395, "learning_rate": 2.5274401209479968e-05, "loss": 0.0701, "step": 12799 }, { "epoch": 0.2820517058068497, "grad_norm": 1.0769588947296143, "learning_rate": 2.527362121212398e-05, "loss": 0.1326, "step": 12800 }, { "epoch": 0.28207374109636585, "grad_norm": 0.859927773475647, "learning_rate": 2.527284116243893e-05, "loss": 0.0919, "step": 12801 }, { "epoch": 0.282095776385882, "grad_norm": 1.203845500946045, "learning_rate": 2.527206106042879e-05, "loss": 0.1409, "step": 12802 }, { "epoch": 0.2821178116753981, "grad_norm": 0.7104058265686035, "learning_rate": 2.5271280906097532e-05, "loss": 0.1263, "step": 12803 }, { "epoch": 0.2821398469649143, "grad_norm": 0.9164320230484009, "learning_rate": 2.5270500699449136e-05, "loss": 0.0671, "step": 12804 }, { "epoch": 0.28216188225443045, "grad_norm": 0.8111739754676819, "learning_rate": 2.526972044048757e-05, "loss": 0.1097, "step": 12805 }, { "epoch": 0.2821839175439466, "grad_norm": 0.6763941645622253, "learning_rate": 2.5268940129216815e-05, "loss": 0.0864, "step": 12806 }, { "epoch": 0.2822059528334628, "grad_norm": 1.0932539701461792, "learning_rate": 2.526815976564084e-05, "loss": 0.124, "step": 12807 }, { "epoch": 0.28222798812297895, "grad_norm": 0.8606000542640686, "learning_rate": 2.5267379349763618e-05, "loss": 0.1069, "step": 12808 }, { "epoch": 0.2822500234124951, "grad_norm": 0.943414568901062, "learning_rate": 2.5266598881589128e-05, "loss": 0.091, "step": 12809 }, { "epoch": 0.2822720587020113, "grad_norm": 0.714232325553894, "learning_rate": 2.5265818361121344e-05, "loss": 0.1051, "step": 12810 }, { "epoch": 0.28229409399152744, "grad_norm": 0.6246431469917297, "learning_rate": 2.5265037788364248e-05, "loss": 0.0762, "step": 12811 }, { "epoch": 0.2823161292810436, "grad_norm": 0.6722438931465149, "learning_rate": 2.5264257163321803e-05, "loss": 0.0927, "step": 12812 }, { "epoch": 0.28233816457055977, "grad_norm": 0.885313868522644, "learning_rate": 2.5263476485997995e-05, "loss": 0.1051, "step": 12813 }, { "epoch": 0.28236019986007593, "grad_norm": 0.6061228513717651, "learning_rate": 2.5262695756396794e-05, "loss": 0.0689, "step": 12814 }, { "epoch": 0.2823822351495921, "grad_norm": 1.0145775079727173, "learning_rate": 2.5261914974522185e-05, "loss": 0.1334, "step": 12815 }, { "epoch": 0.2824042704391082, "grad_norm": 0.7176371812820435, "learning_rate": 2.526113414037814e-05, "loss": 0.1046, "step": 12816 }, { "epoch": 0.28242630572862437, "grad_norm": 0.8796529173851013, "learning_rate": 2.5260353253968637e-05, "loss": 0.0844, "step": 12817 }, { "epoch": 0.28244834101814054, "grad_norm": 0.7014297842979431, "learning_rate": 2.5259572315297648e-05, "loss": 0.0682, "step": 12818 }, { "epoch": 0.2824703763076567, "grad_norm": 0.8822676539421082, "learning_rate": 2.525879132436916e-05, "loss": 0.0833, "step": 12819 }, { "epoch": 0.28249241159717287, "grad_norm": 1.0144339799880981, "learning_rate": 2.5258010281187144e-05, "loss": 0.1012, "step": 12820 }, { "epoch": 0.28251444688668903, "grad_norm": 0.7170712947845459, "learning_rate": 2.5257229185755584e-05, "loss": 0.0871, "step": 12821 }, { "epoch": 0.2825364821762052, "grad_norm": 0.7372123003005981, "learning_rate": 2.525644803807845e-05, "loss": 0.0855, "step": 12822 }, { "epoch": 0.28255851746572136, "grad_norm": 0.8505312204360962, "learning_rate": 2.525566683815973e-05, "loss": 0.109, "step": 12823 }, { "epoch": 0.2825805527552375, "grad_norm": 0.9379806518554688, "learning_rate": 2.52548855860034e-05, "loss": 0.1234, "step": 12824 }, { "epoch": 0.2826025880447537, "grad_norm": 1.000409483909607, "learning_rate": 2.5254104281613437e-05, "loss": 0.1081, "step": 12825 }, { "epoch": 0.28262462333426985, "grad_norm": 0.7381903529167175, "learning_rate": 2.5253322924993824e-05, "loss": 0.1124, "step": 12826 }, { "epoch": 0.282646658623786, "grad_norm": 0.9591221213340759, "learning_rate": 2.5252541516148537e-05, "loss": 0.146, "step": 12827 }, { "epoch": 0.2826686939133021, "grad_norm": 0.724646806716919, "learning_rate": 2.5251760055081555e-05, "loss": 0.095, "step": 12828 }, { "epoch": 0.2826907292028183, "grad_norm": 0.6514208912849426, "learning_rate": 2.525097854179687e-05, "loss": 0.0971, "step": 12829 }, { "epoch": 0.28271276449233446, "grad_norm": 1.0330082178115845, "learning_rate": 2.525019697629845e-05, "loss": 0.102, "step": 12830 }, { "epoch": 0.2827347997818506, "grad_norm": 0.6254727840423584, "learning_rate": 2.524941535859028e-05, "loss": 0.109, "step": 12831 }, { "epoch": 0.2827568350713668, "grad_norm": 0.707158088684082, "learning_rate": 2.524863368867634e-05, "loss": 0.1072, "step": 12832 }, { "epoch": 0.28277887036088295, "grad_norm": 0.49734410643577576, "learning_rate": 2.524785196656061e-05, "loss": 0.0836, "step": 12833 }, { "epoch": 0.2828009056503991, "grad_norm": 0.7903046011924744, "learning_rate": 2.5247070192247082e-05, "loss": 0.0845, "step": 12834 }, { "epoch": 0.2828229409399153, "grad_norm": 0.7375057339668274, "learning_rate": 2.524628836573973e-05, "loss": 0.116, "step": 12835 }, { "epoch": 0.28284497622943144, "grad_norm": 0.7624058723449707, "learning_rate": 2.5245506487042532e-05, "loss": 0.0978, "step": 12836 }, { "epoch": 0.2828670115189476, "grad_norm": 0.6554057598114014, "learning_rate": 2.5244724556159476e-05, "loss": 0.0835, "step": 12837 }, { "epoch": 0.2828890468084638, "grad_norm": 1.6454638242721558, "learning_rate": 2.5243942573094546e-05, "loss": 0.1162, "step": 12838 }, { "epoch": 0.28291108209797994, "grad_norm": 0.671248197555542, "learning_rate": 2.5243160537851725e-05, "loss": 0.0856, "step": 12839 }, { "epoch": 0.2829331173874961, "grad_norm": 0.901705265045166, "learning_rate": 2.5242378450434995e-05, "loss": 0.0798, "step": 12840 }, { "epoch": 0.2829551526770122, "grad_norm": 0.8269386887550354, "learning_rate": 2.524159631084833e-05, "loss": 0.119, "step": 12841 }, { "epoch": 0.2829771879665284, "grad_norm": 0.8839775919914246, "learning_rate": 2.5240814119095735e-05, "loss": 0.111, "step": 12842 }, { "epoch": 0.28299922325604454, "grad_norm": 1.0696368217468262, "learning_rate": 2.5240031875181177e-05, "loss": 0.1129, "step": 12843 }, { "epoch": 0.2830212585455607, "grad_norm": 0.7030462026596069, "learning_rate": 2.5239249579108645e-05, "loss": 0.1275, "step": 12844 }, { "epoch": 0.28304329383507687, "grad_norm": 0.851784884929657, "learning_rate": 2.5238467230882122e-05, "loss": 0.1048, "step": 12845 }, { "epoch": 0.28306532912459303, "grad_norm": 0.8887539505958557, "learning_rate": 2.52376848305056e-05, "loss": 0.1262, "step": 12846 }, { "epoch": 0.2830873644141092, "grad_norm": 0.9593825340270996, "learning_rate": 2.5236902377983056e-05, "loss": 0.0882, "step": 12847 }, { "epoch": 0.28310939970362536, "grad_norm": 0.4670258164405823, "learning_rate": 2.5236119873318483e-05, "loss": 0.115, "step": 12848 }, { "epoch": 0.28313143499314153, "grad_norm": 0.5352155566215515, "learning_rate": 2.523533731651586e-05, "loss": 0.09, "step": 12849 }, { "epoch": 0.2831534702826577, "grad_norm": 0.8882862329483032, "learning_rate": 2.523455470757917e-05, "loss": 0.1333, "step": 12850 }, { "epoch": 0.28317550557217386, "grad_norm": 1.0635616779327393, "learning_rate": 2.523377204651241e-05, "loss": 0.1043, "step": 12851 }, { "epoch": 0.28319754086169, "grad_norm": 0.6182461380958557, "learning_rate": 2.523298933331956e-05, "loss": 0.0856, "step": 12852 }, { "epoch": 0.28321957615120613, "grad_norm": 0.9065653085708618, "learning_rate": 2.523220656800461e-05, "loss": 0.1569, "step": 12853 }, { "epoch": 0.2832416114407223, "grad_norm": 1.0152369737625122, "learning_rate": 2.523142375057154e-05, "loss": 0.0831, "step": 12854 }, { "epoch": 0.28326364673023846, "grad_norm": 0.7495786547660828, "learning_rate": 2.523064088102435e-05, "loss": 0.1288, "step": 12855 }, { "epoch": 0.2832856820197546, "grad_norm": 0.6963849663734436, "learning_rate": 2.5229857959367012e-05, "loss": 0.0943, "step": 12856 }, { "epoch": 0.2833077173092708, "grad_norm": 1.0526578426361084, "learning_rate": 2.5229074985603527e-05, "loss": 0.1167, "step": 12857 }, { "epoch": 0.28332975259878695, "grad_norm": 0.6811690330505371, "learning_rate": 2.5228291959737873e-05, "loss": 0.09, "step": 12858 }, { "epoch": 0.2833517878883031, "grad_norm": 0.737589955329895, "learning_rate": 2.5227508881774048e-05, "loss": 0.1086, "step": 12859 }, { "epoch": 0.2833738231778193, "grad_norm": 0.9685093760490417, "learning_rate": 2.5226725751716034e-05, "loss": 0.1318, "step": 12860 }, { "epoch": 0.28339585846733545, "grad_norm": 0.7089986205101013, "learning_rate": 2.5225942569567824e-05, "loss": 0.0911, "step": 12861 }, { "epoch": 0.2834178937568516, "grad_norm": 0.8864790201187134, "learning_rate": 2.52251593353334e-05, "loss": 0.0844, "step": 12862 }, { "epoch": 0.2834399290463678, "grad_norm": 1.0676079988479614, "learning_rate": 2.522437604901676e-05, "loss": 0.0794, "step": 12863 }, { "epoch": 0.28346196433588394, "grad_norm": 0.6369121074676514, "learning_rate": 2.522359271062189e-05, "loss": 0.1147, "step": 12864 }, { "epoch": 0.28348399962540005, "grad_norm": 0.7505406737327576, "learning_rate": 2.5222809320152782e-05, "loss": 0.086, "step": 12865 }, { "epoch": 0.2835060349149162, "grad_norm": 0.665552020072937, "learning_rate": 2.522202587761342e-05, "loss": 0.1163, "step": 12866 }, { "epoch": 0.2835280702044324, "grad_norm": 0.805991530418396, "learning_rate": 2.5221242383007797e-05, "loss": 0.0994, "step": 12867 }, { "epoch": 0.28355010549394855, "grad_norm": 0.6006197333335876, "learning_rate": 2.522045883633991e-05, "loss": 0.0746, "step": 12868 }, { "epoch": 0.2835721407834647, "grad_norm": 0.8871548175811768, "learning_rate": 2.5219675237613745e-05, "loss": 0.0827, "step": 12869 }, { "epoch": 0.2835941760729809, "grad_norm": 0.8862918615341187, "learning_rate": 2.521889158683329e-05, "loss": 0.1322, "step": 12870 }, { "epoch": 0.28361621136249704, "grad_norm": 0.8794779777526855, "learning_rate": 2.5218107884002545e-05, "loss": 0.0978, "step": 12871 }, { "epoch": 0.2836382466520132, "grad_norm": 0.8294655084609985, "learning_rate": 2.5217324129125496e-05, "loss": 0.096, "step": 12872 }, { "epoch": 0.28366028194152937, "grad_norm": 1.4809517860412598, "learning_rate": 2.521654032220614e-05, "loss": 0.1025, "step": 12873 }, { "epoch": 0.28368231723104553, "grad_norm": 2.581941843032837, "learning_rate": 2.5215756463248454e-05, "loss": 0.1092, "step": 12874 }, { "epoch": 0.2837043525205617, "grad_norm": 1.124984622001648, "learning_rate": 2.5214972552256454e-05, "loss": 0.1388, "step": 12875 }, { "epoch": 0.28372638781007786, "grad_norm": 0.5882278084754944, "learning_rate": 2.5214188589234116e-05, "loss": 0.1047, "step": 12876 }, { "epoch": 0.283748423099594, "grad_norm": 0.6647313237190247, "learning_rate": 2.5213404574185436e-05, "loss": 0.0857, "step": 12877 }, { "epoch": 0.28377045838911014, "grad_norm": 0.6592897772789001, "learning_rate": 2.5212620507114415e-05, "loss": 0.0987, "step": 12878 }, { "epoch": 0.2837924936786263, "grad_norm": 1.0528278350830078, "learning_rate": 2.521183638802504e-05, "loss": 0.1141, "step": 12879 }, { "epoch": 0.28381452896814247, "grad_norm": 0.7128461599349976, "learning_rate": 2.5211052216921304e-05, "loss": 0.0998, "step": 12880 }, { "epoch": 0.28383656425765863, "grad_norm": 0.9672567844390869, "learning_rate": 2.5210267993807204e-05, "loss": 0.0923, "step": 12881 }, { "epoch": 0.2838585995471748, "grad_norm": 0.9174475073814392, "learning_rate": 2.5209483718686734e-05, "loss": 0.126, "step": 12882 }, { "epoch": 0.28388063483669096, "grad_norm": 0.6589284539222717, "learning_rate": 2.5208699391563892e-05, "loss": 0.0821, "step": 12883 }, { "epoch": 0.2839026701262071, "grad_norm": 1.1915957927703857, "learning_rate": 2.5207915012442664e-05, "loss": 0.0876, "step": 12884 }, { "epoch": 0.2839247054157233, "grad_norm": 0.727789044380188, "learning_rate": 2.5207130581327053e-05, "loss": 0.1141, "step": 12885 }, { "epoch": 0.28394674070523945, "grad_norm": 1.0462586879730225, "learning_rate": 2.5206346098221055e-05, "loss": 0.0944, "step": 12886 }, { "epoch": 0.2839687759947556, "grad_norm": 1.0519963502883911, "learning_rate": 2.520556156312866e-05, "loss": 0.1422, "step": 12887 }, { "epoch": 0.2839908112842718, "grad_norm": 1.2535972595214844, "learning_rate": 2.520477697605387e-05, "loss": 0.12, "step": 12888 }, { "epoch": 0.28401284657378795, "grad_norm": 0.9459906816482544, "learning_rate": 2.5203992337000673e-05, "loss": 0.0824, "step": 12889 }, { "epoch": 0.28403488186330406, "grad_norm": 0.9233335256576538, "learning_rate": 2.5203207645973077e-05, "loss": 0.1572, "step": 12890 }, { "epoch": 0.2840569171528202, "grad_norm": 1.754808783531189, "learning_rate": 2.5202422902975074e-05, "loss": 0.0999, "step": 12891 }, { "epoch": 0.2840789524423364, "grad_norm": 1.7434009313583374, "learning_rate": 2.520163810801066e-05, "loss": 0.0858, "step": 12892 }, { "epoch": 0.28410098773185255, "grad_norm": 0.8948975205421448, "learning_rate": 2.5200853261083828e-05, "loss": 0.1277, "step": 12893 }, { "epoch": 0.2841230230213687, "grad_norm": 0.7967669367790222, "learning_rate": 2.5200068362198585e-05, "loss": 0.0963, "step": 12894 }, { "epoch": 0.2841450583108849, "grad_norm": 0.7540026903152466, "learning_rate": 2.5199283411358916e-05, "loss": 0.0904, "step": 12895 }, { "epoch": 0.28416709360040104, "grad_norm": 1.1248120069503784, "learning_rate": 2.519849840856884e-05, "loss": 0.1084, "step": 12896 }, { "epoch": 0.2841891288899172, "grad_norm": 1.21278977394104, "learning_rate": 2.519771335383233e-05, "loss": 0.1119, "step": 12897 }, { "epoch": 0.2842111641794334, "grad_norm": 0.8934578895568848, "learning_rate": 2.5196928247153405e-05, "loss": 0.153, "step": 12898 }, { "epoch": 0.28423319946894954, "grad_norm": 0.9632738828659058, "learning_rate": 2.5196143088536056e-05, "loss": 0.0718, "step": 12899 }, { "epoch": 0.2842552347584657, "grad_norm": 0.7777654528617859, "learning_rate": 2.519535787798428e-05, "loss": 0.1009, "step": 12900 }, { "epoch": 0.28427727004798187, "grad_norm": 0.6561382412910461, "learning_rate": 2.519457261550208e-05, "loss": 0.1029, "step": 12901 }, { "epoch": 0.284299305337498, "grad_norm": 0.6515147089958191, "learning_rate": 2.5193787301093457e-05, "loss": 0.0941, "step": 12902 }, { "epoch": 0.28432134062701414, "grad_norm": 0.900073230266571, "learning_rate": 2.5193001934762406e-05, "loss": 0.0671, "step": 12903 }, { "epoch": 0.2843433759165303, "grad_norm": 1.1929802894592285, "learning_rate": 2.519221651651293e-05, "loss": 0.1123, "step": 12904 }, { "epoch": 0.28436541120604647, "grad_norm": 0.69673752784729, "learning_rate": 2.5191431046349032e-05, "loss": 0.0761, "step": 12905 }, { "epoch": 0.28438744649556263, "grad_norm": 1.0340462923049927, "learning_rate": 2.519064552427471e-05, "loss": 0.129, "step": 12906 }, { "epoch": 0.2844094817850788, "grad_norm": 0.4829888343811035, "learning_rate": 2.5189859950293967e-05, "loss": 0.0861, "step": 12907 }, { "epoch": 0.28443151707459496, "grad_norm": 0.8412280082702637, "learning_rate": 2.5189074324410798e-05, "loss": 0.0724, "step": 12908 }, { "epoch": 0.28445355236411113, "grad_norm": 0.5565673112869263, "learning_rate": 2.5188288646629215e-05, "loss": 0.0916, "step": 12909 }, { "epoch": 0.2844755876536273, "grad_norm": 0.6941252946853638, "learning_rate": 2.5187502916953214e-05, "loss": 0.0994, "step": 12910 }, { "epoch": 0.28449762294314346, "grad_norm": 0.9984186291694641, "learning_rate": 2.5186717135386796e-05, "loss": 0.0885, "step": 12911 }, { "epoch": 0.2845196582326596, "grad_norm": 0.6756015419960022, "learning_rate": 2.518593130193396e-05, "loss": 0.0648, "step": 12912 }, { "epoch": 0.2845416935221758, "grad_norm": 0.48095667362213135, "learning_rate": 2.5185145416598725e-05, "loss": 0.1007, "step": 12913 }, { "epoch": 0.28456372881169195, "grad_norm": 0.818915069103241, "learning_rate": 2.5184359479385073e-05, "loss": 0.0846, "step": 12914 }, { "epoch": 0.28458576410120806, "grad_norm": 0.9192948937416077, "learning_rate": 2.5183573490297022e-05, "loss": 0.0818, "step": 12915 }, { "epoch": 0.2846077993907242, "grad_norm": 1.835247278213501, "learning_rate": 2.518278744933857e-05, "loss": 0.1063, "step": 12916 }, { "epoch": 0.2846298346802404, "grad_norm": 0.6403505802154541, "learning_rate": 2.518200135651372e-05, "loss": 0.1041, "step": 12917 }, { "epoch": 0.28465186996975655, "grad_norm": 1.04350745677948, "learning_rate": 2.5181215211826477e-05, "loss": 0.1312, "step": 12918 }, { "epoch": 0.2846739052592727, "grad_norm": 0.7398790121078491, "learning_rate": 2.5180429015280847e-05, "loss": 0.0866, "step": 12919 }, { "epoch": 0.2846959405487889, "grad_norm": 0.4668518006801605, "learning_rate": 2.5179642766880832e-05, "loss": 0.091, "step": 12920 }, { "epoch": 0.28471797583830505, "grad_norm": 0.6997042894363403, "learning_rate": 2.5178856466630437e-05, "loss": 0.0898, "step": 12921 }, { "epoch": 0.2847400111278212, "grad_norm": 0.49167630076408386, "learning_rate": 2.5178070114533664e-05, "loss": 0.0882, "step": 12922 }, { "epoch": 0.2847620464173374, "grad_norm": 0.9971901774406433, "learning_rate": 2.5177283710594527e-05, "loss": 0.1004, "step": 12923 }, { "epoch": 0.28478408170685354, "grad_norm": 0.6259168386459351, "learning_rate": 2.517649725481703e-05, "loss": 0.0476, "step": 12924 }, { "epoch": 0.2848061169963697, "grad_norm": 1.4464025497436523, "learning_rate": 2.5175710747205165e-05, "loss": 0.0996, "step": 12925 }, { "epoch": 0.28482815228588587, "grad_norm": 0.5230530500411987, "learning_rate": 2.5174924187762956e-05, "loss": 0.0923, "step": 12926 }, { "epoch": 0.284850187575402, "grad_norm": 1.2424877882003784, "learning_rate": 2.51741375764944e-05, "loss": 0.0843, "step": 12927 }, { "epoch": 0.28487222286491815, "grad_norm": 0.7720536589622498, "learning_rate": 2.5173350913403507e-05, "loss": 0.0918, "step": 12928 }, { "epoch": 0.2848942581544343, "grad_norm": 1.3368909358978271, "learning_rate": 2.5172564198494275e-05, "loss": 0.1311, "step": 12929 }, { "epoch": 0.2849162934439505, "grad_norm": 0.8529839515686035, "learning_rate": 2.517177743177073e-05, "loss": 0.0816, "step": 12930 }, { "epoch": 0.28493832873346664, "grad_norm": 0.9897011518478394, "learning_rate": 2.517099061323686e-05, "loss": 0.1005, "step": 12931 }, { "epoch": 0.2849603640229828, "grad_norm": 0.9625180959701538, "learning_rate": 2.5170203742896687e-05, "loss": 0.1278, "step": 12932 }, { "epoch": 0.28498239931249897, "grad_norm": 0.8655409812927246, "learning_rate": 2.5169416820754204e-05, "loss": 0.0858, "step": 12933 }, { "epoch": 0.28500443460201513, "grad_norm": 0.8571525812149048, "learning_rate": 2.5168629846813438e-05, "loss": 0.1112, "step": 12934 }, { "epoch": 0.2850264698915313, "grad_norm": 0.6577383875846863, "learning_rate": 2.5167842821078382e-05, "loss": 0.1191, "step": 12935 }, { "epoch": 0.28504850518104746, "grad_norm": 0.5937185287475586, "learning_rate": 2.5167055743553048e-05, "loss": 0.1298, "step": 12936 }, { "epoch": 0.2850705404705636, "grad_norm": 0.676819920539856, "learning_rate": 2.516626861424145e-05, "loss": 0.1122, "step": 12937 }, { "epoch": 0.2850925757600798, "grad_norm": 0.9458550810813904, "learning_rate": 2.5165481433147594e-05, "loss": 0.0903, "step": 12938 }, { "epoch": 0.2851146110495959, "grad_norm": 1.1900057792663574, "learning_rate": 2.5164694200275492e-05, "loss": 0.0762, "step": 12939 }, { "epoch": 0.28513664633911207, "grad_norm": 1.0237005949020386, "learning_rate": 2.516390691562915e-05, "loss": 0.1247, "step": 12940 }, { "epoch": 0.28515868162862823, "grad_norm": 0.7335453629493713, "learning_rate": 2.5163119579212573e-05, "loss": 0.0725, "step": 12941 }, { "epoch": 0.2851807169181444, "grad_norm": 0.7437767386436462, "learning_rate": 2.5162332191029784e-05, "loss": 0.0834, "step": 12942 }, { "epoch": 0.28520275220766056, "grad_norm": 0.561591386795044, "learning_rate": 2.516154475108479e-05, "loss": 0.1045, "step": 12943 }, { "epoch": 0.2852247874971767, "grad_norm": 0.4348059594631195, "learning_rate": 2.5160757259381594e-05, "loss": 0.0871, "step": 12944 }, { "epoch": 0.2852468227866929, "grad_norm": 1.0123529434204102, "learning_rate": 2.515996971592422e-05, "loss": 0.0914, "step": 12945 }, { "epoch": 0.28526885807620905, "grad_norm": 0.8127322793006897, "learning_rate": 2.5159182120716668e-05, "loss": 0.12, "step": 12946 }, { "epoch": 0.2852908933657252, "grad_norm": 0.6865473389625549, "learning_rate": 2.5158394473762952e-05, "loss": 0.0867, "step": 12947 }, { "epoch": 0.2853129286552414, "grad_norm": 0.939228355884552, "learning_rate": 2.5157606775067084e-05, "loss": 0.0776, "step": 12948 }, { "epoch": 0.28533496394475755, "grad_norm": 0.4282481372356415, "learning_rate": 2.5156819024633085e-05, "loss": 0.113, "step": 12949 }, { "epoch": 0.2853569992342737, "grad_norm": 1.57817542552948, "learning_rate": 2.5156031222464956e-05, "loss": 0.1253, "step": 12950 }, { "epoch": 0.2853790345237899, "grad_norm": 0.7474378943443298, "learning_rate": 2.5155243368566712e-05, "loss": 0.0946, "step": 12951 }, { "epoch": 0.285401069813306, "grad_norm": 0.528432309627533, "learning_rate": 2.515445546294237e-05, "loss": 0.0871, "step": 12952 }, { "epoch": 0.28542310510282215, "grad_norm": 0.8367342948913574, "learning_rate": 2.5153667505595944e-05, "loss": 0.1226, "step": 12953 }, { "epoch": 0.2854451403923383, "grad_norm": 0.9034682512283325, "learning_rate": 2.515287949653144e-05, "loss": 0.1258, "step": 12954 }, { "epoch": 0.2854671756818545, "grad_norm": 0.8130605220794678, "learning_rate": 2.515209143575288e-05, "loss": 0.1083, "step": 12955 }, { "epoch": 0.28548921097137064, "grad_norm": 2.074531316757202, "learning_rate": 2.515130332326427e-05, "loss": 0.1499, "step": 12956 }, { "epoch": 0.2855112462608868, "grad_norm": 0.8152934908866882, "learning_rate": 2.515051515906963e-05, "loss": 0.0699, "step": 12957 }, { "epoch": 0.285533281550403, "grad_norm": 1.6296716928482056, "learning_rate": 2.5149726943172974e-05, "loss": 0.086, "step": 12958 }, { "epoch": 0.28555531683991914, "grad_norm": 0.7416595816612244, "learning_rate": 2.5148938675578317e-05, "loss": 0.1281, "step": 12959 }, { "epoch": 0.2855773521294353, "grad_norm": 0.6938530802726746, "learning_rate": 2.5148150356289675e-05, "loss": 0.0954, "step": 12960 }, { "epoch": 0.28559938741895147, "grad_norm": 0.7170335054397583, "learning_rate": 2.5147361985311055e-05, "loss": 0.1002, "step": 12961 }, { "epoch": 0.28562142270846763, "grad_norm": 0.8618252873420715, "learning_rate": 2.5146573562646484e-05, "loss": 0.0963, "step": 12962 }, { "epoch": 0.2856434579979838, "grad_norm": 0.8967399001121521, "learning_rate": 2.5145785088299974e-05, "loss": 0.1384, "step": 12963 }, { "epoch": 0.2856654932874999, "grad_norm": 0.5724571943283081, "learning_rate": 2.514499656227553e-05, "loss": 0.0842, "step": 12964 }, { "epoch": 0.28568752857701607, "grad_norm": 0.7054327130317688, "learning_rate": 2.5144207984577188e-05, "loss": 0.0748, "step": 12965 }, { "epoch": 0.28570956386653223, "grad_norm": 0.7691763639450073, "learning_rate": 2.5143419355208954e-05, "loss": 0.1023, "step": 12966 }, { "epoch": 0.2857315991560484, "grad_norm": 0.7074808478355408, "learning_rate": 2.5142630674174842e-05, "loss": 0.0926, "step": 12967 }, { "epoch": 0.28575363444556456, "grad_norm": 0.6894553303718567, "learning_rate": 2.514184194147888e-05, "loss": 0.1012, "step": 12968 }, { "epoch": 0.28577566973508073, "grad_norm": 0.7804470658302307, "learning_rate": 2.514105315712507e-05, "loss": 0.0894, "step": 12969 }, { "epoch": 0.2857977050245969, "grad_norm": 0.7111267447471619, "learning_rate": 2.5140264321117443e-05, "loss": 0.0679, "step": 12970 }, { "epoch": 0.28581974031411306, "grad_norm": 0.9724889993667603, "learning_rate": 2.513947543346001e-05, "loss": 0.0841, "step": 12971 }, { "epoch": 0.2858417756036292, "grad_norm": 0.8499370217323303, "learning_rate": 2.5138686494156794e-05, "loss": 0.1131, "step": 12972 }, { "epoch": 0.2858638108931454, "grad_norm": 0.87872713804245, "learning_rate": 2.5137897503211805e-05, "loss": 0.0956, "step": 12973 }, { "epoch": 0.28588584618266155, "grad_norm": 0.6225234866142273, "learning_rate": 2.5137108460629075e-05, "loss": 0.0963, "step": 12974 }, { "epoch": 0.2859078814721777, "grad_norm": 0.7022160887718201, "learning_rate": 2.5136319366412613e-05, "loss": 0.0889, "step": 12975 }, { "epoch": 0.2859299167616938, "grad_norm": 1.1425925493240356, "learning_rate": 2.5135530220566443e-05, "loss": 0.0784, "step": 12976 }, { "epoch": 0.28595195205121, "grad_norm": 0.47871163487434387, "learning_rate": 2.5134741023094578e-05, "loss": 0.1154, "step": 12977 }, { "epoch": 0.28597398734072615, "grad_norm": 0.7666566371917725, "learning_rate": 2.5133951774001044e-05, "loss": 0.0937, "step": 12978 }, { "epoch": 0.2859960226302423, "grad_norm": 0.6820563673973083, "learning_rate": 2.513316247328986e-05, "loss": 0.1205, "step": 12979 }, { "epoch": 0.2860180579197585, "grad_norm": 1.0006088018417358, "learning_rate": 2.513237312096505e-05, "loss": 0.0681, "step": 12980 }, { "epoch": 0.28604009320927465, "grad_norm": 0.9249187111854553, "learning_rate": 2.513158371703062e-05, "loss": 0.128, "step": 12981 }, { "epoch": 0.2860621284987908, "grad_norm": 0.7665625810623169, "learning_rate": 2.5130794261490608e-05, "loss": 0.0846, "step": 12982 }, { "epoch": 0.286084163788307, "grad_norm": 0.5888425707817078, "learning_rate": 2.513000475434903e-05, "loss": 0.0989, "step": 12983 }, { "epoch": 0.28610619907782314, "grad_norm": 0.6885518431663513, "learning_rate": 2.5129215195609904e-05, "loss": 0.101, "step": 12984 }, { "epoch": 0.2861282343673393, "grad_norm": 0.8266375660896301, "learning_rate": 2.512842558527725e-05, "loss": 0.1171, "step": 12985 }, { "epoch": 0.28615026965685547, "grad_norm": 0.47622695565223694, "learning_rate": 2.5127635923355094e-05, "loss": 0.1161, "step": 12986 }, { "epoch": 0.28617230494637164, "grad_norm": 0.7939360737800598, "learning_rate": 2.512684620984746e-05, "loss": 0.0794, "step": 12987 }, { "epoch": 0.2861943402358878, "grad_norm": 0.5477630496025085, "learning_rate": 2.5126056444758365e-05, "loss": 0.0911, "step": 12988 }, { "epoch": 0.2862163755254039, "grad_norm": 0.6365480422973633, "learning_rate": 2.5125266628091836e-05, "loss": 0.0977, "step": 12989 }, { "epoch": 0.2862384108149201, "grad_norm": 0.5321412086486816, "learning_rate": 2.5124476759851896e-05, "loss": 0.1072, "step": 12990 }, { "epoch": 0.28626044610443624, "grad_norm": 0.643645703792572, "learning_rate": 2.5123686840042567e-05, "loss": 0.1001, "step": 12991 }, { "epoch": 0.2862824813939524, "grad_norm": 0.7258797287940979, "learning_rate": 2.5122896868667868e-05, "loss": 0.0914, "step": 12992 }, { "epoch": 0.28630451668346857, "grad_norm": 0.7349564433097839, "learning_rate": 2.512210684573183e-05, "loss": 0.0599, "step": 12993 }, { "epoch": 0.28632655197298473, "grad_norm": 0.6417205333709717, "learning_rate": 2.512131677123847e-05, "loss": 0.0984, "step": 12994 }, { "epoch": 0.2863485872625009, "grad_norm": 1.1560724973678589, "learning_rate": 2.5120526645191822e-05, "loss": 0.0996, "step": 12995 }, { "epoch": 0.28637062255201706, "grad_norm": 1.0294774770736694, "learning_rate": 2.5119736467595903e-05, "loss": 0.0833, "step": 12996 }, { "epoch": 0.2863926578415332, "grad_norm": 1.0207064151763916, "learning_rate": 2.511894623845474e-05, "loss": 0.1524, "step": 12997 }, { "epoch": 0.2864146931310494, "grad_norm": 0.4121081233024597, "learning_rate": 2.5118155957772354e-05, "loss": 0.0713, "step": 12998 }, { "epoch": 0.28643672842056556, "grad_norm": 0.4363574981689453, "learning_rate": 2.511736562555278e-05, "loss": 0.0893, "step": 12999 }, { "epoch": 0.2864587637100817, "grad_norm": 0.9983174204826355, "learning_rate": 2.5116575241800033e-05, "loss": 0.1293, "step": 13000 }, { "epoch": 0.28648079899959783, "grad_norm": 0.5379474759101868, "learning_rate": 2.5115784806518146e-05, "loss": 0.1528, "step": 13001 }, { "epoch": 0.286502834289114, "grad_norm": 0.7891483306884766, "learning_rate": 2.5114994319711135e-05, "loss": 0.0878, "step": 13002 }, { "epoch": 0.28652486957863016, "grad_norm": 1.0162811279296875, "learning_rate": 2.511420378138304e-05, "loss": 0.0813, "step": 13003 }, { "epoch": 0.2865469048681463, "grad_norm": 0.6284421682357788, "learning_rate": 2.5113413191537885e-05, "loss": 0.1547, "step": 13004 }, { "epoch": 0.2865689401576625, "grad_norm": 1.0426207780838013, "learning_rate": 2.5112622550179688e-05, "loss": 0.0977, "step": 13005 }, { "epoch": 0.28659097544717865, "grad_norm": 0.47270578145980835, "learning_rate": 2.511183185731248e-05, "loss": 0.0953, "step": 13006 }, { "epoch": 0.2866130107366948, "grad_norm": 1.1616415977478027, "learning_rate": 2.5111041112940296e-05, "loss": 0.1514, "step": 13007 }, { "epoch": 0.286635046026211, "grad_norm": 1.2403823137283325, "learning_rate": 2.5110250317067155e-05, "loss": 0.0869, "step": 13008 }, { "epoch": 0.28665708131572715, "grad_norm": 1.0176676511764526, "learning_rate": 2.510945946969708e-05, "loss": 0.0936, "step": 13009 }, { "epoch": 0.2866791166052433, "grad_norm": 1.4946858882904053, "learning_rate": 2.5108668570834122e-05, "loss": 0.1113, "step": 13010 }, { "epoch": 0.2867011518947595, "grad_norm": 0.79281085729599, "learning_rate": 2.510787762048228e-05, "loss": 0.1109, "step": 13011 }, { "epoch": 0.28672318718427564, "grad_norm": 0.7862378358840942, "learning_rate": 2.5107086618645608e-05, "loss": 0.0833, "step": 13012 }, { "epoch": 0.28674522247379175, "grad_norm": 0.6871078014373779, "learning_rate": 2.5106295565328116e-05, "loss": 0.0944, "step": 13013 }, { "epoch": 0.2867672577633079, "grad_norm": 0.67179936170578, "learning_rate": 2.5105504460533846e-05, "loss": 0.0819, "step": 13014 }, { "epoch": 0.2867892930528241, "grad_norm": 1.3058226108551025, "learning_rate": 2.510471330426682e-05, "loss": 0.0871, "step": 13015 }, { "epoch": 0.28681132834234024, "grad_norm": 0.6611136198043823, "learning_rate": 2.5103922096531072e-05, "loss": 0.077, "step": 13016 }, { "epoch": 0.2868333636318564, "grad_norm": 0.6512553691864014, "learning_rate": 2.510313083733063e-05, "loss": 0.118, "step": 13017 }, { "epoch": 0.2868553989213726, "grad_norm": 0.5570469498634338, "learning_rate": 2.5102339526669527e-05, "loss": 0.0767, "step": 13018 }, { "epoch": 0.28687743421088874, "grad_norm": 0.8502042889595032, "learning_rate": 2.5101548164551788e-05, "loss": 0.1074, "step": 13019 }, { "epoch": 0.2868994695004049, "grad_norm": 1.1272752285003662, "learning_rate": 2.510075675098145e-05, "loss": 0.102, "step": 13020 }, { "epoch": 0.28692150478992107, "grad_norm": 0.5182015895843506, "learning_rate": 2.5099965285962536e-05, "loss": 0.1069, "step": 13021 }, { "epoch": 0.28694354007943723, "grad_norm": 0.9723609089851379, "learning_rate": 2.509917376949909e-05, "loss": 0.0956, "step": 13022 }, { "epoch": 0.2869655753689534, "grad_norm": 0.5642294883728027, "learning_rate": 2.5098382201595133e-05, "loss": 0.0533, "step": 13023 }, { "epoch": 0.28698761065846956, "grad_norm": 0.7481503486633301, "learning_rate": 2.50975905822547e-05, "loss": 0.138, "step": 13024 }, { "epoch": 0.2870096459479857, "grad_norm": 0.6285322904586792, "learning_rate": 2.5096798911481825e-05, "loss": 0.0949, "step": 13025 }, { "epoch": 0.28703168123750183, "grad_norm": 0.6917303204536438, "learning_rate": 2.5096007189280538e-05, "loss": 0.0837, "step": 13026 }, { "epoch": 0.287053716527018, "grad_norm": 0.6718170642852783, "learning_rate": 2.5095215415654874e-05, "loss": 0.0762, "step": 13027 }, { "epoch": 0.28707575181653416, "grad_norm": 0.6836065053939819, "learning_rate": 2.5094423590608865e-05, "loss": 0.1243, "step": 13028 }, { "epoch": 0.28709778710605033, "grad_norm": 0.35300546884536743, "learning_rate": 2.509363171414654e-05, "loss": 0.0889, "step": 13029 }, { "epoch": 0.2871198223955665, "grad_norm": 0.4319070875644684, "learning_rate": 2.5092839786271938e-05, "loss": 0.0747, "step": 13030 }, { "epoch": 0.28714185768508266, "grad_norm": 1.6794860363006592, "learning_rate": 2.509204780698909e-05, "loss": 0.1056, "step": 13031 }, { "epoch": 0.2871638929745988, "grad_norm": 0.8139516115188599, "learning_rate": 2.5091255776302025e-05, "loss": 0.1198, "step": 13032 }, { "epoch": 0.287185928264115, "grad_norm": 0.7470147013664246, "learning_rate": 2.5090463694214795e-05, "loss": 0.0803, "step": 13033 }, { "epoch": 0.28720796355363115, "grad_norm": 0.7987405061721802, "learning_rate": 2.5089671560731412e-05, "loss": 0.0931, "step": 13034 }, { "epoch": 0.2872299988431473, "grad_norm": 0.5305077433586121, "learning_rate": 2.5088879375855928e-05, "loss": 0.0971, "step": 13035 }, { "epoch": 0.2872520341326635, "grad_norm": 0.9919933676719666, "learning_rate": 2.508808713959237e-05, "loss": 0.1435, "step": 13036 }, { "epoch": 0.28727406942217965, "grad_norm": 0.7758237719535828, "learning_rate": 2.5087294851944776e-05, "loss": 0.084, "step": 13037 }, { "epoch": 0.28729610471169575, "grad_norm": 0.49848833680152893, "learning_rate": 2.5086502512917172e-05, "loss": 0.0848, "step": 13038 }, { "epoch": 0.2873181400012119, "grad_norm": 0.8264907598495483, "learning_rate": 2.5085710122513616e-05, "loss": 0.098, "step": 13039 }, { "epoch": 0.2873401752907281, "grad_norm": 0.6916388273239136, "learning_rate": 2.5084917680738114e-05, "loss": 0.0823, "step": 13040 }, { "epoch": 0.28736221058024425, "grad_norm": 0.7259942889213562, "learning_rate": 2.5084125187594726e-05, "loss": 0.0968, "step": 13041 }, { "epoch": 0.2873842458697604, "grad_norm": 0.7289857268333435, "learning_rate": 2.5083332643087483e-05, "loss": 0.0992, "step": 13042 }, { "epoch": 0.2874062811592766, "grad_norm": 0.7172713875770569, "learning_rate": 2.508254004722042e-05, "loss": 0.0854, "step": 13043 }, { "epoch": 0.28742831644879274, "grad_norm": 0.7591555118560791, "learning_rate": 2.5081747399997566e-05, "loss": 0.0829, "step": 13044 }, { "epoch": 0.2874503517383089, "grad_norm": 1.3754934072494507, "learning_rate": 2.5080954701422973e-05, "loss": 0.1239, "step": 13045 }, { "epoch": 0.28747238702782507, "grad_norm": 1.3869214057922363, "learning_rate": 2.5080161951500672e-05, "loss": 0.1063, "step": 13046 }, { "epoch": 0.28749442231734124, "grad_norm": 0.6207695603370667, "learning_rate": 2.50793691502347e-05, "loss": 0.0936, "step": 13047 }, { "epoch": 0.2875164576068574, "grad_norm": 0.7901611328125, "learning_rate": 2.5078576297629094e-05, "loss": 0.0618, "step": 13048 }, { "epoch": 0.28753849289637357, "grad_norm": 0.7043701410293579, "learning_rate": 2.5077783393687896e-05, "loss": 0.0956, "step": 13049 }, { "epoch": 0.28756052818588973, "grad_norm": 0.7406294345855713, "learning_rate": 2.507699043841514e-05, "loss": 0.0636, "step": 13050 }, { "epoch": 0.28758256347540584, "grad_norm": 0.951724112033844, "learning_rate": 2.5076197431814873e-05, "loss": 0.1084, "step": 13051 }, { "epoch": 0.287604598764922, "grad_norm": 1.184381365776062, "learning_rate": 2.5075404373891125e-05, "loss": 0.1153, "step": 13052 }, { "epoch": 0.28762663405443817, "grad_norm": 0.7731248736381531, "learning_rate": 2.5074611264647942e-05, "loss": 0.0823, "step": 13053 }, { "epoch": 0.28764866934395433, "grad_norm": 0.8250216245651245, "learning_rate": 2.5073818104089365e-05, "loss": 0.0695, "step": 13054 }, { "epoch": 0.2876707046334705, "grad_norm": 0.847512423992157, "learning_rate": 2.5073024892219422e-05, "loss": 0.1208, "step": 13055 }, { "epoch": 0.28769273992298666, "grad_norm": 1.0424797534942627, "learning_rate": 2.5072231629042162e-05, "loss": 0.1176, "step": 13056 }, { "epoch": 0.2877147752125028, "grad_norm": 0.8538994789123535, "learning_rate": 2.507143831456163e-05, "loss": 0.1133, "step": 13057 }, { "epoch": 0.287736810502019, "grad_norm": 0.6863552331924438, "learning_rate": 2.5070644948781866e-05, "loss": 0.0784, "step": 13058 }, { "epoch": 0.28775884579153516, "grad_norm": 1.1131014823913574, "learning_rate": 2.50698515317069e-05, "loss": 0.1617, "step": 13059 }, { "epoch": 0.2877808810810513, "grad_norm": 0.5383844375610352, "learning_rate": 2.506905806334078e-05, "loss": 0.091, "step": 13060 }, { "epoch": 0.2878029163705675, "grad_norm": 0.7949021458625793, "learning_rate": 2.506826454368755e-05, "loss": 0.1195, "step": 13061 }, { "epoch": 0.28782495166008365, "grad_norm": 0.8903758525848389, "learning_rate": 2.506747097275125e-05, "loss": 0.101, "step": 13062 }, { "epoch": 0.28784698694959976, "grad_norm": 0.5544508695602417, "learning_rate": 2.506667735053592e-05, "loss": 0.0651, "step": 13063 }, { "epoch": 0.2878690222391159, "grad_norm": 0.9836490750312805, "learning_rate": 2.506588367704561e-05, "loss": 0.1372, "step": 13064 }, { "epoch": 0.2878910575286321, "grad_norm": 0.5669311881065369, "learning_rate": 2.5065089952284348e-05, "loss": 0.1022, "step": 13065 }, { "epoch": 0.28791309281814825, "grad_norm": 0.551837146282196, "learning_rate": 2.506429617625619e-05, "loss": 0.1261, "step": 13066 }, { "epoch": 0.2879351281076644, "grad_norm": 0.717289388179779, "learning_rate": 2.5063502348965174e-05, "loss": 0.1317, "step": 13067 }, { "epoch": 0.2879571633971806, "grad_norm": 0.9783852696418762, "learning_rate": 2.5062708470415345e-05, "loss": 0.1347, "step": 13068 }, { "epoch": 0.28797919868669675, "grad_norm": 0.8421875238418579, "learning_rate": 2.5061914540610745e-05, "loss": 0.0859, "step": 13069 }, { "epoch": 0.2880012339762129, "grad_norm": 0.5263852477073669, "learning_rate": 2.5061120559555418e-05, "loss": 0.1435, "step": 13070 }, { "epoch": 0.2880232692657291, "grad_norm": 0.9005296230316162, "learning_rate": 2.5060326527253407e-05, "loss": 0.0777, "step": 13071 }, { "epoch": 0.28804530455524524, "grad_norm": 1.13457453250885, "learning_rate": 2.505953244370876e-05, "loss": 0.0768, "step": 13072 }, { "epoch": 0.2880673398447614, "grad_norm": 1.0218911170959473, "learning_rate": 2.505873830892552e-05, "loss": 0.1005, "step": 13073 }, { "epoch": 0.28808937513427757, "grad_norm": 0.6986886262893677, "learning_rate": 2.505794412290774e-05, "loss": 0.1005, "step": 13074 }, { "epoch": 0.2881114104237937, "grad_norm": 2.2269322872161865, "learning_rate": 2.5057149885659444e-05, "loss": 0.0874, "step": 13075 }, { "epoch": 0.28813344571330984, "grad_norm": 1.0812711715698242, "learning_rate": 2.5056355597184696e-05, "loss": 0.0951, "step": 13076 }, { "epoch": 0.288155481002826, "grad_norm": 0.7648225426673889, "learning_rate": 2.5055561257487538e-05, "loss": 0.0834, "step": 13077 }, { "epoch": 0.2881775162923422, "grad_norm": 0.9926742315292358, "learning_rate": 2.5054766866572008e-05, "loss": 0.1015, "step": 13078 }, { "epoch": 0.28819955158185834, "grad_norm": 1.0415340662002563, "learning_rate": 2.505397242444217e-05, "loss": 0.1372, "step": 13079 }, { "epoch": 0.2882215868713745, "grad_norm": 0.4955728352069855, "learning_rate": 2.5053177931102048e-05, "loss": 0.1032, "step": 13080 }, { "epoch": 0.28824362216089067, "grad_norm": 1.4356275796890259, "learning_rate": 2.5052383386555704e-05, "loss": 0.0793, "step": 13081 }, { "epoch": 0.28826565745040683, "grad_norm": 0.7255608439445496, "learning_rate": 2.505158879080718e-05, "loss": 0.1162, "step": 13082 }, { "epoch": 0.288287692739923, "grad_norm": 0.8748085498809814, "learning_rate": 2.5050794143860527e-05, "loss": 0.0957, "step": 13083 }, { "epoch": 0.28830972802943916, "grad_norm": 0.8581655025482178, "learning_rate": 2.504999944571979e-05, "loss": 0.078, "step": 13084 }, { "epoch": 0.2883317633189553, "grad_norm": 0.6728262901306152, "learning_rate": 2.5049204696389013e-05, "loss": 0.0815, "step": 13085 }, { "epoch": 0.2883537986084715, "grad_norm": 0.44585803151130676, "learning_rate": 2.504840989587225e-05, "loss": 0.1079, "step": 13086 }, { "epoch": 0.28837583389798765, "grad_norm": 0.8922337293624878, "learning_rate": 2.5047615044173546e-05, "loss": 0.1016, "step": 13087 }, { "epoch": 0.28839786918750376, "grad_norm": 0.582802414894104, "learning_rate": 2.5046820141296947e-05, "loss": 0.0656, "step": 13088 }, { "epoch": 0.28841990447701993, "grad_norm": 0.8733104467391968, "learning_rate": 2.5046025187246515e-05, "loss": 0.1144, "step": 13089 }, { "epoch": 0.2884419397665361, "grad_norm": 0.8323185443878174, "learning_rate": 2.504523018202628e-05, "loss": 0.1048, "step": 13090 }, { "epoch": 0.28846397505605226, "grad_norm": 1.042291283607483, "learning_rate": 2.5044435125640305e-05, "loss": 0.1077, "step": 13091 }, { "epoch": 0.2884860103455684, "grad_norm": 0.5667807459831238, "learning_rate": 2.504364001809264e-05, "loss": 0.0946, "step": 13092 }, { "epoch": 0.2885080456350846, "grad_norm": 0.7622038125991821, "learning_rate": 2.5042844859387323e-05, "loss": 0.1219, "step": 13093 }, { "epoch": 0.28853008092460075, "grad_norm": 0.7694903016090393, "learning_rate": 2.5042049649528414e-05, "loss": 0.1149, "step": 13094 }, { "epoch": 0.2885521162141169, "grad_norm": 0.7384423613548279, "learning_rate": 2.5041254388519963e-05, "loss": 0.0834, "step": 13095 }, { "epoch": 0.2885741515036331, "grad_norm": 0.6879094839096069, "learning_rate": 2.5040459076366017e-05, "loss": 0.088, "step": 13096 }, { "epoch": 0.28859618679314925, "grad_norm": 0.9000089764595032, "learning_rate": 2.503966371307063e-05, "loss": 0.0913, "step": 13097 }, { "epoch": 0.2886182220826654, "grad_norm": 0.7870866656303406, "learning_rate": 2.503886829863785e-05, "loss": 0.0855, "step": 13098 }, { "epoch": 0.2886402573721816, "grad_norm": 0.676097571849823, "learning_rate": 2.5038072833071734e-05, "loss": 0.0802, "step": 13099 }, { "epoch": 0.2886622926616977, "grad_norm": 0.7406219244003296, "learning_rate": 2.503727731637633e-05, "loss": 0.1098, "step": 13100 }, { "epoch": 0.28868432795121385, "grad_norm": 1.0626113414764404, "learning_rate": 2.503648174855569e-05, "loss": 0.1088, "step": 13101 }, { "epoch": 0.28870636324073, "grad_norm": 0.6553846001625061, "learning_rate": 2.5035686129613862e-05, "loss": 0.0865, "step": 13102 }, { "epoch": 0.2887283985302462, "grad_norm": 0.7604628205299377, "learning_rate": 2.5034890459554906e-05, "loss": 0.0711, "step": 13103 }, { "epoch": 0.28875043381976234, "grad_norm": 0.6240234971046448, "learning_rate": 2.5034094738382874e-05, "loss": 0.0862, "step": 13104 }, { "epoch": 0.2887724691092785, "grad_norm": 0.7866238355636597, "learning_rate": 2.5033298966101813e-05, "loss": 0.1201, "step": 13105 }, { "epoch": 0.28879450439879467, "grad_norm": 0.6821600198745728, "learning_rate": 2.503250314271578e-05, "loss": 0.0995, "step": 13106 }, { "epoch": 0.28881653968831084, "grad_norm": 0.9319878220558167, "learning_rate": 2.503170726822883e-05, "loss": 0.1393, "step": 13107 }, { "epoch": 0.288838574977827, "grad_norm": 1.116322636604309, "learning_rate": 2.503091134264502e-05, "loss": 0.0963, "step": 13108 }, { "epoch": 0.28886061026734317, "grad_norm": 0.7212627530097961, "learning_rate": 2.5030115365968395e-05, "loss": 0.1, "step": 13109 }, { "epoch": 0.28888264555685933, "grad_norm": 0.5687845349311829, "learning_rate": 2.5029319338203017e-05, "loss": 0.0914, "step": 13110 }, { "epoch": 0.2889046808463755, "grad_norm": 0.5184164047241211, "learning_rate": 2.502852325935293e-05, "loss": 0.0814, "step": 13111 }, { "epoch": 0.2889267161358916, "grad_norm": 0.6454169750213623, "learning_rate": 2.50277271294222e-05, "loss": 0.0968, "step": 13112 }, { "epoch": 0.28894875142540777, "grad_norm": 0.7900346517562866, "learning_rate": 2.5026930948414878e-05, "loss": 0.0778, "step": 13113 }, { "epoch": 0.28897078671492393, "grad_norm": 0.6603078842163086, "learning_rate": 2.5026134716335025e-05, "loss": 0.1003, "step": 13114 }, { "epoch": 0.2889928220044401, "grad_norm": 0.6404696106910706, "learning_rate": 2.5025338433186686e-05, "loss": 0.1167, "step": 13115 }, { "epoch": 0.28901485729395626, "grad_norm": 1.337896466255188, "learning_rate": 2.5024542098973925e-05, "loss": 0.0885, "step": 13116 }, { "epoch": 0.2890368925834724, "grad_norm": 0.7997991442680359, "learning_rate": 2.5023745713700795e-05, "loss": 0.1095, "step": 13117 }, { "epoch": 0.2890589278729886, "grad_norm": 0.4668605327606201, "learning_rate": 2.502294927737135e-05, "loss": 0.0695, "step": 13118 }, { "epoch": 0.28908096316250476, "grad_norm": 1.0102702379226685, "learning_rate": 2.5022152789989657e-05, "loss": 0.1331, "step": 13119 }, { "epoch": 0.2891029984520209, "grad_norm": 0.6358599066734314, "learning_rate": 2.5021356251559756e-05, "loss": 0.0914, "step": 13120 }, { "epoch": 0.2891250337415371, "grad_norm": 0.7092490196228027, "learning_rate": 2.5020559662085722e-05, "loss": 0.0621, "step": 13121 }, { "epoch": 0.28914706903105325, "grad_norm": 0.9607323408126831, "learning_rate": 2.5019763021571594e-05, "loss": 0.1399, "step": 13122 }, { "epoch": 0.2891691043205694, "grad_norm": 0.7752434611320496, "learning_rate": 2.501896633002145e-05, "loss": 0.0649, "step": 13123 }, { "epoch": 0.2891911396100856, "grad_norm": 0.8242852687835693, "learning_rate": 2.5018169587439333e-05, "loss": 0.0958, "step": 13124 }, { "epoch": 0.2892131748996017, "grad_norm": 0.9419968724250793, "learning_rate": 2.501737279382931e-05, "loss": 0.1132, "step": 13125 }, { "epoch": 0.28923521018911785, "grad_norm": 0.9803864359855652, "learning_rate": 2.5016575949195433e-05, "loss": 0.1301, "step": 13126 }, { "epoch": 0.289257245478634, "grad_norm": 0.7928091883659363, "learning_rate": 2.5015779053541764e-05, "loss": 0.0892, "step": 13127 }, { "epoch": 0.2892792807681502, "grad_norm": 0.7332343459129333, "learning_rate": 2.5014982106872357e-05, "loss": 0.0918, "step": 13128 }, { "epoch": 0.28930131605766635, "grad_norm": 0.7852743864059448, "learning_rate": 2.5014185109191286e-05, "loss": 0.0923, "step": 13129 }, { "epoch": 0.2893233513471825, "grad_norm": 0.8243123292922974, "learning_rate": 2.5013388060502588e-05, "loss": 0.0911, "step": 13130 }, { "epoch": 0.2893453866366987, "grad_norm": 0.9635123610496521, "learning_rate": 2.5012590960810344e-05, "loss": 0.089, "step": 13131 }, { "epoch": 0.28936742192621484, "grad_norm": 0.8003509640693665, "learning_rate": 2.5011793810118602e-05, "loss": 0.1198, "step": 13132 }, { "epoch": 0.289389457215731, "grad_norm": 0.5374305844306946, "learning_rate": 2.5010996608431424e-05, "loss": 0.102, "step": 13133 }, { "epoch": 0.28941149250524717, "grad_norm": 0.8629404306411743, "learning_rate": 2.5010199355752873e-05, "loss": 0.0627, "step": 13134 }, { "epoch": 0.28943352779476333, "grad_norm": 0.7698205709457397, "learning_rate": 2.5009402052087007e-05, "loss": 0.1133, "step": 13135 }, { "epoch": 0.2894555630842795, "grad_norm": 1.118186593055725, "learning_rate": 2.5008604697437893e-05, "loss": 0.078, "step": 13136 }, { "epoch": 0.2894775983737956, "grad_norm": 0.7123792171478271, "learning_rate": 2.5007807291809586e-05, "loss": 0.0651, "step": 13137 }, { "epoch": 0.2894996336633118, "grad_norm": 0.8332819938659668, "learning_rate": 2.5007009835206146e-05, "loss": 0.0975, "step": 13138 }, { "epoch": 0.28952166895282794, "grad_norm": 0.8840906620025635, "learning_rate": 2.5006212327631647e-05, "loss": 0.0831, "step": 13139 }, { "epoch": 0.2895437042423441, "grad_norm": 0.8055788278579712, "learning_rate": 2.500541476909014e-05, "loss": 0.0951, "step": 13140 }, { "epoch": 0.28956573953186027, "grad_norm": 0.7735571265220642, "learning_rate": 2.5004617159585687e-05, "loss": 0.0935, "step": 13141 }, { "epoch": 0.28958777482137643, "grad_norm": 0.8181522488594055, "learning_rate": 2.5003819499122355e-05, "loss": 0.0768, "step": 13142 }, { "epoch": 0.2896098101108926, "grad_norm": 0.9860718846321106, "learning_rate": 2.5003021787704207e-05, "loss": 0.0962, "step": 13143 }, { "epoch": 0.28963184540040876, "grad_norm": 1.1828129291534424, "learning_rate": 2.50022240253353e-05, "loss": 0.0983, "step": 13144 }, { "epoch": 0.2896538806899249, "grad_norm": 0.5415358543395996, "learning_rate": 2.5001426212019708e-05, "loss": 0.0847, "step": 13145 }, { "epoch": 0.2896759159794411, "grad_norm": 0.5858107805252075, "learning_rate": 2.500062834776149e-05, "loss": 0.0668, "step": 13146 }, { "epoch": 0.28969795126895725, "grad_norm": 0.4286552369594574, "learning_rate": 2.4999830432564706e-05, "loss": 0.0739, "step": 13147 }, { "epoch": 0.2897199865584734, "grad_norm": 0.7024149298667908, "learning_rate": 2.4999032466433427e-05, "loss": 0.0965, "step": 13148 }, { "epoch": 0.28974202184798953, "grad_norm": 0.961618959903717, "learning_rate": 2.4998234449371707e-05, "loss": 0.0865, "step": 13149 }, { "epoch": 0.2897640571375057, "grad_norm": 0.7618271708488464, "learning_rate": 2.4997436381383622e-05, "loss": 0.1302, "step": 13150 }, { "epoch": 0.28978609242702186, "grad_norm": 0.6313585638999939, "learning_rate": 2.499663826247323e-05, "loss": 0.0904, "step": 13151 }, { "epoch": 0.289808127716538, "grad_norm": 0.8313382863998413, "learning_rate": 2.4995840092644606e-05, "loss": 0.0824, "step": 13152 }, { "epoch": 0.2898301630060542, "grad_norm": 0.9153732061386108, "learning_rate": 2.49950418719018e-05, "loss": 0.0974, "step": 13153 }, { "epoch": 0.28985219829557035, "grad_norm": 0.8074787855148315, "learning_rate": 2.499424360024889e-05, "loss": 0.1299, "step": 13154 }, { "epoch": 0.2898742335850865, "grad_norm": 0.6374593377113342, "learning_rate": 2.4993445277689936e-05, "loss": 0.1104, "step": 13155 }, { "epoch": 0.2898962688746027, "grad_norm": 0.8360535502433777, "learning_rate": 2.499264690422901e-05, "loss": 0.0899, "step": 13156 }, { "epoch": 0.28991830416411885, "grad_norm": 0.7988741397857666, "learning_rate": 2.499184847987017e-05, "loss": 0.0941, "step": 13157 }, { "epoch": 0.289940339453635, "grad_norm": 0.9987679123878479, "learning_rate": 2.4991050004617494e-05, "loss": 0.1327, "step": 13158 }, { "epoch": 0.2899623747431512, "grad_norm": 0.6725371479988098, "learning_rate": 2.4990251478475034e-05, "loss": 0.0705, "step": 13159 }, { "epoch": 0.28998441003266734, "grad_norm": 0.5921391248703003, "learning_rate": 2.4989452901446873e-05, "loss": 0.088, "step": 13160 }, { "epoch": 0.2900064453221835, "grad_norm": 1.0688574314117432, "learning_rate": 2.4988654273537067e-05, "loss": 0.0945, "step": 13161 }, { "epoch": 0.2900284806116996, "grad_norm": 0.7845801711082458, "learning_rate": 2.4987855594749688e-05, "loss": 0.1409, "step": 13162 }, { "epoch": 0.2900505159012158, "grad_norm": 0.7735200524330139, "learning_rate": 2.4987056865088806e-05, "loss": 0.1022, "step": 13163 }, { "epoch": 0.29007255119073194, "grad_norm": 0.8230414986610413, "learning_rate": 2.498625808455849e-05, "loss": 0.0934, "step": 13164 }, { "epoch": 0.2900945864802481, "grad_norm": 0.7667228579521179, "learning_rate": 2.4985459253162804e-05, "loss": 0.0902, "step": 13165 }, { "epoch": 0.29011662176976427, "grad_norm": 0.7879992723464966, "learning_rate": 2.4984660370905823e-05, "loss": 0.1207, "step": 13166 }, { "epoch": 0.29013865705928044, "grad_norm": 0.8634366989135742, "learning_rate": 2.4983861437791615e-05, "loss": 0.1259, "step": 13167 }, { "epoch": 0.2901606923487966, "grad_norm": 0.8072128891944885, "learning_rate": 2.4983062453824238e-05, "loss": 0.1295, "step": 13168 }, { "epoch": 0.29018272763831277, "grad_norm": 1.0087448358535767, "learning_rate": 2.4982263419007773e-05, "loss": 0.1234, "step": 13169 }, { "epoch": 0.29020476292782893, "grad_norm": 0.7203376293182373, "learning_rate": 2.498146433334629e-05, "loss": 0.0899, "step": 13170 }, { "epoch": 0.2902267982173451, "grad_norm": 0.6800669431686401, "learning_rate": 2.4980665196843854e-05, "loss": 0.1043, "step": 13171 }, { "epoch": 0.29024883350686126, "grad_norm": 1.0755560398101807, "learning_rate": 2.497986600950454e-05, "loss": 0.1419, "step": 13172 }, { "epoch": 0.2902708687963774, "grad_norm": 0.6456467509269714, "learning_rate": 2.497906677133242e-05, "loss": 0.1039, "step": 13173 }, { "epoch": 0.29029290408589353, "grad_norm": 0.6907275319099426, "learning_rate": 2.497826748233156e-05, "loss": 0.0709, "step": 13174 }, { "epoch": 0.2903149393754097, "grad_norm": 0.9178808927536011, "learning_rate": 2.4977468142506028e-05, "loss": 0.0843, "step": 13175 }, { "epoch": 0.29033697466492586, "grad_norm": 0.5725552439689636, "learning_rate": 2.4976668751859902e-05, "loss": 0.0782, "step": 13176 }, { "epoch": 0.290359009954442, "grad_norm": 0.9466739296913147, "learning_rate": 2.4975869310397257e-05, "loss": 0.0974, "step": 13177 }, { "epoch": 0.2903810452439582, "grad_norm": 0.7772063612937927, "learning_rate": 2.4975069818122157e-05, "loss": 0.1158, "step": 13178 }, { "epoch": 0.29040308053347436, "grad_norm": 1.0426393747329712, "learning_rate": 2.4974270275038676e-05, "loss": 0.1157, "step": 13179 }, { "epoch": 0.2904251158229905, "grad_norm": 1.035007119178772, "learning_rate": 2.497347068115089e-05, "loss": 0.1001, "step": 13180 }, { "epoch": 0.2904471511125067, "grad_norm": 0.5579033493995667, "learning_rate": 2.497267103646287e-05, "loss": 0.0763, "step": 13181 }, { "epoch": 0.29046918640202285, "grad_norm": 1.377934217453003, "learning_rate": 2.4971871340978688e-05, "loss": 0.0999, "step": 13182 }, { "epoch": 0.290491221691539, "grad_norm": 0.675349235534668, "learning_rate": 2.4971071594702418e-05, "loss": 0.0971, "step": 13183 }, { "epoch": 0.2905132569810552, "grad_norm": 0.5226446986198425, "learning_rate": 2.497027179763813e-05, "loss": 0.083, "step": 13184 }, { "epoch": 0.29053529227057134, "grad_norm": 0.5431139469146729, "learning_rate": 2.4969471949789912e-05, "loss": 0.0755, "step": 13185 }, { "epoch": 0.29055732756008745, "grad_norm": 0.5946769714355469, "learning_rate": 2.4968672051161817e-05, "loss": 0.0878, "step": 13186 }, { "epoch": 0.2905793628496036, "grad_norm": 0.8033459782600403, "learning_rate": 2.496787210175793e-05, "loss": 0.081, "step": 13187 }, { "epoch": 0.2906013981391198, "grad_norm": 0.748365581035614, "learning_rate": 2.4967072101582327e-05, "loss": 0.1367, "step": 13188 }, { "epoch": 0.29062343342863595, "grad_norm": 0.7426856756210327, "learning_rate": 2.4966272050639077e-05, "loss": 0.1107, "step": 13189 }, { "epoch": 0.2906454687181521, "grad_norm": 0.9108640551567078, "learning_rate": 2.496547194893227e-05, "loss": 0.1238, "step": 13190 }, { "epoch": 0.2906675040076683, "grad_norm": 0.726988673210144, "learning_rate": 2.496467179646596e-05, "loss": 0.1227, "step": 13191 }, { "epoch": 0.29068953929718444, "grad_norm": 0.705719530582428, "learning_rate": 2.496387159324424e-05, "loss": 0.0668, "step": 13192 }, { "epoch": 0.2907115745867006, "grad_norm": 0.6748814582824707, "learning_rate": 2.4963071339271172e-05, "loss": 0.0634, "step": 13193 }, { "epoch": 0.29073360987621677, "grad_norm": 0.6888374090194702, "learning_rate": 2.4962271034550844e-05, "loss": 0.1077, "step": 13194 }, { "epoch": 0.29075564516573293, "grad_norm": 1.2518157958984375, "learning_rate": 2.496147067908733e-05, "loss": 0.0968, "step": 13195 }, { "epoch": 0.2907776804552491, "grad_norm": 0.7723900079727173, "learning_rate": 2.49606702728847e-05, "loss": 0.0873, "step": 13196 }, { "epoch": 0.29079971574476526, "grad_norm": 0.6427212357521057, "learning_rate": 2.495986981594703e-05, "loss": 0.113, "step": 13197 }, { "epoch": 0.29082175103428143, "grad_norm": 0.6914231181144714, "learning_rate": 2.4959069308278412e-05, "loss": 0.1138, "step": 13198 }, { "epoch": 0.29084378632379754, "grad_norm": 0.5760478377342224, "learning_rate": 2.4958268749882907e-05, "loss": 0.1012, "step": 13199 }, { "epoch": 0.2908658216133137, "grad_norm": 1.1554687023162842, "learning_rate": 2.4957468140764603e-05, "loss": 0.1012, "step": 13200 }, { "epoch": 0.29088785690282987, "grad_norm": 1.2466187477111816, "learning_rate": 2.495666748092757e-05, "loss": 0.1443, "step": 13201 }, { "epoch": 0.29090989219234603, "grad_norm": 0.7817434072494507, "learning_rate": 2.4955866770375896e-05, "loss": 0.0754, "step": 13202 }, { "epoch": 0.2909319274818622, "grad_norm": 0.5937586426734924, "learning_rate": 2.495506600911365e-05, "loss": 0.0872, "step": 13203 }, { "epoch": 0.29095396277137836, "grad_norm": 0.562172532081604, "learning_rate": 2.4954265197144913e-05, "loss": 0.0673, "step": 13204 }, { "epoch": 0.2909759980608945, "grad_norm": 0.7035818099975586, "learning_rate": 2.4953464334473767e-05, "loss": 0.0924, "step": 13205 }, { "epoch": 0.2909980333504107, "grad_norm": 1.113997220993042, "learning_rate": 2.495266342110429e-05, "loss": 0.1142, "step": 13206 }, { "epoch": 0.29102006863992685, "grad_norm": 0.7374112606048584, "learning_rate": 2.495186245704056e-05, "loss": 0.0938, "step": 13207 }, { "epoch": 0.291042103929443, "grad_norm": 0.6279958486557007, "learning_rate": 2.4951061442286662e-05, "loss": 0.0864, "step": 13208 }, { "epoch": 0.2910641392189592, "grad_norm": 0.7493615746498108, "learning_rate": 2.495026037684667e-05, "loss": 0.1005, "step": 13209 }, { "epoch": 0.29108617450847535, "grad_norm": 0.7430419325828552, "learning_rate": 2.4949459260724663e-05, "loss": 0.075, "step": 13210 }, { "epoch": 0.29110820979799146, "grad_norm": 0.8353984951972961, "learning_rate": 2.4948658093924728e-05, "loss": 0.0915, "step": 13211 }, { "epoch": 0.2911302450875076, "grad_norm": 0.9108537435531616, "learning_rate": 2.4947856876450942e-05, "loss": 0.0878, "step": 13212 }, { "epoch": 0.2911522803770238, "grad_norm": 0.6605448722839355, "learning_rate": 2.4947055608307388e-05, "loss": 0.0753, "step": 13213 }, { "epoch": 0.29117431566653995, "grad_norm": 0.6716578602790833, "learning_rate": 2.4946254289498144e-05, "loss": 0.1107, "step": 13214 }, { "epoch": 0.2911963509560561, "grad_norm": 0.5534617900848389, "learning_rate": 2.494545292002729e-05, "loss": 0.1242, "step": 13215 }, { "epoch": 0.2912183862455723, "grad_norm": 1.1251139640808105, "learning_rate": 2.4944651499898914e-05, "loss": 0.1066, "step": 13216 }, { "epoch": 0.29124042153508845, "grad_norm": 0.9792665243148804, "learning_rate": 2.4943850029117096e-05, "loss": 0.0853, "step": 13217 }, { "epoch": 0.2912624568246046, "grad_norm": 0.8646479249000549, "learning_rate": 2.4943048507685917e-05, "loss": 0.1291, "step": 13218 }, { "epoch": 0.2912844921141208, "grad_norm": 0.6985100507736206, "learning_rate": 2.494224693560946e-05, "loss": 0.1147, "step": 13219 }, { "epoch": 0.29130652740363694, "grad_norm": 0.8697172999382019, "learning_rate": 2.4941445312891807e-05, "loss": 0.1571, "step": 13220 }, { "epoch": 0.2913285626931531, "grad_norm": 0.905368447303772, "learning_rate": 2.4940643639537042e-05, "loss": 0.1009, "step": 13221 }, { "epoch": 0.29135059798266927, "grad_norm": 0.8275235891342163, "learning_rate": 2.493984191554925e-05, "loss": 0.1035, "step": 13222 }, { "epoch": 0.2913726332721854, "grad_norm": 0.6830988526344299, "learning_rate": 2.493904014093251e-05, "loss": 0.1098, "step": 13223 }, { "epoch": 0.29139466856170154, "grad_norm": 0.6174512505531311, "learning_rate": 2.4938238315690913e-05, "loss": 0.1171, "step": 13224 }, { "epoch": 0.2914167038512177, "grad_norm": 0.5764219164848328, "learning_rate": 2.4937436439828537e-05, "loss": 0.1101, "step": 13225 }, { "epoch": 0.29143873914073387, "grad_norm": 0.5631698369979858, "learning_rate": 2.493663451334947e-05, "loss": 0.1058, "step": 13226 }, { "epoch": 0.29146077443025004, "grad_norm": 0.5605459213256836, "learning_rate": 2.4935832536257792e-05, "loss": 0.1001, "step": 13227 }, { "epoch": 0.2914828097197662, "grad_norm": 0.5626515746116638, "learning_rate": 2.493503050855759e-05, "loss": 0.1294, "step": 13228 }, { "epoch": 0.29150484500928237, "grad_norm": 1.068180799484253, "learning_rate": 2.4934228430252953e-05, "loss": 0.087, "step": 13229 }, { "epoch": 0.29152688029879853, "grad_norm": 0.5216764211654663, "learning_rate": 2.493342630134796e-05, "loss": 0.1118, "step": 13230 }, { "epoch": 0.2915489155883147, "grad_norm": 0.5464177131652832, "learning_rate": 2.4932624121846704e-05, "loss": 0.0818, "step": 13231 }, { "epoch": 0.29157095087783086, "grad_norm": 0.6836860775947571, "learning_rate": 2.4931821891753266e-05, "loss": 0.1166, "step": 13232 }, { "epoch": 0.291592986167347, "grad_norm": 0.9148392677307129, "learning_rate": 2.493101961107173e-05, "loss": 0.104, "step": 13233 }, { "epoch": 0.2916150214568632, "grad_norm": 0.6597577333450317, "learning_rate": 2.493021727980619e-05, "loss": 0.0787, "step": 13234 }, { "epoch": 0.29163705674637935, "grad_norm": 0.48780345916748047, "learning_rate": 2.4929414897960726e-05, "loss": 0.1131, "step": 13235 }, { "epoch": 0.29165909203589546, "grad_norm": 0.30513593554496765, "learning_rate": 2.492861246553943e-05, "loss": 0.0927, "step": 13236 }, { "epoch": 0.2916811273254116, "grad_norm": 0.7669328451156616, "learning_rate": 2.492780998254638e-05, "loss": 0.0949, "step": 13237 }, { "epoch": 0.2917031626149278, "grad_norm": 0.7039342522621155, "learning_rate": 2.4927007448985676e-05, "loss": 0.0618, "step": 13238 }, { "epoch": 0.29172519790444396, "grad_norm": 0.7617730498313904, "learning_rate": 2.49262048648614e-05, "loss": 0.0972, "step": 13239 }, { "epoch": 0.2917472331939601, "grad_norm": 0.7104114890098572, "learning_rate": 2.4925402230177638e-05, "loss": 0.0878, "step": 13240 }, { "epoch": 0.2917692684834763, "grad_norm": 0.7750684022903442, "learning_rate": 2.4924599544938483e-05, "loss": 0.1011, "step": 13241 }, { "epoch": 0.29179130377299245, "grad_norm": 0.8766257166862488, "learning_rate": 2.4923796809148016e-05, "loss": 0.0855, "step": 13242 }, { "epoch": 0.2918133390625086, "grad_norm": 0.7253164649009705, "learning_rate": 2.492299402281033e-05, "loss": 0.0752, "step": 13243 }, { "epoch": 0.2918353743520248, "grad_norm": 0.7836902141571045, "learning_rate": 2.4922191185929518e-05, "loss": 0.1346, "step": 13244 }, { "epoch": 0.29185740964154094, "grad_norm": 0.7392383217811584, "learning_rate": 2.4921388298509666e-05, "loss": 0.1194, "step": 13245 }, { "epoch": 0.2918794449310571, "grad_norm": 0.8794757723808289, "learning_rate": 2.492058536055486e-05, "loss": 0.1211, "step": 13246 }, { "epoch": 0.2919014802205733, "grad_norm": 0.5745155811309814, "learning_rate": 2.4919782372069196e-05, "loss": 0.0785, "step": 13247 }, { "epoch": 0.2919235155100894, "grad_norm": 0.7899231910705566, "learning_rate": 2.491897933305676e-05, "loss": 0.1412, "step": 13248 }, { "epoch": 0.29194555079960555, "grad_norm": 0.7594380974769592, "learning_rate": 2.4918176243521636e-05, "loss": 0.0681, "step": 13249 }, { "epoch": 0.2919675860891217, "grad_norm": 0.6996287107467651, "learning_rate": 2.4917373103467933e-05, "loss": 0.0686, "step": 13250 }, { "epoch": 0.2919896213786379, "grad_norm": 1.03275465965271, "learning_rate": 2.4916569912899723e-05, "loss": 0.1187, "step": 13251 }, { "epoch": 0.29201165666815404, "grad_norm": 0.5145279169082642, "learning_rate": 2.491576667182111e-05, "loss": 0.0716, "step": 13252 }, { "epoch": 0.2920336919576702, "grad_norm": 0.5215493440628052, "learning_rate": 2.4914963380236177e-05, "loss": 0.076, "step": 13253 }, { "epoch": 0.29205572724718637, "grad_norm": 0.8056696653366089, "learning_rate": 2.491416003814902e-05, "loss": 0.0786, "step": 13254 }, { "epoch": 0.29207776253670253, "grad_norm": 0.7234950661659241, "learning_rate": 2.491335664556373e-05, "loss": 0.1016, "step": 13255 }, { "epoch": 0.2920997978262187, "grad_norm": 0.8065552711486816, "learning_rate": 2.4912553202484396e-05, "loss": 0.0636, "step": 13256 }, { "epoch": 0.29212183311573486, "grad_norm": 1.456178069114685, "learning_rate": 2.4911749708915116e-05, "loss": 0.1143, "step": 13257 }, { "epoch": 0.29214386840525103, "grad_norm": 2.6278467178344727, "learning_rate": 2.4910946164859976e-05, "loss": 0.1297, "step": 13258 }, { "epoch": 0.2921659036947672, "grad_norm": 0.821168065071106, "learning_rate": 2.4910142570323074e-05, "loss": 0.0916, "step": 13259 }, { "epoch": 0.29218793898428336, "grad_norm": 0.6371126770973206, "learning_rate": 2.4909338925308505e-05, "loss": 0.1282, "step": 13260 }, { "epoch": 0.29220997427379947, "grad_norm": 0.8817780613899231, "learning_rate": 2.4908535229820357e-05, "loss": 0.1087, "step": 13261 }, { "epoch": 0.29223200956331563, "grad_norm": 0.5147528648376465, "learning_rate": 2.4907731483862723e-05, "loss": 0.0944, "step": 13262 }, { "epoch": 0.2922540448528318, "grad_norm": 0.5960580706596375, "learning_rate": 2.4906927687439703e-05, "loss": 0.0984, "step": 13263 }, { "epoch": 0.29227608014234796, "grad_norm": 1.0857644081115723, "learning_rate": 2.4906123840555386e-05, "loss": 0.1026, "step": 13264 }, { "epoch": 0.2922981154318641, "grad_norm": 0.5747016668319702, "learning_rate": 2.4905319943213873e-05, "loss": 0.0852, "step": 13265 }, { "epoch": 0.2923201507213803, "grad_norm": 0.7214599847793579, "learning_rate": 2.490451599541924e-05, "loss": 0.1194, "step": 13266 }, { "epoch": 0.29234218601089645, "grad_norm": 0.6673681139945984, "learning_rate": 2.490371199717561e-05, "loss": 0.0907, "step": 13267 }, { "epoch": 0.2923642213004126, "grad_norm": 0.7892741560935974, "learning_rate": 2.4902907948487058e-05, "loss": 0.1269, "step": 13268 }, { "epoch": 0.2923862565899288, "grad_norm": 1.0734831094741821, "learning_rate": 2.490210384935769e-05, "loss": 0.1161, "step": 13269 }, { "epoch": 0.29240829187944495, "grad_norm": 0.5369725823402405, "learning_rate": 2.4901299699791593e-05, "loss": 0.0906, "step": 13270 }, { "epoch": 0.2924303271689611, "grad_norm": 0.832740068435669, "learning_rate": 2.4900495499792864e-05, "loss": 0.0878, "step": 13271 }, { "epoch": 0.2924523624584773, "grad_norm": 0.5206719636917114, "learning_rate": 2.489969124936561e-05, "loss": 0.083, "step": 13272 }, { "epoch": 0.2924743977479934, "grad_norm": 1.10873281955719, "learning_rate": 2.4898886948513916e-05, "loss": 0.1136, "step": 13273 }, { "epoch": 0.29249643303750955, "grad_norm": 0.6126641631126404, "learning_rate": 2.4898082597241883e-05, "loss": 0.0659, "step": 13274 }, { "epoch": 0.2925184683270257, "grad_norm": 0.9109814763069153, "learning_rate": 2.4897278195553608e-05, "loss": 0.1164, "step": 13275 }, { "epoch": 0.2925405036165419, "grad_norm": 0.9610476493835449, "learning_rate": 2.4896473743453185e-05, "loss": 0.0892, "step": 13276 }, { "epoch": 0.29256253890605805, "grad_norm": 0.8699972629547119, "learning_rate": 2.4895669240944717e-05, "loss": 0.075, "step": 13277 }, { "epoch": 0.2925845741955742, "grad_norm": 0.6726310849189758, "learning_rate": 2.48948646880323e-05, "loss": 0.0832, "step": 13278 }, { "epoch": 0.2926066094850904, "grad_norm": 0.865817666053772, "learning_rate": 2.489406008472003e-05, "loss": 0.1387, "step": 13279 }, { "epoch": 0.29262864477460654, "grad_norm": 0.6345837712287903, "learning_rate": 2.4893255431012005e-05, "loss": 0.0943, "step": 13280 }, { "epoch": 0.2926506800641227, "grad_norm": 0.6980109810829163, "learning_rate": 2.4892450726912324e-05, "loss": 0.0925, "step": 13281 }, { "epoch": 0.29267271535363887, "grad_norm": 0.7058621644973755, "learning_rate": 2.489164597242509e-05, "loss": 0.1354, "step": 13282 }, { "epoch": 0.29269475064315503, "grad_norm": 0.7740260362625122, "learning_rate": 2.4890841167554397e-05, "loss": 0.1146, "step": 13283 }, { "epoch": 0.2927167859326712, "grad_norm": 0.7783434987068176, "learning_rate": 2.4890036312304347e-05, "loss": 0.0897, "step": 13284 }, { "epoch": 0.2927388212221873, "grad_norm": 0.6157234907150269, "learning_rate": 2.4889231406679034e-05, "loss": 0.0575, "step": 13285 }, { "epoch": 0.29276085651170347, "grad_norm": 0.7013765573501587, "learning_rate": 2.488842645068257e-05, "loss": 0.079, "step": 13286 }, { "epoch": 0.29278289180121964, "grad_norm": 0.6774924993515015, "learning_rate": 2.4887621444319047e-05, "loss": 0.0599, "step": 13287 }, { "epoch": 0.2928049270907358, "grad_norm": 0.6614856719970703, "learning_rate": 2.4886816387592564e-05, "loss": 0.103, "step": 13288 }, { "epoch": 0.29282696238025197, "grad_norm": 0.46915391087532043, "learning_rate": 2.488601128050722e-05, "loss": 0.0798, "step": 13289 }, { "epoch": 0.29284899766976813, "grad_norm": 0.884795069694519, "learning_rate": 2.488520612306712e-05, "loss": 0.0942, "step": 13290 }, { "epoch": 0.2928710329592843, "grad_norm": 0.7260774374008179, "learning_rate": 2.4884400915276368e-05, "loss": 0.1169, "step": 13291 }, { "epoch": 0.29289306824880046, "grad_norm": 0.7795673608779907, "learning_rate": 2.4883595657139056e-05, "loss": 0.0989, "step": 13292 }, { "epoch": 0.2929151035383166, "grad_norm": 1.033758521080017, "learning_rate": 2.4882790348659298e-05, "loss": 0.0975, "step": 13293 }, { "epoch": 0.2929371388278328, "grad_norm": 0.517703115940094, "learning_rate": 2.4881984989841187e-05, "loss": 0.1023, "step": 13294 }, { "epoch": 0.29295917411734895, "grad_norm": 0.9822458028793335, "learning_rate": 2.4881179580688825e-05, "loss": 0.1036, "step": 13295 }, { "epoch": 0.2929812094068651, "grad_norm": 0.49740907549858093, "learning_rate": 2.488037412120632e-05, "loss": 0.094, "step": 13296 }, { "epoch": 0.2930032446963813, "grad_norm": 0.6272714138031006, "learning_rate": 2.4879568611397764e-05, "loss": 0.0919, "step": 13297 }, { "epoch": 0.2930252799858974, "grad_norm": 0.46428412199020386, "learning_rate": 2.4878763051267277e-05, "loss": 0.0752, "step": 13298 }, { "epoch": 0.29304731527541356, "grad_norm": 0.8696837425231934, "learning_rate": 2.4877957440818945e-05, "loss": 0.1394, "step": 13299 }, { "epoch": 0.2930693505649297, "grad_norm": 0.5341225862503052, "learning_rate": 2.4877151780056882e-05, "loss": 0.0904, "step": 13300 }, { "epoch": 0.2930913858544459, "grad_norm": 1.3118209838867188, "learning_rate": 2.4876346068985186e-05, "loss": 0.1241, "step": 13301 }, { "epoch": 0.29311342114396205, "grad_norm": 0.7993959784507751, "learning_rate": 2.4875540307607965e-05, "loss": 0.0947, "step": 13302 }, { "epoch": 0.2931354564334782, "grad_norm": 0.5478119850158691, "learning_rate": 2.4874734495929324e-05, "loss": 0.1214, "step": 13303 }, { "epoch": 0.2931574917229944, "grad_norm": 0.5788471698760986, "learning_rate": 2.4873928633953365e-05, "loss": 0.1313, "step": 13304 }, { "epoch": 0.29317952701251054, "grad_norm": 0.7940206527709961, "learning_rate": 2.487312272168419e-05, "loss": 0.0638, "step": 13305 }, { "epoch": 0.2932015623020267, "grad_norm": 0.5630128383636475, "learning_rate": 2.487231675912591e-05, "loss": 0.0745, "step": 13306 }, { "epoch": 0.2932235975915429, "grad_norm": 0.6056554317474365, "learning_rate": 2.4871510746282618e-05, "loss": 0.0922, "step": 13307 }, { "epoch": 0.29324563288105904, "grad_norm": 0.8631853461265564, "learning_rate": 2.4870704683158438e-05, "loss": 0.09, "step": 13308 }, { "epoch": 0.2932676681705752, "grad_norm": 0.5297254920005798, "learning_rate": 2.4869898569757462e-05, "loss": 0.0399, "step": 13309 }, { "epoch": 0.2932897034600913, "grad_norm": 0.771430253982544, "learning_rate": 2.4869092406083797e-05, "loss": 0.1336, "step": 13310 }, { "epoch": 0.2933117387496075, "grad_norm": 0.4904540479183197, "learning_rate": 2.486828619214156e-05, "loss": 0.0867, "step": 13311 }, { "epoch": 0.29333377403912364, "grad_norm": 0.8643612861633301, "learning_rate": 2.4867479927934844e-05, "loss": 0.0837, "step": 13312 }, { "epoch": 0.2933558093286398, "grad_norm": 0.9652087688446045, "learning_rate": 2.4866673613467762e-05, "loss": 0.1107, "step": 13313 }, { "epoch": 0.29337784461815597, "grad_norm": 0.8015756607055664, "learning_rate": 2.486586724874442e-05, "loss": 0.1005, "step": 13314 }, { "epoch": 0.29339987990767213, "grad_norm": 0.8086035251617432, "learning_rate": 2.486506083376893e-05, "loss": 0.0979, "step": 13315 }, { "epoch": 0.2934219151971883, "grad_norm": 0.9420755505561829, "learning_rate": 2.4864254368545386e-05, "loss": 0.1101, "step": 13316 }, { "epoch": 0.29344395048670446, "grad_norm": 1.2011586427688599, "learning_rate": 2.486344785307791e-05, "loss": 0.099, "step": 13317 }, { "epoch": 0.29346598577622063, "grad_norm": 0.47952646017074585, "learning_rate": 2.4862641287370605e-05, "loss": 0.0528, "step": 13318 }, { "epoch": 0.2934880210657368, "grad_norm": 1.0491048097610474, "learning_rate": 2.486183467142758e-05, "loss": 0.0913, "step": 13319 }, { "epoch": 0.29351005635525296, "grad_norm": 0.7223345041275024, "learning_rate": 2.4861028005252944e-05, "loss": 0.0892, "step": 13320 }, { "epoch": 0.2935320916447691, "grad_norm": 0.694373607635498, "learning_rate": 2.4860221288850802e-05, "loss": 0.059, "step": 13321 }, { "epoch": 0.29355412693428523, "grad_norm": 0.9109083414077759, "learning_rate": 2.4859414522225265e-05, "loss": 0.1009, "step": 13322 }, { "epoch": 0.2935761622238014, "grad_norm": 0.6262505650520325, "learning_rate": 2.4858607705380445e-05, "loss": 0.0915, "step": 13323 }, { "epoch": 0.29359819751331756, "grad_norm": 0.5481124520301819, "learning_rate": 2.4857800838320445e-05, "loss": 0.1216, "step": 13324 }, { "epoch": 0.2936202328028337, "grad_norm": 0.8537718057632446, "learning_rate": 2.4856993921049383e-05, "loss": 0.127, "step": 13325 }, { "epoch": 0.2936422680923499, "grad_norm": 0.4725101888179779, "learning_rate": 2.4856186953571365e-05, "loss": 0.0786, "step": 13326 }, { "epoch": 0.29366430338186605, "grad_norm": 2.5377883911132812, "learning_rate": 2.4855379935890503e-05, "loss": 0.1064, "step": 13327 }, { "epoch": 0.2936863386713822, "grad_norm": 0.5294045209884644, "learning_rate": 2.48545728680109e-05, "loss": 0.1285, "step": 13328 }, { "epoch": 0.2937083739608984, "grad_norm": 1.198262095451355, "learning_rate": 2.485376574993668e-05, "loss": 0.1193, "step": 13329 }, { "epoch": 0.29373040925041455, "grad_norm": 0.6762762069702148, "learning_rate": 2.4852958581671944e-05, "loss": 0.0959, "step": 13330 }, { "epoch": 0.2937524445399307, "grad_norm": 0.7066066265106201, "learning_rate": 2.4852151363220804e-05, "loss": 0.0789, "step": 13331 }, { "epoch": 0.2937744798294469, "grad_norm": 0.48661744594573975, "learning_rate": 2.4851344094587377e-05, "loss": 0.0594, "step": 13332 }, { "epoch": 0.29379651511896304, "grad_norm": 1.1737463474273682, "learning_rate": 2.4850536775775773e-05, "loss": 0.1289, "step": 13333 }, { "epoch": 0.2938185504084792, "grad_norm": 0.9111899137496948, "learning_rate": 2.4849729406790103e-05, "loss": 0.1145, "step": 13334 }, { "epoch": 0.2938405856979953, "grad_norm": 0.588746964931488, "learning_rate": 2.4848921987634473e-05, "loss": 0.1394, "step": 13335 }, { "epoch": 0.2938626209875115, "grad_norm": 0.9240061044692993, "learning_rate": 2.4848114518313008e-05, "loss": 0.0911, "step": 13336 }, { "epoch": 0.29388465627702764, "grad_norm": 0.7073052525520325, "learning_rate": 2.484730699882981e-05, "loss": 0.062, "step": 13337 }, { "epoch": 0.2939066915665438, "grad_norm": 0.9299662113189697, "learning_rate": 2.4846499429189006e-05, "loss": 0.0637, "step": 13338 }, { "epoch": 0.29392872685606, "grad_norm": 1.296303153038025, "learning_rate": 2.4845691809394693e-05, "loss": 0.1192, "step": 13339 }, { "epoch": 0.29395076214557614, "grad_norm": 0.553118109703064, "learning_rate": 2.4844884139450994e-05, "loss": 0.0629, "step": 13340 }, { "epoch": 0.2939727974350923, "grad_norm": 0.9919127821922302, "learning_rate": 2.484407641936202e-05, "loss": 0.1363, "step": 13341 }, { "epoch": 0.29399483272460847, "grad_norm": 0.6068598628044128, "learning_rate": 2.4843268649131887e-05, "loss": 0.1086, "step": 13342 }, { "epoch": 0.29401686801412463, "grad_norm": 0.7090914845466614, "learning_rate": 2.4842460828764705e-05, "loss": 0.1034, "step": 13343 }, { "epoch": 0.2940389033036408, "grad_norm": 0.8498724699020386, "learning_rate": 2.4841652958264594e-05, "loss": 0.0896, "step": 13344 }, { "epoch": 0.29406093859315696, "grad_norm": 0.8248289823532104, "learning_rate": 2.484084503763567e-05, "loss": 0.1176, "step": 13345 }, { "epoch": 0.2940829738826731, "grad_norm": 0.6356966495513916, "learning_rate": 2.484003706688204e-05, "loss": 0.0861, "step": 13346 }, { "epoch": 0.29410500917218924, "grad_norm": 1.3748977184295654, "learning_rate": 2.4839229046007828e-05, "loss": 0.089, "step": 13347 }, { "epoch": 0.2941270444617054, "grad_norm": 0.7012276649475098, "learning_rate": 2.4838420975017146e-05, "loss": 0.0782, "step": 13348 }, { "epoch": 0.29414907975122156, "grad_norm": 0.8211236596107483, "learning_rate": 2.483761285391411e-05, "loss": 0.0742, "step": 13349 }, { "epoch": 0.29417111504073773, "grad_norm": 0.8116015195846558, "learning_rate": 2.4836804682702833e-05, "loss": 0.0986, "step": 13350 }, { "epoch": 0.2941931503302539, "grad_norm": 0.746192216873169, "learning_rate": 2.4835996461387442e-05, "loss": 0.1003, "step": 13351 }, { "epoch": 0.29421518561977006, "grad_norm": 0.7580350041389465, "learning_rate": 2.4835188189972037e-05, "loss": 0.1148, "step": 13352 }, { "epoch": 0.2942372209092862, "grad_norm": 0.7846686244010925, "learning_rate": 2.4834379868460753e-05, "loss": 0.093, "step": 13353 }, { "epoch": 0.2942592561988024, "grad_norm": 0.8600572347640991, "learning_rate": 2.4833571496857695e-05, "loss": 0.0962, "step": 13354 }, { "epoch": 0.29428129148831855, "grad_norm": 0.5528723001480103, "learning_rate": 2.4832763075166984e-05, "loss": 0.127, "step": 13355 }, { "epoch": 0.2943033267778347, "grad_norm": 0.7148142457008362, "learning_rate": 2.483195460339274e-05, "loss": 0.0813, "step": 13356 }, { "epoch": 0.2943253620673509, "grad_norm": 0.8612674474716187, "learning_rate": 2.483114608153908e-05, "loss": 0.1015, "step": 13357 }, { "epoch": 0.29434739735686705, "grad_norm": 0.525995135307312, "learning_rate": 2.483033750961012e-05, "loss": 0.0838, "step": 13358 }, { "epoch": 0.29436943264638316, "grad_norm": 0.5301322340965271, "learning_rate": 2.4829528887609976e-05, "loss": 0.0784, "step": 13359 }, { "epoch": 0.2943914679358993, "grad_norm": 1.472065806388855, "learning_rate": 2.4828720215542773e-05, "loss": 0.1208, "step": 13360 }, { "epoch": 0.2944135032254155, "grad_norm": 0.8523470759391785, "learning_rate": 2.4827911493412626e-05, "loss": 0.1036, "step": 13361 }, { "epoch": 0.29443553851493165, "grad_norm": 0.8483055233955383, "learning_rate": 2.482710272122366e-05, "loss": 0.1002, "step": 13362 }, { "epoch": 0.2944575738044478, "grad_norm": 0.5097438097000122, "learning_rate": 2.4826293898979984e-05, "loss": 0.0656, "step": 13363 }, { "epoch": 0.294479609093964, "grad_norm": 0.6699212193489075, "learning_rate": 2.4825485026685726e-05, "loss": 0.0982, "step": 13364 }, { "epoch": 0.29450164438348014, "grad_norm": 0.4340676963329315, "learning_rate": 2.4824676104345007e-05, "loss": 0.1055, "step": 13365 }, { "epoch": 0.2945236796729963, "grad_norm": 0.6343974471092224, "learning_rate": 2.482386713196194e-05, "loss": 0.0744, "step": 13366 }, { "epoch": 0.2945457149625125, "grad_norm": 1.1476367712020874, "learning_rate": 2.482305810954065e-05, "loss": 0.0911, "step": 13367 }, { "epoch": 0.29456775025202864, "grad_norm": 0.7338737845420837, "learning_rate": 2.482224903708526e-05, "loss": 0.072, "step": 13368 }, { "epoch": 0.2945897855415448, "grad_norm": 0.6683822870254517, "learning_rate": 2.4821439914599887e-05, "loss": 0.1246, "step": 13369 }, { "epoch": 0.29461182083106097, "grad_norm": 0.8179154992103577, "learning_rate": 2.4820630742088658e-05, "loss": 0.1, "step": 13370 }, { "epoch": 0.29463385612057713, "grad_norm": 0.7942855358123779, "learning_rate": 2.4819821519555685e-05, "loss": 0.1204, "step": 13371 }, { "epoch": 0.29465589141009324, "grad_norm": 0.6130000352859497, "learning_rate": 2.4819012247005094e-05, "loss": 0.0701, "step": 13372 }, { "epoch": 0.2946779266996094, "grad_norm": 0.6489866971969604, "learning_rate": 2.4818202924441013e-05, "loss": 0.1058, "step": 13373 }, { "epoch": 0.29469996198912557, "grad_norm": 0.7248372435569763, "learning_rate": 2.481739355186756e-05, "loss": 0.0889, "step": 13374 }, { "epoch": 0.29472199727864173, "grad_norm": 0.7587376832962036, "learning_rate": 2.4816584129288854e-05, "loss": 0.0701, "step": 13375 }, { "epoch": 0.2947440325681579, "grad_norm": 0.5261807441711426, "learning_rate": 2.481577465670902e-05, "loss": 0.1051, "step": 13376 }, { "epoch": 0.29476606785767406, "grad_norm": 0.9044821858406067, "learning_rate": 2.4814965134132187e-05, "loss": 0.1132, "step": 13377 }, { "epoch": 0.29478810314719023, "grad_norm": 0.6760430932044983, "learning_rate": 2.4814155561562472e-05, "loss": 0.0992, "step": 13378 }, { "epoch": 0.2948101384367064, "grad_norm": 0.6116646528244019, "learning_rate": 2.4813345939003996e-05, "loss": 0.0664, "step": 13379 }, { "epoch": 0.29483217372622256, "grad_norm": 0.7316461205482483, "learning_rate": 2.481253626646089e-05, "loss": 0.0526, "step": 13380 }, { "epoch": 0.2948542090157387, "grad_norm": 0.6222548484802246, "learning_rate": 2.4811726543937274e-05, "loss": 0.087, "step": 13381 }, { "epoch": 0.2948762443052549, "grad_norm": 0.7300085425376892, "learning_rate": 2.481091677143727e-05, "loss": 0.0915, "step": 13382 }, { "epoch": 0.29489827959477105, "grad_norm": 0.8538649082183838, "learning_rate": 2.481010694896501e-05, "loss": 0.1129, "step": 13383 }, { "epoch": 0.29492031488428716, "grad_norm": 1.080915093421936, "learning_rate": 2.4809297076524617e-05, "loss": 0.1055, "step": 13384 }, { "epoch": 0.2949423501738033, "grad_norm": 0.6326948404312134, "learning_rate": 2.480848715412021e-05, "loss": 0.0639, "step": 13385 }, { "epoch": 0.2949643854633195, "grad_norm": 0.7695247530937195, "learning_rate": 2.4807677181755917e-05, "loss": 0.074, "step": 13386 }, { "epoch": 0.29498642075283565, "grad_norm": 0.7136578559875488, "learning_rate": 2.4806867159435867e-05, "loss": 0.095, "step": 13387 }, { "epoch": 0.2950084560423518, "grad_norm": 0.7205800414085388, "learning_rate": 2.480605708716418e-05, "loss": 0.09, "step": 13388 }, { "epoch": 0.295030491331868, "grad_norm": 0.334897518157959, "learning_rate": 2.4805246964944995e-05, "loss": 0.0847, "step": 13389 }, { "epoch": 0.29505252662138415, "grad_norm": 1.128862738609314, "learning_rate": 2.480443679278242e-05, "loss": 0.0787, "step": 13390 }, { "epoch": 0.2950745619109003, "grad_norm": 1.2969499826431274, "learning_rate": 2.4803626570680597e-05, "loss": 0.0956, "step": 13391 }, { "epoch": 0.2950965972004165, "grad_norm": 0.897212564945221, "learning_rate": 2.480281629864364e-05, "loss": 0.1209, "step": 13392 }, { "epoch": 0.29511863248993264, "grad_norm": 0.4547201693058014, "learning_rate": 2.4802005976675688e-05, "loss": 0.1029, "step": 13393 }, { "epoch": 0.2951406677794488, "grad_norm": 0.6630011200904846, "learning_rate": 2.4801195604780862e-05, "loss": 0.0857, "step": 13394 }, { "epoch": 0.29516270306896497, "grad_norm": 0.7584981322288513, "learning_rate": 2.4800385182963294e-05, "loss": 0.067, "step": 13395 }, { "epoch": 0.2951847383584811, "grad_norm": 0.8467617630958557, "learning_rate": 2.4799574711227102e-05, "loss": 0.0788, "step": 13396 }, { "epoch": 0.29520677364799724, "grad_norm": 0.3400491774082184, "learning_rate": 2.4798764189576425e-05, "loss": 0.1196, "step": 13397 }, { "epoch": 0.2952288089375134, "grad_norm": 0.6824998259544373, "learning_rate": 2.479795361801539e-05, "loss": 0.0738, "step": 13398 }, { "epoch": 0.2952508442270296, "grad_norm": 0.7093075513839722, "learning_rate": 2.4797142996548125e-05, "loss": 0.0803, "step": 13399 }, { "epoch": 0.29527287951654574, "grad_norm": 0.6997154355049133, "learning_rate": 2.479633232517875e-05, "loss": 0.1079, "step": 13400 }, { "epoch": 0.2952949148060619, "grad_norm": 0.9943286180496216, "learning_rate": 2.4795521603911405e-05, "loss": 0.099, "step": 13401 }, { "epoch": 0.29531695009557807, "grad_norm": 1.5362014770507812, "learning_rate": 2.4794710832750212e-05, "loss": 0.1183, "step": 13402 }, { "epoch": 0.29533898538509423, "grad_norm": 1.0067473649978638, "learning_rate": 2.4793900011699307e-05, "loss": 0.1124, "step": 13403 }, { "epoch": 0.2953610206746104, "grad_norm": 0.8990232348442078, "learning_rate": 2.479308914076282e-05, "loss": 0.1255, "step": 13404 }, { "epoch": 0.29538305596412656, "grad_norm": 0.8436777591705322, "learning_rate": 2.4792278219944878e-05, "loss": 0.088, "step": 13405 }, { "epoch": 0.2954050912536427, "grad_norm": 0.7545964121818542, "learning_rate": 2.4791467249249607e-05, "loss": 0.0777, "step": 13406 }, { "epoch": 0.2954271265431589, "grad_norm": 0.6403762698173523, "learning_rate": 2.4790656228681145e-05, "loss": 0.0935, "step": 13407 }, { "epoch": 0.29544916183267506, "grad_norm": 0.6226674318313599, "learning_rate": 2.478984515824362e-05, "loss": 0.0943, "step": 13408 }, { "epoch": 0.29547119712219116, "grad_norm": 0.5605269074440002, "learning_rate": 2.4789034037941166e-05, "loss": 0.047, "step": 13409 }, { "epoch": 0.29549323241170733, "grad_norm": 0.5897181630134583, "learning_rate": 2.4788222867777914e-05, "loss": 0.1196, "step": 13410 }, { "epoch": 0.2955152677012235, "grad_norm": 0.7415323257446289, "learning_rate": 2.478741164775799e-05, "loss": 0.1208, "step": 13411 }, { "epoch": 0.29553730299073966, "grad_norm": 0.8351756930351257, "learning_rate": 2.4786600377885532e-05, "loss": 0.0926, "step": 13412 }, { "epoch": 0.2955593382802558, "grad_norm": 0.9116300940513611, "learning_rate": 2.478578905816467e-05, "loss": 0.1028, "step": 13413 }, { "epoch": 0.295581373569772, "grad_norm": 0.7669317126274109, "learning_rate": 2.4784977688599535e-05, "loss": 0.0906, "step": 13414 }, { "epoch": 0.29560340885928815, "grad_norm": 0.8030121326446533, "learning_rate": 2.4784166269194266e-05, "loss": 0.1046, "step": 13415 }, { "epoch": 0.2956254441488043, "grad_norm": 0.5460618734359741, "learning_rate": 2.4783354799952987e-05, "loss": 0.0582, "step": 13416 }, { "epoch": 0.2956474794383205, "grad_norm": 0.4109499752521515, "learning_rate": 2.4782543280879838e-05, "loss": 0.0657, "step": 13417 }, { "epoch": 0.29566951472783665, "grad_norm": 0.9944824576377869, "learning_rate": 2.478173171197895e-05, "loss": 0.1146, "step": 13418 }, { "epoch": 0.2956915500173528, "grad_norm": 1.2591840028762817, "learning_rate": 2.4780920093254455e-05, "loss": 0.0676, "step": 13419 }, { "epoch": 0.295713585306869, "grad_norm": 0.6524100303649902, "learning_rate": 2.4780108424710494e-05, "loss": 0.091, "step": 13420 }, { "epoch": 0.2957356205963851, "grad_norm": 0.4669366478919983, "learning_rate": 2.477929670635119e-05, "loss": 0.0817, "step": 13421 }, { "epoch": 0.29575765588590125, "grad_norm": 0.6121420860290527, "learning_rate": 2.477848493818069e-05, "loss": 0.0783, "step": 13422 }, { "epoch": 0.2957796911754174, "grad_norm": 0.6798283457756042, "learning_rate": 2.477767312020312e-05, "loss": 0.0823, "step": 13423 }, { "epoch": 0.2958017264649336, "grad_norm": 0.594526469707489, "learning_rate": 2.4776861252422615e-05, "loss": 0.0805, "step": 13424 }, { "epoch": 0.29582376175444974, "grad_norm": 0.5779548287391663, "learning_rate": 2.4776049334843314e-05, "loss": 0.0617, "step": 13425 }, { "epoch": 0.2958457970439659, "grad_norm": 0.5788320899009705, "learning_rate": 2.4775237367469358e-05, "loss": 0.1011, "step": 13426 }, { "epoch": 0.2958678323334821, "grad_norm": 0.739877462387085, "learning_rate": 2.4774425350304868e-05, "loss": 0.1058, "step": 13427 }, { "epoch": 0.29588986762299824, "grad_norm": 1.056169033050537, "learning_rate": 2.477361328335399e-05, "loss": 0.1406, "step": 13428 }, { "epoch": 0.2959119029125144, "grad_norm": 0.9163057804107666, "learning_rate": 2.4772801166620858e-05, "loss": 0.0762, "step": 13429 }, { "epoch": 0.29593393820203057, "grad_norm": 0.9684416651725769, "learning_rate": 2.4771989000109612e-05, "loss": 0.0933, "step": 13430 }, { "epoch": 0.29595597349154673, "grad_norm": 0.8708315491676331, "learning_rate": 2.4771176783824383e-05, "loss": 0.1118, "step": 13431 }, { "epoch": 0.2959780087810629, "grad_norm": 0.8329777717590332, "learning_rate": 2.4770364517769313e-05, "loss": 0.0941, "step": 13432 }, { "epoch": 0.296000044070579, "grad_norm": 0.752720057964325, "learning_rate": 2.4769552201948538e-05, "loss": 0.0682, "step": 13433 }, { "epoch": 0.29602207936009517, "grad_norm": 0.9178187847137451, "learning_rate": 2.4768739836366192e-05, "loss": 0.0726, "step": 13434 }, { "epoch": 0.29604411464961133, "grad_norm": 0.5922109484672546, "learning_rate": 2.4767927421026415e-05, "loss": 0.0927, "step": 13435 }, { "epoch": 0.2960661499391275, "grad_norm": 0.6250370740890503, "learning_rate": 2.4767114955933348e-05, "loss": 0.0855, "step": 13436 }, { "epoch": 0.29608818522864366, "grad_norm": 0.6845642328262329, "learning_rate": 2.476630244109113e-05, "loss": 0.0904, "step": 13437 }, { "epoch": 0.29611022051815983, "grad_norm": 1.2808315753936768, "learning_rate": 2.476548987650389e-05, "loss": 0.0985, "step": 13438 }, { "epoch": 0.296132255807676, "grad_norm": 0.7845099568367004, "learning_rate": 2.476467726217578e-05, "loss": 0.0966, "step": 13439 }, { "epoch": 0.29615429109719216, "grad_norm": 0.985759973526001, "learning_rate": 2.4763864598110923e-05, "loss": 0.1132, "step": 13440 }, { "epoch": 0.2961763263867083, "grad_norm": 0.9366055130958557, "learning_rate": 2.4763051884313476e-05, "loss": 0.098, "step": 13441 }, { "epoch": 0.2961983616762245, "grad_norm": 0.6802192330360413, "learning_rate": 2.4762239120787567e-05, "loss": 0.1019, "step": 13442 }, { "epoch": 0.29622039696574065, "grad_norm": 0.5785133838653564, "learning_rate": 2.4761426307537342e-05, "loss": 0.0935, "step": 13443 }, { "epoch": 0.2962424322552568, "grad_norm": 0.8540863990783691, "learning_rate": 2.4760613444566937e-05, "loss": 0.0969, "step": 13444 }, { "epoch": 0.296264467544773, "grad_norm": 0.6894983649253845, "learning_rate": 2.4759800531880488e-05, "loss": 0.0979, "step": 13445 }, { "epoch": 0.2962865028342891, "grad_norm": 0.522305428981781, "learning_rate": 2.475898756948215e-05, "loss": 0.0707, "step": 13446 }, { "epoch": 0.29630853812380525, "grad_norm": 0.744705319404602, "learning_rate": 2.4758174557376053e-05, "loss": 0.0703, "step": 13447 }, { "epoch": 0.2963305734133214, "grad_norm": 0.9325647950172424, "learning_rate": 2.475736149556634e-05, "loss": 0.1109, "step": 13448 }, { "epoch": 0.2963526087028376, "grad_norm": 1.1321632862091064, "learning_rate": 2.475654838405715e-05, "loss": 0.0959, "step": 13449 }, { "epoch": 0.29637464399235375, "grad_norm": 1.2349592447280884, "learning_rate": 2.4755735222852627e-05, "loss": 0.1199, "step": 13450 }, { "epoch": 0.2963966792818699, "grad_norm": 0.5098162293434143, "learning_rate": 2.4754922011956916e-05, "loss": 0.1024, "step": 13451 }, { "epoch": 0.2964187145713861, "grad_norm": 0.9036990404129028, "learning_rate": 2.4754108751374155e-05, "loss": 0.0968, "step": 13452 }, { "epoch": 0.29644074986090224, "grad_norm": 1.0620336532592773, "learning_rate": 2.475329544110849e-05, "loss": 0.1019, "step": 13453 }, { "epoch": 0.2964627851504184, "grad_norm": 0.7105528712272644, "learning_rate": 2.4752482081164054e-05, "loss": 0.0659, "step": 13454 }, { "epoch": 0.29648482043993457, "grad_norm": 0.8749493360519409, "learning_rate": 2.4751668671545e-05, "loss": 0.0868, "step": 13455 }, { "epoch": 0.29650685572945074, "grad_norm": 0.7678765058517456, "learning_rate": 2.4750855212255473e-05, "loss": 0.0949, "step": 13456 }, { "epoch": 0.2965288910189669, "grad_norm": 0.91229647397995, "learning_rate": 2.4750041703299605e-05, "loss": 0.0952, "step": 13457 }, { "epoch": 0.296550926308483, "grad_norm": 0.7687968015670776, "learning_rate": 2.4749228144681552e-05, "loss": 0.1273, "step": 13458 }, { "epoch": 0.2965729615979992, "grad_norm": 0.44710755348205566, "learning_rate": 2.474841453640545e-05, "loss": 0.0784, "step": 13459 }, { "epoch": 0.29659499688751534, "grad_norm": 0.6702457666397095, "learning_rate": 2.474760087847545e-05, "loss": 0.0716, "step": 13460 }, { "epoch": 0.2966170321770315, "grad_norm": 1.1588916778564453, "learning_rate": 2.4746787170895683e-05, "loss": 0.0893, "step": 13461 }, { "epoch": 0.29663906746654767, "grad_norm": 0.8730086088180542, "learning_rate": 2.4745973413670308e-05, "loss": 0.0914, "step": 13462 }, { "epoch": 0.29666110275606383, "grad_norm": 0.8395768404006958, "learning_rate": 2.474515960680346e-05, "loss": 0.1139, "step": 13463 }, { "epoch": 0.29668313804558, "grad_norm": 0.6906737685203552, "learning_rate": 2.474434575029929e-05, "loss": 0.0935, "step": 13464 }, { "epoch": 0.29670517333509616, "grad_norm": 0.9486370086669922, "learning_rate": 2.4743531844161945e-05, "loss": 0.1021, "step": 13465 }, { "epoch": 0.2967272086246123, "grad_norm": 0.8951801061630249, "learning_rate": 2.4742717888395564e-05, "loss": 0.0927, "step": 13466 }, { "epoch": 0.2967492439141285, "grad_norm": 0.9000560641288757, "learning_rate": 2.4741903883004298e-05, "loss": 0.1251, "step": 13467 }, { "epoch": 0.29677127920364466, "grad_norm": 0.3336871862411499, "learning_rate": 2.4741089827992293e-05, "loss": 0.1081, "step": 13468 }, { "epoch": 0.2967933144931608, "grad_norm": 0.4844415783882141, "learning_rate": 2.4740275723363687e-05, "loss": 0.1163, "step": 13469 }, { "epoch": 0.296815349782677, "grad_norm": 1.185299038887024, "learning_rate": 2.473946156912264e-05, "loss": 0.103, "step": 13470 }, { "epoch": 0.2968373850721931, "grad_norm": 0.5644189715385437, "learning_rate": 2.4738647365273288e-05, "loss": 0.0842, "step": 13471 }, { "epoch": 0.29685942036170926, "grad_norm": 0.814387857913971, "learning_rate": 2.473783311181979e-05, "loss": 0.0707, "step": 13472 }, { "epoch": 0.2968814556512254, "grad_norm": 1.1289048194885254, "learning_rate": 2.473701880876628e-05, "loss": 0.1296, "step": 13473 }, { "epoch": 0.2969034909407416, "grad_norm": 0.5628938674926758, "learning_rate": 2.473620445611691e-05, "loss": 0.065, "step": 13474 }, { "epoch": 0.29692552623025775, "grad_norm": 1.0861868858337402, "learning_rate": 2.4735390053875832e-05, "loss": 0.0876, "step": 13475 }, { "epoch": 0.2969475615197739, "grad_norm": 0.6224632263183594, "learning_rate": 2.4734575602047197e-05, "loss": 0.0849, "step": 13476 }, { "epoch": 0.2969695968092901, "grad_norm": 0.7106029987335205, "learning_rate": 2.4733761100635144e-05, "loss": 0.0825, "step": 13477 }, { "epoch": 0.29699163209880625, "grad_norm": 0.6125195026397705, "learning_rate": 2.4732946549643827e-05, "loss": 0.1224, "step": 13478 }, { "epoch": 0.2970136673883224, "grad_norm": 0.5218155384063721, "learning_rate": 2.4732131949077393e-05, "loss": 0.1012, "step": 13479 }, { "epoch": 0.2970357026778386, "grad_norm": 0.7917353510856628, "learning_rate": 2.4731317298939992e-05, "loss": 0.0864, "step": 13480 }, { "epoch": 0.29705773796735474, "grad_norm": 0.8103252053260803, "learning_rate": 2.473050259923577e-05, "loss": 0.1106, "step": 13481 }, { "epoch": 0.2970797732568709, "grad_norm": 1.0381170511245728, "learning_rate": 2.4729687849968888e-05, "loss": 0.1082, "step": 13482 }, { "epoch": 0.297101808546387, "grad_norm": 1.0245120525360107, "learning_rate": 2.472887305114349e-05, "loss": 0.0967, "step": 13483 }, { "epoch": 0.2971238438359032, "grad_norm": 0.5963265299797058, "learning_rate": 2.4728058202763712e-05, "loss": 0.102, "step": 13484 }, { "epoch": 0.29714587912541934, "grad_norm": 0.8752249479293823, "learning_rate": 2.4727243304833727e-05, "loss": 0.0712, "step": 13485 }, { "epoch": 0.2971679144149355, "grad_norm": 0.48081302642822266, "learning_rate": 2.4726428357357672e-05, "loss": 0.069, "step": 13486 }, { "epoch": 0.2971899497044517, "grad_norm": 1.1572738885879517, "learning_rate": 2.4725613360339703e-05, "loss": 0.1136, "step": 13487 }, { "epoch": 0.29721198499396784, "grad_norm": 0.8531805872917175, "learning_rate": 2.4724798313783967e-05, "loss": 0.0994, "step": 13488 }, { "epoch": 0.297234020283484, "grad_norm": 0.8593847751617432, "learning_rate": 2.4723983217694622e-05, "loss": 0.1006, "step": 13489 }, { "epoch": 0.29725605557300017, "grad_norm": 0.7650920152664185, "learning_rate": 2.472316807207582e-05, "loss": 0.0881, "step": 13490 }, { "epoch": 0.29727809086251633, "grad_norm": 0.33503299951553345, "learning_rate": 2.4722352876931704e-05, "loss": 0.0997, "step": 13491 }, { "epoch": 0.2973001261520325, "grad_norm": 0.6639070510864258, "learning_rate": 2.472153763226643e-05, "loss": 0.1205, "step": 13492 }, { "epoch": 0.29732216144154866, "grad_norm": 0.7803738117218018, "learning_rate": 2.472072233808415e-05, "loss": 0.0883, "step": 13493 }, { "epoch": 0.2973441967310648, "grad_norm": 0.6855388879776001, "learning_rate": 2.4719906994389023e-05, "loss": 0.1692, "step": 13494 }, { "epoch": 0.29736623202058093, "grad_norm": 0.683699369430542, "learning_rate": 2.4719091601185196e-05, "loss": 0.1049, "step": 13495 }, { "epoch": 0.2973882673100971, "grad_norm": 0.6394681930541992, "learning_rate": 2.4718276158476823e-05, "loss": 0.0646, "step": 13496 }, { "epoch": 0.29741030259961326, "grad_norm": 0.5903406739234924, "learning_rate": 2.4717460666268062e-05, "loss": 0.0602, "step": 13497 }, { "epoch": 0.29743233788912943, "grad_norm": 0.8083774447441101, "learning_rate": 2.4716645124563055e-05, "loss": 0.1218, "step": 13498 }, { "epoch": 0.2974543731786456, "grad_norm": 0.8569087386131287, "learning_rate": 2.471582953336597e-05, "loss": 0.1016, "step": 13499 }, { "epoch": 0.29747640846816176, "grad_norm": 1.1110434532165527, "learning_rate": 2.4715013892680954e-05, "loss": 0.0959, "step": 13500 }, { "epoch": 0.2974984437576779, "grad_norm": 0.5563902854919434, "learning_rate": 2.4714198202512163e-05, "loss": 0.0874, "step": 13501 }, { "epoch": 0.2975204790471941, "grad_norm": 0.6122907400131226, "learning_rate": 2.471338246286375e-05, "loss": 0.0634, "step": 13502 }, { "epoch": 0.29754251433671025, "grad_norm": 0.9492273330688477, "learning_rate": 2.471256667373987e-05, "loss": 0.1253, "step": 13503 }, { "epoch": 0.2975645496262264, "grad_norm": 0.6164414286613464, "learning_rate": 2.4711750835144683e-05, "loss": 0.08, "step": 13504 }, { "epoch": 0.2975865849157426, "grad_norm": 0.4805721342563629, "learning_rate": 2.471093494708234e-05, "loss": 0.1, "step": 13505 }, { "epoch": 0.29760862020525874, "grad_norm": 0.5847203135490417, "learning_rate": 2.4710119009557e-05, "loss": 0.1114, "step": 13506 }, { "epoch": 0.2976306554947749, "grad_norm": 0.7356805205345154, "learning_rate": 2.470930302257281e-05, "loss": 0.0887, "step": 13507 }, { "epoch": 0.297652690784291, "grad_norm": 0.76033616065979, "learning_rate": 2.4708486986133942e-05, "loss": 0.0823, "step": 13508 }, { "epoch": 0.2976747260738072, "grad_norm": 1.0588756799697876, "learning_rate": 2.4707670900244538e-05, "loss": 0.0835, "step": 13509 }, { "epoch": 0.29769676136332335, "grad_norm": 0.8557716012001038, "learning_rate": 2.4706854764908763e-05, "loss": 0.1141, "step": 13510 }, { "epoch": 0.2977187966528395, "grad_norm": 0.8631636500358582, "learning_rate": 2.470603858013077e-05, "loss": 0.1221, "step": 13511 }, { "epoch": 0.2977408319423557, "grad_norm": 2.808389902114868, "learning_rate": 2.470522234591472e-05, "loss": 0.0636, "step": 13512 }, { "epoch": 0.29776286723187184, "grad_norm": 0.9024446606636047, "learning_rate": 2.4704406062264765e-05, "loss": 0.0958, "step": 13513 }, { "epoch": 0.297784902521388, "grad_norm": 0.9603205919265747, "learning_rate": 2.470358972918507e-05, "loss": 0.0882, "step": 13514 }, { "epoch": 0.29780693781090417, "grad_norm": 1.0018833875656128, "learning_rate": 2.4702773346679787e-05, "loss": 0.0814, "step": 13515 }, { "epoch": 0.29782897310042034, "grad_norm": 0.6617297530174255, "learning_rate": 2.4701956914753073e-05, "loss": 0.0819, "step": 13516 }, { "epoch": 0.2978510083899365, "grad_norm": 1.12373685836792, "learning_rate": 2.4701140433409093e-05, "loss": 0.1474, "step": 13517 }, { "epoch": 0.29787304367945266, "grad_norm": 0.6989801526069641, "learning_rate": 2.4700323902652e-05, "loss": 0.0669, "step": 13518 }, { "epoch": 0.29789507896896883, "grad_norm": 0.9884027242660522, "learning_rate": 2.469950732248596e-05, "loss": 0.1246, "step": 13519 }, { "epoch": 0.29791711425848494, "grad_norm": 0.828045129776001, "learning_rate": 2.4698690692915127e-05, "loss": 0.1112, "step": 13520 }, { "epoch": 0.2979391495480011, "grad_norm": 0.8002883791923523, "learning_rate": 2.4697874013943658e-05, "loss": 0.1309, "step": 13521 }, { "epoch": 0.29796118483751727, "grad_norm": 1.4089804887771606, "learning_rate": 2.469705728557572e-05, "loss": 0.108, "step": 13522 }, { "epoch": 0.29798322012703343, "grad_norm": 0.835392415523529, "learning_rate": 2.4696240507815468e-05, "loss": 0.1353, "step": 13523 }, { "epoch": 0.2980052554165496, "grad_norm": 0.47181224822998047, "learning_rate": 2.469542368066706e-05, "loss": 0.0937, "step": 13524 }, { "epoch": 0.29802729070606576, "grad_norm": 0.7806248068809509, "learning_rate": 2.4694606804134664e-05, "loss": 0.1311, "step": 13525 }, { "epoch": 0.2980493259955819, "grad_norm": 0.38435837626457214, "learning_rate": 2.4693789878222436e-05, "loss": 0.0882, "step": 13526 }, { "epoch": 0.2980713612850981, "grad_norm": 0.6144071221351624, "learning_rate": 2.4692972902934542e-05, "loss": 0.0771, "step": 13527 }, { "epoch": 0.29809339657461426, "grad_norm": 1.4048523902893066, "learning_rate": 2.4692155878275135e-05, "loss": 0.09, "step": 13528 }, { "epoch": 0.2981154318641304, "grad_norm": 0.7495740056037903, "learning_rate": 2.469133880424838e-05, "loss": 0.0557, "step": 13529 }, { "epoch": 0.2981374671536466, "grad_norm": 0.7389311790466309, "learning_rate": 2.4690521680858437e-05, "loss": 0.117, "step": 13530 }, { "epoch": 0.29815950244316275, "grad_norm": 0.6680216789245605, "learning_rate": 2.4689704508109477e-05, "loss": 0.0935, "step": 13531 }, { "epoch": 0.29818153773267886, "grad_norm": 0.8393700122833252, "learning_rate": 2.4688887286005656e-05, "loss": 0.091, "step": 13532 }, { "epoch": 0.298203573022195, "grad_norm": 0.8115555644035339, "learning_rate": 2.468807001455113e-05, "loss": 0.1102, "step": 13533 }, { "epoch": 0.2982256083117112, "grad_norm": 0.8510535359382629, "learning_rate": 2.4687252693750072e-05, "loss": 0.0573, "step": 13534 }, { "epoch": 0.29824764360122735, "grad_norm": 0.934107780456543, "learning_rate": 2.468643532360664e-05, "loss": 0.1149, "step": 13535 }, { "epoch": 0.2982696788907435, "grad_norm": 0.49435439705848694, "learning_rate": 2.4685617904125e-05, "loss": 0.0736, "step": 13536 }, { "epoch": 0.2982917141802597, "grad_norm": 0.9056532979011536, "learning_rate": 2.4684800435309318e-05, "loss": 0.115, "step": 13537 }, { "epoch": 0.29831374946977585, "grad_norm": 0.9134058356285095, "learning_rate": 2.468398291716375e-05, "loss": 0.0795, "step": 13538 }, { "epoch": 0.298335784759292, "grad_norm": 0.8268365263938904, "learning_rate": 2.4683165349692465e-05, "loss": 0.1071, "step": 13539 }, { "epoch": 0.2983578200488082, "grad_norm": 0.7381752133369446, "learning_rate": 2.4682347732899623e-05, "loss": 0.0951, "step": 13540 }, { "epoch": 0.29837985533832434, "grad_norm": 0.7003832459449768, "learning_rate": 2.4681530066789394e-05, "loss": 0.1107, "step": 13541 }, { "epoch": 0.2984018906278405, "grad_norm": 0.7759072780609131, "learning_rate": 2.468071235136594e-05, "loss": 0.0797, "step": 13542 }, { "epoch": 0.29842392591735667, "grad_norm": 0.9679850339889526, "learning_rate": 2.467989458663343e-05, "loss": 0.1206, "step": 13543 }, { "epoch": 0.29844596120687283, "grad_norm": 0.6948527693748474, "learning_rate": 2.467907677259602e-05, "loss": 0.1065, "step": 13544 }, { "epoch": 0.29846799649638894, "grad_norm": 0.7729071378707886, "learning_rate": 2.4678258909257887e-05, "loss": 0.1159, "step": 13545 }, { "epoch": 0.2984900317859051, "grad_norm": 0.8057355284690857, "learning_rate": 2.4677440996623192e-05, "loss": 0.0976, "step": 13546 }, { "epoch": 0.2985120670754213, "grad_norm": 1.0470298528671265, "learning_rate": 2.4676623034696096e-05, "loss": 0.0886, "step": 13547 }, { "epoch": 0.29853410236493744, "grad_norm": 0.707004964351654, "learning_rate": 2.467580502348078e-05, "loss": 0.0677, "step": 13548 }, { "epoch": 0.2985561376544536, "grad_norm": 0.8602137565612793, "learning_rate": 2.467498696298139e-05, "loss": 0.0629, "step": 13549 }, { "epoch": 0.29857817294396977, "grad_norm": 0.6500256061553955, "learning_rate": 2.4674168853202106e-05, "loss": 0.0902, "step": 13550 }, { "epoch": 0.29860020823348593, "grad_norm": 0.5052477717399597, "learning_rate": 2.467335069414709e-05, "loss": 0.0641, "step": 13551 }, { "epoch": 0.2986222435230021, "grad_norm": 0.833671510219574, "learning_rate": 2.4672532485820516e-05, "loss": 0.1093, "step": 13552 }, { "epoch": 0.29864427881251826, "grad_norm": 0.842044472694397, "learning_rate": 2.4671714228226544e-05, "loss": 0.0983, "step": 13553 }, { "epoch": 0.2986663141020344, "grad_norm": 0.9455719590187073, "learning_rate": 2.467089592136935e-05, "loss": 0.0993, "step": 13554 }, { "epoch": 0.2986883493915506, "grad_norm": 0.7451348304748535, "learning_rate": 2.467007756525309e-05, "loss": 0.0954, "step": 13555 }, { "epoch": 0.29871038468106675, "grad_norm": 0.8035701513290405, "learning_rate": 2.4669259159881943e-05, "loss": 0.1039, "step": 13556 }, { "epoch": 0.29873241997058286, "grad_norm": 0.7008086442947388, "learning_rate": 2.4668440705260074e-05, "loss": 0.1169, "step": 13557 }, { "epoch": 0.298754455260099, "grad_norm": 0.8026074171066284, "learning_rate": 2.4667622201391656e-05, "loss": 0.0753, "step": 13558 }, { "epoch": 0.2987764905496152, "grad_norm": 0.8837010860443115, "learning_rate": 2.4666803648280844e-05, "loss": 0.1269, "step": 13559 }, { "epoch": 0.29879852583913136, "grad_norm": 1.0388365983963013, "learning_rate": 2.4665985045931825e-05, "loss": 0.1173, "step": 13560 }, { "epoch": 0.2988205611286475, "grad_norm": 0.6755045056343079, "learning_rate": 2.466516639434876e-05, "loss": 0.074, "step": 13561 }, { "epoch": 0.2988425964181637, "grad_norm": 0.8379060626029968, "learning_rate": 2.4664347693535817e-05, "loss": 0.1036, "step": 13562 }, { "epoch": 0.29886463170767985, "grad_norm": 1.051452875137329, "learning_rate": 2.466352894349717e-05, "loss": 0.1295, "step": 13563 }, { "epoch": 0.298886666997196, "grad_norm": 0.6586713790893555, "learning_rate": 2.466271014423699e-05, "loss": 0.1436, "step": 13564 }, { "epoch": 0.2989087022867122, "grad_norm": 0.9869629740715027, "learning_rate": 2.4661891295759447e-05, "loss": 0.1282, "step": 13565 }, { "epoch": 0.29893073757622834, "grad_norm": 1.2380294799804688, "learning_rate": 2.4661072398068704e-05, "loss": 0.1037, "step": 13566 }, { "epoch": 0.2989527728657445, "grad_norm": 0.6988005042076111, "learning_rate": 2.466025345116894e-05, "loss": 0.084, "step": 13567 }, { "epoch": 0.2989748081552607, "grad_norm": 0.6695698499679565, "learning_rate": 2.465943445506433e-05, "loss": 0.1311, "step": 13568 }, { "epoch": 0.2989968434447768, "grad_norm": 0.8036381602287292, "learning_rate": 2.4658615409759038e-05, "loss": 0.0884, "step": 13569 }, { "epoch": 0.29901887873429295, "grad_norm": 0.6373510360717773, "learning_rate": 2.465779631525724e-05, "loss": 0.1211, "step": 13570 }, { "epoch": 0.2990409140238091, "grad_norm": 0.4540172517299652, "learning_rate": 2.4656977171563103e-05, "loss": 0.0892, "step": 13571 }, { "epoch": 0.2990629493133253, "grad_norm": 0.5823593735694885, "learning_rate": 2.4656157978680802e-05, "loss": 0.0912, "step": 13572 }, { "epoch": 0.29908498460284144, "grad_norm": 0.7210216522216797, "learning_rate": 2.4655338736614513e-05, "loss": 0.1311, "step": 13573 }, { "epoch": 0.2991070198923576, "grad_norm": 1.0782063007354736, "learning_rate": 2.4654519445368407e-05, "loss": 0.1097, "step": 13574 }, { "epoch": 0.29912905518187377, "grad_norm": 0.6855221390724182, "learning_rate": 2.4653700104946658e-05, "loss": 0.1109, "step": 13575 }, { "epoch": 0.29915109047138994, "grad_norm": 1.4324287176132202, "learning_rate": 2.4652880715353433e-05, "loss": 0.1098, "step": 13576 }, { "epoch": 0.2991731257609061, "grad_norm": 0.42753180861473083, "learning_rate": 2.4652061276592914e-05, "loss": 0.0794, "step": 13577 }, { "epoch": 0.29919516105042226, "grad_norm": 0.8516405820846558, "learning_rate": 2.465124178866927e-05, "loss": 0.095, "step": 13578 }, { "epoch": 0.29921719633993843, "grad_norm": 0.8679489493370056, "learning_rate": 2.4650422251586672e-05, "loss": 0.1091, "step": 13579 }, { "epoch": 0.2992392316294546, "grad_norm": 0.7414798736572266, "learning_rate": 2.4649602665349304e-05, "loss": 0.0991, "step": 13580 }, { "epoch": 0.29926126691897076, "grad_norm": 1.0379457473754883, "learning_rate": 2.4648783029961335e-05, "loss": 0.1002, "step": 13581 }, { "epoch": 0.29928330220848687, "grad_norm": 0.9637370705604553, "learning_rate": 2.4647963345426935e-05, "loss": 0.0912, "step": 13582 }, { "epoch": 0.29930533749800303, "grad_norm": 0.8519969582557678, "learning_rate": 2.464714361175029e-05, "loss": 0.0798, "step": 13583 }, { "epoch": 0.2993273727875192, "grad_norm": 0.8579883575439453, "learning_rate": 2.4646323828935564e-05, "loss": 0.1002, "step": 13584 }, { "epoch": 0.29934940807703536, "grad_norm": 0.9797711372375488, "learning_rate": 2.4645503996986942e-05, "loss": 0.0941, "step": 13585 }, { "epoch": 0.2993714433665515, "grad_norm": 0.6652364730834961, "learning_rate": 2.4644684115908593e-05, "loss": 0.0912, "step": 13586 }, { "epoch": 0.2993934786560677, "grad_norm": 0.9323142766952515, "learning_rate": 2.4643864185704697e-05, "loss": 0.0934, "step": 13587 }, { "epoch": 0.29941551394558386, "grad_norm": 0.7184208035469055, "learning_rate": 2.4643044206379427e-05, "loss": 0.0623, "step": 13588 }, { "epoch": 0.2994375492351, "grad_norm": 0.9013224244117737, "learning_rate": 2.4642224177936965e-05, "loss": 0.0986, "step": 13589 }, { "epoch": 0.2994595845246162, "grad_norm": 0.5589280128479004, "learning_rate": 2.4641404100381483e-05, "loss": 0.0849, "step": 13590 }, { "epoch": 0.29948161981413235, "grad_norm": 0.5416707992553711, "learning_rate": 2.4640583973717163e-05, "loss": 0.1038, "step": 13591 }, { "epoch": 0.2995036551036485, "grad_norm": 1.4094539880752563, "learning_rate": 2.4639763797948174e-05, "loss": 0.1248, "step": 13592 }, { "epoch": 0.2995256903931647, "grad_norm": 1.0178937911987305, "learning_rate": 2.4638943573078704e-05, "loss": 0.1054, "step": 13593 }, { "epoch": 0.2995477256826808, "grad_norm": 0.6792887449264526, "learning_rate": 2.4638123299112924e-05, "loss": 0.0647, "step": 13594 }, { "epoch": 0.29956976097219695, "grad_norm": 0.7305552959442139, "learning_rate": 2.463730297605501e-05, "loss": 0.0796, "step": 13595 }, { "epoch": 0.2995917962617131, "grad_norm": 1.1929246187210083, "learning_rate": 2.4636482603909146e-05, "loss": 0.0785, "step": 13596 }, { "epoch": 0.2996138315512293, "grad_norm": 0.9195129871368408, "learning_rate": 2.463566218267951e-05, "loss": 0.1002, "step": 13597 }, { "epoch": 0.29963586684074545, "grad_norm": 0.7274168729782104, "learning_rate": 2.4634841712370283e-05, "loss": 0.0712, "step": 13598 }, { "epoch": 0.2996579021302616, "grad_norm": 0.9790772795677185, "learning_rate": 2.463402119298563e-05, "loss": 0.098, "step": 13599 }, { "epoch": 0.2996799374197778, "grad_norm": 0.8844179511070251, "learning_rate": 2.463320062452975e-05, "loss": 0.1263, "step": 13600 }, { "epoch": 0.29970197270929394, "grad_norm": 0.8060041069984436, "learning_rate": 2.463238000700681e-05, "loss": 0.1255, "step": 13601 }, { "epoch": 0.2997240079988101, "grad_norm": 1.1362565755844116, "learning_rate": 2.4631559340420997e-05, "loss": 0.1293, "step": 13602 }, { "epoch": 0.29974604328832627, "grad_norm": 0.9763280749320984, "learning_rate": 2.463073862477648e-05, "loss": 0.111, "step": 13603 }, { "epoch": 0.29976807857784243, "grad_norm": 0.816029965877533, "learning_rate": 2.4629917860077454e-05, "loss": 0.0991, "step": 13604 }, { "epoch": 0.2997901138673586, "grad_norm": 0.651232123374939, "learning_rate": 2.462909704632809e-05, "loss": 0.0977, "step": 13605 }, { "epoch": 0.2998121491568747, "grad_norm": 0.9208180904388428, "learning_rate": 2.462827618353257e-05, "loss": 0.0993, "step": 13606 }, { "epoch": 0.2998341844463909, "grad_norm": 1.6782413721084595, "learning_rate": 2.4627455271695075e-05, "loss": 0.1248, "step": 13607 }, { "epoch": 0.29985621973590704, "grad_norm": 0.9184123873710632, "learning_rate": 2.4626634310819792e-05, "loss": 0.1237, "step": 13608 }, { "epoch": 0.2998782550254232, "grad_norm": 0.9987538456916809, "learning_rate": 2.4625813300910897e-05, "loss": 0.1115, "step": 13609 }, { "epoch": 0.29990029031493937, "grad_norm": 0.6715801358222961, "learning_rate": 2.4624992241972573e-05, "loss": 0.0751, "step": 13610 }, { "epoch": 0.29992232560445553, "grad_norm": 0.6677350997924805, "learning_rate": 2.4624171134008996e-05, "loss": 0.1035, "step": 13611 }, { "epoch": 0.2999443608939717, "grad_norm": 0.43733862042427063, "learning_rate": 2.4623349977024362e-05, "loss": 0.0726, "step": 13612 }, { "epoch": 0.29996639618348786, "grad_norm": 0.9244567155838013, "learning_rate": 2.4622528771022847e-05, "loss": 0.0919, "step": 13613 }, { "epoch": 0.299988431473004, "grad_norm": 0.757413923740387, "learning_rate": 2.4621707516008627e-05, "loss": 0.0821, "step": 13614 }, { "epoch": 0.3000104667625202, "grad_norm": 0.8121310472488403, "learning_rate": 2.4620886211985897e-05, "loss": 0.0739, "step": 13615 }, { "epoch": 0.30003250205203635, "grad_norm": 1.3367282152175903, "learning_rate": 2.4620064858958832e-05, "loss": 0.1602, "step": 13616 }, { "epoch": 0.3000545373415525, "grad_norm": 1.12571120262146, "learning_rate": 2.4619243456931616e-05, "loss": 0.1169, "step": 13617 }, { "epoch": 0.3000765726310687, "grad_norm": 0.8562189340591431, "learning_rate": 2.4618422005908432e-05, "loss": 0.1134, "step": 13618 }, { "epoch": 0.3000986079205848, "grad_norm": 0.9931122660636902, "learning_rate": 2.4617600505893473e-05, "loss": 0.1595, "step": 13619 }, { "epoch": 0.30012064321010096, "grad_norm": 0.9888513684272766, "learning_rate": 2.4616778956890912e-05, "loss": 0.1006, "step": 13620 }, { "epoch": 0.3001426784996171, "grad_norm": 0.7041383385658264, "learning_rate": 2.4615957358904945e-05, "loss": 0.0959, "step": 13621 }, { "epoch": 0.3001647137891333, "grad_norm": 0.9406672120094299, "learning_rate": 2.4615135711939747e-05, "loss": 0.1061, "step": 13622 }, { "epoch": 0.30018674907864945, "grad_norm": 0.7599326968193054, "learning_rate": 2.461431401599951e-05, "loss": 0.0896, "step": 13623 }, { "epoch": 0.3002087843681656, "grad_norm": 0.616442084312439, "learning_rate": 2.4613492271088414e-05, "loss": 0.0815, "step": 13624 }, { "epoch": 0.3002308196576818, "grad_norm": 0.8357491493225098, "learning_rate": 2.4612670477210645e-05, "loss": 0.1105, "step": 13625 }, { "epoch": 0.30025285494719794, "grad_norm": 0.8586992025375366, "learning_rate": 2.461184863437039e-05, "loss": 0.1593, "step": 13626 }, { "epoch": 0.3002748902367141, "grad_norm": 1.2829663753509521, "learning_rate": 2.461102674257184e-05, "loss": 0.1091, "step": 13627 }, { "epoch": 0.3002969255262303, "grad_norm": 0.6371418833732605, "learning_rate": 2.461020480181917e-05, "loss": 0.1096, "step": 13628 }, { "epoch": 0.30031896081574644, "grad_norm": 0.6707606911659241, "learning_rate": 2.460938281211658e-05, "loss": 0.0939, "step": 13629 }, { "epoch": 0.3003409961052626, "grad_norm": 0.5874187350273132, "learning_rate": 2.460856077346825e-05, "loss": 0.0585, "step": 13630 }, { "epoch": 0.3003630313947787, "grad_norm": 1.1257638931274414, "learning_rate": 2.460773868587837e-05, "loss": 0.1238, "step": 13631 }, { "epoch": 0.3003850666842949, "grad_norm": 0.6840614080429077, "learning_rate": 2.4606916549351116e-05, "loss": 0.0671, "step": 13632 }, { "epoch": 0.30040710197381104, "grad_norm": 0.5086144804954529, "learning_rate": 2.460609436389069e-05, "loss": 0.0862, "step": 13633 }, { "epoch": 0.3004291372633272, "grad_norm": 0.8029184341430664, "learning_rate": 2.4605272129501275e-05, "loss": 0.1133, "step": 13634 }, { "epoch": 0.30045117255284337, "grad_norm": 0.4352015256881714, "learning_rate": 2.4604449846187055e-05, "loss": 0.0652, "step": 13635 }, { "epoch": 0.30047320784235954, "grad_norm": 0.6720622181892395, "learning_rate": 2.4603627513952223e-05, "loss": 0.0942, "step": 13636 }, { "epoch": 0.3004952431318757, "grad_norm": 0.5135588049888611, "learning_rate": 2.4602805132800964e-05, "loss": 0.0787, "step": 13637 }, { "epoch": 0.30051727842139186, "grad_norm": 0.8690914511680603, "learning_rate": 2.4601982702737473e-05, "loss": 0.0804, "step": 13638 }, { "epoch": 0.30053931371090803, "grad_norm": 0.4655371308326721, "learning_rate": 2.4601160223765933e-05, "loss": 0.0853, "step": 13639 }, { "epoch": 0.3005613490004242, "grad_norm": 0.8217254281044006, "learning_rate": 2.4600337695890536e-05, "loss": 0.0846, "step": 13640 }, { "epoch": 0.30058338428994036, "grad_norm": 0.7427951097488403, "learning_rate": 2.4599515119115473e-05, "loss": 0.0703, "step": 13641 }, { "epoch": 0.3006054195794565, "grad_norm": 0.7472497820854187, "learning_rate": 2.459869249344493e-05, "loss": 0.0799, "step": 13642 }, { "epoch": 0.30062745486897263, "grad_norm": 0.7231919765472412, "learning_rate": 2.4597869818883097e-05, "loss": 0.072, "step": 13643 }, { "epoch": 0.3006494901584888, "grad_norm": 0.9991726875305176, "learning_rate": 2.459704709543417e-05, "loss": 0.1166, "step": 13644 }, { "epoch": 0.30067152544800496, "grad_norm": 0.8407342433929443, "learning_rate": 2.4596224323102333e-05, "loss": 0.0913, "step": 13645 }, { "epoch": 0.3006935607375211, "grad_norm": 0.6604230403900146, "learning_rate": 2.4595401501891783e-05, "loss": 0.088, "step": 13646 }, { "epoch": 0.3007155960270373, "grad_norm": 0.9248512983322144, "learning_rate": 2.4594578631806703e-05, "loss": 0.0899, "step": 13647 }, { "epoch": 0.30073763131655346, "grad_norm": 0.6189907193183899, "learning_rate": 2.459375571285129e-05, "loss": 0.0642, "step": 13648 }, { "epoch": 0.3007596666060696, "grad_norm": 0.7602015733718872, "learning_rate": 2.4592932745029737e-05, "loss": 0.0911, "step": 13649 }, { "epoch": 0.3007817018955858, "grad_norm": 0.6772928833961487, "learning_rate": 2.4592109728346232e-05, "loss": 0.1025, "step": 13650 }, { "epoch": 0.30080373718510195, "grad_norm": 1.1656203269958496, "learning_rate": 2.4591286662804966e-05, "loss": 0.0844, "step": 13651 }, { "epoch": 0.3008257724746181, "grad_norm": 0.9255523085594177, "learning_rate": 2.459046354841014e-05, "loss": 0.103, "step": 13652 }, { "epoch": 0.3008478077641343, "grad_norm": 0.7756907939910889, "learning_rate": 2.4589640385165934e-05, "loss": 0.0975, "step": 13653 }, { "epoch": 0.30086984305365044, "grad_norm": 0.558562695980072, "learning_rate": 2.458881717307655e-05, "loss": 0.0787, "step": 13654 }, { "epoch": 0.3008918783431666, "grad_norm": 0.6479307413101196, "learning_rate": 2.4587993912146176e-05, "loss": 0.0856, "step": 13655 }, { "epoch": 0.3009139136326827, "grad_norm": 0.4783400297164917, "learning_rate": 2.4587170602379012e-05, "loss": 0.0732, "step": 13656 }, { "epoch": 0.3009359489221989, "grad_norm": 0.5629055500030518, "learning_rate": 2.4586347243779244e-05, "loss": 0.0912, "step": 13657 }, { "epoch": 0.30095798421171505, "grad_norm": 0.6610467433929443, "learning_rate": 2.4585523836351065e-05, "loss": 0.084, "step": 13658 }, { "epoch": 0.3009800195012312, "grad_norm": 0.6501219868659973, "learning_rate": 2.458470038009868e-05, "loss": 0.0744, "step": 13659 }, { "epoch": 0.3010020547907474, "grad_norm": 0.7956716418266296, "learning_rate": 2.4583876875026273e-05, "loss": 0.0846, "step": 13660 }, { "epoch": 0.30102409008026354, "grad_norm": 0.8815304636955261, "learning_rate": 2.4583053321138047e-05, "loss": 0.1027, "step": 13661 }, { "epoch": 0.3010461253697797, "grad_norm": 0.7685027718544006, "learning_rate": 2.4582229718438183e-05, "loss": 0.1058, "step": 13662 }, { "epoch": 0.30106816065929587, "grad_norm": 0.8581007719039917, "learning_rate": 2.458140606693089e-05, "loss": 0.0832, "step": 13663 }, { "epoch": 0.30109019594881203, "grad_norm": 0.7353556156158447, "learning_rate": 2.458058236662036e-05, "loss": 0.0698, "step": 13664 }, { "epoch": 0.3011122312383282, "grad_norm": 0.7862026691436768, "learning_rate": 2.4579758617510783e-05, "loss": 0.0771, "step": 13665 }, { "epoch": 0.30113426652784436, "grad_norm": 0.6474448442459106, "learning_rate": 2.4578934819606355e-05, "loss": 0.0896, "step": 13666 }, { "epoch": 0.3011563018173605, "grad_norm": 0.7546496987342834, "learning_rate": 2.4578110972911278e-05, "loss": 0.1009, "step": 13667 }, { "epoch": 0.30117833710687664, "grad_norm": 1.000064730644226, "learning_rate": 2.4577287077429745e-05, "loss": 0.1278, "step": 13668 }, { "epoch": 0.3012003723963928, "grad_norm": 1.3239132165908813, "learning_rate": 2.4576463133165956e-05, "loss": 0.0994, "step": 13669 }, { "epoch": 0.30122240768590897, "grad_norm": 1.0114920139312744, "learning_rate": 2.4575639140124104e-05, "loss": 0.0893, "step": 13670 }, { "epoch": 0.30124444297542513, "grad_norm": 0.6120356321334839, "learning_rate": 2.4574815098308387e-05, "loss": 0.1068, "step": 13671 }, { "epoch": 0.3012664782649413, "grad_norm": 0.8763300180435181, "learning_rate": 2.4573991007723003e-05, "loss": 0.1105, "step": 13672 }, { "epoch": 0.30128851355445746, "grad_norm": 0.9303725957870483, "learning_rate": 2.4573166868372144e-05, "loss": 0.1056, "step": 13673 }, { "epoch": 0.3013105488439736, "grad_norm": 0.6954305171966553, "learning_rate": 2.4572342680260017e-05, "loss": 0.0714, "step": 13674 }, { "epoch": 0.3013325841334898, "grad_norm": 0.615070641040802, "learning_rate": 2.4571518443390817e-05, "loss": 0.1021, "step": 13675 }, { "epoch": 0.30135461942300595, "grad_norm": 0.8973832130432129, "learning_rate": 2.4570694157768738e-05, "loss": 0.1319, "step": 13676 }, { "epoch": 0.3013766547125221, "grad_norm": 1.1879053115844727, "learning_rate": 2.4569869823397982e-05, "loss": 0.1274, "step": 13677 }, { "epoch": 0.3013986900020383, "grad_norm": 0.6919171810150146, "learning_rate": 2.4569045440282747e-05, "loss": 0.0941, "step": 13678 }, { "epoch": 0.30142072529155445, "grad_norm": 0.8361255526542664, "learning_rate": 2.4568221008427233e-05, "loss": 0.0904, "step": 13679 }, { "epoch": 0.30144276058107056, "grad_norm": 0.6864049434661865, "learning_rate": 2.4567396527835636e-05, "loss": 0.1433, "step": 13680 }, { "epoch": 0.3014647958705867, "grad_norm": 0.680496871471405, "learning_rate": 2.4566571998512163e-05, "loss": 0.0984, "step": 13681 }, { "epoch": 0.3014868311601029, "grad_norm": 0.6229336857795715, "learning_rate": 2.4565747420461e-05, "loss": 0.0676, "step": 13682 }, { "epoch": 0.30150886644961905, "grad_norm": 0.6926342248916626, "learning_rate": 2.4564922793686364e-05, "loss": 0.1101, "step": 13683 }, { "epoch": 0.3015309017391352, "grad_norm": 0.7657617926597595, "learning_rate": 2.4564098118192445e-05, "loss": 0.0907, "step": 13684 }, { "epoch": 0.3015529370286514, "grad_norm": 0.7431966662406921, "learning_rate": 2.4563273393983445e-05, "loss": 0.0842, "step": 13685 }, { "epoch": 0.30157497231816754, "grad_norm": 0.7031615376472473, "learning_rate": 2.456244862106357e-05, "loss": 0.0897, "step": 13686 }, { "epoch": 0.3015970076076837, "grad_norm": 1.1528804302215576, "learning_rate": 2.456162379943701e-05, "loss": 0.1116, "step": 13687 }, { "epoch": 0.3016190428971999, "grad_norm": 0.6017950773239136, "learning_rate": 2.4560798929107975e-05, "loss": 0.0601, "step": 13688 }, { "epoch": 0.30164107818671604, "grad_norm": 0.7262822389602661, "learning_rate": 2.4559974010080662e-05, "loss": 0.1412, "step": 13689 }, { "epoch": 0.3016631134762322, "grad_norm": 0.8193494081497192, "learning_rate": 2.455914904235928e-05, "loss": 0.0788, "step": 13690 }, { "epoch": 0.30168514876574837, "grad_norm": 0.6566799879074097, "learning_rate": 2.455832402594802e-05, "loss": 0.0729, "step": 13691 }, { "epoch": 0.30170718405526453, "grad_norm": 0.8267746567726135, "learning_rate": 2.4557498960851093e-05, "loss": 0.089, "step": 13692 }, { "epoch": 0.30172921934478064, "grad_norm": 0.5178698301315308, "learning_rate": 2.45566738470727e-05, "loss": 0.1059, "step": 13693 }, { "epoch": 0.3017512546342968, "grad_norm": 0.9291454553604126, "learning_rate": 2.4555848684617044e-05, "loss": 0.1076, "step": 13694 }, { "epoch": 0.30177328992381297, "grad_norm": 0.5628723502159119, "learning_rate": 2.455502347348832e-05, "loss": 0.0972, "step": 13695 }, { "epoch": 0.30179532521332914, "grad_norm": 0.6558627486228943, "learning_rate": 2.4554198213690743e-05, "loss": 0.1182, "step": 13696 }, { "epoch": 0.3018173605028453, "grad_norm": 0.6611039638519287, "learning_rate": 2.4553372905228506e-05, "loss": 0.1002, "step": 13697 }, { "epoch": 0.30183939579236146, "grad_norm": 0.7745614051818848, "learning_rate": 2.4552547548105822e-05, "loss": 0.1099, "step": 13698 }, { "epoch": 0.30186143108187763, "grad_norm": 0.7034820914268494, "learning_rate": 2.455172214232689e-05, "loss": 0.0604, "step": 13699 }, { "epoch": 0.3018834663713938, "grad_norm": 0.5883671641349792, "learning_rate": 2.4550896687895915e-05, "loss": 0.0656, "step": 13700 }, { "epoch": 0.30190550166090996, "grad_norm": 0.8434990048408508, "learning_rate": 2.45500711848171e-05, "loss": 0.1196, "step": 13701 }, { "epoch": 0.3019275369504261, "grad_norm": 0.5420497059822083, "learning_rate": 2.4549245633094653e-05, "loss": 0.0672, "step": 13702 }, { "epoch": 0.3019495722399423, "grad_norm": 0.776068389415741, "learning_rate": 2.4548420032732777e-05, "loss": 0.0766, "step": 13703 }, { "epoch": 0.30197160752945845, "grad_norm": 1.1501911878585815, "learning_rate": 2.454759438373568e-05, "loss": 0.1028, "step": 13704 }, { "epoch": 0.30199364281897456, "grad_norm": 0.8399270176887512, "learning_rate": 2.4546768686107562e-05, "loss": 0.1084, "step": 13705 }, { "epoch": 0.3020156781084907, "grad_norm": 0.6486334204673767, "learning_rate": 2.454594293985263e-05, "loss": 0.1041, "step": 13706 }, { "epoch": 0.3020377133980069, "grad_norm": 0.6332508325576782, "learning_rate": 2.4545117144975096e-05, "loss": 0.1005, "step": 13707 }, { "epoch": 0.30205974868752306, "grad_norm": 0.7797363996505737, "learning_rate": 2.4544291301479162e-05, "loss": 0.0906, "step": 13708 }, { "epoch": 0.3020817839770392, "grad_norm": 0.7260175943374634, "learning_rate": 2.4543465409369032e-05, "loss": 0.0906, "step": 13709 }, { "epoch": 0.3021038192665554, "grad_norm": 1.4598397016525269, "learning_rate": 2.454263946864891e-05, "loss": 0.1443, "step": 13710 }, { "epoch": 0.30212585455607155, "grad_norm": 0.8005373477935791, "learning_rate": 2.4541813479323017e-05, "loss": 0.063, "step": 13711 }, { "epoch": 0.3021478898455877, "grad_norm": 0.48067036271095276, "learning_rate": 2.4540987441395546e-05, "loss": 0.0619, "step": 13712 }, { "epoch": 0.3021699251351039, "grad_norm": 0.7439176440238953, "learning_rate": 2.4540161354870714e-05, "loss": 0.1482, "step": 13713 }, { "epoch": 0.30219196042462004, "grad_norm": 0.9517788290977478, "learning_rate": 2.453933521975272e-05, "loss": 0.1146, "step": 13714 }, { "epoch": 0.3022139957141362, "grad_norm": 0.7482877373695374, "learning_rate": 2.453850903604578e-05, "loss": 0.0881, "step": 13715 }, { "epoch": 0.3022360310036524, "grad_norm": 0.8614257574081421, "learning_rate": 2.4537682803754095e-05, "loss": 0.115, "step": 13716 }, { "epoch": 0.30225806629316854, "grad_norm": 1.437992811203003, "learning_rate": 2.4536856522881877e-05, "loss": 0.1056, "step": 13717 }, { "epoch": 0.30228010158268465, "grad_norm": 0.7732852697372437, "learning_rate": 2.4536030193433335e-05, "loss": 0.133, "step": 13718 }, { "epoch": 0.3023021368722008, "grad_norm": 0.7346112132072449, "learning_rate": 2.453520381541268e-05, "loss": 0.0951, "step": 13719 }, { "epoch": 0.302324172161717, "grad_norm": 0.7381201386451721, "learning_rate": 2.453437738882412e-05, "loss": 0.1254, "step": 13720 }, { "epoch": 0.30234620745123314, "grad_norm": 0.6322768926620483, "learning_rate": 2.453355091367186e-05, "loss": 0.1124, "step": 13721 }, { "epoch": 0.3023682427407493, "grad_norm": 0.834348201751709, "learning_rate": 2.4532724389960114e-05, "loss": 0.0866, "step": 13722 }, { "epoch": 0.30239027803026547, "grad_norm": 0.655997633934021, "learning_rate": 2.4531897817693088e-05, "loss": 0.1252, "step": 13723 }, { "epoch": 0.30241231331978163, "grad_norm": 0.663987398147583, "learning_rate": 2.4531071196874998e-05, "loss": 0.0927, "step": 13724 }, { "epoch": 0.3024343486092978, "grad_norm": 0.7638411521911621, "learning_rate": 2.453024452751005e-05, "loss": 0.0994, "step": 13725 }, { "epoch": 0.30245638389881396, "grad_norm": 0.48878246545791626, "learning_rate": 2.452941780960246e-05, "loss": 0.042, "step": 13726 }, { "epoch": 0.3024784191883301, "grad_norm": 0.8219462037086487, "learning_rate": 2.4528591043156427e-05, "loss": 0.0941, "step": 13727 }, { "epoch": 0.3025004544778463, "grad_norm": 0.43184006214141846, "learning_rate": 2.452776422817618e-05, "loss": 0.0703, "step": 13728 }, { "epoch": 0.30252248976736246, "grad_norm": 0.8119638562202454, "learning_rate": 2.452693736466591e-05, "loss": 0.1404, "step": 13729 }, { "epoch": 0.30254452505687857, "grad_norm": 0.6108893156051636, "learning_rate": 2.4526110452629846e-05, "loss": 0.1088, "step": 13730 }, { "epoch": 0.30256656034639473, "grad_norm": 0.8492932915687561, "learning_rate": 2.4525283492072194e-05, "loss": 0.0883, "step": 13731 }, { "epoch": 0.3025885956359109, "grad_norm": 0.6255626082420349, "learning_rate": 2.4524456482997162e-05, "loss": 0.1322, "step": 13732 }, { "epoch": 0.30261063092542706, "grad_norm": 0.8591809272766113, "learning_rate": 2.4523629425408968e-05, "loss": 0.0919, "step": 13733 }, { "epoch": 0.3026326662149432, "grad_norm": 0.6311051249504089, "learning_rate": 2.452280231931182e-05, "loss": 0.0911, "step": 13734 }, { "epoch": 0.3026547015044594, "grad_norm": 1.2268234491348267, "learning_rate": 2.4521975164709936e-05, "loss": 0.1185, "step": 13735 }, { "epoch": 0.30267673679397555, "grad_norm": 0.6088860630989075, "learning_rate": 2.4521147961607523e-05, "loss": 0.0643, "step": 13736 }, { "epoch": 0.3026987720834917, "grad_norm": 0.6898157000541687, "learning_rate": 2.45203207100088e-05, "loss": 0.1007, "step": 13737 }, { "epoch": 0.3027208073730079, "grad_norm": 1.1997807025909424, "learning_rate": 2.4519493409917982e-05, "loss": 0.0931, "step": 13738 }, { "epoch": 0.30274284266252405, "grad_norm": 0.5468133687973022, "learning_rate": 2.4518666061339273e-05, "loss": 0.0833, "step": 13739 }, { "epoch": 0.3027648779520402, "grad_norm": 0.9771739840507507, "learning_rate": 2.4517838664276897e-05, "loss": 0.082, "step": 13740 }, { "epoch": 0.3027869132415564, "grad_norm": 0.8242700695991516, "learning_rate": 2.4517011218735067e-05, "loss": 0.0849, "step": 13741 }, { "epoch": 0.3028089485310725, "grad_norm": 0.7524100542068481, "learning_rate": 2.4516183724717992e-05, "loss": 0.0931, "step": 13742 }, { "epoch": 0.30283098382058865, "grad_norm": 0.9482688307762146, "learning_rate": 2.4515356182229888e-05, "loss": 0.0837, "step": 13743 }, { "epoch": 0.3028530191101048, "grad_norm": 0.8379777073860168, "learning_rate": 2.4514528591274975e-05, "loss": 0.1102, "step": 13744 }, { "epoch": 0.302875054399621, "grad_norm": 0.8575495481491089, "learning_rate": 2.451370095185747e-05, "loss": 0.0694, "step": 13745 }, { "epoch": 0.30289708968913714, "grad_norm": 0.6002458930015564, "learning_rate": 2.4512873263981575e-05, "loss": 0.1299, "step": 13746 }, { "epoch": 0.3029191249786533, "grad_norm": 1.2580729722976685, "learning_rate": 2.4512045527651522e-05, "loss": 0.1052, "step": 13747 }, { "epoch": 0.3029411602681695, "grad_norm": 0.7062817215919495, "learning_rate": 2.451121774287152e-05, "loss": 0.0951, "step": 13748 }, { "epoch": 0.30296319555768564, "grad_norm": 0.8995420932769775, "learning_rate": 2.4510389909645784e-05, "loss": 0.1031, "step": 13749 }, { "epoch": 0.3029852308472018, "grad_norm": 0.5929905772209167, "learning_rate": 2.4509562027978534e-05, "loss": 0.0822, "step": 13750 }, { "epoch": 0.30300726613671797, "grad_norm": 0.37116575241088867, "learning_rate": 2.450873409787398e-05, "loss": 0.0856, "step": 13751 }, { "epoch": 0.30302930142623413, "grad_norm": 0.9083079099655151, "learning_rate": 2.450790611933635e-05, "loss": 0.0654, "step": 13752 }, { "epoch": 0.3030513367157503, "grad_norm": 0.5999420881271362, "learning_rate": 2.4507078092369855e-05, "loss": 0.0961, "step": 13753 }, { "epoch": 0.30307337200526646, "grad_norm": 0.8754787445068359, "learning_rate": 2.4506250016978714e-05, "loss": 0.0843, "step": 13754 }, { "epoch": 0.30309540729478257, "grad_norm": 0.6951744556427002, "learning_rate": 2.450542189316714e-05, "loss": 0.0696, "step": 13755 }, { "epoch": 0.30311744258429874, "grad_norm": 0.7151210308074951, "learning_rate": 2.450459372093936e-05, "loss": 0.1086, "step": 13756 }, { "epoch": 0.3031394778738149, "grad_norm": 1.1975159645080566, "learning_rate": 2.4503765500299582e-05, "loss": 0.0971, "step": 13757 }, { "epoch": 0.30316151316333106, "grad_norm": 0.8030954003334045, "learning_rate": 2.4502937231252033e-05, "loss": 0.0787, "step": 13758 }, { "epoch": 0.30318354845284723, "grad_norm": 0.5015662312507629, "learning_rate": 2.450210891380093e-05, "loss": 0.1097, "step": 13759 }, { "epoch": 0.3032055837423634, "grad_norm": 0.9276918768882751, "learning_rate": 2.4501280547950486e-05, "loss": 0.1516, "step": 13760 }, { "epoch": 0.30322761903187956, "grad_norm": 0.806801974773407, "learning_rate": 2.450045213370493e-05, "loss": 0.0785, "step": 13761 }, { "epoch": 0.3032496543213957, "grad_norm": 0.8913671970367432, "learning_rate": 2.4499623671068478e-05, "loss": 0.1112, "step": 13762 }, { "epoch": 0.3032716896109119, "grad_norm": 1.3950682878494263, "learning_rate": 2.4498795160045344e-05, "loss": 0.1007, "step": 13763 }, { "epoch": 0.30329372490042805, "grad_norm": 0.7830260396003723, "learning_rate": 2.4497966600639755e-05, "loss": 0.0844, "step": 13764 }, { "epoch": 0.3033157601899442, "grad_norm": 0.8077616691589355, "learning_rate": 2.4497137992855928e-05, "loss": 0.1429, "step": 13765 }, { "epoch": 0.3033377954794604, "grad_norm": 0.8546079397201538, "learning_rate": 2.4496309336698083e-05, "loss": 0.1017, "step": 13766 }, { "epoch": 0.3033598307689765, "grad_norm": 1.2658268213272095, "learning_rate": 2.4495480632170445e-05, "loss": 0.1231, "step": 13767 }, { "epoch": 0.30338186605849266, "grad_norm": 0.6157722473144531, "learning_rate": 2.4494651879277232e-05, "loss": 0.1024, "step": 13768 }, { "epoch": 0.3034039013480088, "grad_norm": 0.601948082447052, "learning_rate": 2.4493823078022663e-05, "loss": 0.0861, "step": 13769 }, { "epoch": 0.303425936637525, "grad_norm": 0.517540454864502, "learning_rate": 2.4492994228410963e-05, "loss": 0.1171, "step": 13770 }, { "epoch": 0.30344797192704115, "grad_norm": 0.8214406967163086, "learning_rate": 2.4492165330446353e-05, "loss": 0.0854, "step": 13771 }, { "epoch": 0.3034700072165573, "grad_norm": 1.0482841730117798, "learning_rate": 2.4491336384133056e-05, "loss": 0.101, "step": 13772 }, { "epoch": 0.3034920425060735, "grad_norm": 0.6263948678970337, "learning_rate": 2.449050738947529e-05, "loss": 0.0909, "step": 13773 }, { "epoch": 0.30351407779558964, "grad_norm": 0.8056332468986511, "learning_rate": 2.4489678346477286e-05, "loss": 0.08, "step": 13774 }, { "epoch": 0.3035361130851058, "grad_norm": 1.2873575687408447, "learning_rate": 2.4488849255143257e-05, "loss": 0.1027, "step": 13775 }, { "epoch": 0.30355814837462197, "grad_norm": 0.6921677589416504, "learning_rate": 2.4488020115477436e-05, "loss": 0.0792, "step": 13776 }, { "epoch": 0.30358018366413814, "grad_norm": 0.6969487071037292, "learning_rate": 2.4487190927484036e-05, "loss": 0.0759, "step": 13777 }, { "epoch": 0.3036022189536543, "grad_norm": 0.7899941802024841, "learning_rate": 2.4486361691167284e-05, "loss": 0.0774, "step": 13778 }, { "epoch": 0.3036242542431704, "grad_norm": 0.6703488230705261, "learning_rate": 2.448553240653141e-05, "loss": 0.0767, "step": 13779 }, { "epoch": 0.3036462895326866, "grad_norm": 0.9637462496757507, "learning_rate": 2.448470307358063e-05, "loss": 0.068, "step": 13780 }, { "epoch": 0.30366832482220274, "grad_norm": 0.9769481420516968, "learning_rate": 2.4483873692319168e-05, "loss": 0.1155, "step": 13781 }, { "epoch": 0.3036903601117189, "grad_norm": 0.6623464226722717, "learning_rate": 2.4483044262751256e-05, "loss": 0.0822, "step": 13782 }, { "epoch": 0.30371239540123507, "grad_norm": 0.591699481010437, "learning_rate": 2.448221478488111e-05, "loss": 0.0751, "step": 13783 }, { "epoch": 0.30373443069075123, "grad_norm": 1.0754683017730713, "learning_rate": 2.4481385258712968e-05, "loss": 0.1054, "step": 13784 }, { "epoch": 0.3037564659802674, "grad_norm": 1.0996278524398804, "learning_rate": 2.4480555684251037e-05, "loss": 0.0721, "step": 13785 }, { "epoch": 0.30377850126978356, "grad_norm": 0.6302517056465149, "learning_rate": 2.447972606149956e-05, "loss": 0.1017, "step": 13786 }, { "epoch": 0.3038005365592997, "grad_norm": 0.7058700323104858, "learning_rate": 2.4478896390462747e-05, "loss": 0.1026, "step": 13787 }, { "epoch": 0.3038225718488159, "grad_norm": 0.7948753833770752, "learning_rate": 2.4478066671144834e-05, "loss": 0.1207, "step": 13788 }, { "epoch": 0.30384460713833206, "grad_norm": 0.9017615914344788, "learning_rate": 2.4477236903550047e-05, "loss": 0.0873, "step": 13789 }, { "epoch": 0.3038666424278482, "grad_norm": 0.6168756484985352, "learning_rate": 2.4476407087682605e-05, "loss": 0.0519, "step": 13790 }, { "epoch": 0.3038886777173644, "grad_norm": 0.7696450352668762, "learning_rate": 2.447557722354675e-05, "loss": 0.0924, "step": 13791 }, { "epoch": 0.3039107130068805, "grad_norm": 0.6447020769119263, "learning_rate": 2.447474731114669e-05, "loss": 0.0905, "step": 13792 }, { "epoch": 0.30393274829639666, "grad_norm": 0.7770545482635498, "learning_rate": 2.4473917350486668e-05, "loss": 0.0828, "step": 13793 }, { "epoch": 0.3039547835859128, "grad_norm": 0.9531611800193787, "learning_rate": 2.44730873415709e-05, "loss": 0.0752, "step": 13794 }, { "epoch": 0.303976818875429, "grad_norm": 0.5533004403114319, "learning_rate": 2.447225728440362e-05, "loss": 0.0873, "step": 13795 }, { "epoch": 0.30399885416494515, "grad_norm": 0.6481887102127075, "learning_rate": 2.447142717898905e-05, "loss": 0.0936, "step": 13796 }, { "epoch": 0.3040208894544613, "grad_norm": 0.7625824213027954, "learning_rate": 2.447059702533143e-05, "loss": 0.1181, "step": 13797 }, { "epoch": 0.3040429247439775, "grad_norm": 0.6525274515151978, "learning_rate": 2.4469766823434973e-05, "loss": 0.097, "step": 13798 }, { "epoch": 0.30406496003349365, "grad_norm": 0.9323300123214722, "learning_rate": 2.446893657330392e-05, "loss": 0.0778, "step": 13799 }, { "epoch": 0.3040869953230098, "grad_norm": 0.7399901151657104, "learning_rate": 2.4468106274942497e-05, "loss": 0.1015, "step": 13800 }, { "epoch": 0.304109030612526, "grad_norm": 0.6788649559020996, "learning_rate": 2.446727592835493e-05, "loss": 0.0877, "step": 13801 }, { "epoch": 0.30413106590204214, "grad_norm": 0.7966964244842529, "learning_rate": 2.4466445533545447e-05, "loss": 0.1072, "step": 13802 }, { "epoch": 0.3041531011915583, "grad_norm": 0.693604588508606, "learning_rate": 2.4465615090518287e-05, "loss": 0.1011, "step": 13803 }, { "epoch": 0.3041751364810744, "grad_norm": 0.9511861205101013, "learning_rate": 2.446478459927767e-05, "loss": 0.1109, "step": 13804 }, { "epoch": 0.3041971717705906, "grad_norm": 0.6761671304702759, "learning_rate": 2.446395405982783e-05, "loss": 0.1075, "step": 13805 }, { "epoch": 0.30421920706010674, "grad_norm": 0.7686618566513062, "learning_rate": 2.4463123472172993e-05, "loss": 0.0957, "step": 13806 }, { "epoch": 0.3042412423496229, "grad_norm": 0.8886815905570984, "learning_rate": 2.44622928363174e-05, "loss": 0.117, "step": 13807 }, { "epoch": 0.3042632776391391, "grad_norm": 0.5953124761581421, "learning_rate": 2.4461462152265267e-05, "loss": 0.0862, "step": 13808 }, { "epoch": 0.30428531292865524, "grad_norm": 0.7111725211143494, "learning_rate": 2.446063142002084e-05, "loss": 0.1064, "step": 13809 }, { "epoch": 0.3043073482181714, "grad_norm": 0.7507372498512268, "learning_rate": 2.4459800639588347e-05, "loss": 0.0989, "step": 13810 }, { "epoch": 0.30432938350768757, "grad_norm": 0.7490376830101013, "learning_rate": 2.445896981097201e-05, "loss": 0.1155, "step": 13811 }, { "epoch": 0.30435141879720373, "grad_norm": 0.8932855725288391, "learning_rate": 2.445813893417607e-05, "loss": 0.1235, "step": 13812 }, { "epoch": 0.3043734540867199, "grad_norm": 1.0942373275756836, "learning_rate": 2.4457308009204753e-05, "loss": 0.1357, "step": 13813 }, { "epoch": 0.30439548937623606, "grad_norm": 1.0646706819534302, "learning_rate": 2.44564770360623e-05, "loss": 0.1222, "step": 13814 }, { "epoch": 0.3044175246657522, "grad_norm": 0.7092656493186951, "learning_rate": 2.4455646014752938e-05, "loss": 0.1121, "step": 13815 }, { "epoch": 0.30443955995526834, "grad_norm": 0.6926553845405579, "learning_rate": 2.44548149452809e-05, "loss": 0.1, "step": 13816 }, { "epoch": 0.3044615952447845, "grad_norm": 0.749838650226593, "learning_rate": 2.4453983827650418e-05, "loss": 0.124, "step": 13817 }, { "epoch": 0.30448363053430066, "grad_norm": 0.8862075805664062, "learning_rate": 2.4453152661865728e-05, "loss": 0.136, "step": 13818 }, { "epoch": 0.30450566582381683, "grad_norm": 0.585923433303833, "learning_rate": 2.4452321447931057e-05, "loss": 0.0776, "step": 13819 }, { "epoch": 0.304527701113333, "grad_norm": 0.8058111071586609, "learning_rate": 2.4451490185850654e-05, "loss": 0.1138, "step": 13820 }, { "epoch": 0.30454973640284916, "grad_norm": 0.780501663684845, "learning_rate": 2.4450658875628737e-05, "loss": 0.0974, "step": 13821 }, { "epoch": 0.3045717716923653, "grad_norm": 0.806968629360199, "learning_rate": 2.4449827517269548e-05, "loss": 0.0927, "step": 13822 }, { "epoch": 0.3045938069818815, "grad_norm": 1.3055003881454468, "learning_rate": 2.444899611077732e-05, "loss": 0.112, "step": 13823 }, { "epoch": 0.30461584227139765, "grad_norm": 0.636078417301178, "learning_rate": 2.4448164656156284e-05, "loss": 0.1278, "step": 13824 }, { "epoch": 0.3046378775609138, "grad_norm": 0.962710976600647, "learning_rate": 2.4447333153410683e-05, "loss": 0.1083, "step": 13825 }, { "epoch": 0.30465991285043, "grad_norm": 0.6300520300865173, "learning_rate": 2.4446501602544747e-05, "loss": 0.0832, "step": 13826 }, { "epoch": 0.30468194813994615, "grad_norm": 1.1673004627227783, "learning_rate": 2.4445670003562712e-05, "loss": 0.0955, "step": 13827 }, { "epoch": 0.3047039834294623, "grad_norm": 0.7305272221565247, "learning_rate": 2.444483835646882e-05, "loss": 0.0955, "step": 13828 }, { "epoch": 0.3047260187189784, "grad_norm": 0.8095971941947937, "learning_rate": 2.4444006661267294e-05, "loss": 0.0957, "step": 13829 }, { "epoch": 0.3047480540084946, "grad_norm": 0.8107330203056335, "learning_rate": 2.444317491796238e-05, "loss": 0.0915, "step": 13830 }, { "epoch": 0.30477008929801075, "grad_norm": 0.886501133441925, "learning_rate": 2.4442343126558312e-05, "loss": 0.09, "step": 13831 }, { "epoch": 0.3047921245875269, "grad_norm": 0.4194563925266266, "learning_rate": 2.444151128705933e-05, "loss": 0.0951, "step": 13832 }, { "epoch": 0.3048141598770431, "grad_norm": 0.42356395721435547, "learning_rate": 2.444067939946967e-05, "loss": 0.0942, "step": 13833 }, { "epoch": 0.30483619516655924, "grad_norm": 0.6812066435813904, "learning_rate": 2.4439847463793562e-05, "loss": 0.0844, "step": 13834 }, { "epoch": 0.3048582304560754, "grad_norm": 0.7139016389846802, "learning_rate": 2.443901548003525e-05, "loss": 0.1142, "step": 13835 }, { "epoch": 0.30488026574559157, "grad_norm": 0.6935765743255615, "learning_rate": 2.4438183448198972e-05, "loss": 0.0911, "step": 13836 }, { "epoch": 0.30490230103510774, "grad_norm": 1.0799975395202637, "learning_rate": 2.4437351368288963e-05, "loss": 0.117, "step": 13837 }, { "epoch": 0.3049243363246239, "grad_norm": 0.8516343235969543, "learning_rate": 2.4436519240309462e-05, "loss": 0.1198, "step": 13838 }, { "epoch": 0.30494637161414007, "grad_norm": 0.9484459161758423, "learning_rate": 2.443568706426471e-05, "loss": 0.1348, "step": 13839 }, { "epoch": 0.30496840690365623, "grad_norm": 0.6390947103500366, "learning_rate": 2.4434854840158942e-05, "loss": 0.0761, "step": 13840 }, { "epoch": 0.30499044219317234, "grad_norm": 0.636478841304779, "learning_rate": 2.44340225679964e-05, "loss": 0.0749, "step": 13841 }, { "epoch": 0.3050124774826885, "grad_norm": 0.8754390478134155, "learning_rate": 2.4433190247781323e-05, "loss": 0.0965, "step": 13842 }, { "epoch": 0.30503451277220467, "grad_norm": 0.8745808601379395, "learning_rate": 2.443235787951795e-05, "loss": 0.0927, "step": 13843 }, { "epoch": 0.30505654806172083, "grad_norm": 0.6046476364135742, "learning_rate": 2.443152546321052e-05, "loss": 0.0881, "step": 13844 }, { "epoch": 0.305078583351237, "grad_norm": 0.8315587639808655, "learning_rate": 2.4430692998863276e-05, "loss": 0.1062, "step": 13845 }, { "epoch": 0.30510061864075316, "grad_norm": 0.8999134302139282, "learning_rate": 2.442986048648045e-05, "loss": 0.1345, "step": 13846 }, { "epoch": 0.3051226539302693, "grad_norm": 0.9254477024078369, "learning_rate": 2.442902792606629e-05, "loss": 0.0909, "step": 13847 }, { "epoch": 0.3051446892197855, "grad_norm": 0.8805665373802185, "learning_rate": 2.4428195317625037e-05, "loss": 0.1223, "step": 13848 }, { "epoch": 0.30516672450930166, "grad_norm": 0.605819821357727, "learning_rate": 2.4427362661160924e-05, "loss": 0.1037, "step": 13849 }, { "epoch": 0.3051887597988178, "grad_norm": 0.9906567931175232, "learning_rate": 2.4426529956678205e-05, "loss": 0.1004, "step": 13850 }, { "epoch": 0.305210795088334, "grad_norm": 0.6840834617614746, "learning_rate": 2.4425697204181108e-05, "loss": 0.1068, "step": 13851 }, { "epoch": 0.30523283037785015, "grad_norm": 0.5229073762893677, "learning_rate": 2.4424864403673882e-05, "loss": 0.0779, "step": 13852 }, { "epoch": 0.30525486566736626, "grad_norm": 0.7764260172843933, "learning_rate": 2.4424031555160766e-05, "loss": 0.1004, "step": 13853 }, { "epoch": 0.3052769009568824, "grad_norm": 0.7041639089584351, "learning_rate": 2.4423198658646004e-05, "loss": 0.0632, "step": 13854 }, { "epoch": 0.3052989362463986, "grad_norm": 0.6668650507926941, "learning_rate": 2.4422365714133843e-05, "loss": 0.0995, "step": 13855 }, { "epoch": 0.30532097153591475, "grad_norm": 0.5438739061355591, "learning_rate": 2.4421532721628517e-05, "loss": 0.0812, "step": 13856 }, { "epoch": 0.3053430068254309, "grad_norm": 0.508522629737854, "learning_rate": 2.4420699681134272e-05, "loss": 0.0649, "step": 13857 }, { "epoch": 0.3053650421149471, "grad_norm": 0.5550470948219299, "learning_rate": 2.441986659265535e-05, "loss": 0.0701, "step": 13858 }, { "epoch": 0.30538707740446325, "grad_norm": 0.6887676119804382, "learning_rate": 2.4419033456196002e-05, "loss": 0.1182, "step": 13859 }, { "epoch": 0.3054091126939794, "grad_norm": 0.36169397830963135, "learning_rate": 2.4418200271760465e-05, "loss": 0.0827, "step": 13860 }, { "epoch": 0.3054311479834956, "grad_norm": 0.6719381809234619, "learning_rate": 2.441736703935298e-05, "loss": 0.0931, "step": 13861 }, { "epoch": 0.30545318327301174, "grad_norm": 0.7861025333404541, "learning_rate": 2.4416533758977797e-05, "loss": 0.0593, "step": 13862 }, { "epoch": 0.3054752185625279, "grad_norm": 0.7112718224525452, "learning_rate": 2.4415700430639154e-05, "loss": 0.1017, "step": 13863 }, { "epoch": 0.30549725385204407, "grad_norm": 0.7731306552886963, "learning_rate": 2.4414867054341306e-05, "loss": 0.099, "step": 13864 }, { "epoch": 0.30551928914156024, "grad_norm": 0.6217331290245056, "learning_rate": 2.4414033630088484e-05, "loss": 0.1089, "step": 13865 }, { "epoch": 0.30554132443107634, "grad_norm": 0.6171731352806091, "learning_rate": 2.4413200157884948e-05, "loss": 0.0929, "step": 13866 }, { "epoch": 0.3055633597205925, "grad_norm": 0.7090082764625549, "learning_rate": 2.441236663773493e-05, "loss": 0.0769, "step": 13867 }, { "epoch": 0.3055853950101087, "grad_norm": 0.7381675243377686, "learning_rate": 2.4411533069642685e-05, "loss": 0.1228, "step": 13868 }, { "epoch": 0.30560743029962484, "grad_norm": 0.6654790043830872, "learning_rate": 2.4410699453612453e-05, "loss": 0.0781, "step": 13869 }, { "epoch": 0.305629465589141, "grad_norm": 0.7012090682983398, "learning_rate": 2.4409865789648484e-05, "loss": 0.098, "step": 13870 }, { "epoch": 0.30565150087865717, "grad_norm": 0.6772254705429077, "learning_rate": 2.4409032077755023e-05, "loss": 0.109, "step": 13871 }, { "epoch": 0.30567353616817333, "grad_norm": 0.6493242979049683, "learning_rate": 2.4408198317936316e-05, "loss": 0.0784, "step": 13872 }, { "epoch": 0.3056955714576895, "grad_norm": 0.5840228796005249, "learning_rate": 2.4407364510196605e-05, "loss": 0.0968, "step": 13873 }, { "epoch": 0.30571760674720566, "grad_norm": 0.6314476728439331, "learning_rate": 2.4406530654540146e-05, "loss": 0.0604, "step": 13874 }, { "epoch": 0.3057396420367218, "grad_norm": 0.8179489374160767, "learning_rate": 2.4405696750971183e-05, "loss": 0.141, "step": 13875 }, { "epoch": 0.305761677326238, "grad_norm": 0.5866951942443848, "learning_rate": 2.4404862799493962e-05, "loss": 0.0816, "step": 13876 }, { "epoch": 0.30578371261575416, "grad_norm": 0.5563718676567078, "learning_rate": 2.440402880011273e-05, "loss": 0.0902, "step": 13877 }, { "epoch": 0.30580574790527026, "grad_norm": 0.9142704606056213, "learning_rate": 2.440319475283174e-05, "loss": 0.0942, "step": 13878 }, { "epoch": 0.30582778319478643, "grad_norm": 0.6713523268699646, "learning_rate": 2.440236065765523e-05, "loss": 0.0751, "step": 13879 }, { "epoch": 0.3058498184843026, "grad_norm": 0.5379669070243835, "learning_rate": 2.440152651458746e-05, "loss": 0.0723, "step": 13880 }, { "epoch": 0.30587185377381876, "grad_norm": 0.9074822068214417, "learning_rate": 2.4400692323632675e-05, "loss": 0.1136, "step": 13881 }, { "epoch": 0.3058938890633349, "grad_norm": 0.6399339437484741, "learning_rate": 2.4399858084795125e-05, "loss": 0.079, "step": 13882 }, { "epoch": 0.3059159243528511, "grad_norm": 0.756743311882019, "learning_rate": 2.4399023798079052e-05, "loss": 0.1018, "step": 13883 }, { "epoch": 0.30593795964236725, "grad_norm": 0.9023784399032593, "learning_rate": 2.439818946348871e-05, "loss": 0.1143, "step": 13884 }, { "epoch": 0.3059599949318834, "grad_norm": 0.5179248452186584, "learning_rate": 2.4397355081028352e-05, "loss": 0.0853, "step": 13885 }, { "epoch": 0.3059820302213996, "grad_norm": 0.7805225253105164, "learning_rate": 2.4396520650702227e-05, "loss": 0.0908, "step": 13886 }, { "epoch": 0.30600406551091575, "grad_norm": 0.7822652459144592, "learning_rate": 2.439568617251458e-05, "loss": 0.0951, "step": 13887 }, { "epoch": 0.3060261008004319, "grad_norm": 0.6683124303817749, "learning_rate": 2.4394851646469666e-05, "loss": 0.0982, "step": 13888 }, { "epoch": 0.3060481360899481, "grad_norm": 0.6226552128791809, "learning_rate": 2.4394017072571733e-05, "loss": 0.0647, "step": 13889 }, { "epoch": 0.3060701713794642, "grad_norm": 0.5766825079917908, "learning_rate": 2.439318245082504e-05, "loss": 0.0846, "step": 13890 }, { "epoch": 0.30609220666898035, "grad_norm": 0.6943331956863403, "learning_rate": 2.4392347781233827e-05, "loss": 0.0766, "step": 13891 }, { "epoch": 0.3061142419584965, "grad_norm": 1.247715950012207, "learning_rate": 2.4391513063802348e-05, "loss": 0.1075, "step": 13892 }, { "epoch": 0.3061362772480127, "grad_norm": 1.0808444023132324, "learning_rate": 2.439067829853486e-05, "loss": 0.1438, "step": 13893 }, { "epoch": 0.30615831253752884, "grad_norm": 0.6333097219467163, "learning_rate": 2.438984348543561e-05, "loss": 0.0927, "step": 13894 }, { "epoch": 0.306180347827045, "grad_norm": 0.9367178678512573, "learning_rate": 2.4389008624508856e-05, "loss": 0.0782, "step": 13895 }, { "epoch": 0.30620238311656117, "grad_norm": 0.566206157207489, "learning_rate": 2.4388173715758843e-05, "loss": 0.0856, "step": 13896 }, { "epoch": 0.30622441840607734, "grad_norm": 0.8915623426437378, "learning_rate": 2.4387338759189827e-05, "loss": 0.0947, "step": 13897 }, { "epoch": 0.3062464536955935, "grad_norm": 1.1429132223129272, "learning_rate": 2.438650375480606e-05, "loss": 0.1215, "step": 13898 }, { "epoch": 0.30626848898510967, "grad_norm": 0.9256459474563599, "learning_rate": 2.4385668702611795e-05, "loss": 0.111, "step": 13899 }, { "epoch": 0.30629052427462583, "grad_norm": 0.8828682899475098, "learning_rate": 2.438483360261129e-05, "loss": 0.0838, "step": 13900 }, { "epoch": 0.306312559564142, "grad_norm": 0.48158547282218933, "learning_rate": 2.438399845480879e-05, "loss": 0.083, "step": 13901 }, { "epoch": 0.30633459485365816, "grad_norm": 1.4263951778411865, "learning_rate": 2.438316325920856e-05, "loss": 0.1374, "step": 13902 }, { "epoch": 0.30635663014317427, "grad_norm": 1.1294811964035034, "learning_rate": 2.4382328015814846e-05, "loss": 0.0917, "step": 13903 }, { "epoch": 0.30637866543269043, "grad_norm": 0.5665521025657654, "learning_rate": 2.43814927246319e-05, "loss": 0.1022, "step": 13904 }, { "epoch": 0.3064007007222066, "grad_norm": 0.8658581376075745, "learning_rate": 2.4380657385663985e-05, "loss": 0.1039, "step": 13905 }, { "epoch": 0.30642273601172276, "grad_norm": 0.7573450207710266, "learning_rate": 2.4379821998915354e-05, "loss": 0.0821, "step": 13906 }, { "epoch": 0.3064447713012389, "grad_norm": 0.6564881205558777, "learning_rate": 2.4378986564390257e-05, "loss": 0.0885, "step": 13907 }, { "epoch": 0.3064668065907551, "grad_norm": 0.7190157175064087, "learning_rate": 2.437815108209295e-05, "loss": 0.0794, "step": 13908 }, { "epoch": 0.30648884188027126, "grad_norm": 0.6808413863182068, "learning_rate": 2.4377315552027688e-05, "loss": 0.1067, "step": 13909 }, { "epoch": 0.3065108771697874, "grad_norm": 0.6015639901161194, "learning_rate": 2.4376479974198738e-05, "loss": 0.0853, "step": 13910 }, { "epoch": 0.3065329124593036, "grad_norm": 1.0083366632461548, "learning_rate": 2.4375644348610343e-05, "loss": 0.1091, "step": 13911 }, { "epoch": 0.30655494774881975, "grad_norm": 0.7017619609832764, "learning_rate": 2.4374808675266766e-05, "loss": 0.1263, "step": 13912 }, { "epoch": 0.3065769830383359, "grad_norm": 1.1022255420684814, "learning_rate": 2.4373972954172255e-05, "loss": 0.0973, "step": 13913 }, { "epoch": 0.3065990183278521, "grad_norm": 0.7649087309837341, "learning_rate": 2.4373137185331077e-05, "loss": 0.0802, "step": 13914 }, { "epoch": 0.3066210536173682, "grad_norm": 0.6411373615264893, "learning_rate": 2.4372301368747486e-05, "loss": 0.057, "step": 13915 }, { "epoch": 0.30664308890688435, "grad_norm": 0.5807182192802429, "learning_rate": 2.4371465504425735e-05, "loss": 0.0705, "step": 13916 }, { "epoch": 0.3066651241964005, "grad_norm": 0.7460898160934448, "learning_rate": 2.4370629592370085e-05, "loss": 0.101, "step": 13917 }, { "epoch": 0.3066871594859167, "grad_norm": 0.9419075846672058, "learning_rate": 2.4369793632584797e-05, "loss": 0.0867, "step": 13918 }, { "epoch": 0.30670919477543285, "grad_norm": 0.854066014289856, "learning_rate": 2.4368957625074122e-05, "loss": 0.0859, "step": 13919 }, { "epoch": 0.306731230064949, "grad_norm": 0.8836999535560608, "learning_rate": 2.436812156984233e-05, "loss": 0.1152, "step": 13920 }, { "epoch": 0.3067532653544652, "grad_norm": 1.100538969039917, "learning_rate": 2.436728546689366e-05, "loss": 0.1364, "step": 13921 }, { "epoch": 0.30677530064398134, "grad_norm": 0.57133549451828, "learning_rate": 2.436644931623239e-05, "loss": 0.1084, "step": 13922 }, { "epoch": 0.3067973359334975, "grad_norm": 0.7080347537994385, "learning_rate": 2.4365613117862766e-05, "loss": 0.0817, "step": 13923 }, { "epoch": 0.30681937122301367, "grad_norm": 0.7678402066230774, "learning_rate": 2.436477687178905e-05, "loss": 0.0593, "step": 13924 }, { "epoch": 0.30684140651252984, "grad_norm": 0.5315178036689758, "learning_rate": 2.436394057801551e-05, "loss": 0.0775, "step": 13925 }, { "epoch": 0.306863441802046, "grad_norm": 0.9707676768302917, "learning_rate": 2.4363104236546397e-05, "loss": 0.1128, "step": 13926 }, { "epoch": 0.30688547709156216, "grad_norm": 0.592370331287384, "learning_rate": 2.4362267847385972e-05, "loss": 0.1006, "step": 13927 }, { "epoch": 0.3069075123810783, "grad_norm": 0.995147168636322, "learning_rate": 2.4361431410538494e-05, "loss": 0.1509, "step": 13928 }, { "epoch": 0.30692954767059444, "grad_norm": 1.1000422239303589, "learning_rate": 2.436059492600823e-05, "loss": 0.0773, "step": 13929 }, { "epoch": 0.3069515829601106, "grad_norm": 1.043059229850769, "learning_rate": 2.435975839379944e-05, "loss": 0.0932, "step": 13930 }, { "epoch": 0.30697361824962677, "grad_norm": 0.716379702091217, "learning_rate": 2.4358921813916374e-05, "loss": 0.1, "step": 13931 }, { "epoch": 0.30699565353914293, "grad_norm": 1.1008933782577515, "learning_rate": 2.4358085186363307e-05, "loss": 0.1194, "step": 13932 }, { "epoch": 0.3070176888286591, "grad_norm": 0.6440427899360657, "learning_rate": 2.435724851114449e-05, "loss": 0.0816, "step": 13933 }, { "epoch": 0.30703972411817526, "grad_norm": 0.7485318183898926, "learning_rate": 2.435641178826419e-05, "loss": 0.1257, "step": 13934 }, { "epoch": 0.3070617594076914, "grad_norm": 1.233289122581482, "learning_rate": 2.4355575017726668e-05, "loss": 0.1141, "step": 13935 }, { "epoch": 0.3070837946972076, "grad_norm": 1.5261863470077515, "learning_rate": 2.435473819953618e-05, "loss": 0.1012, "step": 13936 }, { "epoch": 0.30710582998672376, "grad_norm": 0.9777770638465881, "learning_rate": 2.4353901333697002e-05, "loss": 0.0986, "step": 13937 }, { "epoch": 0.3071278652762399, "grad_norm": 1.0536283254623413, "learning_rate": 2.4353064420213384e-05, "loss": 0.1222, "step": 13938 }, { "epoch": 0.3071499005657561, "grad_norm": 0.9380987286567688, "learning_rate": 2.435222745908959e-05, "loss": 0.0625, "step": 13939 }, { "epoch": 0.3071719358552722, "grad_norm": 0.5928642749786377, "learning_rate": 2.4351390450329892e-05, "loss": 0.0973, "step": 13940 }, { "epoch": 0.30719397114478836, "grad_norm": 0.8432943224906921, "learning_rate": 2.435055339393855e-05, "loss": 0.118, "step": 13941 }, { "epoch": 0.3072160064343045, "grad_norm": 0.7522554993629456, "learning_rate": 2.434971628991982e-05, "loss": 0.0791, "step": 13942 }, { "epoch": 0.3072380417238207, "grad_norm": 0.3600004017353058, "learning_rate": 2.4348879138277972e-05, "loss": 0.0604, "step": 13943 }, { "epoch": 0.30726007701333685, "grad_norm": 0.814989447593689, "learning_rate": 2.434804193901727e-05, "loss": 0.0701, "step": 13944 }, { "epoch": 0.307282112302853, "grad_norm": 0.6369028091430664, "learning_rate": 2.434720469214198e-05, "loss": 0.0932, "step": 13945 }, { "epoch": 0.3073041475923692, "grad_norm": 0.8828464150428772, "learning_rate": 2.4346367397656362e-05, "loss": 0.0767, "step": 13946 }, { "epoch": 0.30732618288188535, "grad_norm": 0.6154330372810364, "learning_rate": 2.4345530055564685e-05, "loss": 0.1062, "step": 13947 }, { "epoch": 0.3073482181714015, "grad_norm": 0.9428202509880066, "learning_rate": 2.4344692665871206e-05, "loss": 0.1125, "step": 13948 }, { "epoch": 0.3073702534609177, "grad_norm": 0.7305864691734314, "learning_rate": 2.43438552285802e-05, "loss": 0.1269, "step": 13949 }, { "epoch": 0.30739228875043384, "grad_norm": 0.5264191627502441, "learning_rate": 2.434301774369593e-05, "loss": 0.0856, "step": 13950 }, { "epoch": 0.30741432403995, "grad_norm": 0.6197468042373657, "learning_rate": 2.4342180211222662e-05, "loss": 0.0887, "step": 13951 }, { "epoch": 0.3074363593294661, "grad_norm": 1.0692826509475708, "learning_rate": 2.4341342631164658e-05, "loss": 0.1093, "step": 13952 }, { "epoch": 0.3074583946189823, "grad_norm": 0.642974853515625, "learning_rate": 2.4340505003526183e-05, "loss": 0.1024, "step": 13953 }, { "epoch": 0.30748042990849844, "grad_norm": 0.873161792755127, "learning_rate": 2.4339667328311515e-05, "loss": 0.0956, "step": 13954 }, { "epoch": 0.3075024651980146, "grad_norm": 0.6014338731765747, "learning_rate": 2.433882960552491e-05, "loss": 0.0744, "step": 13955 }, { "epoch": 0.30752450048753077, "grad_norm": 0.6999241709709167, "learning_rate": 2.4337991835170636e-05, "loss": 0.069, "step": 13956 }, { "epoch": 0.30754653577704694, "grad_norm": 0.49259674549102783, "learning_rate": 2.4337154017252962e-05, "loss": 0.0556, "step": 13957 }, { "epoch": 0.3075685710665631, "grad_norm": 0.5140793919563293, "learning_rate": 2.4336316151776158e-05, "loss": 0.0795, "step": 13958 }, { "epoch": 0.30759060635607927, "grad_norm": 0.4948965609073639, "learning_rate": 2.4335478238744484e-05, "loss": 0.0965, "step": 13959 }, { "epoch": 0.30761264164559543, "grad_norm": 0.7060368061065674, "learning_rate": 2.433464027816222e-05, "loss": 0.0642, "step": 13960 }, { "epoch": 0.3076346769351116, "grad_norm": 1.14213228225708, "learning_rate": 2.4333802270033625e-05, "loss": 0.0898, "step": 13961 }, { "epoch": 0.30765671222462776, "grad_norm": 0.6873607039451599, "learning_rate": 2.433296421436297e-05, "loss": 0.1061, "step": 13962 }, { "epoch": 0.3076787475141439, "grad_norm": 0.8555143475532532, "learning_rate": 2.4332126111154522e-05, "loss": 0.1328, "step": 13963 }, { "epoch": 0.3077007828036601, "grad_norm": 0.8892527222633362, "learning_rate": 2.4331287960412552e-05, "loss": 0.1207, "step": 13964 }, { "epoch": 0.3077228180931762, "grad_norm": 0.7223513722419739, "learning_rate": 2.433044976214133e-05, "loss": 0.098, "step": 13965 }, { "epoch": 0.30774485338269236, "grad_norm": 0.8491353988647461, "learning_rate": 2.432961151634512e-05, "loss": 0.0829, "step": 13966 }, { "epoch": 0.3077668886722085, "grad_norm": 0.6227455735206604, "learning_rate": 2.4328773223028203e-05, "loss": 0.1093, "step": 13967 }, { "epoch": 0.3077889239617247, "grad_norm": 0.6765183210372925, "learning_rate": 2.432793488219483e-05, "loss": 0.0775, "step": 13968 }, { "epoch": 0.30781095925124086, "grad_norm": 0.5443935990333557, "learning_rate": 2.4327096493849297e-05, "loss": 0.0721, "step": 13969 }, { "epoch": 0.307832994540757, "grad_norm": 0.9413245916366577, "learning_rate": 2.432625805799585e-05, "loss": 0.1006, "step": 13970 }, { "epoch": 0.3078550298302732, "grad_norm": 0.7838086485862732, "learning_rate": 2.432541957463877e-05, "loss": 0.0733, "step": 13971 }, { "epoch": 0.30787706511978935, "grad_norm": 0.7323838472366333, "learning_rate": 2.4324581043782332e-05, "loss": 0.0882, "step": 13972 }, { "epoch": 0.3078991004093055, "grad_norm": 0.6812787652015686, "learning_rate": 2.4323742465430802e-05, "loss": 0.0826, "step": 13973 }, { "epoch": 0.3079211356988217, "grad_norm": 0.4215805232524872, "learning_rate": 2.4322903839588445e-05, "loss": 0.0968, "step": 13974 }, { "epoch": 0.30794317098833784, "grad_norm": 0.7564852237701416, "learning_rate": 2.4322065166259547e-05, "loss": 0.087, "step": 13975 }, { "epoch": 0.307965206277854, "grad_norm": 0.6767787337303162, "learning_rate": 2.4321226445448374e-05, "loss": 0.0938, "step": 13976 }, { "epoch": 0.3079872415673701, "grad_norm": 0.7128503918647766, "learning_rate": 2.4320387677159188e-05, "loss": 0.0968, "step": 13977 }, { "epoch": 0.3080092768568863, "grad_norm": 1.0771653652191162, "learning_rate": 2.4319548861396272e-05, "loss": 0.0877, "step": 13978 }, { "epoch": 0.30803131214640245, "grad_norm": 0.7032403349876404, "learning_rate": 2.4318709998163902e-05, "loss": 0.0834, "step": 13979 }, { "epoch": 0.3080533474359186, "grad_norm": 0.6003203988075256, "learning_rate": 2.431787108746634e-05, "loss": 0.0852, "step": 13980 }, { "epoch": 0.3080753827254348, "grad_norm": 0.7616837024688721, "learning_rate": 2.4317032129307862e-05, "loss": 0.0787, "step": 13981 }, { "epoch": 0.30809741801495094, "grad_norm": 0.9866212010383606, "learning_rate": 2.4316193123692743e-05, "loss": 0.1288, "step": 13982 }, { "epoch": 0.3081194533044671, "grad_norm": 0.7239048480987549, "learning_rate": 2.4315354070625267e-05, "loss": 0.0959, "step": 13983 }, { "epoch": 0.30814148859398327, "grad_norm": 0.6891945600509644, "learning_rate": 2.4314514970109685e-05, "loss": 0.0805, "step": 13984 }, { "epoch": 0.30816352388349944, "grad_norm": 0.5774838328361511, "learning_rate": 2.431367582215029e-05, "loss": 0.0647, "step": 13985 }, { "epoch": 0.3081855591730156, "grad_norm": 0.5654079914093018, "learning_rate": 2.4312836626751344e-05, "loss": 0.1033, "step": 13986 }, { "epoch": 0.30820759446253176, "grad_norm": 0.628118097782135, "learning_rate": 2.4311997383917137e-05, "loss": 0.1312, "step": 13987 }, { "epoch": 0.30822962975204793, "grad_norm": 0.9151418209075928, "learning_rate": 2.4311158093651924e-05, "loss": 0.0694, "step": 13988 }, { "epoch": 0.30825166504156404, "grad_norm": 0.6100529432296753, "learning_rate": 2.4310318755959994e-05, "loss": 0.0834, "step": 13989 }, { "epoch": 0.3082737003310802, "grad_norm": 0.6961539387702942, "learning_rate": 2.430947937084562e-05, "loss": 0.1008, "step": 13990 }, { "epoch": 0.30829573562059637, "grad_norm": 0.7985739707946777, "learning_rate": 2.430863993831307e-05, "loss": 0.0938, "step": 13991 }, { "epoch": 0.30831777091011253, "grad_norm": 0.7448156476020813, "learning_rate": 2.430780045836663e-05, "loss": 0.0843, "step": 13992 }, { "epoch": 0.3083398061996287, "grad_norm": 1.1130732297897339, "learning_rate": 2.430696093101057e-05, "loss": 0.1235, "step": 13993 }, { "epoch": 0.30836184148914486, "grad_norm": 0.6504031419754028, "learning_rate": 2.4306121356249164e-05, "loss": 0.149, "step": 13994 }, { "epoch": 0.308383876778661, "grad_norm": 0.6465916633605957, "learning_rate": 2.4305281734086695e-05, "loss": 0.0957, "step": 13995 }, { "epoch": 0.3084059120681772, "grad_norm": 0.8554603457450867, "learning_rate": 2.4304442064527433e-05, "loss": 0.098, "step": 13996 }, { "epoch": 0.30842794735769336, "grad_norm": 0.8335779905319214, "learning_rate": 2.430360234757566e-05, "loss": 0.0851, "step": 13997 }, { "epoch": 0.3084499826472095, "grad_norm": 0.6736240386962891, "learning_rate": 2.4302762583235652e-05, "loss": 0.108, "step": 13998 }, { "epoch": 0.3084720179367257, "grad_norm": 0.7802972793579102, "learning_rate": 2.4301922771511685e-05, "loss": 0.1144, "step": 13999 }, { "epoch": 0.30849405322624185, "grad_norm": 0.8622035384178162, "learning_rate": 2.430108291240804e-05, "loss": 0.1045, "step": 14000 }, { "epoch": 0.308516088515758, "grad_norm": 0.5562471151351929, "learning_rate": 2.430024300592899e-05, "loss": 0.0714, "step": 14001 }, { "epoch": 0.3085381238052741, "grad_norm": 0.9130287766456604, "learning_rate": 2.4299403052078816e-05, "loss": 0.1345, "step": 14002 }, { "epoch": 0.3085601590947903, "grad_norm": 1.1172479391098022, "learning_rate": 2.429856305086179e-05, "loss": 0.1445, "step": 14003 }, { "epoch": 0.30858219438430645, "grad_norm": 0.7069829702377319, "learning_rate": 2.42977230022822e-05, "loss": 0.1362, "step": 14004 }, { "epoch": 0.3086042296738226, "grad_norm": 0.8335660099983215, "learning_rate": 2.429688290634432e-05, "loss": 0.049, "step": 14005 }, { "epoch": 0.3086262649633388, "grad_norm": 0.44871723651885986, "learning_rate": 2.4296042763052437e-05, "loss": 0.0616, "step": 14006 }, { "epoch": 0.30864830025285495, "grad_norm": 0.6836323142051697, "learning_rate": 2.4295202572410815e-05, "loss": 0.0875, "step": 14007 }, { "epoch": 0.3086703355423711, "grad_norm": 0.6703310012817383, "learning_rate": 2.4294362334423744e-05, "loss": 0.1174, "step": 14008 }, { "epoch": 0.3086923708318873, "grad_norm": 0.9971582293510437, "learning_rate": 2.42935220490955e-05, "loss": 0.1072, "step": 14009 }, { "epoch": 0.30871440612140344, "grad_norm": 1.3636999130249023, "learning_rate": 2.429268171643037e-05, "loss": 0.104, "step": 14010 }, { "epoch": 0.3087364414109196, "grad_norm": 0.8846110105514526, "learning_rate": 2.4291841336432622e-05, "loss": 0.0744, "step": 14011 }, { "epoch": 0.30875847670043577, "grad_norm": 0.7387099862098694, "learning_rate": 2.4291000909106546e-05, "loss": 0.0928, "step": 14012 }, { "epoch": 0.30878051198995193, "grad_norm": 0.7274589538574219, "learning_rate": 2.4290160434456423e-05, "loss": 0.0798, "step": 14013 }, { "epoch": 0.30880254727946804, "grad_norm": 0.8084093928337097, "learning_rate": 2.4289319912486524e-05, "loss": 0.0891, "step": 14014 }, { "epoch": 0.3088245825689842, "grad_norm": 0.7157453894615173, "learning_rate": 2.4288479343201147e-05, "loss": 0.0869, "step": 14015 }, { "epoch": 0.30884661785850037, "grad_norm": 1.0792850255966187, "learning_rate": 2.428763872660456e-05, "loss": 0.1185, "step": 14016 }, { "epoch": 0.30886865314801654, "grad_norm": 0.6842207908630371, "learning_rate": 2.428679806270105e-05, "loss": 0.0637, "step": 14017 }, { "epoch": 0.3088906884375327, "grad_norm": 0.845965564250946, "learning_rate": 2.428595735149489e-05, "loss": 0.104, "step": 14018 }, { "epoch": 0.30891272372704887, "grad_norm": 0.8649724721908569, "learning_rate": 2.4285116592990378e-05, "loss": 0.105, "step": 14019 }, { "epoch": 0.30893475901656503, "grad_norm": 0.7855616807937622, "learning_rate": 2.428427578719178e-05, "loss": 0.1223, "step": 14020 }, { "epoch": 0.3089567943060812, "grad_norm": 0.8032551407814026, "learning_rate": 2.4283434934103396e-05, "loss": 0.097, "step": 14021 }, { "epoch": 0.30897882959559736, "grad_norm": 0.7347750067710876, "learning_rate": 2.4282594033729493e-05, "loss": 0.1109, "step": 14022 }, { "epoch": 0.3090008648851135, "grad_norm": 0.6270482540130615, "learning_rate": 2.4281753086074367e-05, "loss": 0.1146, "step": 14023 }, { "epoch": 0.3090229001746297, "grad_norm": 0.9027945399284363, "learning_rate": 2.428091209114229e-05, "loss": 0.0863, "step": 14024 }, { "epoch": 0.30904493546414585, "grad_norm": 0.5831340551376343, "learning_rate": 2.428007104893755e-05, "loss": 0.1095, "step": 14025 }, { "epoch": 0.30906697075366196, "grad_norm": 0.7145771980285645, "learning_rate": 2.4279229959464434e-05, "loss": 0.0996, "step": 14026 }, { "epoch": 0.3090890060431781, "grad_norm": 1.1396440267562866, "learning_rate": 2.4278388822727227e-05, "loss": 0.11, "step": 14027 }, { "epoch": 0.3091110413326943, "grad_norm": 0.8638570308685303, "learning_rate": 2.427754763873021e-05, "loss": 0.0921, "step": 14028 }, { "epoch": 0.30913307662221046, "grad_norm": 0.90311598777771, "learning_rate": 2.4276706407477665e-05, "loss": 0.1275, "step": 14029 }, { "epoch": 0.3091551119117266, "grad_norm": 0.6450012922286987, "learning_rate": 2.427586512897388e-05, "loss": 0.0942, "step": 14030 }, { "epoch": 0.3091771472012428, "grad_norm": 0.8926008343696594, "learning_rate": 2.4275023803223136e-05, "loss": 0.1198, "step": 14031 }, { "epoch": 0.30919918249075895, "grad_norm": 0.6019108891487122, "learning_rate": 2.4274182430229728e-05, "loss": 0.0751, "step": 14032 }, { "epoch": 0.3092212177802751, "grad_norm": 1.2065560817718506, "learning_rate": 2.4273341009997935e-05, "loss": 0.0917, "step": 14033 }, { "epoch": 0.3092432530697913, "grad_norm": 0.6878619194030762, "learning_rate": 2.427249954253204e-05, "loss": 0.0713, "step": 14034 }, { "epoch": 0.30926528835930744, "grad_norm": 0.9553483128547668, "learning_rate": 2.4271658027836334e-05, "loss": 0.0982, "step": 14035 }, { "epoch": 0.3092873236488236, "grad_norm": 1.4922890663146973, "learning_rate": 2.4270816465915103e-05, "loss": 0.1024, "step": 14036 }, { "epoch": 0.3093093589383398, "grad_norm": 0.6636000871658325, "learning_rate": 2.4269974856772632e-05, "loss": 0.0798, "step": 14037 }, { "epoch": 0.30933139422785594, "grad_norm": 0.5274093747138977, "learning_rate": 2.4269133200413208e-05, "loss": 0.0834, "step": 14038 }, { "epoch": 0.30935342951737205, "grad_norm": 0.7871072888374329, "learning_rate": 2.426829149684112e-05, "loss": 0.0618, "step": 14039 }, { "epoch": 0.3093754648068882, "grad_norm": 0.7603991031646729, "learning_rate": 2.4267449746060648e-05, "loss": 0.1069, "step": 14040 }, { "epoch": 0.3093975000964044, "grad_norm": 0.5999777317047119, "learning_rate": 2.426660794807609e-05, "loss": 0.1049, "step": 14041 }, { "epoch": 0.30941953538592054, "grad_norm": 0.9524775743484497, "learning_rate": 2.426576610289173e-05, "loss": 0.0889, "step": 14042 }, { "epoch": 0.3094415706754367, "grad_norm": 0.8358468413352966, "learning_rate": 2.4264924210511846e-05, "loss": 0.0993, "step": 14043 }, { "epoch": 0.30946360596495287, "grad_norm": 0.805678129196167, "learning_rate": 2.4264082270940738e-05, "loss": 0.1151, "step": 14044 }, { "epoch": 0.30948564125446903, "grad_norm": 0.7982310056686401, "learning_rate": 2.4263240284182696e-05, "loss": 0.1059, "step": 14045 }, { "epoch": 0.3095076765439852, "grad_norm": 0.7753199338912964, "learning_rate": 2.4262398250242004e-05, "loss": 0.1088, "step": 14046 }, { "epoch": 0.30952971183350136, "grad_norm": 0.5387705564498901, "learning_rate": 2.4261556169122946e-05, "loss": 0.0721, "step": 14047 }, { "epoch": 0.30955174712301753, "grad_norm": 0.9761992692947388, "learning_rate": 2.4260714040829815e-05, "loss": 0.0961, "step": 14048 }, { "epoch": 0.3095737824125337, "grad_norm": 1.009799599647522, "learning_rate": 2.425987186536691e-05, "loss": 0.1371, "step": 14049 }, { "epoch": 0.30959581770204986, "grad_norm": 0.9222860336303711, "learning_rate": 2.4259029642738503e-05, "loss": 0.108, "step": 14050 }, { "epoch": 0.30961785299156597, "grad_norm": 0.6767631769180298, "learning_rate": 2.4258187372948897e-05, "loss": 0.0978, "step": 14051 }, { "epoch": 0.30963988828108213, "grad_norm": 1.2955435514450073, "learning_rate": 2.4257345056002378e-05, "loss": 0.1354, "step": 14052 }, { "epoch": 0.3096619235705983, "grad_norm": 0.6078606247901917, "learning_rate": 2.4256502691903236e-05, "loss": 0.0628, "step": 14053 }, { "epoch": 0.30968395886011446, "grad_norm": 0.528459370136261, "learning_rate": 2.425566028065576e-05, "loss": 0.0618, "step": 14054 }, { "epoch": 0.3097059941496306, "grad_norm": 0.7476321458816528, "learning_rate": 2.4254817822264244e-05, "loss": 0.0775, "step": 14055 }, { "epoch": 0.3097280294391468, "grad_norm": 0.6715140342712402, "learning_rate": 2.4253975316732985e-05, "loss": 0.0877, "step": 14056 }, { "epoch": 0.30975006472866295, "grad_norm": 0.6590815186500549, "learning_rate": 2.4253132764066254e-05, "loss": 0.0816, "step": 14057 }, { "epoch": 0.3097721000181791, "grad_norm": 0.6455041766166687, "learning_rate": 2.4252290164268366e-05, "loss": 0.0849, "step": 14058 }, { "epoch": 0.3097941353076953, "grad_norm": 0.8251458406448364, "learning_rate": 2.42514475173436e-05, "loss": 0.1001, "step": 14059 }, { "epoch": 0.30981617059721145, "grad_norm": 0.5747025609016418, "learning_rate": 2.4250604823296247e-05, "loss": 0.0895, "step": 14060 }, { "epoch": 0.3098382058867276, "grad_norm": 0.6225799322128296, "learning_rate": 2.4249762082130607e-05, "loss": 0.0703, "step": 14061 }, { "epoch": 0.3098602411762438, "grad_norm": 0.7704699635505676, "learning_rate": 2.4248919293850972e-05, "loss": 0.1297, "step": 14062 }, { "epoch": 0.3098822764657599, "grad_norm": 0.8187122941017151, "learning_rate": 2.4248076458461628e-05, "loss": 0.0915, "step": 14063 }, { "epoch": 0.30990431175527605, "grad_norm": 0.7445207238197327, "learning_rate": 2.424723357596687e-05, "loss": 0.0801, "step": 14064 }, { "epoch": 0.3099263470447922, "grad_norm": 0.758625864982605, "learning_rate": 2.4246390646370993e-05, "loss": 0.082, "step": 14065 }, { "epoch": 0.3099483823343084, "grad_norm": 1.0029349327087402, "learning_rate": 2.424554766967829e-05, "loss": 0.102, "step": 14066 }, { "epoch": 0.30997041762382455, "grad_norm": 0.8684883117675781, "learning_rate": 2.4244704645893057e-05, "loss": 0.0749, "step": 14067 }, { "epoch": 0.3099924529133407, "grad_norm": 0.664401113986969, "learning_rate": 2.4243861575019585e-05, "loss": 0.102, "step": 14068 }, { "epoch": 0.3100144882028569, "grad_norm": 0.9423975944519043, "learning_rate": 2.4243018457062167e-05, "loss": 0.1094, "step": 14069 }, { "epoch": 0.31003652349237304, "grad_norm": 0.5721042156219482, "learning_rate": 2.42421752920251e-05, "loss": 0.0734, "step": 14070 }, { "epoch": 0.3100585587818892, "grad_norm": 0.7315841317176819, "learning_rate": 2.4241332079912685e-05, "loss": 0.0991, "step": 14071 }, { "epoch": 0.31008059407140537, "grad_norm": 0.742747962474823, "learning_rate": 2.42404888207292e-05, "loss": 0.0872, "step": 14072 }, { "epoch": 0.31010262936092153, "grad_norm": 0.7327691912651062, "learning_rate": 2.4239645514478956e-05, "loss": 0.0997, "step": 14073 }, { "epoch": 0.3101246646504377, "grad_norm": 0.6393716931343079, "learning_rate": 2.423880216116624e-05, "loss": 0.1064, "step": 14074 }, { "epoch": 0.31014669993995386, "grad_norm": 0.780026376247406, "learning_rate": 2.423795876079535e-05, "loss": 0.0684, "step": 14075 }, { "epoch": 0.31016873522946997, "grad_norm": 0.821989893913269, "learning_rate": 2.4237115313370586e-05, "loss": 0.0998, "step": 14076 }, { "epoch": 0.31019077051898614, "grad_norm": 0.782550036907196, "learning_rate": 2.423627181889624e-05, "loss": 0.0846, "step": 14077 }, { "epoch": 0.3102128058085023, "grad_norm": 0.5787237286567688, "learning_rate": 2.4235428277376607e-05, "loss": 0.0958, "step": 14078 }, { "epoch": 0.31023484109801847, "grad_norm": 0.7667141556739807, "learning_rate": 2.4234584688815984e-05, "loss": 0.0912, "step": 14079 }, { "epoch": 0.31025687638753463, "grad_norm": 0.7531928420066833, "learning_rate": 2.423374105321867e-05, "loss": 0.0938, "step": 14080 }, { "epoch": 0.3102789116770508, "grad_norm": 0.44051119685173035, "learning_rate": 2.423289737058896e-05, "loss": 0.0623, "step": 14081 }, { "epoch": 0.31030094696656696, "grad_norm": 0.9338640570640564, "learning_rate": 2.4232053640931155e-05, "loss": 0.0929, "step": 14082 }, { "epoch": 0.3103229822560831, "grad_norm": 0.5879543423652649, "learning_rate": 2.423120986424955e-05, "loss": 0.1062, "step": 14083 }, { "epoch": 0.3103450175455993, "grad_norm": 0.6381657719612122, "learning_rate": 2.423036604054844e-05, "loss": 0.0754, "step": 14084 }, { "epoch": 0.31036705283511545, "grad_norm": 1.0274115800857544, "learning_rate": 2.422952216983213e-05, "loss": 0.1106, "step": 14085 }, { "epoch": 0.3103890881246316, "grad_norm": 0.6091867685317993, "learning_rate": 2.4228678252104917e-05, "loss": 0.1155, "step": 14086 }, { "epoch": 0.3104111234141478, "grad_norm": 0.9210439324378967, "learning_rate": 2.422783428737109e-05, "loss": 0.0717, "step": 14087 }, { "epoch": 0.3104331587036639, "grad_norm": 0.7152347564697266, "learning_rate": 2.4226990275634956e-05, "loss": 0.1019, "step": 14088 }, { "epoch": 0.31045519399318006, "grad_norm": 0.6727108955383301, "learning_rate": 2.422614621690082e-05, "loss": 0.08, "step": 14089 }, { "epoch": 0.3104772292826962, "grad_norm": 0.38543424010276794, "learning_rate": 2.4225302111172965e-05, "loss": 0.0742, "step": 14090 }, { "epoch": 0.3104992645722124, "grad_norm": 0.616517186164856, "learning_rate": 2.42244579584557e-05, "loss": 0.1204, "step": 14091 }, { "epoch": 0.31052129986172855, "grad_norm": 0.7257866859436035, "learning_rate": 2.422361375875333e-05, "loss": 0.1015, "step": 14092 }, { "epoch": 0.3105433351512447, "grad_norm": 0.7619901299476624, "learning_rate": 2.4222769512070143e-05, "loss": 0.1332, "step": 14093 }, { "epoch": 0.3105653704407609, "grad_norm": 1.0150212049484253, "learning_rate": 2.422192521841045e-05, "loss": 0.0749, "step": 14094 }, { "epoch": 0.31058740573027704, "grad_norm": 0.7910951375961304, "learning_rate": 2.4221080877778542e-05, "loss": 0.1028, "step": 14095 }, { "epoch": 0.3106094410197932, "grad_norm": 0.7542445063591003, "learning_rate": 2.4220236490178726e-05, "loss": 0.0838, "step": 14096 }, { "epoch": 0.3106314763093094, "grad_norm": 0.7202720046043396, "learning_rate": 2.4219392055615304e-05, "loss": 0.062, "step": 14097 }, { "epoch": 0.31065351159882554, "grad_norm": 0.5159023404121399, "learning_rate": 2.4218547574092573e-05, "loss": 0.0764, "step": 14098 }, { "epoch": 0.3106755468883417, "grad_norm": 0.5965805053710938, "learning_rate": 2.4217703045614837e-05, "loss": 0.091, "step": 14099 }, { "epoch": 0.3106975821778578, "grad_norm": 0.7189128994941711, "learning_rate": 2.4216858470186393e-05, "loss": 0.0999, "step": 14100 }, { "epoch": 0.310719617467374, "grad_norm": 0.7174777388572693, "learning_rate": 2.421601384781155e-05, "loss": 0.0845, "step": 14101 }, { "epoch": 0.31074165275689014, "grad_norm": 0.9069473743438721, "learning_rate": 2.42151691784946e-05, "loss": 0.0814, "step": 14102 }, { "epoch": 0.3107636880464063, "grad_norm": 0.9500591158866882, "learning_rate": 2.4214324462239857e-05, "loss": 0.1037, "step": 14103 }, { "epoch": 0.31078572333592247, "grad_norm": 0.7478330731391907, "learning_rate": 2.421347969905162e-05, "loss": 0.0983, "step": 14104 }, { "epoch": 0.31080775862543863, "grad_norm": 0.4709404408931732, "learning_rate": 2.421263488893419e-05, "loss": 0.0879, "step": 14105 }, { "epoch": 0.3108297939149548, "grad_norm": 0.8239227533340454, "learning_rate": 2.4211790031891865e-05, "loss": 0.1364, "step": 14106 }, { "epoch": 0.31085182920447096, "grad_norm": 1.2935653924942017, "learning_rate": 2.4210945127928957e-05, "loss": 0.1235, "step": 14107 }, { "epoch": 0.31087386449398713, "grad_norm": 1.0634715557098389, "learning_rate": 2.4210100177049765e-05, "loss": 0.0781, "step": 14108 }, { "epoch": 0.3108958997835033, "grad_norm": 0.7728392481803894, "learning_rate": 2.4209255179258595e-05, "loss": 0.1067, "step": 14109 }, { "epoch": 0.31091793507301946, "grad_norm": 0.6352226138114929, "learning_rate": 2.420841013455975e-05, "loss": 0.0894, "step": 14110 }, { "epoch": 0.3109399703625356, "grad_norm": 0.6766725778579712, "learning_rate": 2.4207565042957536e-05, "loss": 0.1451, "step": 14111 }, { "epoch": 0.3109620056520518, "grad_norm": 0.7127416729927063, "learning_rate": 2.4206719904456253e-05, "loss": 0.1184, "step": 14112 }, { "epoch": 0.3109840409415679, "grad_norm": 0.8900436758995056, "learning_rate": 2.420587471906021e-05, "loss": 0.0986, "step": 14113 }, { "epoch": 0.31100607623108406, "grad_norm": 0.7753919959068298, "learning_rate": 2.420502948677371e-05, "loss": 0.1121, "step": 14114 }, { "epoch": 0.3110281115206002, "grad_norm": 0.9392744898796082, "learning_rate": 2.4204184207601057e-05, "loss": 0.0938, "step": 14115 }, { "epoch": 0.3110501468101164, "grad_norm": 0.9380021691322327, "learning_rate": 2.4203338881546558e-05, "loss": 0.0952, "step": 14116 }, { "epoch": 0.31107218209963255, "grad_norm": 0.509902834892273, "learning_rate": 2.420249350861452e-05, "loss": 0.0937, "step": 14117 }, { "epoch": 0.3110942173891487, "grad_norm": 0.9357900023460388, "learning_rate": 2.4201648088809253e-05, "loss": 0.1029, "step": 14118 }, { "epoch": 0.3111162526786649, "grad_norm": 1.0361276865005493, "learning_rate": 2.4200802622135053e-05, "loss": 0.115, "step": 14119 }, { "epoch": 0.31113828796818105, "grad_norm": 0.8279998898506165, "learning_rate": 2.4199957108596232e-05, "loss": 0.1245, "step": 14120 }, { "epoch": 0.3111603232576972, "grad_norm": 0.5531088709831238, "learning_rate": 2.419911154819709e-05, "loss": 0.1163, "step": 14121 }, { "epoch": 0.3111823585472134, "grad_norm": 0.6284053325653076, "learning_rate": 2.4198265940941948e-05, "loss": 0.0828, "step": 14122 }, { "epoch": 0.31120439383672954, "grad_norm": 0.5455856919288635, "learning_rate": 2.4197420286835108e-05, "loss": 0.0939, "step": 14123 }, { "epoch": 0.3112264291262457, "grad_norm": 0.8505403995513916, "learning_rate": 2.4196574585880865e-05, "loss": 0.1156, "step": 14124 }, { "epoch": 0.3112484644157618, "grad_norm": 0.44982409477233887, "learning_rate": 2.419572883808354e-05, "loss": 0.1014, "step": 14125 }, { "epoch": 0.311270499705278, "grad_norm": 0.39056307077407837, "learning_rate": 2.4194883043447438e-05, "loss": 0.0948, "step": 14126 }, { "epoch": 0.31129253499479415, "grad_norm": 0.7058888673782349, "learning_rate": 2.4194037201976867e-05, "loss": 0.1116, "step": 14127 }, { "epoch": 0.3113145702843103, "grad_norm": 0.8108016848564148, "learning_rate": 2.4193191313676127e-05, "loss": 0.0902, "step": 14128 }, { "epoch": 0.3113366055738265, "grad_norm": 0.6227855086326599, "learning_rate": 2.419234537854954e-05, "loss": 0.1203, "step": 14129 }, { "epoch": 0.31135864086334264, "grad_norm": 0.8041371703147888, "learning_rate": 2.4191499396601413e-05, "loss": 0.0984, "step": 14130 }, { "epoch": 0.3113806761528588, "grad_norm": 0.5634319186210632, "learning_rate": 2.419065336783604e-05, "loss": 0.1009, "step": 14131 }, { "epoch": 0.31140271144237497, "grad_norm": 0.6816039681434631, "learning_rate": 2.418980729225775e-05, "loss": 0.0917, "step": 14132 }, { "epoch": 0.31142474673189113, "grad_norm": 1.0119589567184448, "learning_rate": 2.4188961169870843e-05, "loss": 0.1268, "step": 14133 }, { "epoch": 0.3114467820214073, "grad_norm": 0.7891397476196289, "learning_rate": 2.4188115000679623e-05, "loss": 0.0934, "step": 14134 }, { "epoch": 0.31146881731092346, "grad_norm": 1.1192611455917358, "learning_rate": 2.418726878468841e-05, "loss": 0.0595, "step": 14135 }, { "epoch": 0.3114908526004396, "grad_norm": 1.0786590576171875, "learning_rate": 2.418642252190151e-05, "loss": 0.1028, "step": 14136 }, { "epoch": 0.3115128878899558, "grad_norm": 0.8847305774688721, "learning_rate": 2.4185576212323234e-05, "loss": 0.1042, "step": 14137 }, { "epoch": 0.3115349231794719, "grad_norm": 0.6839258074760437, "learning_rate": 2.418472985595789e-05, "loss": 0.0843, "step": 14138 }, { "epoch": 0.31155695846898807, "grad_norm": 0.8845343589782715, "learning_rate": 2.4183883452809798e-05, "loss": 0.101, "step": 14139 }, { "epoch": 0.31157899375850423, "grad_norm": 0.7158572673797607, "learning_rate": 2.418303700288326e-05, "loss": 0.0923, "step": 14140 }, { "epoch": 0.3116010290480204, "grad_norm": 0.7197746634483337, "learning_rate": 2.4182190506182586e-05, "loss": 0.0756, "step": 14141 }, { "epoch": 0.31162306433753656, "grad_norm": 0.5974748134613037, "learning_rate": 2.4181343962712097e-05, "loss": 0.067, "step": 14142 }, { "epoch": 0.3116450996270527, "grad_norm": 1.3904507160186768, "learning_rate": 2.4180497372476096e-05, "loss": 0.1226, "step": 14143 }, { "epoch": 0.3116671349165689, "grad_norm": 0.4730139672756195, "learning_rate": 2.4179650735478903e-05, "loss": 0.0943, "step": 14144 }, { "epoch": 0.31168917020608505, "grad_norm": 0.6876633763313293, "learning_rate": 2.4178804051724823e-05, "loss": 0.102, "step": 14145 }, { "epoch": 0.3117112054956012, "grad_norm": 0.6256846189498901, "learning_rate": 2.4177957321218174e-05, "loss": 0.1047, "step": 14146 }, { "epoch": 0.3117332407851174, "grad_norm": 0.9232366681098938, "learning_rate": 2.4177110543963264e-05, "loss": 0.1037, "step": 14147 }, { "epoch": 0.31175527607463355, "grad_norm": 0.8116625547409058, "learning_rate": 2.417626371996441e-05, "loss": 0.1198, "step": 14148 }, { "epoch": 0.3117773113641497, "grad_norm": 0.6283400058746338, "learning_rate": 2.4175416849225925e-05, "loss": 0.109, "step": 14149 }, { "epoch": 0.3117993466536658, "grad_norm": 0.9173807501792908, "learning_rate": 2.417456993175212e-05, "loss": 0.1275, "step": 14150 }, { "epoch": 0.311821381943182, "grad_norm": 0.6904391646385193, "learning_rate": 2.4173722967547313e-05, "loss": 0.0766, "step": 14151 }, { "epoch": 0.31184341723269815, "grad_norm": 0.6583626866340637, "learning_rate": 2.4172875956615817e-05, "loss": 0.0791, "step": 14152 }, { "epoch": 0.3118654525222143, "grad_norm": 0.8056066632270813, "learning_rate": 2.4172028898961944e-05, "loss": 0.103, "step": 14153 }, { "epoch": 0.3118874878117305, "grad_norm": 0.6939327120780945, "learning_rate": 2.4171181794590005e-05, "loss": 0.1075, "step": 14154 }, { "epoch": 0.31190952310124664, "grad_norm": 0.9235542416572571, "learning_rate": 2.4170334643504324e-05, "loss": 0.1285, "step": 14155 }, { "epoch": 0.3119315583907628, "grad_norm": 0.3239367604255676, "learning_rate": 2.4169487445709205e-05, "loss": 0.0524, "step": 14156 }, { "epoch": 0.311953593680279, "grad_norm": 0.6029881834983826, "learning_rate": 2.4168640201208977e-05, "loss": 0.0787, "step": 14157 }, { "epoch": 0.31197562896979514, "grad_norm": 0.7175725102424622, "learning_rate": 2.4167792910007943e-05, "loss": 0.0728, "step": 14158 }, { "epoch": 0.3119976642593113, "grad_norm": 0.6038074493408203, "learning_rate": 2.4166945572110427e-05, "loss": 0.0956, "step": 14159 }, { "epoch": 0.31201969954882747, "grad_norm": 0.6493709683418274, "learning_rate": 2.4166098187520738e-05, "loss": 0.0764, "step": 14160 }, { "epoch": 0.31204173483834363, "grad_norm": 0.5694007277488708, "learning_rate": 2.41652507562432e-05, "loss": 0.0984, "step": 14161 }, { "epoch": 0.31206377012785974, "grad_norm": 0.6422861218452454, "learning_rate": 2.4164403278282118e-05, "loss": 0.1029, "step": 14162 }, { "epoch": 0.3120858054173759, "grad_norm": 0.6358123421669006, "learning_rate": 2.4163555753641822e-05, "loss": 0.0872, "step": 14163 }, { "epoch": 0.31210784070689207, "grad_norm": 0.8622059226036072, "learning_rate": 2.4162708182326617e-05, "loss": 0.0837, "step": 14164 }, { "epoch": 0.31212987599640823, "grad_norm": 0.732324481010437, "learning_rate": 2.4161860564340835e-05, "loss": 0.0565, "step": 14165 }, { "epoch": 0.3121519112859244, "grad_norm": 0.8576582670211792, "learning_rate": 2.4161012899688775e-05, "loss": 0.0798, "step": 14166 }, { "epoch": 0.31217394657544056, "grad_norm": 0.9800061583518982, "learning_rate": 2.416016518837477e-05, "loss": 0.097, "step": 14167 }, { "epoch": 0.31219598186495673, "grad_norm": 0.6379064917564392, "learning_rate": 2.4159317430403127e-05, "loss": 0.1207, "step": 14168 }, { "epoch": 0.3122180171544729, "grad_norm": 0.7047670483589172, "learning_rate": 2.415846962577817e-05, "loss": 0.1036, "step": 14169 }, { "epoch": 0.31224005244398906, "grad_norm": 0.5678251385688782, "learning_rate": 2.4157621774504218e-05, "loss": 0.1021, "step": 14170 }, { "epoch": 0.3122620877335052, "grad_norm": 0.8482834696769714, "learning_rate": 2.4156773876585586e-05, "loss": 0.0889, "step": 14171 }, { "epoch": 0.3122841230230214, "grad_norm": 0.5644136071205139, "learning_rate": 2.4155925932026597e-05, "loss": 0.0944, "step": 14172 }, { "epoch": 0.31230615831253755, "grad_norm": 0.5339115262031555, "learning_rate": 2.4155077940831562e-05, "loss": 0.0741, "step": 14173 }, { "epoch": 0.3123281936020537, "grad_norm": 0.8998257517814636, "learning_rate": 2.4154229903004812e-05, "loss": 0.097, "step": 14174 }, { "epoch": 0.3123502288915698, "grad_norm": 0.9403762221336365, "learning_rate": 2.4153381818550654e-05, "loss": 0.0814, "step": 14175 }, { "epoch": 0.312372264181086, "grad_norm": 1.0052073001861572, "learning_rate": 2.415253368747342e-05, "loss": 0.0855, "step": 14176 }, { "epoch": 0.31239429947060215, "grad_norm": 0.8016062378883362, "learning_rate": 2.415168550977742e-05, "loss": 0.0995, "step": 14177 }, { "epoch": 0.3124163347601183, "grad_norm": 1.1201776266098022, "learning_rate": 2.4150837285466977e-05, "loss": 0.0828, "step": 14178 }, { "epoch": 0.3124383700496345, "grad_norm": 0.6913590431213379, "learning_rate": 2.4149989014546416e-05, "loss": 0.1004, "step": 14179 }, { "epoch": 0.31246040533915065, "grad_norm": 0.7745420336723328, "learning_rate": 2.4149140697020053e-05, "loss": 0.0814, "step": 14180 }, { "epoch": 0.3124824406286668, "grad_norm": 1.0352739095687866, "learning_rate": 2.4148292332892214e-05, "loss": 0.1255, "step": 14181 }, { "epoch": 0.312504475918183, "grad_norm": 0.6463792324066162, "learning_rate": 2.4147443922167212e-05, "loss": 0.0653, "step": 14182 }, { "epoch": 0.31252651120769914, "grad_norm": 0.6486992239952087, "learning_rate": 2.414659546484937e-05, "loss": 0.072, "step": 14183 }, { "epoch": 0.3125485464972153, "grad_norm": 0.6456437706947327, "learning_rate": 2.414574696094302e-05, "loss": 0.0611, "step": 14184 }, { "epoch": 0.31257058178673147, "grad_norm": 0.8441078066825867, "learning_rate": 2.4144898410452472e-05, "loss": 0.0914, "step": 14185 }, { "epoch": 0.31259261707624764, "grad_norm": 0.7070795297622681, "learning_rate": 2.414404981338205e-05, "loss": 0.0908, "step": 14186 }, { "epoch": 0.31261465236576375, "grad_norm": 0.8759196996688843, "learning_rate": 2.414320116973608e-05, "loss": 0.0714, "step": 14187 }, { "epoch": 0.3126366876552799, "grad_norm": 0.6194648146629333, "learning_rate": 2.4142352479518894e-05, "loss": 0.0864, "step": 14188 }, { "epoch": 0.3126587229447961, "grad_norm": 1.233891248703003, "learning_rate": 2.4141503742734793e-05, "loss": 0.1405, "step": 14189 }, { "epoch": 0.31268075823431224, "grad_norm": 0.720525860786438, "learning_rate": 2.4140654959388115e-05, "loss": 0.1179, "step": 14190 }, { "epoch": 0.3127027935238284, "grad_norm": 0.8143134713172913, "learning_rate": 2.4139806129483177e-05, "loss": 0.0844, "step": 14191 }, { "epoch": 0.31272482881334457, "grad_norm": 0.8440017104148865, "learning_rate": 2.4138957253024306e-05, "loss": 0.0885, "step": 14192 }, { "epoch": 0.31274686410286073, "grad_norm": 0.5237610936164856, "learning_rate": 2.4138108330015824e-05, "loss": 0.0788, "step": 14193 }, { "epoch": 0.3127688993923769, "grad_norm": 0.7718036770820618, "learning_rate": 2.413725936046206e-05, "loss": 0.0803, "step": 14194 }, { "epoch": 0.31279093468189306, "grad_norm": 0.5608933568000793, "learning_rate": 2.4136410344367332e-05, "loss": 0.1053, "step": 14195 }, { "epoch": 0.3128129699714092, "grad_norm": 0.6251232624053955, "learning_rate": 2.413556128173597e-05, "loss": 0.0868, "step": 14196 }, { "epoch": 0.3128350052609254, "grad_norm": 0.9813430905342102, "learning_rate": 2.413471217257229e-05, "loss": 0.1127, "step": 14197 }, { "epoch": 0.31285704055044156, "grad_norm": 0.592099666595459, "learning_rate": 2.4133863016880626e-05, "loss": 0.1377, "step": 14198 }, { "epoch": 0.31287907583995767, "grad_norm": 0.6774019002914429, "learning_rate": 2.4133013814665297e-05, "loss": 0.1237, "step": 14199 }, { "epoch": 0.31290111112947383, "grad_norm": 0.7974403500556946, "learning_rate": 2.4132164565930638e-05, "loss": 0.1165, "step": 14200 }, { "epoch": 0.31292314641899, "grad_norm": 1.0075021982192993, "learning_rate": 2.4131315270680964e-05, "loss": 0.129, "step": 14201 }, { "epoch": 0.31294518170850616, "grad_norm": 0.6304255723953247, "learning_rate": 2.41304659289206e-05, "loss": 0.0899, "step": 14202 }, { "epoch": 0.3129672169980223, "grad_norm": 1.0172091722488403, "learning_rate": 2.4129616540653877e-05, "loss": 0.1222, "step": 14203 }, { "epoch": 0.3129892522875385, "grad_norm": 0.9822158813476562, "learning_rate": 2.4128767105885127e-05, "loss": 0.1401, "step": 14204 }, { "epoch": 0.31301128757705465, "grad_norm": 0.7979351878166199, "learning_rate": 2.4127917624618673e-05, "loss": 0.0803, "step": 14205 }, { "epoch": 0.3130333228665708, "grad_norm": 0.6163874864578247, "learning_rate": 2.412706809685883e-05, "loss": 0.067, "step": 14206 }, { "epoch": 0.313055358156087, "grad_norm": 0.7381216883659363, "learning_rate": 2.4126218522609942e-05, "loss": 0.1066, "step": 14207 }, { "epoch": 0.31307739344560315, "grad_norm": 0.4772413969039917, "learning_rate": 2.412536890187633e-05, "loss": 0.0551, "step": 14208 }, { "epoch": 0.3130994287351193, "grad_norm": 0.8338894844055176, "learning_rate": 2.4124519234662314e-05, "loss": 0.0937, "step": 14209 }, { "epoch": 0.3131214640246355, "grad_norm": 0.6335092186927795, "learning_rate": 2.412366952097223e-05, "loss": 0.0916, "step": 14210 }, { "epoch": 0.31314349931415164, "grad_norm": 0.9258427023887634, "learning_rate": 2.412281976081041e-05, "loss": 0.1403, "step": 14211 }, { "epoch": 0.31316553460366775, "grad_norm": 0.7546110153198242, "learning_rate": 2.4121969954181174e-05, "loss": 0.1003, "step": 14212 }, { "epoch": 0.3131875698931839, "grad_norm": 0.5384177565574646, "learning_rate": 2.4121120101088856e-05, "loss": 0.0659, "step": 14213 }, { "epoch": 0.3132096051827001, "grad_norm": 0.7985456585884094, "learning_rate": 2.412027020153778e-05, "loss": 0.0858, "step": 14214 }, { "epoch": 0.31323164047221624, "grad_norm": 0.4398229122161865, "learning_rate": 2.4119420255532276e-05, "loss": 0.0965, "step": 14215 }, { "epoch": 0.3132536757617324, "grad_norm": 0.5883079171180725, "learning_rate": 2.4118570263076674e-05, "loss": 0.0717, "step": 14216 }, { "epoch": 0.3132757110512486, "grad_norm": 0.969017505645752, "learning_rate": 2.411772022417531e-05, "loss": 0.1057, "step": 14217 }, { "epoch": 0.31329774634076474, "grad_norm": 0.5979542136192322, "learning_rate": 2.41168701388325e-05, "loss": 0.0993, "step": 14218 }, { "epoch": 0.3133197816302809, "grad_norm": 0.6979429125785828, "learning_rate": 2.4116020007052585e-05, "loss": 0.0947, "step": 14219 }, { "epoch": 0.31334181691979707, "grad_norm": 0.6646077632904053, "learning_rate": 2.4115169828839893e-05, "loss": 0.075, "step": 14220 }, { "epoch": 0.31336385220931323, "grad_norm": 0.7362068295478821, "learning_rate": 2.4114319604198748e-05, "loss": 0.1235, "step": 14221 }, { "epoch": 0.3133858874988294, "grad_norm": 0.8361850380897522, "learning_rate": 2.4113469333133488e-05, "loss": 0.0773, "step": 14222 }, { "epoch": 0.31340792278834556, "grad_norm": 1.1432621479034424, "learning_rate": 2.411261901564844e-05, "loss": 0.1481, "step": 14223 }, { "epoch": 0.31342995807786167, "grad_norm": 0.8567782640457153, "learning_rate": 2.4111768651747944e-05, "loss": 0.0972, "step": 14224 }, { "epoch": 0.31345199336737783, "grad_norm": 0.8293429613113403, "learning_rate": 2.4110918241436322e-05, "loss": 0.0944, "step": 14225 }, { "epoch": 0.313474028656894, "grad_norm": 0.7255282402038574, "learning_rate": 2.4110067784717907e-05, "loss": 0.0814, "step": 14226 }, { "epoch": 0.31349606394641016, "grad_norm": 1.0135653018951416, "learning_rate": 2.4109217281597024e-05, "loss": 0.0868, "step": 14227 }, { "epoch": 0.31351809923592633, "grad_norm": 0.808515727519989, "learning_rate": 2.410836673207802e-05, "loss": 0.0798, "step": 14228 }, { "epoch": 0.3135401345254425, "grad_norm": 0.9554924368858337, "learning_rate": 2.4107516136165218e-05, "loss": 0.1001, "step": 14229 }, { "epoch": 0.31356216981495866, "grad_norm": 1.0982383489608765, "learning_rate": 2.4106665493862955e-05, "loss": 0.1051, "step": 14230 }, { "epoch": 0.3135842051044748, "grad_norm": 1.1713868379592896, "learning_rate": 2.4105814805175558e-05, "loss": 0.1255, "step": 14231 }, { "epoch": 0.313606240393991, "grad_norm": 0.5892992615699768, "learning_rate": 2.4104964070107367e-05, "loss": 0.0856, "step": 14232 }, { "epoch": 0.31362827568350715, "grad_norm": 1.371066927909851, "learning_rate": 2.4104113288662707e-05, "loss": 0.0963, "step": 14233 }, { "epoch": 0.3136503109730233, "grad_norm": 0.6641709208488464, "learning_rate": 2.410326246084592e-05, "loss": 0.1058, "step": 14234 }, { "epoch": 0.3136723462625395, "grad_norm": 0.8636254072189331, "learning_rate": 2.410241158666133e-05, "loss": 0.1051, "step": 14235 }, { "epoch": 0.3136943815520556, "grad_norm": 0.6983291506767273, "learning_rate": 2.410156066611328e-05, "loss": 0.0969, "step": 14236 }, { "epoch": 0.31371641684157175, "grad_norm": 0.8368589878082275, "learning_rate": 2.4100709699206107e-05, "loss": 0.0711, "step": 14237 }, { "epoch": 0.3137384521310879, "grad_norm": 0.9625820517539978, "learning_rate": 2.409985868594413e-05, "loss": 0.1029, "step": 14238 }, { "epoch": 0.3137604874206041, "grad_norm": 0.7969644665718079, "learning_rate": 2.4099007626331696e-05, "loss": 0.111, "step": 14239 }, { "epoch": 0.31378252271012025, "grad_norm": 1.0586140155792236, "learning_rate": 2.409815652037314e-05, "loss": 0.1081, "step": 14240 }, { "epoch": 0.3138045579996364, "grad_norm": 0.5581023097038269, "learning_rate": 2.4097305368072795e-05, "loss": 0.0717, "step": 14241 }, { "epoch": 0.3138265932891526, "grad_norm": 0.5496017336845398, "learning_rate": 2.409645416943499e-05, "loss": 0.0881, "step": 14242 }, { "epoch": 0.31384862857866874, "grad_norm": 0.5954890847206116, "learning_rate": 2.4095602924464067e-05, "loss": 0.0805, "step": 14243 }, { "epoch": 0.3138706638681849, "grad_norm": 0.8833056688308716, "learning_rate": 2.409475163316436e-05, "loss": 0.1036, "step": 14244 }, { "epoch": 0.31389269915770107, "grad_norm": 0.5655806660652161, "learning_rate": 2.409390029554021e-05, "loss": 0.0812, "step": 14245 }, { "epoch": 0.31391473444721724, "grad_norm": 0.8161236047744751, "learning_rate": 2.4093048911595946e-05, "loss": 0.0965, "step": 14246 }, { "epoch": 0.3139367697367334, "grad_norm": 0.6959249377250671, "learning_rate": 2.409219748133591e-05, "loss": 0.09, "step": 14247 }, { "epoch": 0.31395880502624957, "grad_norm": 0.8219736814498901, "learning_rate": 2.4091346004764435e-05, "loss": 0.0949, "step": 14248 }, { "epoch": 0.3139808403157657, "grad_norm": 0.6908409595489502, "learning_rate": 2.4090494481885862e-05, "loss": 0.0841, "step": 14249 }, { "epoch": 0.31400287560528184, "grad_norm": 0.848788321018219, "learning_rate": 2.408964291270452e-05, "loss": 0.0865, "step": 14250 }, { "epoch": 0.314024910894798, "grad_norm": 0.8522870540618896, "learning_rate": 2.4088791297224757e-05, "loss": 0.1107, "step": 14251 }, { "epoch": 0.31404694618431417, "grad_norm": 1.1042962074279785, "learning_rate": 2.4087939635450907e-05, "loss": 0.109, "step": 14252 }, { "epoch": 0.31406898147383033, "grad_norm": 1.4223430156707764, "learning_rate": 2.4087087927387307e-05, "loss": 0.099, "step": 14253 }, { "epoch": 0.3140910167633465, "grad_norm": 1.0286505222320557, "learning_rate": 2.4086236173038292e-05, "loss": 0.0869, "step": 14254 }, { "epoch": 0.31411305205286266, "grad_norm": 0.9320114850997925, "learning_rate": 2.4085384372408208e-05, "loss": 0.0783, "step": 14255 }, { "epoch": 0.3141350873423788, "grad_norm": 0.8285447955131531, "learning_rate": 2.408453252550139e-05, "loss": 0.1166, "step": 14256 }, { "epoch": 0.314157122631895, "grad_norm": 0.6776894330978394, "learning_rate": 2.408368063232217e-05, "loss": 0.1063, "step": 14257 }, { "epoch": 0.31417915792141116, "grad_norm": 1.3485387563705444, "learning_rate": 2.4082828692874895e-05, "loss": 0.1309, "step": 14258 }, { "epoch": 0.3142011932109273, "grad_norm": 1.1046149730682373, "learning_rate": 2.4081976707163906e-05, "loss": 0.1027, "step": 14259 }, { "epoch": 0.3142232285004435, "grad_norm": 0.8588888645172119, "learning_rate": 2.4081124675193538e-05, "loss": 0.0905, "step": 14260 }, { "epoch": 0.3142452637899596, "grad_norm": 0.9386188983917236, "learning_rate": 2.408027259696813e-05, "loss": 0.1197, "step": 14261 }, { "epoch": 0.31426729907947576, "grad_norm": 0.9418943524360657, "learning_rate": 2.407942047249203e-05, "loss": 0.0829, "step": 14262 }, { "epoch": 0.3142893343689919, "grad_norm": 0.7288910150527954, "learning_rate": 2.407856830176957e-05, "loss": 0.0823, "step": 14263 }, { "epoch": 0.3143113696585081, "grad_norm": 0.5908868312835693, "learning_rate": 2.4077716084805093e-05, "loss": 0.0915, "step": 14264 }, { "epoch": 0.31433340494802425, "grad_norm": 0.3384385406970978, "learning_rate": 2.407686382160294e-05, "loss": 0.0516, "step": 14265 }, { "epoch": 0.3143554402375404, "grad_norm": 0.8700627088546753, "learning_rate": 2.407601151216745e-05, "loss": 0.096, "step": 14266 }, { "epoch": 0.3143774755270566, "grad_norm": 1.02828049659729, "learning_rate": 2.407515915650297e-05, "loss": 0.1032, "step": 14267 }, { "epoch": 0.31439951081657275, "grad_norm": 0.7320302724838257, "learning_rate": 2.4074306754613837e-05, "loss": 0.1514, "step": 14268 }, { "epoch": 0.3144215461060889, "grad_norm": 0.6835632920265198, "learning_rate": 2.4073454306504395e-05, "loss": 0.0897, "step": 14269 }, { "epoch": 0.3144435813956051, "grad_norm": 0.758892297744751, "learning_rate": 2.4072601812178985e-05, "loss": 0.121, "step": 14270 }, { "epoch": 0.31446561668512124, "grad_norm": 0.49246102571487427, "learning_rate": 2.4071749271641944e-05, "loss": 0.0484, "step": 14271 }, { "epoch": 0.3144876519746374, "grad_norm": 0.5654628872871399, "learning_rate": 2.4070896684897623e-05, "loss": 0.1003, "step": 14272 }, { "epoch": 0.3145096872641535, "grad_norm": 0.9317013621330261, "learning_rate": 2.4070044051950362e-05, "loss": 0.0978, "step": 14273 }, { "epoch": 0.3145317225536697, "grad_norm": 0.7832713723182678, "learning_rate": 2.4069191372804498e-05, "loss": 0.0772, "step": 14274 }, { "epoch": 0.31455375784318584, "grad_norm": 0.648857831954956, "learning_rate": 2.4068338647464384e-05, "loss": 0.0864, "step": 14275 }, { "epoch": 0.314575793132702, "grad_norm": 0.5497090220451355, "learning_rate": 2.4067485875934357e-05, "loss": 0.0617, "step": 14276 }, { "epoch": 0.3145978284222182, "grad_norm": 0.8528868556022644, "learning_rate": 2.406663305821876e-05, "loss": 0.0901, "step": 14277 }, { "epoch": 0.31461986371173434, "grad_norm": 0.6963984370231628, "learning_rate": 2.4065780194321943e-05, "loss": 0.1084, "step": 14278 }, { "epoch": 0.3146418990012505, "grad_norm": 0.8576169610023499, "learning_rate": 2.4064927284248243e-05, "loss": 0.1055, "step": 14279 }, { "epoch": 0.31466393429076667, "grad_norm": 0.7995283603668213, "learning_rate": 2.4064074328002007e-05, "loss": 0.1087, "step": 14280 }, { "epoch": 0.31468596958028283, "grad_norm": 0.8567225337028503, "learning_rate": 2.406322132558758e-05, "loss": 0.0868, "step": 14281 }, { "epoch": 0.314708004869799, "grad_norm": 0.31095021963119507, "learning_rate": 2.4062368277009306e-05, "loss": 0.0717, "step": 14282 }, { "epoch": 0.31473004015931516, "grad_norm": 0.6380060315132141, "learning_rate": 2.4061515182271534e-05, "loss": 0.0772, "step": 14283 }, { "epoch": 0.3147520754488313, "grad_norm": 1.0338830947875977, "learning_rate": 2.4060662041378604e-05, "loss": 0.1101, "step": 14284 }, { "epoch": 0.3147741107383475, "grad_norm": 0.6348835229873657, "learning_rate": 2.4059808854334867e-05, "loss": 0.0617, "step": 14285 }, { "epoch": 0.3147961460278636, "grad_norm": 0.47566869854927063, "learning_rate": 2.4058955621144657e-05, "loss": 0.1328, "step": 14286 }, { "epoch": 0.31481818131737976, "grad_norm": 0.3781341314315796, "learning_rate": 2.4058102341812338e-05, "loss": 0.1458, "step": 14287 }, { "epoch": 0.31484021660689593, "grad_norm": 0.7106605172157288, "learning_rate": 2.4057249016342242e-05, "loss": 0.0921, "step": 14288 }, { "epoch": 0.3148622518964121, "grad_norm": 0.7322853207588196, "learning_rate": 2.405639564473872e-05, "loss": 0.1081, "step": 14289 }, { "epoch": 0.31488428718592826, "grad_norm": 0.6924052834510803, "learning_rate": 2.405554222700612e-05, "loss": 0.0907, "step": 14290 }, { "epoch": 0.3149063224754444, "grad_norm": 2.18525767326355, "learning_rate": 2.405468876314878e-05, "loss": 0.0997, "step": 14291 }, { "epoch": 0.3149283577649606, "grad_norm": 0.9265870451927185, "learning_rate": 2.4053835253171062e-05, "loss": 0.0915, "step": 14292 }, { "epoch": 0.31495039305447675, "grad_norm": 1.0385520458221436, "learning_rate": 2.4052981697077307e-05, "loss": 0.1097, "step": 14293 }, { "epoch": 0.3149724283439929, "grad_norm": 0.7151055335998535, "learning_rate": 2.4052128094871857e-05, "loss": 0.1081, "step": 14294 }, { "epoch": 0.3149944636335091, "grad_norm": 0.7976043224334717, "learning_rate": 2.405127444655907e-05, "loss": 0.0924, "step": 14295 }, { "epoch": 0.31501649892302525, "grad_norm": 0.8478890657424927, "learning_rate": 2.4050420752143282e-05, "loss": 0.0979, "step": 14296 }, { "epoch": 0.3150385342125414, "grad_norm": 0.7732691764831543, "learning_rate": 2.404956701162885e-05, "loss": 0.1027, "step": 14297 }, { "epoch": 0.3150605695020575, "grad_norm": 0.8216585516929626, "learning_rate": 2.404871322502012e-05, "loss": 0.1259, "step": 14298 }, { "epoch": 0.3150826047915737, "grad_norm": 0.9901684522628784, "learning_rate": 2.4047859392321445e-05, "loss": 0.1079, "step": 14299 }, { "epoch": 0.31510464008108985, "grad_norm": 0.8245571851730347, "learning_rate": 2.4047005513537166e-05, "loss": 0.1234, "step": 14300 }, { "epoch": 0.315126675370606, "grad_norm": 0.6285645961761475, "learning_rate": 2.4046151588671638e-05, "loss": 0.0734, "step": 14301 }, { "epoch": 0.3151487106601222, "grad_norm": 0.6372392773628235, "learning_rate": 2.4045297617729212e-05, "loss": 0.0922, "step": 14302 }, { "epoch": 0.31517074594963834, "grad_norm": 0.728561520576477, "learning_rate": 2.404444360071423e-05, "loss": 0.0816, "step": 14303 }, { "epoch": 0.3151927812391545, "grad_norm": 0.5709342956542969, "learning_rate": 2.404358953763105e-05, "loss": 0.0669, "step": 14304 }, { "epoch": 0.31521481652867067, "grad_norm": 0.9832061529159546, "learning_rate": 2.4042735428484015e-05, "loss": 0.0735, "step": 14305 }, { "epoch": 0.31523685181818684, "grad_norm": 1.1110519170761108, "learning_rate": 2.4041881273277483e-05, "loss": 0.1203, "step": 14306 }, { "epoch": 0.315258887107703, "grad_norm": 0.5197107195854187, "learning_rate": 2.40410270720158e-05, "loss": 0.089, "step": 14307 }, { "epoch": 0.31528092239721917, "grad_norm": 0.7938389778137207, "learning_rate": 2.404017282470332e-05, "loss": 0.0799, "step": 14308 }, { "epoch": 0.31530295768673533, "grad_norm": 0.9194928407669067, "learning_rate": 2.403931853134439e-05, "loss": 0.0613, "step": 14309 }, { "epoch": 0.31532499297625144, "grad_norm": 0.778274416923523, "learning_rate": 2.403846419194336e-05, "loss": 0.1213, "step": 14310 }, { "epoch": 0.3153470282657676, "grad_norm": 0.6661803126335144, "learning_rate": 2.403760980650459e-05, "loss": 0.0868, "step": 14311 }, { "epoch": 0.31536906355528377, "grad_norm": 0.7681655287742615, "learning_rate": 2.403675537503243e-05, "loss": 0.1161, "step": 14312 }, { "epoch": 0.31539109884479993, "grad_norm": 0.7719321846961975, "learning_rate": 2.403590089753122e-05, "loss": 0.0963, "step": 14313 }, { "epoch": 0.3154131341343161, "grad_norm": 0.8448389768600464, "learning_rate": 2.4035046374005326e-05, "loss": 0.0662, "step": 14314 }, { "epoch": 0.31543516942383226, "grad_norm": 0.6113282442092896, "learning_rate": 2.4034191804459096e-05, "loss": 0.0528, "step": 14315 }, { "epoch": 0.3154572047133484, "grad_norm": 0.9762389063835144, "learning_rate": 2.4033337188896882e-05, "loss": 0.0935, "step": 14316 }, { "epoch": 0.3154792400028646, "grad_norm": 0.9564828872680664, "learning_rate": 2.403248252732304e-05, "loss": 0.1122, "step": 14317 }, { "epoch": 0.31550127529238076, "grad_norm": 0.6595818996429443, "learning_rate": 2.4031627819741918e-05, "loss": 0.0791, "step": 14318 }, { "epoch": 0.3155233105818969, "grad_norm": 0.9706674814224243, "learning_rate": 2.4030773066157874e-05, "loss": 0.0876, "step": 14319 }, { "epoch": 0.3155453458714131, "grad_norm": 0.653089165687561, "learning_rate": 2.402991826657526e-05, "loss": 0.0874, "step": 14320 }, { "epoch": 0.31556738116092925, "grad_norm": 0.4935953915119171, "learning_rate": 2.402906342099843e-05, "loss": 0.1126, "step": 14321 }, { "epoch": 0.3155894164504454, "grad_norm": 0.7949526309967041, "learning_rate": 2.4028208529431733e-05, "loss": 0.0982, "step": 14322 }, { "epoch": 0.3156114517399615, "grad_norm": 0.9789919257164001, "learning_rate": 2.4027353591879537e-05, "loss": 0.1065, "step": 14323 }, { "epoch": 0.3156334870294777, "grad_norm": 0.8459118604660034, "learning_rate": 2.4026498608346184e-05, "loss": 0.0795, "step": 14324 }, { "epoch": 0.31565552231899385, "grad_norm": 0.9032188057899475, "learning_rate": 2.4025643578836035e-05, "loss": 0.137, "step": 14325 }, { "epoch": 0.31567755760851, "grad_norm": 0.8654923439025879, "learning_rate": 2.4024788503353437e-05, "loss": 0.1165, "step": 14326 }, { "epoch": 0.3156995928980262, "grad_norm": 0.9904188513755798, "learning_rate": 2.402393338190276e-05, "loss": 0.1155, "step": 14327 }, { "epoch": 0.31572162818754235, "grad_norm": 0.8949147462844849, "learning_rate": 2.4023078214488346e-05, "loss": 0.1161, "step": 14328 }, { "epoch": 0.3157436634770585, "grad_norm": 0.46204304695129395, "learning_rate": 2.402222300111456e-05, "loss": 0.0691, "step": 14329 }, { "epoch": 0.3157656987665747, "grad_norm": 1.01203191280365, "learning_rate": 2.402136774178575e-05, "loss": 0.0961, "step": 14330 }, { "epoch": 0.31578773405609084, "grad_norm": 0.8757591247558594, "learning_rate": 2.402051243650628e-05, "loss": 0.0984, "step": 14331 }, { "epoch": 0.315809769345607, "grad_norm": 0.7070015668869019, "learning_rate": 2.4019657085280502e-05, "loss": 0.084, "step": 14332 }, { "epoch": 0.31583180463512317, "grad_norm": 1.1228338479995728, "learning_rate": 2.4018801688112775e-05, "loss": 0.1325, "step": 14333 }, { "epoch": 0.31585383992463933, "grad_norm": 0.5294241309165955, "learning_rate": 2.401794624500745e-05, "loss": 0.106, "step": 14334 }, { "epoch": 0.31587587521415544, "grad_norm": 0.5728617906570435, "learning_rate": 2.4017090755968893e-05, "loss": 0.0777, "step": 14335 }, { "epoch": 0.3158979105036716, "grad_norm": 0.6408634781837463, "learning_rate": 2.4016235221001458e-05, "loss": 0.0959, "step": 14336 }, { "epoch": 0.3159199457931878, "grad_norm": 0.7182334661483765, "learning_rate": 2.4015379640109503e-05, "loss": 0.1027, "step": 14337 }, { "epoch": 0.31594198108270394, "grad_norm": 0.7491819858551025, "learning_rate": 2.401452401329738e-05, "loss": 0.0579, "step": 14338 }, { "epoch": 0.3159640163722201, "grad_norm": 0.7475704550743103, "learning_rate": 2.4013668340569454e-05, "loss": 0.0708, "step": 14339 }, { "epoch": 0.31598605166173627, "grad_norm": 0.7911285758018494, "learning_rate": 2.4012812621930083e-05, "loss": 0.0746, "step": 14340 }, { "epoch": 0.31600808695125243, "grad_norm": 0.6499375700950623, "learning_rate": 2.401195685738363e-05, "loss": 0.083, "step": 14341 }, { "epoch": 0.3160301222407686, "grad_norm": 0.6183257102966309, "learning_rate": 2.401110104693444e-05, "loss": 0.0781, "step": 14342 }, { "epoch": 0.31605215753028476, "grad_norm": 0.8120002150535583, "learning_rate": 2.401024519058688e-05, "loss": 0.1033, "step": 14343 }, { "epoch": 0.3160741928198009, "grad_norm": 0.7719562649726868, "learning_rate": 2.4009389288345313e-05, "loss": 0.0811, "step": 14344 }, { "epoch": 0.3160962281093171, "grad_norm": 0.7947080135345459, "learning_rate": 2.4008533340214093e-05, "loss": 0.0951, "step": 14345 }, { "epoch": 0.31611826339883325, "grad_norm": 1.0611516237258911, "learning_rate": 2.4007677346197583e-05, "loss": 0.0845, "step": 14346 }, { "epoch": 0.3161402986883494, "grad_norm": 0.9768522381782532, "learning_rate": 2.400682130630014e-05, "loss": 0.0924, "step": 14347 }, { "epoch": 0.31616233397786553, "grad_norm": 0.639045774936676, "learning_rate": 2.4005965220526127e-05, "loss": 0.1021, "step": 14348 }, { "epoch": 0.3161843692673817, "grad_norm": 0.6861538290977478, "learning_rate": 2.4005109088879905e-05, "loss": 0.0831, "step": 14349 }, { "epoch": 0.31620640455689786, "grad_norm": 0.6530880928039551, "learning_rate": 2.4004252911365833e-05, "loss": 0.0774, "step": 14350 }, { "epoch": 0.316228439846414, "grad_norm": 0.7022190690040588, "learning_rate": 2.400339668798827e-05, "loss": 0.0893, "step": 14351 }, { "epoch": 0.3162504751359302, "grad_norm": 1.0827703475952148, "learning_rate": 2.4002540418751587e-05, "loss": 0.1311, "step": 14352 }, { "epoch": 0.31627251042544635, "grad_norm": 0.8235824704170227, "learning_rate": 2.4001684103660137e-05, "loss": 0.1002, "step": 14353 }, { "epoch": 0.3162945457149625, "grad_norm": 0.6333020329475403, "learning_rate": 2.4000827742718278e-05, "loss": 0.0854, "step": 14354 }, { "epoch": 0.3163165810044787, "grad_norm": 0.8479830026626587, "learning_rate": 2.3999971335930378e-05, "loss": 0.1129, "step": 14355 }, { "epoch": 0.31633861629399485, "grad_norm": 0.6082372069358826, "learning_rate": 2.3999114883300798e-05, "loss": 0.0583, "step": 14356 }, { "epoch": 0.316360651583511, "grad_norm": 0.6818718314170837, "learning_rate": 2.3998258384833897e-05, "loss": 0.1142, "step": 14357 }, { "epoch": 0.3163826868730272, "grad_norm": 1.042931079864502, "learning_rate": 2.3997401840534044e-05, "loss": 0.0915, "step": 14358 }, { "epoch": 0.31640472216254334, "grad_norm": 1.0155524015426636, "learning_rate": 2.3996545250405602e-05, "loss": 0.0905, "step": 14359 }, { "epoch": 0.31642675745205945, "grad_norm": 0.9036183953285217, "learning_rate": 2.3995688614452925e-05, "loss": 0.1233, "step": 14360 }, { "epoch": 0.3164487927415756, "grad_norm": 1.0802340507507324, "learning_rate": 2.399483193268038e-05, "loss": 0.0942, "step": 14361 }, { "epoch": 0.3164708280310918, "grad_norm": 0.8967235684394836, "learning_rate": 2.399397520509234e-05, "loss": 0.1299, "step": 14362 }, { "epoch": 0.31649286332060794, "grad_norm": 1.0802782773971558, "learning_rate": 2.399311843169316e-05, "loss": 0.101, "step": 14363 }, { "epoch": 0.3165148986101241, "grad_norm": 0.7686830163002014, "learning_rate": 2.3992261612487204e-05, "loss": 0.0834, "step": 14364 }, { "epoch": 0.31653693389964027, "grad_norm": 0.7667531371116638, "learning_rate": 2.3991404747478837e-05, "loss": 0.0722, "step": 14365 }, { "epoch": 0.31655896918915644, "grad_norm": 0.8388068079948425, "learning_rate": 2.3990547836672422e-05, "loss": 0.1041, "step": 14366 }, { "epoch": 0.3165810044786726, "grad_norm": 0.9952465295791626, "learning_rate": 2.398969088007233e-05, "loss": 0.0948, "step": 14367 }, { "epoch": 0.31660303976818877, "grad_norm": 0.6921476125717163, "learning_rate": 2.398883387768292e-05, "loss": 0.0892, "step": 14368 }, { "epoch": 0.31662507505770493, "grad_norm": 0.465816468000412, "learning_rate": 2.3987976829508556e-05, "loss": 0.065, "step": 14369 }, { "epoch": 0.3166471103472211, "grad_norm": 1.112911343574524, "learning_rate": 2.3987119735553608e-05, "loss": 0.1043, "step": 14370 }, { "epoch": 0.31666914563673726, "grad_norm": 0.6829991340637207, "learning_rate": 2.3986262595822444e-05, "loss": 0.0677, "step": 14371 }, { "epoch": 0.31669118092625337, "grad_norm": 0.6028040647506714, "learning_rate": 2.398540541031942e-05, "loss": 0.1035, "step": 14372 }, { "epoch": 0.31671321621576953, "grad_norm": 1.2225754261016846, "learning_rate": 2.398454817904891e-05, "loss": 0.0981, "step": 14373 }, { "epoch": 0.3167352515052857, "grad_norm": 0.9004992246627808, "learning_rate": 2.3983690902015277e-05, "loss": 0.1091, "step": 14374 }, { "epoch": 0.31675728679480186, "grad_norm": 1.4028822183609009, "learning_rate": 2.3982833579222892e-05, "loss": 0.0936, "step": 14375 }, { "epoch": 0.316779322084318, "grad_norm": 0.9829049706459045, "learning_rate": 2.3981976210676114e-05, "loss": 0.0938, "step": 14376 }, { "epoch": 0.3168013573738342, "grad_norm": 0.6674614548683167, "learning_rate": 2.398111879637932e-05, "loss": 0.0863, "step": 14377 }, { "epoch": 0.31682339266335036, "grad_norm": 0.8272227644920349, "learning_rate": 2.398026133633687e-05, "loss": 0.125, "step": 14378 }, { "epoch": 0.3168454279528665, "grad_norm": 1.013810396194458, "learning_rate": 2.3979403830553136e-05, "loss": 0.1255, "step": 14379 }, { "epoch": 0.3168674632423827, "grad_norm": 0.6254045367240906, "learning_rate": 2.3978546279032473e-05, "loss": 0.0703, "step": 14380 }, { "epoch": 0.31688949853189885, "grad_norm": 0.8012793660163879, "learning_rate": 2.397768868177927e-05, "loss": 0.1245, "step": 14381 }, { "epoch": 0.316911533821415, "grad_norm": 0.833559513092041, "learning_rate": 2.397683103879788e-05, "loss": 0.0727, "step": 14382 }, { "epoch": 0.3169335691109312, "grad_norm": 2.1659889221191406, "learning_rate": 2.3975973350092678e-05, "loss": 0.1669, "step": 14383 }, { "epoch": 0.31695560440044734, "grad_norm": 0.6399484276771545, "learning_rate": 2.3975115615668023e-05, "loss": 0.1268, "step": 14384 }, { "epoch": 0.31697763968996345, "grad_norm": 0.9167788624763489, "learning_rate": 2.39742578355283e-05, "loss": 0.0883, "step": 14385 }, { "epoch": 0.3169996749794796, "grad_norm": 0.4565116763114929, "learning_rate": 2.3973400009677866e-05, "loss": 0.0966, "step": 14386 }, { "epoch": 0.3170217102689958, "grad_norm": 0.6242250204086304, "learning_rate": 2.3972542138121095e-05, "loss": 0.1313, "step": 14387 }, { "epoch": 0.31704374555851195, "grad_norm": 0.6452916860580444, "learning_rate": 2.3971684220862353e-05, "loss": 0.0902, "step": 14388 }, { "epoch": 0.3170657808480281, "grad_norm": 0.5457330942153931, "learning_rate": 2.3970826257906017e-05, "loss": 0.0762, "step": 14389 }, { "epoch": 0.3170878161375443, "grad_norm": 1.2121553421020508, "learning_rate": 2.3969968249256448e-05, "loss": 0.1053, "step": 14390 }, { "epoch": 0.31710985142706044, "grad_norm": 0.4878684878349304, "learning_rate": 2.3969110194918024e-05, "loss": 0.1048, "step": 14391 }, { "epoch": 0.3171318867165766, "grad_norm": 0.5812232494354248, "learning_rate": 2.3968252094895108e-05, "loss": 0.1045, "step": 14392 }, { "epoch": 0.31715392200609277, "grad_norm": 0.5592819452285767, "learning_rate": 2.3967393949192074e-05, "loss": 0.0523, "step": 14393 }, { "epoch": 0.31717595729560893, "grad_norm": 0.6345582604408264, "learning_rate": 2.39665357578133e-05, "loss": 0.0663, "step": 14394 }, { "epoch": 0.3171979925851251, "grad_norm": 1.2200253009796143, "learning_rate": 2.3965677520763144e-05, "loss": 0.0811, "step": 14395 }, { "epoch": 0.31722002787464126, "grad_norm": 0.7273904085159302, "learning_rate": 2.3964819238045992e-05, "loss": 0.0996, "step": 14396 }, { "epoch": 0.3172420631641574, "grad_norm": 0.5917645692825317, "learning_rate": 2.3963960909666202e-05, "loss": 0.0906, "step": 14397 }, { "epoch": 0.31726409845367354, "grad_norm": 0.9200601577758789, "learning_rate": 2.396310253562816e-05, "loss": 0.0852, "step": 14398 }, { "epoch": 0.3172861337431897, "grad_norm": 0.8186157941818237, "learning_rate": 2.3962244115936225e-05, "loss": 0.1005, "step": 14399 }, { "epoch": 0.31730816903270587, "grad_norm": 0.7568019032478333, "learning_rate": 2.3961385650594775e-05, "loss": 0.1246, "step": 14400 }, { "epoch": 0.31733020432222203, "grad_norm": 0.9202977418899536, "learning_rate": 2.396052713960818e-05, "loss": 0.0593, "step": 14401 }, { "epoch": 0.3173522396117382, "grad_norm": 0.6175611019134521, "learning_rate": 2.3959668582980817e-05, "loss": 0.0518, "step": 14402 }, { "epoch": 0.31737427490125436, "grad_norm": 0.5674787759780884, "learning_rate": 2.395880998071706e-05, "loss": 0.072, "step": 14403 }, { "epoch": 0.3173963101907705, "grad_norm": 0.7313298583030701, "learning_rate": 2.395795133282128e-05, "loss": 0.1053, "step": 14404 }, { "epoch": 0.3174183454802867, "grad_norm": 0.8968206644058228, "learning_rate": 2.3957092639297845e-05, "loss": 0.0982, "step": 14405 }, { "epoch": 0.31744038076980285, "grad_norm": 0.6841621994972229, "learning_rate": 2.3956233900151142e-05, "loss": 0.1107, "step": 14406 }, { "epoch": 0.317462416059319, "grad_norm": 0.7788360118865967, "learning_rate": 2.395537511538553e-05, "loss": 0.1173, "step": 14407 }, { "epoch": 0.3174844513488352, "grad_norm": 0.8172231316566467, "learning_rate": 2.3954516285005394e-05, "loss": 0.1176, "step": 14408 }, { "epoch": 0.3175064866383513, "grad_norm": 0.7698987126350403, "learning_rate": 2.39536574090151e-05, "loss": 0.0974, "step": 14409 }, { "epoch": 0.31752852192786746, "grad_norm": 0.6602823138237, "learning_rate": 2.395279848741903e-05, "loss": 0.0685, "step": 14410 }, { "epoch": 0.3175505572173836, "grad_norm": 0.6313644647598267, "learning_rate": 2.395193952022156e-05, "loss": 0.061, "step": 14411 }, { "epoch": 0.3175725925068998, "grad_norm": 0.641374409198761, "learning_rate": 2.3951080507427058e-05, "loss": 0.1059, "step": 14412 }, { "epoch": 0.31759462779641595, "grad_norm": 0.7038992047309875, "learning_rate": 2.3950221449039908e-05, "loss": 0.0993, "step": 14413 }, { "epoch": 0.3176166630859321, "grad_norm": 0.6960590481758118, "learning_rate": 2.3949362345064473e-05, "loss": 0.0686, "step": 14414 }, { "epoch": 0.3176386983754483, "grad_norm": 0.728707492351532, "learning_rate": 2.3948503195505143e-05, "loss": 0.0584, "step": 14415 }, { "epoch": 0.31766073366496445, "grad_norm": 0.9690550565719604, "learning_rate": 2.3947644000366282e-05, "loss": 0.1076, "step": 14416 }, { "epoch": 0.3176827689544806, "grad_norm": 1.017020583152771, "learning_rate": 2.3946784759652275e-05, "loss": 0.1375, "step": 14417 }, { "epoch": 0.3177048042439968, "grad_norm": 0.5518986582756042, "learning_rate": 2.3945925473367496e-05, "loss": 0.0908, "step": 14418 }, { "epoch": 0.31772683953351294, "grad_norm": 1.1871367692947388, "learning_rate": 2.3945066141516318e-05, "loss": 0.1028, "step": 14419 }, { "epoch": 0.3177488748230291, "grad_norm": 0.939803957939148, "learning_rate": 2.3944206764103126e-05, "loss": 0.116, "step": 14420 }, { "epoch": 0.31777091011254527, "grad_norm": 0.7881577610969543, "learning_rate": 2.394334734113229e-05, "loss": 0.0881, "step": 14421 }, { "epoch": 0.3177929454020614, "grad_norm": 0.38288354873657227, "learning_rate": 2.3942487872608193e-05, "loss": 0.0684, "step": 14422 }, { "epoch": 0.31781498069157754, "grad_norm": 0.7957572937011719, "learning_rate": 2.3941628358535207e-05, "loss": 0.0769, "step": 14423 }, { "epoch": 0.3178370159810937, "grad_norm": 0.6661139726638794, "learning_rate": 2.3940768798917714e-05, "loss": 0.0869, "step": 14424 }, { "epoch": 0.31785905127060987, "grad_norm": 0.662244975566864, "learning_rate": 2.393990919376009e-05, "loss": 0.104, "step": 14425 }, { "epoch": 0.31788108656012604, "grad_norm": 0.9141809344291687, "learning_rate": 2.3939049543066717e-05, "loss": 0.1242, "step": 14426 }, { "epoch": 0.3179031218496422, "grad_norm": 0.7463394999504089, "learning_rate": 2.393818984684197e-05, "loss": 0.0906, "step": 14427 }, { "epoch": 0.31792515713915837, "grad_norm": 1.1333308219909668, "learning_rate": 2.3937330105090227e-05, "loss": 0.0855, "step": 14428 }, { "epoch": 0.31794719242867453, "grad_norm": 0.5609171986579895, "learning_rate": 2.393647031781587e-05, "loss": 0.0913, "step": 14429 }, { "epoch": 0.3179692277181907, "grad_norm": 0.7202062606811523, "learning_rate": 2.3935610485023282e-05, "loss": 0.0546, "step": 14430 }, { "epoch": 0.31799126300770686, "grad_norm": 0.9005333185195923, "learning_rate": 2.3934750606716835e-05, "loss": 0.0741, "step": 14431 }, { "epoch": 0.318013298297223, "grad_norm": 0.8620479702949524, "learning_rate": 2.393389068290091e-05, "loss": 0.0733, "step": 14432 }, { "epoch": 0.3180353335867392, "grad_norm": 0.526038646697998, "learning_rate": 2.393303071357989e-05, "loss": 0.0748, "step": 14433 }, { "epoch": 0.3180573688762553, "grad_norm": 0.8660317659378052, "learning_rate": 2.3932170698758155e-05, "loss": 0.1377, "step": 14434 }, { "epoch": 0.31807940416577146, "grad_norm": 0.9542217254638672, "learning_rate": 2.3931310638440086e-05, "loss": 0.0928, "step": 14435 }, { "epoch": 0.3181014394552876, "grad_norm": 0.858163595199585, "learning_rate": 2.393045053263006e-05, "loss": 0.0783, "step": 14436 }, { "epoch": 0.3181234747448038, "grad_norm": 1.0294957160949707, "learning_rate": 2.392959038133246e-05, "loss": 0.1131, "step": 14437 }, { "epoch": 0.31814551003431996, "grad_norm": 0.7373867630958557, "learning_rate": 2.3928730184551673e-05, "loss": 0.085, "step": 14438 }, { "epoch": 0.3181675453238361, "grad_norm": 0.47421795129776, "learning_rate": 2.392786994229207e-05, "loss": 0.0557, "step": 14439 }, { "epoch": 0.3181895806133523, "grad_norm": 0.5157762765884399, "learning_rate": 2.392700965455804e-05, "loss": 0.0964, "step": 14440 }, { "epoch": 0.31821161590286845, "grad_norm": 0.753592848777771, "learning_rate": 2.3926149321353958e-05, "loss": 0.1173, "step": 14441 }, { "epoch": 0.3182336511923846, "grad_norm": 0.5585976839065552, "learning_rate": 2.392528894268422e-05, "loss": 0.0925, "step": 14442 }, { "epoch": 0.3182556864819008, "grad_norm": 1.1937074661254883, "learning_rate": 2.392442851855319e-05, "loss": 0.0975, "step": 14443 }, { "epoch": 0.31827772177141694, "grad_norm": 0.6649289131164551, "learning_rate": 2.392356804896527e-05, "loss": 0.0937, "step": 14444 }, { "epoch": 0.3182997570609331, "grad_norm": 0.43717071413993835, "learning_rate": 2.3922707533924826e-05, "loss": 0.1247, "step": 14445 }, { "epoch": 0.3183217923504492, "grad_norm": 0.5487977862358093, "learning_rate": 2.392184697343625e-05, "loss": 0.0778, "step": 14446 }, { "epoch": 0.3183438276399654, "grad_norm": 0.9175513982772827, "learning_rate": 2.3920986367503918e-05, "loss": 0.1135, "step": 14447 }, { "epoch": 0.31836586292948155, "grad_norm": 0.8013801574707031, "learning_rate": 2.3920125716132228e-05, "loss": 0.1169, "step": 14448 }, { "epoch": 0.3183878982189977, "grad_norm": 0.8755786418914795, "learning_rate": 2.3919265019325545e-05, "loss": 0.0893, "step": 14449 }, { "epoch": 0.3184099335085139, "grad_norm": 0.6703680157661438, "learning_rate": 2.391840427708827e-05, "loss": 0.0955, "step": 14450 }, { "epoch": 0.31843196879803004, "grad_norm": 0.5863150954246521, "learning_rate": 2.3917543489424775e-05, "loss": 0.0965, "step": 14451 }, { "epoch": 0.3184540040875462, "grad_norm": 1.2179415225982666, "learning_rate": 2.3916682656339453e-05, "loss": 0.1185, "step": 14452 }, { "epoch": 0.31847603937706237, "grad_norm": 0.5604003667831421, "learning_rate": 2.3915821777836677e-05, "loss": 0.0744, "step": 14453 }, { "epoch": 0.31849807466657853, "grad_norm": 0.9918755888938904, "learning_rate": 2.3914960853920848e-05, "loss": 0.1312, "step": 14454 }, { "epoch": 0.3185201099560947, "grad_norm": 0.5396029949188232, "learning_rate": 2.391409988459634e-05, "loss": 0.0519, "step": 14455 }, { "epoch": 0.31854214524561086, "grad_norm": 0.7324404120445251, "learning_rate": 2.3913238869867537e-05, "loss": 0.1124, "step": 14456 }, { "epoch": 0.31856418053512703, "grad_norm": 1.172974944114685, "learning_rate": 2.3912377809738834e-05, "loss": 0.1561, "step": 14457 }, { "epoch": 0.3185862158246432, "grad_norm": 1.0515087842941284, "learning_rate": 2.391151670421461e-05, "loss": 0.097, "step": 14458 }, { "epoch": 0.3186082511141593, "grad_norm": 0.7443521022796631, "learning_rate": 2.3910655553299253e-05, "loss": 0.1348, "step": 14459 }, { "epoch": 0.31863028640367547, "grad_norm": 0.7513348460197449, "learning_rate": 2.3909794356997146e-05, "loss": 0.098, "step": 14460 }, { "epoch": 0.31865232169319163, "grad_norm": 0.7927639484405518, "learning_rate": 2.3908933115312685e-05, "loss": 0.1174, "step": 14461 }, { "epoch": 0.3186743569827078, "grad_norm": 0.6107750535011292, "learning_rate": 2.3908071828250245e-05, "loss": 0.0902, "step": 14462 }, { "epoch": 0.31869639227222396, "grad_norm": 1.1871038675308228, "learning_rate": 2.3907210495814216e-05, "loss": 0.1186, "step": 14463 }, { "epoch": 0.3187184275617401, "grad_norm": 1.1420063972473145, "learning_rate": 2.3906349118008995e-05, "loss": 0.0943, "step": 14464 }, { "epoch": 0.3187404628512563, "grad_norm": 0.9551335573196411, "learning_rate": 2.390548769483896e-05, "loss": 0.1021, "step": 14465 }, { "epoch": 0.31876249814077245, "grad_norm": 0.965455949306488, "learning_rate": 2.3904626226308493e-05, "loss": 0.0705, "step": 14466 }, { "epoch": 0.3187845334302886, "grad_norm": 0.592831552028656, "learning_rate": 2.3903764712421998e-05, "loss": 0.0856, "step": 14467 }, { "epoch": 0.3188065687198048, "grad_norm": 0.9405347108840942, "learning_rate": 2.390290315318385e-05, "loss": 0.094, "step": 14468 }, { "epoch": 0.31882860400932095, "grad_norm": 0.6142854690551758, "learning_rate": 2.3902041548598447e-05, "loss": 0.0961, "step": 14469 }, { "epoch": 0.3188506392988371, "grad_norm": 0.9475044012069702, "learning_rate": 2.390117989867017e-05, "loss": 0.107, "step": 14470 }, { "epoch": 0.3188726745883532, "grad_norm": 0.8608630895614624, "learning_rate": 2.3900318203403414e-05, "loss": 0.1141, "step": 14471 }, { "epoch": 0.3188947098778694, "grad_norm": 0.9142946600914001, "learning_rate": 2.3899456462802556e-05, "loss": 0.1122, "step": 14472 }, { "epoch": 0.31891674516738555, "grad_norm": 1.0376412868499756, "learning_rate": 2.3898594676872003e-05, "loss": 0.0952, "step": 14473 }, { "epoch": 0.3189387804569017, "grad_norm": 0.7998244166374207, "learning_rate": 2.3897732845616133e-05, "loss": 0.1094, "step": 14474 }, { "epoch": 0.3189608157464179, "grad_norm": 0.7759743928909302, "learning_rate": 2.389687096903934e-05, "loss": 0.1081, "step": 14475 }, { "epoch": 0.31898285103593405, "grad_norm": 0.975326657295227, "learning_rate": 2.3896009047146007e-05, "loss": 0.1073, "step": 14476 }, { "epoch": 0.3190048863254502, "grad_norm": 0.542430579662323, "learning_rate": 2.389514707994053e-05, "loss": 0.0798, "step": 14477 }, { "epoch": 0.3190269216149664, "grad_norm": 0.6553475260734558, "learning_rate": 2.38942850674273e-05, "loss": 0.1004, "step": 14478 }, { "epoch": 0.31904895690448254, "grad_norm": 0.7413042783737183, "learning_rate": 2.3893423009610707e-05, "loss": 0.0982, "step": 14479 }, { "epoch": 0.3190709921939987, "grad_norm": 0.6327543258666992, "learning_rate": 2.3892560906495144e-05, "loss": 0.095, "step": 14480 }, { "epoch": 0.31909302748351487, "grad_norm": 0.747093677520752, "learning_rate": 2.3891698758084996e-05, "loss": 0.0683, "step": 14481 }, { "epoch": 0.31911506277303103, "grad_norm": 0.9352444410324097, "learning_rate": 2.389083656438466e-05, "loss": 0.1271, "step": 14482 }, { "epoch": 0.31913709806254714, "grad_norm": 0.8110009431838989, "learning_rate": 2.3889974325398524e-05, "loss": 0.0912, "step": 14483 }, { "epoch": 0.3191591333520633, "grad_norm": 0.8467801809310913, "learning_rate": 2.3889112041130986e-05, "loss": 0.122, "step": 14484 }, { "epoch": 0.31918116864157947, "grad_norm": 0.754504382610321, "learning_rate": 2.388824971158643e-05, "loss": 0.0921, "step": 14485 }, { "epoch": 0.31920320393109564, "grad_norm": 0.6388694643974304, "learning_rate": 2.3887387336769253e-05, "loss": 0.089, "step": 14486 }, { "epoch": 0.3192252392206118, "grad_norm": 0.8865456581115723, "learning_rate": 2.3886524916683848e-05, "loss": 0.0951, "step": 14487 }, { "epoch": 0.31924727451012797, "grad_norm": 1.0541578531265259, "learning_rate": 2.38856624513346e-05, "loss": 0.0898, "step": 14488 }, { "epoch": 0.31926930979964413, "grad_norm": 0.8191978931427002, "learning_rate": 2.3884799940725914e-05, "loss": 0.0987, "step": 14489 }, { "epoch": 0.3192913450891603, "grad_norm": 0.7445244193077087, "learning_rate": 2.388393738486218e-05, "loss": 0.0654, "step": 14490 }, { "epoch": 0.31931338037867646, "grad_norm": 0.7730205059051514, "learning_rate": 2.3883074783747782e-05, "loss": 0.101, "step": 14491 }, { "epoch": 0.3193354156681926, "grad_norm": 1.2612663507461548, "learning_rate": 2.3882212137387124e-05, "loss": 0.143, "step": 14492 }, { "epoch": 0.3193574509577088, "grad_norm": 0.787879228591919, "learning_rate": 2.3881349445784596e-05, "loss": 0.0878, "step": 14493 }, { "epoch": 0.31937948624722495, "grad_norm": 0.6392892599105835, "learning_rate": 2.3880486708944596e-05, "loss": 0.0761, "step": 14494 }, { "epoch": 0.3194015215367411, "grad_norm": 1.6639459133148193, "learning_rate": 2.387962392687151e-05, "loss": 0.0658, "step": 14495 }, { "epoch": 0.3194235568262572, "grad_norm": 0.5065010786056519, "learning_rate": 2.3878761099569745e-05, "loss": 0.0983, "step": 14496 }, { "epoch": 0.3194455921157734, "grad_norm": 0.939780592918396, "learning_rate": 2.387789822704368e-05, "loss": 0.0913, "step": 14497 }, { "epoch": 0.31946762740528956, "grad_norm": 0.989881694316864, "learning_rate": 2.387703530929772e-05, "loss": 0.1096, "step": 14498 }, { "epoch": 0.3194896626948057, "grad_norm": 0.8279997706413269, "learning_rate": 2.3876172346336265e-05, "loss": 0.0843, "step": 14499 }, { "epoch": 0.3195116979843219, "grad_norm": 0.6668071746826172, "learning_rate": 2.38753093381637e-05, "loss": 0.0591, "step": 14500 }, { "epoch": 0.31953373327383805, "grad_norm": 0.6343361139297485, "learning_rate": 2.3874446284784422e-05, "loss": 0.0824, "step": 14501 }, { "epoch": 0.3195557685633542, "grad_norm": 0.6381044983863831, "learning_rate": 2.3873583186202837e-05, "loss": 0.0684, "step": 14502 }, { "epoch": 0.3195778038528704, "grad_norm": 0.6575391292572021, "learning_rate": 2.387272004242333e-05, "loss": 0.0877, "step": 14503 }, { "epoch": 0.31959983914238654, "grad_norm": 0.6771410703659058, "learning_rate": 2.3871856853450303e-05, "loss": 0.1213, "step": 14504 }, { "epoch": 0.3196218744319027, "grad_norm": 1.410502314567566, "learning_rate": 2.3870993619288153e-05, "loss": 0.0963, "step": 14505 }, { "epoch": 0.3196439097214189, "grad_norm": 0.8300888538360596, "learning_rate": 2.3870130339941277e-05, "loss": 0.1056, "step": 14506 }, { "epoch": 0.31966594501093504, "grad_norm": 0.8521990776062012, "learning_rate": 2.3869267015414067e-05, "loss": 0.0903, "step": 14507 }, { "epoch": 0.31968798030045115, "grad_norm": 0.8531901836395264, "learning_rate": 2.3868403645710924e-05, "loss": 0.1098, "step": 14508 }, { "epoch": 0.3197100155899673, "grad_norm": 0.6596524119377136, "learning_rate": 2.386754023083625e-05, "loss": 0.1007, "step": 14509 }, { "epoch": 0.3197320508794835, "grad_norm": 0.8462126851081848, "learning_rate": 2.3866676770794433e-05, "loss": 0.0756, "step": 14510 }, { "epoch": 0.31975408616899964, "grad_norm": 0.8564620614051819, "learning_rate": 2.3865813265589878e-05, "loss": 0.0725, "step": 14511 }, { "epoch": 0.3197761214585158, "grad_norm": 0.9391093254089355, "learning_rate": 2.386494971522698e-05, "loss": 0.1132, "step": 14512 }, { "epoch": 0.31979815674803197, "grad_norm": 0.5541835427284241, "learning_rate": 2.3864086119710142e-05, "loss": 0.0581, "step": 14513 }, { "epoch": 0.31982019203754813, "grad_norm": 0.8871290683746338, "learning_rate": 2.386322247904376e-05, "loss": 0.1043, "step": 14514 }, { "epoch": 0.3198422273270643, "grad_norm": 0.8862310647964478, "learning_rate": 2.3862358793232236e-05, "loss": 0.1282, "step": 14515 }, { "epoch": 0.31986426261658046, "grad_norm": 0.7917107939720154, "learning_rate": 2.3861495062279962e-05, "loss": 0.0699, "step": 14516 }, { "epoch": 0.31988629790609663, "grad_norm": 0.6068755388259888, "learning_rate": 2.3860631286191347e-05, "loss": 0.093, "step": 14517 }, { "epoch": 0.3199083331956128, "grad_norm": 0.7705651521682739, "learning_rate": 2.385976746497078e-05, "loss": 0.1061, "step": 14518 }, { "epoch": 0.31993036848512896, "grad_norm": 0.5403146147727966, "learning_rate": 2.3858903598622668e-05, "loss": 0.0422, "step": 14519 }, { "epoch": 0.31995240377464507, "grad_norm": 0.8185226917266846, "learning_rate": 2.3858039687151407e-05, "loss": 0.1257, "step": 14520 }, { "epoch": 0.31997443906416123, "grad_norm": 1.5365242958068848, "learning_rate": 2.38571757305614e-05, "loss": 0.0795, "step": 14521 }, { "epoch": 0.3199964743536774, "grad_norm": 0.720191240310669, "learning_rate": 2.3856311728857052e-05, "loss": 0.0679, "step": 14522 }, { "epoch": 0.32001850964319356, "grad_norm": 0.6595334410667419, "learning_rate": 2.3855447682042758e-05, "loss": 0.0757, "step": 14523 }, { "epoch": 0.3200405449327097, "grad_norm": 0.5826005935668945, "learning_rate": 2.3854583590122918e-05, "loss": 0.0562, "step": 14524 }, { "epoch": 0.3200625802222259, "grad_norm": 0.7932193279266357, "learning_rate": 2.3853719453101935e-05, "loss": 0.0972, "step": 14525 }, { "epoch": 0.32008461551174205, "grad_norm": 0.752882719039917, "learning_rate": 2.3852855270984214e-05, "loss": 0.1092, "step": 14526 }, { "epoch": 0.3201066508012582, "grad_norm": 0.8342503309249878, "learning_rate": 2.385199104377415e-05, "loss": 0.0978, "step": 14527 }, { "epoch": 0.3201286860907744, "grad_norm": 0.8282510042190552, "learning_rate": 2.3851126771476155e-05, "loss": 0.1119, "step": 14528 }, { "epoch": 0.32015072138029055, "grad_norm": 0.6631078124046326, "learning_rate": 2.3850262454094618e-05, "loss": 0.079, "step": 14529 }, { "epoch": 0.3201727566698067, "grad_norm": 0.8298934698104858, "learning_rate": 2.3849398091633952e-05, "loss": 0.1504, "step": 14530 }, { "epoch": 0.3201947919593229, "grad_norm": 0.8966346383094788, "learning_rate": 2.3848533684098554e-05, "loss": 0.0857, "step": 14531 }, { "epoch": 0.32021682724883904, "grad_norm": 0.8599383234977722, "learning_rate": 2.3847669231492835e-05, "loss": 0.0883, "step": 14532 }, { "epoch": 0.32023886253835515, "grad_norm": 0.5669949054718018, "learning_rate": 2.3846804733821184e-05, "loss": 0.0919, "step": 14533 }, { "epoch": 0.3202608978278713, "grad_norm": 0.7496862411499023, "learning_rate": 2.3845940191088018e-05, "loss": 0.0864, "step": 14534 }, { "epoch": 0.3202829331173875, "grad_norm": 1.2330964803695679, "learning_rate": 2.3845075603297734e-05, "loss": 0.0943, "step": 14535 }, { "epoch": 0.32030496840690365, "grad_norm": 0.8415387272834778, "learning_rate": 2.384421097045474e-05, "loss": 0.1104, "step": 14536 }, { "epoch": 0.3203270036964198, "grad_norm": 0.6006568670272827, "learning_rate": 2.384334629256343e-05, "loss": 0.1037, "step": 14537 }, { "epoch": 0.320349038985936, "grad_norm": 0.943540096282959, "learning_rate": 2.384248156962822e-05, "loss": 0.1309, "step": 14538 }, { "epoch": 0.32037107427545214, "grad_norm": 0.7008815407752991, "learning_rate": 2.3841616801653505e-05, "loss": 0.0827, "step": 14539 }, { "epoch": 0.3203931095649683, "grad_norm": 0.5211051106452942, "learning_rate": 2.3840751988643696e-05, "loss": 0.1082, "step": 14540 }, { "epoch": 0.32041514485448447, "grad_norm": 0.7561602592468262, "learning_rate": 2.3839887130603197e-05, "loss": 0.0961, "step": 14541 }, { "epoch": 0.32043718014400063, "grad_norm": 0.4453413784503937, "learning_rate": 2.3839022227536418e-05, "loss": 0.0637, "step": 14542 }, { "epoch": 0.3204592154335168, "grad_norm": 0.5052986741065979, "learning_rate": 2.383815727944775e-05, "loss": 0.0883, "step": 14543 }, { "epoch": 0.32048125072303296, "grad_norm": 0.9221075177192688, "learning_rate": 2.3837292286341613e-05, "loss": 0.1041, "step": 14544 }, { "epoch": 0.32050328601254907, "grad_norm": 0.823393702507019, "learning_rate": 2.3836427248222407e-05, "loss": 0.0923, "step": 14545 }, { "epoch": 0.32052532130206524, "grad_norm": 0.5222296118736267, "learning_rate": 2.383556216509454e-05, "loss": 0.093, "step": 14546 }, { "epoch": 0.3205473565915814, "grad_norm": 0.7237909436225891, "learning_rate": 2.383469703696241e-05, "loss": 0.0861, "step": 14547 }, { "epoch": 0.32056939188109757, "grad_norm": 0.6970497369766235, "learning_rate": 2.3833831863830434e-05, "loss": 0.0581, "step": 14548 }, { "epoch": 0.32059142717061373, "grad_norm": 1.2701815366744995, "learning_rate": 2.3832966645703015e-05, "loss": 0.1236, "step": 14549 }, { "epoch": 0.3206134624601299, "grad_norm": 0.6395798325538635, "learning_rate": 2.3832101382584563e-05, "loss": 0.0734, "step": 14550 }, { "epoch": 0.32063549774964606, "grad_norm": 0.7725600004196167, "learning_rate": 2.3831236074479487e-05, "loss": 0.0921, "step": 14551 }, { "epoch": 0.3206575330391622, "grad_norm": 0.4840937554836273, "learning_rate": 2.3830370721392177e-05, "loss": 0.1088, "step": 14552 }, { "epoch": 0.3206795683286784, "grad_norm": 1.0773489475250244, "learning_rate": 2.382950532332706e-05, "loss": 0.1095, "step": 14553 }, { "epoch": 0.32070160361819455, "grad_norm": 0.6900792121887207, "learning_rate": 2.3828639880288542e-05, "loss": 0.1079, "step": 14554 }, { "epoch": 0.3207236389077107, "grad_norm": 0.9126489758491516, "learning_rate": 2.3827774392281024e-05, "loss": 0.1147, "step": 14555 }, { "epoch": 0.3207456741972269, "grad_norm": 0.7314050197601318, "learning_rate": 2.3826908859308913e-05, "loss": 0.0812, "step": 14556 }, { "epoch": 0.320767709486743, "grad_norm": 0.6306816935539246, "learning_rate": 2.3826043281376626e-05, "loss": 0.1132, "step": 14557 }, { "epoch": 0.32078974477625916, "grad_norm": 0.8876867890357971, "learning_rate": 2.3825177658488567e-05, "loss": 0.1261, "step": 14558 }, { "epoch": 0.3208117800657753, "grad_norm": 0.5246372222900391, "learning_rate": 2.3824311990649147e-05, "loss": 0.064, "step": 14559 }, { "epoch": 0.3208338153552915, "grad_norm": 0.9297277331352234, "learning_rate": 2.3823446277862774e-05, "loss": 0.0925, "step": 14560 }, { "epoch": 0.32085585064480765, "grad_norm": 0.7634448409080505, "learning_rate": 2.3822580520133856e-05, "loss": 0.0998, "step": 14561 }, { "epoch": 0.3208778859343238, "grad_norm": 0.8145463466644287, "learning_rate": 2.3821714717466804e-05, "loss": 0.1038, "step": 14562 }, { "epoch": 0.32089992122384, "grad_norm": 0.7237520217895508, "learning_rate": 2.3820848869866027e-05, "loss": 0.0999, "step": 14563 }, { "epoch": 0.32092195651335614, "grad_norm": 0.9697214365005493, "learning_rate": 2.3819982977335935e-05, "loss": 0.09, "step": 14564 }, { "epoch": 0.3209439918028723, "grad_norm": 0.8347628712654114, "learning_rate": 2.3819117039880947e-05, "loss": 0.1211, "step": 14565 }, { "epoch": 0.3209660270923885, "grad_norm": 1.173041820526123, "learning_rate": 2.3818251057505468e-05, "loss": 0.1241, "step": 14566 }, { "epoch": 0.32098806238190464, "grad_norm": 0.9782916307449341, "learning_rate": 2.38173850302139e-05, "loss": 0.1087, "step": 14567 }, { "epoch": 0.3210100976714208, "grad_norm": 0.8033919334411621, "learning_rate": 2.3816518958010663e-05, "loss": 0.0881, "step": 14568 }, { "epoch": 0.32103213296093697, "grad_norm": 0.6012489199638367, "learning_rate": 2.3815652840900173e-05, "loss": 0.1123, "step": 14569 }, { "epoch": 0.3210541682504531, "grad_norm": 0.6923789978027344, "learning_rate": 2.3814786678886827e-05, "loss": 0.1215, "step": 14570 }, { "epoch": 0.32107620353996924, "grad_norm": 0.954933226108551, "learning_rate": 2.381392047197505e-05, "loss": 0.1011, "step": 14571 }, { "epoch": 0.3210982388294854, "grad_norm": 1.3671040534973145, "learning_rate": 2.3813054220169252e-05, "loss": 0.0972, "step": 14572 }, { "epoch": 0.32112027411900157, "grad_norm": 1.052531361579895, "learning_rate": 2.381218792347384e-05, "loss": 0.1212, "step": 14573 }, { "epoch": 0.32114230940851773, "grad_norm": 0.9895271062850952, "learning_rate": 2.381132158189323e-05, "loss": 0.1363, "step": 14574 }, { "epoch": 0.3211643446980339, "grad_norm": 1.2479310035705566, "learning_rate": 2.3810455195431835e-05, "loss": 0.1203, "step": 14575 }, { "epoch": 0.32118637998755006, "grad_norm": 1.1030372381210327, "learning_rate": 2.3809588764094067e-05, "loss": 0.0961, "step": 14576 }, { "epoch": 0.32120841527706623, "grad_norm": 0.719925582408905, "learning_rate": 2.380872228788434e-05, "loss": 0.0901, "step": 14577 }, { "epoch": 0.3212304505665824, "grad_norm": 0.8076639175415039, "learning_rate": 2.380785576680707e-05, "loss": 0.0967, "step": 14578 }, { "epoch": 0.32125248585609856, "grad_norm": 0.9818744659423828, "learning_rate": 2.3806989200866654e-05, "loss": 0.0947, "step": 14579 }, { "epoch": 0.3212745211456147, "grad_norm": 0.9189601540565491, "learning_rate": 2.3806122590067535e-05, "loss": 0.105, "step": 14580 }, { "epoch": 0.3212965564351309, "grad_norm": 0.9523614048957825, "learning_rate": 2.38052559344141e-05, "loss": 0.0754, "step": 14581 }, { "epoch": 0.321318591724647, "grad_norm": 0.8670716881752014, "learning_rate": 2.3804389233910783e-05, "loss": 0.1201, "step": 14582 }, { "epoch": 0.32134062701416316, "grad_norm": 0.8156070113182068, "learning_rate": 2.3803522488561987e-05, "loss": 0.1169, "step": 14583 }, { "epoch": 0.3213626623036793, "grad_norm": 0.9652864336967468, "learning_rate": 2.3802655698372128e-05, "loss": 0.0896, "step": 14584 }, { "epoch": 0.3213846975931955, "grad_norm": 0.8422355651855469, "learning_rate": 2.3801788863345623e-05, "loss": 0.1102, "step": 14585 }, { "epoch": 0.32140673288271165, "grad_norm": 0.9350513815879822, "learning_rate": 2.3800921983486894e-05, "loss": 0.0894, "step": 14586 }, { "epoch": 0.3214287681722278, "grad_norm": 0.9070419073104858, "learning_rate": 2.3800055058800342e-05, "loss": 0.0618, "step": 14587 }, { "epoch": 0.321450803461744, "grad_norm": 0.5754934549331665, "learning_rate": 2.3799188089290396e-05, "loss": 0.0653, "step": 14588 }, { "epoch": 0.32147283875126015, "grad_norm": 0.7229226231575012, "learning_rate": 2.3798321074961467e-05, "loss": 0.0795, "step": 14589 }, { "epoch": 0.3214948740407763, "grad_norm": 1.0301929712295532, "learning_rate": 2.379745401581797e-05, "loss": 0.1104, "step": 14590 }, { "epoch": 0.3215169093302925, "grad_norm": 0.9501638412475586, "learning_rate": 2.379658691186432e-05, "loss": 0.1267, "step": 14591 }, { "epoch": 0.32153894461980864, "grad_norm": 0.6567507386207581, "learning_rate": 2.3795719763104933e-05, "loss": 0.0973, "step": 14592 }, { "epoch": 0.3215609799093248, "grad_norm": 0.6069315075874329, "learning_rate": 2.3794852569544236e-05, "loss": 0.0802, "step": 14593 }, { "epoch": 0.32158301519884097, "grad_norm": 0.7033761739730835, "learning_rate": 2.379398533118663e-05, "loss": 0.0981, "step": 14594 }, { "epoch": 0.3216050504883571, "grad_norm": 0.6909646987915039, "learning_rate": 2.3793118048036546e-05, "loss": 0.072, "step": 14595 }, { "epoch": 0.32162708577787325, "grad_norm": 0.7001461982727051, "learning_rate": 2.3792250720098398e-05, "loss": 0.1177, "step": 14596 }, { "epoch": 0.3216491210673894, "grad_norm": 0.628007709980011, "learning_rate": 2.37913833473766e-05, "loss": 0.0994, "step": 14597 }, { "epoch": 0.3216711563569056, "grad_norm": 0.7596009373664856, "learning_rate": 2.3790515929875575e-05, "loss": 0.074, "step": 14598 }, { "epoch": 0.32169319164642174, "grad_norm": 0.45892786979675293, "learning_rate": 2.3789648467599736e-05, "loss": 0.0878, "step": 14599 }, { "epoch": 0.3217152269359379, "grad_norm": 0.8532448410987854, "learning_rate": 2.3788780960553503e-05, "loss": 0.0893, "step": 14600 }, { "epoch": 0.32173726222545407, "grad_norm": 1.1261913776397705, "learning_rate": 2.3787913408741297e-05, "loss": 0.0921, "step": 14601 }, { "epoch": 0.32175929751497023, "grad_norm": 0.8474097847938538, "learning_rate": 2.3787045812167534e-05, "loss": 0.0783, "step": 14602 }, { "epoch": 0.3217813328044864, "grad_norm": 1.197925090789795, "learning_rate": 2.3786178170836638e-05, "loss": 0.0975, "step": 14603 }, { "epoch": 0.32180336809400256, "grad_norm": 0.5731835961341858, "learning_rate": 2.378531048475302e-05, "loss": 0.1308, "step": 14604 }, { "epoch": 0.3218254033835187, "grad_norm": 0.6541830897331238, "learning_rate": 2.3784442753921113e-05, "loss": 0.113, "step": 14605 }, { "epoch": 0.3218474386730349, "grad_norm": 0.5964454412460327, "learning_rate": 2.378357497834532e-05, "loss": 0.0885, "step": 14606 }, { "epoch": 0.321869473962551, "grad_norm": 0.6542131900787354, "learning_rate": 2.3782707158030077e-05, "loss": 0.0786, "step": 14607 }, { "epoch": 0.32189150925206717, "grad_norm": 0.9432172179222107, "learning_rate": 2.378183929297979e-05, "loss": 0.0883, "step": 14608 }, { "epoch": 0.32191354454158333, "grad_norm": 0.6892482042312622, "learning_rate": 2.378097138319889e-05, "loss": 0.0854, "step": 14609 }, { "epoch": 0.3219355798310995, "grad_norm": 0.9808549284934998, "learning_rate": 2.3780103428691792e-05, "loss": 0.0922, "step": 14610 }, { "epoch": 0.32195761512061566, "grad_norm": 0.721286416053772, "learning_rate": 2.377923542946292e-05, "loss": 0.0908, "step": 14611 }, { "epoch": 0.3219796504101318, "grad_norm": 1.0335750579833984, "learning_rate": 2.3778367385516694e-05, "loss": 0.1083, "step": 14612 }, { "epoch": 0.322001685699648, "grad_norm": 0.6338298916816711, "learning_rate": 2.3777499296857534e-05, "loss": 0.0784, "step": 14613 }, { "epoch": 0.32202372098916415, "grad_norm": 0.7172465324401855, "learning_rate": 2.3776631163489865e-05, "loss": 0.0887, "step": 14614 }, { "epoch": 0.3220457562786803, "grad_norm": 0.6285393238067627, "learning_rate": 2.3775762985418107e-05, "loss": 0.1035, "step": 14615 }, { "epoch": 0.3220677915681965, "grad_norm": 0.8832074999809265, "learning_rate": 2.3774894762646682e-05, "loss": 0.0894, "step": 14616 }, { "epoch": 0.32208982685771265, "grad_norm": 0.7138260006904602, "learning_rate": 2.3774026495180012e-05, "loss": 0.1105, "step": 14617 }, { "epoch": 0.3221118621472288, "grad_norm": 0.4445676803588867, "learning_rate": 2.3773158183022525e-05, "loss": 0.0919, "step": 14618 }, { "epoch": 0.3221338974367449, "grad_norm": 0.7566606998443604, "learning_rate": 2.377228982617863e-05, "loss": 0.1015, "step": 14619 }, { "epoch": 0.3221559327262611, "grad_norm": 0.7865015864372253, "learning_rate": 2.377142142465277e-05, "loss": 0.1164, "step": 14620 }, { "epoch": 0.32217796801577725, "grad_norm": 0.3689131736755371, "learning_rate": 2.3770552978449344e-05, "loss": 0.1093, "step": 14621 }, { "epoch": 0.3222000033052934, "grad_norm": 1.1393996477127075, "learning_rate": 2.3769684487572795e-05, "loss": 0.0853, "step": 14622 }, { "epoch": 0.3222220385948096, "grad_norm": 0.7697170972824097, "learning_rate": 2.3768815952027543e-05, "loss": 0.073, "step": 14623 }, { "epoch": 0.32224407388432574, "grad_norm": 1.2918485403060913, "learning_rate": 2.376794737181801e-05, "loss": 0.1041, "step": 14624 }, { "epoch": 0.3222661091738419, "grad_norm": 0.8255013823509216, "learning_rate": 2.376707874694861e-05, "loss": 0.0921, "step": 14625 }, { "epoch": 0.3222881444633581, "grad_norm": 0.6011202931404114, "learning_rate": 2.3766210077423785e-05, "loss": 0.0775, "step": 14626 }, { "epoch": 0.32231017975287424, "grad_norm": 0.8708108067512512, "learning_rate": 2.3765341363247948e-05, "loss": 0.0866, "step": 14627 }, { "epoch": 0.3223322150423904, "grad_norm": 0.6717568039894104, "learning_rate": 2.376447260442553e-05, "loss": 0.0501, "step": 14628 }, { "epoch": 0.32235425033190657, "grad_norm": 0.7516454458236694, "learning_rate": 2.3763603800960947e-05, "loss": 0.0691, "step": 14629 }, { "epoch": 0.32237628562142273, "grad_norm": 0.8504942655563354, "learning_rate": 2.3762734952858637e-05, "loss": 0.113, "step": 14630 }, { "epoch": 0.3223983209109389, "grad_norm": 0.5482444167137146, "learning_rate": 2.3761866060123012e-05, "loss": 0.1127, "step": 14631 }, { "epoch": 0.322420356200455, "grad_norm": 0.7026543617248535, "learning_rate": 2.376099712275851e-05, "loss": 0.0935, "step": 14632 }, { "epoch": 0.32244239148997117, "grad_norm": 0.46388521790504456, "learning_rate": 2.376012814076955e-05, "loss": 0.0712, "step": 14633 }, { "epoch": 0.32246442677948733, "grad_norm": 0.7701705098152161, "learning_rate": 2.3759259114160562e-05, "loss": 0.0706, "step": 14634 }, { "epoch": 0.3224864620690035, "grad_norm": 0.614464521408081, "learning_rate": 2.375839004293597e-05, "loss": 0.0672, "step": 14635 }, { "epoch": 0.32250849735851966, "grad_norm": 0.9590575098991394, "learning_rate": 2.37575209271002e-05, "loss": 0.1122, "step": 14636 }, { "epoch": 0.32253053264803583, "grad_norm": 0.9312761425971985, "learning_rate": 2.375665176665768e-05, "loss": 0.0979, "step": 14637 }, { "epoch": 0.322552567937552, "grad_norm": 0.6507970094680786, "learning_rate": 2.3755782561612834e-05, "loss": 0.0952, "step": 14638 }, { "epoch": 0.32257460322706816, "grad_norm": 0.6609942317008972, "learning_rate": 2.3754913311970096e-05, "loss": 0.0833, "step": 14639 }, { "epoch": 0.3225966385165843, "grad_norm": 1.1738561391830444, "learning_rate": 2.3754044017733885e-05, "loss": 0.0941, "step": 14640 }, { "epoch": 0.3226186738061005, "grad_norm": 1.2802237272262573, "learning_rate": 2.375317467890864e-05, "loss": 0.0919, "step": 14641 }, { "epoch": 0.32264070909561665, "grad_norm": 1.166438341140747, "learning_rate": 2.3752305295498777e-05, "loss": 0.1188, "step": 14642 }, { "epoch": 0.3226627443851328, "grad_norm": 0.6023420095443726, "learning_rate": 2.3751435867508732e-05, "loss": 0.1108, "step": 14643 }, { "epoch": 0.3226847796746489, "grad_norm": 0.71697998046875, "learning_rate": 2.3750566394942928e-05, "loss": 0.0872, "step": 14644 }, { "epoch": 0.3227068149641651, "grad_norm": 0.45782271027565, "learning_rate": 2.3749696877805803e-05, "loss": 0.0802, "step": 14645 }, { "epoch": 0.32272885025368125, "grad_norm": 0.5818142890930176, "learning_rate": 2.3748827316101775e-05, "loss": 0.0808, "step": 14646 }, { "epoch": 0.3227508855431974, "grad_norm": 0.8378822803497314, "learning_rate": 2.374795770983528e-05, "loss": 0.0695, "step": 14647 }, { "epoch": 0.3227729208327136, "grad_norm": 0.6808394193649292, "learning_rate": 2.3747088059010748e-05, "loss": 0.1009, "step": 14648 }, { "epoch": 0.32279495612222975, "grad_norm": 0.7145998477935791, "learning_rate": 2.3746218363632602e-05, "loss": 0.0741, "step": 14649 }, { "epoch": 0.3228169914117459, "grad_norm": 0.45572391152381897, "learning_rate": 2.3745348623705273e-05, "loss": 0.0784, "step": 14650 }, { "epoch": 0.3228390267012621, "grad_norm": 0.9536109566688538, "learning_rate": 2.3744478839233203e-05, "loss": 0.0874, "step": 14651 }, { "epoch": 0.32286106199077824, "grad_norm": 0.8202791213989258, "learning_rate": 2.3743609010220807e-05, "loss": 0.1236, "step": 14652 }, { "epoch": 0.3228830972802944, "grad_norm": 0.6070545315742493, "learning_rate": 2.3742739136672526e-05, "loss": 0.0897, "step": 14653 }, { "epoch": 0.32290513256981057, "grad_norm": 1.1166819334030151, "learning_rate": 2.374186921859278e-05, "loss": 0.1317, "step": 14654 }, { "epoch": 0.32292716785932674, "grad_norm": 0.77192622423172, "learning_rate": 2.3740999255986007e-05, "loss": 0.0842, "step": 14655 }, { "epoch": 0.32294920314884284, "grad_norm": 0.5911685228347778, "learning_rate": 2.3740129248856643e-05, "loss": 0.0984, "step": 14656 }, { "epoch": 0.322971238438359, "grad_norm": 0.6591283679008484, "learning_rate": 2.373925919720911e-05, "loss": 0.1081, "step": 14657 }, { "epoch": 0.3229932737278752, "grad_norm": 0.603813886642456, "learning_rate": 2.3738389101047842e-05, "loss": 0.0909, "step": 14658 }, { "epoch": 0.32301530901739134, "grad_norm": 0.9193234443664551, "learning_rate": 2.3737518960377276e-05, "loss": 0.1089, "step": 14659 }, { "epoch": 0.3230373443069075, "grad_norm": 0.6176117062568665, "learning_rate": 2.373664877520184e-05, "loss": 0.0826, "step": 14660 }, { "epoch": 0.32305937959642367, "grad_norm": 1.049270749092102, "learning_rate": 2.3735778545525963e-05, "loss": 0.1261, "step": 14661 }, { "epoch": 0.32308141488593983, "grad_norm": 0.9071224331855774, "learning_rate": 2.3734908271354086e-05, "loss": 0.1127, "step": 14662 }, { "epoch": 0.323103450175456, "grad_norm": 1.2214274406433105, "learning_rate": 2.3734037952690636e-05, "loss": 0.0905, "step": 14663 }, { "epoch": 0.32312548546497216, "grad_norm": 0.725929856300354, "learning_rate": 2.3733167589540042e-05, "loss": 0.0718, "step": 14664 }, { "epoch": 0.3231475207544883, "grad_norm": 1.0961697101593018, "learning_rate": 2.3732297181906748e-05, "loss": 0.1031, "step": 14665 }, { "epoch": 0.3231695560440045, "grad_norm": 0.7334533929824829, "learning_rate": 2.3731426729795184e-05, "loss": 0.0963, "step": 14666 }, { "epoch": 0.32319159133352066, "grad_norm": 0.41861969232559204, "learning_rate": 2.3730556233209776e-05, "loss": 0.0669, "step": 14667 }, { "epoch": 0.3232136266230368, "grad_norm": 0.5317482352256775, "learning_rate": 2.3729685692154968e-05, "loss": 0.0816, "step": 14668 }, { "epoch": 0.32323566191255293, "grad_norm": 0.7051500678062439, "learning_rate": 2.3728815106635187e-05, "loss": 0.0863, "step": 14669 }, { "epoch": 0.3232576972020691, "grad_norm": 1.0024755001068115, "learning_rate": 2.372794447665487e-05, "loss": 0.1073, "step": 14670 }, { "epoch": 0.32327973249158526, "grad_norm": 0.7177253365516663, "learning_rate": 2.3727073802218448e-05, "loss": 0.0917, "step": 14671 }, { "epoch": 0.3233017677811014, "grad_norm": 0.8631664514541626, "learning_rate": 2.3726203083330364e-05, "loss": 0.0776, "step": 14672 }, { "epoch": 0.3233238030706176, "grad_norm": 0.8323395848274231, "learning_rate": 2.3725332319995047e-05, "loss": 0.1002, "step": 14673 }, { "epoch": 0.32334583836013375, "grad_norm": 0.6138839721679688, "learning_rate": 2.3724461512216933e-05, "loss": 0.0601, "step": 14674 }, { "epoch": 0.3233678736496499, "grad_norm": 0.4620446264743805, "learning_rate": 2.372359066000046e-05, "loss": 0.0554, "step": 14675 }, { "epoch": 0.3233899089391661, "grad_norm": 1.2045265436172485, "learning_rate": 2.372271976335006e-05, "loss": 0.0746, "step": 14676 }, { "epoch": 0.32341194422868225, "grad_norm": 0.5390248894691467, "learning_rate": 2.372184882227017e-05, "loss": 0.0778, "step": 14677 }, { "epoch": 0.3234339795181984, "grad_norm": 0.5584813356399536, "learning_rate": 2.3720977836765233e-05, "loss": 0.0766, "step": 14678 }, { "epoch": 0.3234560148077146, "grad_norm": 0.5578173398971558, "learning_rate": 2.3720106806839672e-05, "loss": 0.1052, "step": 14679 }, { "epoch": 0.32347805009723074, "grad_norm": 0.9490360617637634, "learning_rate": 2.3719235732497935e-05, "loss": 0.1031, "step": 14680 }, { "epoch": 0.32350008538674685, "grad_norm": 0.6121894121170044, "learning_rate": 2.371836461374445e-05, "loss": 0.0631, "step": 14681 }, { "epoch": 0.323522120676263, "grad_norm": 0.6064444780349731, "learning_rate": 2.371749345058366e-05, "loss": 0.1081, "step": 14682 }, { "epoch": 0.3235441559657792, "grad_norm": 1.0775179862976074, "learning_rate": 2.371662224302001e-05, "loss": 0.0958, "step": 14683 }, { "epoch": 0.32356619125529534, "grad_norm": 0.5460548996925354, "learning_rate": 2.371575099105792e-05, "loss": 0.0983, "step": 14684 }, { "epoch": 0.3235882265448115, "grad_norm": 0.8492133021354675, "learning_rate": 2.3714879694701835e-05, "loss": 0.1407, "step": 14685 }, { "epoch": 0.3236102618343277, "grad_norm": 0.8033236861228943, "learning_rate": 2.37140083539562e-05, "loss": 0.1218, "step": 14686 }, { "epoch": 0.32363229712384384, "grad_norm": 0.563718318939209, "learning_rate": 2.371313696882545e-05, "loss": 0.0773, "step": 14687 }, { "epoch": 0.32365433241336, "grad_norm": 0.9002882838249207, "learning_rate": 2.371226553931401e-05, "loss": 0.0897, "step": 14688 }, { "epoch": 0.32367636770287617, "grad_norm": 0.41434770822525024, "learning_rate": 2.371139406542634e-05, "loss": 0.0536, "step": 14689 }, { "epoch": 0.32369840299239233, "grad_norm": 0.7342561483383179, "learning_rate": 2.3710522547166865e-05, "loss": 0.1197, "step": 14690 }, { "epoch": 0.3237204382819085, "grad_norm": 0.6861972808837891, "learning_rate": 2.3709650984540026e-05, "loss": 0.1027, "step": 14691 }, { "epoch": 0.32374247357142466, "grad_norm": 0.861873209476471, "learning_rate": 2.3708779377550264e-05, "loss": 0.0959, "step": 14692 }, { "epoch": 0.32376450886094077, "grad_norm": 0.5910070538520813, "learning_rate": 2.3707907726202027e-05, "loss": 0.1031, "step": 14693 }, { "epoch": 0.32378654415045693, "grad_norm": 0.7757145762443542, "learning_rate": 2.370703603049974e-05, "loss": 0.0959, "step": 14694 }, { "epoch": 0.3238085794399731, "grad_norm": 1.0719988346099854, "learning_rate": 2.3706164290447847e-05, "loss": 0.1173, "step": 14695 }, { "epoch": 0.32383061472948926, "grad_norm": 0.7398656010627747, "learning_rate": 2.370529250605079e-05, "loss": 0.1129, "step": 14696 }, { "epoch": 0.32385265001900543, "grad_norm": 0.6899731755256653, "learning_rate": 2.3704420677313014e-05, "loss": 0.1246, "step": 14697 }, { "epoch": 0.3238746853085216, "grad_norm": 0.850753128528595, "learning_rate": 2.3703548804238954e-05, "loss": 0.1015, "step": 14698 }, { "epoch": 0.32389672059803776, "grad_norm": 0.7665040493011475, "learning_rate": 2.3702676886833057e-05, "loss": 0.0872, "step": 14699 }, { "epoch": 0.3239187558875539, "grad_norm": 0.7031270265579224, "learning_rate": 2.3701804925099754e-05, "loss": 0.1326, "step": 14700 }, { "epoch": 0.3239407911770701, "grad_norm": 0.7681015133857727, "learning_rate": 2.3700932919043497e-05, "loss": 0.11, "step": 14701 }, { "epoch": 0.32396282646658625, "grad_norm": 0.5741564631462097, "learning_rate": 2.370006086866872e-05, "loss": 0.1116, "step": 14702 }, { "epoch": 0.3239848617561024, "grad_norm": 0.49319881200790405, "learning_rate": 2.3699188773979868e-05, "loss": 0.0664, "step": 14703 }, { "epoch": 0.3240068970456186, "grad_norm": 0.7923737168312073, "learning_rate": 2.3698316634981383e-05, "loss": 0.1155, "step": 14704 }, { "epoch": 0.32402893233513475, "grad_norm": 1.0424540042877197, "learning_rate": 2.3697444451677705e-05, "loss": 0.1145, "step": 14705 }, { "epoch": 0.32405096762465085, "grad_norm": 0.5474136471748352, "learning_rate": 2.3696572224073282e-05, "loss": 0.0907, "step": 14706 }, { "epoch": 0.324073002914167, "grad_norm": 0.650351345539093, "learning_rate": 2.369569995217255e-05, "loss": 0.085, "step": 14707 }, { "epoch": 0.3240950382036832, "grad_norm": 0.9628604650497437, "learning_rate": 2.369482763597996e-05, "loss": 0.1181, "step": 14708 }, { "epoch": 0.32411707349319935, "grad_norm": 0.7969863414764404, "learning_rate": 2.3693955275499944e-05, "loss": 0.1089, "step": 14709 }, { "epoch": 0.3241391087827155, "grad_norm": 1.0512661933898926, "learning_rate": 2.3693082870736954e-05, "loss": 0.0639, "step": 14710 }, { "epoch": 0.3241611440722317, "grad_norm": 0.9097474217414856, "learning_rate": 2.369221042169543e-05, "loss": 0.1111, "step": 14711 }, { "epoch": 0.32418317936174784, "grad_norm": 0.7498271465301514, "learning_rate": 2.3691337928379822e-05, "loss": 0.1152, "step": 14712 }, { "epoch": 0.324205214651264, "grad_norm": 0.964607834815979, "learning_rate": 2.3690465390794565e-05, "loss": 0.1045, "step": 14713 }, { "epoch": 0.32422724994078017, "grad_norm": 0.892858624458313, "learning_rate": 2.368959280894411e-05, "loss": 0.1035, "step": 14714 }, { "epoch": 0.32424928523029634, "grad_norm": 0.5857395529747009, "learning_rate": 2.3688720182832896e-05, "loss": 0.1159, "step": 14715 }, { "epoch": 0.3242713205198125, "grad_norm": 0.5333738327026367, "learning_rate": 2.3687847512465374e-05, "loss": 0.0856, "step": 14716 }, { "epoch": 0.32429335580932867, "grad_norm": 0.518006443977356, "learning_rate": 2.3686974797845986e-05, "loss": 0.0854, "step": 14717 }, { "epoch": 0.3243153910988448, "grad_norm": 0.8273534774780273, "learning_rate": 2.3686102038979174e-05, "loss": 0.1189, "step": 14718 }, { "epoch": 0.32433742638836094, "grad_norm": 0.8108488321304321, "learning_rate": 2.3685229235869386e-05, "loss": 0.1029, "step": 14719 }, { "epoch": 0.3243594616778771, "grad_norm": 0.8717651963233948, "learning_rate": 2.368435638852107e-05, "loss": 0.0903, "step": 14720 }, { "epoch": 0.32438149696739327, "grad_norm": 0.9487866759300232, "learning_rate": 2.3683483496938673e-05, "loss": 0.1348, "step": 14721 }, { "epoch": 0.32440353225690943, "grad_norm": 0.5607938170433044, "learning_rate": 2.3682610561126635e-05, "loss": 0.1143, "step": 14722 }, { "epoch": 0.3244255675464256, "grad_norm": 1.1411508321762085, "learning_rate": 2.3681737581089405e-05, "loss": 0.1086, "step": 14723 }, { "epoch": 0.32444760283594176, "grad_norm": 0.6383035778999329, "learning_rate": 2.3680864556831433e-05, "loss": 0.0966, "step": 14724 }, { "epoch": 0.3244696381254579, "grad_norm": 0.7586010694503784, "learning_rate": 2.367999148835716e-05, "loss": 0.07, "step": 14725 }, { "epoch": 0.3244916734149741, "grad_norm": 0.6976491808891296, "learning_rate": 2.3679118375671036e-05, "loss": 0.1102, "step": 14726 }, { "epoch": 0.32451370870449026, "grad_norm": 0.8174994587898254, "learning_rate": 2.3678245218777507e-05, "loss": 0.0983, "step": 14727 }, { "epoch": 0.3245357439940064, "grad_norm": 0.9612289667129517, "learning_rate": 2.3677372017681025e-05, "loss": 0.105, "step": 14728 }, { "epoch": 0.3245577792835226, "grad_norm": 0.8997535705566406, "learning_rate": 2.367649877238603e-05, "loss": 0.099, "step": 14729 }, { "epoch": 0.3245798145730387, "grad_norm": 0.4969343841075897, "learning_rate": 2.3675625482896976e-05, "loss": 0.0719, "step": 14730 }, { "epoch": 0.32460184986255486, "grad_norm": 0.6811414361000061, "learning_rate": 2.367475214921831e-05, "loss": 0.0734, "step": 14731 }, { "epoch": 0.324623885152071, "grad_norm": 0.795910656452179, "learning_rate": 2.367387877135448e-05, "loss": 0.0713, "step": 14732 }, { "epoch": 0.3246459204415872, "grad_norm": 0.7756887674331665, "learning_rate": 2.3673005349309932e-05, "loss": 0.079, "step": 14733 }, { "epoch": 0.32466795573110335, "grad_norm": 0.5545459985733032, "learning_rate": 2.3672131883089115e-05, "loss": 0.0768, "step": 14734 }, { "epoch": 0.3246899910206195, "grad_norm": 0.5134952068328857, "learning_rate": 2.3671258372696482e-05, "loss": 0.0647, "step": 14735 }, { "epoch": 0.3247120263101357, "grad_norm": 0.9196705222129822, "learning_rate": 2.3670384818136482e-05, "loss": 0.0941, "step": 14736 }, { "epoch": 0.32473406159965185, "grad_norm": 0.6143852472305298, "learning_rate": 2.3669511219413566e-05, "loss": 0.0701, "step": 14737 }, { "epoch": 0.324756096889168, "grad_norm": 1.268688440322876, "learning_rate": 2.3668637576532176e-05, "loss": 0.0833, "step": 14738 }, { "epoch": 0.3247781321786842, "grad_norm": 0.7016024589538574, "learning_rate": 2.366776388949676e-05, "loss": 0.0885, "step": 14739 }, { "epoch": 0.32480016746820034, "grad_norm": 0.5479531288146973, "learning_rate": 2.3666890158311787e-05, "loss": 0.0955, "step": 14740 }, { "epoch": 0.3248222027577165, "grad_norm": 0.681285560131073, "learning_rate": 2.3666016382981685e-05, "loss": 0.0834, "step": 14741 }, { "epoch": 0.32484423804723267, "grad_norm": 0.41113871335983276, "learning_rate": 2.3665142563510918e-05, "loss": 0.0989, "step": 14742 }, { "epoch": 0.3248662733367488, "grad_norm": 1.1781107187271118, "learning_rate": 2.3664268699903936e-05, "loss": 0.1292, "step": 14743 }, { "epoch": 0.32488830862626494, "grad_norm": 1.0074594020843506, "learning_rate": 2.3663394792165183e-05, "loss": 0.096, "step": 14744 }, { "epoch": 0.3249103439157811, "grad_norm": 0.6341769695281982, "learning_rate": 2.3662520840299117e-05, "loss": 0.0697, "step": 14745 }, { "epoch": 0.3249323792052973, "grad_norm": 0.6636742353439331, "learning_rate": 2.3661646844310187e-05, "loss": 0.0977, "step": 14746 }, { "epoch": 0.32495441449481344, "grad_norm": 0.7779534459114075, "learning_rate": 2.3660772804202844e-05, "loss": 0.0929, "step": 14747 }, { "epoch": 0.3249764497843296, "grad_norm": 0.6628555655479431, "learning_rate": 2.365989871998154e-05, "loss": 0.0829, "step": 14748 }, { "epoch": 0.32499848507384577, "grad_norm": 0.8953575491905212, "learning_rate": 2.365902459165073e-05, "loss": 0.1304, "step": 14749 }, { "epoch": 0.32502052036336193, "grad_norm": 0.8467983603477478, "learning_rate": 2.3658150419214864e-05, "loss": 0.0996, "step": 14750 }, { "epoch": 0.3250425556528781, "grad_norm": 0.782996654510498, "learning_rate": 2.3657276202678395e-05, "loss": 0.1032, "step": 14751 }, { "epoch": 0.32506459094239426, "grad_norm": 0.9391193389892578, "learning_rate": 2.3656401942045777e-05, "loss": 0.0946, "step": 14752 }, { "epoch": 0.3250866262319104, "grad_norm": 0.9141072034835815, "learning_rate": 2.365552763732146e-05, "loss": 0.1212, "step": 14753 }, { "epoch": 0.3251086615214266, "grad_norm": 0.43341049551963806, "learning_rate": 2.3654653288509902e-05, "loss": 0.0648, "step": 14754 }, { "epoch": 0.3251306968109427, "grad_norm": 0.8321464657783508, "learning_rate": 2.365377889561555e-05, "loss": 0.1035, "step": 14755 }, { "epoch": 0.32515273210045886, "grad_norm": 0.8449912071228027, "learning_rate": 2.3652904458642865e-05, "loss": 0.1116, "step": 14756 }, { "epoch": 0.32517476738997503, "grad_norm": 0.7138485908508301, "learning_rate": 2.3652029977596292e-05, "loss": 0.0986, "step": 14757 }, { "epoch": 0.3251968026794912, "grad_norm": 0.7368402481079102, "learning_rate": 2.3651155452480296e-05, "loss": 0.0614, "step": 14758 }, { "epoch": 0.32521883796900736, "grad_norm": 0.41533011198043823, "learning_rate": 2.3650280883299325e-05, "loss": 0.0671, "step": 14759 }, { "epoch": 0.3252408732585235, "grad_norm": 0.4904475808143616, "learning_rate": 2.3649406270057835e-05, "loss": 0.0686, "step": 14760 }, { "epoch": 0.3252629085480397, "grad_norm": 0.628255307674408, "learning_rate": 2.364853161276028e-05, "loss": 0.1079, "step": 14761 }, { "epoch": 0.32528494383755585, "grad_norm": 0.5053190588951111, "learning_rate": 2.3647656911411114e-05, "loss": 0.0574, "step": 14762 }, { "epoch": 0.325306979127072, "grad_norm": 0.9066423177719116, "learning_rate": 2.3646782166014798e-05, "loss": 0.1322, "step": 14763 }, { "epoch": 0.3253290144165882, "grad_norm": 0.7795054316520691, "learning_rate": 2.3645907376575784e-05, "loss": 0.1005, "step": 14764 }, { "epoch": 0.32535104970610434, "grad_norm": 0.5506481528282166, "learning_rate": 2.3645032543098524e-05, "loss": 0.1086, "step": 14765 }, { "epoch": 0.3253730849956205, "grad_norm": 0.926353394985199, "learning_rate": 2.364415766558748e-05, "loss": 0.1242, "step": 14766 }, { "epoch": 0.3253951202851366, "grad_norm": 0.8988716006278992, "learning_rate": 2.3643282744047102e-05, "loss": 0.1031, "step": 14767 }, { "epoch": 0.3254171555746528, "grad_norm": 1.0089045763015747, "learning_rate": 2.3642407778481854e-05, "loss": 0.1083, "step": 14768 }, { "epoch": 0.32543919086416895, "grad_norm": 0.6033617258071899, "learning_rate": 2.3641532768896185e-05, "loss": 0.09, "step": 14769 }, { "epoch": 0.3254612261536851, "grad_norm": 0.5840200185775757, "learning_rate": 2.3640657715294558e-05, "loss": 0.1192, "step": 14770 }, { "epoch": 0.3254832614432013, "grad_norm": 0.875435471534729, "learning_rate": 2.363978261768143e-05, "loss": 0.1291, "step": 14771 }, { "epoch": 0.32550529673271744, "grad_norm": 0.6888864636421204, "learning_rate": 2.3638907476061252e-05, "loss": 0.0711, "step": 14772 }, { "epoch": 0.3255273320222336, "grad_norm": 0.8649430871009827, "learning_rate": 2.3638032290438484e-05, "loss": 0.0777, "step": 14773 }, { "epoch": 0.32554936731174977, "grad_norm": 1.0299351215362549, "learning_rate": 2.363715706081759e-05, "loss": 0.1217, "step": 14774 }, { "epoch": 0.32557140260126594, "grad_norm": 0.8957711458206177, "learning_rate": 2.3636281787203023e-05, "loss": 0.0967, "step": 14775 }, { "epoch": 0.3255934378907821, "grad_norm": 1.0168983936309814, "learning_rate": 2.363540646959924e-05, "loss": 0.1164, "step": 14776 }, { "epoch": 0.32561547318029826, "grad_norm": 0.8651344180107117, "learning_rate": 2.3634531108010708e-05, "loss": 0.109, "step": 14777 }, { "epoch": 0.32563750846981443, "grad_norm": 0.5697044730186462, "learning_rate": 2.363365570244187e-05, "loss": 0.0663, "step": 14778 }, { "epoch": 0.3256595437593306, "grad_norm": 0.7767298221588135, "learning_rate": 2.3632780252897194e-05, "loss": 0.0838, "step": 14779 }, { "epoch": 0.3256815790488467, "grad_norm": 0.9216198921203613, "learning_rate": 2.3631904759381145e-05, "loss": 0.0965, "step": 14780 }, { "epoch": 0.32570361433836287, "grad_norm": 1.486972689628601, "learning_rate": 2.363102922189817e-05, "loss": 0.1154, "step": 14781 }, { "epoch": 0.32572564962787903, "grad_norm": 0.5976458191871643, "learning_rate": 2.3630153640452736e-05, "loss": 0.0839, "step": 14782 }, { "epoch": 0.3257476849173952, "grad_norm": 0.6657495498657227, "learning_rate": 2.3629278015049306e-05, "loss": 0.1075, "step": 14783 }, { "epoch": 0.32576972020691136, "grad_norm": 0.831656277179718, "learning_rate": 2.362840234569233e-05, "loss": 0.096, "step": 14784 }, { "epoch": 0.3257917554964275, "grad_norm": 0.9347134232521057, "learning_rate": 2.3627526632386282e-05, "loss": 0.1148, "step": 14785 }, { "epoch": 0.3258137907859437, "grad_norm": 0.7378929257392883, "learning_rate": 2.362665087513561e-05, "loss": 0.0744, "step": 14786 }, { "epoch": 0.32583582607545986, "grad_norm": 1.1360825300216675, "learning_rate": 2.362577507394478e-05, "loss": 0.1111, "step": 14787 }, { "epoch": 0.325857861364976, "grad_norm": 0.9996440410614014, "learning_rate": 2.362489922881825e-05, "loss": 0.0975, "step": 14788 }, { "epoch": 0.3258798966544922, "grad_norm": 0.7389962673187256, "learning_rate": 2.3624023339760483e-05, "loss": 0.0724, "step": 14789 }, { "epoch": 0.32590193194400835, "grad_norm": 1.1994690895080566, "learning_rate": 2.362314740677594e-05, "loss": 0.0893, "step": 14790 }, { "epoch": 0.3259239672335245, "grad_norm": 1.0400160551071167, "learning_rate": 2.3622271429869085e-05, "loss": 0.1003, "step": 14791 }, { "epoch": 0.3259460025230406, "grad_norm": 0.7938522100448608, "learning_rate": 2.3621395409044377e-05, "loss": 0.0879, "step": 14792 }, { "epoch": 0.3259680378125568, "grad_norm": 0.6345827579498291, "learning_rate": 2.3620519344306283e-05, "loss": 0.09, "step": 14793 }, { "epoch": 0.32599007310207295, "grad_norm": 0.9979810118675232, "learning_rate": 2.3619643235659254e-05, "loss": 0.0827, "step": 14794 }, { "epoch": 0.3260121083915891, "grad_norm": 1.0295356512069702, "learning_rate": 2.3618767083107764e-05, "loss": 0.1069, "step": 14795 }, { "epoch": 0.3260341436811053, "grad_norm": 1.052936315536499, "learning_rate": 2.361789088665627e-05, "loss": 0.0896, "step": 14796 }, { "epoch": 0.32605617897062145, "grad_norm": 0.8792792558670044, "learning_rate": 2.361701464630924e-05, "loss": 0.0792, "step": 14797 }, { "epoch": 0.3260782142601376, "grad_norm": 0.9674112200737, "learning_rate": 2.361613836207113e-05, "loss": 0.0964, "step": 14798 }, { "epoch": 0.3261002495496538, "grad_norm": 0.7179070115089417, "learning_rate": 2.3615262033946407e-05, "loss": 0.1072, "step": 14799 }, { "epoch": 0.32612228483916994, "grad_norm": 1.0772805213928223, "learning_rate": 2.361438566193954e-05, "loss": 0.115, "step": 14800 }, { "epoch": 0.3261443201286861, "grad_norm": 0.36091557145118713, "learning_rate": 2.3613509246054982e-05, "loss": 0.0512, "step": 14801 }, { "epoch": 0.32616635541820227, "grad_norm": 0.6107581257820129, "learning_rate": 2.36126327862972e-05, "loss": 0.1492, "step": 14802 }, { "epoch": 0.32618839070771843, "grad_norm": 0.6515411138534546, "learning_rate": 2.3611756282670666e-05, "loss": 0.1018, "step": 14803 }, { "epoch": 0.3262104259972346, "grad_norm": 0.6631313562393188, "learning_rate": 2.361087973517984e-05, "loss": 0.0776, "step": 14804 }, { "epoch": 0.3262324612867507, "grad_norm": 0.5601704716682434, "learning_rate": 2.361000314382918e-05, "loss": 0.0615, "step": 14805 }, { "epoch": 0.3262544965762669, "grad_norm": 1.0883338451385498, "learning_rate": 2.360912650862316e-05, "loss": 0.0669, "step": 14806 }, { "epoch": 0.32627653186578304, "grad_norm": 0.6839714050292969, "learning_rate": 2.3608249829566246e-05, "loss": 0.069, "step": 14807 }, { "epoch": 0.3262985671552992, "grad_norm": 0.9245753884315491, "learning_rate": 2.3607373106662898e-05, "loss": 0.0928, "step": 14808 }, { "epoch": 0.32632060244481537, "grad_norm": 1.401296615600586, "learning_rate": 2.360649633991758e-05, "loss": 0.0876, "step": 14809 }, { "epoch": 0.32634263773433153, "grad_norm": 0.7491611838340759, "learning_rate": 2.3605619529334763e-05, "loss": 0.1274, "step": 14810 }, { "epoch": 0.3263646730238477, "grad_norm": 0.7972486615180969, "learning_rate": 2.3604742674918908e-05, "loss": 0.073, "step": 14811 }, { "epoch": 0.32638670831336386, "grad_norm": 0.8492093682289124, "learning_rate": 2.360386577667449e-05, "loss": 0.0831, "step": 14812 }, { "epoch": 0.32640874360288, "grad_norm": 0.7952030897140503, "learning_rate": 2.3602988834605962e-05, "loss": 0.0766, "step": 14813 }, { "epoch": 0.3264307788923962, "grad_norm": 0.5087858438491821, "learning_rate": 2.3602111848717807e-05, "loss": 0.077, "step": 14814 }, { "epoch": 0.32645281418191235, "grad_norm": 0.7116686105728149, "learning_rate": 2.360123481901448e-05, "loss": 0.0969, "step": 14815 }, { "epoch": 0.3264748494714285, "grad_norm": 0.7381458282470703, "learning_rate": 2.360035774550045e-05, "loss": 0.1289, "step": 14816 }, { "epoch": 0.32649688476094463, "grad_norm": 0.8146693110466003, "learning_rate": 2.3599480628180188e-05, "loss": 0.0858, "step": 14817 }, { "epoch": 0.3265189200504608, "grad_norm": 0.7041227221488953, "learning_rate": 2.3598603467058158e-05, "loss": 0.0944, "step": 14818 }, { "epoch": 0.32654095533997696, "grad_norm": 0.6642842292785645, "learning_rate": 2.3597726262138832e-05, "loss": 0.09, "step": 14819 }, { "epoch": 0.3265629906294931, "grad_norm": 0.7149049639701843, "learning_rate": 2.3596849013426677e-05, "loss": 0.0859, "step": 14820 }, { "epoch": 0.3265850259190093, "grad_norm": 0.6702252626419067, "learning_rate": 2.3595971720926157e-05, "loss": 0.0721, "step": 14821 }, { "epoch": 0.32660706120852545, "grad_norm": 1.0829148292541504, "learning_rate": 2.359509438464174e-05, "loss": 0.1102, "step": 14822 }, { "epoch": 0.3266290964980416, "grad_norm": 1.110763430595398, "learning_rate": 2.3594217004577903e-05, "loss": 0.1207, "step": 14823 }, { "epoch": 0.3266511317875578, "grad_norm": 0.6599185466766357, "learning_rate": 2.3593339580739107e-05, "loss": 0.0894, "step": 14824 }, { "epoch": 0.32667316707707394, "grad_norm": 0.8815330862998962, "learning_rate": 2.359246211312983e-05, "loss": 0.0849, "step": 14825 }, { "epoch": 0.3266952023665901, "grad_norm": 0.8370190858840942, "learning_rate": 2.3591584601754533e-05, "loss": 0.0953, "step": 14826 }, { "epoch": 0.3267172376561063, "grad_norm": 0.6448435187339783, "learning_rate": 2.3590707046617688e-05, "loss": 0.0947, "step": 14827 }, { "epoch": 0.32673927294562244, "grad_norm": 0.8687374591827393, "learning_rate": 2.3589829447723766e-05, "loss": 0.0763, "step": 14828 }, { "epoch": 0.32676130823513855, "grad_norm": 1.0075671672821045, "learning_rate": 2.3588951805077234e-05, "loss": 0.0935, "step": 14829 }, { "epoch": 0.3267833435246547, "grad_norm": 1.1298420429229736, "learning_rate": 2.3588074118682564e-05, "loss": 0.1231, "step": 14830 }, { "epoch": 0.3268053788141709, "grad_norm": 0.8670434355735779, "learning_rate": 2.3587196388544234e-05, "loss": 0.104, "step": 14831 }, { "epoch": 0.32682741410368704, "grad_norm": 0.5047993659973145, "learning_rate": 2.3586318614666704e-05, "loss": 0.0939, "step": 14832 }, { "epoch": 0.3268494493932032, "grad_norm": 0.7157438397407532, "learning_rate": 2.358544079705445e-05, "loss": 0.0817, "step": 14833 }, { "epoch": 0.32687148468271937, "grad_norm": 0.8816759586334229, "learning_rate": 2.358456293571194e-05, "loss": 0.0914, "step": 14834 }, { "epoch": 0.32689351997223554, "grad_norm": 0.7816691398620605, "learning_rate": 2.3583685030643648e-05, "loss": 0.0787, "step": 14835 }, { "epoch": 0.3269155552617517, "grad_norm": 0.7762150168418884, "learning_rate": 2.358280708185404e-05, "loss": 0.0723, "step": 14836 }, { "epoch": 0.32693759055126786, "grad_norm": 0.7385981678962708, "learning_rate": 2.3581929089347605e-05, "loss": 0.1081, "step": 14837 }, { "epoch": 0.32695962584078403, "grad_norm": 1.0663098096847534, "learning_rate": 2.3581051053128796e-05, "loss": 0.1093, "step": 14838 }, { "epoch": 0.3269816611303002, "grad_norm": 0.4467299282550812, "learning_rate": 2.358017297320209e-05, "loss": 0.0775, "step": 14839 }, { "epoch": 0.32700369641981636, "grad_norm": 0.7165716886520386, "learning_rate": 2.3579294849571966e-05, "loss": 0.0679, "step": 14840 }, { "epoch": 0.3270257317093325, "grad_norm": 1.031920313835144, "learning_rate": 2.3578416682242892e-05, "loss": 0.0652, "step": 14841 }, { "epoch": 0.32704776699884863, "grad_norm": 0.6789318323135376, "learning_rate": 2.3577538471219343e-05, "loss": 0.0919, "step": 14842 }, { "epoch": 0.3270698022883648, "grad_norm": 0.6530071496963501, "learning_rate": 2.357666021650579e-05, "loss": 0.0644, "step": 14843 }, { "epoch": 0.32709183757788096, "grad_norm": 0.9802469611167908, "learning_rate": 2.357578191810671e-05, "loss": 0.0676, "step": 14844 }, { "epoch": 0.3271138728673971, "grad_norm": 0.6213807463645935, "learning_rate": 2.357490357602657e-05, "loss": 0.1151, "step": 14845 }, { "epoch": 0.3271359081569133, "grad_norm": 0.8794264197349548, "learning_rate": 2.3574025190269844e-05, "loss": 0.1408, "step": 14846 }, { "epoch": 0.32715794344642946, "grad_norm": 0.9960561394691467, "learning_rate": 2.3573146760841018e-05, "loss": 0.0788, "step": 14847 }, { "epoch": 0.3271799787359456, "grad_norm": 0.7099752426147461, "learning_rate": 2.3572268287744557e-05, "loss": 0.0725, "step": 14848 }, { "epoch": 0.3272020140254618, "grad_norm": 0.8635347485542297, "learning_rate": 2.357138977098493e-05, "loss": 0.111, "step": 14849 }, { "epoch": 0.32722404931497795, "grad_norm": 0.8887455463409424, "learning_rate": 2.357051121056663e-05, "loss": 0.108, "step": 14850 }, { "epoch": 0.3272460846044941, "grad_norm": 0.620055079460144, "learning_rate": 2.3569632606494113e-05, "loss": 0.0539, "step": 14851 }, { "epoch": 0.3272681198940103, "grad_norm": 1.0116726160049438, "learning_rate": 2.3568753958771862e-05, "loss": 0.1228, "step": 14852 }, { "epoch": 0.32729015518352644, "grad_norm": 1.0130513906478882, "learning_rate": 2.356787526740435e-05, "loss": 0.15, "step": 14853 }, { "epoch": 0.32731219047304255, "grad_norm": 0.8688775300979614, "learning_rate": 2.356699653239606e-05, "loss": 0.0905, "step": 14854 }, { "epoch": 0.3273342257625587, "grad_norm": 0.6277415752410889, "learning_rate": 2.356611775375146e-05, "loss": 0.0836, "step": 14855 }, { "epoch": 0.3273562610520749, "grad_norm": 0.546077311038971, "learning_rate": 2.3565238931475027e-05, "loss": 0.085, "step": 14856 }, { "epoch": 0.32737829634159105, "grad_norm": 0.6191605925559998, "learning_rate": 2.356436006557124e-05, "loss": 0.1299, "step": 14857 }, { "epoch": 0.3274003316311072, "grad_norm": 0.5545650124549866, "learning_rate": 2.3563481156044577e-05, "loss": 0.0819, "step": 14858 }, { "epoch": 0.3274223669206234, "grad_norm": 0.6320422291755676, "learning_rate": 2.3562602202899515e-05, "loss": 0.0912, "step": 14859 }, { "epoch": 0.32744440221013954, "grad_norm": 0.5655336976051331, "learning_rate": 2.356172320614052e-05, "loss": 0.0739, "step": 14860 }, { "epoch": 0.3274664374996557, "grad_norm": 0.9909697771072388, "learning_rate": 2.3560844165772083e-05, "loss": 0.0935, "step": 14861 }, { "epoch": 0.32748847278917187, "grad_norm": 0.677767813205719, "learning_rate": 2.3559965081798672e-05, "loss": 0.1273, "step": 14862 }, { "epoch": 0.32751050807868803, "grad_norm": 0.8894343972206116, "learning_rate": 2.3559085954224775e-05, "loss": 0.0856, "step": 14863 }, { "epoch": 0.3275325433682042, "grad_norm": 0.9716536998748779, "learning_rate": 2.355820678305486e-05, "loss": 0.1096, "step": 14864 }, { "epoch": 0.32755457865772036, "grad_norm": 0.7666394710540771, "learning_rate": 2.3557327568293408e-05, "loss": 0.0713, "step": 14865 }, { "epoch": 0.3275766139472365, "grad_norm": 0.3982492685317993, "learning_rate": 2.35564483099449e-05, "loss": 0.0602, "step": 14866 }, { "epoch": 0.32759864923675264, "grad_norm": 0.6235237121582031, "learning_rate": 2.355556900801381e-05, "loss": 0.0981, "step": 14867 }, { "epoch": 0.3276206845262688, "grad_norm": 0.6165449619293213, "learning_rate": 2.3554689662504624e-05, "loss": 0.101, "step": 14868 }, { "epoch": 0.32764271981578497, "grad_norm": 0.7457374334335327, "learning_rate": 2.355381027342181e-05, "loss": 0.1099, "step": 14869 }, { "epoch": 0.32766475510530113, "grad_norm": 0.7463670969009399, "learning_rate": 2.3552930840769855e-05, "loss": 0.1055, "step": 14870 }, { "epoch": 0.3276867903948173, "grad_norm": 0.8038046956062317, "learning_rate": 2.355205136455324e-05, "loss": 0.1068, "step": 14871 }, { "epoch": 0.32770882568433346, "grad_norm": 0.829907238483429, "learning_rate": 2.3551171844776438e-05, "loss": 0.083, "step": 14872 }, { "epoch": 0.3277308609738496, "grad_norm": 0.7089143395423889, "learning_rate": 2.3550292281443934e-05, "loss": 0.1041, "step": 14873 }, { "epoch": 0.3277528962633658, "grad_norm": 0.6316052675247192, "learning_rate": 2.35494126745602e-05, "loss": 0.1068, "step": 14874 }, { "epoch": 0.32777493155288195, "grad_norm": 0.6778191924095154, "learning_rate": 2.3548533024129733e-05, "loss": 0.0872, "step": 14875 }, { "epoch": 0.3277969668423981, "grad_norm": 0.7827452421188354, "learning_rate": 2.3547653330157e-05, "loss": 0.0941, "step": 14876 }, { "epoch": 0.3278190021319143, "grad_norm": 0.7446569800376892, "learning_rate": 2.354677359264648e-05, "loss": 0.1177, "step": 14877 }, { "epoch": 0.32784103742143045, "grad_norm": 0.7823922634124756, "learning_rate": 2.3545893811602662e-05, "loss": 0.1077, "step": 14878 }, { "epoch": 0.32786307271094656, "grad_norm": 0.7208808660507202, "learning_rate": 2.3545013987030026e-05, "loss": 0.0831, "step": 14879 }, { "epoch": 0.3278851080004627, "grad_norm": 0.33592355251312256, "learning_rate": 2.354413411893305e-05, "loss": 0.0811, "step": 14880 }, { "epoch": 0.3279071432899789, "grad_norm": 0.7022990584373474, "learning_rate": 2.354325420731622e-05, "loss": 0.0647, "step": 14881 }, { "epoch": 0.32792917857949505, "grad_norm": 1.081749439239502, "learning_rate": 2.3542374252184008e-05, "loss": 0.1145, "step": 14882 }, { "epoch": 0.3279512138690112, "grad_norm": 0.7268089056015015, "learning_rate": 2.3541494253540907e-05, "loss": 0.0921, "step": 14883 }, { "epoch": 0.3279732491585274, "grad_norm": 0.924932062625885, "learning_rate": 2.35406142113914e-05, "loss": 0.1018, "step": 14884 }, { "epoch": 0.32799528444804354, "grad_norm": 0.7799166440963745, "learning_rate": 2.3539734125739958e-05, "loss": 0.0863, "step": 14885 }, { "epoch": 0.3280173197375597, "grad_norm": 0.9447663426399231, "learning_rate": 2.3538853996591075e-05, "loss": 0.1015, "step": 14886 }, { "epoch": 0.3280393550270759, "grad_norm": 0.9164073467254639, "learning_rate": 2.3537973823949226e-05, "loss": 0.0797, "step": 14887 }, { "epoch": 0.32806139031659204, "grad_norm": 0.6921780109405518, "learning_rate": 2.3537093607818903e-05, "loss": 0.0694, "step": 14888 }, { "epoch": 0.3280834256061082, "grad_norm": 0.6117858290672302, "learning_rate": 2.353621334820458e-05, "loss": 0.0696, "step": 14889 }, { "epoch": 0.32810546089562437, "grad_norm": 1.1227972507476807, "learning_rate": 2.353533304511075e-05, "loss": 0.1236, "step": 14890 }, { "epoch": 0.3281274961851405, "grad_norm": 0.6994627714157104, "learning_rate": 2.3534452698541887e-05, "loss": 0.0893, "step": 14891 }, { "epoch": 0.32814953147465664, "grad_norm": 0.8611591458320618, "learning_rate": 2.3533572308502482e-05, "loss": 0.1002, "step": 14892 }, { "epoch": 0.3281715667641728, "grad_norm": 0.8223095536231995, "learning_rate": 2.3532691874997013e-05, "loss": 0.0543, "step": 14893 }, { "epoch": 0.32819360205368897, "grad_norm": 1.129388451576233, "learning_rate": 2.3531811398029975e-05, "loss": 0.1073, "step": 14894 }, { "epoch": 0.32821563734320514, "grad_norm": 0.8771915435791016, "learning_rate": 2.3530930877605847e-05, "loss": 0.0944, "step": 14895 }, { "epoch": 0.3282376726327213, "grad_norm": 0.9977143406867981, "learning_rate": 2.3530050313729108e-05, "loss": 0.1106, "step": 14896 }, { "epoch": 0.32825970792223746, "grad_norm": 0.7190902233123779, "learning_rate": 2.352916970640425e-05, "loss": 0.0782, "step": 14897 }, { "epoch": 0.32828174321175363, "grad_norm": 0.34201955795288086, "learning_rate": 2.3528289055635753e-05, "loss": 0.069, "step": 14898 }, { "epoch": 0.3283037785012698, "grad_norm": 0.910563051700592, "learning_rate": 2.3527408361428115e-05, "loss": 0.1441, "step": 14899 }, { "epoch": 0.32832581379078596, "grad_norm": 0.7668402791023254, "learning_rate": 2.352652762378581e-05, "loss": 0.1171, "step": 14900 }, { "epoch": 0.3283478490803021, "grad_norm": 0.7974759936332703, "learning_rate": 2.3525646842713326e-05, "loss": 0.1089, "step": 14901 }, { "epoch": 0.3283698843698183, "grad_norm": 0.4951843321323395, "learning_rate": 2.3524766018215155e-05, "loss": 0.1047, "step": 14902 }, { "epoch": 0.3283919196593344, "grad_norm": 0.8710430264472961, "learning_rate": 2.3523885150295774e-05, "loss": 0.0977, "step": 14903 }, { "epoch": 0.32841395494885056, "grad_norm": 0.8622691035270691, "learning_rate": 2.352300423895968e-05, "loss": 0.0907, "step": 14904 }, { "epoch": 0.3284359902383667, "grad_norm": 0.8223705291748047, "learning_rate": 2.3522123284211347e-05, "loss": 0.1119, "step": 14905 }, { "epoch": 0.3284580255278829, "grad_norm": 0.988750159740448, "learning_rate": 2.3521242286055277e-05, "loss": 0.0873, "step": 14906 }, { "epoch": 0.32848006081739906, "grad_norm": 0.7868250608444214, "learning_rate": 2.352036124449595e-05, "loss": 0.1225, "step": 14907 }, { "epoch": 0.3285020961069152, "grad_norm": 0.5895336866378784, "learning_rate": 2.3519480159537852e-05, "loss": 0.0833, "step": 14908 }, { "epoch": 0.3285241313964314, "grad_norm": 1.0378891229629517, "learning_rate": 2.3518599031185472e-05, "loss": 0.1133, "step": 14909 }, { "epoch": 0.32854616668594755, "grad_norm": 1.2894772291183472, "learning_rate": 2.3517717859443305e-05, "loss": 0.1225, "step": 14910 }, { "epoch": 0.3285682019754637, "grad_norm": 1.0095903873443604, "learning_rate": 2.351683664431583e-05, "loss": 0.0848, "step": 14911 }, { "epoch": 0.3285902372649799, "grad_norm": 0.6345643997192383, "learning_rate": 2.3515955385807536e-05, "loss": 0.0738, "step": 14912 }, { "epoch": 0.32861227255449604, "grad_norm": 0.8409260511398315, "learning_rate": 2.351507408392292e-05, "loss": 0.0906, "step": 14913 }, { "epoch": 0.3286343078440122, "grad_norm": 0.9830416440963745, "learning_rate": 2.3514192738666463e-05, "loss": 0.1278, "step": 14914 }, { "epoch": 0.3286563431335284, "grad_norm": 0.7683547139167786, "learning_rate": 2.3513311350042658e-05, "loss": 0.105, "step": 14915 }, { "epoch": 0.3286783784230445, "grad_norm": 0.887811541557312, "learning_rate": 2.3512429918055987e-05, "loss": 0.1124, "step": 14916 }, { "epoch": 0.32870041371256065, "grad_norm": 0.6259961724281311, "learning_rate": 2.3511548442710953e-05, "loss": 0.0911, "step": 14917 }, { "epoch": 0.3287224490020768, "grad_norm": 0.6457082629203796, "learning_rate": 2.3510666924012035e-05, "loss": 0.0937, "step": 14918 }, { "epoch": 0.328744484291593, "grad_norm": 0.6158252358436584, "learning_rate": 2.3509785361963726e-05, "loss": 0.0681, "step": 14919 }, { "epoch": 0.32876651958110914, "grad_norm": 0.6340492963790894, "learning_rate": 2.3508903756570516e-05, "loss": 0.0762, "step": 14920 }, { "epoch": 0.3287885548706253, "grad_norm": 0.9092593789100647, "learning_rate": 2.3508022107836902e-05, "loss": 0.1169, "step": 14921 }, { "epoch": 0.32881059016014147, "grad_norm": 0.6059421896934509, "learning_rate": 2.3507140415767366e-05, "loss": 0.0829, "step": 14922 }, { "epoch": 0.32883262544965763, "grad_norm": 1.2396715879440308, "learning_rate": 2.3506258680366405e-05, "loss": 0.1024, "step": 14923 }, { "epoch": 0.3288546607391738, "grad_norm": 0.9178664088249207, "learning_rate": 2.35053769016385e-05, "loss": 0.1021, "step": 14924 }, { "epoch": 0.32887669602868996, "grad_norm": 0.8140486478805542, "learning_rate": 2.3504495079588156e-05, "loss": 0.0956, "step": 14925 }, { "epoch": 0.32889873131820613, "grad_norm": 1.6507006883621216, "learning_rate": 2.3503613214219852e-05, "loss": 0.096, "step": 14926 }, { "epoch": 0.3289207666077223, "grad_norm": 0.6518999934196472, "learning_rate": 2.350273130553809e-05, "loss": 0.0956, "step": 14927 }, { "epoch": 0.3289428018972384, "grad_norm": 0.9204018712043762, "learning_rate": 2.3501849353547358e-05, "loss": 0.0961, "step": 14928 }, { "epoch": 0.32896483718675457, "grad_norm": 0.6846562623977661, "learning_rate": 2.3500967358252145e-05, "loss": 0.0792, "step": 14929 }, { "epoch": 0.32898687247627073, "grad_norm": 1.08217191696167, "learning_rate": 2.350008531965695e-05, "loss": 0.1155, "step": 14930 }, { "epoch": 0.3290089077657869, "grad_norm": 0.801230788230896, "learning_rate": 2.349920323776626e-05, "loss": 0.0979, "step": 14931 }, { "epoch": 0.32903094305530306, "grad_norm": 0.9106273651123047, "learning_rate": 2.3498321112584572e-05, "loss": 0.099, "step": 14932 }, { "epoch": 0.3290529783448192, "grad_norm": 0.7133155465126038, "learning_rate": 2.3497438944116378e-05, "loss": 0.071, "step": 14933 }, { "epoch": 0.3290750136343354, "grad_norm": 0.7052655816078186, "learning_rate": 2.3496556732366168e-05, "loss": 0.0673, "step": 14934 }, { "epoch": 0.32909704892385155, "grad_norm": 0.7148565053939819, "learning_rate": 2.3495674477338443e-05, "loss": 0.109, "step": 14935 }, { "epoch": 0.3291190842133677, "grad_norm": 0.9031767845153809, "learning_rate": 2.349479217903769e-05, "loss": 0.1257, "step": 14936 }, { "epoch": 0.3291411195028839, "grad_norm": 0.8268365263938904, "learning_rate": 2.3493909837468404e-05, "loss": 0.0882, "step": 14937 }, { "epoch": 0.32916315479240005, "grad_norm": 0.9661137461662292, "learning_rate": 2.349302745263508e-05, "loss": 0.0762, "step": 14938 }, { "epoch": 0.3291851900819162, "grad_norm": 0.6148748397827148, "learning_rate": 2.3492145024542212e-05, "loss": 0.095, "step": 14939 }, { "epoch": 0.3292072253714323, "grad_norm": 1.429654836654663, "learning_rate": 2.3491262553194297e-05, "loss": 0.1029, "step": 14940 }, { "epoch": 0.3292292606609485, "grad_norm": 0.6806315183639526, "learning_rate": 2.3490380038595832e-05, "loss": 0.0849, "step": 14941 }, { "epoch": 0.32925129595046465, "grad_norm": 0.6796032786369324, "learning_rate": 2.3489497480751304e-05, "loss": 0.0913, "step": 14942 }, { "epoch": 0.3292733312399808, "grad_norm": 0.6898468732833862, "learning_rate": 2.3488614879665216e-05, "loss": 0.1049, "step": 14943 }, { "epoch": 0.329295366529497, "grad_norm": 0.9851124882698059, "learning_rate": 2.348773223534206e-05, "loss": 0.1643, "step": 14944 }, { "epoch": 0.32931740181901314, "grad_norm": 0.6618680953979492, "learning_rate": 2.3486849547786325e-05, "loss": 0.1182, "step": 14945 }, { "epoch": 0.3293394371085293, "grad_norm": 0.9784156680107117, "learning_rate": 2.348596681700252e-05, "loss": 0.0847, "step": 14946 }, { "epoch": 0.3293614723980455, "grad_norm": 1.1948851346969604, "learning_rate": 2.3485084042995138e-05, "loss": 0.1052, "step": 14947 }, { "epoch": 0.32938350768756164, "grad_norm": 0.44141295552253723, "learning_rate": 2.3484201225768674e-05, "loss": 0.0932, "step": 14948 }, { "epoch": 0.3294055429770778, "grad_norm": 0.5516396164894104, "learning_rate": 2.3483318365327616e-05, "loss": 0.0654, "step": 14949 }, { "epoch": 0.32942757826659397, "grad_norm": 0.6107796430587769, "learning_rate": 2.348243546167647e-05, "loss": 0.101, "step": 14950 }, { "epoch": 0.32944961355611013, "grad_norm": 0.9434934258460999, "learning_rate": 2.3481552514819735e-05, "loss": 0.0951, "step": 14951 }, { "epoch": 0.3294716488456263, "grad_norm": 1.0514603853225708, "learning_rate": 2.3480669524761904e-05, "loss": 0.0947, "step": 14952 }, { "epoch": 0.3294936841351424, "grad_norm": 0.764653205871582, "learning_rate": 2.3479786491507476e-05, "loss": 0.0879, "step": 14953 }, { "epoch": 0.32951571942465857, "grad_norm": 0.7076119184494019, "learning_rate": 2.3478903415060945e-05, "loss": 0.0748, "step": 14954 }, { "epoch": 0.32953775471417474, "grad_norm": 0.752621054649353, "learning_rate": 2.347802029542682e-05, "loss": 0.0878, "step": 14955 }, { "epoch": 0.3295597900036909, "grad_norm": 0.8954147696495056, "learning_rate": 2.3477137132609585e-05, "loss": 0.0949, "step": 14956 }, { "epoch": 0.32958182529320706, "grad_norm": 0.9067267775535583, "learning_rate": 2.3476253926613745e-05, "loss": 0.1424, "step": 14957 }, { "epoch": 0.32960386058272323, "grad_norm": 1.1511763334274292, "learning_rate": 2.34753706774438e-05, "loss": 0.1291, "step": 14958 }, { "epoch": 0.3296258958722394, "grad_norm": 0.81428062915802, "learning_rate": 2.347448738510425e-05, "loss": 0.0903, "step": 14959 }, { "epoch": 0.32964793116175556, "grad_norm": 0.5059829354286194, "learning_rate": 2.3473604049599582e-05, "loss": 0.0849, "step": 14960 }, { "epoch": 0.3296699664512717, "grad_norm": 0.515726625919342, "learning_rate": 2.347272067093431e-05, "loss": 0.0709, "step": 14961 }, { "epoch": 0.3296920017407879, "grad_norm": 0.5554551482200623, "learning_rate": 2.3471837249112934e-05, "loss": 0.0994, "step": 14962 }, { "epoch": 0.32971403703030405, "grad_norm": 0.9846471548080444, "learning_rate": 2.3470953784139942e-05, "loss": 0.0872, "step": 14963 }, { "epoch": 0.3297360723198202, "grad_norm": 1.331190586090088, "learning_rate": 2.3470070276019837e-05, "loss": 0.0765, "step": 14964 }, { "epoch": 0.3297581076093363, "grad_norm": 0.8613893389701843, "learning_rate": 2.346918672475713e-05, "loss": 0.1076, "step": 14965 }, { "epoch": 0.3297801428988525, "grad_norm": 0.5919505953788757, "learning_rate": 2.3468303130356306e-05, "loss": 0.0757, "step": 14966 }, { "epoch": 0.32980217818836866, "grad_norm": 0.7334373593330383, "learning_rate": 2.346741949282188e-05, "loss": 0.0764, "step": 14967 }, { "epoch": 0.3298242134778848, "grad_norm": 0.9914440512657166, "learning_rate": 2.3466535812158336e-05, "loss": 0.1162, "step": 14968 }, { "epoch": 0.329846248767401, "grad_norm": 0.5815432071685791, "learning_rate": 2.3465652088370195e-05, "loss": 0.0895, "step": 14969 }, { "epoch": 0.32986828405691715, "grad_norm": 0.7316205501556396, "learning_rate": 2.3464768321461945e-05, "loss": 0.1018, "step": 14970 }, { "epoch": 0.3298903193464333, "grad_norm": 0.9496960639953613, "learning_rate": 2.346388451143809e-05, "loss": 0.1004, "step": 14971 }, { "epoch": 0.3299123546359495, "grad_norm": 0.8434315323829651, "learning_rate": 2.3463000658303133e-05, "loss": 0.1025, "step": 14972 }, { "epoch": 0.32993438992546564, "grad_norm": 0.9052958488464355, "learning_rate": 2.3462116762061574e-05, "loss": 0.0937, "step": 14973 }, { "epoch": 0.3299564252149818, "grad_norm": 0.8944955468177795, "learning_rate": 2.346123282271792e-05, "loss": 0.09, "step": 14974 }, { "epoch": 0.329978460504498, "grad_norm": 0.677545964717865, "learning_rate": 2.3460348840276664e-05, "loss": 0.0754, "step": 14975 }, { "epoch": 0.33000049579401414, "grad_norm": 0.5285028219223022, "learning_rate": 2.3459464814742316e-05, "loss": 0.0766, "step": 14976 }, { "epoch": 0.33002253108353025, "grad_norm": 0.6866451501846313, "learning_rate": 2.345858074611938e-05, "loss": 0.0859, "step": 14977 }, { "epoch": 0.3300445663730464, "grad_norm": 0.6821892857551575, "learning_rate": 2.3457696634412356e-05, "loss": 0.124, "step": 14978 }, { "epoch": 0.3300666016625626, "grad_norm": 0.7580500245094299, "learning_rate": 2.345681247962575e-05, "loss": 0.0644, "step": 14979 }, { "epoch": 0.33008863695207874, "grad_norm": 0.5280499458312988, "learning_rate": 2.3455928281764063e-05, "loss": 0.078, "step": 14980 }, { "epoch": 0.3301106722415949, "grad_norm": 0.9418573975563049, "learning_rate": 2.3455044040831794e-05, "loss": 0.1029, "step": 14981 }, { "epoch": 0.33013270753111107, "grad_norm": 0.4546934962272644, "learning_rate": 2.3454159756833457e-05, "loss": 0.1072, "step": 14982 }, { "epoch": 0.33015474282062723, "grad_norm": 0.8048257231712341, "learning_rate": 2.3453275429773543e-05, "loss": 0.0916, "step": 14983 }, { "epoch": 0.3301767781101434, "grad_norm": 1.0186049938201904, "learning_rate": 2.3452391059656572e-05, "loss": 0.1212, "step": 14984 }, { "epoch": 0.33019881339965956, "grad_norm": 1.02760648727417, "learning_rate": 2.345150664648704e-05, "loss": 0.0965, "step": 14985 }, { "epoch": 0.3302208486891757, "grad_norm": 0.5649015307426453, "learning_rate": 2.3450622190269447e-05, "loss": 0.0894, "step": 14986 }, { "epoch": 0.3302428839786919, "grad_norm": 0.6014648675918579, "learning_rate": 2.3449737691008307e-05, "loss": 0.0903, "step": 14987 }, { "epoch": 0.33026491926820806, "grad_norm": 0.6617090106010437, "learning_rate": 2.3448853148708126e-05, "loss": 0.0915, "step": 14988 }, { "epoch": 0.3302869545577242, "grad_norm": 1.1156020164489746, "learning_rate": 2.34479685633734e-05, "loss": 0.0586, "step": 14989 }, { "epoch": 0.33030898984724033, "grad_norm": 0.7303658127784729, "learning_rate": 2.3447083935008643e-05, "loss": 0.1052, "step": 14990 }, { "epoch": 0.3303310251367565, "grad_norm": 0.8094229102134705, "learning_rate": 2.3446199263618354e-05, "loss": 0.0988, "step": 14991 }, { "epoch": 0.33035306042627266, "grad_norm": 1.189112663269043, "learning_rate": 2.3445314549207048e-05, "loss": 0.1227, "step": 14992 }, { "epoch": 0.3303750957157888, "grad_norm": 0.41986730694770813, "learning_rate": 2.3444429791779224e-05, "loss": 0.0885, "step": 14993 }, { "epoch": 0.330397131005305, "grad_norm": 1.01063072681427, "learning_rate": 2.3443544991339386e-05, "loss": 0.0926, "step": 14994 }, { "epoch": 0.33041916629482115, "grad_norm": 0.6786051392555237, "learning_rate": 2.3442660147892053e-05, "loss": 0.1081, "step": 14995 }, { "epoch": 0.3304412015843373, "grad_norm": 0.5714853405952454, "learning_rate": 2.344177526144172e-05, "loss": 0.0703, "step": 14996 }, { "epoch": 0.3304632368738535, "grad_norm": 1.2278445959091187, "learning_rate": 2.34408903319929e-05, "loss": 0.1023, "step": 14997 }, { "epoch": 0.33048527216336965, "grad_norm": 1.0069268941879272, "learning_rate": 2.34400053595501e-05, "loss": 0.1042, "step": 14998 }, { "epoch": 0.3305073074528858, "grad_norm": 1.1091728210449219, "learning_rate": 2.343912034411783e-05, "loss": 0.1561, "step": 14999 }, { "epoch": 0.330529342742402, "grad_norm": 0.5446799993515015, "learning_rate": 2.3438235285700586e-05, "loss": 0.1223, "step": 15000 }, { "epoch": 0.33055137803191814, "grad_norm": 0.37743058800697327, "learning_rate": 2.3437350184302894e-05, "loss": 0.0651, "step": 15001 }, { "epoch": 0.33057341332143425, "grad_norm": 0.8343467712402344, "learning_rate": 2.3436465039929244e-05, "loss": 0.0786, "step": 15002 }, { "epoch": 0.3305954486109504, "grad_norm": 0.6643718481063843, "learning_rate": 2.343557985258416e-05, "loss": 0.0934, "step": 15003 }, { "epoch": 0.3306174839004666, "grad_norm": 0.6559087038040161, "learning_rate": 2.3434694622272143e-05, "loss": 0.0871, "step": 15004 }, { "epoch": 0.33063951918998274, "grad_norm": 0.875584602355957, "learning_rate": 2.3433809348997707e-05, "loss": 0.0943, "step": 15005 }, { "epoch": 0.3306615544794989, "grad_norm": 0.7974764704704285, "learning_rate": 2.3432924032765348e-05, "loss": 0.078, "step": 15006 }, { "epoch": 0.3306835897690151, "grad_norm": 0.708474338054657, "learning_rate": 2.3432038673579593e-05, "loss": 0.0935, "step": 15007 }, { "epoch": 0.33070562505853124, "grad_norm": 0.5725628137588501, "learning_rate": 2.343115327144494e-05, "loss": 0.1204, "step": 15008 }, { "epoch": 0.3307276603480474, "grad_norm": 0.9261643886566162, "learning_rate": 2.3430267826365904e-05, "loss": 0.1168, "step": 15009 }, { "epoch": 0.33074969563756357, "grad_norm": 0.49417203664779663, "learning_rate": 2.3429382338346986e-05, "loss": 0.0947, "step": 15010 }, { "epoch": 0.33077173092707973, "grad_norm": 0.7467796206474304, "learning_rate": 2.3428496807392717e-05, "loss": 0.0945, "step": 15011 }, { "epoch": 0.3307937662165959, "grad_norm": 0.49666261672973633, "learning_rate": 2.342761123350758e-05, "loss": 0.0348, "step": 15012 }, { "epoch": 0.33081580150611206, "grad_norm": 0.6389216780662537, "learning_rate": 2.342672561669611e-05, "loss": 0.0932, "step": 15013 }, { "epoch": 0.3308378367956282, "grad_norm": 0.8651559352874756, "learning_rate": 2.3425839956962803e-05, "loss": 0.0729, "step": 15014 }, { "epoch": 0.33085987208514434, "grad_norm": 1.123497486114502, "learning_rate": 2.3424954254312177e-05, "loss": 0.0917, "step": 15015 }, { "epoch": 0.3308819073746605, "grad_norm": 0.9810870289802551, "learning_rate": 2.3424068508748738e-05, "loss": 0.1372, "step": 15016 }, { "epoch": 0.33090394266417666, "grad_norm": 0.4584200978279114, "learning_rate": 2.3423182720277e-05, "loss": 0.0883, "step": 15017 }, { "epoch": 0.33092597795369283, "grad_norm": 0.8922361135482788, "learning_rate": 2.342229688890148e-05, "loss": 0.1403, "step": 15018 }, { "epoch": 0.330948013243209, "grad_norm": 0.9215223789215088, "learning_rate": 2.3421411014626686e-05, "loss": 0.0762, "step": 15019 }, { "epoch": 0.33097004853272516, "grad_norm": 0.6355478763580322, "learning_rate": 2.342052509745713e-05, "loss": 0.071, "step": 15020 }, { "epoch": 0.3309920838222413, "grad_norm": 0.8855375647544861, "learning_rate": 2.3419639137397318e-05, "loss": 0.1009, "step": 15021 }, { "epoch": 0.3310141191117575, "grad_norm": 0.60923832654953, "learning_rate": 2.3418753134451777e-05, "loss": 0.0821, "step": 15022 }, { "epoch": 0.33103615440127365, "grad_norm": 1.1278213262557983, "learning_rate": 2.3417867088625004e-05, "loss": 0.0603, "step": 15023 }, { "epoch": 0.3310581896907898, "grad_norm": 0.6403681039810181, "learning_rate": 2.341698099992152e-05, "loss": 0.1262, "step": 15024 }, { "epoch": 0.331080224980306, "grad_norm": 0.6708784103393555, "learning_rate": 2.3416094868345846e-05, "loss": 0.0537, "step": 15025 }, { "epoch": 0.33110226026982215, "grad_norm": 0.5056177377700806, "learning_rate": 2.3415208693902486e-05, "loss": 0.0769, "step": 15026 }, { "epoch": 0.33112429555933826, "grad_norm": 0.9730745553970337, "learning_rate": 2.341432247659595e-05, "loss": 0.0969, "step": 15027 }, { "epoch": 0.3311463308488544, "grad_norm": 0.6502094268798828, "learning_rate": 2.341343621643076e-05, "loss": 0.0976, "step": 15028 }, { "epoch": 0.3311683661383706, "grad_norm": 0.5070884227752686, "learning_rate": 2.3412549913411427e-05, "loss": 0.0967, "step": 15029 }, { "epoch": 0.33119040142788675, "grad_norm": 0.5454899072647095, "learning_rate": 2.3411663567542466e-05, "loss": 0.1131, "step": 15030 }, { "epoch": 0.3312124367174029, "grad_norm": 0.591162919998169, "learning_rate": 2.3410777178828394e-05, "loss": 0.0957, "step": 15031 }, { "epoch": 0.3312344720069191, "grad_norm": 1.023773193359375, "learning_rate": 2.3409890747273723e-05, "loss": 0.1055, "step": 15032 }, { "epoch": 0.33125650729643524, "grad_norm": 0.7453376054763794, "learning_rate": 2.3409004272882967e-05, "loss": 0.0761, "step": 15033 }, { "epoch": 0.3312785425859514, "grad_norm": 0.9794389605522156, "learning_rate": 2.340811775566065e-05, "loss": 0.0724, "step": 15034 }, { "epoch": 0.3313005778754676, "grad_norm": 0.8885385990142822, "learning_rate": 2.3407231195611268e-05, "loss": 0.0992, "step": 15035 }, { "epoch": 0.33132261316498374, "grad_norm": 0.9686161279678345, "learning_rate": 2.340634459273936e-05, "loss": 0.1026, "step": 15036 }, { "epoch": 0.3313446484544999, "grad_norm": 1.119218349456787, "learning_rate": 2.3405457947049426e-05, "loss": 0.0729, "step": 15037 }, { "epoch": 0.33136668374401607, "grad_norm": 0.9271618723869324, "learning_rate": 2.3404571258545992e-05, "loss": 0.1153, "step": 15038 }, { "epoch": 0.3313887190335322, "grad_norm": 0.7215677499771118, "learning_rate": 2.3403684527233565e-05, "loss": 0.0789, "step": 15039 }, { "epoch": 0.33141075432304834, "grad_norm": 0.9756208062171936, "learning_rate": 2.3402797753116662e-05, "loss": 0.0721, "step": 15040 }, { "epoch": 0.3314327896125645, "grad_norm": 0.7735270857810974, "learning_rate": 2.3401910936199812e-05, "loss": 0.0851, "step": 15041 }, { "epoch": 0.33145482490208067, "grad_norm": 0.5656499862670898, "learning_rate": 2.3401024076487523e-05, "loss": 0.0765, "step": 15042 }, { "epoch": 0.33147686019159683, "grad_norm": 0.9210596084594727, "learning_rate": 2.3400137173984312e-05, "loss": 0.0797, "step": 15043 }, { "epoch": 0.331498895481113, "grad_norm": 0.8133765459060669, "learning_rate": 2.3399250228694696e-05, "loss": 0.0941, "step": 15044 }, { "epoch": 0.33152093077062916, "grad_norm": 0.4716878831386566, "learning_rate": 2.3398363240623197e-05, "loss": 0.0837, "step": 15045 }, { "epoch": 0.3315429660601453, "grad_norm": 0.5321792364120483, "learning_rate": 2.339747620977433e-05, "loss": 0.0668, "step": 15046 }, { "epoch": 0.3315650013496615, "grad_norm": 0.873182475566864, "learning_rate": 2.3396589136152617e-05, "loss": 0.1063, "step": 15047 }, { "epoch": 0.33158703663917766, "grad_norm": 0.5676478743553162, "learning_rate": 2.3395702019762565e-05, "loss": 0.0867, "step": 15048 }, { "epoch": 0.3316090719286938, "grad_norm": 0.9222946763038635, "learning_rate": 2.3394814860608707e-05, "loss": 0.0852, "step": 15049 }, { "epoch": 0.33163110721821, "grad_norm": 0.5542446970939636, "learning_rate": 2.3393927658695555e-05, "loss": 0.0481, "step": 15050 }, { "epoch": 0.33165314250772615, "grad_norm": 0.7176408171653748, "learning_rate": 2.3393040414027628e-05, "loss": 0.112, "step": 15051 }, { "epoch": 0.33167517779724226, "grad_norm": 0.6691516041755676, "learning_rate": 2.339215312660944e-05, "loss": 0.1049, "step": 15052 }, { "epoch": 0.3316972130867584, "grad_norm": 0.8381008505821228, "learning_rate": 2.3391265796445522e-05, "loss": 0.1238, "step": 15053 }, { "epoch": 0.3317192483762746, "grad_norm": 1.0869133472442627, "learning_rate": 2.3390378423540386e-05, "loss": 0.1027, "step": 15054 }, { "epoch": 0.33174128366579075, "grad_norm": 1.0433533191680908, "learning_rate": 2.338949100789855e-05, "loss": 0.1299, "step": 15055 }, { "epoch": 0.3317633189553069, "grad_norm": 0.8168616890907288, "learning_rate": 2.338860354952454e-05, "loss": 0.1174, "step": 15056 }, { "epoch": 0.3317853542448231, "grad_norm": 0.6078687310218811, "learning_rate": 2.3387716048422875e-05, "loss": 0.1068, "step": 15057 }, { "epoch": 0.33180738953433925, "grad_norm": 0.754878044128418, "learning_rate": 2.3386828504598073e-05, "loss": 0.0848, "step": 15058 }, { "epoch": 0.3318294248238554, "grad_norm": 0.7241894602775574, "learning_rate": 2.338594091805466e-05, "loss": 0.0668, "step": 15059 }, { "epoch": 0.3318514601133716, "grad_norm": 0.9278462529182434, "learning_rate": 2.3385053288797146e-05, "loss": 0.073, "step": 15060 }, { "epoch": 0.33187349540288774, "grad_norm": 0.6581071019172668, "learning_rate": 2.338416561683006e-05, "loss": 0.1233, "step": 15061 }, { "epoch": 0.3318955306924039, "grad_norm": 0.7621634602546692, "learning_rate": 2.338327790215793e-05, "loss": 0.097, "step": 15062 }, { "epoch": 0.33191756598192007, "grad_norm": 0.6517555117607117, "learning_rate": 2.338239014478526e-05, "loss": 0.082, "step": 15063 }, { "epoch": 0.3319396012714362, "grad_norm": 0.8158929944038391, "learning_rate": 2.338150234471659e-05, "loss": 0.1136, "step": 15064 }, { "epoch": 0.33196163656095234, "grad_norm": 0.6345484256744385, "learning_rate": 2.338061450195643e-05, "loss": 0.0773, "step": 15065 }, { "epoch": 0.3319836718504685, "grad_norm": 0.5228973031044006, "learning_rate": 2.3379726616509312e-05, "loss": 0.1289, "step": 15066 }, { "epoch": 0.3320057071399847, "grad_norm": 0.8111063241958618, "learning_rate": 2.3378838688379747e-05, "loss": 0.0835, "step": 15067 }, { "epoch": 0.33202774242950084, "grad_norm": 0.666355550289154, "learning_rate": 2.3377950717572264e-05, "loss": 0.1342, "step": 15068 }, { "epoch": 0.332049777719017, "grad_norm": 0.8722411394119263, "learning_rate": 2.3377062704091387e-05, "loss": 0.1152, "step": 15069 }, { "epoch": 0.33207181300853317, "grad_norm": 1.0088205337524414, "learning_rate": 2.3376174647941635e-05, "loss": 0.1159, "step": 15070 }, { "epoch": 0.33209384829804933, "grad_norm": 0.8363375663757324, "learning_rate": 2.3375286549127538e-05, "loss": 0.1272, "step": 15071 }, { "epoch": 0.3321158835875655, "grad_norm": 0.6523289680480957, "learning_rate": 2.3374398407653612e-05, "loss": 0.0649, "step": 15072 }, { "epoch": 0.33213791887708166, "grad_norm": 0.7152188420295715, "learning_rate": 2.3373510223524385e-05, "loss": 0.0726, "step": 15073 }, { "epoch": 0.3321599541665978, "grad_norm": 0.4623672366142273, "learning_rate": 2.3372621996744382e-05, "loss": 0.099, "step": 15074 }, { "epoch": 0.332181989456114, "grad_norm": 0.8344021439552307, "learning_rate": 2.337173372731812e-05, "loss": 0.1153, "step": 15075 }, { "epoch": 0.3322040247456301, "grad_norm": 0.8621276617050171, "learning_rate": 2.3370845415250136e-05, "loss": 0.0843, "step": 15076 }, { "epoch": 0.33222606003514626, "grad_norm": 0.6757711172103882, "learning_rate": 2.3369957060544943e-05, "loss": 0.0461, "step": 15077 }, { "epoch": 0.33224809532466243, "grad_norm": 0.7364138960838318, "learning_rate": 2.336906866320707e-05, "loss": 0.0893, "step": 15078 }, { "epoch": 0.3322701306141786, "grad_norm": 1.1353858709335327, "learning_rate": 2.3368180223241045e-05, "loss": 0.1319, "step": 15079 }, { "epoch": 0.33229216590369476, "grad_norm": 0.8927417397499084, "learning_rate": 2.336729174065139e-05, "loss": 0.0715, "step": 15080 }, { "epoch": 0.3323142011932109, "grad_norm": 0.6698751449584961, "learning_rate": 2.3366403215442632e-05, "loss": 0.0989, "step": 15081 }, { "epoch": 0.3323362364827271, "grad_norm": 0.6507358551025391, "learning_rate": 2.3365514647619298e-05, "loss": 0.067, "step": 15082 }, { "epoch": 0.33235827177224325, "grad_norm": 0.868266224861145, "learning_rate": 2.3364626037185906e-05, "loss": 0.0811, "step": 15083 }, { "epoch": 0.3323803070617594, "grad_norm": 0.5731257200241089, "learning_rate": 2.336373738414699e-05, "loss": 0.0817, "step": 15084 }, { "epoch": 0.3324023423512756, "grad_norm": 0.6119639873504639, "learning_rate": 2.3362848688507076e-05, "loss": 0.0804, "step": 15085 }, { "epoch": 0.33242437764079175, "grad_norm": 0.7341119050979614, "learning_rate": 2.336195995027069e-05, "loss": 0.101, "step": 15086 }, { "epoch": 0.3324464129303079, "grad_norm": 0.751043438911438, "learning_rate": 2.3361071169442358e-05, "loss": 0.0908, "step": 15087 }, { "epoch": 0.3324684482198241, "grad_norm": 0.988601565361023, "learning_rate": 2.3360182346026604e-05, "loss": 0.096, "step": 15088 }, { "epoch": 0.3324904835093402, "grad_norm": 0.7612494826316833, "learning_rate": 2.335929348002796e-05, "loss": 0.0796, "step": 15089 }, { "epoch": 0.33251251879885635, "grad_norm": 1.0212434530258179, "learning_rate": 2.335840457145095e-05, "loss": 0.1501, "step": 15090 }, { "epoch": 0.3325345540883725, "grad_norm": 1.2240312099456787, "learning_rate": 2.3357515620300105e-05, "loss": 0.1175, "step": 15091 }, { "epoch": 0.3325565893778887, "grad_norm": 0.8289715647697449, "learning_rate": 2.335662662657995e-05, "loss": 0.1131, "step": 15092 }, { "epoch": 0.33257862466740484, "grad_norm": 1.114748477935791, "learning_rate": 2.3355737590295017e-05, "loss": 0.1093, "step": 15093 }, { "epoch": 0.332600659956921, "grad_norm": 0.7226444482803345, "learning_rate": 2.335484851144983e-05, "loss": 0.0541, "step": 15094 }, { "epoch": 0.33262269524643717, "grad_norm": 0.7497251033782959, "learning_rate": 2.3353959390048926e-05, "loss": 0.0587, "step": 15095 }, { "epoch": 0.33264473053595334, "grad_norm": 0.8542574644088745, "learning_rate": 2.3353070226096817e-05, "loss": 0.0802, "step": 15096 }, { "epoch": 0.3326667658254695, "grad_norm": 0.8839929699897766, "learning_rate": 2.3352181019598043e-05, "loss": 0.103, "step": 15097 }, { "epoch": 0.33268880111498567, "grad_norm": 0.5969073176383972, "learning_rate": 2.3351291770557134e-05, "loss": 0.0563, "step": 15098 }, { "epoch": 0.33271083640450183, "grad_norm": 0.6952133178710938, "learning_rate": 2.3350402478978624e-05, "loss": 0.1024, "step": 15099 }, { "epoch": 0.332732871694018, "grad_norm": 0.5220822095870972, "learning_rate": 2.3349513144867028e-05, "loss": 0.1288, "step": 15100 }, { "epoch": 0.3327549069835341, "grad_norm": 0.4338352084159851, "learning_rate": 2.334862376822689e-05, "loss": 0.0815, "step": 15101 }, { "epoch": 0.33277694227305027, "grad_norm": 0.6725854277610779, "learning_rate": 2.334773434906273e-05, "loss": 0.096, "step": 15102 }, { "epoch": 0.33279897756256643, "grad_norm": 0.6427518129348755, "learning_rate": 2.3346844887379087e-05, "loss": 0.0711, "step": 15103 }, { "epoch": 0.3328210128520826, "grad_norm": 1.2367010116577148, "learning_rate": 2.334595538318048e-05, "loss": 0.0715, "step": 15104 }, { "epoch": 0.33284304814159876, "grad_norm": 1.0335365533828735, "learning_rate": 2.3345065836471455e-05, "loss": 0.124, "step": 15105 }, { "epoch": 0.3328650834311149, "grad_norm": 0.5115678310394287, "learning_rate": 2.334417624725653e-05, "loss": 0.0897, "step": 15106 }, { "epoch": 0.3328871187206311, "grad_norm": 0.8869833946228027, "learning_rate": 2.3343286615540238e-05, "loss": 0.1437, "step": 15107 }, { "epoch": 0.33290915401014726, "grad_norm": 0.4671134054660797, "learning_rate": 2.334239694132712e-05, "loss": 0.0951, "step": 15108 }, { "epoch": 0.3329311892996634, "grad_norm": 0.9224516153335571, "learning_rate": 2.33415072246217e-05, "loss": 0.123, "step": 15109 }, { "epoch": 0.3329532245891796, "grad_norm": 0.6717328429222107, "learning_rate": 2.3340617465428505e-05, "loss": 0.1101, "step": 15110 }, { "epoch": 0.33297525987869575, "grad_norm": 1.1666375398635864, "learning_rate": 2.3339727663752077e-05, "loss": 0.0757, "step": 15111 }, { "epoch": 0.3329972951682119, "grad_norm": 0.8205119371414185, "learning_rate": 2.3338837819596942e-05, "loss": 0.1086, "step": 15112 }, { "epoch": 0.333019330457728, "grad_norm": 0.6876012086868286, "learning_rate": 2.3337947932967636e-05, "loss": 0.0877, "step": 15113 }, { "epoch": 0.3330413657472442, "grad_norm": 0.73736572265625, "learning_rate": 2.333705800386869e-05, "loss": 0.1089, "step": 15114 }, { "epoch": 0.33306340103676035, "grad_norm": 1.0455760955810547, "learning_rate": 2.333616803230463e-05, "loss": 0.0798, "step": 15115 }, { "epoch": 0.3330854363262765, "grad_norm": 0.5707448124885559, "learning_rate": 2.3335278018280008e-05, "loss": 0.0987, "step": 15116 }, { "epoch": 0.3331074716157927, "grad_norm": 1.011120080947876, "learning_rate": 2.3334387961799338e-05, "loss": 0.0985, "step": 15117 }, { "epoch": 0.33312950690530885, "grad_norm": 1.2802486419677734, "learning_rate": 2.3333497862867162e-05, "loss": 0.1159, "step": 15118 }, { "epoch": 0.333151542194825, "grad_norm": 1.4700257778167725, "learning_rate": 2.3332607721488014e-05, "loss": 0.0631, "step": 15119 }, { "epoch": 0.3331735774843412, "grad_norm": 0.599355161190033, "learning_rate": 2.3331717537666427e-05, "loss": 0.092, "step": 15120 }, { "epoch": 0.33319561277385734, "grad_norm": 0.5555622577667236, "learning_rate": 2.333082731140693e-05, "loss": 0.0583, "step": 15121 }, { "epoch": 0.3332176480633735, "grad_norm": 0.9896494150161743, "learning_rate": 2.332993704271407e-05, "loss": 0.1307, "step": 15122 }, { "epoch": 0.33323968335288967, "grad_norm": 0.90447998046875, "learning_rate": 2.3329046731592364e-05, "loss": 0.111, "step": 15123 }, { "epoch": 0.33326171864240584, "grad_norm": 0.845391035079956, "learning_rate": 2.3328156378046364e-05, "loss": 0.0759, "step": 15124 }, { "epoch": 0.333283753931922, "grad_norm": 1.1099493503570557, "learning_rate": 2.3327265982080598e-05, "loss": 0.083, "step": 15125 }, { "epoch": 0.3333057892214381, "grad_norm": 0.7573719024658203, "learning_rate": 2.3326375543699596e-05, "loss": 0.0741, "step": 15126 }, { "epoch": 0.3333278245109543, "grad_norm": 0.7921680212020874, "learning_rate": 2.3325485062907898e-05, "loss": 0.1217, "step": 15127 }, { "epoch": 0.33334985980047044, "grad_norm": 0.8119968771934509, "learning_rate": 2.3324594539710048e-05, "loss": 0.1073, "step": 15128 }, { "epoch": 0.3333718950899866, "grad_norm": 1.031628131866455, "learning_rate": 2.332370397411057e-05, "loss": 0.094, "step": 15129 }, { "epoch": 0.33339393037950277, "grad_norm": 0.7051605582237244, "learning_rate": 2.3322813366113998e-05, "loss": 0.0765, "step": 15130 }, { "epoch": 0.33341596566901893, "grad_norm": 1.0870383977890015, "learning_rate": 2.332192271572488e-05, "loss": 0.0849, "step": 15131 }, { "epoch": 0.3334380009585351, "grad_norm": 0.598419725894928, "learning_rate": 2.3321032022947745e-05, "loss": 0.0984, "step": 15132 }, { "epoch": 0.33346003624805126, "grad_norm": 0.8795433640480042, "learning_rate": 2.3320141287787138e-05, "loss": 0.0856, "step": 15133 }, { "epoch": 0.3334820715375674, "grad_norm": 0.9385765194892883, "learning_rate": 2.3319250510247584e-05, "loss": 0.0786, "step": 15134 }, { "epoch": 0.3335041068270836, "grad_norm": 0.8710802793502808, "learning_rate": 2.331835969033363e-05, "loss": 0.0741, "step": 15135 }, { "epoch": 0.33352614211659976, "grad_norm": 1.2193012237548828, "learning_rate": 2.33174688280498e-05, "loss": 0.1165, "step": 15136 }, { "epoch": 0.3335481774061159, "grad_norm": 0.7029123306274414, "learning_rate": 2.3316577923400652e-05, "loss": 0.0984, "step": 15137 }, { "epoch": 0.33357021269563203, "grad_norm": 0.8328605890274048, "learning_rate": 2.3315686976390704e-05, "loss": 0.0989, "step": 15138 }, { "epoch": 0.3335922479851482, "grad_norm": 0.7365296483039856, "learning_rate": 2.331479598702451e-05, "loss": 0.0601, "step": 15139 }, { "epoch": 0.33361428327466436, "grad_norm": 1.37079656124115, "learning_rate": 2.3313904955306602e-05, "loss": 0.1215, "step": 15140 }, { "epoch": 0.3336363185641805, "grad_norm": 0.4912809133529663, "learning_rate": 2.3313013881241515e-05, "loss": 0.0689, "step": 15141 }, { "epoch": 0.3336583538536967, "grad_norm": 0.6438777446746826, "learning_rate": 2.3312122764833787e-05, "loss": 0.0808, "step": 15142 }, { "epoch": 0.33368038914321285, "grad_norm": 1.0949914455413818, "learning_rate": 2.3311231606087967e-05, "loss": 0.0932, "step": 15143 }, { "epoch": 0.333702424432729, "grad_norm": 0.9931690692901611, "learning_rate": 2.3310340405008582e-05, "loss": 0.0849, "step": 15144 }, { "epoch": 0.3337244597222452, "grad_norm": 0.7167768478393555, "learning_rate": 2.330944916160018e-05, "loss": 0.0768, "step": 15145 }, { "epoch": 0.33374649501176135, "grad_norm": 0.8657384514808655, "learning_rate": 2.3308557875867292e-05, "loss": 0.1085, "step": 15146 }, { "epoch": 0.3337685303012775, "grad_norm": 0.9923131465911865, "learning_rate": 2.3307666547814473e-05, "loss": 0.1002, "step": 15147 }, { "epoch": 0.3337905655907937, "grad_norm": 0.6838983297348022, "learning_rate": 2.3306775177446243e-05, "loss": 0.0946, "step": 15148 }, { "epoch": 0.33381260088030984, "grad_norm": 0.7395469546318054, "learning_rate": 2.3305883764767163e-05, "loss": 0.072, "step": 15149 }, { "epoch": 0.33383463616982595, "grad_norm": 0.661200761795044, "learning_rate": 2.3304992309781758e-05, "loss": 0.0781, "step": 15150 }, { "epoch": 0.3338566714593421, "grad_norm": 0.871705174446106, "learning_rate": 2.3304100812494572e-05, "loss": 0.088, "step": 15151 }, { "epoch": 0.3338787067488583, "grad_norm": 0.6512472629547119, "learning_rate": 2.330320927291015e-05, "loss": 0.0712, "step": 15152 }, { "epoch": 0.33390074203837444, "grad_norm": 0.5396845936775208, "learning_rate": 2.3302317691033028e-05, "loss": 0.0718, "step": 15153 }, { "epoch": 0.3339227773278906, "grad_norm": 0.9515482187271118, "learning_rate": 2.3301426066867754e-05, "loss": 0.1108, "step": 15154 }, { "epoch": 0.33394481261740677, "grad_norm": 0.6856574416160583, "learning_rate": 2.3300534400418862e-05, "loss": 0.0588, "step": 15155 }, { "epoch": 0.33396684790692294, "grad_norm": 0.6162351965904236, "learning_rate": 2.3299642691690897e-05, "loss": 0.1022, "step": 15156 }, { "epoch": 0.3339888831964391, "grad_norm": 0.4813095033168793, "learning_rate": 2.3298750940688406e-05, "loss": 0.0855, "step": 15157 }, { "epoch": 0.33401091848595527, "grad_norm": 0.6650312542915344, "learning_rate": 2.3297859147415924e-05, "loss": 0.0771, "step": 15158 }, { "epoch": 0.33403295377547143, "grad_norm": 0.7860421538352966, "learning_rate": 2.329696731187799e-05, "loss": 0.0856, "step": 15159 }, { "epoch": 0.3340549890649876, "grad_norm": 0.8248327970504761, "learning_rate": 2.3296075434079158e-05, "loss": 0.098, "step": 15160 }, { "epoch": 0.33407702435450376, "grad_norm": 0.3263639807701111, "learning_rate": 2.3295183514023964e-05, "loss": 0.0481, "step": 15161 }, { "epoch": 0.3340990596440199, "grad_norm": 0.9419910311698914, "learning_rate": 2.3294291551716957e-05, "loss": 0.0952, "step": 15162 }, { "epoch": 0.33412109493353603, "grad_norm": 0.6968479156494141, "learning_rate": 2.3293399547162667e-05, "loss": 0.0976, "step": 15163 }, { "epoch": 0.3341431302230522, "grad_norm": 0.776898980140686, "learning_rate": 2.3292507500365652e-05, "loss": 0.0834, "step": 15164 }, { "epoch": 0.33416516551256836, "grad_norm": 0.7918316125869751, "learning_rate": 2.329161541133044e-05, "loss": 0.1085, "step": 15165 }, { "epoch": 0.3341872008020845, "grad_norm": 0.7773035764694214, "learning_rate": 2.3290723280061598e-05, "loss": 0.0609, "step": 15166 }, { "epoch": 0.3342092360916007, "grad_norm": 0.7509167194366455, "learning_rate": 2.3289831106563646e-05, "loss": 0.1096, "step": 15167 }, { "epoch": 0.33423127138111686, "grad_norm": 0.6450148224830627, "learning_rate": 2.3288938890841146e-05, "loss": 0.0494, "step": 15168 }, { "epoch": 0.334253306670633, "grad_norm": 0.653534471988678, "learning_rate": 2.328804663289863e-05, "loss": 0.0775, "step": 15169 }, { "epoch": 0.3342753419601492, "grad_norm": 0.47084102034568787, "learning_rate": 2.328715433274065e-05, "loss": 0.0682, "step": 15170 }, { "epoch": 0.33429737724966535, "grad_norm": 0.5982682108879089, "learning_rate": 2.328626199037175e-05, "loss": 0.0596, "step": 15171 }, { "epoch": 0.3343194125391815, "grad_norm": 0.6582561731338501, "learning_rate": 2.3285369605796477e-05, "loss": 0.0801, "step": 15172 }, { "epoch": 0.3343414478286977, "grad_norm": 0.7752962112426758, "learning_rate": 2.3284477179019364e-05, "loss": 0.1464, "step": 15173 }, { "epoch": 0.33436348311821384, "grad_norm": 0.5286615490913391, "learning_rate": 2.3283584710044973e-05, "loss": 0.0763, "step": 15174 }, { "epoch": 0.33438551840772995, "grad_norm": 0.7206526398658752, "learning_rate": 2.3282692198877845e-05, "loss": 0.0832, "step": 15175 }, { "epoch": 0.3344075536972461, "grad_norm": 0.9905747771263123, "learning_rate": 2.328179964552252e-05, "loss": 0.1577, "step": 15176 }, { "epoch": 0.3344295889867623, "grad_norm": 0.9637004733085632, "learning_rate": 2.328090704998355e-05, "loss": 0.0994, "step": 15177 }, { "epoch": 0.33445162427627845, "grad_norm": 0.811306357383728, "learning_rate": 2.328001441226548e-05, "loss": 0.0847, "step": 15178 }, { "epoch": 0.3344736595657946, "grad_norm": 0.5681584477424622, "learning_rate": 2.3279121732372858e-05, "loss": 0.0852, "step": 15179 }, { "epoch": 0.3344956948553108, "grad_norm": 0.9862638711929321, "learning_rate": 2.3278229010310223e-05, "loss": 0.0925, "step": 15180 }, { "epoch": 0.33451773014482694, "grad_norm": 0.9587574601173401, "learning_rate": 2.3277336246082136e-05, "loss": 0.0923, "step": 15181 }, { "epoch": 0.3345397654343431, "grad_norm": 0.33672043681144714, "learning_rate": 2.3276443439693133e-05, "loss": 0.0823, "step": 15182 }, { "epoch": 0.33456180072385927, "grad_norm": 0.5334031581878662, "learning_rate": 2.3275550591147768e-05, "loss": 0.0663, "step": 15183 }, { "epoch": 0.33458383601337544, "grad_norm": 0.9036508202552795, "learning_rate": 2.3274657700450584e-05, "loss": 0.0784, "step": 15184 }, { "epoch": 0.3346058713028916, "grad_norm": 0.52244633436203, "learning_rate": 2.327376476760613e-05, "loss": 0.0865, "step": 15185 }, { "epoch": 0.33462790659240776, "grad_norm": 0.6324933767318726, "learning_rate": 2.3272871792618957e-05, "loss": 0.0556, "step": 15186 }, { "epoch": 0.3346499418819239, "grad_norm": 0.6376967430114746, "learning_rate": 2.327197877549361e-05, "loss": 0.0989, "step": 15187 }, { "epoch": 0.33467197717144004, "grad_norm": 0.8868761658668518, "learning_rate": 2.3271085716234642e-05, "loss": 0.0906, "step": 15188 }, { "epoch": 0.3346940124609562, "grad_norm": 0.846554696559906, "learning_rate": 2.3270192614846598e-05, "loss": 0.1228, "step": 15189 }, { "epoch": 0.33471604775047237, "grad_norm": 0.5345967411994934, "learning_rate": 2.3269299471334026e-05, "loss": 0.0892, "step": 15190 }, { "epoch": 0.33473808303998853, "grad_norm": 1.0281717777252197, "learning_rate": 2.326840628570148e-05, "loss": 0.1682, "step": 15191 }, { "epoch": 0.3347601183295047, "grad_norm": 0.7358110547065735, "learning_rate": 2.326751305795351e-05, "loss": 0.0683, "step": 15192 }, { "epoch": 0.33478215361902086, "grad_norm": 0.5847204923629761, "learning_rate": 2.3266619788094657e-05, "loss": 0.0929, "step": 15193 }, { "epoch": 0.334804188908537, "grad_norm": 0.5767553448677063, "learning_rate": 2.3265726476129476e-05, "loss": 0.0846, "step": 15194 }, { "epoch": 0.3348262241980532, "grad_norm": 0.599246084690094, "learning_rate": 2.326483312206252e-05, "loss": 0.1072, "step": 15195 }, { "epoch": 0.33484825948756936, "grad_norm": 0.6503000259399414, "learning_rate": 2.326393972589834e-05, "loss": 0.0907, "step": 15196 }, { "epoch": 0.3348702947770855, "grad_norm": 0.6822422742843628, "learning_rate": 2.326304628764148e-05, "loss": 0.0826, "step": 15197 }, { "epoch": 0.3348923300666017, "grad_norm": 1.0443809032440186, "learning_rate": 2.326215280729649e-05, "loss": 0.0958, "step": 15198 }, { "epoch": 0.33491436535611785, "grad_norm": 0.5477561354637146, "learning_rate": 2.3261259284867932e-05, "loss": 0.0854, "step": 15199 }, { "epoch": 0.33493640064563396, "grad_norm": 0.5574287176132202, "learning_rate": 2.3260365720360352e-05, "loss": 0.0928, "step": 15200 }, { "epoch": 0.3349584359351501, "grad_norm": 1.5469396114349365, "learning_rate": 2.3259472113778293e-05, "loss": 0.1355, "step": 15201 }, { "epoch": 0.3349804712246663, "grad_norm": 0.8970798254013062, "learning_rate": 2.325857846512632e-05, "loss": 0.1064, "step": 15202 }, { "epoch": 0.33500250651418245, "grad_norm": 2.95635724067688, "learning_rate": 2.3257684774408975e-05, "loss": 0.1042, "step": 15203 }, { "epoch": 0.3350245418036986, "grad_norm": 0.5561954975128174, "learning_rate": 2.325679104163081e-05, "loss": 0.1117, "step": 15204 }, { "epoch": 0.3350465770932148, "grad_norm": 0.7693867683410645, "learning_rate": 2.3255897266796385e-05, "loss": 0.1161, "step": 15205 }, { "epoch": 0.33506861238273095, "grad_norm": 0.6794174909591675, "learning_rate": 2.325500344991025e-05, "loss": 0.1137, "step": 15206 }, { "epoch": 0.3350906476722471, "grad_norm": 0.7694001197814941, "learning_rate": 2.325410959097695e-05, "loss": 0.0889, "step": 15207 }, { "epoch": 0.3351126829617633, "grad_norm": 0.694112241268158, "learning_rate": 2.325321569000105e-05, "loss": 0.0827, "step": 15208 }, { "epoch": 0.33513471825127944, "grad_norm": 0.4081483781337738, "learning_rate": 2.3252321746987092e-05, "loss": 0.0414, "step": 15209 }, { "epoch": 0.3351567535407956, "grad_norm": 0.6405598521232605, "learning_rate": 2.3251427761939634e-05, "loss": 0.0917, "step": 15210 }, { "epoch": 0.33517878883031177, "grad_norm": 0.6379502415657043, "learning_rate": 2.325053373486323e-05, "loss": 0.059, "step": 15211 }, { "epoch": 0.3352008241198279, "grad_norm": 0.6882701516151428, "learning_rate": 2.3249639665762437e-05, "loss": 0.1049, "step": 15212 }, { "epoch": 0.33522285940934404, "grad_norm": 0.48747915029525757, "learning_rate": 2.32487455546418e-05, "loss": 0.091, "step": 15213 }, { "epoch": 0.3352448946988602, "grad_norm": 1.0978604555130005, "learning_rate": 2.3247851401505883e-05, "loss": 0.1003, "step": 15214 }, { "epoch": 0.33526692998837637, "grad_norm": 0.48890939354896545, "learning_rate": 2.3246957206359234e-05, "loss": 0.1131, "step": 15215 }, { "epoch": 0.33528896527789254, "grad_norm": 0.8518884778022766, "learning_rate": 2.324606296920641e-05, "loss": 0.1064, "step": 15216 }, { "epoch": 0.3353110005674087, "grad_norm": 0.8584871292114258, "learning_rate": 2.3245168690051967e-05, "loss": 0.0875, "step": 15217 }, { "epoch": 0.33533303585692487, "grad_norm": 0.4480779767036438, "learning_rate": 2.3244274368900452e-05, "loss": 0.1193, "step": 15218 }, { "epoch": 0.33535507114644103, "grad_norm": 0.4545271694660187, "learning_rate": 2.3243380005756433e-05, "loss": 0.0705, "step": 15219 }, { "epoch": 0.3353771064359572, "grad_norm": 1.267914056777954, "learning_rate": 2.3242485600624458e-05, "loss": 0.0635, "step": 15220 }, { "epoch": 0.33539914172547336, "grad_norm": 0.6055148243904114, "learning_rate": 2.3241591153509082e-05, "loss": 0.0902, "step": 15221 }, { "epoch": 0.3354211770149895, "grad_norm": 0.5544800162315369, "learning_rate": 2.3240696664414865e-05, "loss": 0.0803, "step": 15222 }, { "epoch": 0.3354432123045057, "grad_norm": 0.5750944018363953, "learning_rate": 2.323980213334636e-05, "loss": 0.1213, "step": 15223 }, { "epoch": 0.33546524759402185, "grad_norm": 0.8825435042381287, "learning_rate": 2.3238907560308125e-05, "loss": 0.1167, "step": 15224 }, { "epoch": 0.33548728288353796, "grad_norm": 0.5081519484519958, "learning_rate": 2.3238012945304713e-05, "loss": 0.0619, "step": 15225 }, { "epoch": 0.3355093181730541, "grad_norm": 0.464523047208786, "learning_rate": 2.323711828834069e-05, "loss": 0.0864, "step": 15226 }, { "epoch": 0.3355313534625703, "grad_norm": 0.9112474918365479, "learning_rate": 2.32362235894206e-05, "loss": 0.0856, "step": 15227 }, { "epoch": 0.33555338875208646, "grad_norm": 0.7531083822250366, "learning_rate": 2.3235328848549012e-05, "loss": 0.0765, "step": 15228 }, { "epoch": 0.3355754240416026, "grad_norm": 0.7114598155021667, "learning_rate": 2.3234434065730474e-05, "loss": 0.0805, "step": 15229 }, { "epoch": 0.3355974593311188, "grad_norm": 0.7033476829528809, "learning_rate": 2.3233539240969544e-05, "loss": 0.1168, "step": 15230 }, { "epoch": 0.33561949462063495, "grad_norm": 0.9264533519744873, "learning_rate": 2.323264437427079e-05, "loss": 0.095, "step": 15231 }, { "epoch": 0.3356415299101511, "grad_norm": 1.0248253345489502, "learning_rate": 2.3231749465638762e-05, "loss": 0.1055, "step": 15232 }, { "epoch": 0.3356635651996673, "grad_norm": 0.8950638771057129, "learning_rate": 2.3230854515078017e-05, "loss": 0.1083, "step": 15233 }, { "epoch": 0.33568560048918344, "grad_norm": 0.8556563854217529, "learning_rate": 2.3229959522593122e-05, "loss": 0.0916, "step": 15234 }, { "epoch": 0.3357076357786996, "grad_norm": 0.7505549788475037, "learning_rate": 2.3229064488188624e-05, "loss": 0.095, "step": 15235 }, { "epoch": 0.3357296710682158, "grad_norm": 0.8696319460868835, "learning_rate": 2.322816941186909e-05, "loss": 0.0837, "step": 15236 }, { "epoch": 0.3357517063577319, "grad_norm": 0.6468709111213684, "learning_rate": 2.3227274293639075e-05, "loss": 0.0821, "step": 15237 }, { "epoch": 0.33577374164724805, "grad_norm": 0.6138450503349304, "learning_rate": 2.3226379133503144e-05, "loss": 0.0737, "step": 15238 }, { "epoch": 0.3357957769367642, "grad_norm": 0.5773710012435913, "learning_rate": 2.3225483931465852e-05, "loss": 0.0969, "step": 15239 }, { "epoch": 0.3358178122262804, "grad_norm": 1.0645675659179688, "learning_rate": 2.3224588687531756e-05, "loss": 0.087, "step": 15240 }, { "epoch": 0.33583984751579654, "grad_norm": 0.9180561304092407, "learning_rate": 2.3223693401705423e-05, "loss": 0.1144, "step": 15241 }, { "epoch": 0.3358618828053127, "grad_norm": 0.6424855589866638, "learning_rate": 2.3222798073991406e-05, "loss": 0.1031, "step": 15242 }, { "epoch": 0.33588391809482887, "grad_norm": 0.7444846630096436, "learning_rate": 2.3221902704394274e-05, "loss": 0.1058, "step": 15243 }, { "epoch": 0.33590595338434504, "grad_norm": 0.5655965209007263, "learning_rate": 2.3221007292918582e-05, "loss": 0.066, "step": 15244 }, { "epoch": 0.3359279886738612, "grad_norm": 0.7713042497634888, "learning_rate": 2.3220111839568888e-05, "loss": 0.0931, "step": 15245 }, { "epoch": 0.33595002396337736, "grad_norm": 0.8003765940666199, "learning_rate": 2.3219216344349756e-05, "loss": 0.0864, "step": 15246 }, { "epoch": 0.33597205925289353, "grad_norm": 0.7560799717903137, "learning_rate": 2.3218320807265745e-05, "loss": 0.1041, "step": 15247 }, { "epoch": 0.3359940945424097, "grad_norm": 0.9796701073646545, "learning_rate": 2.3217425228321428e-05, "loss": 0.0853, "step": 15248 }, { "epoch": 0.3360161298319258, "grad_norm": 0.9560401439666748, "learning_rate": 2.3216529607521354e-05, "loss": 0.0957, "step": 15249 }, { "epoch": 0.33603816512144197, "grad_norm": 0.6126472353935242, "learning_rate": 2.3215633944870086e-05, "loss": 0.1173, "step": 15250 }, { "epoch": 0.33606020041095813, "grad_norm": 0.8547509908676147, "learning_rate": 2.3214738240372193e-05, "loss": 0.0786, "step": 15251 }, { "epoch": 0.3360822357004743, "grad_norm": 0.5509254336357117, "learning_rate": 2.321384249403223e-05, "loss": 0.1111, "step": 15252 }, { "epoch": 0.33610427098999046, "grad_norm": 0.7459842562675476, "learning_rate": 2.3212946705854763e-05, "loss": 0.1021, "step": 15253 }, { "epoch": 0.3361263062795066, "grad_norm": 0.596253514289856, "learning_rate": 2.3212050875844354e-05, "loss": 0.0956, "step": 15254 }, { "epoch": 0.3361483415690228, "grad_norm": 0.5983395576477051, "learning_rate": 2.3211155004005567e-05, "loss": 0.0876, "step": 15255 }, { "epoch": 0.33617037685853896, "grad_norm": 0.7631446719169617, "learning_rate": 2.3210259090342967e-05, "loss": 0.0807, "step": 15256 }, { "epoch": 0.3361924121480551, "grad_norm": 0.6672134399414062, "learning_rate": 2.320936313486111e-05, "loss": 0.1202, "step": 15257 }, { "epoch": 0.3362144474375713, "grad_norm": 0.6688974499702454, "learning_rate": 2.3208467137564567e-05, "loss": 0.0977, "step": 15258 }, { "epoch": 0.33623648272708745, "grad_norm": 0.5904684662818909, "learning_rate": 2.3207571098457895e-05, "loss": 0.1185, "step": 15259 }, { "epoch": 0.3362585180166036, "grad_norm": 1.1715377569198608, "learning_rate": 2.320667501754567e-05, "loss": 0.1153, "step": 15260 }, { "epoch": 0.3362805533061198, "grad_norm": 0.87514328956604, "learning_rate": 2.320577889483244e-05, "loss": 0.0922, "step": 15261 }, { "epoch": 0.3363025885956359, "grad_norm": 0.8463454246520996, "learning_rate": 2.320488273032278e-05, "loss": 0.0653, "step": 15262 }, { "epoch": 0.33632462388515205, "grad_norm": 0.6288938522338867, "learning_rate": 2.3203986524021258e-05, "loss": 0.1147, "step": 15263 }, { "epoch": 0.3363466591746682, "grad_norm": 0.8152939081192017, "learning_rate": 2.320309027593243e-05, "loss": 0.0877, "step": 15264 }, { "epoch": 0.3363686944641844, "grad_norm": 0.9245986342430115, "learning_rate": 2.3202193986060863e-05, "loss": 0.1124, "step": 15265 }, { "epoch": 0.33639072975370055, "grad_norm": 0.6691561341285706, "learning_rate": 2.3201297654411122e-05, "loss": 0.0799, "step": 15266 }, { "epoch": 0.3364127650432167, "grad_norm": 0.8892279267311096, "learning_rate": 2.3200401280987783e-05, "loss": 0.0817, "step": 15267 }, { "epoch": 0.3364348003327329, "grad_norm": 0.9649592638015747, "learning_rate": 2.3199504865795392e-05, "loss": 0.0886, "step": 15268 }, { "epoch": 0.33645683562224904, "grad_norm": 0.7462313175201416, "learning_rate": 2.319860840883853e-05, "loss": 0.1014, "step": 15269 }, { "epoch": 0.3364788709117652, "grad_norm": 0.35366693139076233, "learning_rate": 2.319771191012176e-05, "loss": 0.0804, "step": 15270 }, { "epoch": 0.33650090620128137, "grad_norm": 0.4111363887786865, "learning_rate": 2.319681536964965e-05, "loss": 0.0677, "step": 15271 }, { "epoch": 0.33652294149079753, "grad_norm": 0.7716854810714722, "learning_rate": 2.3195918787426756e-05, "loss": 0.0943, "step": 15272 }, { "epoch": 0.3365449767803137, "grad_norm": 0.5782283544540405, "learning_rate": 2.319502216345766e-05, "loss": 0.0768, "step": 15273 }, { "epoch": 0.3365670120698298, "grad_norm": 0.6690795421600342, "learning_rate": 2.3194125497746918e-05, "loss": 0.0738, "step": 15274 }, { "epoch": 0.33658904735934597, "grad_norm": 0.7114755511283875, "learning_rate": 2.3193228790299104e-05, "loss": 0.1, "step": 15275 }, { "epoch": 0.33661108264886214, "grad_norm": 0.769349992275238, "learning_rate": 2.3192332041118776e-05, "loss": 0.0877, "step": 15276 }, { "epoch": 0.3366331179383783, "grad_norm": 0.6879185438156128, "learning_rate": 2.3191435250210516e-05, "loss": 0.0862, "step": 15277 }, { "epoch": 0.33665515322789447, "grad_norm": 0.8984180688858032, "learning_rate": 2.3190538417578876e-05, "loss": 0.093, "step": 15278 }, { "epoch": 0.33667718851741063, "grad_norm": 0.8126903176307678, "learning_rate": 2.3189641543228437e-05, "loss": 0.1292, "step": 15279 }, { "epoch": 0.3366992238069268, "grad_norm": 0.9110827445983887, "learning_rate": 2.318874462716376e-05, "loss": 0.0888, "step": 15280 }, { "epoch": 0.33672125909644296, "grad_norm": 0.8078421354293823, "learning_rate": 2.3187847669389413e-05, "loss": 0.0892, "step": 15281 }, { "epoch": 0.3367432943859591, "grad_norm": 0.743539571762085, "learning_rate": 2.3186950669909972e-05, "loss": 0.0699, "step": 15282 }, { "epoch": 0.3367653296754753, "grad_norm": 0.7074874639511108, "learning_rate": 2.3186053628729995e-05, "loss": 0.0763, "step": 15283 }, { "epoch": 0.33678736496499145, "grad_norm": 0.6547445058822632, "learning_rate": 2.3185156545854065e-05, "loss": 0.1012, "step": 15284 }, { "epoch": 0.3368094002545076, "grad_norm": 0.6453456878662109, "learning_rate": 2.3184259421286736e-05, "loss": 0.091, "step": 15285 }, { "epoch": 0.3368314355440237, "grad_norm": 0.8473171591758728, "learning_rate": 2.318336225503259e-05, "loss": 0.1176, "step": 15286 }, { "epoch": 0.3368534708335399, "grad_norm": 1.0099260807037354, "learning_rate": 2.3182465047096192e-05, "loss": 0.1269, "step": 15287 }, { "epoch": 0.33687550612305606, "grad_norm": 0.7528693079948425, "learning_rate": 2.3181567797482108e-05, "loss": 0.0976, "step": 15288 }, { "epoch": 0.3368975414125722, "grad_norm": 1.0899832248687744, "learning_rate": 2.3180670506194913e-05, "loss": 0.116, "step": 15289 }, { "epoch": 0.3369195767020884, "grad_norm": 0.7530904412269592, "learning_rate": 2.3179773173239177e-05, "loss": 0.085, "step": 15290 }, { "epoch": 0.33694161199160455, "grad_norm": 0.6007997393608093, "learning_rate": 2.317887579861947e-05, "loss": 0.0647, "step": 15291 }, { "epoch": 0.3369636472811207, "grad_norm": 0.8654094934463501, "learning_rate": 2.3177978382340363e-05, "loss": 0.1261, "step": 15292 }, { "epoch": 0.3369856825706369, "grad_norm": 0.6702721118927002, "learning_rate": 2.3177080924406423e-05, "loss": 0.1054, "step": 15293 }, { "epoch": 0.33700771786015304, "grad_norm": 0.8108092546463013, "learning_rate": 2.317618342482223e-05, "loss": 0.0915, "step": 15294 }, { "epoch": 0.3370297531496692, "grad_norm": 0.5075280070304871, "learning_rate": 2.317528588359235e-05, "loss": 0.0974, "step": 15295 }, { "epoch": 0.3370517884391854, "grad_norm": 0.6118848919868469, "learning_rate": 2.3174388300721352e-05, "loss": 0.0757, "step": 15296 }, { "epoch": 0.33707382372870154, "grad_norm": 0.6518932580947876, "learning_rate": 2.3173490676213812e-05, "loss": 0.1077, "step": 15297 }, { "epoch": 0.3370958590182177, "grad_norm": 1.2055771350860596, "learning_rate": 2.3172593010074305e-05, "loss": 0.1272, "step": 15298 }, { "epoch": 0.3371178943077338, "grad_norm": 0.8598802089691162, "learning_rate": 2.3171695302307394e-05, "loss": 0.0837, "step": 15299 }, { "epoch": 0.33713992959725, "grad_norm": 0.962185263633728, "learning_rate": 2.3170797552917663e-05, "loss": 0.1257, "step": 15300 }, { "epoch": 0.33716196488676614, "grad_norm": 1.1290910243988037, "learning_rate": 2.3169899761909667e-05, "loss": 0.1274, "step": 15301 }, { "epoch": 0.3371840001762823, "grad_norm": 1.0017669200897217, "learning_rate": 2.3169001929288002e-05, "loss": 0.1244, "step": 15302 }, { "epoch": 0.33720603546579847, "grad_norm": 0.7178560495376587, "learning_rate": 2.3168104055057223e-05, "loss": 0.0943, "step": 15303 }, { "epoch": 0.33722807075531464, "grad_norm": 0.5218456983566284, "learning_rate": 2.3167206139221914e-05, "loss": 0.0727, "step": 15304 }, { "epoch": 0.3372501060448308, "grad_norm": 0.6260051727294922, "learning_rate": 2.3166308181786644e-05, "loss": 0.0479, "step": 15305 }, { "epoch": 0.33727214133434696, "grad_norm": 0.9513266086578369, "learning_rate": 2.3165410182755985e-05, "loss": 0.1114, "step": 15306 }, { "epoch": 0.33729417662386313, "grad_norm": 0.8327803611755371, "learning_rate": 2.3164512142134515e-05, "loss": 0.125, "step": 15307 }, { "epoch": 0.3373162119133793, "grad_norm": 0.6749945282936096, "learning_rate": 2.31636140599268e-05, "loss": 0.0983, "step": 15308 }, { "epoch": 0.33733824720289546, "grad_norm": 1.1557353734970093, "learning_rate": 2.316271593613743e-05, "loss": 0.1528, "step": 15309 }, { "epoch": 0.3373602824924116, "grad_norm": 0.8451303243637085, "learning_rate": 2.316181777077097e-05, "loss": 0.0906, "step": 15310 }, { "epoch": 0.33738231778192773, "grad_norm": 0.9078958630561829, "learning_rate": 2.316091956383199e-05, "loss": 0.1191, "step": 15311 }, { "epoch": 0.3374043530714439, "grad_norm": 0.5511519312858582, "learning_rate": 2.3160021315325073e-05, "loss": 0.0935, "step": 15312 }, { "epoch": 0.33742638836096006, "grad_norm": 0.8815454244613647, "learning_rate": 2.315912302525479e-05, "loss": 0.1136, "step": 15313 }, { "epoch": 0.3374484236504762, "grad_norm": 0.3603486716747284, "learning_rate": 2.315822469362572e-05, "loss": 0.0585, "step": 15314 }, { "epoch": 0.3374704589399924, "grad_norm": 0.9433907270431519, "learning_rate": 2.315732632044244e-05, "loss": 0.1206, "step": 15315 }, { "epoch": 0.33749249422950856, "grad_norm": 0.5553556084632874, "learning_rate": 2.315642790570952e-05, "loss": 0.0758, "step": 15316 }, { "epoch": 0.3375145295190247, "grad_norm": 0.8425436019897461, "learning_rate": 2.315552944943154e-05, "loss": 0.0997, "step": 15317 }, { "epoch": 0.3375365648085409, "grad_norm": 0.9620298743247986, "learning_rate": 2.3154630951613072e-05, "loss": 0.088, "step": 15318 }, { "epoch": 0.33755860009805705, "grad_norm": 0.7446882724761963, "learning_rate": 2.3153732412258697e-05, "loss": 0.0747, "step": 15319 }, { "epoch": 0.3375806353875732, "grad_norm": 0.5187138915061951, "learning_rate": 2.3152833831372992e-05, "loss": 0.1031, "step": 15320 }, { "epoch": 0.3376026706770894, "grad_norm": 0.4138396978378296, "learning_rate": 2.315193520896053e-05, "loss": 0.0841, "step": 15321 }, { "epoch": 0.33762470596660554, "grad_norm": 0.8589495420455933, "learning_rate": 2.3151036545025892e-05, "loss": 0.0933, "step": 15322 }, { "epoch": 0.33764674125612165, "grad_norm": 1.1669906377792358, "learning_rate": 2.3150137839573655e-05, "loss": 0.0983, "step": 15323 }, { "epoch": 0.3376687765456378, "grad_norm": 0.5954866409301758, "learning_rate": 2.3149239092608394e-05, "loss": 0.0796, "step": 15324 }, { "epoch": 0.337690811835154, "grad_norm": 1.155495047569275, "learning_rate": 2.3148340304134687e-05, "loss": 0.105, "step": 15325 }, { "epoch": 0.33771284712467015, "grad_norm": 0.8484395742416382, "learning_rate": 2.3147441474157114e-05, "loss": 0.0804, "step": 15326 }, { "epoch": 0.3377348824141863, "grad_norm": 0.5730507373809814, "learning_rate": 2.3146542602680257e-05, "loss": 0.0701, "step": 15327 }, { "epoch": 0.3377569177037025, "grad_norm": 0.6631130576133728, "learning_rate": 2.3145643689708685e-05, "loss": 0.0832, "step": 15328 }, { "epoch": 0.33777895299321864, "grad_norm": 0.6845743060112, "learning_rate": 2.314474473524698e-05, "loss": 0.0865, "step": 15329 }, { "epoch": 0.3378009882827348, "grad_norm": 0.7192583084106445, "learning_rate": 2.3143845739299728e-05, "loss": 0.0768, "step": 15330 }, { "epoch": 0.33782302357225097, "grad_norm": 0.3817969858646393, "learning_rate": 2.3142946701871498e-05, "loss": 0.0769, "step": 15331 }, { "epoch": 0.33784505886176713, "grad_norm": 0.62002032995224, "learning_rate": 2.314204762296688e-05, "loss": 0.0908, "step": 15332 }, { "epoch": 0.3378670941512833, "grad_norm": 0.5058645606040955, "learning_rate": 2.314114850259044e-05, "loss": 0.1111, "step": 15333 }, { "epoch": 0.33788912944079946, "grad_norm": 0.7084429860115051, "learning_rate": 2.3140249340746772e-05, "loss": 0.0963, "step": 15334 }, { "epoch": 0.3379111647303156, "grad_norm": 1.729294776916504, "learning_rate": 2.3139350137440444e-05, "loss": 0.094, "step": 15335 }, { "epoch": 0.33793320001983174, "grad_norm": 0.46501967310905457, "learning_rate": 2.313845089267604e-05, "loss": 0.0874, "step": 15336 }, { "epoch": 0.3379552353093479, "grad_norm": 0.8687354922294617, "learning_rate": 2.3137551606458148e-05, "loss": 0.1145, "step": 15337 }, { "epoch": 0.33797727059886407, "grad_norm": 0.6774877309799194, "learning_rate": 2.3136652278791337e-05, "loss": 0.0864, "step": 15338 }, { "epoch": 0.33799930588838023, "grad_norm": 0.6554986238479614, "learning_rate": 2.3135752909680194e-05, "loss": 0.1017, "step": 15339 }, { "epoch": 0.3380213411778964, "grad_norm": 0.45883291959762573, "learning_rate": 2.3134853499129298e-05, "loss": 0.0614, "step": 15340 }, { "epoch": 0.33804337646741256, "grad_norm": 0.9185265302658081, "learning_rate": 2.313395404714323e-05, "loss": 0.0953, "step": 15341 }, { "epoch": 0.3380654117569287, "grad_norm": 0.5866894721984863, "learning_rate": 2.3133054553726574e-05, "loss": 0.048, "step": 15342 }, { "epoch": 0.3380874470464449, "grad_norm": 1.053280234336853, "learning_rate": 2.313215501888391e-05, "loss": 0.1144, "step": 15343 }, { "epoch": 0.33810948233596105, "grad_norm": 0.7596219182014465, "learning_rate": 2.313125544261982e-05, "loss": 0.1054, "step": 15344 }, { "epoch": 0.3381315176254772, "grad_norm": 0.7377911806106567, "learning_rate": 2.3130355824938884e-05, "loss": 0.0889, "step": 15345 }, { "epoch": 0.3381535529149934, "grad_norm": 0.6120761632919312, "learning_rate": 2.312945616584569e-05, "loss": 0.0793, "step": 15346 }, { "epoch": 0.33817558820450955, "grad_norm": 0.7393002510070801, "learning_rate": 2.3128556465344808e-05, "loss": 0.0718, "step": 15347 }, { "epoch": 0.33819762349402566, "grad_norm": 0.5940440893173218, "learning_rate": 2.3127656723440837e-05, "loss": 0.103, "step": 15348 }, { "epoch": 0.3382196587835418, "grad_norm": 0.6687866449356079, "learning_rate": 2.3126756940138348e-05, "loss": 0.0895, "step": 15349 }, { "epoch": 0.338241694073058, "grad_norm": 0.5422927141189575, "learning_rate": 2.3125857115441928e-05, "loss": 0.0809, "step": 15350 }, { "epoch": 0.33826372936257415, "grad_norm": 1.0122461318969727, "learning_rate": 2.312495724935616e-05, "loss": 0.1035, "step": 15351 }, { "epoch": 0.3382857646520903, "grad_norm": 1.1034893989562988, "learning_rate": 2.3124057341885632e-05, "loss": 0.1162, "step": 15352 }, { "epoch": 0.3383077999416065, "grad_norm": 0.7360532879829407, "learning_rate": 2.3123157393034917e-05, "loss": 0.0846, "step": 15353 }, { "epoch": 0.33832983523112264, "grad_norm": 0.7713994383811951, "learning_rate": 2.312225740280861e-05, "loss": 0.0915, "step": 15354 }, { "epoch": 0.3383518705206388, "grad_norm": 0.8247507810592651, "learning_rate": 2.312135737121129e-05, "loss": 0.0865, "step": 15355 }, { "epoch": 0.338373905810155, "grad_norm": 0.7841312885284424, "learning_rate": 2.312045729824754e-05, "loss": 0.1006, "step": 15356 }, { "epoch": 0.33839594109967114, "grad_norm": 0.6784317493438721, "learning_rate": 2.311955718392195e-05, "loss": 0.1328, "step": 15357 }, { "epoch": 0.3384179763891873, "grad_norm": 0.8645599484443665, "learning_rate": 2.31186570282391e-05, "loss": 0.1198, "step": 15358 }, { "epoch": 0.33844001167870347, "grad_norm": 0.5792816281318665, "learning_rate": 2.3117756831203572e-05, "loss": 0.0892, "step": 15359 }, { "epoch": 0.3384620469682196, "grad_norm": 1.479508876800537, "learning_rate": 2.311685659281996e-05, "loss": 0.1241, "step": 15360 }, { "epoch": 0.33848408225773574, "grad_norm": 1.304220199584961, "learning_rate": 2.3115956313092844e-05, "loss": 0.121, "step": 15361 }, { "epoch": 0.3385061175472519, "grad_norm": 0.7213878035545349, "learning_rate": 2.311505599202681e-05, "loss": 0.0804, "step": 15362 }, { "epoch": 0.33852815283676807, "grad_norm": 0.8724250793457031, "learning_rate": 2.3114155629626444e-05, "loss": 0.0686, "step": 15363 }, { "epoch": 0.33855018812628423, "grad_norm": 0.4582046866416931, "learning_rate": 2.311325522589633e-05, "loss": 0.1052, "step": 15364 }, { "epoch": 0.3385722234158004, "grad_norm": 0.554619550704956, "learning_rate": 2.3112354780841063e-05, "loss": 0.0709, "step": 15365 }, { "epoch": 0.33859425870531656, "grad_norm": 0.40272289514541626, "learning_rate": 2.3111454294465215e-05, "loss": 0.0796, "step": 15366 }, { "epoch": 0.33861629399483273, "grad_norm": 0.5273189544677734, "learning_rate": 2.3110553766773384e-05, "loss": 0.0777, "step": 15367 }, { "epoch": 0.3386383292843489, "grad_norm": 0.6344074606895447, "learning_rate": 2.3109653197770153e-05, "loss": 0.1268, "step": 15368 }, { "epoch": 0.33866036457386506, "grad_norm": 0.733696699142456, "learning_rate": 2.310875258746011e-05, "loss": 0.0671, "step": 15369 }, { "epoch": 0.3386823998633812, "grad_norm": 0.8972806930541992, "learning_rate": 2.310785193584784e-05, "loss": 0.0977, "step": 15370 }, { "epoch": 0.3387044351528974, "grad_norm": 0.8099140524864197, "learning_rate": 2.310695124293794e-05, "loss": 0.1216, "step": 15371 }, { "epoch": 0.33872647044241355, "grad_norm": 0.7317430377006531, "learning_rate": 2.310605050873498e-05, "loss": 0.0951, "step": 15372 }, { "epoch": 0.33874850573192966, "grad_norm": 0.9293587803840637, "learning_rate": 2.3105149733243565e-05, "loss": 0.0999, "step": 15373 }, { "epoch": 0.3387705410214458, "grad_norm": 1.033528208732605, "learning_rate": 2.3104248916468276e-05, "loss": 0.097, "step": 15374 }, { "epoch": 0.338792576310962, "grad_norm": 1.0272722244262695, "learning_rate": 2.3103348058413693e-05, "loss": 0.1037, "step": 15375 }, { "epoch": 0.33881461160047815, "grad_norm": 0.7525886297225952, "learning_rate": 2.3102447159084422e-05, "loss": 0.1014, "step": 15376 }, { "epoch": 0.3388366468899943, "grad_norm": 0.7480970621109009, "learning_rate": 2.3101546218485035e-05, "loss": 0.0643, "step": 15377 }, { "epoch": 0.3388586821795105, "grad_norm": 0.7543812394142151, "learning_rate": 2.3100645236620135e-05, "loss": 0.092, "step": 15378 }, { "epoch": 0.33888071746902665, "grad_norm": 0.7585586309432983, "learning_rate": 2.3099744213494304e-05, "loss": 0.1051, "step": 15379 }, { "epoch": 0.3389027527585428, "grad_norm": 0.5769937634468079, "learning_rate": 2.309884314911213e-05, "loss": 0.0819, "step": 15380 }, { "epoch": 0.338924788048059, "grad_norm": 0.5480906367301941, "learning_rate": 2.309794204347821e-05, "loss": 0.091, "step": 15381 }, { "epoch": 0.33894682333757514, "grad_norm": 0.7300557494163513, "learning_rate": 2.3097040896597123e-05, "loss": 0.106, "step": 15382 }, { "epoch": 0.3389688586270913, "grad_norm": 0.8203446269035339, "learning_rate": 2.3096139708473465e-05, "loss": 0.0886, "step": 15383 }, { "epoch": 0.33899089391660747, "grad_norm": 0.4840013086795807, "learning_rate": 2.3095238479111833e-05, "loss": 0.0724, "step": 15384 }, { "epoch": 0.3390129292061236, "grad_norm": 0.5872516632080078, "learning_rate": 2.3094337208516802e-05, "loss": 0.0933, "step": 15385 }, { "epoch": 0.33903496449563975, "grad_norm": 0.5465506315231323, "learning_rate": 2.3093435896692975e-05, "loss": 0.0866, "step": 15386 }, { "epoch": 0.3390569997851559, "grad_norm": 0.6164981722831726, "learning_rate": 2.3092534543644935e-05, "loss": 0.0927, "step": 15387 }, { "epoch": 0.3390790350746721, "grad_norm": 0.710309624671936, "learning_rate": 2.3091633149377284e-05, "loss": 0.0688, "step": 15388 }, { "epoch": 0.33910107036418824, "grad_norm": 0.609295666217804, "learning_rate": 2.3090731713894605e-05, "loss": 0.0969, "step": 15389 }, { "epoch": 0.3391231056537044, "grad_norm": 0.8281307816505432, "learning_rate": 2.3089830237201483e-05, "loss": 0.0649, "step": 15390 }, { "epoch": 0.33914514094322057, "grad_norm": 0.7960008382797241, "learning_rate": 2.3088928719302525e-05, "loss": 0.122, "step": 15391 }, { "epoch": 0.33916717623273673, "grad_norm": 0.6791632771492004, "learning_rate": 2.3088027160202313e-05, "loss": 0.0973, "step": 15392 }, { "epoch": 0.3391892115222529, "grad_norm": 0.7217361927032471, "learning_rate": 2.308712555990544e-05, "loss": 0.0964, "step": 15393 }, { "epoch": 0.33921124681176906, "grad_norm": 0.5377988815307617, "learning_rate": 2.3086223918416505e-05, "loss": 0.0835, "step": 15394 }, { "epoch": 0.3392332821012852, "grad_norm": 0.5841768383979797, "learning_rate": 2.308532223574009e-05, "loss": 0.1062, "step": 15395 }, { "epoch": 0.3392553173908014, "grad_norm": 0.6612200736999512, "learning_rate": 2.3084420511880794e-05, "loss": 0.0708, "step": 15396 }, { "epoch": 0.3392773526803175, "grad_norm": 0.9334219098091125, "learning_rate": 2.308351874684321e-05, "loss": 0.0789, "step": 15397 }, { "epoch": 0.33929938796983367, "grad_norm": 0.7707300186157227, "learning_rate": 2.308261694063193e-05, "loss": 0.0993, "step": 15398 }, { "epoch": 0.33932142325934983, "grad_norm": 1.0475796461105347, "learning_rate": 2.308171509325155e-05, "loss": 0.0868, "step": 15399 }, { "epoch": 0.339343458548866, "grad_norm": 0.652442455291748, "learning_rate": 2.3080813204706657e-05, "loss": 0.0884, "step": 15400 }, { "epoch": 0.33936549383838216, "grad_norm": 0.4801042973995209, "learning_rate": 2.3079911275001856e-05, "loss": 0.1347, "step": 15401 }, { "epoch": 0.3393875291278983, "grad_norm": 1.008213996887207, "learning_rate": 2.3079009304141723e-05, "loss": 0.1388, "step": 15402 }, { "epoch": 0.3394095644174145, "grad_norm": 0.7080046534538269, "learning_rate": 2.307810729213087e-05, "loss": 0.0967, "step": 15403 }, { "epoch": 0.33943159970693065, "grad_norm": 0.7567164897918701, "learning_rate": 2.3077205238973887e-05, "loss": 0.0719, "step": 15404 }, { "epoch": 0.3394536349964468, "grad_norm": 0.7210429906845093, "learning_rate": 2.3076303144675365e-05, "loss": 0.1142, "step": 15405 }, { "epoch": 0.339475670285963, "grad_norm": 0.7638477683067322, "learning_rate": 2.3075401009239898e-05, "loss": 0.1054, "step": 15406 }, { "epoch": 0.33949770557547915, "grad_norm": 0.8196065425872803, "learning_rate": 2.3074498832672085e-05, "loss": 0.084, "step": 15407 }, { "epoch": 0.3395197408649953, "grad_norm": 0.6214743852615356, "learning_rate": 2.307359661497652e-05, "loss": 0.114, "step": 15408 }, { "epoch": 0.3395417761545115, "grad_norm": 0.6303545236587524, "learning_rate": 2.3072694356157795e-05, "loss": 0.0938, "step": 15409 }, { "epoch": 0.3395638114440276, "grad_norm": 0.763839066028595, "learning_rate": 2.3071792056220507e-05, "loss": 0.1176, "step": 15410 }, { "epoch": 0.33958584673354375, "grad_norm": 0.7268596291542053, "learning_rate": 2.3070889715169263e-05, "loss": 0.1061, "step": 15411 }, { "epoch": 0.3396078820230599, "grad_norm": 0.888904869556427, "learning_rate": 2.3069987333008644e-05, "loss": 0.1216, "step": 15412 }, { "epoch": 0.3396299173125761, "grad_norm": 0.9536369442939758, "learning_rate": 2.3069084909743254e-05, "loss": 0.1237, "step": 15413 }, { "epoch": 0.33965195260209224, "grad_norm": 0.8098581433296204, "learning_rate": 2.3068182445377684e-05, "loss": 0.0824, "step": 15414 }, { "epoch": 0.3396739878916084, "grad_norm": 0.8250192999839783, "learning_rate": 2.306727993991654e-05, "loss": 0.0617, "step": 15415 }, { "epoch": 0.3396960231811246, "grad_norm": 0.9695687890052795, "learning_rate": 2.3066377393364405e-05, "loss": 0.084, "step": 15416 }, { "epoch": 0.33971805847064074, "grad_norm": 0.47262123227119446, "learning_rate": 2.3065474805725888e-05, "loss": 0.0667, "step": 15417 }, { "epoch": 0.3397400937601569, "grad_norm": 1.088785171508789, "learning_rate": 2.3064572177005582e-05, "loss": 0.1088, "step": 15418 }, { "epoch": 0.33976212904967307, "grad_norm": 0.943839967250824, "learning_rate": 2.3063669507208085e-05, "loss": 0.0852, "step": 15419 }, { "epoch": 0.33978416433918923, "grad_norm": 0.8211948275566101, "learning_rate": 2.3062766796337997e-05, "loss": 0.0792, "step": 15420 }, { "epoch": 0.3398061996287054, "grad_norm": 0.8668825030326843, "learning_rate": 2.3061864044399917e-05, "loss": 0.087, "step": 15421 }, { "epoch": 0.3398282349182215, "grad_norm": 0.6689053773880005, "learning_rate": 2.306096125139844e-05, "loss": 0.0823, "step": 15422 }, { "epoch": 0.33985027020773767, "grad_norm": 0.44271236658096313, "learning_rate": 2.306005841733816e-05, "loss": 0.0625, "step": 15423 }, { "epoch": 0.33987230549725383, "grad_norm": 0.49332529306411743, "learning_rate": 2.3059155542223684e-05, "loss": 0.0906, "step": 15424 }, { "epoch": 0.33989434078677, "grad_norm": 0.762287974357605, "learning_rate": 2.3058252626059603e-05, "loss": 0.0706, "step": 15425 }, { "epoch": 0.33991637607628616, "grad_norm": 1.0182729959487915, "learning_rate": 2.3057349668850525e-05, "loss": 0.1042, "step": 15426 }, { "epoch": 0.33993841136580233, "grad_norm": 0.3670189082622528, "learning_rate": 2.305644667060104e-05, "loss": 0.0812, "step": 15427 }, { "epoch": 0.3399604466553185, "grad_norm": 1.0255850553512573, "learning_rate": 2.3055543631315757e-05, "loss": 0.0934, "step": 15428 }, { "epoch": 0.33998248194483466, "grad_norm": 0.5647116303443909, "learning_rate": 2.3054640550999266e-05, "loss": 0.0849, "step": 15429 }, { "epoch": 0.3400045172343508, "grad_norm": 0.8818351030349731, "learning_rate": 2.3053737429656172e-05, "loss": 0.0857, "step": 15430 }, { "epoch": 0.340026552523867, "grad_norm": 0.6048516035079956, "learning_rate": 2.3052834267291076e-05, "loss": 0.1011, "step": 15431 }, { "epoch": 0.34004858781338315, "grad_norm": 0.8734989762306213, "learning_rate": 2.305193106390858e-05, "loss": 0.1556, "step": 15432 }, { "epoch": 0.3400706231028993, "grad_norm": 0.6334683895111084, "learning_rate": 2.3051027819513274e-05, "loss": 0.0831, "step": 15433 }, { "epoch": 0.3400926583924154, "grad_norm": 0.8170569539070129, "learning_rate": 2.305012453410977e-05, "loss": 0.1164, "step": 15434 }, { "epoch": 0.3401146936819316, "grad_norm": 0.5809335112571716, "learning_rate": 2.3049221207702664e-05, "loss": 0.0521, "step": 15435 }, { "epoch": 0.34013672897144775, "grad_norm": 1.1811212301254272, "learning_rate": 2.304831784029656e-05, "loss": 0.1022, "step": 15436 }, { "epoch": 0.3401587642609639, "grad_norm": 0.5375023484230042, "learning_rate": 2.3047414431896056e-05, "loss": 0.0875, "step": 15437 }, { "epoch": 0.3401807995504801, "grad_norm": 0.8210951685905457, "learning_rate": 2.3046510982505755e-05, "loss": 0.0884, "step": 15438 }, { "epoch": 0.34020283483999625, "grad_norm": 0.7265130281448364, "learning_rate": 2.304560749213026e-05, "loss": 0.0892, "step": 15439 }, { "epoch": 0.3402248701295124, "grad_norm": 0.6790237426757812, "learning_rate": 2.304470396077417e-05, "loss": 0.0846, "step": 15440 }, { "epoch": 0.3402469054190286, "grad_norm": 0.535037636756897, "learning_rate": 2.3043800388442087e-05, "loss": 0.0748, "step": 15441 }, { "epoch": 0.34026894070854474, "grad_norm": 0.5696888566017151, "learning_rate": 2.3042896775138615e-05, "loss": 0.0883, "step": 15442 }, { "epoch": 0.3402909759980609, "grad_norm": 0.7450220584869385, "learning_rate": 2.3041993120868364e-05, "loss": 0.0713, "step": 15443 }, { "epoch": 0.34031301128757707, "grad_norm": 0.7072078585624695, "learning_rate": 2.3041089425635925e-05, "loss": 0.0755, "step": 15444 }, { "epoch": 0.34033504657709324, "grad_norm": 0.6433742046356201, "learning_rate": 2.3040185689445905e-05, "loss": 0.0885, "step": 15445 }, { "epoch": 0.3403570818666094, "grad_norm": 0.8580341935157776, "learning_rate": 2.3039281912302907e-05, "loss": 0.1277, "step": 15446 }, { "epoch": 0.3403791171561255, "grad_norm": 0.6191847324371338, "learning_rate": 2.3038378094211534e-05, "loss": 0.0777, "step": 15447 }, { "epoch": 0.3404011524456417, "grad_norm": 0.5666726231575012, "learning_rate": 2.3037474235176394e-05, "loss": 0.087, "step": 15448 }, { "epoch": 0.34042318773515784, "grad_norm": 0.9580427408218384, "learning_rate": 2.3036570335202088e-05, "loss": 0.1209, "step": 15449 }, { "epoch": 0.340445223024674, "grad_norm": 0.6406887173652649, "learning_rate": 2.3035666394293214e-05, "loss": 0.0826, "step": 15450 }, { "epoch": 0.34046725831419017, "grad_norm": 1.0630213022232056, "learning_rate": 2.303476241245439e-05, "loss": 0.0779, "step": 15451 }, { "epoch": 0.34048929360370633, "grad_norm": 0.858821451663971, "learning_rate": 2.303385838969021e-05, "loss": 0.089, "step": 15452 }, { "epoch": 0.3405113288932225, "grad_norm": 0.8725850582122803, "learning_rate": 2.303295432600528e-05, "loss": 0.1086, "step": 15453 }, { "epoch": 0.34053336418273866, "grad_norm": 0.7201847434043884, "learning_rate": 2.3032050221404203e-05, "loss": 0.0953, "step": 15454 }, { "epoch": 0.3405553994722548, "grad_norm": 0.8347505927085876, "learning_rate": 2.3031146075891594e-05, "loss": 0.0954, "step": 15455 }, { "epoch": 0.340577434761771, "grad_norm": 0.9569936394691467, "learning_rate": 2.3030241889472046e-05, "loss": 0.0856, "step": 15456 }, { "epoch": 0.34059947005128716, "grad_norm": 0.633286714553833, "learning_rate": 2.3029337662150173e-05, "loss": 0.0897, "step": 15457 }, { "epoch": 0.3406215053408033, "grad_norm": 0.7105488181114197, "learning_rate": 2.3028433393930573e-05, "loss": 0.0669, "step": 15458 }, { "epoch": 0.34064354063031943, "grad_norm": 0.8116397857666016, "learning_rate": 2.302752908481786e-05, "loss": 0.1054, "step": 15459 }, { "epoch": 0.3406655759198356, "grad_norm": 0.5727707147598267, "learning_rate": 2.302662473481664e-05, "loss": 0.1036, "step": 15460 }, { "epoch": 0.34068761120935176, "grad_norm": 0.7721270322799683, "learning_rate": 2.3025720343931515e-05, "loss": 0.0575, "step": 15461 }, { "epoch": 0.3407096464988679, "grad_norm": 0.4208921790122986, "learning_rate": 2.3024815912167085e-05, "loss": 0.0996, "step": 15462 }, { "epoch": 0.3407316817883841, "grad_norm": 0.6346296668052673, "learning_rate": 2.3023911439527966e-05, "loss": 0.0788, "step": 15463 }, { "epoch": 0.34075371707790025, "grad_norm": 1.014073371887207, "learning_rate": 2.302300692601877e-05, "loss": 0.1128, "step": 15464 }, { "epoch": 0.3407757523674164, "grad_norm": 0.7522448301315308, "learning_rate": 2.302210237164409e-05, "loss": 0.1092, "step": 15465 }, { "epoch": 0.3407977876569326, "grad_norm": 0.51325523853302, "learning_rate": 2.3021197776408547e-05, "loss": 0.0625, "step": 15466 }, { "epoch": 0.34081982294644875, "grad_norm": 0.6633250713348389, "learning_rate": 2.3020293140316745e-05, "loss": 0.0771, "step": 15467 }, { "epoch": 0.3408418582359649, "grad_norm": 0.821717381477356, "learning_rate": 2.3019388463373286e-05, "loss": 0.1063, "step": 15468 }, { "epoch": 0.3408638935254811, "grad_norm": 0.7330865859985352, "learning_rate": 2.3018483745582774e-05, "loss": 0.0917, "step": 15469 }, { "epoch": 0.34088592881499724, "grad_norm": 0.36547908186912537, "learning_rate": 2.3017578986949835e-05, "loss": 0.1073, "step": 15470 }, { "epoch": 0.3409079641045134, "grad_norm": 0.3475608825683594, "learning_rate": 2.301667418747906e-05, "loss": 0.0644, "step": 15471 }, { "epoch": 0.3409299993940295, "grad_norm": 0.3594878315925598, "learning_rate": 2.3015769347175064e-05, "loss": 0.0737, "step": 15472 }, { "epoch": 0.3409520346835457, "grad_norm": 0.7104803919792175, "learning_rate": 2.301486446604246e-05, "loss": 0.114, "step": 15473 }, { "epoch": 0.34097406997306184, "grad_norm": 0.7737597823143005, "learning_rate": 2.3013959544085852e-05, "loss": 0.0945, "step": 15474 }, { "epoch": 0.340996105262578, "grad_norm": 0.7553849220275879, "learning_rate": 2.301305458130985e-05, "loss": 0.069, "step": 15475 }, { "epoch": 0.3410181405520942, "grad_norm": 0.8553573489189148, "learning_rate": 2.3012149577719062e-05, "loss": 0.0942, "step": 15476 }, { "epoch": 0.34104017584161034, "grad_norm": 0.7705918550491333, "learning_rate": 2.3011244533318105e-05, "loss": 0.0848, "step": 15477 }, { "epoch": 0.3410622111311265, "grad_norm": 1.2280558347702026, "learning_rate": 2.3010339448111585e-05, "loss": 0.087, "step": 15478 }, { "epoch": 0.34108424642064267, "grad_norm": 0.7546740770339966, "learning_rate": 2.3009434322104102e-05, "loss": 0.0863, "step": 15479 }, { "epoch": 0.34110628171015883, "grad_norm": 1.104761004447937, "learning_rate": 2.3008529155300283e-05, "loss": 0.1007, "step": 15480 }, { "epoch": 0.341128316999675, "grad_norm": 0.8395060300827026, "learning_rate": 2.3007623947704723e-05, "loss": 0.1248, "step": 15481 }, { "epoch": 0.34115035228919116, "grad_norm": 0.7565115094184875, "learning_rate": 2.3006718699322047e-05, "loss": 0.1005, "step": 15482 }, { "epoch": 0.3411723875787073, "grad_norm": 0.7816988229751587, "learning_rate": 2.3005813410156855e-05, "loss": 0.0873, "step": 15483 }, { "epoch": 0.34119442286822343, "grad_norm": 0.5218232870101929, "learning_rate": 2.3004908080213765e-05, "loss": 0.0691, "step": 15484 }, { "epoch": 0.3412164581577396, "grad_norm": 0.9411409497261047, "learning_rate": 2.3004002709497386e-05, "loss": 0.1147, "step": 15485 }, { "epoch": 0.34123849344725576, "grad_norm": 0.4925355017185211, "learning_rate": 2.3003097298012325e-05, "loss": 0.082, "step": 15486 }, { "epoch": 0.34126052873677193, "grad_norm": 1.3879987001419067, "learning_rate": 2.30021918457632e-05, "loss": 0.102, "step": 15487 }, { "epoch": 0.3412825640262881, "grad_norm": 0.640841007232666, "learning_rate": 2.300128635275462e-05, "loss": 0.0983, "step": 15488 }, { "epoch": 0.34130459931580426, "grad_norm": 0.9405918717384338, "learning_rate": 2.30003808189912e-05, "loss": 0.1149, "step": 15489 }, { "epoch": 0.3413266346053204, "grad_norm": 0.8038412928581238, "learning_rate": 2.299947524447755e-05, "loss": 0.1157, "step": 15490 }, { "epoch": 0.3413486698948366, "grad_norm": 0.8225154280662537, "learning_rate": 2.2998569629218285e-05, "loss": 0.0818, "step": 15491 }, { "epoch": 0.34137070518435275, "grad_norm": 1.0282820463180542, "learning_rate": 2.2997663973218014e-05, "loss": 0.0616, "step": 15492 }, { "epoch": 0.3413927404738689, "grad_norm": 0.8624434471130371, "learning_rate": 2.299675827648135e-05, "loss": 0.1006, "step": 15493 }, { "epoch": 0.3414147757633851, "grad_norm": 0.7702829241752625, "learning_rate": 2.299585253901291e-05, "loss": 0.1045, "step": 15494 }, { "epoch": 0.34143681105290125, "grad_norm": 0.8750264048576355, "learning_rate": 2.2994946760817304e-05, "loss": 0.1011, "step": 15495 }, { "epoch": 0.34145884634241735, "grad_norm": 0.7235816717147827, "learning_rate": 2.2994040941899143e-05, "loss": 0.0963, "step": 15496 }, { "epoch": 0.3414808816319335, "grad_norm": 0.5253271460533142, "learning_rate": 2.299313508226305e-05, "loss": 0.0898, "step": 15497 }, { "epoch": 0.3415029169214497, "grad_norm": 0.48744797706604004, "learning_rate": 2.2992229181913633e-05, "loss": 0.0469, "step": 15498 }, { "epoch": 0.34152495221096585, "grad_norm": 0.6660159826278687, "learning_rate": 2.299132324085551e-05, "loss": 0.0749, "step": 15499 }, { "epoch": 0.341546987500482, "grad_norm": 0.6952356100082397, "learning_rate": 2.2990417259093286e-05, "loss": 0.1018, "step": 15500 }, { "epoch": 0.3415690227899982, "grad_norm": 0.5315253138542175, "learning_rate": 2.2989511236631587e-05, "loss": 0.1089, "step": 15501 }, { "epoch": 0.34159105807951434, "grad_norm": 0.9655186533927917, "learning_rate": 2.298860517347502e-05, "loss": 0.0852, "step": 15502 }, { "epoch": 0.3416130933690305, "grad_norm": 0.7778629660606384, "learning_rate": 2.2987699069628206e-05, "loss": 0.0713, "step": 15503 }, { "epoch": 0.34163512865854667, "grad_norm": 0.5770049691200256, "learning_rate": 2.2986792925095754e-05, "loss": 0.0829, "step": 15504 }, { "epoch": 0.34165716394806284, "grad_norm": 0.7824114561080933, "learning_rate": 2.298588673988228e-05, "loss": 0.0849, "step": 15505 }, { "epoch": 0.341679199237579, "grad_norm": 0.9751504063606262, "learning_rate": 2.2984980513992406e-05, "loss": 0.0605, "step": 15506 }, { "epoch": 0.34170123452709517, "grad_norm": 0.6794764399528503, "learning_rate": 2.298407424743075e-05, "loss": 0.0672, "step": 15507 }, { "epoch": 0.34172326981661133, "grad_norm": 0.5752079486846924, "learning_rate": 2.298316794020192e-05, "loss": 0.1152, "step": 15508 }, { "epoch": 0.34174530510612744, "grad_norm": 0.8829163908958435, "learning_rate": 2.2982261592310528e-05, "loss": 0.112, "step": 15509 }, { "epoch": 0.3417673403956436, "grad_norm": 0.6052986979484558, "learning_rate": 2.2981355203761202e-05, "loss": 0.0622, "step": 15510 }, { "epoch": 0.34178937568515977, "grad_norm": 1.1100064516067505, "learning_rate": 2.298044877455855e-05, "loss": 0.0835, "step": 15511 }, { "epoch": 0.34181141097467593, "grad_norm": 0.5274067521095276, "learning_rate": 2.29795423047072e-05, "loss": 0.0952, "step": 15512 }, { "epoch": 0.3418334462641921, "grad_norm": 0.612687349319458, "learning_rate": 2.2978635794211755e-05, "loss": 0.0549, "step": 15513 }, { "epoch": 0.34185548155370826, "grad_norm": 0.3802291452884674, "learning_rate": 2.2977729243076848e-05, "loss": 0.0501, "step": 15514 }, { "epoch": 0.3418775168432244, "grad_norm": 0.952269434928894, "learning_rate": 2.2976822651307083e-05, "loss": 0.1253, "step": 15515 }, { "epoch": 0.3418995521327406, "grad_norm": 0.593894898891449, "learning_rate": 2.297591601890708e-05, "loss": 0.0951, "step": 15516 }, { "epoch": 0.34192158742225676, "grad_norm": 0.9965342283248901, "learning_rate": 2.2975009345881463e-05, "loss": 0.1084, "step": 15517 }, { "epoch": 0.3419436227117729, "grad_norm": 0.6396911144256592, "learning_rate": 2.2974102632234844e-05, "loss": 0.0957, "step": 15518 }, { "epoch": 0.3419656580012891, "grad_norm": 0.7019976377487183, "learning_rate": 2.2973195877971847e-05, "loss": 0.0726, "step": 15519 }, { "epoch": 0.34198769329080525, "grad_norm": 0.5210567712783813, "learning_rate": 2.297228908309709e-05, "loss": 0.0783, "step": 15520 }, { "epoch": 0.34200972858032136, "grad_norm": 0.5376829504966736, "learning_rate": 2.2971382247615184e-05, "loss": 0.094, "step": 15521 }, { "epoch": 0.3420317638698375, "grad_norm": 0.7316378355026245, "learning_rate": 2.297047537153076e-05, "loss": 0.0847, "step": 15522 }, { "epoch": 0.3420537991593537, "grad_norm": 1.08315908908844, "learning_rate": 2.2969568454848426e-05, "loss": 0.1057, "step": 15523 }, { "epoch": 0.34207583444886985, "grad_norm": 0.745107889175415, "learning_rate": 2.2968661497572807e-05, "loss": 0.0744, "step": 15524 }, { "epoch": 0.342097869738386, "grad_norm": 0.9442466497421265, "learning_rate": 2.296775449970852e-05, "loss": 0.1091, "step": 15525 }, { "epoch": 0.3421199050279022, "grad_norm": 0.8091951012611389, "learning_rate": 2.296684746126019e-05, "loss": 0.1072, "step": 15526 }, { "epoch": 0.34214194031741835, "grad_norm": 0.7109048962593079, "learning_rate": 2.2965940382232433e-05, "loss": 0.0584, "step": 15527 }, { "epoch": 0.3421639756069345, "grad_norm": 0.6846466064453125, "learning_rate": 2.296503326262987e-05, "loss": 0.0777, "step": 15528 }, { "epoch": 0.3421860108964507, "grad_norm": 0.7158821821212769, "learning_rate": 2.296412610245712e-05, "loss": 0.0815, "step": 15529 }, { "epoch": 0.34220804618596684, "grad_norm": 0.8213937878608704, "learning_rate": 2.2963218901718804e-05, "loss": 0.1077, "step": 15530 }, { "epoch": 0.342230081475483, "grad_norm": 0.9434403777122498, "learning_rate": 2.296231166041955e-05, "loss": 0.1047, "step": 15531 }, { "epoch": 0.34225211676499917, "grad_norm": 0.7508414387702942, "learning_rate": 2.2961404378563972e-05, "loss": 0.0928, "step": 15532 }, { "epoch": 0.3422741520545153, "grad_norm": 0.5739777088165283, "learning_rate": 2.296049705615669e-05, "loss": 0.1136, "step": 15533 }, { "epoch": 0.34229618734403144, "grad_norm": 0.8396819829940796, "learning_rate": 2.295958969320232e-05, "loss": 0.1359, "step": 15534 }, { "epoch": 0.3423182226335476, "grad_norm": 0.5445755124092102, "learning_rate": 2.2958682289705504e-05, "loss": 0.0953, "step": 15535 }, { "epoch": 0.3423402579230638, "grad_norm": 0.7522442936897278, "learning_rate": 2.295777484567084e-05, "loss": 0.0936, "step": 15536 }, { "epoch": 0.34236229321257994, "grad_norm": 0.9533315300941467, "learning_rate": 2.295686736110297e-05, "loss": 0.1036, "step": 15537 }, { "epoch": 0.3423843285020961, "grad_norm": 0.9802852869033813, "learning_rate": 2.2955959836006502e-05, "loss": 0.1001, "step": 15538 }, { "epoch": 0.34240636379161227, "grad_norm": 0.7160168886184692, "learning_rate": 2.295505227038607e-05, "loss": 0.0961, "step": 15539 }, { "epoch": 0.34242839908112843, "grad_norm": 0.6115617156028748, "learning_rate": 2.2954144664246288e-05, "loss": 0.09, "step": 15540 }, { "epoch": 0.3424504343706446, "grad_norm": 0.9036780595779419, "learning_rate": 2.2953237017591784e-05, "loss": 0.0844, "step": 15541 }, { "epoch": 0.34247246966016076, "grad_norm": 0.5995199680328369, "learning_rate": 2.2952329330427173e-05, "loss": 0.0983, "step": 15542 }, { "epoch": 0.3424945049496769, "grad_norm": 0.6497060060501099, "learning_rate": 2.295142160275709e-05, "loss": 0.0968, "step": 15543 }, { "epoch": 0.3425165402391931, "grad_norm": 0.7219603061676025, "learning_rate": 2.2950513834586153e-05, "loss": 0.0903, "step": 15544 }, { "epoch": 0.34253857552870925, "grad_norm": 0.5726301074028015, "learning_rate": 2.2949606025918986e-05, "loss": 0.0823, "step": 15545 }, { "epoch": 0.34256061081822536, "grad_norm": 0.6934178471565247, "learning_rate": 2.2948698176760208e-05, "loss": 0.0717, "step": 15546 }, { "epoch": 0.34258264610774153, "grad_norm": 0.8252490758895874, "learning_rate": 2.294779028711445e-05, "loss": 0.1015, "step": 15547 }, { "epoch": 0.3426046813972577, "grad_norm": 0.8033679127693176, "learning_rate": 2.2946882356986335e-05, "loss": 0.1059, "step": 15548 }, { "epoch": 0.34262671668677386, "grad_norm": 0.5330632328987122, "learning_rate": 2.2945974386380485e-05, "loss": 0.1104, "step": 15549 }, { "epoch": 0.34264875197629, "grad_norm": 0.6267482042312622, "learning_rate": 2.2945066375301527e-05, "loss": 0.065, "step": 15550 }, { "epoch": 0.3426707872658062, "grad_norm": 0.9543006420135498, "learning_rate": 2.2944158323754087e-05, "loss": 0.0947, "step": 15551 }, { "epoch": 0.34269282255532235, "grad_norm": 0.7561203241348267, "learning_rate": 2.2943250231742784e-05, "loss": 0.1063, "step": 15552 }, { "epoch": 0.3427148578448385, "grad_norm": 0.6837005019187927, "learning_rate": 2.2942342099272252e-05, "loss": 0.1083, "step": 15553 }, { "epoch": 0.3427368931343547, "grad_norm": 0.7989661693572998, "learning_rate": 2.2941433926347114e-05, "loss": 0.0736, "step": 15554 }, { "epoch": 0.34275892842387085, "grad_norm": 0.8376283049583435, "learning_rate": 2.2940525712971992e-05, "loss": 0.0908, "step": 15555 }, { "epoch": 0.342780963713387, "grad_norm": 0.8776078224182129, "learning_rate": 2.2939617459151522e-05, "loss": 0.1082, "step": 15556 }, { "epoch": 0.3428029990029032, "grad_norm": 0.3893052935600281, "learning_rate": 2.293870916489031e-05, "loss": 0.0536, "step": 15557 }, { "epoch": 0.3428250342924193, "grad_norm": 0.5368766188621521, "learning_rate": 2.2937800830193006e-05, "loss": 0.0998, "step": 15558 }, { "epoch": 0.34284706958193545, "grad_norm": 0.7267708778381348, "learning_rate": 2.2936892455064214e-05, "loss": 0.0823, "step": 15559 }, { "epoch": 0.3428691048714516, "grad_norm": 0.5028982758522034, "learning_rate": 2.2935984039508583e-05, "loss": 0.0615, "step": 15560 }, { "epoch": 0.3428911401609678, "grad_norm": 0.6648319959640503, "learning_rate": 2.2935075583530726e-05, "loss": 0.0998, "step": 15561 }, { "epoch": 0.34291317545048394, "grad_norm": 1.0786126852035522, "learning_rate": 2.2934167087135273e-05, "loss": 0.1074, "step": 15562 }, { "epoch": 0.3429352107400001, "grad_norm": 1.0778255462646484, "learning_rate": 2.2933258550326855e-05, "loss": 0.0873, "step": 15563 }, { "epoch": 0.34295724602951627, "grad_norm": 0.8158806562423706, "learning_rate": 2.29323499731101e-05, "loss": 0.0961, "step": 15564 }, { "epoch": 0.34297928131903244, "grad_norm": 0.45148172974586487, "learning_rate": 2.2931441355489625e-05, "loss": 0.072, "step": 15565 }, { "epoch": 0.3430013166085486, "grad_norm": 0.4056069254875183, "learning_rate": 2.2930532697470067e-05, "loss": 0.0679, "step": 15566 }, { "epoch": 0.34302335189806477, "grad_norm": 0.7978208661079407, "learning_rate": 2.292962399905606e-05, "loss": 0.1105, "step": 15567 }, { "epoch": 0.34304538718758093, "grad_norm": 0.6281329989433289, "learning_rate": 2.292871526025222e-05, "loss": 0.1178, "step": 15568 }, { "epoch": 0.3430674224770971, "grad_norm": 0.9857895970344543, "learning_rate": 2.292780648106318e-05, "loss": 0.1141, "step": 15569 }, { "epoch": 0.3430894577666132, "grad_norm": 0.8963272571563721, "learning_rate": 2.292689766149358e-05, "loss": 0.0838, "step": 15570 }, { "epoch": 0.34311149305612937, "grad_norm": 0.7309543490409851, "learning_rate": 2.2925988801548028e-05, "loss": 0.0825, "step": 15571 }, { "epoch": 0.34313352834564553, "grad_norm": 0.5780864953994751, "learning_rate": 2.292507990123117e-05, "loss": 0.0935, "step": 15572 }, { "epoch": 0.3431555636351617, "grad_norm": 0.7853258848190308, "learning_rate": 2.2924170960547632e-05, "loss": 0.0836, "step": 15573 }, { "epoch": 0.34317759892467786, "grad_norm": 0.5280391573905945, "learning_rate": 2.292326197950204e-05, "loss": 0.0956, "step": 15574 }, { "epoch": 0.343199634214194, "grad_norm": 0.5804572105407715, "learning_rate": 2.2922352958099026e-05, "loss": 0.0837, "step": 15575 }, { "epoch": 0.3432216695037102, "grad_norm": 1.284508466720581, "learning_rate": 2.2921443896343216e-05, "loss": 0.1232, "step": 15576 }, { "epoch": 0.34324370479322636, "grad_norm": 0.8415805101394653, "learning_rate": 2.2920534794239252e-05, "loss": 0.0605, "step": 15577 }, { "epoch": 0.3432657400827425, "grad_norm": 0.6701679825782776, "learning_rate": 2.291962565179175e-05, "loss": 0.0902, "step": 15578 }, { "epoch": 0.3432877753722587, "grad_norm": 0.9444369673728943, "learning_rate": 2.2918716469005357e-05, "loss": 0.0782, "step": 15579 }, { "epoch": 0.34330981066177485, "grad_norm": 0.675673246383667, "learning_rate": 2.2917807245884685e-05, "loss": 0.0913, "step": 15580 }, { "epoch": 0.343331845951291, "grad_norm": 0.7893633246421814, "learning_rate": 2.291689798243438e-05, "loss": 0.1287, "step": 15581 }, { "epoch": 0.3433538812408072, "grad_norm": 0.4009445309638977, "learning_rate": 2.2915988678659066e-05, "loss": 0.0789, "step": 15582 }, { "epoch": 0.3433759165303233, "grad_norm": 0.6571391224861145, "learning_rate": 2.291507933456338e-05, "loss": 0.0601, "step": 15583 }, { "epoch": 0.34339795181983945, "grad_norm": 0.7390123009681702, "learning_rate": 2.2914169950151948e-05, "loss": 0.081, "step": 15584 }, { "epoch": 0.3434199871093556, "grad_norm": 0.5977465510368347, "learning_rate": 2.2913260525429405e-05, "loss": 0.0642, "step": 15585 }, { "epoch": 0.3434420223988718, "grad_norm": 1.2604660987854004, "learning_rate": 2.2912351060400378e-05, "loss": 0.1208, "step": 15586 }, { "epoch": 0.34346405768838795, "grad_norm": 0.5740699768066406, "learning_rate": 2.291144155506951e-05, "loss": 0.0962, "step": 15587 }, { "epoch": 0.3434860929779041, "grad_norm": 0.8870011568069458, "learning_rate": 2.291053200944143e-05, "loss": 0.0955, "step": 15588 }, { "epoch": 0.3435081282674203, "grad_norm": 0.48593243956565857, "learning_rate": 2.290962242352076e-05, "loss": 0.0785, "step": 15589 }, { "epoch": 0.34353016355693644, "grad_norm": 0.6885992288589478, "learning_rate": 2.290871279731215e-05, "loss": 0.1169, "step": 15590 }, { "epoch": 0.3435521988464526, "grad_norm": 0.648871660232544, "learning_rate": 2.290780313082022e-05, "loss": 0.0601, "step": 15591 }, { "epoch": 0.34357423413596877, "grad_norm": 0.9507111310958862, "learning_rate": 2.2906893424049606e-05, "loss": 0.1023, "step": 15592 }, { "epoch": 0.34359626942548493, "grad_norm": 0.8608511686325073, "learning_rate": 2.290598367700495e-05, "loss": 0.1039, "step": 15593 }, { "epoch": 0.3436183047150011, "grad_norm": 0.6385560035705566, "learning_rate": 2.2905073889690876e-05, "loss": 0.0806, "step": 15594 }, { "epoch": 0.3436403400045172, "grad_norm": 0.8681311011314392, "learning_rate": 2.2904164062112023e-05, "loss": 0.1018, "step": 15595 }, { "epoch": 0.3436623752940334, "grad_norm": 0.768975555896759, "learning_rate": 2.2903254194273023e-05, "loss": 0.1063, "step": 15596 }, { "epoch": 0.34368441058354954, "grad_norm": 0.8557432889938354, "learning_rate": 2.290234428617851e-05, "loss": 0.09, "step": 15597 }, { "epoch": 0.3437064458730657, "grad_norm": 0.3185635209083557, "learning_rate": 2.290143433783312e-05, "loss": 0.0707, "step": 15598 }, { "epoch": 0.34372848116258187, "grad_norm": 0.6924816966056824, "learning_rate": 2.290052434924149e-05, "loss": 0.0674, "step": 15599 }, { "epoch": 0.34375051645209803, "grad_norm": 0.6414652466773987, "learning_rate": 2.2899614320408252e-05, "loss": 0.1059, "step": 15600 }, { "epoch": 0.3437725517416142, "grad_norm": 0.7544388175010681, "learning_rate": 2.2898704251338042e-05, "loss": 0.0905, "step": 15601 }, { "epoch": 0.34379458703113036, "grad_norm": 0.5999082326889038, "learning_rate": 2.2897794142035495e-05, "loss": 0.1134, "step": 15602 }, { "epoch": 0.3438166223206465, "grad_norm": 1.1945233345031738, "learning_rate": 2.2896883992505253e-05, "loss": 0.0884, "step": 15603 }, { "epoch": 0.3438386576101627, "grad_norm": 0.3469068109989166, "learning_rate": 2.289597380275194e-05, "loss": 0.0649, "step": 15604 }, { "epoch": 0.34386069289967885, "grad_norm": 0.7007250189781189, "learning_rate": 2.28950635727802e-05, "loss": 0.0467, "step": 15605 }, { "epoch": 0.343882728189195, "grad_norm": 0.803195059299469, "learning_rate": 2.289415330259467e-05, "loss": 0.1049, "step": 15606 }, { "epoch": 0.34390476347871113, "grad_norm": 0.5449765920639038, "learning_rate": 2.289324299219998e-05, "loss": 0.0757, "step": 15607 }, { "epoch": 0.3439267987682273, "grad_norm": 0.7146263122558594, "learning_rate": 2.2892332641600768e-05, "loss": 0.0996, "step": 15608 }, { "epoch": 0.34394883405774346, "grad_norm": 0.7846525311470032, "learning_rate": 2.2891422250801678e-05, "loss": 0.1041, "step": 15609 }, { "epoch": 0.3439708693472596, "grad_norm": 0.627589225769043, "learning_rate": 2.2890511819807344e-05, "loss": 0.0693, "step": 15610 }, { "epoch": 0.3439929046367758, "grad_norm": 0.6763204336166382, "learning_rate": 2.28896013486224e-05, "loss": 0.072, "step": 15611 }, { "epoch": 0.34401493992629195, "grad_norm": 1.1823879480361938, "learning_rate": 2.2888690837251488e-05, "loss": 0.1221, "step": 15612 }, { "epoch": 0.3440369752158081, "grad_norm": 0.6856310367584229, "learning_rate": 2.288778028569924e-05, "loss": 0.07, "step": 15613 }, { "epoch": 0.3440590105053243, "grad_norm": 0.7949039340019226, "learning_rate": 2.2886869693970297e-05, "loss": 0.1003, "step": 15614 }, { "epoch": 0.34408104579484045, "grad_norm": 0.8826439380645752, "learning_rate": 2.28859590620693e-05, "loss": 0.0887, "step": 15615 }, { "epoch": 0.3441030810843566, "grad_norm": 0.772039532661438, "learning_rate": 2.2885048390000884e-05, "loss": 0.0701, "step": 15616 }, { "epoch": 0.3441251163738728, "grad_norm": 0.7404252886772156, "learning_rate": 2.2884137677769687e-05, "loss": 0.0711, "step": 15617 }, { "epoch": 0.34414715166338894, "grad_norm": 0.5740768313407898, "learning_rate": 2.2883226925380353e-05, "loss": 0.1349, "step": 15618 }, { "epoch": 0.3441691869529051, "grad_norm": 0.9480957984924316, "learning_rate": 2.2882316132837514e-05, "loss": 0.0985, "step": 15619 }, { "epoch": 0.3441912222424212, "grad_norm": 1.1255013942718506, "learning_rate": 2.288140530014581e-05, "loss": 0.0961, "step": 15620 }, { "epoch": 0.3442132575319374, "grad_norm": 0.9795185327529907, "learning_rate": 2.2880494427309886e-05, "loss": 0.1776, "step": 15621 }, { "epoch": 0.34423529282145354, "grad_norm": 0.8315873146057129, "learning_rate": 2.2879583514334374e-05, "loss": 0.0981, "step": 15622 }, { "epoch": 0.3442573281109697, "grad_norm": 0.6021668910980225, "learning_rate": 2.287867256122392e-05, "loss": 0.0911, "step": 15623 }, { "epoch": 0.34427936340048587, "grad_norm": 0.6450057029724121, "learning_rate": 2.2877761567983164e-05, "loss": 0.0722, "step": 15624 }, { "epoch": 0.34430139869000204, "grad_norm": 0.6191999316215515, "learning_rate": 2.287685053461674e-05, "loss": 0.0742, "step": 15625 }, { "epoch": 0.3443234339795182, "grad_norm": 0.774321973323822, "learning_rate": 2.2875939461129296e-05, "loss": 0.0718, "step": 15626 }, { "epoch": 0.34434546926903437, "grad_norm": 0.8857584595680237, "learning_rate": 2.287502834752547e-05, "loss": 0.1124, "step": 15627 }, { "epoch": 0.34436750455855053, "grad_norm": 0.5226861834526062, "learning_rate": 2.2874117193809895e-05, "loss": 0.0647, "step": 15628 }, { "epoch": 0.3443895398480667, "grad_norm": 1.0055004358291626, "learning_rate": 2.2873205999987223e-05, "loss": 0.0919, "step": 15629 }, { "epoch": 0.34441157513758286, "grad_norm": 0.8675538301467896, "learning_rate": 2.287229476606209e-05, "loss": 0.0814, "step": 15630 }, { "epoch": 0.344433610427099, "grad_norm": 0.7394942045211792, "learning_rate": 2.287138349203914e-05, "loss": 0.1048, "step": 15631 }, { "epoch": 0.34445564571661513, "grad_norm": 0.8911638259887695, "learning_rate": 2.2870472177923012e-05, "loss": 0.0897, "step": 15632 }, { "epoch": 0.3444776810061313, "grad_norm": 0.6990329027175903, "learning_rate": 2.2869560823718348e-05, "loss": 0.077, "step": 15633 }, { "epoch": 0.34449971629564746, "grad_norm": 0.6582764983177185, "learning_rate": 2.2868649429429794e-05, "loss": 0.0773, "step": 15634 }, { "epoch": 0.3445217515851636, "grad_norm": 0.5917112827301025, "learning_rate": 2.286773799506199e-05, "loss": 0.0676, "step": 15635 }, { "epoch": 0.3445437868746798, "grad_norm": 0.8840169310569763, "learning_rate": 2.2866826520619565e-05, "loss": 0.0904, "step": 15636 }, { "epoch": 0.34456582216419596, "grad_norm": 0.6926472783088684, "learning_rate": 2.2865915006107187e-05, "loss": 0.1122, "step": 15637 }, { "epoch": 0.3445878574537121, "grad_norm": 0.4054352343082428, "learning_rate": 2.286500345152948e-05, "loss": 0.0847, "step": 15638 }, { "epoch": 0.3446098927432283, "grad_norm": 0.5931341052055359, "learning_rate": 2.2864091856891095e-05, "loss": 0.0982, "step": 15639 }, { "epoch": 0.34463192803274445, "grad_norm": 1.297052025794983, "learning_rate": 2.286318022219667e-05, "loss": 0.1142, "step": 15640 }, { "epoch": 0.3446539633222606, "grad_norm": 0.773459255695343, "learning_rate": 2.286226854745085e-05, "loss": 0.1292, "step": 15641 }, { "epoch": 0.3446759986117768, "grad_norm": 0.722446620464325, "learning_rate": 2.286135683265829e-05, "loss": 0.0974, "step": 15642 }, { "epoch": 0.34469803390129294, "grad_norm": 0.7119460701942444, "learning_rate": 2.2860445077823617e-05, "loss": 0.1088, "step": 15643 }, { "epoch": 0.34472006919080905, "grad_norm": 0.9515073299407959, "learning_rate": 2.2859533282951485e-05, "loss": 0.0778, "step": 15644 }, { "epoch": 0.3447421044803252, "grad_norm": 0.9982091784477234, "learning_rate": 2.285862144804653e-05, "loss": 0.1189, "step": 15645 }, { "epoch": 0.3447641397698414, "grad_norm": 0.7424820065498352, "learning_rate": 2.2857709573113406e-05, "loss": 0.0772, "step": 15646 }, { "epoch": 0.34478617505935755, "grad_norm": 0.6288601756095886, "learning_rate": 2.2856797658156753e-05, "loss": 0.1072, "step": 15647 }, { "epoch": 0.3448082103488737, "grad_norm": 1.3799757957458496, "learning_rate": 2.2855885703181218e-05, "loss": 0.1076, "step": 15648 }, { "epoch": 0.3448302456383899, "grad_norm": 1.1448177099227905, "learning_rate": 2.285497370819144e-05, "loss": 0.1221, "step": 15649 }, { "epoch": 0.34485228092790604, "grad_norm": 0.8506858348846436, "learning_rate": 2.2854061673192075e-05, "loss": 0.1285, "step": 15650 }, { "epoch": 0.3448743162174222, "grad_norm": 0.859228789806366, "learning_rate": 2.285314959818776e-05, "loss": 0.0957, "step": 15651 }, { "epoch": 0.34489635150693837, "grad_norm": 1.2588986158370972, "learning_rate": 2.285223748318314e-05, "loss": 0.114, "step": 15652 }, { "epoch": 0.34491838679645453, "grad_norm": 0.999616265296936, "learning_rate": 2.2851325328182866e-05, "loss": 0.0819, "step": 15653 }, { "epoch": 0.3449404220859707, "grad_norm": 0.4330684244632721, "learning_rate": 2.285041313319158e-05, "loss": 0.1215, "step": 15654 }, { "epoch": 0.34496245737548686, "grad_norm": 0.681854248046875, "learning_rate": 2.284950089821393e-05, "loss": 0.1074, "step": 15655 }, { "epoch": 0.34498449266500303, "grad_norm": 0.7222291827201843, "learning_rate": 2.2848588623254568e-05, "loss": 0.073, "step": 15656 }, { "epoch": 0.34500652795451914, "grad_norm": 1.0493165254592896, "learning_rate": 2.2847676308318132e-05, "loss": 0.0857, "step": 15657 }, { "epoch": 0.3450285632440353, "grad_norm": 0.9473996758460999, "learning_rate": 2.284676395340927e-05, "loss": 0.1333, "step": 15658 }, { "epoch": 0.34505059853355147, "grad_norm": 0.6593798398971558, "learning_rate": 2.2845851558532635e-05, "loss": 0.1019, "step": 15659 }, { "epoch": 0.34507263382306763, "grad_norm": 0.45557713508605957, "learning_rate": 2.2844939123692867e-05, "loss": 0.099, "step": 15660 }, { "epoch": 0.3450946691125838, "grad_norm": 0.7165402770042419, "learning_rate": 2.284402664889462e-05, "loss": 0.0658, "step": 15661 }, { "epoch": 0.34511670440209996, "grad_norm": 1.1095608472824097, "learning_rate": 2.284311413414254e-05, "loss": 0.1075, "step": 15662 }, { "epoch": 0.3451387396916161, "grad_norm": 1.1296813488006592, "learning_rate": 2.284220157944127e-05, "loss": 0.1093, "step": 15663 }, { "epoch": 0.3451607749811323, "grad_norm": 2.553605318069458, "learning_rate": 2.2841288984795465e-05, "loss": 0.0696, "step": 15664 }, { "epoch": 0.34518281027064845, "grad_norm": 0.8389136791229248, "learning_rate": 2.2840376350209767e-05, "loss": 0.0869, "step": 15665 }, { "epoch": 0.3452048455601646, "grad_norm": 0.7502111792564392, "learning_rate": 2.2839463675688834e-05, "loss": 0.0916, "step": 15666 }, { "epoch": 0.3452268808496808, "grad_norm": 1.7609894275665283, "learning_rate": 2.2838550961237304e-05, "loss": 0.1145, "step": 15667 }, { "epoch": 0.34524891613919695, "grad_norm": 0.6749935150146484, "learning_rate": 2.283763820685983e-05, "loss": 0.0991, "step": 15668 }, { "epoch": 0.34527095142871306, "grad_norm": 1.1698124408721924, "learning_rate": 2.2836725412561064e-05, "loss": 0.097, "step": 15669 }, { "epoch": 0.3452929867182292, "grad_norm": 0.4020954668521881, "learning_rate": 2.2835812578345655e-05, "loss": 0.0495, "step": 15670 }, { "epoch": 0.3453150220077454, "grad_norm": 0.752305805683136, "learning_rate": 2.2834899704218247e-05, "loss": 0.1467, "step": 15671 }, { "epoch": 0.34533705729726155, "grad_norm": 0.7195764183998108, "learning_rate": 2.2833986790183494e-05, "loss": 0.0825, "step": 15672 }, { "epoch": 0.3453590925867777, "grad_norm": 0.723987877368927, "learning_rate": 2.2833073836246046e-05, "loss": 0.0953, "step": 15673 }, { "epoch": 0.3453811278762939, "grad_norm": 0.9939140677452087, "learning_rate": 2.2832160842410555e-05, "loss": 0.1058, "step": 15674 }, { "epoch": 0.34540316316581005, "grad_norm": 0.5951517820358276, "learning_rate": 2.2831247808681665e-05, "loss": 0.1002, "step": 15675 }, { "epoch": 0.3454251984553262, "grad_norm": 1.1146570444107056, "learning_rate": 2.283033473506403e-05, "loss": 0.1267, "step": 15676 }, { "epoch": 0.3454472337448424, "grad_norm": 0.6505219340324402, "learning_rate": 2.2829421621562307e-05, "loss": 0.1253, "step": 15677 }, { "epoch": 0.34546926903435854, "grad_norm": 1.028485894203186, "learning_rate": 2.2828508468181133e-05, "loss": 0.0882, "step": 15678 }, { "epoch": 0.3454913043238747, "grad_norm": 0.7587677240371704, "learning_rate": 2.2827595274925174e-05, "loss": 0.0763, "step": 15679 }, { "epoch": 0.34551333961339087, "grad_norm": 0.6700853705406189, "learning_rate": 2.282668204179907e-05, "loss": 0.0905, "step": 15680 }, { "epoch": 0.34553537490290703, "grad_norm": 0.8261275291442871, "learning_rate": 2.2825768768807483e-05, "loss": 0.0906, "step": 15681 }, { "epoch": 0.34555741019242314, "grad_norm": 0.6887623071670532, "learning_rate": 2.2824855455955056e-05, "loss": 0.0951, "step": 15682 }, { "epoch": 0.3455794454819393, "grad_norm": 0.35434210300445557, "learning_rate": 2.2823942103246447e-05, "loss": 0.0716, "step": 15683 }, { "epoch": 0.34560148077145547, "grad_norm": 0.5023024678230286, "learning_rate": 2.28230287106863e-05, "loss": 0.0806, "step": 15684 }, { "epoch": 0.34562351606097164, "grad_norm": 0.7735083699226379, "learning_rate": 2.2822115278279275e-05, "loss": 0.0879, "step": 15685 }, { "epoch": 0.3456455513504878, "grad_norm": 0.756040096282959, "learning_rate": 2.2821201806030017e-05, "loss": 0.1043, "step": 15686 }, { "epoch": 0.34566758664000397, "grad_norm": 0.7780510783195496, "learning_rate": 2.282028829394319e-05, "loss": 0.102, "step": 15687 }, { "epoch": 0.34568962192952013, "grad_norm": 0.9141318202018738, "learning_rate": 2.2819374742023443e-05, "loss": 0.079, "step": 15688 }, { "epoch": 0.3457116572190363, "grad_norm": 0.679362952709198, "learning_rate": 2.2818461150275424e-05, "loss": 0.0765, "step": 15689 }, { "epoch": 0.34573369250855246, "grad_norm": 0.6888589859008789, "learning_rate": 2.2817547518703795e-05, "loss": 0.097, "step": 15690 }, { "epoch": 0.3457557277980686, "grad_norm": 0.5590100884437561, "learning_rate": 2.2816633847313198e-05, "loss": 0.0588, "step": 15691 }, { "epoch": 0.3457777630875848, "grad_norm": 1.0729056596755981, "learning_rate": 2.28157201361083e-05, "loss": 0.114, "step": 15692 }, { "epoch": 0.34579979837710095, "grad_norm": 0.817478358745575, "learning_rate": 2.2814806385093738e-05, "loss": 0.1099, "step": 15693 }, { "epoch": 0.34582183366661706, "grad_norm": 1.1639045476913452, "learning_rate": 2.2813892594274186e-05, "loss": 0.1134, "step": 15694 }, { "epoch": 0.3458438689561332, "grad_norm": 0.6993629336357117, "learning_rate": 2.2812978763654283e-05, "loss": 0.0835, "step": 15695 }, { "epoch": 0.3458659042456494, "grad_norm": 0.6665972471237183, "learning_rate": 2.2812064893238692e-05, "loss": 0.0859, "step": 15696 }, { "epoch": 0.34588793953516556, "grad_norm": 0.7882569432258606, "learning_rate": 2.2811150983032062e-05, "loss": 0.0735, "step": 15697 }, { "epoch": 0.3459099748246817, "grad_norm": 0.8256979584693909, "learning_rate": 2.2810237033039057e-05, "loss": 0.074, "step": 15698 }, { "epoch": 0.3459320101141979, "grad_norm": 0.7428344488143921, "learning_rate": 2.2809323043264324e-05, "loss": 0.0829, "step": 15699 }, { "epoch": 0.34595404540371405, "grad_norm": 0.6704764366149902, "learning_rate": 2.2808409013712523e-05, "loss": 0.0717, "step": 15700 }, { "epoch": 0.3459760806932302, "grad_norm": 0.6634418964385986, "learning_rate": 2.2807494944388304e-05, "loss": 0.058, "step": 15701 }, { "epoch": 0.3459981159827464, "grad_norm": 1.030586838722229, "learning_rate": 2.2806580835296328e-05, "loss": 0.1016, "step": 15702 }, { "epoch": 0.34602015127226254, "grad_norm": 0.7295882105827332, "learning_rate": 2.280566668644125e-05, "loss": 0.1239, "step": 15703 }, { "epoch": 0.3460421865617787, "grad_norm": 0.7517279386520386, "learning_rate": 2.2804752497827725e-05, "loss": 0.1079, "step": 15704 }, { "epoch": 0.3460642218512949, "grad_norm": 0.7119961977005005, "learning_rate": 2.280383826946041e-05, "loss": 0.1069, "step": 15705 }, { "epoch": 0.346086257140811, "grad_norm": 0.5699567198753357, "learning_rate": 2.2802924001343964e-05, "loss": 0.0822, "step": 15706 }, { "epoch": 0.34610829243032715, "grad_norm": 1.1085342168807983, "learning_rate": 2.2802009693483042e-05, "loss": 0.0884, "step": 15707 }, { "epoch": 0.3461303277198433, "grad_norm": 1.0291794538497925, "learning_rate": 2.2801095345882296e-05, "loss": 0.0966, "step": 15708 }, { "epoch": 0.3461523630093595, "grad_norm": 0.5764392018318176, "learning_rate": 2.2800180958546394e-05, "loss": 0.1016, "step": 15709 }, { "epoch": 0.34617439829887564, "grad_norm": 0.8211067914962769, "learning_rate": 2.2799266531479985e-05, "loss": 0.0812, "step": 15710 }, { "epoch": 0.3461964335883918, "grad_norm": 0.434322714805603, "learning_rate": 2.279835206468773e-05, "loss": 0.1146, "step": 15711 }, { "epoch": 0.34621846887790797, "grad_norm": 0.7233543395996094, "learning_rate": 2.2797437558174286e-05, "loss": 0.094, "step": 15712 }, { "epoch": 0.34624050416742413, "grad_norm": 0.7323859333992004, "learning_rate": 2.2796523011944307e-05, "loss": 0.1237, "step": 15713 }, { "epoch": 0.3462625394569403, "grad_norm": 0.7664365768432617, "learning_rate": 2.2795608426002462e-05, "loss": 0.0771, "step": 15714 }, { "epoch": 0.34628457474645646, "grad_norm": 0.3627753257751465, "learning_rate": 2.2794693800353403e-05, "loss": 0.065, "step": 15715 }, { "epoch": 0.34630661003597263, "grad_norm": 0.8003020882606506, "learning_rate": 2.2793779135001786e-05, "loss": 0.0652, "step": 15716 }, { "epoch": 0.3463286453254888, "grad_norm": 0.4311179220676422, "learning_rate": 2.279286442995227e-05, "loss": 0.1012, "step": 15717 }, { "epoch": 0.34635068061500496, "grad_norm": 0.5380690693855286, "learning_rate": 2.2791949685209517e-05, "loss": 0.0949, "step": 15718 }, { "epoch": 0.34637271590452107, "grad_norm": 0.6565305590629578, "learning_rate": 2.279103490077819e-05, "loss": 0.1043, "step": 15719 }, { "epoch": 0.34639475119403723, "grad_norm": 1.0030549764633179, "learning_rate": 2.279012007666294e-05, "loss": 0.087, "step": 15720 }, { "epoch": 0.3464167864835534, "grad_norm": 0.9611507654190063, "learning_rate": 2.2789205212868435e-05, "loss": 0.1132, "step": 15721 }, { "epoch": 0.34643882177306956, "grad_norm": 0.4900299608707428, "learning_rate": 2.278829030939933e-05, "loss": 0.0842, "step": 15722 }, { "epoch": 0.3464608570625857, "grad_norm": 0.27485501766204834, "learning_rate": 2.278737536626029e-05, "loss": 0.1014, "step": 15723 }, { "epoch": 0.3464828923521019, "grad_norm": 0.7073739171028137, "learning_rate": 2.278646038345596e-05, "loss": 0.1009, "step": 15724 }, { "epoch": 0.34650492764161805, "grad_norm": 2.197664499282837, "learning_rate": 2.2785545360991023e-05, "loss": 0.1085, "step": 15725 }, { "epoch": 0.3465269629311342, "grad_norm": 0.7325596213340759, "learning_rate": 2.2784630298870118e-05, "loss": 0.0818, "step": 15726 }, { "epoch": 0.3465489982206504, "grad_norm": 0.5434614419937134, "learning_rate": 2.2783715197097927e-05, "loss": 0.0825, "step": 15727 }, { "epoch": 0.34657103351016655, "grad_norm": 1.1195623874664307, "learning_rate": 2.2782800055679092e-05, "loss": 0.1013, "step": 15728 }, { "epoch": 0.3465930687996827, "grad_norm": 0.5892848968505859, "learning_rate": 2.278188487461829e-05, "loss": 0.0875, "step": 15729 }, { "epoch": 0.3466151040891989, "grad_norm": 1.0121444463729858, "learning_rate": 2.2780969653920175e-05, "loss": 0.0698, "step": 15730 }, { "epoch": 0.346637139378715, "grad_norm": 0.9338071942329407, "learning_rate": 2.2780054393589403e-05, "loss": 0.106, "step": 15731 }, { "epoch": 0.34665917466823115, "grad_norm": 0.7906869649887085, "learning_rate": 2.2779139093630643e-05, "loss": 0.0927, "step": 15732 }, { "epoch": 0.3466812099577473, "grad_norm": 0.7718551754951477, "learning_rate": 2.2778223754048558e-05, "loss": 0.0761, "step": 15733 }, { "epoch": 0.3467032452472635, "grad_norm": 0.7148812413215637, "learning_rate": 2.2777308374847812e-05, "loss": 0.0768, "step": 15734 }, { "epoch": 0.34672528053677965, "grad_norm": 0.8523630499839783, "learning_rate": 2.2776392956033056e-05, "loss": 0.0953, "step": 15735 }, { "epoch": 0.3467473158262958, "grad_norm": 0.8864356875419617, "learning_rate": 2.277547749760897e-05, "loss": 0.0953, "step": 15736 }, { "epoch": 0.346769351115812, "grad_norm": 0.7713064551353455, "learning_rate": 2.2774561999580204e-05, "loss": 0.0732, "step": 15737 }, { "epoch": 0.34679138640532814, "grad_norm": 0.7082939743995667, "learning_rate": 2.277364646195142e-05, "loss": 0.0836, "step": 15738 }, { "epoch": 0.3468134216948443, "grad_norm": 0.7219741940498352, "learning_rate": 2.277273088472729e-05, "loss": 0.1418, "step": 15739 }, { "epoch": 0.34683545698436047, "grad_norm": 0.8279860019683838, "learning_rate": 2.2771815267912478e-05, "loss": 0.0961, "step": 15740 }, { "epoch": 0.34685749227387663, "grad_norm": 0.545566737651825, "learning_rate": 2.2770899611511635e-05, "loss": 0.0833, "step": 15741 }, { "epoch": 0.3468795275633928, "grad_norm": 0.9684289693832397, "learning_rate": 2.2769983915529436e-05, "loss": 0.0978, "step": 15742 }, { "epoch": 0.3469015628529089, "grad_norm": 0.6772574782371521, "learning_rate": 2.276906817997054e-05, "loss": 0.0927, "step": 15743 }, { "epoch": 0.34692359814242507, "grad_norm": 0.4736139476299286, "learning_rate": 2.276815240483962e-05, "loss": 0.0732, "step": 15744 }, { "epoch": 0.34694563343194124, "grad_norm": 0.7887052893638611, "learning_rate": 2.276723659014133e-05, "loss": 0.0787, "step": 15745 }, { "epoch": 0.3469676687214574, "grad_norm": 0.7001731991767883, "learning_rate": 2.2766320735880338e-05, "loss": 0.0855, "step": 15746 }, { "epoch": 0.34698970401097357, "grad_norm": 0.7627991437911987, "learning_rate": 2.2765404842061313e-05, "loss": 0.069, "step": 15747 }, { "epoch": 0.34701173930048973, "grad_norm": 0.6573037505149841, "learning_rate": 2.276448890868892e-05, "loss": 0.0587, "step": 15748 }, { "epoch": 0.3470337745900059, "grad_norm": 0.8123222589492798, "learning_rate": 2.2763572935767814e-05, "loss": 0.097, "step": 15749 }, { "epoch": 0.34705580987952206, "grad_norm": 0.8379802107810974, "learning_rate": 2.2762656923302668e-05, "loss": 0.0757, "step": 15750 }, { "epoch": 0.3470778451690382, "grad_norm": 0.688858687877655, "learning_rate": 2.276174087129815e-05, "loss": 0.1067, "step": 15751 }, { "epoch": 0.3470998804585544, "grad_norm": 0.4590275287628174, "learning_rate": 2.2760824779758924e-05, "loss": 0.0679, "step": 15752 }, { "epoch": 0.34712191574807055, "grad_norm": 0.7579532265663147, "learning_rate": 2.2759908648689656e-05, "loss": 0.1105, "step": 15753 }, { "epoch": 0.3471439510375867, "grad_norm": 0.9524000287055969, "learning_rate": 2.2758992478095013e-05, "loss": 0.1044, "step": 15754 }, { "epoch": 0.3471659863271029, "grad_norm": 0.5323660373687744, "learning_rate": 2.2758076267979658e-05, "loss": 0.0794, "step": 15755 }, { "epoch": 0.347188021616619, "grad_norm": 0.5193942189216614, "learning_rate": 2.2757160018348262e-05, "loss": 0.0782, "step": 15756 }, { "epoch": 0.34721005690613516, "grad_norm": 0.6679472923278809, "learning_rate": 2.275624372920549e-05, "loss": 0.1006, "step": 15757 }, { "epoch": 0.3472320921956513, "grad_norm": 0.9241570830345154, "learning_rate": 2.275532740055601e-05, "loss": 0.1443, "step": 15758 }, { "epoch": 0.3472541274851675, "grad_norm": 0.5226077437400818, "learning_rate": 2.2754411032404486e-05, "loss": 0.077, "step": 15759 }, { "epoch": 0.34727616277468365, "grad_norm": 0.8716298937797546, "learning_rate": 2.275349462475559e-05, "loss": 0.1012, "step": 15760 }, { "epoch": 0.3472981980641998, "grad_norm": 0.9472816586494446, "learning_rate": 2.2752578177613993e-05, "loss": 0.1062, "step": 15761 }, { "epoch": 0.347320233353716, "grad_norm": 0.6864118576049805, "learning_rate": 2.275166169098435e-05, "loss": 0.1084, "step": 15762 }, { "epoch": 0.34734226864323214, "grad_norm": 0.7043130397796631, "learning_rate": 2.2750745164871343e-05, "loss": 0.1017, "step": 15763 }, { "epoch": 0.3473643039327483, "grad_norm": 0.6659364700317383, "learning_rate": 2.274982859927963e-05, "loss": 0.1521, "step": 15764 }, { "epoch": 0.3473863392222645, "grad_norm": 1.0546451807022095, "learning_rate": 2.274891199421389e-05, "loss": 0.1133, "step": 15765 }, { "epoch": 0.34740837451178064, "grad_norm": 0.636772871017456, "learning_rate": 2.2747995349678778e-05, "loss": 0.0908, "step": 15766 }, { "epoch": 0.3474304098012968, "grad_norm": 0.8144022822380066, "learning_rate": 2.2747078665678976e-05, "loss": 0.0969, "step": 15767 }, { "epoch": 0.3474524450908129, "grad_norm": 0.7559825778007507, "learning_rate": 2.2746161942219148e-05, "loss": 0.071, "step": 15768 }, { "epoch": 0.3474744803803291, "grad_norm": 0.5681222677230835, "learning_rate": 2.2745245179303966e-05, "loss": 0.0868, "step": 15769 }, { "epoch": 0.34749651566984524, "grad_norm": 0.885175347328186, "learning_rate": 2.2744328376938096e-05, "loss": 0.0915, "step": 15770 }, { "epoch": 0.3475185509593614, "grad_norm": 0.6853723526000977, "learning_rate": 2.2743411535126203e-05, "loss": 0.0815, "step": 15771 }, { "epoch": 0.34754058624887757, "grad_norm": 0.7229897975921631, "learning_rate": 2.274249465387297e-05, "loss": 0.0907, "step": 15772 }, { "epoch": 0.34756262153839373, "grad_norm": 0.7606834769248962, "learning_rate": 2.2741577733183055e-05, "loss": 0.0545, "step": 15773 }, { "epoch": 0.3475846568279099, "grad_norm": 1.1934125423431396, "learning_rate": 2.2740660773061134e-05, "loss": 0.1448, "step": 15774 }, { "epoch": 0.34760669211742606, "grad_norm": 0.8220300674438477, "learning_rate": 2.2739743773511873e-05, "loss": 0.1063, "step": 15775 }, { "epoch": 0.34762872740694223, "grad_norm": 0.6091263294219971, "learning_rate": 2.2738826734539956e-05, "loss": 0.0857, "step": 15776 }, { "epoch": 0.3476507626964584, "grad_norm": 0.8119180798530579, "learning_rate": 2.273790965615004e-05, "loss": 0.072, "step": 15777 }, { "epoch": 0.34767279798597456, "grad_norm": 0.7532089948654175, "learning_rate": 2.27369925383468e-05, "loss": 0.1043, "step": 15778 }, { "epoch": 0.3476948332754907, "grad_norm": 0.5126133561134338, "learning_rate": 2.2736075381134905e-05, "loss": 0.1155, "step": 15779 }, { "epoch": 0.34771686856500683, "grad_norm": 0.9007224440574646, "learning_rate": 2.2735158184519033e-05, "loss": 0.0938, "step": 15780 }, { "epoch": 0.347738903854523, "grad_norm": 0.9602604508399963, "learning_rate": 2.273424094850385e-05, "loss": 0.0992, "step": 15781 }, { "epoch": 0.34776093914403916, "grad_norm": 0.867943286895752, "learning_rate": 2.2733323673094035e-05, "loss": 0.0871, "step": 15782 }, { "epoch": 0.3477829744335553, "grad_norm": 0.31334877014160156, "learning_rate": 2.2732406358294255e-05, "loss": 0.074, "step": 15783 }, { "epoch": 0.3478050097230715, "grad_norm": 0.5737961530685425, "learning_rate": 2.273148900410918e-05, "loss": 0.0714, "step": 15784 }, { "epoch": 0.34782704501258765, "grad_norm": 0.9745418429374695, "learning_rate": 2.2730571610543486e-05, "loss": 0.0862, "step": 15785 }, { "epoch": 0.3478490803021038, "grad_norm": 0.7940707802772522, "learning_rate": 2.272965417760185e-05, "loss": 0.109, "step": 15786 }, { "epoch": 0.34787111559162, "grad_norm": 0.9595789909362793, "learning_rate": 2.2728736705288936e-05, "loss": 0.1166, "step": 15787 }, { "epoch": 0.34789315088113615, "grad_norm": 0.5215433835983276, "learning_rate": 2.2727819193609422e-05, "loss": 0.0808, "step": 15788 }, { "epoch": 0.3479151861706523, "grad_norm": 1.1448445320129395, "learning_rate": 2.272690164256798e-05, "loss": 0.1155, "step": 15789 }, { "epoch": 0.3479372214601685, "grad_norm": 1.3144166469573975, "learning_rate": 2.2725984052169286e-05, "loss": 0.0909, "step": 15790 }, { "epoch": 0.34795925674968464, "grad_norm": 0.7769909501075745, "learning_rate": 2.2725066422418008e-05, "loss": 0.0954, "step": 15791 }, { "epoch": 0.3479812920392008, "grad_norm": 0.7262081503868103, "learning_rate": 2.272414875331883e-05, "loss": 0.097, "step": 15792 }, { "epoch": 0.3480033273287169, "grad_norm": 0.5493295788764954, "learning_rate": 2.272323104487642e-05, "loss": 0.0778, "step": 15793 }, { "epoch": 0.3480253626182331, "grad_norm": 0.7404688000679016, "learning_rate": 2.2722313297095454e-05, "loss": 0.0851, "step": 15794 }, { "epoch": 0.34804739790774925, "grad_norm": 0.9140723943710327, "learning_rate": 2.27213955099806e-05, "loss": 0.1244, "step": 15795 }, { "epoch": 0.3480694331972654, "grad_norm": 0.5691168308258057, "learning_rate": 2.272047768353654e-05, "loss": 0.0763, "step": 15796 }, { "epoch": 0.3480914684867816, "grad_norm": 0.6736329793930054, "learning_rate": 2.2719559817767945e-05, "loss": 0.0693, "step": 15797 }, { "epoch": 0.34811350377629774, "grad_norm": 0.44614189863204956, "learning_rate": 2.2718641912679493e-05, "loss": 0.074, "step": 15798 }, { "epoch": 0.3481355390658139, "grad_norm": 0.5321516394615173, "learning_rate": 2.2717723968275865e-05, "loss": 0.1035, "step": 15799 }, { "epoch": 0.34815757435533007, "grad_norm": 0.540825605392456, "learning_rate": 2.2716805984561722e-05, "loss": 0.0721, "step": 15800 }, { "epoch": 0.34817960964484623, "grad_norm": 0.6154764294624329, "learning_rate": 2.2715887961541754e-05, "loss": 0.1179, "step": 15801 }, { "epoch": 0.3482016449343624, "grad_norm": 0.3523431122303009, "learning_rate": 2.2714969899220626e-05, "loss": 0.0728, "step": 15802 }, { "epoch": 0.34822368022387856, "grad_norm": 0.7840595841407776, "learning_rate": 2.2714051797603024e-05, "loss": 0.0768, "step": 15803 }, { "epoch": 0.3482457155133947, "grad_norm": 1.004675030708313, "learning_rate": 2.2713133656693617e-05, "loss": 0.0843, "step": 15804 }, { "epoch": 0.34826775080291084, "grad_norm": 0.5747542381286621, "learning_rate": 2.271221547649709e-05, "loss": 0.0861, "step": 15805 }, { "epoch": 0.348289786092427, "grad_norm": 0.9377110600471497, "learning_rate": 2.2711297257018103e-05, "loss": 0.1102, "step": 15806 }, { "epoch": 0.34831182138194317, "grad_norm": 0.8561923503875732, "learning_rate": 2.271037899826135e-05, "loss": 0.0886, "step": 15807 }, { "epoch": 0.34833385667145933, "grad_norm": 0.7824245691299438, "learning_rate": 2.2709460700231503e-05, "loss": 0.1056, "step": 15808 }, { "epoch": 0.3483558919609755, "grad_norm": 0.7235265374183655, "learning_rate": 2.2708542362933234e-05, "loss": 0.1175, "step": 15809 }, { "epoch": 0.34837792725049166, "grad_norm": 0.860194742679596, "learning_rate": 2.270762398637123e-05, "loss": 0.0788, "step": 15810 }, { "epoch": 0.3483999625400078, "grad_norm": 0.5857822299003601, "learning_rate": 2.2706705570550163e-05, "loss": 0.0984, "step": 15811 }, { "epoch": 0.348421997829524, "grad_norm": 0.6444290280342102, "learning_rate": 2.2705787115474714e-05, "loss": 0.0686, "step": 15812 }, { "epoch": 0.34844403311904015, "grad_norm": 0.6770687103271484, "learning_rate": 2.2704868621149554e-05, "loss": 0.0661, "step": 15813 }, { "epoch": 0.3484660684085563, "grad_norm": 1.1911060810089111, "learning_rate": 2.270395008757937e-05, "loss": 0.1361, "step": 15814 }, { "epoch": 0.3484881036980725, "grad_norm": 0.5601806640625, "learning_rate": 2.2703031514768835e-05, "loss": 0.0515, "step": 15815 }, { "epoch": 0.34851013898758865, "grad_norm": 0.6392313241958618, "learning_rate": 2.270211290272263e-05, "loss": 0.0952, "step": 15816 }, { "epoch": 0.34853217427710476, "grad_norm": 0.7118151187896729, "learning_rate": 2.2701194251445433e-05, "loss": 0.0869, "step": 15817 }, { "epoch": 0.3485542095666209, "grad_norm": 0.38614794611930847, "learning_rate": 2.270027556094193e-05, "loss": 0.0696, "step": 15818 }, { "epoch": 0.3485762448561371, "grad_norm": 0.7045984268188477, "learning_rate": 2.2699356831216787e-05, "loss": 0.0911, "step": 15819 }, { "epoch": 0.34859828014565325, "grad_norm": 0.6744403839111328, "learning_rate": 2.2698438062274692e-05, "loss": 0.0734, "step": 15820 }, { "epoch": 0.3486203154351694, "grad_norm": 0.6596711277961731, "learning_rate": 2.2697519254120326e-05, "loss": 0.0953, "step": 15821 }, { "epoch": 0.3486423507246856, "grad_norm": 0.7010897994041443, "learning_rate": 2.2696600406758366e-05, "loss": 0.0755, "step": 15822 }, { "epoch": 0.34866438601420174, "grad_norm": 0.8617119193077087, "learning_rate": 2.2695681520193492e-05, "loss": 0.0808, "step": 15823 }, { "epoch": 0.3486864213037179, "grad_norm": 0.6768491268157959, "learning_rate": 2.2694762594430386e-05, "loss": 0.1008, "step": 15824 }, { "epoch": 0.3487084565932341, "grad_norm": 1.2274898290634155, "learning_rate": 2.269384362947373e-05, "loss": 0.1045, "step": 15825 }, { "epoch": 0.34873049188275024, "grad_norm": 0.8601554036140442, "learning_rate": 2.2692924625328202e-05, "loss": 0.1042, "step": 15826 }, { "epoch": 0.3487525271722664, "grad_norm": 0.8816344141960144, "learning_rate": 2.269200558199848e-05, "loss": 0.0898, "step": 15827 }, { "epoch": 0.34877456246178257, "grad_norm": 0.8685908913612366, "learning_rate": 2.269108649948925e-05, "loss": 0.1238, "step": 15828 }, { "epoch": 0.34879659775129873, "grad_norm": 1.1093701124191284, "learning_rate": 2.2690167377805197e-05, "loss": 0.0752, "step": 15829 }, { "epoch": 0.34881863304081484, "grad_norm": 0.6090832352638245, "learning_rate": 2.2689248216950993e-05, "loss": 0.065, "step": 15830 }, { "epoch": 0.348840668330331, "grad_norm": 0.6222887635231018, "learning_rate": 2.2688329016931318e-05, "loss": 0.0628, "step": 15831 }, { "epoch": 0.34886270361984717, "grad_norm": 0.645600438117981, "learning_rate": 2.268740977775087e-05, "loss": 0.0878, "step": 15832 }, { "epoch": 0.34888473890936333, "grad_norm": 0.7106400728225708, "learning_rate": 2.2686490499414325e-05, "loss": 0.0941, "step": 15833 }, { "epoch": 0.3489067741988795, "grad_norm": 0.6051266193389893, "learning_rate": 2.2685571181926356e-05, "loss": 0.0772, "step": 15834 }, { "epoch": 0.34892880948839566, "grad_norm": 0.6465545296669006, "learning_rate": 2.268465182529165e-05, "loss": 0.102, "step": 15835 }, { "epoch": 0.34895084477791183, "grad_norm": 0.7381782531738281, "learning_rate": 2.2683732429514895e-05, "loss": 0.1131, "step": 15836 }, { "epoch": 0.348972880067428, "grad_norm": 1.0214886665344238, "learning_rate": 2.268281299460077e-05, "loss": 0.0617, "step": 15837 }, { "epoch": 0.34899491535694416, "grad_norm": 0.6489319801330566, "learning_rate": 2.2681893520553955e-05, "loss": 0.1231, "step": 15838 }, { "epoch": 0.3490169506464603, "grad_norm": 0.8709153532981873, "learning_rate": 2.268097400737914e-05, "loss": 0.0957, "step": 15839 }, { "epoch": 0.3490389859359765, "grad_norm": 0.7334156036376953, "learning_rate": 2.2680054455081005e-05, "loss": 0.0807, "step": 15840 }, { "epoch": 0.34906102122549265, "grad_norm": 0.7206369042396545, "learning_rate": 2.2679134863664237e-05, "loss": 0.0787, "step": 15841 }, { "epoch": 0.34908305651500876, "grad_norm": 0.6790843605995178, "learning_rate": 2.2678215233133513e-05, "loss": 0.0799, "step": 15842 }, { "epoch": 0.3491050918045249, "grad_norm": 0.6049646139144897, "learning_rate": 2.2677295563493524e-05, "loss": 0.0834, "step": 15843 }, { "epoch": 0.3491271270940411, "grad_norm": 0.5500785112380981, "learning_rate": 2.267637585474895e-05, "loss": 0.0925, "step": 15844 }, { "epoch": 0.34914916238355725, "grad_norm": 0.30872172117233276, "learning_rate": 2.267545610690448e-05, "loss": 0.0916, "step": 15845 }, { "epoch": 0.3491711976730734, "grad_norm": 0.6062443256378174, "learning_rate": 2.2674536319964795e-05, "loss": 0.0894, "step": 15846 }, { "epoch": 0.3491932329625896, "grad_norm": 0.6255510449409485, "learning_rate": 2.2673616493934578e-05, "loss": 0.0769, "step": 15847 }, { "epoch": 0.34921526825210575, "grad_norm": 0.9966807961463928, "learning_rate": 2.267269662881852e-05, "loss": 0.0837, "step": 15848 }, { "epoch": 0.3492373035416219, "grad_norm": 0.5585118532180786, "learning_rate": 2.2671776724621307e-05, "loss": 0.0827, "step": 15849 }, { "epoch": 0.3492593388311381, "grad_norm": 0.9577010869979858, "learning_rate": 2.2670856781347615e-05, "loss": 0.095, "step": 15850 }, { "epoch": 0.34928137412065424, "grad_norm": 1.1243616342544556, "learning_rate": 2.266993679900214e-05, "loss": 0.1568, "step": 15851 }, { "epoch": 0.3493034094101704, "grad_norm": 0.7146484851837158, "learning_rate": 2.266901677758956e-05, "loss": 0.1082, "step": 15852 }, { "epoch": 0.34932544469968657, "grad_norm": 0.7418161034584045, "learning_rate": 2.266809671711457e-05, "loss": 0.0865, "step": 15853 }, { "epoch": 0.3493474799892027, "grad_norm": 0.7852185368537903, "learning_rate": 2.266717661758185e-05, "loss": 0.0857, "step": 15854 }, { "epoch": 0.34936951527871885, "grad_norm": 0.6382864117622375, "learning_rate": 2.2666256478996085e-05, "loss": 0.1054, "step": 15855 }, { "epoch": 0.349391550568235, "grad_norm": 0.9407486319541931, "learning_rate": 2.2665336301361963e-05, "loss": 0.1152, "step": 15856 }, { "epoch": 0.3494135858577512, "grad_norm": 1.4121792316436768, "learning_rate": 2.266441608468418e-05, "loss": 0.0802, "step": 15857 }, { "epoch": 0.34943562114726734, "grad_norm": 1.077144980430603, "learning_rate": 2.2663495828967412e-05, "loss": 0.137, "step": 15858 }, { "epoch": 0.3494576564367835, "grad_norm": 1.132079005241394, "learning_rate": 2.2662575534216355e-05, "loss": 0.1768, "step": 15859 }, { "epoch": 0.34947969172629967, "grad_norm": 0.7878971695899963, "learning_rate": 2.2661655200435684e-05, "loss": 0.1121, "step": 15860 }, { "epoch": 0.34950172701581583, "grad_norm": 0.6804063320159912, "learning_rate": 2.26607348276301e-05, "loss": 0.1023, "step": 15861 }, { "epoch": 0.349523762305332, "grad_norm": 1.0137943029403687, "learning_rate": 2.265981441580428e-05, "loss": 0.1016, "step": 15862 }, { "epoch": 0.34954579759484816, "grad_norm": 0.7777931690216064, "learning_rate": 2.2658893964962927e-05, "loss": 0.1075, "step": 15863 }, { "epoch": 0.3495678328843643, "grad_norm": 1.0040491819381714, "learning_rate": 2.2657973475110712e-05, "loss": 0.1008, "step": 15864 }, { "epoch": 0.3495898681738805, "grad_norm": 0.7806123495101929, "learning_rate": 2.2657052946252332e-05, "loss": 0.0908, "step": 15865 }, { "epoch": 0.34961190346339666, "grad_norm": 1.0484338998794556, "learning_rate": 2.2656132378392476e-05, "loss": 0.1102, "step": 15866 }, { "epoch": 0.34963393875291277, "grad_norm": 0.7878987193107605, "learning_rate": 2.2655211771535833e-05, "loss": 0.0853, "step": 15867 }, { "epoch": 0.34965597404242893, "grad_norm": 0.8137596249580383, "learning_rate": 2.2654291125687095e-05, "loss": 0.1073, "step": 15868 }, { "epoch": 0.3496780093319451, "grad_norm": 0.9545654654502869, "learning_rate": 2.2653370440850946e-05, "loss": 0.1229, "step": 15869 }, { "epoch": 0.34970004462146126, "grad_norm": 0.6384352445602417, "learning_rate": 2.2652449717032073e-05, "loss": 0.0963, "step": 15870 }, { "epoch": 0.3497220799109774, "grad_norm": 0.7454450726509094, "learning_rate": 2.265152895423517e-05, "loss": 0.1125, "step": 15871 }, { "epoch": 0.3497441152004936, "grad_norm": 0.8395557999610901, "learning_rate": 2.2650608152464933e-05, "loss": 0.0935, "step": 15872 }, { "epoch": 0.34976615049000975, "grad_norm": 0.8012323975563049, "learning_rate": 2.2649687311726043e-05, "loss": 0.0935, "step": 15873 }, { "epoch": 0.3497881857795259, "grad_norm": 0.857986569404602, "learning_rate": 2.2648766432023198e-05, "loss": 0.1094, "step": 15874 }, { "epoch": 0.3498102210690421, "grad_norm": 0.8996657133102417, "learning_rate": 2.2647845513361075e-05, "loss": 0.0876, "step": 15875 }, { "epoch": 0.34983225635855825, "grad_norm": 0.8274625539779663, "learning_rate": 2.2646924555744382e-05, "loss": 0.1283, "step": 15876 }, { "epoch": 0.3498542916480744, "grad_norm": 0.7872760891914368, "learning_rate": 2.2646003559177793e-05, "loss": 0.079, "step": 15877 }, { "epoch": 0.3498763269375906, "grad_norm": 0.7200978398323059, "learning_rate": 2.2645082523666016e-05, "loss": 0.1051, "step": 15878 }, { "epoch": 0.3498983622271067, "grad_norm": 0.7562389969825745, "learning_rate": 2.2644161449213728e-05, "loss": 0.1035, "step": 15879 }, { "epoch": 0.34992039751662285, "grad_norm": 0.5178600549697876, "learning_rate": 2.264324033582563e-05, "loss": 0.0769, "step": 15880 }, { "epoch": 0.349942432806139, "grad_norm": 0.6688610911369324, "learning_rate": 2.264231918350641e-05, "loss": 0.0981, "step": 15881 }, { "epoch": 0.3499644680956552, "grad_norm": 0.8560647368431091, "learning_rate": 2.264139799226076e-05, "loss": 0.0888, "step": 15882 }, { "epoch": 0.34998650338517134, "grad_norm": 0.675460159778595, "learning_rate": 2.264047676209337e-05, "loss": 0.114, "step": 15883 }, { "epoch": 0.3500085386746875, "grad_norm": 0.8351489901542664, "learning_rate": 2.263955549300894e-05, "loss": 0.1105, "step": 15884 }, { "epoch": 0.3500305739642037, "grad_norm": 0.6544060707092285, "learning_rate": 2.2638634185012152e-05, "loss": 0.0829, "step": 15885 }, { "epoch": 0.35005260925371984, "grad_norm": 0.8935003876686096, "learning_rate": 2.2637712838107708e-05, "loss": 0.0801, "step": 15886 }, { "epoch": 0.350074644543236, "grad_norm": 1.1365841627120972, "learning_rate": 2.263679145230029e-05, "loss": 0.1076, "step": 15887 }, { "epoch": 0.35009667983275217, "grad_norm": 0.7120198011398315, "learning_rate": 2.2635870027594608e-05, "loss": 0.0757, "step": 15888 }, { "epoch": 0.35011871512226833, "grad_norm": 0.6842082142829895, "learning_rate": 2.263494856399534e-05, "loss": 0.0783, "step": 15889 }, { "epoch": 0.3501407504117845, "grad_norm": 0.5147572159767151, "learning_rate": 2.263402706150718e-05, "loss": 0.0878, "step": 15890 }, { "epoch": 0.35016278570130066, "grad_norm": 0.9084627032279968, "learning_rate": 2.2633105520134837e-05, "loss": 0.0617, "step": 15891 }, { "epoch": 0.35018482099081677, "grad_norm": 0.6899381279945374, "learning_rate": 2.2632183939882985e-05, "loss": 0.1053, "step": 15892 }, { "epoch": 0.35020685628033293, "grad_norm": 0.9360331296920776, "learning_rate": 2.2631262320756335e-05, "loss": 0.0734, "step": 15893 }, { "epoch": 0.3502288915698491, "grad_norm": 0.6486285328865051, "learning_rate": 2.2630340662759566e-05, "loss": 0.0827, "step": 15894 }, { "epoch": 0.35025092685936526, "grad_norm": 0.6494487524032593, "learning_rate": 2.262941896589739e-05, "loss": 0.0886, "step": 15895 }, { "epoch": 0.35027296214888143, "grad_norm": 0.8024867177009583, "learning_rate": 2.2628497230174482e-05, "loss": 0.1557, "step": 15896 }, { "epoch": 0.3502949974383976, "grad_norm": 0.7156458497047424, "learning_rate": 2.2627575455595554e-05, "loss": 0.0668, "step": 15897 }, { "epoch": 0.35031703272791376, "grad_norm": 0.9691264629364014, "learning_rate": 2.2626653642165286e-05, "loss": 0.0988, "step": 15898 }, { "epoch": 0.3503390680174299, "grad_norm": 0.5969436764717102, "learning_rate": 2.2625731789888392e-05, "loss": 0.072, "step": 15899 }, { "epoch": 0.3503611033069461, "grad_norm": 0.7275614142417908, "learning_rate": 2.262480989876955e-05, "loss": 0.1071, "step": 15900 }, { "epoch": 0.35038313859646225, "grad_norm": 0.9781387448310852, "learning_rate": 2.2623887968813462e-05, "loss": 0.106, "step": 15901 }, { "epoch": 0.3504051738859784, "grad_norm": 0.6487857699394226, "learning_rate": 2.2622966000024825e-05, "loss": 0.1016, "step": 15902 }, { "epoch": 0.3504272091754946, "grad_norm": 0.6139421463012695, "learning_rate": 2.2622043992408337e-05, "loss": 0.1125, "step": 15903 }, { "epoch": 0.3504492444650107, "grad_norm": 0.6388642191886902, "learning_rate": 2.262112194596869e-05, "loss": 0.0868, "step": 15904 }, { "epoch": 0.35047127975452685, "grad_norm": 0.5735225081443787, "learning_rate": 2.2620199860710582e-05, "loss": 0.0673, "step": 15905 }, { "epoch": 0.350493315044043, "grad_norm": 0.738091766834259, "learning_rate": 2.261927773663871e-05, "loss": 0.0717, "step": 15906 }, { "epoch": 0.3505153503335592, "grad_norm": 0.6830098032951355, "learning_rate": 2.261835557375777e-05, "loss": 0.0854, "step": 15907 }, { "epoch": 0.35053738562307535, "grad_norm": 0.9279611706733704, "learning_rate": 2.261743337207246e-05, "loss": 0.1145, "step": 15908 }, { "epoch": 0.3505594209125915, "grad_norm": 0.763277530670166, "learning_rate": 2.2616511131587476e-05, "loss": 0.0832, "step": 15909 }, { "epoch": 0.3505814562021077, "grad_norm": 0.6774911284446716, "learning_rate": 2.2615588852307524e-05, "loss": 0.0694, "step": 15910 }, { "epoch": 0.35060349149162384, "grad_norm": 1.2836792469024658, "learning_rate": 2.2614666534237286e-05, "loss": 0.1843, "step": 15911 }, { "epoch": 0.35062552678114, "grad_norm": 0.6205142736434937, "learning_rate": 2.261374417738147e-05, "loss": 0.0654, "step": 15912 }, { "epoch": 0.35064756207065617, "grad_norm": 0.8363509774208069, "learning_rate": 2.261282178174477e-05, "loss": 0.0992, "step": 15913 }, { "epoch": 0.35066959736017234, "grad_norm": 0.704277753829956, "learning_rate": 2.261189934733189e-05, "loss": 0.08, "step": 15914 }, { "epoch": 0.3506916326496885, "grad_norm": 0.7360816597938538, "learning_rate": 2.261097687414752e-05, "loss": 0.1001, "step": 15915 }, { "epoch": 0.3507136679392046, "grad_norm": 0.744632363319397, "learning_rate": 2.2610054362196374e-05, "loss": 0.1094, "step": 15916 }, { "epoch": 0.3507357032287208, "grad_norm": 0.8538476824760437, "learning_rate": 2.2609131811483132e-05, "loss": 0.0845, "step": 15917 }, { "epoch": 0.35075773851823694, "grad_norm": 0.3106634020805359, "learning_rate": 2.2608209222012502e-05, "loss": 0.1061, "step": 15918 }, { "epoch": 0.3507797738077531, "grad_norm": 0.7072358727455139, "learning_rate": 2.2607286593789183e-05, "loss": 0.0643, "step": 15919 }, { "epoch": 0.35080180909726927, "grad_norm": 0.8318910598754883, "learning_rate": 2.2606363926817874e-05, "loss": 0.0784, "step": 15920 }, { "epoch": 0.35082384438678543, "grad_norm": 0.9962444305419922, "learning_rate": 2.2605441221103274e-05, "loss": 0.0977, "step": 15921 }, { "epoch": 0.3508458796763016, "grad_norm": 0.7882223129272461, "learning_rate": 2.2604518476650084e-05, "loss": 0.0916, "step": 15922 }, { "epoch": 0.35086791496581776, "grad_norm": 0.5909342765808105, "learning_rate": 2.2603595693463005e-05, "loss": 0.0897, "step": 15923 }, { "epoch": 0.3508899502553339, "grad_norm": 0.7832052111625671, "learning_rate": 2.2602672871546738e-05, "loss": 0.1082, "step": 15924 }, { "epoch": 0.3509119855448501, "grad_norm": 0.6027784943580627, "learning_rate": 2.2601750010905974e-05, "loss": 0.0979, "step": 15925 }, { "epoch": 0.35093402083436626, "grad_norm": 0.6520070433616638, "learning_rate": 2.2600827111545423e-05, "loss": 0.1361, "step": 15926 }, { "epoch": 0.3509560561238824, "grad_norm": 0.6078379154205322, "learning_rate": 2.2599904173469784e-05, "loss": 0.1008, "step": 15927 }, { "epoch": 0.3509780914133986, "grad_norm": 0.5293527245521545, "learning_rate": 2.2598981196683764e-05, "loss": 0.0717, "step": 15928 }, { "epoch": 0.3510001267029147, "grad_norm": 1.005146861076355, "learning_rate": 2.2598058181192055e-05, "loss": 0.064, "step": 15929 }, { "epoch": 0.35102216199243086, "grad_norm": 0.8351560235023499, "learning_rate": 2.259713512699936e-05, "loss": 0.1151, "step": 15930 }, { "epoch": 0.351044197281947, "grad_norm": 0.7717859148979187, "learning_rate": 2.2596212034110378e-05, "loss": 0.1091, "step": 15931 }, { "epoch": 0.3510662325714632, "grad_norm": 0.579375684261322, "learning_rate": 2.2595288902529815e-05, "loss": 0.0711, "step": 15932 }, { "epoch": 0.35108826786097935, "grad_norm": 0.36954766511917114, "learning_rate": 2.2594365732262378e-05, "loss": 0.1042, "step": 15933 }, { "epoch": 0.3511103031504955, "grad_norm": 0.8766772150993347, "learning_rate": 2.2593442523312758e-05, "loss": 0.1209, "step": 15934 }, { "epoch": 0.3511323384400117, "grad_norm": 0.6777498126029968, "learning_rate": 2.259251927568567e-05, "loss": 0.069, "step": 15935 }, { "epoch": 0.35115437372952785, "grad_norm": 0.5656194090843201, "learning_rate": 2.2591595989385808e-05, "loss": 0.0895, "step": 15936 }, { "epoch": 0.351176409019044, "grad_norm": 0.45083969831466675, "learning_rate": 2.2590672664417876e-05, "loss": 0.1099, "step": 15937 }, { "epoch": 0.3511984443085602, "grad_norm": 0.6620469093322754, "learning_rate": 2.2589749300786576e-05, "loss": 0.0886, "step": 15938 }, { "epoch": 0.35122047959807634, "grad_norm": 0.867210328578949, "learning_rate": 2.258882589849661e-05, "loss": 0.1487, "step": 15939 }, { "epoch": 0.3512425148875925, "grad_norm": 0.6374427676200867, "learning_rate": 2.2587902457552687e-05, "loss": 0.0806, "step": 15940 }, { "epoch": 0.3512645501771086, "grad_norm": 0.710737943649292, "learning_rate": 2.258697897795951e-05, "loss": 0.0782, "step": 15941 }, { "epoch": 0.3512865854666248, "grad_norm": 0.6466237306594849, "learning_rate": 2.258605545972178e-05, "loss": 0.0721, "step": 15942 }, { "epoch": 0.35130862075614094, "grad_norm": 1.4211394786834717, "learning_rate": 2.2585131902844202e-05, "loss": 0.1168, "step": 15943 }, { "epoch": 0.3513306560456571, "grad_norm": 0.6091178059577942, "learning_rate": 2.258420830733148e-05, "loss": 0.0793, "step": 15944 }, { "epoch": 0.3513526913351733, "grad_norm": 0.5377285480499268, "learning_rate": 2.2583284673188316e-05, "loss": 0.0332, "step": 15945 }, { "epoch": 0.35137472662468944, "grad_norm": 0.8808043003082275, "learning_rate": 2.2582361000419416e-05, "loss": 0.1022, "step": 15946 }, { "epoch": 0.3513967619142056, "grad_norm": 0.5521315932273865, "learning_rate": 2.258143728902949e-05, "loss": 0.0713, "step": 15947 }, { "epoch": 0.35141879720372177, "grad_norm": 0.8495767116546631, "learning_rate": 2.258051353902323e-05, "loss": 0.0842, "step": 15948 }, { "epoch": 0.35144083249323793, "grad_norm": 0.7319793701171875, "learning_rate": 2.257958975040536e-05, "loss": 0.0874, "step": 15949 }, { "epoch": 0.3514628677827541, "grad_norm": 1.3522764444351196, "learning_rate": 2.2578665923180568e-05, "loss": 0.1005, "step": 15950 }, { "epoch": 0.35148490307227026, "grad_norm": 1.7081336975097656, "learning_rate": 2.257774205735357e-05, "loss": 0.098, "step": 15951 }, { "epoch": 0.3515069383617864, "grad_norm": 0.7435722947120667, "learning_rate": 2.2576818152929064e-05, "loss": 0.1069, "step": 15952 }, { "epoch": 0.35152897365130253, "grad_norm": 0.4858928322792053, "learning_rate": 2.2575894209911765e-05, "loss": 0.0727, "step": 15953 }, { "epoch": 0.3515510089408187, "grad_norm": 0.4319301247596741, "learning_rate": 2.2574970228306368e-05, "loss": 0.0848, "step": 15954 }, { "epoch": 0.35157304423033486, "grad_norm": 1.1611278057098389, "learning_rate": 2.257404620811759e-05, "loss": 0.0886, "step": 15955 }, { "epoch": 0.35159507951985103, "grad_norm": 0.46975719928741455, "learning_rate": 2.2573122149350133e-05, "loss": 0.0761, "step": 15956 }, { "epoch": 0.3516171148093672, "grad_norm": 0.4272075295448303, "learning_rate": 2.2572198052008702e-05, "loss": 0.0806, "step": 15957 }, { "epoch": 0.35163915009888336, "grad_norm": 1.0081154108047485, "learning_rate": 2.257127391609801e-05, "loss": 0.0869, "step": 15958 }, { "epoch": 0.3516611853883995, "grad_norm": 0.9734418988227844, "learning_rate": 2.2570349741622757e-05, "loss": 0.0917, "step": 15959 }, { "epoch": 0.3516832206779157, "grad_norm": 0.6110876798629761, "learning_rate": 2.2569425528587658e-05, "loss": 0.1008, "step": 15960 }, { "epoch": 0.35170525596743185, "grad_norm": 0.7916805744171143, "learning_rate": 2.2568501276997408e-05, "loss": 0.1328, "step": 15961 }, { "epoch": 0.351727291256948, "grad_norm": 0.9034042954444885, "learning_rate": 2.2567576986856726e-05, "loss": 0.1082, "step": 15962 }, { "epoch": 0.3517493265464642, "grad_norm": 0.562413215637207, "learning_rate": 2.2566652658170313e-05, "loss": 0.0672, "step": 15963 }, { "epoch": 0.35177136183598035, "grad_norm": 0.707863986492157, "learning_rate": 2.2565728290942888e-05, "loss": 0.0962, "step": 15964 }, { "epoch": 0.3517933971254965, "grad_norm": 1.0746885538101196, "learning_rate": 2.2564803885179143e-05, "loss": 0.0998, "step": 15965 }, { "epoch": 0.3518154324150126, "grad_norm": 0.7363250255584717, "learning_rate": 2.25638794408838e-05, "loss": 0.1349, "step": 15966 }, { "epoch": 0.3518374677045288, "grad_norm": 0.6240730285644531, "learning_rate": 2.256295495806156e-05, "loss": 0.0671, "step": 15967 }, { "epoch": 0.35185950299404495, "grad_norm": 0.609234631061554, "learning_rate": 2.256203043671714e-05, "loss": 0.0678, "step": 15968 }, { "epoch": 0.3518815382835611, "grad_norm": 0.6597641706466675, "learning_rate": 2.2561105876855243e-05, "loss": 0.0656, "step": 15969 }, { "epoch": 0.3519035735730773, "grad_norm": 0.7394371032714844, "learning_rate": 2.2560181278480576e-05, "loss": 0.0884, "step": 15970 }, { "epoch": 0.35192560886259344, "grad_norm": 0.9790071249008179, "learning_rate": 2.2559256641597854e-05, "loss": 0.1067, "step": 15971 }, { "epoch": 0.3519476441521096, "grad_norm": 1.46531081199646, "learning_rate": 2.255833196621178e-05, "loss": 0.081, "step": 15972 }, { "epoch": 0.35196967944162577, "grad_norm": 0.7740635275840759, "learning_rate": 2.2557407252327074e-05, "loss": 0.1129, "step": 15973 }, { "epoch": 0.35199171473114194, "grad_norm": 1.3152323961257935, "learning_rate": 2.2556482499948438e-05, "loss": 0.0892, "step": 15974 }, { "epoch": 0.3520137500206581, "grad_norm": 1.1649044752120972, "learning_rate": 2.255555770908058e-05, "loss": 0.097, "step": 15975 }, { "epoch": 0.35203578531017427, "grad_norm": 0.40320026874542236, "learning_rate": 2.2554632879728223e-05, "loss": 0.0747, "step": 15976 }, { "epoch": 0.35205782059969043, "grad_norm": 0.616476833820343, "learning_rate": 2.2553708011896065e-05, "loss": 0.069, "step": 15977 }, { "epoch": 0.35207985588920654, "grad_norm": 0.6313005089759827, "learning_rate": 2.2552783105588822e-05, "loss": 0.1314, "step": 15978 }, { "epoch": 0.3521018911787227, "grad_norm": 0.8331462740898132, "learning_rate": 2.2551858160811206e-05, "loss": 0.0908, "step": 15979 }, { "epoch": 0.35212392646823887, "grad_norm": 0.8080002665519714, "learning_rate": 2.2550933177567923e-05, "loss": 0.0799, "step": 15980 }, { "epoch": 0.35214596175775503, "grad_norm": 0.7570206522941589, "learning_rate": 2.2550008155863693e-05, "loss": 0.1096, "step": 15981 }, { "epoch": 0.3521679970472712, "grad_norm": 0.718323826789856, "learning_rate": 2.254908309570322e-05, "loss": 0.1105, "step": 15982 }, { "epoch": 0.35219003233678736, "grad_norm": 0.8404526114463806, "learning_rate": 2.2548157997091218e-05, "loss": 0.0841, "step": 15983 }, { "epoch": 0.3522120676263035, "grad_norm": 0.5583196878433228, "learning_rate": 2.2547232860032403e-05, "loss": 0.0735, "step": 15984 }, { "epoch": 0.3522341029158197, "grad_norm": 1.000324010848999, "learning_rate": 2.2546307684531483e-05, "loss": 0.0771, "step": 15985 }, { "epoch": 0.35225613820533586, "grad_norm": 0.7972632050514221, "learning_rate": 2.2545382470593166e-05, "loss": 0.087, "step": 15986 }, { "epoch": 0.352278173494852, "grad_norm": 0.721032977104187, "learning_rate": 2.254445721822218e-05, "loss": 0.0796, "step": 15987 }, { "epoch": 0.3523002087843682, "grad_norm": 0.6249303817749023, "learning_rate": 2.2543531927423218e-05, "loss": 0.0697, "step": 15988 }, { "epoch": 0.35232224407388435, "grad_norm": 0.6013278961181641, "learning_rate": 2.2542606598201006e-05, "loss": 0.0638, "step": 15989 }, { "epoch": 0.35234427936340046, "grad_norm": 0.6881958246231079, "learning_rate": 2.254168123056025e-05, "loss": 0.1012, "step": 15990 }, { "epoch": 0.3523663146529166, "grad_norm": 0.5122583508491516, "learning_rate": 2.2540755824505675e-05, "loss": 0.0791, "step": 15991 }, { "epoch": 0.3523883499424328, "grad_norm": 0.803409218788147, "learning_rate": 2.2539830380041985e-05, "loss": 0.1096, "step": 15992 }, { "epoch": 0.35241038523194895, "grad_norm": 1.4822096824645996, "learning_rate": 2.2538904897173896e-05, "loss": 0.1225, "step": 15993 }, { "epoch": 0.3524324205214651, "grad_norm": 2.1190526485443115, "learning_rate": 2.2537979375906116e-05, "loss": 0.0921, "step": 15994 }, { "epoch": 0.3524544558109813, "grad_norm": 0.7508959770202637, "learning_rate": 2.253705381624337e-05, "loss": 0.1066, "step": 15995 }, { "epoch": 0.35247649110049745, "grad_norm": 1.027853012084961, "learning_rate": 2.2536128218190367e-05, "loss": 0.0833, "step": 15996 }, { "epoch": 0.3524985263900136, "grad_norm": 0.9971176981925964, "learning_rate": 2.2535202581751823e-05, "loss": 0.1143, "step": 15997 }, { "epoch": 0.3525205616795298, "grad_norm": 0.6035630702972412, "learning_rate": 2.2534276906932447e-05, "loss": 0.0749, "step": 15998 }, { "epoch": 0.35254259696904594, "grad_norm": 0.8545060753822327, "learning_rate": 2.2533351193736963e-05, "loss": 0.0677, "step": 15999 }, { "epoch": 0.3525646322585621, "grad_norm": 0.6227867007255554, "learning_rate": 2.253242544217008e-05, "loss": 0.0913, "step": 16000 }, { "epoch": 0.35258666754807827, "grad_norm": 0.8808972835540771, "learning_rate": 2.253149965223651e-05, "loss": 0.0619, "step": 16001 }, { "epoch": 0.35260870283759443, "grad_norm": 0.8058643937110901, "learning_rate": 2.253057382394098e-05, "loss": 0.0667, "step": 16002 }, { "epoch": 0.35263073812711054, "grad_norm": 0.6030107140541077, "learning_rate": 2.2529647957288197e-05, "loss": 0.0946, "step": 16003 }, { "epoch": 0.3526527734166267, "grad_norm": 0.7399283051490784, "learning_rate": 2.2528722052282882e-05, "loss": 0.0705, "step": 16004 }, { "epoch": 0.3526748087061429, "grad_norm": 0.8496963977813721, "learning_rate": 2.2527796108929742e-05, "loss": 0.1071, "step": 16005 }, { "epoch": 0.35269684399565904, "grad_norm": 1.1296786069869995, "learning_rate": 2.252687012723351e-05, "loss": 0.1191, "step": 16006 }, { "epoch": 0.3527188792851752, "grad_norm": 0.667220950126648, "learning_rate": 2.2525944107198885e-05, "loss": 0.063, "step": 16007 }, { "epoch": 0.35274091457469137, "grad_norm": 1.4577704668045044, "learning_rate": 2.2525018048830593e-05, "loss": 0.0981, "step": 16008 }, { "epoch": 0.35276294986420753, "grad_norm": 0.9440173506736755, "learning_rate": 2.2524091952133346e-05, "loss": 0.0941, "step": 16009 }, { "epoch": 0.3527849851537237, "grad_norm": 0.88611900806427, "learning_rate": 2.2523165817111867e-05, "loss": 0.0623, "step": 16010 }, { "epoch": 0.35280702044323986, "grad_norm": 0.589445948600769, "learning_rate": 2.2522239643770867e-05, "loss": 0.0839, "step": 16011 }, { "epoch": 0.352829055732756, "grad_norm": 0.4763738512992859, "learning_rate": 2.252131343211507e-05, "loss": 0.0902, "step": 16012 }, { "epoch": 0.3528510910222722, "grad_norm": 0.8365718722343445, "learning_rate": 2.2520387182149187e-05, "loss": 0.1346, "step": 16013 }, { "epoch": 0.35287312631178835, "grad_norm": 0.7809893488883972, "learning_rate": 2.2519460893877942e-05, "loss": 0.0648, "step": 16014 }, { "epoch": 0.35289516160130446, "grad_norm": 0.5855510234832764, "learning_rate": 2.2518534567306054e-05, "loss": 0.1077, "step": 16015 }, { "epoch": 0.35291719689082063, "grad_norm": 0.9216366410255432, "learning_rate": 2.2517608202438233e-05, "loss": 0.1109, "step": 16016 }, { "epoch": 0.3529392321803368, "grad_norm": 0.5369631052017212, "learning_rate": 2.25166817992792e-05, "loss": 0.0982, "step": 16017 }, { "epoch": 0.35296126746985296, "grad_norm": 0.8771203756332397, "learning_rate": 2.2515755357833682e-05, "loss": 0.1189, "step": 16018 }, { "epoch": 0.3529833027593691, "grad_norm": 0.8077150583267212, "learning_rate": 2.2514828878106385e-05, "loss": 0.1332, "step": 16019 }, { "epoch": 0.3530053380488853, "grad_norm": 0.9920670390129089, "learning_rate": 2.2513902360102038e-05, "loss": 0.0918, "step": 16020 }, { "epoch": 0.35302737333840145, "grad_norm": 0.5905532836914062, "learning_rate": 2.2512975803825354e-05, "loss": 0.0857, "step": 16021 }, { "epoch": 0.3530494086279176, "grad_norm": 0.827880859375, "learning_rate": 2.251204920928106e-05, "loss": 0.0929, "step": 16022 }, { "epoch": 0.3530714439174338, "grad_norm": 0.7453194856643677, "learning_rate": 2.2511122576473872e-05, "loss": 0.1101, "step": 16023 }, { "epoch": 0.35309347920694995, "grad_norm": 0.8091663718223572, "learning_rate": 2.251019590540851e-05, "loss": 0.1063, "step": 16024 }, { "epoch": 0.3531155144964661, "grad_norm": 0.5709226727485657, "learning_rate": 2.2509269196089686e-05, "loss": 0.0676, "step": 16025 }, { "epoch": 0.3531375497859823, "grad_norm": 1.0089571475982666, "learning_rate": 2.2508342448522134e-05, "loss": 0.0847, "step": 16026 }, { "epoch": 0.3531595850754984, "grad_norm": 0.8607602119445801, "learning_rate": 2.250741566271056e-05, "loss": 0.0691, "step": 16027 }, { "epoch": 0.35318162036501455, "grad_norm": 0.7592977285385132, "learning_rate": 2.2506488838659697e-05, "loss": 0.0865, "step": 16028 }, { "epoch": 0.3532036556545307, "grad_norm": 0.7368190884590149, "learning_rate": 2.2505561976374262e-05, "loss": 0.0814, "step": 16029 }, { "epoch": 0.3532256909440469, "grad_norm": 0.8976472020149231, "learning_rate": 2.250463507585897e-05, "loss": 0.0769, "step": 16030 }, { "epoch": 0.35324772623356304, "grad_norm": 0.5098313689231873, "learning_rate": 2.2503708137118555e-05, "loss": 0.0705, "step": 16031 }, { "epoch": 0.3532697615230792, "grad_norm": 0.9312385320663452, "learning_rate": 2.2502781160157727e-05, "loss": 0.1031, "step": 16032 }, { "epoch": 0.35329179681259537, "grad_norm": 0.4973612129688263, "learning_rate": 2.250185414498121e-05, "loss": 0.0909, "step": 16033 }, { "epoch": 0.35331383210211154, "grad_norm": 0.6160998344421387, "learning_rate": 2.250092709159373e-05, "loss": 0.0767, "step": 16034 }, { "epoch": 0.3533358673916277, "grad_norm": 0.8587709069252014, "learning_rate": 2.25e-05, "loss": 0.0801, "step": 16035 }, { "epoch": 0.35335790268114387, "grad_norm": 1.3351908922195435, "learning_rate": 2.249907287020475e-05, "loss": 0.1434, "step": 16036 }, { "epoch": 0.35337993797066003, "grad_norm": 0.6051190495491028, "learning_rate": 2.2498145702212706e-05, "loss": 0.0729, "step": 16037 }, { "epoch": 0.3534019732601762, "grad_norm": 0.8973617553710938, "learning_rate": 2.249721849602858e-05, "loss": 0.0723, "step": 16038 }, { "epoch": 0.35342400854969236, "grad_norm": 0.8875977993011475, "learning_rate": 2.2496291251657103e-05, "loss": 0.1227, "step": 16039 }, { "epoch": 0.35344604383920847, "grad_norm": 1.019323468208313, "learning_rate": 2.2495363969102996e-05, "loss": 0.1066, "step": 16040 }, { "epoch": 0.35346807912872463, "grad_norm": 0.4896811842918396, "learning_rate": 2.249443664837098e-05, "loss": 0.0767, "step": 16041 }, { "epoch": 0.3534901144182408, "grad_norm": 0.7559228539466858, "learning_rate": 2.2493509289465773e-05, "loss": 0.0711, "step": 16042 }, { "epoch": 0.35351214970775696, "grad_norm": 0.8367164134979248, "learning_rate": 2.249258189239211e-05, "loss": 0.0772, "step": 16043 }, { "epoch": 0.3535341849972731, "grad_norm": 1.346801519393921, "learning_rate": 2.2491654457154708e-05, "loss": 0.1008, "step": 16044 }, { "epoch": 0.3535562202867893, "grad_norm": 0.7201282382011414, "learning_rate": 2.2490726983758294e-05, "loss": 0.132, "step": 16045 }, { "epoch": 0.35357825557630546, "grad_norm": 0.9675203561782837, "learning_rate": 2.248979947220759e-05, "loss": 0.1094, "step": 16046 }, { "epoch": 0.3536002908658216, "grad_norm": 0.4322252869606018, "learning_rate": 2.2488871922507325e-05, "loss": 0.0834, "step": 16047 }, { "epoch": 0.3536223261553378, "grad_norm": 0.9017423391342163, "learning_rate": 2.2487944334662215e-05, "loss": 0.0784, "step": 16048 }, { "epoch": 0.35364436144485395, "grad_norm": 0.8375214338302612, "learning_rate": 2.2487016708676987e-05, "loss": 0.1012, "step": 16049 }, { "epoch": 0.3536663967343701, "grad_norm": 0.8750816583633423, "learning_rate": 2.248608904455637e-05, "loss": 0.1023, "step": 16050 }, { "epoch": 0.3536884320238863, "grad_norm": 0.6564747095108032, "learning_rate": 2.248516134230509e-05, "loss": 0.1066, "step": 16051 }, { "epoch": 0.3537104673134024, "grad_norm": 0.6537913680076599, "learning_rate": 2.2484233601927868e-05, "loss": 0.0755, "step": 16052 }, { "epoch": 0.35373250260291855, "grad_norm": 0.2977134883403778, "learning_rate": 2.248330582342943e-05, "loss": 0.0917, "step": 16053 }, { "epoch": 0.3537545378924347, "grad_norm": 0.4795166552066803, "learning_rate": 2.2482378006814502e-05, "loss": 0.0843, "step": 16054 }, { "epoch": 0.3537765731819509, "grad_norm": 0.6183699369430542, "learning_rate": 2.248145015208781e-05, "loss": 0.09, "step": 16055 }, { "epoch": 0.35379860847146705, "grad_norm": 0.5626376867294312, "learning_rate": 2.2480522259254088e-05, "loss": 0.0689, "step": 16056 }, { "epoch": 0.3538206437609832, "grad_norm": 0.9898071885108948, "learning_rate": 2.2479594328318042e-05, "loss": 0.1009, "step": 16057 }, { "epoch": 0.3538426790504994, "grad_norm": 0.5643009543418884, "learning_rate": 2.2478666359284423e-05, "loss": 0.0819, "step": 16058 }, { "epoch": 0.35386471434001554, "grad_norm": 0.585789144039154, "learning_rate": 2.2477738352157936e-05, "loss": 0.0803, "step": 16059 }, { "epoch": 0.3538867496295317, "grad_norm": 0.547517716884613, "learning_rate": 2.2476810306943322e-05, "loss": 0.0916, "step": 16060 }, { "epoch": 0.35390878491904787, "grad_norm": 0.37242522835731506, "learning_rate": 2.2475882223645304e-05, "loss": 0.0925, "step": 16061 }, { "epoch": 0.35393082020856403, "grad_norm": 0.9962188601493835, "learning_rate": 2.247495410226861e-05, "loss": 0.0748, "step": 16062 }, { "epoch": 0.3539528554980802, "grad_norm": 0.8380700349807739, "learning_rate": 2.2474025942817968e-05, "loss": 0.0984, "step": 16063 }, { "epoch": 0.3539748907875963, "grad_norm": 0.8083046674728394, "learning_rate": 2.2473097745298102e-05, "loss": 0.0827, "step": 16064 }, { "epoch": 0.3539969260771125, "grad_norm": 0.8223394751548767, "learning_rate": 2.2472169509713738e-05, "loss": 0.0857, "step": 16065 }, { "epoch": 0.35401896136662864, "grad_norm": 0.889787495136261, "learning_rate": 2.2471241236069612e-05, "loss": 0.1346, "step": 16066 }, { "epoch": 0.3540409966561448, "grad_norm": 0.6949034333229065, "learning_rate": 2.2470312924370447e-05, "loss": 0.1017, "step": 16067 }, { "epoch": 0.35406303194566097, "grad_norm": 0.773309588432312, "learning_rate": 2.246938457462097e-05, "loss": 0.0683, "step": 16068 }, { "epoch": 0.35408506723517713, "grad_norm": 0.5278576016426086, "learning_rate": 2.2468456186825917e-05, "loss": 0.0758, "step": 16069 }, { "epoch": 0.3541071025246933, "grad_norm": 0.6691811084747314, "learning_rate": 2.2467527760990007e-05, "loss": 0.0918, "step": 16070 }, { "epoch": 0.35412913781420946, "grad_norm": 0.8036237359046936, "learning_rate": 2.246659929711798e-05, "loss": 0.0931, "step": 16071 }, { "epoch": 0.3541511731037256, "grad_norm": 0.9414463043212891, "learning_rate": 2.2465670795214553e-05, "loss": 0.1123, "step": 16072 }, { "epoch": 0.3541732083932418, "grad_norm": 0.7956227660179138, "learning_rate": 2.246474225528446e-05, "loss": 0.1063, "step": 16073 }, { "epoch": 0.35419524368275795, "grad_norm": 0.6603604555130005, "learning_rate": 2.2463813677332436e-05, "loss": 0.1156, "step": 16074 }, { "epoch": 0.3542172789722741, "grad_norm": 0.43838271498680115, "learning_rate": 2.2462885061363207e-05, "loss": 0.0951, "step": 16075 }, { "epoch": 0.3542393142617903, "grad_norm": 0.7896808981895447, "learning_rate": 2.2461956407381498e-05, "loss": 0.0961, "step": 16076 }, { "epoch": 0.3542613495513064, "grad_norm": 0.7565776109695435, "learning_rate": 2.2461027715392045e-05, "loss": 0.0942, "step": 16077 }, { "epoch": 0.35428338484082256, "grad_norm": 1.087308645248413, "learning_rate": 2.2460098985399577e-05, "loss": 0.0724, "step": 16078 }, { "epoch": 0.3543054201303387, "grad_norm": 0.7781979441642761, "learning_rate": 2.2459170217408825e-05, "loss": 0.0921, "step": 16079 }, { "epoch": 0.3543274554198549, "grad_norm": 0.5996968746185303, "learning_rate": 2.2458241411424523e-05, "loss": 0.082, "step": 16080 }, { "epoch": 0.35434949070937105, "grad_norm": 0.32130730152130127, "learning_rate": 2.2457312567451395e-05, "loss": 0.0555, "step": 16081 }, { "epoch": 0.3543715259988872, "grad_norm": 0.568889856338501, "learning_rate": 2.2456383685494177e-05, "loss": 0.077, "step": 16082 }, { "epoch": 0.3543935612884034, "grad_norm": 0.68137127161026, "learning_rate": 2.2455454765557596e-05, "loss": 0.0873, "step": 16083 }, { "epoch": 0.35441559657791954, "grad_norm": 0.882271945476532, "learning_rate": 2.245452580764638e-05, "loss": 0.1574, "step": 16084 }, { "epoch": 0.3544376318674357, "grad_norm": 0.4987489879131317, "learning_rate": 2.2453596811765277e-05, "loss": 0.0936, "step": 16085 }, { "epoch": 0.3544596671569519, "grad_norm": 0.7221957445144653, "learning_rate": 2.2452667777919005e-05, "loss": 0.0615, "step": 16086 }, { "epoch": 0.35448170244646804, "grad_norm": 0.8271570801734924, "learning_rate": 2.24517387061123e-05, "loss": 0.1065, "step": 16087 }, { "epoch": 0.3545037377359842, "grad_norm": 0.8320437073707581, "learning_rate": 2.2450809596349894e-05, "loss": 0.0644, "step": 16088 }, { "epoch": 0.3545257730255003, "grad_norm": 0.9907040596008301, "learning_rate": 2.244988044863652e-05, "loss": 0.0821, "step": 16089 }, { "epoch": 0.3545478083150165, "grad_norm": 0.708500862121582, "learning_rate": 2.2448951262976907e-05, "loss": 0.0881, "step": 16090 }, { "epoch": 0.35456984360453264, "grad_norm": 0.709311306476593, "learning_rate": 2.2448022039375786e-05, "loss": 0.0983, "step": 16091 }, { "epoch": 0.3545918788940488, "grad_norm": 0.7883860468864441, "learning_rate": 2.2447092777837906e-05, "loss": 0.0719, "step": 16092 }, { "epoch": 0.35461391418356497, "grad_norm": 0.5806108713150024, "learning_rate": 2.244616347836798e-05, "loss": 0.063, "step": 16093 }, { "epoch": 0.35463594947308114, "grad_norm": 0.6431939601898193, "learning_rate": 2.2445234140970754e-05, "loss": 0.0998, "step": 16094 }, { "epoch": 0.3546579847625973, "grad_norm": 1.295898675918579, "learning_rate": 2.244430476565096e-05, "loss": 0.0829, "step": 16095 }, { "epoch": 0.35468002005211346, "grad_norm": 0.5269333720207214, "learning_rate": 2.244337535241333e-05, "loss": 0.0871, "step": 16096 }, { "epoch": 0.35470205534162963, "grad_norm": 0.7710840106010437, "learning_rate": 2.2442445901262592e-05, "loss": 0.1143, "step": 16097 }, { "epoch": 0.3547240906311458, "grad_norm": 0.9091232419013977, "learning_rate": 2.2441516412203494e-05, "loss": 0.112, "step": 16098 }, { "epoch": 0.35474612592066196, "grad_norm": 0.9199269413948059, "learning_rate": 2.2440586885240755e-05, "loss": 0.1187, "step": 16099 }, { "epoch": 0.3547681612101781, "grad_norm": 0.4827498197555542, "learning_rate": 2.243965732037912e-05, "loss": 0.0651, "step": 16100 }, { "epoch": 0.3547901964996943, "grad_norm": 0.7692115902900696, "learning_rate": 2.243872771762332e-05, "loss": 0.0903, "step": 16101 }, { "epoch": 0.3548122317892104, "grad_norm": 0.7846118807792664, "learning_rate": 2.2437798076978095e-05, "loss": 0.0952, "step": 16102 }, { "epoch": 0.35483426707872656, "grad_norm": 0.7815549969673157, "learning_rate": 2.2436868398448175e-05, "loss": 0.1194, "step": 16103 }, { "epoch": 0.3548563023682427, "grad_norm": 0.6230742931365967, "learning_rate": 2.2435938682038295e-05, "loss": 0.0695, "step": 16104 }, { "epoch": 0.3548783376577589, "grad_norm": 0.6513053178787231, "learning_rate": 2.243500892775319e-05, "loss": 0.0968, "step": 16105 }, { "epoch": 0.35490037294727506, "grad_norm": 0.8364579677581787, "learning_rate": 2.24340791355976e-05, "loss": 0.099, "step": 16106 }, { "epoch": 0.3549224082367912, "grad_norm": 1.1457948684692383, "learning_rate": 2.2433149305576254e-05, "loss": 0.0772, "step": 16107 }, { "epoch": 0.3549444435263074, "grad_norm": 0.6251906156539917, "learning_rate": 2.2432219437693898e-05, "loss": 0.0869, "step": 16108 }, { "epoch": 0.35496647881582355, "grad_norm": 0.780327558517456, "learning_rate": 2.2431289531955258e-05, "loss": 0.0761, "step": 16109 }, { "epoch": 0.3549885141053397, "grad_norm": 0.8176359534263611, "learning_rate": 2.243035958836508e-05, "loss": 0.0865, "step": 16110 }, { "epoch": 0.3550105493948559, "grad_norm": 0.856237530708313, "learning_rate": 2.2429429606928097e-05, "loss": 0.0694, "step": 16111 }, { "epoch": 0.35503258468437204, "grad_norm": 0.7606557011604309, "learning_rate": 2.2428499587649037e-05, "loss": 0.1023, "step": 16112 }, { "epoch": 0.3550546199738882, "grad_norm": 0.8085416555404663, "learning_rate": 2.2427569530532653e-05, "loss": 0.0977, "step": 16113 }, { "epoch": 0.3550766552634043, "grad_norm": 0.8954286575317383, "learning_rate": 2.242663943558367e-05, "loss": 0.0777, "step": 16114 }, { "epoch": 0.3550986905529205, "grad_norm": 0.8819732666015625, "learning_rate": 2.2425709302806834e-05, "loss": 0.1127, "step": 16115 }, { "epoch": 0.35512072584243665, "grad_norm": 0.7796874046325684, "learning_rate": 2.2424779132206873e-05, "loss": 0.0833, "step": 16116 }, { "epoch": 0.3551427611319528, "grad_norm": 0.9213991761207581, "learning_rate": 2.2423848923788534e-05, "loss": 0.1186, "step": 16117 }, { "epoch": 0.355164796421469, "grad_norm": 0.9527359008789062, "learning_rate": 2.2422918677556557e-05, "loss": 0.0908, "step": 16118 }, { "epoch": 0.35518683171098514, "grad_norm": 0.7653155326843262, "learning_rate": 2.2421988393515668e-05, "loss": 0.1008, "step": 16119 }, { "epoch": 0.3552088670005013, "grad_norm": 0.7401347756385803, "learning_rate": 2.242105807167061e-05, "loss": 0.1086, "step": 16120 }, { "epoch": 0.35523090229001747, "grad_norm": 1.0647677183151245, "learning_rate": 2.242012771202613e-05, "loss": 0.1006, "step": 16121 }, { "epoch": 0.35525293757953363, "grad_norm": 0.5020309090614319, "learning_rate": 2.2419197314586955e-05, "loss": 0.1046, "step": 16122 }, { "epoch": 0.3552749728690498, "grad_norm": 0.7346461415290833, "learning_rate": 2.2418266879357834e-05, "loss": 0.078, "step": 16123 }, { "epoch": 0.35529700815856596, "grad_norm": 0.8361897468566895, "learning_rate": 2.24173364063435e-05, "loss": 0.0867, "step": 16124 }, { "epoch": 0.35531904344808213, "grad_norm": 0.689653217792511, "learning_rate": 2.2416405895548692e-05, "loss": 0.1194, "step": 16125 }, { "epoch": 0.35534107873759824, "grad_norm": 0.6370666027069092, "learning_rate": 2.2415475346978154e-05, "loss": 0.0556, "step": 16126 }, { "epoch": 0.3553631140271144, "grad_norm": 0.6978938579559326, "learning_rate": 2.2414544760636624e-05, "loss": 0.072, "step": 16127 }, { "epoch": 0.35538514931663057, "grad_norm": 0.978925883769989, "learning_rate": 2.2413614136528843e-05, "loss": 0.1246, "step": 16128 }, { "epoch": 0.35540718460614673, "grad_norm": 0.9502733945846558, "learning_rate": 2.2412683474659547e-05, "loss": 0.1351, "step": 16129 }, { "epoch": 0.3554292198956629, "grad_norm": 0.43663930892944336, "learning_rate": 2.2411752775033476e-05, "loss": 0.0857, "step": 16130 }, { "epoch": 0.35545125518517906, "grad_norm": 0.9512131214141846, "learning_rate": 2.241082203765538e-05, "loss": 0.1173, "step": 16131 }, { "epoch": 0.3554732904746952, "grad_norm": 0.7263275384902954, "learning_rate": 2.240989126252999e-05, "loss": 0.1091, "step": 16132 }, { "epoch": 0.3554953257642114, "grad_norm": 0.38845622539520264, "learning_rate": 2.2408960449662045e-05, "loss": 0.0746, "step": 16133 }, { "epoch": 0.35551736105372755, "grad_norm": 0.550363302230835, "learning_rate": 2.24080295990563e-05, "loss": 0.0778, "step": 16134 }, { "epoch": 0.3555393963432437, "grad_norm": 0.4118063151836395, "learning_rate": 2.2407098710717484e-05, "loss": 0.089, "step": 16135 }, { "epoch": 0.3555614316327599, "grad_norm": 0.5033949613571167, "learning_rate": 2.2406167784650345e-05, "loss": 0.0607, "step": 16136 }, { "epoch": 0.35558346692227605, "grad_norm": 0.4867297410964966, "learning_rate": 2.240523682085962e-05, "loss": 0.0988, "step": 16137 }, { "epoch": 0.3556055022117922, "grad_norm": 0.7163647413253784, "learning_rate": 2.2404305819350056e-05, "loss": 0.0684, "step": 16138 }, { "epoch": 0.3556275375013083, "grad_norm": 0.5065510869026184, "learning_rate": 2.2403374780126386e-05, "loss": 0.1029, "step": 16139 }, { "epoch": 0.3556495727908245, "grad_norm": 0.9913777112960815, "learning_rate": 2.240244370319336e-05, "loss": 0.0968, "step": 16140 }, { "epoch": 0.35567160808034065, "grad_norm": 0.8179965615272522, "learning_rate": 2.240151258855572e-05, "loss": 0.09, "step": 16141 }, { "epoch": 0.3556936433698568, "grad_norm": 1.0620627403259277, "learning_rate": 2.2400581436218212e-05, "loss": 0.0794, "step": 16142 }, { "epoch": 0.355715678659373, "grad_norm": 0.4741855561733246, "learning_rate": 2.2399650246185567e-05, "loss": 0.0696, "step": 16143 }, { "epoch": 0.35573771394888914, "grad_norm": 0.3819200396537781, "learning_rate": 2.239871901846254e-05, "loss": 0.0931, "step": 16144 }, { "epoch": 0.3557597492384053, "grad_norm": 0.8256522417068481, "learning_rate": 2.2397787753053868e-05, "loss": 0.0889, "step": 16145 }, { "epoch": 0.3557817845279215, "grad_norm": 0.8393145799636841, "learning_rate": 2.2396856449964293e-05, "loss": 0.0989, "step": 16146 }, { "epoch": 0.35580381981743764, "grad_norm": 1.0414001941680908, "learning_rate": 2.2395925109198565e-05, "loss": 0.0891, "step": 16147 }, { "epoch": 0.3558258551069538, "grad_norm": 0.9699722528457642, "learning_rate": 2.2394993730761425e-05, "loss": 0.1325, "step": 16148 }, { "epoch": 0.35584789039646997, "grad_norm": 0.823157787322998, "learning_rate": 2.2394062314657614e-05, "loss": 0.0982, "step": 16149 }, { "epoch": 0.35586992568598613, "grad_norm": 0.8349745273590088, "learning_rate": 2.239313086089188e-05, "loss": 0.0917, "step": 16150 }, { "epoch": 0.35589196097550224, "grad_norm": 0.6635657548904419, "learning_rate": 2.2392199369468964e-05, "loss": 0.0711, "step": 16151 }, { "epoch": 0.3559139962650184, "grad_norm": 0.5084856152534485, "learning_rate": 2.2391267840393615e-05, "loss": 0.1129, "step": 16152 }, { "epoch": 0.35593603155453457, "grad_norm": 0.6002604961395264, "learning_rate": 2.2390336273670575e-05, "loss": 0.1034, "step": 16153 }, { "epoch": 0.35595806684405074, "grad_norm": 0.8871265053749084, "learning_rate": 2.2389404669304585e-05, "loss": 0.1031, "step": 16154 }, { "epoch": 0.3559801021335669, "grad_norm": 0.46198171377182007, "learning_rate": 2.2388473027300397e-05, "loss": 0.1206, "step": 16155 }, { "epoch": 0.35600213742308306, "grad_norm": 0.7557127475738525, "learning_rate": 2.2387541347662756e-05, "loss": 0.078, "step": 16156 }, { "epoch": 0.35602417271259923, "grad_norm": 1.0470622777938843, "learning_rate": 2.2386609630396405e-05, "loss": 0.1143, "step": 16157 }, { "epoch": 0.3560462080021154, "grad_norm": 0.6562021374702454, "learning_rate": 2.238567787550609e-05, "loss": 0.1076, "step": 16158 }, { "epoch": 0.35606824329163156, "grad_norm": 1.2215561866760254, "learning_rate": 2.238474608299656e-05, "loss": 0.0919, "step": 16159 }, { "epoch": 0.3560902785811477, "grad_norm": 0.5389183759689331, "learning_rate": 2.2383814252872554e-05, "loss": 0.0821, "step": 16160 }, { "epoch": 0.3561123138706639, "grad_norm": 0.5937625765800476, "learning_rate": 2.2382882385138823e-05, "loss": 0.1002, "step": 16161 }, { "epoch": 0.35613434916018005, "grad_norm": 0.634560227394104, "learning_rate": 2.2381950479800112e-05, "loss": 0.0886, "step": 16162 }, { "epoch": 0.35615638444969616, "grad_norm": 0.8499979376792908, "learning_rate": 2.238101853686117e-05, "loss": 0.0963, "step": 16163 }, { "epoch": 0.3561784197392123, "grad_norm": 0.6383079886436462, "learning_rate": 2.2380086556326745e-05, "loss": 0.1176, "step": 16164 }, { "epoch": 0.3562004550287285, "grad_norm": 0.7764288187026978, "learning_rate": 2.2379154538201575e-05, "loss": 0.0734, "step": 16165 }, { "epoch": 0.35622249031824466, "grad_norm": 0.6348257064819336, "learning_rate": 2.237822248249042e-05, "loss": 0.1049, "step": 16166 }, { "epoch": 0.3562445256077608, "grad_norm": 0.7831934094429016, "learning_rate": 2.2377290389198022e-05, "loss": 0.0822, "step": 16167 }, { "epoch": 0.356266560897277, "grad_norm": 0.7863941788673401, "learning_rate": 2.2376358258329125e-05, "loss": 0.0839, "step": 16168 }, { "epoch": 0.35628859618679315, "grad_norm": 0.8005473017692566, "learning_rate": 2.2375426089888482e-05, "loss": 0.0988, "step": 16169 }, { "epoch": 0.3563106314763093, "grad_norm": 0.639742374420166, "learning_rate": 2.2374493883880837e-05, "loss": 0.0792, "step": 16170 }, { "epoch": 0.3563326667658255, "grad_norm": 0.5521142482757568, "learning_rate": 2.237356164031094e-05, "loss": 0.0832, "step": 16171 }, { "epoch": 0.35635470205534164, "grad_norm": 0.5279203653335571, "learning_rate": 2.237262935918354e-05, "loss": 0.1015, "step": 16172 }, { "epoch": 0.3563767373448578, "grad_norm": 1.1763534545898438, "learning_rate": 2.2371697040503384e-05, "loss": 0.1129, "step": 16173 }, { "epoch": 0.356398772634374, "grad_norm": 0.6988101005554199, "learning_rate": 2.2370764684275225e-05, "loss": 0.0979, "step": 16174 }, { "epoch": 0.35642080792389014, "grad_norm": 0.4796079993247986, "learning_rate": 2.236983229050381e-05, "loss": 0.1105, "step": 16175 }, { "epoch": 0.35644284321340625, "grad_norm": 0.8018458485603333, "learning_rate": 2.236889985919388e-05, "loss": 0.0713, "step": 16176 }, { "epoch": 0.3564648785029224, "grad_norm": 0.9908903241157532, "learning_rate": 2.2367967390350194e-05, "loss": 0.0736, "step": 16177 }, { "epoch": 0.3564869137924386, "grad_norm": 0.7582022547721863, "learning_rate": 2.2367034883977506e-05, "loss": 0.0965, "step": 16178 }, { "epoch": 0.35650894908195474, "grad_norm": 1.5726511478424072, "learning_rate": 2.236610234008055e-05, "loss": 0.1228, "step": 16179 }, { "epoch": 0.3565309843714709, "grad_norm": 1.0948805809020996, "learning_rate": 2.236516975866409e-05, "loss": 0.0924, "step": 16180 }, { "epoch": 0.35655301966098707, "grad_norm": 0.8893847465515137, "learning_rate": 2.2364237139732867e-05, "loss": 0.0815, "step": 16181 }, { "epoch": 0.35657505495050323, "grad_norm": 0.45516839623451233, "learning_rate": 2.236330448329164e-05, "loss": 0.0508, "step": 16182 }, { "epoch": 0.3565970902400194, "grad_norm": 0.7056659460067749, "learning_rate": 2.2362371789345154e-05, "loss": 0.1143, "step": 16183 }, { "epoch": 0.35661912552953556, "grad_norm": 0.6196484565734863, "learning_rate": 2.2361439057898157e-05, "loss": 0.0921, "step": 16184 }, { "epoch": 0.35664116081905173, "grad_norm": 1.421196460723877, "learning_rate": 2.2360506288955407e-05, "loss": 0.0826, "step": 16185 }, { "epoch": 0.3566631961085679, "grad_norm": 0.674164891242981, "learning_rate": 2.2359573482521647e-05, "loss": 0.1095, "step": 16186 }, { "epoch": 0.35668523139808406, "grad_norm": 1.0077614784240723, "learning_rate": 2.2358640638601638e-05, "loss": 0.1227, "step": 16187 }, { "epoch": 0.35670726668760017, "grad_norm": 0.9561879634857178, "learning_rate": 2.2357707757200123e-05, "loss": 0.147, "step": 16188 }, { "epoch": 0.35672930197711633, "grad_norm": 0.5228080749511719, "learning_rate": 2.2356774838321855e-05, "loss": 0.0587, "step": 16189 }, { "epoch": 0.3567513372666325, "grad_norm": 0.8747479915618896, "learning_rate": 2.2355841881971592e-05, "loss": 0.1066, "step": 16190 }, { "epoch": 0.35677337255614866, "grad_norm": 0.6885095834732056, "learning_rate": 2.2354908888154083e-05, "loss": 0.0934, "step": 16191 }, { "epoch": 0.3567954078456648, "grad_norm": 0.7991983294487, "learning_rate": 2.2353975856874076e-05, "loss": 0.1061, "step": 16192 }, { "epoch": 0.356817443135181, "grad_norm": 0.6773956418037415, "learning_rate": 2.2353042788136323e-05, "loss": 0.0872, "step": 16193 }, { "epoch": 0.35683947842469715, "grad_norm": 0.7617387175559998, "learning_rate": 2.2352109681945586e-05, "loss": 0.0899, "step": 16194 }, { "epoch": 0.3568615137142133, "grad_norm": 0.6487808227539062, "learning_rate": 2.2351176538306607e-05, "loss": 0.0716, "step": 16195 }, { "epoch": 0.3568835490037295, "grad_norm": 0.6173257827758789, "learning_rate": 2.2350243357224147e-05, "loss": 0.0837, "step": 16196 }, { "epoch": 0.35690558429324565, "grad_norm": 0.5577865242958069, "learning_rate": 2.2349310138702953e-05, "loss": 0.0723, "step": 16197 }, { "epoch": 0.3569276195827618, "grad_norm": 0.8811728358268738, "learning_rate": 2.234837688274779e-05, "loss": 0.0938, "step": 16198 }, { "epoch": 0.356949654872278, "grad_norm": 0.9766008853912354, "learning_rate": 2.2347443589363393e-05, "loss": 0.1066, "step": 16199 }, { "epoch": 0.3569716901617941, "grad_norm": 0.8070616126060486, "learning_rate": 2.2346510258554528e-05, "loss": 0.0847, "step": 16200 }, { "epoch": 0.35699372545131025, "grad_norm": 0.887931227684021, "learning_rate": 2.234557689032595e-05, "loss": 0.1229, "step": 16201 }, { "epoch": 0.3570157607408264, "grad_norm": 0.9423471689224243, "learning_rate": 2.2344643484682407e-05, "loss": 0.1196, "step": 16202 }, { "epoch": 0.3570377960303426, "grad_norm": 0.6320716142654419, "learning_rate": 2.234371004162866e-05, "loss": 0.073, "step": 16203 }, { "epoch": 0.35705983131985874, "grad_norm": 0.5228672027587891, "learning_rate": 2.2342776561169455e-05, "loss": 0.1303, "step": 16204 }, { "epoch": 0.3570818666093749, "grad_norm": 1.3706756830215454, "learning_rate": 2.2341843043309555e-05, "loss": 0.1031, "step": 16205 }, { "epoch": 0.3571039018988911, "grad_norm": 0.8685479760169983, "learning_rate": 2.234090948805371e-05, "loss": 0.06, "step": 16206 }, { "epoch": 0.35712593718840724, "grad_norm": 0.563848078250885, "learning_rate": 2.2339975895406678e-05, "loss": 0.0768, "step": 16207 }, { "epoch": 0.3571479724779234, "grad_norm": 0.6339492797851562, "learning_rate": 2.233904226537321e-05, "loss": 0.1047, "step": 16208 }, { "epoch": 0.35717000776743957, "grad_norm": 0.5154995322227478, "learning_rate": 2.2338108597958065e-05, "loss": 0.0571, "step": 16209 }, { "epoch": 0.35719204305695573, "grad_norm": 0.5519179105758667, "learning_rate": 2.2337174893166e-05, "loss": 0.074, "step": 16210 }, { "epoch": 0.3572140783464719, "grad_norm": 0.5540315508842468, "learning_rate": 2.2336241151001766e-05, "loss": 0.078, "step": 16211 }, { "epoch": 0.35723611363598806, "grad_norm": 1.139281153678894, "learning_rate": 2.2335307371470123e-05, "loss": 0.101, "step": 16212 }, { "epoch": 0.35725814892550417, "grad_norm": 0.6528056263923645, "learning_rate": 2.2334373554575825e-05, "loss": 0.0732, "step": 16213 }, { "epoch": 0.35728018421502034, "grad_norm": 0.8184446096420288, "learning_rate": 2.233343970032363e-05, "loss": 0.068, "step": 16214 }, { "epoch": 0.3573022195045365, "grad_norm": 0.8908300399780273, "learning_rate": 2.2332505808718297e-05, "loss": 0.1224, "step": 16215 }, { "epoch": 0.35732425479405266, "grad_norm": 0.7188669443130493, "learning_rate": 2.2331571879764576e-05, "loss": 0.0958, "step": 16216 }, { "epoch": 0.35734629008356883, "grad_norm": 0.5121687650680542, "learning_rate": 2.233063791346723e-05, "loss": 0.0872, "step": 16217 }, { "epoch": 0.357368325373085, "grad_norm": 1.0743694305419922, "learning_rate": 2.2329703909831013e-05, "loss": 0.0985, "step": 16218 }, { "epoch": 0.35739036066260116, "grad_norm": 0.734129011631012, "learning_rate": 2.2328769868860685e-05, "loss": 0.098, "step": 16219 }, { "epoch": 0.3574123959521173, "grad_norm": 1.136475920677185, "learning_rate": 2.2327835790561e-05, "loss": 0.1286, "step": 16220 }, { "epoch": 0.3574344312416335, "grad_norm": 0.9110962152481079, "learning_rate": 2.2326901674936717e-05, "loss": 0.1234, "step": 16221 }, { "epoch": 0.35745646653114965, "grad_norm": 0.5035417675971985, "learning_rate": 2.23259675219926e-05, "loss": 0.0742, "step": 16222 }, { "epoch": 0.3574785018206658, "grad_norm": 0.2959025204181671, "learning_rate": 2.2325033331733397e-05, "loss": 0.0822, "step": 16223 }, { "epoch": 0.357500537110182, "grad_norm": 1.0710655450820923, "learning_rate": 2.2324099104163876e-05, "loss": 0.1433, "step": 16224 }, { "epoch": 0.3575225723996981, "grad_norm": 0.6230751872062683, "learning_rate": 2.2323164839288784e-05, "loss": 0.0815, "step": 16225 }, { "epoch": 0.35754460768921426, "grad_norm": 0.6864767670631409, "learning_rate": 2.2322230537112892e-05, "loss": 0.0594, "step": 16226 }, { "epoch": 0.3575666429787304, "grad_norm": 0.6969805955886841, "learning_rate": 2.2321296197640947e-05, "loss": 0.0673, "step": 16227 }, { "epoch": 0.3575886782682466, "grad_norm": 0.952231228351593, "learning_rate": 2.2320361820877722e-05, "loss": 0.0934, "step": 16228 }, { "epoch": 0.35761071355776275, "grad_norm": 0.46904534101486206, "learning_rate": 2.231942740682797e-05, "loss": 0.1058, "step": 16229 }, { "epoch": 0.3576327488472789, "grad_norm": 0.7416757941246033, "learning_rate": 2.2318492955496442e-05, "loss": 0.1085, "step": 16230 }, { "epoch": 0.3576547841367951, "grad_norm": 0.4279773533344269, "learning_rate": 2.2317558466887908e-05, "loss": 0.0642, "step": 16231 }, { "epoch": 0.35767681942631124, "grad_norm": 0.5102454423904419, "learning_rate": 2.2316623941007125e-05, "loss": 0.1051, "step": 16232 }, { "epoch": 0.3576988547158274, "grad_norm": 0.6268807649612427, "learning_rate": 2.2315689377858854e-05, "loss": 0.0696, "step": 16233 }, { "epoch": 0.3577208900053436, "grad_norm": 0.637427031993866, "learning_rate": 2.2314754777447848e-05, "loss": 0.078, "step": 16234 }, { "epoch": 0.35774292529485974, "grad_norm": 0.8840513229370117, "learning_rate": 2.231382013977888e-05, "loss": 0.0848, "step": 16235 }, { "epoch": 0.3577649605843759, "grad_norm": 0.9471838474273682, "learning_rate": 2.2312885464856703e-05, "loss": 0.1128, "step": 16236 }, { "epoch": 0.357786995873892, "grad_norm": 0.7104203104972839, "learning_rate": 2.2311950752686073e-05, "loss": 0.1148, "step": 16237 }, { "epoch": 0.3578090311634082, "grad_norm": 0.6452697515487671, "learning_rate": 2.2311016003271764e-05, "loss": 0.0715, "step": 16238 }, { "epoch": 0.35783106645292434, "grad_norm": 0.6728762984275818, "learning_rate": 2.231008121661853e-05, "loss": 0.0931, "step": 16239 }, { "epoch": 0.3578531017424405, "grad_norm": 1.0509662628173828, "learning_rate": 2.2309146392731128e-05, "loss": 0.133, "step": 16240 }, { "epoch": 0.35787513703195667, "grad_norm": 1.077788233757019, "learning_rate": 2.2308211531614328e-05, "loss": 0.1028, "step": 16241 }, { "epoch": 0.35789717232147283, "grad_norm": 0.9942609667778015, "learning_rate": 2.2307276633272884e-05, "loss": 0.0981, "step": 16242 }, { "epoch": 0.357919207610989, "grad_norm": 0.8257482647895813, "learning_rate": 2.230634169771156e-05, "loss": 0.0908, "step": 16243 }, { "epoch": 0.35794124290050516, "grad_norm": 0.7394503951072693, "learning_rate": 2.2305406724935125e-05, "loss": 0.0996, "step": 16244 }, { "epoch": 0.35796327819002133, "grad_norm": 0.7296931147575378, "learning_rate": 2.2304471714948334e-05, "loss": 0.0926, "step": 16245 }, { "epoch": 0.3579853134795375, "grad_norm": 0.4787268340587616, "learning_rate": 2.2303536667755953e-05, "loss": 0.096, "step": 16246 }, { "epoch": 0.35800734876905366, "grad_norm": 0.696973979473114, "learning_rate": 2.2302601583362743e-05, "loss": 0.0874, "step": 16247 }, { "epoch": 0.3580293840585698, "grad_norm": 0.7307865619659424, "learning_rate": 2.2301666461773466e-05, "loss": 0.0899, "step": 16248 }, { "epoch": 0.358051419348086, "grad_norm": 0.7206984162330627, "learning_rate": 2.2300731302992884e-05, "loss": 0.0806, "step": 16249 }, { "epoch": 0.3580734546376021, "grad_norm": 1.281449317932129, "learning_rate": 2.2299796107025766e-05, "loss": 0.0887, "step": 16250 }, { "epoch": 0.35809548992711826, "grad_norm": 0.7522051334381104, "learning_rate": 2.229886087387687e-05, "loss": 0.0978, "step": 16251 }, { "epoch": 0.3581175252166344, "grad_norm": 0.7770183086395264, "learning_rate": 2.2297925603550962e-05, "loss": 0.0726, "step": 16252 }, { "epoch": 0.3581395605061506, "grad_norm": 1.3648258447647095, "learning_rate": 2.2296990296052807e-05, "loss": 0.092, "step": 16253 }, { "epoch": 0.35816159579566675, "grad_norm": 1.0877718925476074, "learning_rate": 2.2296054951387168e-05, "loss": 0.1062, "step": 16254 }, { "epoch": 0.3581836310851829, "grad_norm": 0.7478231191635132, "learning_rate": 2.2295119569558806e-05, "loss": 0.093, "step": 16255 }, { "epoch": 0.3582056663746991, "grad_norm": 0.9897716045379639, "learning_rate": 2.2294184150572487e-05, "loss": 0.0818, "step": 16256 }, { "epoch": 0.35822770166421525, "grad_norm": 0.7062758207321167, "learning_rate": 2.2293248694432982e-05, "loss": 0.0957, "step": 16257 }, { "epoch": 0.3582497369537314, "grad_norm": 1.1566325426101685, "learning_rate": 2.2292313201145045e-05, "loss": 0.0936, "step": 16258 }, { "epoch": 0.3582717722432476, "grad_norm": 0.6342554092407227, "learning_rate": 2.229137767071345e-05, "loss": 0.0785, "step": 16259 }, { "epoch": 0.35829380753276374, "grad_norm": 0.4867476224899292, "learning_rate": 2.2290442103142955e-05, "loss": 0.0611, "step": 16260 }, { "epoch": 0.3583158428222799, "grad_norm": 0.6311413049697876, "learning_rate": 2.2289506498438334e-05, "loss": 0.1039, "step": 16261 }, { "epoch": 0.358337878111796, "grad_norm": 0.7251940369606018, "learning_rate": 2.2288570856604345e-05, "loss": 0.0754, "step": 16262 }, { "epoch": 0.3583599134013122, "grad_norm": 0.81698077917099, "learning_rate": 2.228763517764576e-05, "loss": 0.0861, "step": 16263 }, { "epoch": 0.35838194869082834, "grad_norm": 0.9757198691368103, "learning_rate": 2.228669946156733e-05, "loss": 0.0987, "step": 16264 }, { "epoch": 0.3584039839803445, "grad_norm": 0.6595020890235901, "learning_rate": 2.228576370837384e-05, "loss": 0.0688, "step": 16265 }, { "epoch": 0.3584260192698607, "grad_norm": 0.6327180862426758, "learning_rate": 2.2284827918070047e-05, "loss": 0.0943, "step": 16266 }, { "epoch": 0.35844805455937684, "grad_norm": 0.8798370361328125, "learning_rate": 2.228389209066072e-05, "loss": 0.1075, "step": 16267 }, { "epoch": 0.358470089848893, "grad_norm": 0.4788569211959839, "learning_rate": 2.2282956226150627e-05, "loss": 0.1096, "step": 16268 }, { "epoch": 0.35849212513840917, "grad_norm": 0.7083258628845215, "learning_rate": 2.228202032454453e-05, "loss": 0.0565, "step": 16269 }, { "epoch": 0.35851416042792533, "grad_norm": 1.3381415605545044, "learning_rate": 2.22810843858472e-05, "loss": 0.1015, "step": 16270 }, { "epoch": 0.3585361957174415, "grad_norm": 0.8197479248046875, "learning_rate": 2.2280148410063397e-05, "loss": 0.0986, "step": 16271 }, { "epoch": 0.35855823100695766, "grad_norm": 0.5969488620758057, "learning_rate": 2.22792123971979e-05, "loss": 0.0958, "step": 16272 }, { "epoch": 0.3585802662964738, "grad_norm": 0.842810332775116, "learning_rate": 2.227827634725547e-05, "loss": 0.0692, "step": 16273 }, { "epoch": 0.35860230158598994, "grad_norm": 0.8914668560028076, "learning_rate": 2.227734026024087e-05, "loss": 0.1126, "step": 16274 }, { "epoch": 0.3586243368755061, "grad_norm": 1.0194734334945679, "learning_rate": 2.227640413615888e-05, "loss": 0.0906, "step": 16275 }, { "epoch": 0.35864637216502226, "grad_norm": 1.061407208442688, "learning_rate": 2.227546797501426e-05, "loss": 0.129, "step": 16276 }, { "epoch": 0.35866840745453843, "grad_norm": 0.9755694270133972, "learning_rate": 2.2274531776811785e-05, "loss": 0.1036, "step": 16277 }, { "epoch": 0.3586904427440546, "grad_norm": 1.4928768873214722, "learning_rate": 2.2273595541556213e-05, "loss": 0.1079, "step": 16278 }, { "epoch": 0.35871247803357076, "grad_norm": 0.6482706665992737, "learning_rate": 2.2272659269252315e-05, "loss": 0.1164, "step": 16279 }, { "epoch": 0.3587345133230869, "grad_norm": 1.2230480909347534, "learning_rate": 2.227172295990487e-05, "loss": 0.1017, "step": 16280 }, { "epoch": 0.3587565486126031, "grad_norm": 0.5693660974502563, "learning_rate": 2.2270786613518637e-05, "loss": 0.1128, "step": 16281 }, { "epoch": 0.35877858390211925, "grad_norm": 0.6012223362922668, "learning_rate": 2.2269850230098392e-05, "loss": 0.0846, "step": 16282 }, { "epoch": 0.3588006191916354, "grad_norm": 0.6469887495040894, "learning_rate": 2.22689138096489e-05, "loss": 0.1003, "step": 16283 }, { "epoch": 0.3588226544811516, "grad_norm": 0.5828612446784973, "learning_rate": 2.2267977352174935e-05, "loss": 0.0835, "step": 16284 }, { "epoch": 0.35884468977066775, "grad_norm": 1.0077284574508667, "learning_rate": 2.226704085768126e-05, "loss": 0.1004, "step": 16285 }, { "epoch": 0.3588667250601839, "grad_norm": 0.6957470774650574, "learning_rate": 2.226610432617265e-05, "loss": 0.08, "step": 16286 }, { "epoch": 0.3588887603497, "grad_norm": 0.8215264678001404, "learning_rate": 2.2265167757653876e-05, "loss": 0.1146, "step": 16287 }, { "epoch": 0.3589107956392162, "grad_norm": 0.45429304242134094, "learning_rate": 2.22642311521297e-05, "loss": 0.0766, "step": 16288 }, { "epoch": 0.35893283092873235, "grad_norm": 0.5138984322547913, "learning_rate": 2.2263294509604905e-05, "loss": 0.0914, "step": 16289 }, { "epoch": 0.3589548662182485, "grad_norm": 0.5241867303848267, "learning_rate": 2.2262357830084256e-05, "loss": 0.0687, "step": 16290 }, { "epoch": 0.3589769015077647, "grad_norm": 0.8478512763977051, "learning_rate": 2.2261421113572523e-05, "loss": 0.0993, "step": 16291 }, { "epoch": 0.35899893679728084, "grad_norm": 0.7600411176681519, "learning_rate": 2.226048436007448e-05, "loss": 0.0861, "step": 16292 }, { "epoch": 0.359020972086797, "grad_norm": 1.148740530014038, "learning_rate": 2.2259547569594893e-05, "loss": 0.1003, "step": 16293 }, { "epoch": 0.3590430073763132, "grad_norm": 0.8036683201789856, "learning_rate": 2.2258610742138543e-05, "loss": 0.0969, "step": 16294 }, { "epoch": 0.35906504266582934, "grad_norm": 0.6757003664970398, "learning_rate": 2.2257673877710195e-05, "loss": 0.1289, "step": 16295 }, { "epoch": 0.3590870779553455, "grad_norm": 0.7525094151496887, "learning_rate": 2.225673697631462e-05, "loss": 0.0907, "step": 16296 }, { "epoch": 0.35910911324486167, "grad_norm": 0.6868141293525696, "learning_rate": 2.225580003795659e-05, "loss": 0.0756, "step": 16297 }, { "epoch": 0.35913114853437783, "grad_norm": 0.9044342041015625, "learning_rate": 2.2254863062640883e-05, "loss": 0.0875, "step": 16298 }, { "epoch": 0.35915318382389394, "grad_norm": 0.9082952737808228, "learning_rate": 2.2253926050372265e-05, "loss": 0.102, "step": 16299 }, { "epoch": 0.3591752191134101, "grad_norm": 0.566961944103241, "learning_rate": 2.2252989001155517e-05, "loss": 0.0919, "step": 16300 }, { "epoch": 0.35919725440292627, "grad_norm": 0.6487112045288086, "learning_rate": 2.2252051914995404e-05, "loss": 0.0873, "step": 16301 }, { "epoch": 0.35921928969244243, "grad_norm": 0.5802627801895142, "learning_rate": 2.2251114791896702e-05, "loss": 0.0934, "step": 16302 }, { "epoch": 0.3592413249819586, "grad_norm": 1.095776915550232, "learning_rate": 2.2250177631864183e-05, "loss": 0.1107, "step": 16303 }, { "epoch": 0.35926336027147476, "grad_norm": 0.8389540910720825, "learning_rate": 2.2249240434902622e-05, "loss": 0.1151, "step": 16304 }, { "epoch": 0.35928539556099093, "grad_norm": 0.7499970197677612, "learning_rate": 2.2248303201016792e-05, "loss": 0.106, "step": 16305 }, { "epoch": 0.3593074308505071, "grad_norm": 0.6279894113540649, "learning_rate": 2.2247365930211465e-05, "loss": 0.0672, "step": 16306 }, { "epoch": 0.35932946614002326, "grad_norm": 0.8158965110778809, "learning_rate": 2.2246428622491422e-05, "loss": 0.0826, "step": 16307 }, { "epoch": 0.3593515014295394, "grad_norm": 0.4835391938686371, "learning_rate": 2.2245491277861424e-05, "loss": 0.0944, "step": 16308 }, { "epoch": 0.3593735367190556, "grad_norm": 0.7418059706687927, "learning_rate": 2.2244553896326262e-05, "loss": 0.0863, "step": 16309 }, { "epoch": 0.35939557200857175, "grad_norm": 1.1406022310256958, "learning_rate": 2.22436164778907e-05, "loss": 0.1353, "step": 16310 }, { "epoch": 0.35941760729808786, "grad_norm": 0.5818644762039185, "learning_rate": 2.224267902255951e-05, "loss": 0.0801, "step": 16311 }, { "epoch": 0.359439642587604, "grad_norm": 0.46359118819236755, "learning_rate": 2.2241741530337475e-05, "loss": 0.0676, "step": 16312 }, { "epoch": 0.3594616778771202, "grad_norm": 0.6785435676574707, "learning_rate": 2.224080400122937e-05, "loss": 0.0801, "step": 16313 }, { "epoch": 0.35948371316663635, "grad_norm": 0.4613017141819, "learning_rate": 2.2239866435239963e-05, "loss": 0.0691, "step": 16314 }, { "epoch": 0.3595057484561525, "grad_norm": 0.43484196066856384, "learning_rate": 2.2238928832374037e-05, "loss": 0.1025, "step": 16315 }, { "epoch": 0.3595277837456687, "grad_norm": 0.6602590680122375, "learning_rate": 2.2237991192636365e-05, "loss": 0.089, "step": 16316 }, { "epoch": 0.35954981903518485, "grad_norm": 0.7504848837852478, "learning_rate": 2.2237053516031722e-05, "loss": 0.0799, "step": 16317 }, { "epoch": 0.359571854324701, "grad_norm": 1.0674282312393188, "learning_rate": 2.2236115802564883e-05, "loss": 0.1191, "step": 16318 }, { "epoch": 0.3595938896142172, "grad_norm": 0.8144029378890991, "learning_rate": 2.2235178052240625e-05, "loss": 0.1165, "step": 16319 }, { "epoch": 0.35961592490373334, "grad_norm": 0.48691749572753906, "learning_rate": 2.223424026506373e-05, "loss": 0.1002, "step": 16320 }, { "epoch": 0.3596379601932495, "grad_norm": 0.87263423204422, "learning_rate": 2.2233302441038964e-05, "loss": 0.0605, "step": 16321 }, { "epoch": 0.35965999548276567, "grad_norm": 0.6574423909187317, "learning_rate": 2.2232364580171117e-05, "loss": 0.1223, "step": 16322 }, { "epoch": 0.35968203077228184, "grad_norm": 0.8250653147697449, "learning_rate": 2.2231426682464952e-05, "loss": 0.0863, "step": 16323 }, { "epoch": 0.35970406606179794, "grad_norm": 0.7125102877616882, "learning_rate": 2.2230488747925255e-05, "loss": 0.1072, "step": 16324 }, { "epoch": 0.3597261013513141, "grad_norm": 0.7172076106071472, "learning_rate": 2.2229550776556802e-05, "loss": 0.1066, "step": 16325 }, { "epoch": 0.3597481366408303, "grad_norm": 0.8956709504127502, "learning_rate": 2.2228612768364375e-05, "loss": 0.0943, "step": 16326 }, { "epoch": 0.35977017193034644, "grad_norm": 0.8048840165138245, "learning_rate": 2.222767472335274e-05, "loss": 0.0898, "step": 16327 }, { "epoch": 0.3597922072198626, "grad_norm": 0.36460769176483154, "learning_rate": 2.2226736641526685e-05, "loss": 0.0875, "step": 16328 }, { "epoch": 0.35981424250937877, "grad_norm": 1.0381057262420654, "learning_rate": 2.222579852289098e-05, "loss": 0.0991, "step": 16329 }, { "epoch": 0.35983627779889493, "grad_norm": 0.6206127405166626, "learning_rate": 2.2224860367450416e-05, "loss": 0.1188, "step": 16330 }, { "epoch": 0.3598583130884111, "grad_norm": 0.772091805934906, "learning_rate": 2.2223922175209757e-05, "loss": 0.0765, "step": 16331 }, { "epoch": 0.35988034837792726, "grad_norm": 0.7345081567764282, "learning_rate": 2.2222983946173797e-05, "loss": 0.0708, "step": 16332 }, { "epoch": 0.3599023836674434, "grad_norm": 0.8288009166717529, "learning_rate": 2.2222045680347304e-05, "loss": 0.0913, "step": 16333 }, { "epoch": 0.3599244189569596, "grad_norm": 0.8781663179397583, "learning_rate": 2.2221107377735053e-05, "loss": 0.1306, "step": 16334 }, { "epoch": 0.35994645424647576, "grad_norm": 0.8266136050224304, "learning_rate": 2.2220169038341837e-05, "loss": 0.0883, "step": 16335 }, { "epoch": 0.35996848953599186, "grad_norm": 0.5996132493019104, "learning_rate": 2.221923066217242e-05, "loss": 0.0663, "step": 16336 }, { "epoch": 0.35999052482550803, "grad_norm": 0.4319651424884796, "learning_rate": 2.2218292249231596e-05, "loss": 0.069, "step": 16337 }, { "epoch": 0.3600125601150242, "grad_norm": 0.5660994052886963, "learning_rate": 2.2217353799524136e-05, "loss": 0.0639, "step": 16338 }, { "epoch": 0.36003459540454036, "grad_norm": 0.8627372980117798, "learning_rate": 2.221641531305483e-05, "loss": 0.1036, "step": 16339 }, { "epoch": 0.3600566306940565, "grad_norm": 0.6032102108001709, "learning_rate": 2.2215476789828444e-05, "loss": 0.0696, "step": 16340 }, { "epoch": 0.3600786659835727, "grad_norm": 0.6561459302902222, "learning_rate": 2.221453822984977e-05, "loss": 0.0715, "step": 16341 }, { "epoch": 0.36010070127308885, "grad_norm": 0.7902922034263611, "learning_rate": 2.221359963312358e-05, "loss": 0.0875, "step": 16342 }, { "epoch": 0.360122736562605, "grad_norm": 1.080645203590393, "learning_rate": 2.221266099965466e-05, "loss": 0.1248, "step": 16343 }, { "epoch": 0.3601447718521212, "grad_norm": 0.9232574105262756, "learning_rate": 2.221172232944779e-05, "loss": 0.0978, "step": 16344 }, { "epoch": 0.36016680714163735, "grad_norm": 0.7995384335517883, "learning_rate": 2.2210783622507753e-05, "loss": 0.0735, "step": 16345 }, { "epoch": 0.3601888424311535, "grad_norm": 0.9727712273597717, "learning_rate": 2.220984487883932e-05, "loss": 0.1243, "step": 16346 }, { "epoch": 0.3602108777206697, "grad_norm": 0.7750920653343201, "learning_rate": 2.2208906098447292e-05, "loss": 0.085, "step": 16347 }, { "epoch": 0.36023291301018584, "grad_norm": 0.7407605051994324, "learning_rate": 2.2207967281336435e-05, "loss": 0.1363, "step": 16348 }, { "epoch": 0.36025494829970195, "grad_norm": 0.33139124512672424, "learning_rate": 2.2207028427511534e-05, "loss": 0.0591, "step": 16349 }, { "epoch": 0.3602769835892181, "grad_norm": 0.33328691124916077, "learning_rate": 2.2206089536977374e-05, "loss": 0.068, "step": 16350 }, { "epoch": 0.3602990188787343, "grad_norm": 0.7537394762039185, "learning_rate": 2.2205150609738736e-05, "loss": 0.095, "step": 16351 }, { "epoch": 0.36032105416825044, "grad_norm": 0.9140864610671997, "learning_rate": 2.2204211645800403e-05, "loss": 0.1026, "step": 16352 }, { "epoch": 0.3603430894577666, "grad_norm": 0.7001189589500427, "learning_rate": 2.2203272645167155e-05, "loss": 0.108, "step": 16353 }, { "epoch": 0.3603651247472828, "grad_norm": 0.5776461958885193, "learning_rate": 2.2202333607843778e-05, "loss": 0.0638, "step": 16354 }, { "epoch": 0.36038716003679894, "grad_norm": 1.0632723569869995, "learning_rate": 2.220139453383505e-05, "loss": 0.1153, "step": 16355 }, { "epoch": 0.3604091953263151, "grad_norm": 1.1311516761779785, "learning_rate": 2.2200455423145763e-05, "loss": 0.1153, "step": 16356 }, { "epoch": 0.36043123061583127, "grad_norm": 0.4252052307128906, "learning_rate": 2.2199516275780694e-05, "loss": 0.0983, "step": 16357 }, { "epoch": 0.36045326590534743, "grad_norm": 1.6574827432632446, "learning_rate": 2.2198577091744624e-05, "loss": 0.0848, "step": 16358 }, { "epoch": 0.3604753011948636, "grad_norm": 0.8653546571731567, "learning_rate": 2.2197637871042347e-05, "loss": 0.1329, "step": 16359 }, { "epoch": 0.36049733648437976, "grad_norm": 0.6056857109069824, "learning_rate": 2.2196698613678638e-05, "loss": 0.064, "step": 16360 }, { "epoch": 0.36051937177389587, "grad_norm": 0.46416324377059937, "learning_rate": 2.219575931965828e-05, "loss": 0.088, "step": 16361 }, { "epoch": 0.36054140706341203, "grad_norm": 0.6418724656105042, "learning_rate": 2.2194819988986066e-05, "loss": 0.0949, "step": 16362 }, { "epoch": 0.3605634423529282, "grad_norm": 0.7433066368103027, "learning_rate": 2.2193880621666773e-05, "loss": 0.0763, "step": 16363 }, { "epoch": 0.36058547764244436, "grad_norm": 0.786102831363678, "learning_rate": 2.219294121770519e-05, "loss": 0.1034, "step": 16364 }, { "epoch": 0.3606075129319605, "grad_norm": 0.6580560803413391, "learning_rate": 2.21920017771061e-05, "loss": 0.0524, "step": 16365 }, { "epoch": 0.3606295482214767, "grad_norm": 0.5094498991966248, "learning_rate": 2.219106229987429e-05, "loss": 0.0849, "step": 16366 }, { "epoch": 0.36065158351099286, "grad_norm": 0.6768559217453003, "learning_rate": 2.219012278601454e-05, "loss": 0.0863, "step": 16367 }, { "epoch": 0.360673618800509, "grad_norm": 0.8153602480888367, "learning_rate": 2.2189183235531644e-05, "loss": 0.0882, "step": 16368 }, { "epoch": 0.3606956540900252, "grad_norm": 1.1072560548782349, "learning_rate": 2.2188243648430372e-05, "loss": 0.1313, "step": 16369 }, { "epoch": 0.36071768937954135, "grad_norm": 0.8241178393363953, "learning_rate": 2.218730402471553e-05, "loss": 0.0616, "step": 16370 }, { "epoch": 0.3607397246690575, "grad_norm": 0.8527505397796631, "learning_rate": 2.2186364364391893e-05, "loss": 0.109, "step": 16371 }, { "epoch": 0.3607617599585737, "grad_norm": 1.304484486579895, "learning_rate": 2.2185424667464248e-05, "loss": 0.1249, "step": 16372 }, { "epoch": 0.3607837952480898, "grad_norm": 0.8705913424491882, "learning_rate": 2.2184484933937383e-05, "loss": 0.085, "step": 16373 }, { "epoch": 0.36080583053760595, "grad_norm": 0.741386353969574, "learning_rate": 2.2183545163816082e-05, "loss": 0.1392, "step": 16374 }, { "epoch": 0.3608278658271221, "grad_norm": 1.262681484222412, "learning_rate": 2.2182605357105134e-05, "loss": 0.1106, "step": 16375 }, { "epoch": 0.3608499011166383, "grad_norm": 0.9941565990447998, "learning_rate": 2.2181665513809324e-05, "loss": 0.1052, "step": 16376 }, { "epoch": 0.36087193640615445, "grad_norm": 0.9939237236976624, "learning_rate": 2.2180725633933438e-05, "loss": 0.0985, "step": 16377 }, { "epoch": 0.3608939716956706, "grad_norm": 0.9358225464820862, "learning_rate": 2.217978571748227e-05, "loss": 0.1211, "step": 16378 }, { "epoch": 0.3609160069851868, "grad_norm": 0.4210267961025238, "learning_rate": 2.2178845764460603e-05, "loss": 0.0739, "step": 16379 }, { "epoch": 0.36093804227470294, "grad_norm": 0.654254138469696, "learning_rate": 2.2177905774873225e-05, "loss": 0.0702, "step": 16380 }, { "epoch": 0.3609600775642191, "grad_norm": 0.6822264194488525, "learning_rate": 2.2176965748724927e-05, "loss": 0.0915, "step": 16381 }, { "epoch": 0.36098211285373527, "grad_norm": 2.4010632038116455, "learning_rate": 2.2176025686020486e-05, "loss": 0.114, "step": 16382 }, { "epoch": 0.36100414814325144, "grad_norm": 0.9689947962760925, "learning_rate": 2.2175085586764706e-05, "loss": 0.1084, "step": 16383 }, { "epoch": 0.3610261834327676, "grad_norm": 1.135668158531189, "learning_rate": 2.217414545096236e-05, "loss": 0.1037, "step": 16384 }, { "epoch": 0.36104821872228376, "grad_norm": 0.6481689214706421, "learning_rate": 2.2173205278618247e-05, "loss": 0.0851, "step": 16385 }, { "epoch": 0.3610702540117999, "grad_norm": 0.8254814147949219, "learning_rate": 2.2172265069737153e-05, "loss": 0.0809, "step": 16386 }, { "epoch": 0.36109228930131604, "grad_norm": 1.0239639282226562, "learning_rate": 2.217132482432387e-05, "loss": 0.1283, "step": 16387 }, { "epoch": 0.3611143245908322, "grad_norm": 0.62956303358078, "learning_rate": 2.217038454238318e-05, "loss": 0.0776, "step": 16388 }, { "epoch": 0.36113635988034837, "grad_norm": 0.6439211368560791, "learning_rate": 2.216944422391988e-05, "loss": 0.0973, "step": 16389 }, { "epoch": 0.36115839516986453, "grad_norm": 0.49931472539901733, "learning_rate": 2.2168503868938752e-05, "loss": 0.0855, "step": 16390 }, { "epoch": 0.3611804304593807, "grad_norm": 1.1707898378372192, "learning_rate": 2.2167563477444595e-05, "loss": 0.0915, "step": 16391 }, { "epoch": 0.36120246574889686, "grad_norm": 0.7126749753952026, "learning_rate": 2.216662304944219e-05, "loss": 0.0997, "step": 16392 }, { "epoch": 0.361224501038413, "grad_norm": 0.6038623452186584, "learning_rate": 2.216568258493633e-05, "loss": 0.0616, "step": 16393 }, { "epoch": 0.3612465363279292, "grad_norm": 0.6652736663818359, "learning_rate": 2.2164742083931807e-05, "loss": 0.1146, "step": 16394 }, { "epoch": 0.36126857161744536, "grad_norm": 0.7811381816864014, "learning_rate": 2.216380154643341e-05, "loss": 0.1318, "step": 16395 }, { "epoch": 0.3612906069069615, "grad_norm": 0.7056403160095215, "learning_rate": 2.216286097244593e-05, "loss": 0.1096, "step": 16396 }, { "epoch": 0.3613126421964777, "grad_norm": 0.6582862138748169, "learning_rate": 2.2161920361974163e-05, "loss": 0.0995, "step": 16397 }, { "epoch": 0.3613346774859938, "grad_norm": 1.0788826942443848, "learning_rate": 2.216097971502289e-05, "loss": 0.1333, "step": 16398 }, { "epoch": 0.36135671277550996, "grad_norm": 0.8037044405937195, "learning_rate": 2.216003903159691e-05, "loss": 0.0911, "step": 16399 }, { "epoch": 0.3613787480650261, "grad_norm": 0.728580117225647, "learning_rate": 2.2159098311701007e-05, "loss": 0.1121, "step": 16400 }, { "epoch": 0.3614007833545423, "grad_norm": 0.5380464792251587, "learning_rate": 2.215815755533998e-05, "loss": 0.0807, "step": 16401 }, { "epoch": 0.36142281864405845, "grad_norm": 0.8773146271705627, "learning_rate": 2.215721676251862e-05, "loss": 0.1081, "step": 16402 }, { "epoch": 0.3614448539335746, "grad_norm": 0.9368146061897278, "learning_rate": 2.2156275933241716e-05, "loss": 0.072, "step": 16403 }, { "epoch": 0.3614668892230908, "grad_norm": 1.0602983236312866, "learning_rate": 2.215533506751406e-05, "loss": 0.0893, "step": 16404 }, { "epoch": 0.36148892451260695, "grad_norm": 0.42030808329582214, "learning_rate": 2.2154394165340444e-05, "loss": 0.0751, "step": 16405 }, { "epoch": 0.3615109598021231, "grad_norm": 0.6377501487731934, "learning_rate": 2.2153453226725667e-05, "loss": 0.1337, "step": 16406 }, { "epoch": 0.3615329950916393, "grad_norm": 0.7428511381149292, "learning_rate": 2.215251225167451e-05, "loss": 0.0992, "step": 16407 }, { "epoch": 0.36155503038115544, "grad_norm": 0.867110550403595, "learning_rate": 2.215157124019178e-05, "loss": 0.0674, "step": 16408 }, { "epoch": 0.3615770656706716, "grad_norm": 0.5292559862136841, "learning_rate": 2.215063019228226e-05, "loss": 0.0714, "step": 16409 }, { "epoch": 0.3615991009601877, "grad_norm": 0.6571993827819824, "learning_rate": 2.2149689107950743e-05, "loss": 0.0836, "step": 16410 }, { "epoch": 0.3616211362497039, "grad_norm": 0.8129145503044128, "learning_rate": 2.2148747987202025e-05, "loss": 0.0843, "step": 16411 }, { "epoch": 0.36164317153922004, "grad_norm": 0.8625961542129517, "learning_rate": 2.2147806830040905e-05, "loss": 0.1068, "step": 16412 }, { "epoch": 0.3616652068287362, "grad_norm": 0.7125381231307983, "learning_rate": 2.2146865636472168e-05, "loss": 0.0989, "step": 16413 }, { "epoch": 0.3616872421182524, "grad_norm": 0.9679234623908997, "learning_rate": 2.214592440650062e-05, "loss": 0.0952, "step": 16414 }, { "epoch": 0.36170927740776854, "grad_norm": 0.7565062046051025, "learning_rate": 2.2144983140131036e-05, "loss": 0.0918, "step": 16415 }, { "epoch": 0.3617313126972847, "grad_norm": 0.9116647243499756, "learning_rate": 2.2144041837368226e-05, "loss": 0.0743, "step": 16416 }, { "epoch": 0.36175334798680087, "grad_norm": 0.823168158531189, "learning_rate": 2.2143100498216978e-05, "loss": 0.0901, "step": 16417 }, { "epoch": 0.36177538327631703, "grad_norm": 0.6730746030807495, "learning_rate": 2.214215912268209e-05, "loss": 0.092, "step": 16418 }, { "epoch": 0.3617974185658332, "grad_norm": 0.6015874147415161, "learning_rate": 2.2141217710768355e-05, "loss": 0.0739, "step": 16419 }, { "epoch": 0.36181945385534936, "grad_norm": 0.6133683323860168, "learning_rate": 2.2140276262480572e-05, "loss": 0.0989, "step": 16420 }, { "epoch": 0.3618414891448655, "grad_norm": 0.9295287728309631, "learning_rate": 2.2139334777823535e-05, "loss": 0.13, "step": 16421 }, { "epoch": 0.3618635244343817, "grad_norm": 0.8261310458183289, "learning_rate": 2.2138393256802028e-05, "loss": 0.0984, "step": 16422 }, { "epoch": 0.3618855597238978, "grad_norm": 0.52657151222229, "learning_rate": 2.2137451699420867e-05, "loss": 0.0954, "step": 16423 }, { "epoch": 0.36190759501341396, "grad_norm": 0.7175430059432983, "learning_rate": 2.213651010568483e-05, "loss": 0.0943, "step": 16424 }, { "epoch": 0.3619296303029301, "grad_norm": 0.7390445470809937, "learning_rate": 2.213556847559872e-05, "loss": 0.078, "step": 16425 }, { "epoch": 0.3619516655924463, "grad_norm": 0.7681275010108948, "learning_rate": 2.2134626809167336e-05, "loss": 0.1085, "step": 16426 }, { "epoch": 0.36197370088196246, "grad_norm": 0.7535334825515747, "learning_rate": 2.2133685106395473e-05, "loss": 0.0619, "step": 16427 }, { "epoch": 0.3619957361714786, "grad_norm": 0.8804960250854492, "learning_rate": 2.2132743367287926e-05, "loss": 0.1105, "step": 16428 }, { "epoch": 0.3620177714609948, "grad_norm": 0.7329438924789429, "learning_rate": 2.2131801591849493e-05, "loss": 0.0743, "step": 16429 }, { "epoch": 0.36203980675051095, "grad_norm": 0.7044264674186707, "learning_rate": 2.2130859780084964e-05, "loss": 0.0615, "step": 16430 }, { "epoch": 0.3620618420400271, "grad_norm": 0.9377428889274597, "learning_rate": 2.2129917931999147e-05, "loss": 0.0692, "step": 16431 }, { "epoch": 0.3620838773295433, "grad_norm": 0.23379719257354736, "learning_rate": 2.2128976047596833e-05, "loss": 0.0543, "step": 16432 }, { "epoch": 0.36210591261905944, "grad_norm": 1.0012508630752563, "learning_rate": 2.212803412688282e-05, "loss": 0.078, "step": 16433 }, { "epoch": 0.3621279479085756, "grad_norm": 0.7508058547973633, "learning_rate": 2.212709216986191e-05, "loss": 0.0936, "step": 16434 }, { "epoch": 0.3621499831980917, "grad_norm": 0.6075186729431152, "learning_rate": 2.2126150176538896e-05, "loss": 0.0875, "step": 16435 }, { "epoch": 0.3621720184876079, "grad_norm": 0.5648573040962219, "learning_rate": 2.212520814691858e-05, "loss": 0.1261, "step": 16436 }, { "epoch": 0.36219405377712405, "grad_norm": 0.5895106196403503, "learning_rate": 2.2124266081005757e-05, "loss": 0.1128, "step": 16437 }, { "epoch": 0.3622160890666402, "grad_norm": 0.7343639731407166, "learning_rate": 2.2123323978805224e-05, "loss": 0.1173, "step": 16438 }, { "epoch": 0.3622381243561564, "grad_norm": 0.7754695415496826, "learning_rate": 2.2122381840321785e-05, "loss": 0.0705, "step": 16439 }, { "epoch": 0.36226015964567254, "grad_norm": 0.6317604184150696, "learning_rate": 2.2121439665560236e-05, "loss": 0.0781, "step": 16440 }, { "epoch": 0.3622821949351887, "grad_norm": 0.9181004762649536, "learning_rate": 2.2120497454525375e-05, "loss": 0.1275, "step": 16441 }, { "epoch": 0.36230423022470487, "grad_norm": 0.8332899212837219, "learning_rate": 2.2119555207222e-05, "loss": 0.0887, "step": 16442 }, { "epoch": 0.36232626551422104, "grad_norm": 0.5161581039428711, "learning_rate": 2.2118612923654912e-05, "loss": 0.0887, "step": 16443 }, { "epoch": 0.3623483008037372, "grad_norm": 0.8203938007354736, "learning_rate": 2.2117670603828915e-05, "loss": 0.0627, "step": 16444 }, { "epoch": 0.36237033609325336, "grad_norm": 0.6861408352851868, "learning_rate": 2.2116728247748803e-05, "loss": 0.1189, "step": 16445 }, { "epoch": 0.36239237138276953, "grad_norm": 1.2868201732635498, "learning_rate": 2.211578585541938e-05, "loss": 0.1258, "step": 16446 }, { "epoch": 0.36241440667228564, "grad_norm": 0.5412918925285339, "learning_rate": 2.211484342684544e-05, "loss": 0.0789, "step": 16447 }, { "epoch": 0.3624364419618018, "grad_norm": 0.7375284433364868, "learning_rate": 2.211390096203179e-05, "loss": 0.1245, "step": 16448 }, { "epoch": 0.36245847725131797, "grad_norm": 0.7370436191558838, "learning_rate": 2.2112958460983226e-05, "loss": 0.0946, "step": 16449 }, { "epoch": 0.36248051254083413, "grad_norm": 1.3116010427474976, "learning_rate": 2.2112015923704552e-05, "loss": 0.1035, "step": 16450 }, { "epoch": 0.3625025478303503, "grad_norm": 0.6818839311599731, "learning_rate": 2.2111073350200563e-05, "loss": 0.0993, "step": 16451 }, { "epoch": 0.36252458311986646, "grad_norm": 0.8071311116218567, "learning_rate": 2.2110130740476073e-05, "loss": 0.0823, "step": 16452 }, { "epoch": 0.3625466184093826, "grad_norm": 0.8824349641799927, "learning_rate": 2.2109188094535865e-05, "loss": 0.1065, "step": 16453 }, { "epoch": 0.3625686536988988, "grad_norm": 0.7597813010215759, "learning_rate": 2.210824541238476e-05, "loss": 0.0691, "step": 16454 }, { "epoch": 0.36259068898841496, "grad_norm": 0.4997781813144684, "learning_rate": 2.2107302694027538e-05, "loss": 0.0685, "step": 16455 }, { "epoch": 0.3626127242779311, "grad_norm": 1.1076425313949585, "learning_rate": 2.2106359939469016e-05, "loss": 0.0811, "step": 16456 }, { "epoch": 0.3626347595674473, "grad_norm": 0.7812309265136719, "learning_rate": 2.2105417148713995e-05, "loss": 0.0808, "step": 16457 }, { "epoch": 0.36265679485696345, "grad_norm": 0.6570700407028198, "learning_rate": 2.2104474321767275e-05, "loss": 0.0816, "step": 16458 }, { "epoch": 0.3626788301464796, "grad_norm": 0.9178552627563477, "learning_rate": 2.210353145863365e-05, "loss": 0.0878, "step": 16459 }, { "epoch": 0.3627008654359957, "grad_norm": 0.819760799407959, "learning_rate": 2.2102588559317937e-05, "loss": 0.1146, "step": 16460 }, { "epoch": 0.3627229007255119, "grad_norm": 0.7718576192855835, "learning_rate": 2.210164562382493e-05, "loss": 0.1142, "step": 16461 }, { "epoch": 0.36274493601502805, "grad_norm": 0.5408479571342468, "learning_rate": 2.2100702652159434e-05, "loss": 0.0833, "step": 16462 }, { "epoch": 0.3627669713045442, "grad_norm": 0.6869649291038513, "learning_rate": 2.209975964432625e-05, "loss": 0.0997, "step": 16463 }, { "epoch": 0.3627890065940604, "grad_norm": 0.8271881937980652, "learning_rate": 2.2098816600330186e-05, "loss": 0.0994, "step": 16464 }, { "epoch": 0.36281104188357655, "grad_norm": 0.6237945556640625, "learning_rate": 2.209787352017604e-05, "loss": 0.0657, "step": 16465 }, { "epoch": 0.3628330771730927, "grad_norm": 1.215325117111206, "learning_rate": 2.2096930403868617e-05, "loss": 0.0964, "step": 16466 }, { "epoch": 0.3628551124626089, "grad_norm": 0.7317004203796387, "learning_rate": 2.2095987251412722e-05, "loss": 0.1174, "step": 16467 }, { "epoch": 0.36287714775212504, "grad_norm": 0.4005925953388214, "learning_rate": 2.2095044062813163e-05, "loss": 0.0552, "step": 16468 }, { "epoch": 0.3628991830416412, "grad_norm": 0.5280052423477173, "learning_rate": 2.2094100838074736e-05, "loss": 0.0718, "step": 16469 }, { "epoch": 0.36292121833115737, "grad_norm": 0.7957727313041687, "learning_rate": 2.209315757720225e-05, "loss": 0.0822, "step": 16470 }, { "epoch": 0.36294325362067353, "grad_norm": 0.6579598188400269, "learning_rate": 2.2092214280200508e-05, "loss": 0.0696, "step": 16471 }, { "epoch": 0.36296528891018964, "grad_norm": 0.8155558705329895, "learning_rate": 2.2091270947074316e-05, "loss": 0.094, "step": 16472 }, { "epoch": 0.3629873241997058, "grad_norm": 0.488896906375885, "learning_rate": 2.209032757782848e-05, "loss": 0.0603, "step": 16473 }, { "epoch": 0.36300935948922197, "grad_norm": 0.6994946599006653, "learning_rate": 2.20893841724678e-05, "loss": 0.0923, "step": 16474 }, { "epoch": 0.36303139477873814, "grad_norm": 0.5665867328643799, "learning_rate": 2.208844073099709e-05, "loss": 0.1062, "step": 16475 }, { "epoch": 0.3630534300682543, "grad_norm": 0.9173603653907776, "learning_rate": 2.2087497253421148e-05, "loss": 0.0692, "step": 16476 }, { "epoch": 0.36307546535777047, "grad_norm": 0.510504424571991, "learning_rate": 2.2086553739744784e-05, "loss": 0.0807, "step": 16477 }, { "epoch": 0.36309750064728663, "grad_norm": 0.6514605283737183, "learning_rate": 2.2085610189972795e-05, "loss": 0.0936, "step": 16478 }, { "epoch": 0.3631195359368028, "grad_norm": 0.4223494827747345, "learning_rate": 2.208466660411e-05, "loss": 0.0782, "step": 16479 }, { "epoch": 0.36314157122631896, "grad_norm": 1.5174134969711304, "learning_rate": 2.208372298216119e-05, "loss": 0.1304, "step": 16480 }, { "epoch": 0.3631636065158351, "grad_norm": 0.7863543033599854, "learning_rate": 2.208277932413119e-05, "loss": 0.1033, "step": 16481 }, { "epoch": 0.3631856418053513, "grad_norm": 0.7655673623085022, "learning_rate": 2.2081835630024792e-05, "loss": 0.1018, "step": 16482 }, { "epoch": 0.36320767709486745, "grad_norm": 0.441171258687973, "learning_rate": 2.2080891899846813e-05, "loss": 0.0875, "step": 16483 }, { "epoch": 0.36322971238438356, "grad_norm": 0.7596501708030701, "learning_rate": 2.207994813360205e-05, "loss": 0.1014, "step": 16484 }, { "epoch": 0.3632517476738997, "grad_norm": 1.102195143699646, "learning_rate": 2.2079004331295315e-05, "loss": 0.0945, "step": 16485 }, { "epoch": 0.3632737829634159, "grad_norm": 0.6161078810691833, "learning_rate": 2.2078060492931415e-05, "loss": 0.0958, "step": 16486 }, { "epoch": 0.36329581825293206, "grad_norm": 0.7830296158790588, "learning_rate": 2.2077116618515157e-05, "loss": 0.0869, "step": 16487 }, { "epoch": 0.3633178535424482, "grad_norm": 0.6641445755958557, "learning_rate": 2.207617270805135e-05, "loss": 0.074, "step": 16488 }, { "epoch": 0.3633398888319644, "grad_norm": 0.9723119139671326, "learning_rate": 2.2075228761544797e-05, "loss": 0.124, "step": 16489 }, { "epoch": 0.36336192412148055, "grad_norm": 0.6732895374298096, "learning_rate": 2.2074284779000314e-05, "loss": 0.0871, "step": 16490 }, { "epoch": 0.3633839594109967, "grad_norm": 0.7100324034690857, "learning_rate": 2.2073340760422707e-05, "loss": 0.1079, "step": 16491 }, { "epoch": 0.3634059947005129, "grad_norm": 0.4597442150115967, "learning_rate": 2.207239670581678e-05, "loss": 0.0746, "step": 16492 }, { "epoch": 0.36342802999002904, "grad_norm": 0.8180698156356812, "learning_rate": 2.2071452615187343e-05, "loss": 0.097, "step": 16493 }, { "epoch": 0.3634500652795452, "grad_norm": 0.9305778741836548, "learning_rate": 2.207050848853921e-05, "loss": 0.1061, "step": 16494 }, { "epoch": 0.3634721005690614, "grad_norm": 0.6347110867500305, "learning_rate": 2.206956432587718e-05, "loss": 0.116, "step": 16495 }, { "epoch": 0.36349413585857754, "grad_norm": 0.8006847500801086, "learning_rate": 2.2068620127206072e-05, "loss": 0.1337, "step": 16496 }, { "epoch": 0.36351617114809365, "grad_norm": 0.8056777715682983, "learning_rate": 2.2067675892530685e-05, "loss": 0.0777, "step": 16497 }, { "epoch": 0.3635382064376098, "grad_norm": 1.0185188055038452, "learning_rate": 2.206673162185584e-05, "loss": 0.0939, "step": 16498 }, { "epoch": 0.363560241727126, "grad_norm": 2.0509626865386963, "learning_rate": 2.2065787315186346e-05, "loss": 0.0986, "step": 16499 }, { "epoch": 0.36358227701664214, "grad_norm": 0.599897027015686, "learning_rate": 2.2064842972527e-05, "loss": 0.1003, "step": 16500 }, { "epoch": 0.3636043123061583, "grad_norm": 0.7670013904571533, "learning_rate": 2.206389859388262e-05, "loss": 0.1481, "step": 16501 }, { "epoch": 0.36362634759567447, "grad_norm": 1.2917739152908325, "learning_rate": 2.2062954179258023e-05, "loss": 0.1138, "step": 16502 }, { "epoch": 0.36364838288519064, "grad_norm": 0.6597457528114319, "learning_rate": 2.2062009728658007e-05, "loss": 0.1264, "step": 16503 }, { "epoch": 0.3636704181747068, "grad_norm": 0.5909834504127502, "learning_rate": 2.2061065242087395e-05, "loss": 0.1114, "step": 16504 }, { "epoch": 0.36369245346422296, "grad_norm": 1.0538803339004517, "learning_rate": 2.2060120719550986e-05, "loss": 0.1253, "step": 16505 }, { "epoch": 0.36371448875373913, "grad_norm": 0.5347208976745605, "learning_rate": 2.2059176161053596e-05, "loss": 0.0689, "step": 16506 }, { "epoch": 0.3637365240432553, "grad_norm": 0.8854671716690063, "learning_rate": 2.2058231566600037e-05, "loss": 0.0867, "step": 16507 }, { "epoch": 0.36375855933277146, "grad_norm": 0.6045030355453491, "learning_rate": 2.2057286936195123e-05, "loss": 0.0838, "step": 16508 }, { "epoch": 0.36378059462228757, "grad_norm": 0.7557721138000488, "learning_rate": 2.205634226984366e-05, "loss": 0.0863, "step": 16509 }, { "epoch": 0.36380262991180373, "grad_norm": 1.4651756286621094, "learning_rate": 2.2055397567550463e-05, "loss": 0.1143, "step": 16510 }, { "epoch": 0.3638246652013199, "grad_norm": 0.4019348919391632, "learning_rate": 2.205445282932034e-05, "loss": 0.0791, "step": 16511 }, { "epoch": 0.36384670049083606, "grad_norm": 1.294732689857483, "learning_rate": 2.2053508055158105e-05, "loss": 0.0848, "step": 16512 }, { "epoch": 0.3638687357803522, "grad_norm": 0.7003150582313538, "learning_rate": 2.2052563245068575e-05, "loss": 0.0648, "step": 16513 }, { "epoch": 0.3638907710698684, "grad_norm": 0.5680519938468933, "learning_rate": 2.205161839905655e-05, "loss": 0.0834, "step": 16514 }, { "epoch": 0.36391280635938456, "grad_norm": 0.8232942223548889, "learning_rate": 2.2050673517126864e-05, "loss": 0.081, "step": 16515 }, { "epoch": 0.3639348416489007, "grad_norm": 0.643495500087738, "learning_rate": 2.204972859928431e-05, "loss": 0.0639, "step": 16516 }, { "epoch": 0.3639568769384169, "grad_norm": 0.7391760945320129, "learning_rate": 2.204878364553371e-05, "loss": 0.1201, "step": 16517 }, { "epoch": 0.36397891222793305, "grad_norm": 0.747083306312561, "learning_rate": 2.2047838655879874e-05, "loss": 0.0748, "step": 16518 }, { "epoch": 0.3640009475174492, "grad_norm": 0.45950162410736084, "learning_rate": 2.2046893630327616e-05, "loss": 0.0683, "step": 16519 }, { "epoch": 0.3640229828069654, "grad_norm": 0.8760620355606079, "learning_rate": 2.204594856888175e-05, "loss": 0.0814, "step": 16520 }, { "epoch": 0.3640450180964815, "grad_norm": 1.1500517129898071, "learning_rate": 2.2045003471547092e-05, "loss": 0.1489, "step": 16521 }, { "epoch": 0.36406705338599765, "grad_norm": 0.430462509393692, "learning_rate": 2.2044058338328445e-05, "loss": 0.0664, "step": 16522 }, { "epoch": 0.3640890886755138, "grad_norm": 0.5382892489433289, "learning_rate": 2.2043113169230642e-05, "loss": 0.1024, "step": 16523 }, { "epoch": 0.36411112396503, "grad_norm": 0.8161360025405884, "learning_rate": 2.2042167964258482e-05, "loss": 0.0907, "step": 16524 }, { "epoch": 0.36413315925454615, "grad_norm": 0.46920084953308105, "learning_rate": 2.2041222723416788e-05, "loss": 0.0934, "step": 16525 }, { "epoch": 0.3641551945440623, "grad_norm": 0.7488927841186523, "learning_rate": 2.2040277446710364e-05, "loss": 0.0714, "step": 16526 }, { "epoch": 0.3641772298335785, "grad_norm": 0.43216025829315186, "learning_rate": 2.203933213414404e-05, "loss": 0.0784, "step": 16527 }, { "epoch": 0.36419926512309464, "grad_norm": 0.709998369216919, "learning_rate": 2.2038386785722616e-05, "loss": 0.0725, "step": 16528 }, { "epoch": 0.3642213004126108, "grad_norm": 0.8508041501045227, "learning_rate": 2.2037441401450915e-05, "loss": 0.1023, "step": 16529 }, { "epoch": 0.36424333570212697, "grad_norm": 0.633176863193512, "learning_rate": 2.203649598133375e-05, "loss": 0.1003, "step": 16530 }, { "epoch": 0.36426537099164313, "grad_norm": 0.9057834148406982, "learning_rate": 2.2035550525375942e-05, "loss": 0.1031, "step": 16531 }, { "epoch": 0.3642874062811593, "grad_norm": 0.5838632583618164, "learning_rate": 2.20346050335823e-05, "loss": 0.0712, "step": 16532 }, { "epoch": 0.36430944157067546, "grad_norm": 0.7747031450271606, "learning_rate": 2.2033659505957645e-05, "loss": 0.1199, "step": 16533 }, { "epoch": 0.36433147686019157, "grad_norm": 0.5108795762062073, "learning_rate": 2.2032713942506787e-05, "loss": 0.0947, "step": 16534 }, { "epoch": 0.36435351214970774, "grad_norm": 0.7264218926429749, "learning_rate": 2.2031768343234542e-05, "loss": 0.0932, "step": 16535 }, { "epoch": 0.3643755474392239, "grad_norm": 0.5753141045570374, "learning_rate": 2.2030822708145734e-05, "loss": 0.0805, "step": 16536 }, { "epoch": 0.36439758272874007, "grad_norm": 0.7668614983558655, "learning_rate": 2.2029877037245178e-05, "loss": 0.0765, "step": 16537 }, { "epoch": 0.36441961801825623, "grad_norm": 0.5932404398918152, "learning_rate": 2.2028931330537687e-05, "loss": 0.0753, "step": 16538 }, { "epoch": 0.3644416533077724, "grad_norm": 0.901710569858551, "learning_rate": 2.202798558802808e-05, "loss": 0.1126, "step": 16539 }, { "epoch": 0.36446368859728856, "grad_norm": 0.4095197319984436, "learning_rate": 2.2027039809721174e-05, "loss": 0.115, "step": 16540 }, { "epoch": 0.3644857238868047, "grad_norm": 0.7637404799461365, "learning_rate": 2.2026093995621784e-05, "loss": 0.1318, "step": 16541 }, { "epoch": 0.3645077591763209, "grad_norm": 0.8064156174659729, "learning_rate": 2.2025148145734734e-05, "loss": 0.0882, "step": 16542 }, { "epoch": 0.36452979446583705, "grad_norm": 0.6846227645874023, "learning_rate": 2.202420226006483e-05, "loss": 0.0943, "step": 16543 }, { "epoch": 0.3645518297553532, "grad_norm": 1.0353673696517944, "learning_rate": 2.2023256338616904e-05, "loss": 0.1103, "step": 16544 }, { "epoch": 0.3645738650448694, "grad_norm": 0.7325056791305542, "learning_rate": 2.2022310381395763e-05, "loss": 0.0891, "step": 16545 }, { "epoch": 0.3645959003343855, "grad_norm": 0.6332017183303833, "learning_rate": 2.202136438840623e-05, "loss": 0.0794, "step": 16546 }, { "epoch": 0.36461793562390166, "grad_norm": 1.0883122682571411, "learning_rate": 2.2020418359653126e-05, "loss": 0.1325, "step": 16547 }, { "epoch": 0.3646399709134178, "grad_norm": 0.7399710416793823, "learning_rate": 2.2019472295141266e-05, "loss": 0.0843, "step": 16548 }, { "epoch": 0.364662006202934, "grad_norm": 0.6723195314407349, "learning_rate": 2.2018526194875465e-05, "loss": 0.0648, "step": 16549 }, { "epoch": 0.36468404149245015, "grad_norm": 0.49709513783454895, "learning_rate": 2.2017580058860553e-05, "loss": 0.0626, "step": 16550 }, { "epoch": 0.3647060767819663, "grad_norm": 0.5356273651123047, "learning_rate": 2.2016633887101335e-05, "loss": 0.0851, "step": 16551 }, { "epoch": 0.3647281120714825, "grad_norm": 0.905582070350647, "learning_rate": 2.2015687679602643e-05, "loss": 0.0943, "step": 16552 }, { "epoch": 0.36475014736099864, "grad_norm": 0.693619966506958, "learning_rate": 2.201474143636929e-05, "loss": 0.08, "step": 16553 }, { "epoch": 0.3647721826505148, "grad_norm": 0.6080203652381897, "learning_rate": 2.20137951574061e-05, "loss": 0.1056, "step": 16554 }, { "epoch": 0.364794217940031, "grad_norm": 0.5714483261108398, "learning_rate": 2.201284884271789e-05, "loss": 0.1035, "step": 16555 }, { "epoch": 0.36481625322954714, "grad_norm": 0.5314763188362122, "learning_rate": 2.2011902492309475e-05, "loss": 0.0817, "step": 16556 }, { "epoch": 0.3648382885190633, "grad_norm": 0.7090016603469849, "learning_rate": 2.2010956106185686e-05, "loss": 0.0776, "step": 16557 }, { "epoch": 0.36486032380857947, "grad_norm": 0.9976741075515747, "learning_rate": 2.2010009684351334e-05, "loss": 0.1406, "step": 16558 }, { "epoch": 0.3648823590980956, "grad_norm": 0.8666207790374756, "learning_rate": 2.2009063226811247e-05, "loss": 0.0959, "step": 16559 }, { "epoch": 0.36490439438761174, "grad_norm": 0.5832615494728088, "learning_rate": 2.200811673357024e-05, "loss": 0.0898, "step": 16560 }, { "epoch": 0.3649264296771279, "grad_norm": 0.9639356136322021, "learning_rate": 2.2007170204633138e-05, "loss": 0.1281, "step": 16561 }, { "epoch": 0.36494846496664407, "grad_norm": 0.561507523059845, "learning_rate": 2.200622364000476e-05, "loss": 0.106, "step": 16562 }, { "epoch": 0.36497050025616024, "grad_norm": 0.5520572066307068, "learning_rate": 2.2005277039689926e-05, "loss": 0.0981, "step": 16563 }, { "epoch": 0.3649925355456764, "grad_norm": 0.5594615340232849, "learning_rate": 2.2004330403693467e-05, "loss": 0.0794, "step": 16564 }, { "epoch": 0.36501457083519256, "grad_norm": 0.7806246280670166, "learning_rate": 2.200338373202019e-05, "loss": 0.0588, "step": 16565 }, { "epoch": 0.36503660612470873, "grad_norm": 0.6310198903083801, "learning_rate": 2.2002437024674923e-05, "loss": 0.0605, "step": 16566 }, { "epoch": 0.3650586414142249, "grad_norm": 0.9085533618927002, "learning_rate": 2.2001490281662496e-05, "loss": 0.1216, "step": 16567 }, { "epoch": 0.36508067670374106, "grad_norm": 0.6105809807777405, "learning_rate": 2.200054350298772e-05, "loss": 0.0479, "step": 16568 }, { "epoch": 0.3651027119932572, "grad_norm": 0.678043007850647, "learning_rate": 2.1999596688655422e-05, "loss": 0.1211, "step": 16569 }, { "epoch": 0.3651247472827734, "grad_norm": 0.9890684485435486, "learning_rate": 2.1998649838670425e-05, "loss": 0.0854, "step": 16570 }, { "epoch": 0.3651467825722895, "grad_norm": 0.6218820810317993, "learning_rate": 2.1997702953037552e-05, "loss": 0.0683, "step": 16571 }, { "epoch": 0.36516881786180566, "grad_norm": 0.7034907341003418, "learning_rate": 2.1996756031761624e-05, "loss": 0.0908, "step": 16572 }, { "epoch": 0.3651908531513218, "grad_norm": 1.2348743677139282, "learning_rate": 2.199580907484747e-05, "loss": 0.0939, "step": 16573 }, { "epoch": 0.365212888440838, "grad_norm": 1.2708911895751953, "learning_rate": 2.19948620822999e-05, "loss": 0.0849, "step": 16574 }, { "epoch": 0.36523492373035416, "grad_norm": 0.7916923761367798, "learning_rate": 2.1993915054123756e-05, "loss": 0.0959, "step": 16575 }, { "epoch": 0.3652569590198703, "grad_norm": 1.053332805633545, "learning_rate": 2.1992967990323848e-05, "loss": 0.1209, "step": 16576 }, { "epoch": 0.3652789943093865, "grad_norm": 0.6884745359420776, "learning_rate": 2.1992020890905002e-05, "loss": 0.1111, "step": 16577 }, { "epoch": 0.36530102959890265, "grad_norm": 0.7177544236183167, "learning_rate": 2.1991073755872046e-05, "loss": 0.1052, "step": 16578 }, { "epoch": 0.3653230648884188, "grad_norm": 0.7940173149108887, "learning_rate": 2.1990126585229804e-05, "loss": 0.0948, "step": 16579 }, { "epoch": 0.365345100177935, "grad_norm": 0.9906995892524719, "learning_rate": 2.19891793789831e-05, "loss": 0.126, "step": 16580 }, { "epoch": 0.36536713546745114, "grad_norm": 1.405869722366333, "learning_rate": 2.1988232137136753e-05, "loss": 0.0692, "step": 16581 }, { "epoch": 0.3653891707569673, "grad_norm": 0.5822445750236511, "learning_rate": 2.1987284859695597e-05, "loss": 0.081, "step": 16582 }, { "epoch": 0.3654112060464834, "grad_norm": 0.7027645111083984, "learning_rate": 2.1986337546664447e-05, "loss": 0.0891, "step": 16583 }, { "epoch": 0.3654332413359996, "grad_norm": 0.6382482647895813, "learning_rate": 2.1985390198048135e-05, "loss": 0.107, "step": 16584 }, { "epoch": 0.36545527662551575, "grad_norm": 0.7504134774208069, "learning_rate": 2.1984442813851486e-05, "loss": 0.1398, "step": 16585 }, { "epoch": 0.3654773119150319, "grad_norm": 0.8147053122520447, "learning_rate": 2.1983495394079328e-05, "loss": 0.1007, "step": 16586 }, { "epoch": 0.3654993472045481, "grad_norm": 0.5039618611335754, "learning_rate": 2.198254793873648e-05, "loss": 0.0772, "step": 16587 }, { "epoch": 0.36552138249406424, "grad_norm": 0.48935970664024353, "learning_rate": 2.1981600447827772e-05, "loss": 0.0549, "step": 16588 }, { "epoch": 0.3655434177835804, "grad_norm": 0.8209589719772339, "learning_rate": 2.1980652921358028e-05, "loss": 0.1046, "step": 16589 }, { "epoch": 0.36556545307309657, "grad_norm": 0.647800862789154, "learning_rate": 2.1979705359332074e-05, "loss": 0.07, "step": 16590 }, { "epoch": 0.36558748836261273, "grad_norm": 1.0196118354797363, "learning_rate": 2.197875776175474e-05, "loss": 0.1222, "step": 16591 }, { "epoch": 0.3656095236521289, "grad_norm": 0.6065532565116882, "learning_rate": 2.197781012863085e-05, "loss": 0.074, "step": 16592 }, { "epoch": 0.36563155894164506, "grad_norm": 0.6849094033241272, "learning_rate": 2.1976862459965225e-05, "loss": 0.1086, "step": 16593 }, { "epoch": 0.3656535942311612, "grad_norm": 0.8708454966545105, "learning_rate": 2.197591475576271e-05, "loss": 0.0638, "step": 16594 }, { "epoch": 0.3656756295206774, "grad_norm": 0.6769197583198547, "learning_rate": 2.1974967016028115e-05, "loss": 0.0867, "step": 16595 }, { "epoch": 0.3656976648101935, "grad_norm": 1.0473920106887817, "learning_rate": 2.197401924076627e-05, "loss": 0.0891, "step": 16596 }, { "epoch": 0.36571970009970967, "grad_norm": 0.9738056063652039, "learning_rate": 2.1973071429982007e-05, "loss": 0.1209, "step": 16597 }, { "epoch": 0.36574173538922583, "grad_norm": 0.7829124331474304, "learning_rate": 2.1972123583680154e-05, "loss": 0.0783, "step": 16598 }, { "epoch": 0.365763770678742, "grad_norm": 1.5016874074935913, "learning_rate": 2.1971175701865533e-05, "loss": 0.1359, "step": 16599 }, { "epoch": 0.36578580596825816, "grad_norm": 0.5611637234687805, "learning_rate": 2.197022778454298e-05, "loss": 0.0768, "step": 16600 }, { "epoch": 0.3658078412577743, "grad_norm": 0.4170016050338745, "learning_rate": 2.1969279831717313e-05, "loss": 0.0661, "step": 16601 }, { "epoch": 0.3658298765472905, "grad_norm": 0.7802464962005615, "learning_rate": 2.1968331843393375e-05, "loss": 0.1113, "step": 16602 }, { "epoch": 0.36585191183680665, "grad_norm": 0.6548101902008057, "learning_rate": 2.196738381957598e-05, "loss": 0.0798, "step": 16603 }, { "epoch": 0.3658739471263228, "grad_norm": 1.0630470514297485, "learning_rate": 2.1966435760269963e-05, "loss": 0.0875, "step": 16604 }, { "epoch": 0.365895982415839, "grad_norm": 0.48693981766700745, "learning_rate": 2.196548766548016e-05, "loss": 0.0875, "step": 16605 }, { "epoch": 0.36591801770535515, "grad_norm": 0.6726164817810059, "learning_rate": 2.1964539535211384e-05, "loss": 0.0782, "step": 16606 }, { "epoch": 0.3659400529948713, "grad_norm": 0.6163985133171082, "learning_rate": 2.1963591369468477e-05, "loss": 0.0775, "step": 16607 }, { "epoch": 0.3659620882843874, "grad_norm": 0.853410005569458, "learning_rate": 2.1962643168256268e-05, "loss": 0.079, "step": 16608 }, { "epoch": 0.3659841235739036, "grad_norm": 0.6318094730377197, "learning_rate": 2.196169493157958e-05, "loss": 0.0973, "step": 16609 }, { "epoch": 0.36600615886341975, "grad_norm": 0.7737494111061096, "learning_rate": 2.1960746659443248e-05, "loss": 0.1029, "step": 16610 }, { "epoch": 0.3660281941529359, "grad_norm": 0.8699164390563965, "learning_rate": 2.19597983518521e-05, "loss": 0.0959, "step": 16611 }, { "epoch": 0.3660502294424521, "grad_norm": 0.9155798554420471, "learning_rate": 2.1958850008810966e-05, "loss": 0.1247, "step": 16612 }, { "epoch": 0.36607226473196824, "grad_norm": 0.5466079115867615, "learning_rate": 2.1957901630324682e-05, "loss": 0.046, "step": 16613 }, { "epoch": 0.3660943000214844, "grad_norm": 0.39345672726631165, "learning_rate": 2.1956953216398068e-05, "loss": 0.0736, "step": 16614 }, { "epoch": 0.3661163353110006, "grad_norm": 0.7784777283668518, "learning_rate": 2.1956004767035963e-05, "loss": 0.078, "step": 16615 }, { "epoch": 0.36613837060051674, "grad_norm": 0.9250409603118896, "learning_rate": 2.1955056282243195e-05, "loss": 0.0853, "step": 16616 }, { "epoch": 0.3661604058900329, "grad_norm": 0.8368646502494812, "learning_rate": 2.1954107762024598e-05, "loss": 0.1043, "step": 16617 }, { "epoch": 0.36618244117954907, "grad_norm": 0.5419523119926453, "learning_rate": 2.1953159206384995e-05, "loss": 0.0713, "step": 16618 }, { "epoch": 0.36620447646906523, "grad_norm": 0.9613297581672668, "learning_rate": 2.195221061532923e-05, "loss": 0.1063, "step": 16619 }, { "epoch": 0.36622651175858134, "grad_norm": 0.7530114054679871, "learning_rate": 2.1951261988862127e-05, "loss": 0.1076, "step": 16620 }, { "epoch": 0.3662485470480975, "grad_norm": 0.34563082456588745, "learning_rate": 2.195031332698852e-05, "loss": 0.0863, "step": 16621 }, { "epoch": 0.36627058233761367, "grad_norm": 0.8598031401634216, "learning_rate": 2.1949364629713236e-05, "loss": 0.0752, "step": 16622 }, { "epoch": 0.36629261762712984, "grad_norm": 0.80867999792099, "learning_rate": 2.1948415897041114e-05, "loss": 0.0766, "step": 16623 }, { "epoch": 0.366314652916646, "grad_norm": 0.7687203288078308, "learning_rate": 2.1947467128976984e-05, "loss": 0.1135, "step": 16624 }, { "epoch": 0.36633668820616216, "grad_norm": 0.8467620611190796, "learning_rate": 2.1946518325525675e-05, "loss": 0.0947, "step": 16625 }, { "epoch": 0.36635872349567833, "grad_norm": 0.5308418273925781, "learning_rate": 2.194556948669203e-05, "loss": 0.0613, "step": 16626 }, { "epoch": 0.3663807587851945, "grad_norm": 0.6690287590026855, "learning_rate": 2.1944620612480872e-05, "loss": 0.0924, "step": 16627 }, { "epoch": 0.36640279407471066, "grad_norm": 0.9845423698425293, "learning_rate": 2.194367170289704e-05, "loss": 0.1183, "step": 16628 }, { "epoch": 0.3664248293642268, "grad_norm": 0.7356869578361511, "learning_rate": 2.1942722757945355e-05, "loss": 0.0895, "step": 16629 }, { "epoch": 0.366446864653743, "grad_norm": 0.7467507719993591, "learning_rate": 2.194177377763067e-05, "loss": 0.081, "step": 16630 }, { "epoch": 0.36646889994325915, "grad_norm": 0.4571549594402313, "learning_rate": 2.1940824761957802e-05, "loss": 0.0703, "step": 16631 }, { "epoch": 0.3664909352327753, "grad_norm": 0.981342077255249, "learning_rate": 2.1939875710931596e-05, "loss": 0.0775, "step": 16632 }, { "epoch": 0.3665129705222914, "grad_norm": 0.6908963322639465, "learning_rate": 2.1938926624556876e-05, "loss": 0.0957, "step": 16633 }, { "epoch": 0.3665350058118076, "grad_norm": 1.1327145099639893, "learning_rate": 2.193797750283849e-05, "loss": 0.1251, "step": 16634 }, { "epoch": 0.36655704110132376, "grad_norm": 0.6838080883026123, "learning_rate": 2.1937028345781258e-05, "loss": 0.1128, "step": 16635 }, { "epoch": 0.3665790763908399, "grad_norm": 0.5628892183303833, "learning_rate": 2.1936079153390024e-05, "loss": 0.0902, "step": 16636 }, { "epoch": 0.3666011116803561, "grad_norm": 0.6811597943305969, "learning_rate": 2.1935129925669615e-05, "loss": 0.0606, "step": 16637 }, { "epoch": 0.36662314696987225, "grad_norm": 0.6769895553588867, "learning_rate": 2.1934180662624875e-05, "loss": 0.0818, "step": 16638 }, { "epoch": 0.3666451822593884, "grad_norm": 0.5883647203445435, "learning_rate": 2.1933231364260634e-05, "loss": 0.0918, "step": 16639 }, { "epoch": 0.3666672175489046, "grad_norm": 0.4958956241607666, "learning_rate": 2.1932282030581726e-05, "loss": 0.0613, "step": 16640 }, { "epoch": 0.36668925283842074, "grad_norm": 0.8480364680290222, "learning_rate": 2.193133266159299e-05, "loss": 0.0748, "step": 16641 }, { "epoch": 0.3667112881279369, "grad_norm": 0.5947626233100891, "learning_rate": 2.1930383257299257e-05, "loss": 0.1033, "step": 16642 }, { "epoch": 0.36673332341745307, "grad_norm": 1.0393447875976562, "learning_rate": 2.1929433817705367e-05, "loss": 0.0633, "step": 16643 }, { "epoch": 0.36675535870696924, "grad_norm": 0.5878520011901855, "learning_rate": 2.1928484342816156e-05, "loss": 0.0912, "step": 16644 }, { "epoch": 0.36677739399648535, "grad_norm": 0.853912889957428, "learning_rate": 2.192753483263646e-05, "loss": 0.0848, "step": 16645 }, { "epoch": 0.3667994292860015, "grad_norm": 0.5865465402603149, "learning_rate": 2.1926585287171108e-05, "loss": 0.1085, "step": 16646 }, { "epoch": 0.3668214645755177, "grad_norm": 0.6605879068374634, "learning_rate": 2.1925635706424947e-05, "loss": 0.0836, "step": 16647 }, { "epoch": 0.36684349986503384, "grad_norm": 0.7643786668777466, "learning_rate": 2.192468609040281e-05, "loss": 0.1019, "step": 16648 }, { "epoch": 0.36686553515455, "grad_norm": 0.6936604976654053, "learning_rate": 2.192373643910953e-05, "loss": 0.0804, "step": 16649 }, { "epoch": 0.36688757044406617, "grad_norm": 0.7565038204193115, "learning_rate": 2.1922786752549957e-05, "loss": 0.0784, "step": 16650 }, { "epoch": 0.36690960573358233, "grad_norm": 0.9736347198486328, "learning_rate": 2.1921837030728914e-05, "loss": 0.1133, "step": 16651 }, { "epoch": 0.3669316410230985, "grad_norm": 1.2236677408218384, "learning_rate": 2.1920887273651236e-05, "loss": 0.0878, "step": 16652 }, { "epoch": 0.36695367631261466, "grad_norm": 0.7435475587844849, "learning_rate": 2.1919937481321778e-05, "loss": 0.059, "step": 16653 }, { "epoch": 0.3669757116021308, "grad_norm": 1.2952818870544434, "learning_rate": 2.191898765374536e-05, "loss": 0.1371, "step": 16654 }, { "epoch": 0.366997746891647, "grad_norm": 0.6747269034385681, "learning_rate": 2.1918037790926834e-05, "loss": 0.0763, "step": 16655 }, { "epoch": 0.36701978218116316, "grad_norm": 0.5084916949272156, "learning_rate": 2.191708789287102e-05, "loss": 0.0751, "step": 16656 }, { "epoch": 0.36704181747067927, "grad_norm": 0.6158051490783691, "learning_rate": 2.1916137959582782e-05, "loss": 0.0977, "step": 16657 }, { "epoch": 0.36706385276019543, "grad_norm": 0.7372516393661499, "learning_rate": 2.1915187991066937e-05, "loss": 0.114, "step": 16658 }, { "epoch": 0.3670858880497116, "grad_norm": 0.5316491723060608, "learning_rate": 2.1914237987328335e-05, "loss": 0.0763, "step": 16659 }, { "epoch": 0.36710792333922776, "grad_norm": 0.5462693572044373, "learning_rate": 2.1913287948371813e-05, "loss": 0.0512, "step": 16660 }, { "epoch": 0.3671299586287439, "grad_norm": 0.6964894533157349, "learning_rate": 2.19123378742022e-05, "loss": 0.1052, "step": 16661 }, { "epoch": 0.3671519939182601, "grad_norm": 0.8862354159355164, "learning_rate": 2.191138776482435e-05, "loss": 0.0951, "step": 16662 }, { "epoch": 0.36717402920777625, "grad_norm": 0.457444429397583, "learning_rate": 2.1910437620243095e-05, "loss": 0.0545, "step": 16663 }, { "epoch": 0.3671960644972924, "grad_norm": 0.7257418632507324, "learning_rate": 2.1909487440463274e-05, "loss": 0.0723, "step": 16664 }, { "epoch": 0.3672180997868086, "grad_norm": 0.8127177953720093, "learning_rate": 2.190853722548973e-05, "loss": 0.1147, "step": 16665 }, { "epoch": 0.36724013507632475, "grad_norm": 0.5440735816955566, "learning_rate": 2.1907586975327304e-05, "loss": 0.071, "step": 16666 }, { "epoch": 0.3672621703658409, "grad_norm": 0.9441350102424622, "learning_rate": 2.190663668998083e-05, "loss": 0.0777, "step": 16667 }, { "epoch": 0.3672842056553571, "grad_norm": 0.7682231068611145, "learning_rate": 2.1905686369455152e-05, "loss": 0.0934, "step": 16668 }, { "epoch": 0.36730624094487324, "grad_norm": 0.9040311574935913, "learning_rate": 2.1904736013755106e-05, "loss": 0.0629, "step": 16669 }, { "epoch": 0.36732827623438935, "grad_norm": 0.6016098856925964, "learning_rate": 2.1903785622885543e-05, "loss": 0.092, "step": 16670 }, { "epoch": 0.3673503115239055, "grad_norm": 0.9983264803886414, "learning_rate": 2.1902835196851293e-05, "loss": 0.1094, "step": 16671 }, { "epoch": 0.3673723468134217, "grad_norm": 0.8600966334342957, "learning_rate": 2.1901884735657202e-05, "loss": 0.1109, "step": 16672 }, { "epoch": 0.36739438210293784, "grad_norm": 0.9932435154914856, "learning_rate": 2.1900934239308114e-05, "loss": 0.1259, "step": 16673 }, { "epoch": 0.367416417392454, "grad_norm": 0.7776426076889038, "learning_rate": 2.1899983707808866e-05, "loss": 0.0969, "step": 16674 }, { "epoch": 0.3674384526819702, "grad_norm": 0.5776658058166504, "learning_rate": 2.1899033141164304e-05, "loss": 0.1169, "step": 16675 }, { "epoch": 0.36746048797148634, "grad_norm": 0.9823639392852783, "learning_rate": 2.1898082539379265e-05, "loss": 0.078, "step": 16676 }, { "epoch": 0.3674825232610025, "grad_norm": 1.619452953338623, "learning_rate": 2.1897131902458588e-05, "loss": 0.0989, "step": 16677 }, { "epoch": 0.36750455855051867, "grad_norm": 0.6470758318901062, "learning_rate": 2.1896181230407128e-05, "loss": 0.0748, "step": 16678 }, { "epoch": 0.36752659384003483, "grad_norm": 0.8772241473197937, "learning_rate": 2.1895230523229713e-05, "loss": 0.1004, "step": 16679 }, { "epoch": 0.367548629129551, "grad_norm": 0.6845853924751282, "learning_rate": 2.189427978093119e-05, "loss": 0.0828, "step": 16680 }, { "epoch": 0.36757066441906716, "grad_norm": 0.9885940551757812, "learning_rate": 2.1893329003516405e-05, "loss": 0.0905, "step": 16681 }, { "epoch": 0.36759269970858327, "grad_norm": 0.5885224342346191, "learning_rate": 2.1892378190990202e-05, "loss": 0.078, "step": 16682 }, { "epoch": 0.36761473499809943, "grad_norm": 0.3441317677497864, "learning_rate": 2.1891427343357418e-05, "loss": 0.0559, "step": 16683 }, { "epoch": 0.3676367702876156, "grad_norm": 0.9897894859313965, "learning_rate": 2.1890476460622904e-05, "loss": 0.1074, "step": 16684 }, { "epoch": 0.36765880557713176, "grad_norm": 0.7162662148475647, "learning_rate": 2.188952554279149e-05, "loss": 0.0799, "step": 16685 }, { "epoch": 0.36768084086664793, "grad_norm": 0.5921326279640198, "learning_rate": 2.1888574589868033e-05, "loss": 0.0817, "step": 16686 }, { "epoch": 0.3677028761561641, "grad_norm": 0.6462976932525635, "learning_rate": 2.188762360185737e-05, "loss": 0.0597, "step": 16687 }, { "epoch": 0.36772491144568026, "grad_norm": 0.886182427406311, "learning_rate": 2.1886672578764347e-05, "loss": 0.0913, "step": 16688 }, { "epoch": 0.3677469467351964, "grad_norm": 0.7459927201271057, "learning_rate": 2.1885721520593805e-05, "loss": 0.1221, "step": 16689 }, { "epoch": 0.3677689820247126, "grad_norm": 1.0429558753967285, "learning_rate": 2.1884770427350598e-05, "loss": 0.0959, "step": 16690 }, { "epoch": 0.36779101731422875, "grad_norm": 0.5550969243049622, "learning_rate": 2.1883819299039562e-05, "loss": 0.0943, "step": 16691 }, { "epoch": 0.3678130526037449, "grad_norm": 0.6633052229881287, "learning_rate": 2.1882868135665536e-05, "loss": 0.1055, "step": 16692 }, { "epoch": 0.3678350878932611, "grad_norm": 0.6342411637306213, "learning_rate": 2.1881916937233376e-05, "loss": 0.0693, "step": 16693 }, { "epoch": 0.3678571231827772, "grad_norm": 1.059301733970642, "learning_rate": 2.188096570374792e-05, "loss": 0.1021, "step": 16694 }, { "epoch": 0.36787915847229335, "grad_norm": 0.8161386251449585, "learning_rate": 2.188001443521402e-05, "loss": 0.1149, "step": 16695 }, { "epoch": 0.3679011937618095, "grad_norm": 0.8605714440345764, "learning_rate": 2.1879063131636514e-05, "loss": 0.0974, "step": 16696 }, { "epoch": 0.3679232290513257, "grad_norm": 0.383508563041687, "learning_rate": 2.1878111793020255e-05, "loss": 0.082, "step": 16697 }, { "epoch": 0.36794526434084185, "grad_norm": 0.7853299379348755, "learning_rate": 2.187716041937008e-05, "loss": 0.079, "step": 16698 }, { "epoch": 0.367967299630358, "grad_norm": 0.9957591891288757, "learning_rate": 2.187620901069084e-05, "loss": 0.1204, "step": 16699 }, { "epoch": 0.3679893349198742, "grad_norm": 0.5467033982276917, "learning_rate": 2.187525756698738e-05, "loss": 0.0862, "step": 16700 }, { "epoch": 0.36801137020939034, "grad_norm": 0.8069553375244141, "learning_rate": 2.187430608826455e-05, "loss": 0.1087, "step": 16701 }, { "epoch": 0.3680334054989065, "grad_norm": 0.7801413536071777, "learning_rate": 2.1873354574527185e-05, "loss": 0.0826, "step": 16702 }, { "epoch": 0.36805544078842267, "grad_norm": 0.5818250179290771, "learning_rate": 2.1872403025780145e-05, "loss": 0.1084, "step": 16703 }, { "epoch": 0.36807747607793884, "grad_norm": 0.6941260099411011, "learning_rate": 2.187145144202827e-05, "loss": 0.0966, "step": 16704 }, { "epoch": 0.368099511367455, "grad_norm": 0.8687889575958252, "learning_rate": 2.187049982327641e-05, "loss": 0.0929, "step": 16705 }, { "epoch": 0.36812154665697117, "grad_norm": 0.5536600947380066, "learning_rate": 2.186954816952941e-05, "loss": 0.0557, "step": 16706 }, { "epoch": 0.3681435819464873, "grad_norm": 0.978705644607544, "learning_rate": 2.1868596480792115e-05, "loss": 0.0905, "step": 16707 }, { "epoch": 0.36816561723600344, "grad_norm": 0.46176308393478394, "learning_rate": 2.1867644757069375e-05, "loss": 0.0916, "step": 16708 }, { "epoch": 0.3681876525255196, "grad_norm": 0.9151621460914612, "learning_rate": 2.186669299836604e-05, "loss": 0.101, "step": 16709 }, { "epoch": 0.36820968781503577, "grad_norm": 0.8484890460968018, "learning_rate": 2.186574120468695e-05, "loss": 0.105, "step": 16710 }, { "epoch": 0.36823172310455193, "grad_norm": 0.9973131418228149, "learning_rate": 2.1864789376036968e-05, "loss": 0.127, "step": 16711 }, { "epoch": 0.3682537583940681, "grad_norm": 0.7686946392059326, "learning_rate": 2.1863837512420923e-05, "loss": 0.0817, "step": 16712 }, { "epoch": 0.36827579368358426, "grad_norm": 0.49343180656433105, "learning_rate": 2.186288561384368e-05, "loss": 0.0682, "step": 16713 }, { "epoch": 0.3682978289731004, "grad_norm": 0.9882161617279053, "learning_rate": 2.1861933680310075e-05, "loss": 0.1523, "step": 16714 }, { "epoch": 0.3683198642626166, "grad_norm": 1.0371779203414917, "learning_rate": 2.186098171182496e-05, "loss": 0.1008, "step": 16715 }, { "epoch": 0.36834189955213276, "grad_norm": 0.7004093527793884, "learning_rate": 2.186002970839319e-05, "loss": 0.0698, "step": 16716 }, { "epoch": 0.3683639348416489, "grad_norm": 0.9010844230651855, "learning_rate": 2.185907767001961e-05, "loss": 0.0919, "step": 16717 }, { "epoch": 0.3683859701311651, "grad_norm": 0.44788989424705505, "learning_rate": 2.1858125596709072e-05, "loss": 0.0779, "step": 16718 }, { "epoch": 0.3684080054206812, "grad_norm": 0.6506717801094055, "learning_rate": 2.185717348846642e-05, "loss": 0.0685, "step": 16719 }, { "epoch": 0.36843004071019736, "grad_norm": 0.4580387771129608, "learning_rate": 2.1856221345296505e-05, "loss": 0.0697, "step": 16720 }, { "epoch": 0.3684520759997135, "grad_norm": 0.6837247014045715, "learning_rate": 2.1855269167204178e-05, "loss": 0.1049, "step": 16721 }, { "epoch": 0.3684741112892297, "grad_norm": 0.6496801376342773, "learning_rate": 2.1854316954194293e-05, "loss": 0.0714, "step": 16722 }, { "epoch": 0.36849614657874585, "grad_norm": 0.7000447511672974, "learning_rate": 2.1853364706271692e-05, "loss": 0.0882, "step": 16723 }, { "epoch": 0.368518181868262, "grad_norm": 0.8475162386894226, "learning_rate": 2.185241242344123e-05, "loss": 0.0945, "step": 16724 }, { "epoch": 0.3685402171577782, "grad_norm": 1.30948007106781, "learning_rate": 2.185146010570776e-05, "loss": 0.075, "step": 16725 }, { "epoch": 0.36856225244729435, "grad_norm": 0.5743579864501953, "learning_rate": 2.1850507753076125e-05, "loss": 0.1012, "step": 16726 }, { "epoch": 0.3685842877368105, "grad_norm": 0.780731201171875, "learning_rate": 2.1849555365551182e-05, "loss": 0.1194, "step": 16727 }, { "epoch": 0.3686063230263267, "grad_norm": 1.3140536546707153, "learning_rate": 2.1848602943137785e-05, "loss": 0.1138, "step": 16728 }, { "epoch": 0.36862835831584284, "grad_norm": 0.5956394076347351, "learning_rate": 2.1847650485840776e-05, "loss": 0.0805, "step": 16729 }, { "epoch": 0.368650393605359, "grad_norm": 0.3630072772502899, "learning_rate": 2.1846697993665013e-05, "loss": 0.0963, "step": 16730 }, { "epoch": 0.3686724288948751, "grad_norm": 0.5712106823921204, "learning_rate": 2.1845745466615343e-05, "loss": 0.0802, "step": 16731 }, { "epoch": 0.3686944641843913, "grad_norm": 1.3982852697372437, "learning_rate": 2.1844792904696625e-05, "loss": 0.0898, "step": 16732 }, { "epoch": 0.36871649947390744, "grad_norm": 0.7501336336135864, "learning_rate": 2.18438403079137e-05, "loss": 0.0473, "step": 16733 }, { "epoch": 0.3687385347634236, "grad_norm": 0.9234011769294739, "learning_rate": 2.1842887676271435e-05, "loss": 0.0747, "step": 16734 }, { "epoch": 0.3687605700529398, "grad_norm": 0.6887498497962952, "learning_rate": 2.1841935009774666e-05, "loss": 0.1008, "step": 16735 }, { "epoch": 0.36878260534245594, "grad_norm": 0.6161854267120361, "learning_rate": 2.184098230842825e-05, "loss": 0.095, "step": 16736 }, { "epoch": 0.3688046406319721, "grad_norm": 0.7927842736244202, "learning_rate": 2.1840029572237052e-05, "loss": 0.0982, "step": 16737 }, { "epoch": 0.36882667592148827, "grad_norm": 1.160968542098999, "learning_rate": 2.1839076801205914e-05, "loss": 0.1385, "step": 16738 }, { "epoch": 0.36884871121100443, "grad_norm": 0.7744256258010864, "learning_rate": 2.1838123995339688e-05, "loss": 0.0918, "step": 16739 }, { "epoch": 0.3688707465005206, "grad_norm": 0.7521402835845947, "learning_rate": 2.1837171154643228e-05, "loss": 0.0909, "step": 16740 }, { "epoch": 0.36889278179003676, "grad_norm": 0.6219337582588196, "learning_rate": 2.183621827912139e-05, "loss": 0.0698, "step": 16741 }, { "epoch": 0.3689148170795529, "grad_norm": 0.5843852758407593, "learning_rate": 2.1835265368779028e-05, "loss": 0.0709, "step": 16742 }, { "epoch": 0.3689368523690691, "grad_norm": 0.7639670372009277, "learning_rate": 2.1834312423620996e-05, "loss": 0.0795, "step": 16743 }, { "epoch": 0.3689588876585852, "grad_norm": 0.8780233263969421, "learning_rate": 2.183335944365214e-05, "loss": 0.0661, "step": 16744 }, { "epoch": 0.36898092294810136, "grad_norm": 0.6356443166732788, "learning_rate": 2.1832406428877322e-05, "loss": 0.0635, "step": 16745 }, { "epoch": 0.36900295823761753, "grad_norm": 0.8157985210418701, "learning_rate": 2.18314533793014e-05, "loss": 0.1073, "step": 16746 }, { "epoch": 0.3690249935271337, "grad_norm": 0.8740065097808838, "learning_rate": 2.183050029492922e-05, "loss": 0.1043, "step": 16747 }, { "epoch": 0.36904702881664986, "grad_norm": 1.277519941329956, "learning_rate": 2.1829547175765635e-05, "loss": 0.1339, "step": 16748 }, { "epoch": 0.369069064106166, "grad_norm": 0.7712857723236084, "learning_rate": 2.1828594021815506e-05, "loss": 0.0789, "step": 16749 }, { "epoch": 0.3690910993956822, "grad_norm": 0.9595075845718384, "learning_rate": 2.1827640833083687e-05, "loss": 0.0944, "step": 16750 }, { "epoch": 0.36911313468519835, "grad_norm": 0.5236776471138, "learning_rate": 2.182668760957503e-05, "loss": 0.0724, "step": 16751 }, { "epoch": 0.3691351699747145, "grad_norm": 0.7507544755935669, "learning_rate": 2.1825734351294392e-05, "loss": 0.1027, "step": 16752 }, { "epoch": 0.3691572052642307, "grad_norm": 0.797504723072052, "learning_rate": 2.182478105824663e-05, "loss": 0.088, "step": 16753 }, { "epoch": 0.36917924055374685, "grad_norm": 0.8991349935531616, "learning_rate": 2.1823827730436602e-05, "loss": 0.0784, "step": 16754 }, { "epoch": 0.369201275843263, "grad_norm": 0.7240842580795288, "learning_rate": 2.1822874367869156e-05, "loss": 0.0617, "step": 16755 }, { "epoch": 0.3692233111327791, "grad_norm": 1.2792093753814697, "learning_rate": 2.182192097054915e-05, "loss": 0.111, "step": 16756 }, { "epoch": 0.3692453464222953, "grad_norm": 0.7702672481536865, "learning_rate": 2.1820967538481446e-05, "loss": 0.0875, "step": 16757 }, { "epoch": 0.36926738171181145, "grad_norm": 0.8647765517234802, "learning_rate": 2.1820014071670896e-05, "loss": 0.106, "step": 16758 }, { "epoch": 0.3692894170013276, "grad_norm": 0.6952564120292664, "learning_rate": 2.1819060570122352e-05, "loss": 0.1159, "step": 16759 }, { "epoch": 0.3693114522908438, "grad_norm": 0.6899006962776184, "learning_rate": 2.1818107033840687e-05, "loss": 0.0597, "step": 16760 }, { "epoch": 0.36933348758035994, "grad_norm": 0.7125051021575928, "learning_rate": 2.181715346283074e-05, "loss": 0.0718, "step": 16761 }, { "epoch": 0.3693555228698761, "grad_norm": 0.7856823205947876, "learning_rate": 2.1816199857097373e-05, "loss": 0.0785, "step": 16762 }, { "epoch": 0.36937755815939227, "grad_norm": 0.6843078136444092, "learning_rate": 2.1815246216645447e-05, "loss": 0.1006, "step": 16763 }, { "epoch": 0.36939959344890844, "grad_norm": 0.5001823902130127, "learning_rate": 2.181429254147982e-05, "loss": 0.0707, "step": 16764 }, { "epoch": 0.3694216287384246, "grad_norm": 0.5877296328544617, "learning_rate": 2.1813338831605344e-05, "loss": 0.0855, "step": 16765 }, { "epoch": 0.36944366402794077, "grad_norm": 0.8481168150901794, "learning_rate": 2.181238508702688e-05, "loss": 0.0588, "step": 16766 }, { "epoch": 0.36946569931745693, "grad_norm": 1.1179264783859253, "learning_rate": 2.1811431307749283e-05, "loss": 0.0888, "step": 16767 }, { "epoch": 0.3694877346069731, "grad_norm": 0.6045681834220886, "learning_rate": 2.1810477493777417e-05, "loss": 0.0949, "step": 16768 }, { "epoch": 0.3695097698964892, "grad_norm": 0.6771760582923889, "learning_rate": 2.1809523645116137e-05, "loss": 0.1376, "step": 16769 }, { "epoch": 0.36953180518600537, "grad_norm": 0.6114033460617065, "learning_rate": 2.1808569761770304e-05, "loss": 0.0859, "step": 16770 }, { "epoch": 0.36955384047552153, "grad_norm": 0.7445707321166992, "learning_rate": 2.180761584374477e-05, "loss": 0.0765, "step": 16771 }, { "epoch": 0.3695758757650377, "grad_norm": 0.8379455208778381, "learning_rate": 2.18066618910444e-05, "loss": 0.067, "step": 16772 }, { "epoch": 0.36959791105455386, "grad_norm": 0.7984756231307983, "learning_rate": 2.180570790367405e-05, "loss": 0.0904, "step": 16773 }, { "epoch": 0.36961994634407, "grad_norm": 0.6239497065544128, "learning_rate": 2.180475388163858e-05, "loss": 0.0762, "step": 16774 }, { "epoch": 0.3696419816335862, "grad_norm": 0.9046334028244019, "learning_rate": 2.1803799824942846e-05, "loss": 0.0965, "step": 16775 }, { "epoch": 0.36966401692310236, "grad_norm": 0.8434550166130066, "learning_rate": 2.180284573359172e-05, "loss": 0.1165, "step": 16776 }, { "epoch": 0.3696860522126185, "grad_norm": 0.8484349846839905, "learning_rate": 2.1801891607590046e-05, "loss": 0.1196, "step": 16777 }, { "epoch": 0.3697080875021347, "grad_norm": 0.9086056351661682, "learning_rate": 2.1800937446942692e-05, "loss": 0.0782, "step": 16778 }, { "epoch": 0.36973012279165085, "grad_norm": 0.9446060657501221, "learning_rate": 2.1799983251654517e-05, "loss": 0.1323, "step": 16779 }, { "epoch": 0.369752158081167, "grad_norm": 0.9377152919769287, "learning_rate": 2.179902902173038e-05, "loss": 0.087, "step": 16780 }, { "epoch": 0.3697741933706831, "grad_norm": 0.7833287119865417, "learning_rate": 2.1798074757175145e-05, "loss": 0.119, "step": 16781 }, { "epoch": 0.3697962286601993, "grad_norm": 0.6642998456954956, "learning_rate": 2.1797120457993665e-05, "loss": 0.118, "step": 16782 }, { "epoch": 0.36981826394971545, "grad_norm": 0.7825532555580139, "learning_rate": 2.179616612419081e-05, "loss": 0.0729, "step": 16783 }, { "epoch": 0.3698402992392316, "grad_norm": 0.7213473320007324, "learning_rate": 2.1795211755771433e-05, "loss": 0.0971, "step": 16784 }, { "epoch": 0.3698623345287478, "grad_norm": 0.6153456568717957, "learning_rate": 2.1794257352740404e-05, "loss": 0.065, "step": 16785 }, { "epoch": 0.36988436981826395, "grad_norm": 0.6827948689460754, "learning_rate": 2.1793302915102574e-05, "loss": 0.0786, "step": 16786 }, { "epoch": 0.3699064051077801, "grad_norm": 1.0065093040466309, "learning_rate": 2.179234844286281e-05, "loss": 0.1243, "step": 16787 }, { "epoch": 0.3699284403972963, "grad_norm": 0.7221500277519226, "learning_rate": 2.1791393936025976e-05, "loss": 0.094, "step": 16788 }, { "epoch": 0.36995047568681244, "grad_norm": 1.1009093523025513, "learning_rate": 2.179043939459693e-05, "loss": 0.0786, "step": 16789 }, { "epoch": 0.3699725109763286, "grad_norm": 0.5264312624931335, "learning_rate": 2.178948481858053e-05, "loss": 0.0719, "step": 16790 }, { "epoch": 0.36999454626584477, "grad_norm": 0.5646422505378723, "learning_rate": 2.1788530207981647e-05, "loss": 0.0522, "step": 16791 }, { "epoch": 0.37001658155536093, "grad_norm": 0.9052441120147705, "learning_rate": 2.178757556280514e-05, "loss": 0.1094, "step": 16792 }, { "epoch": 0.37003861684487704, "grad_norm": 1.13783860206604, "learning_rate": 2.1786620883055866e-05, "loss": 0.1072, "step": 16793 }, { "epoch": 0.3700606521343932, "grad_norm": 0.9115992188453674, "learning_rate": 2.17856661687387e-05, "loss": 0.059, "step": 16794 }, { "epoch": 0.3700826874239094, "grad_norm": 0.6479644179344177, "learning_rate": 2.1784711419858496e-05, "loss": 0.1005, "step": 16795 }, { "epoch": 0.37010472271342554, "grad_norm": 0.7778227925300598, "learning_rate": 2.1783756636420115e-05, "loss": 0.0801, "step": 16796 }, { "epoch": 0.3701267580029417, "grad_norm": 0.9499505162239075, "learning_rate": 2.1782801818428425e-05, "loss": 0.1032, "step": 16797 }, { "epoch": 0.37014879329245787, "grad_norm": 0.8069160580635071, "learning_rate": 2.1781846965888286e-05, "loss": 0.0956, "step": 16798 }, { "epoch": 0.37017082858197403, "grad_norm": 0.6663130521774292, "learning_rate": 2.1780892078804566e-05, "loss": 0.0951, "step": 16799 }, { "epoch": 0.3701928638714902, "grad_norm": 0.4672110974788666, "learning_rate": 2.1779937157182128e-05, "loss": 0.0885, "step": 16800 }, { "epoch": 0.37021489916100636, "grad_norm": 0.8309043645858765, "learning_rate": 2.177898220102583e-05, "loss": 0.0982, "step": 16801 }, { "epoch": 0.3702369344505225, "grad_norm": 0.7723382711410522, "learning_rate": 2.177802721034055e-05, "loss": 0.1315, "step": 16802 }, { "epoch": 0.3702589697400387, "grad_norm": 0.473457932472229, "learning_rate": 2.177707218513113e-05, "loss": 0.0805, "step": 16803 }, { "epoch": 0.37028100502955485, "grad_norm": 0.8809234499931335, "learning_rate": 2.1776117125402455e-05, "loss": 0.0897, "step": 16804 }, { "epoch": 0.370303040319071, "grad_norm": 0.6895298957824707, "learning_rate": 2.1775162031159378e-05, "loss": 0.0757, "step": 16805 }, { "epoch": 0.37032507560858713, "grad_norm": 0.9327886700630188, "learning_rate": 2.1774206902406773e-05, "loss": 0.1117, "step": 16806 }, { "epoch": 0.3703471108981033, "grad_norm": 0.7971548438072205, "learning_rate": 2.1773251739149494e-05, "loss": 0.1023, "step": 16807 }, { "epoch": 0.37036914618761946, "grad_norm": 0.867908775806427, "learning_rate": 2.177229654139241e-05, "loss": 0.0996, "step": 16808 }, { "epoch": 0.3703911814771356, "grad_norm": 0.6971141695976257, "learning_rate": 2.1771341309140397e-05, "loss": 0.1007, "step": 16809 }, { "epoch": 0.3704132167666518, "grad_norm": 0.6250641942024231, "learning_rate": 2.1770386042398308e-05, "loss": 0.0723, "step": 16810 }, { "epoch": 0.37043525205616795, "grad_norm": 0.4725434184074402, "learning_rate": 2.176943074117101e-05, "loss": 0.0739, "step": 16811 }, { "epoch": 0.3704572873456841, "grad_norm": 0.5958347916603088, "learning_rate": 2.176847540546337e-05, "loss": 0.0701, "step": 16812 }, { "epoch": 0.3704793226352003, "grad_norm": 0.5812490582466125, "learning_rate": 2.176752003528026e-05, "loss": 0.0654, "step": 16813 }, { "epoch": 0.37050135792471645, "grad_norm": 1.0655392408370972, "learning_rate": 2.176656463062654e-05, "loss": 0.0979, "step": 16814 }, { "epoch": 0.3705233932142326, "grad_norm": 0.719855546951294, "learning_rate": 2.1765609191507073e-05, "loss": 0.0826, "step": 16815 }, { "epoch": 0.3705454285037488, "grad_norm": 0.8630625009536743, "learning_rate": 2.1764653717926735e-05, "loss": 0.1022, "step": 16816 }, { "epoch": 0.37056746379326494, "grad_norm": 0.6375589370727539, "learning_rate": 2.1763698209890388e-05, "loss": 0.0969, "step": 16817 }, { "epoch": 0.37058949908278105, "grad_norm": 0.8435619473457336, "learning_rate": 2.17627426674029e-05, "loss": 0.0829, "step": 16818 }, { "epoch": 0.3706115343722972, "grad_norm": 0.48330461978912354, "learning_rate": 2.1761787090469133e-05, "loss": 0.085, "step": 16819 }, { "epoch": 0.3706335696618134, "grad_norm": 0.6679237484931946, "learning_rate": 2.1760831479093963e-05, "loss": 0.1115, "step": 16820 }, { "epoch": 0.37065560495132954, "grad_norm": 0.5124390721321106, "learning_rate": 2.1759875833282248e-05, "loss": 0.0683, "step": 16821 }, { "epoch": 0.3706776402408457, "grad_norm": 0.6326079368591309, "learning_rate": 2.1758920153038864e-05, "loss": 0.0702, "step": 16822 }, { "epoch": 0.37069967553036187, "grad_norm": 0.6692277789115906, "learning_rate": 2.1757964438368673e-05, "loss": 0.0955, "step": 16823 }, { "epoch": 0.37072171081987804, "grad_norm": 1.0496586561203003, "learning_rate": 2.1757008689276548e-05, "loss": 0.0715, "step": 16824 }, { "epoch": 0.3707437461093942, "grad_norm": 0.790376603603363, "learning_rate": 2.1756052905767352e-05, "loss": 0.0604, "step": 16825 }, { "epoch": 0.37076578139891037, "grad_norm": 0.7790476083755493, "learning_rate": 2.175509708784596e-05, "loss": 0.0994, "step": 16826 }, { "epoch": 0.37078781668842653, "grad_norm": 0.6477798819541931, "learning_rate": 2.175414123551723e-05, "loss": 0.0875, "step": 16827 }, { "epoch": 0.3708098519779427, "grad_norm": 0.7741451859474182, "learning_rate": 2.175318534878604e-05, "loss": 0.0788, "step": 16828 }, { "epoch": 0.37083188726745886, "grad_norm": 0.5835859775543213, "learning_rate": 2.1752229427657256e-05, "loss": 0.1011, "step": 16829 }, { "epoch": 0.37085392255697497, "grad_norm": 0.8188279271125793, "learning_rate": 2.1751273472135744e-05, "loss": 0.0613, "step": 16830 }, { "epoch": 0.37087595784649113, "grad_norm": 0.6051695346832275, "learning_rate": 2.1750317482226378e-05, "loss": 0.0979, "step": 16831 }, { "epoch": 0.3708979931360073, "grad_norm": 0.561813473701477, "learning_rate": 2.1749361457934023e-05, "loss": 0.0794, "step": 16832 }, { "epoch": 0.37092002842552346, "grad_norm": 0.6351394653320312, "learning_rate": 2.1748405399263556e-05, "loss": 0.0944, "step": 16833 }, { "epoch": 0.3709420637150396, "grad_norm": 0.8344852924346924, "learning_rate": 2.1747449306219838e-05, "loss": 0.0842, "step": 16834 }, { "epoch": 0.3709640990045558, "grad_norm": 0.6472740173339844, "learning_rate": 2.1746493178807744e-05, "loss": 0.0887, "step": 16835 }, { "epoch": 0.37098613429407196, "grad_norm": 0.7258763313293457, "learning_rate": 2.174553701703214e-05, "loss": 0.0805, "step": 16836 }, { "epoch": 0.3710081695835881, "grad_norm": 0.8561624884605408, "learning_rate": 2.1744580820897902e-05, "loss": 0.0738, "step": 16837 }, { "epoch": 0.3710302048731043, "grad_norm": 0.7001128792762756, "learning_rate": 2.174362459040989e-05, "loss": 0.0758, "step": 16838 }, { "epoch": 0.37105224016262045, "grad_norm": 0.859544575214386, "learning_rate": 2.1742668325572987e-05, "loss": 0.1009, "step": 16839 }, { "epoch": 0.3710742754521366, "grad_norm": 0.6716193556785583, "learning_rate": 2.1741712026392058e-05, "loss": 0.0899, "step": 16840 }, { "epoch": 0.3710963107416528, "grad_norm": 1.3299294710159302, "learning_rate": 2.1740755692871976e-05, "loss": 0.126, "step": 16841 }, { "epoch": 0.37111834603116894, "grad_norm": 0.4607028067111969, "learning_rate": 2.1739799325017613e-05, "loss": 0.0875, "step": 16842 }, { "epoch": 0.37114038132068505, "grad_norm": 0.6200671195983887, "learning_rate": 2.1738842922833835e-05, "loss": 0.1064, "step": 16843 }, { "epoch": 0.3711624166102012, "grad_norm": 1.1946392059326172, "learning_rate": 2.173788648632551e-05, "loss": 0.0926, "step": 16844 }, { "epoch": 0.3711844518997174, "grad_norm": 0.6843498349189758, "learning_rate": 2.1736930015497525e-05, "loss": 0.1113, "step": 16845 }, { "epoch": 0.37120648718923355, "grad_norm": 0.8440722823143005, "learning_rate": 2.1735973510354738e-05, "loss": 0.095, "step": 16846 }, { "epoch": 0.3712285224787497, "grad_norm": 0.810357928276062, "learning_rate": 2.1735016970902024e-05, "loss": 0.0585, "step": 16847 }, { "epoch": 0.3712505577682659, "grad_norm": 0.39666998386383057, "learning_rate": 2.1734060397144265e-05, "loss": 0.0569, "step": 16848 }, { "epoch": 0.37127259305778204, "grad_norm": 0.8646584749221802, "learning_rate": 2.1733103789086317e-05, "loss": 0.0625, "step": 16849 }, { "epoch": 0.3712946283472982, "grad_norm": 0.5474182963371277, "learning_rate": 2.1732147146733066e-05, "loss": 0.0632, "step": 16850 }, { "epoch": 0.37131666363681437, "grad_norm": 0.9935309886932373, "learning_rate": 2.173119047008938e-05, "loss": 0.0961, "step": 16851 }, { "epoch": 0.37133869892633053, "grad_norm": 0.8152691721916199, "learning_rate": 2.1730233759160127e-05, "loss": 0.0709, "step": 16852 }, { "epoch": 0.3713607342158467, "grad_norm": 0.5336372256278992, "learning_rate": 2.1729277013950185e-05, "loss": 0.0803, "step": 16853 }, { "epoch": 0.37138276950536286, "grad_norm": 0.7282627820968628, "learning_rate": 2.172832023446443e-05, "loss": 0.0924, "step": 16854 }, { "epoch": 0.371404804794879, "grad_norm": 1.0100343227386475, "learning_rate": 2.172736342070773e-05, "loss": 0.0961, "step": 16855 }, { "epoch": 0.37142684008439514, "grad_norm": 0.792767345905304, "learning_rate": 2.172640657268496e-05, "loss": 0.0954, "step": 16856 }, { "epoch": 0.3714488753739113, "grad_norm": 1.0433430671691895, "learning_rate": 2.1725449690400995e-05, "loss": 0.0958, "step": 16857 }, { "epoch": 0.37147091066342747, "grad_norm": 0.8062641620635986, "learning_rate": 2.1724492773860712e-05, "loss": 0.09, "step": 16858 }, { "epoch": 0.37149294595294363, "grad_norm": 0.40345892310142517, "learning_rate": 2.1723535823068974e-05, "loss": 0.0837, "step": 16859 }, { "epoch": 0.3715149812424598, "grad_norm": 0.7948126792907715, "learning_rate": 2.172257883803067e-05, "loss": 0.125, "step": 16860 }, { "epoch": 0.37153701653197596, "grad_norm": 0.9914157390594482, "learning_rate": 2.1721621818750662e-05, "loss": 0.0941, "step": 16861 }, { "epoch": 0.3715590518214921, "grad_norm": 0.7450637221336365, "learning_rate": 2.1720664765233832e-05, "loss": 0.0816, "step": 16862 }, { "epoch": 0.3715810871110083, "grad_norm": 0.9210250973701477, "learning_rate": 2.171970767748505e-05, "loss": 0.0674, "step": 16863 }, { "epoch": 0.37160312240052445, "grad_norm": 0.6039182543754578, "learning_rate": 2.1718750555509197e-05, "loss": 0.0623, "step": 16864 }, { "epoch": 0.3716251576900406, "grad_norm": 0.907800018787384, "learning_rate": 2.1717793399311143e-05, "loss": 0.0839, "step": 16865 }, { "epoch": 0.3716471929795568, "grad_norm": 0.6656186580657959, "learning_rate": 2.1716836208895764e-05, "loss": 0.0718, "step": 16866 }, { "epoch": 0.3716692282690729, "grad_norm": 1.1498661041259766, "learning_rate": 2.1715878984267934e-05, "loss": 0.0816, "step": 16867 }, { "epoch": 0.37169126355858906, "grad_norm": 0.5675056576728821, "learning_rate": 2.1714921725432533e-05, "loss": 0.1066, "step": 16868 }, { "epoch": 0.3717132988481052, "grad_norm": 0.9264785051345825, "learning_rate": 2.1713964432394437e-05, "loss": 0.0978, "step": 16869 }, { "epoch": 0.3717353341376214, "grad_norm": 0.7687508463859558, "learning_rate": 2.1713007105158516e-05, "loss": 0.0789, "step": 16870 }, { "epoch": 0.37175736942713755, "grad_norm": 0.5595524907112122, "learning_rate": 2.1712049743729655e-05, "loss": 0.0985, "step": 16871 }, { "epoch": 0.3717794047166537, "grad_norm": 1.0236616134643555, "learning_rate": 2.1711092348112724e-05, "loss": 0.1049, "step": 16872 }, { "epoch": 0.3718014400061699, "grad_norm": 0.5219571590423584, "learning_rate": 2.17101349183126e-05, "loss": 0.0874, "step": 16873 }, { "epoch": 0.37182347529568605, "grad_norm": 0.5676682591438293, "learning_rate": 2.1709177454334157e-05, "loss": 0.0616, "step": 16874 }, { "epoch": 0.3718455105852022, "grad_norm": 0.761809766292572, "learning_rate": 2.1708219956182282e-05, "loss": 0.1037, "step": 16875 }, { "epoch": 0.3718675458747184, "grad_norm": 0.602996826171875, "learning_rate": 2.170726242386184e-05, "loss": 0.0738, "step": 16876 }, { "epoch": 0.37188958116423454, "grad_norm": 0.8602513670921326, "learning_rate": 2.1706304857377717e-05, "loss": 0.0815, "step": 16877 }, { "epoch": 0.3719116164537507, "grad_norm": 0.9487996697425842, "learning_rate": 2.1705347256734784e-05, "loss": 0.0778, "step": 16878 }, { "epoch": 0.37193365174326687, "grad_norm": 0.49932610988616943, "learning_rate": 2.1704389621937924e-05, "loss": 0.074, "step": 16879 }, { "epoch": 0.371955687032783, "grad_norm": 1.070114016532898, "learning_rate": 2.1703431952992016e-05, "loss": 0.0838, "step": 16880 }, { "epoch": 0.37197772232229914, "grad_norm": 0.6376912593841553, "learning_rate": 2.1702474249901933e-05, "loss": 0.0627, "step": 16881 }, { "epoch": 0.3719997576118153, "grad_norm": 0.6583728194236755, "learning_rate": 2.170151651267255e-05, "loss": 0.0956, "step": 16882 }, { "epoch": 0.37202179290133147, "grad_norm": 0.7756999135017395, "learning_rate": 2.1700558741308753e-05, "loss": 0.0785, "step": 16883 }, { "epoch": 0.37204382819084764, "grad_norm": 0.6454563140869141, "learning_rate": 2.1699600935815412e-05, "loss": 0.0687, "step": 16884 }, { "epoch": 0.3720658634803638, "grad_norm": 0.9112948179244995, "learning_rate": 2.1698643096197417e-05, "loss": 0.1001, "step": 16885 }, { "epoch": 0.37208789876987997, "grad_norm": 0.41552734375, "learning_rate": 2.1697685222459637e-05, "loss": 0.0703, "step": 16886 }, { "epoch": 0.37210993405939613, "grad_norm": 0.7217305898666382, "learning_rate": 2.169672731460696e-05, "loss": 0.1166, "step": 16887 }, { "epoch": 0.3721319693489123, "grad_norm": 0.6595507264137268, "learning_rate": 2.1695769372644256e-05, "loss": 0.0669, "step": 16888 }, { "epoch": 0.37215400463842846, "grad_norm": 0.5296484231948853, "learning_rate": 2.1694811396576406e-05, "loss": 0.0717, "step": 16889 }, { "epoch": 0.3721760399279446, "grad_norm": 0.5896413922309875, "learning_rate": 2.1693853386408296e-05, "loss": 0.1037, "step": 16890 }, { "epoch": 0.3721980752174608, "grad_norm": 0.43672946095466614, "learning_rate": 2.1692895342144793e-05, "loss": 0.0806, "step": 16891 }, { "epoch": 0.3722201105069769, "grad_norm": 0.5156735777854919, "learning_rate": 2.1691937263790796e-05, "loss": 0.077, "step": 16892 }, { "epoch": 0.37224214579649306, "grad_norm": 0.34171438217163086, "learning_rate": 2.1690979151351166e-05, "loss": 0.1019, "step": 16893 }, { "epoch": 0.3722641810860092, "grad_norm": 0.8691744208335876, "learning_rate": 2.1690021004830793e-05, "loss": 0.1026, "step": 16894 }, { "epoch": 0.3722862163755254, "grad_norm": 0.7069156765937805, "learning_rate": 2.1689062824234556e-05, "loss": 0.0839, "step": 16895 }, { "epoch": 0.37230825166504156, "grad_norm": 0.4312753677368164, "learning_rate": 2.1688104609567336e-05, "loss": 0.0698, "step": 16896 }, { "epoch": 0.3723302869545577, "grad_norm": 0.8909834027290344, "learning_rate": 2.1687146360834013e-05, "loss": 0.1017, "step": 16897 }, { "epoch": 0.3723523222440739, "grad_norm": 1.0634753704071045, "learning_rate": 2.168618807803947e-05, "loss": 0.0996, "step": 16898 }, { "epoch": 0.37237435753359005, "grad_norm": 0.7931820154190063, "learning_rate": 2.168522976118858e-05, "loss": 0.0935, "step": 16899 }, { "epoch": 0.3723963928231062, "grad_norm": 1.3736658096313477, "learning_rate": 2.168427141028623e-05, "loss": 0.1057, "step": 16900 }, { "epoch": 0.3724184281126224, "grad_norm": 0.7039534449577332, "learning_rate": 2.1683313025337305e-05, "loss": 0.0735, "step": 16901 }, { "epoch": 0.37244046340213854, "grad_norm": 0.7067264914512634, "learning_rate": 2.1682354606346684e-05, "loss": 0.0646, "step": 16902 }, { "epoch": 0.3724624986916547, "grad_norm": 0.5440199375152588, "learning_rate": 2.1681396153319243e-05, "loss": 0.0832, "step": 16903 }, { "epoch": 0.3724845339811708, "grad_norm": 0.5332900881767273, "learning_rate": 2.168043766625987e-05, "loss": 0.092, "step": 16904 }, { "epoch": 0.372506569270687, "grad_norm": 0.6453399062156677, "learning_rate": 2.167947914517345e-05, "loss": 0.0537, "step": 16905 }, { "epoch": 0.37252860456020315, "grad_norm": 0.7087504863739014, "learning_rate": 2.1678520590064856e-05, "loss": 0.0871, "step": 16906 }, { "epoch": 0.3725506398497193, "grad_norm": 0.7741361260414124, "learning_rate": 2.1677562000938977e-05, "loss": 0.054, "step": 16907 }, { "epoch": 0.3725726751392355, "grad_norm": 0.5056731700897217, "learning_rate": 2.1676603377800696e-05, "loss": 0.0724, "step": 16908 }, { "epoch": 0.37259471042875164, "grad_norm": 0.5839131474494934, "learning_rate": 2.167564472065489e-05, "loss": 0.1198, "step": 16909 }, { "epoch": 0.3726167457182678, "grad_norm": 0.6780502796173096, "learning_rate": 2.1674686029506446e-05, "loss": 0.0823, "step": 16910 }, { "epoch": 0.37263878100778397, "grad_norm": 0.8239796161651611, "learning_rate": 2.167372730436025e-05, "loss": 0.101, "step": 16911 }, { "epoch": 0.37266081629730013, "grad_norm": 0.5477815866470337, "learning_rate": 2.167276854522118e-05, "loss": 0.09, "step": 16912 }, { "epoch": 0.3726828515868163, "grad_norm": 1.0103429555892944, "learning_rate": 2.1671809752094123e-05, "loss": 0.0941, "step": 16913 }, { "epoch": 0.37270488687633246, "grad_norm": 0.7483251094818115, "learning_rate": 2.1670850924983956e-05, "loss": 0.0961, "step": 16914 }, { "epoch": 0.37272692216584863, "grad_norm": 0.7249030470848083, "learning_rate": 2.1669892063895573e-05, "loss": 0.0848, "step": 16915 }, { "epoch": 0.3727489574553648, "grad_norm": 0.6017749905586243, "learning_rate": 2.166893316883385e-05, "loss": 0.1043, "step": 16916 }, { "epoch": 0.3727709927448809, "grad_norm": 0.5299265384674072, "learning_rate": 2.1667974239803676e-05, "loss": 0.0788, "step": 16917 }, { "epoch": 0.37279302803439707, "grad_norm": 0.9314392805099487, "learning_rate": 2.1667015276809932e-05, "loss": 0.0688, "step": 16918 }, { "epoch": 0.37281506332391323, "grad_norm": 0.8306212425231934, "learning_rate": 2.1666056279857506e-05, "loss": 0.0612, "step": 16919 }, { "epoch": 0.3728370986134294, "grad_norm": 0.667161226272583, "learning_rate": 2.1665097248951283e-05, "loss": 0.1245, "step": 16920 }, { "epoch": 0.37285913390294556, "grad_norm": 0.6234893798828125, "learning_rate": 2.1664138184096145e-05, "loss": 0.0689, "step": 16921 }, { "epoch": 0.3728811691924617, "grad_norm": 0.8896240592002869, "learning_rate": 2.1663179085296975e-05, "loss": 0.091, "step": 16922 }, { "epoch": 0.3729032044819779, "grad_norm": 0.44456276297569275, "learning_rate": 2.1662219952558663e-05, "loss": 0.0494, "step": 16923 }, { "epoch": 0.37292523977149405, "grad_norm": 0.6956779956817627, "learning_rate": 2.166126078588609e-05, "loss": 0.0537, "step": 16924 }, { "epoch": 0.3729472750610102, "grad_norm": 0.7563011646270752, "learning_rate": 2.1660301585284147e-05, "loss": 0.0838, "step": 16925 }, { "epoch": 0.3729693103505264, "grad_norm": 0.7250006794929504, "learning_rate": 2.1659342350757708e-05, "loss": 0.1055, "step": 16926 }, { "epoch": 0.37299134564004255, "grad_norm": 0.5440091490745544, "learning_rate": 2.1658383082311676e-05, "loss": 0.0915, "step": 16927 }, { "epoch": 0.3730133809295587, "grad_norm": 0.6844791173934937, "learning_rate": 2.165742377995093e-05, "loss": 0.0955, "step": 16928 }, { "epoch": 0.3730354162190748, "grad_norm": 0.9039178490638733, "learning_rate": 2.1656464443680347e-05, "loss": 0.0775, "step": 16929 }, { "epoch": 0.373057451508591, "grad_norm": 0.7297022938728333, "learning_rate": 2.1655505073504825e-05, "loss": 0.0882, "step": 16930 }, { "epoch": 0.37307948679810715, "grad_norm": 0.7604967951774597, "learning_rate": 2.1654545669429248e-05, "loss": 0.1235, "step": 16931 }, { "epoch": 0.3731015220876233, "grad_norm": 1.1338149309158325, "learning_rate": 2.16535862314585e-05, "loss": 0.1098, "step": 16932 }, { "epoch": 0.3731235573771395, "grad_norm": 0.6710268259048462, "learning_rate": 2.1652626759597468e-05, "loss": 0.0923, "step": 16933 }, { "epoch": 0.37314559266665565, "grad_norm": 0.7092287540435791, "learning_rate": 2.165166725385104e-05, "loss": 0.0696, "step": 16934 }, { "epoch": 0.3731676279561718, "grad_norm": 0.937300980091095, "learning_rate": 2.1650707714224105e-05, "loss": 0.1277, "step": 16935 }, { "epoch": 0.373189663245688, "grad_norm": 0.8724274635314941, "learning_rate": 2.1649748140721553e-05, "loss": 0.0901, "step": 16936 }, { "epoch": 0.37321169853520414, "grad_norm": 0.4660589098930359, "learning_rate": 2.164878853334826e-05, "loss": 0.0766, "step": 16937 }, { "epoch": 0.3732337338247203, "grad_norm": 0.8290669322013855, "learning_rate": 2.164782889210913e-05, "loss": 0.0857, "step": 16938 }, { "epoch": 0.37325576911423647, "grad_norm": 0.3689265251159668, "learning_rate": 2.1646869217009038e-05, "loss": 0.078, "step": 16939 }, { "epoch": 0.37327780440375263, "grad_norm": 0.3895469307899475, "learning_rate": 2.1645909508052876e-05, "loss": 0.0722, "step": 16940 }, { "epoch": 0.37329983969326874, "grad_norm": 0.501947820186615, "learning_rate": 2.164494976524553e-05, "loss": 0.0653, "step": 16941 }, { "epoch": 0.3733218749827849, "grad_norm": 0.8125161528587341, "learning_rate": 2.16439899885919e-05, "loss": 0.1074, "step": 16942 }, { "epoch": 0.37334391027230107, "grad_norm": 0.6042426824569702, "learning_rate": 2.164303017809686e-05, "loss": 0.0611, "step": 16943 }, { "epoch": 0.37336594556181724, "grad_norm": 0.39303112030029297, "learning_rate": 2.1642070333765315e-05, "loss": 0.068, "step": 16944 }, { "epoch": 0.3733879808513334, "grad_norm": 0.7281045317649841, "learning_rate": 2.1641110455602132e-05, "loss": 0.0771, "step": 16945 }, { "epoch": 0.37341001614084957, "grad_norm": 0.7326208353042603, "learning_rate": 2.1640150543612217e-05, "loss": 0.0987, "step": 16946 }, { "epoch": 0.37343205143036573, "grad_norm": 0.8650643825531006, "learning_rate": 2.1639190597800452e-05, "loss": 0.0725, "step": 16947 }, { "epoch": 0.3734540867198819, "grad_norm": 0.7370145320892334, "learning_rate": 2.1638230618171734e-05, "loss": 0.1172, "step": 16948 }, { "epoch": 0.37347612200939806, "grad_norm": 0.58967524766922, "learning_rate": 2.163727060473094e-05, "loss": 0.0611, "step": 16949 }, { "epoch": 0.3734981572989142, "grad_norm": 0.8337624073028564, "learning_rate": 2.1636310557482976e-05, "loss": 0.0682, "step": 16950 }, { "epoch": 0.3735201925884304, "grad_norm": 0.7154979109764099, "learning_rate": 2.163535047643272e-05, "loss": 0.096, "step": 16951 }, { "epoch": 0.37354222787794655, "grad_norm": 0.9607551097869873, "learning_rate": 2.1634390361585067e-05, "loss": 0.0967, "step": 16952 }, { "epoch": 0.3735642631674627, "grad_norm": 0.9423758387565613, "learning_rate": 2.1633430212944904e-05, "loss": 0.0833, "step": 16953 }, { "epoch": 0.3735862984569788, "grad_norm": 0.6708297729492188, "learning_rate": 2.1632470030517128e-05, "loss": 0.0649, "step": 16954 }, { "epoch": 0.373608333746495, "grad_norm": 1.0943633317947388, "learning_rate": 2.163150981430662e-05, "loss": 0.1177, "step": 16955 }, { "epoch": 0.37363036903601116, "grad_norm": 0.7164129614830017, "learning_rate": 2.1630549564318282e-05, "loss": 0.055, "step": 16956 }, { "epoch": 0.3736524043255273, "grad_norm": 0.5854596495628357, "learning_rate": 2.1629589280556994e-05, "loss": 0.0573, "step": 16957 }, { "epoch": 0.3736744396150435, "grad_norm": 0.7083081603050232, "learning_rate": 2.1628628963027653e-05, "loss": 0.0886, "step": 16958 }, { "epoch": 0.37369647490455965, "grad_norm": 0.8171882033348083, "learning_rate": 2.1627668611735155e-05, "loss": 0.0813, "step": 16959 }, { "epoch": 0.3737185101940758, "grad_norm": 1.0786973237991333, "learning_rate": 2.1626708226684383e-05, "loss": 0.0595, "step": 16960 }, { "epoch": 0.373740545483592, "grad_norm": 0.48325684666633606, "learning_rate": 2.1625747807880236e-05, "loss": 0.0719, "step": 16961 }, { "epoch": 0.37376258077310814, "grad_norm": 0.6481069922447205, "learning_rate": 2.16247873553276e-05, "loss": 0.0858, "step": 16962 }, { "epoch": 0.3737846160626243, "grad_norm": 0.8071965575218201, "learning_rate": 2.1623826869031368e-05, "loss": 0.0979, "step": 16963 }, { "epoch": 0.3738066513521405, "grad_norm": 0.7563301920890808, "learning_rate": 2.1622866348996432e-05, "loss": 0.0956, "step": 16964 }, { "epoch": 0.37382868664165664, "grad_norm": 0.8397282361984253, "learning_rate": 2.1621905795227695e-05, "loss": 0.0996, "step": 16965 }, { "epoch": 0.37385072193117275, "grad_norm": 0.4728620648384094, "learning_rate": 2.162094520773003e-05, "loss": 0.0726, "step": 16966 }, { "epoch": 0.3738727572206889, "grad_norm": 0.670525312423706, "learning_rate": 2.1619984586508346e-05, "loss": 0.0756, "step": 16967 }, { "epoch": 0.3738947925102051, "grad_norm": 0.8497804999351501, "learning_rate": 2.1619023931567534e-05, "loss": 0.1014, "step": 16968 }, { "epoch": 0.37391682779972124, "grad_norm": 0.7684237360954285, "learning_rate": 2.1618063242912482e-05, "loss": 0.0859, "step": 16969 }, { "epoch": 0.3739388630892374, "grad_norm": 0.6166074872016907, "learning_rate": 2.161710252054808e-05, "loss": 0.0752, "step": 16970 }, { "epoch": 0.37396089837875357, "grad_norm": 0.8069295883178711, "learning_rate": 2.1616141764479233e-05, "loss": 0.0962, "step": 16971 }, { "epoch": 0.37398293366826973, "grad_norm": 0.7154547572135925, "learning_rate": 2.1615180974710822e-05, "loss": 0.0926, "step": 16972 }, { "epoch": 0.3740049689577859, "grad_norm": 0.7421739101409912, "learning_rate": 2.161422015124775e-05, "loss": 0.1132, "step": 16973 }, { "epoch": 0.37402700424730206, "grad_norm": 0.9504972100257874, "learning_rate": 2.1613259294094908e-05, "loss": 0.1263, "step": 16974 }, { "epoch": 0.37404903953681823, "grad_norm": 0.7021603584289551, "learning_rate": 2.1612298403257188e-05, "loss": 0.0874, "step": 16975 }, { "epoch": 0.3740710748263344, "grad_norm": 0.9818851351737976, "learning_rate": 2.1611337478739494e-05, "loss": 0.0896, "step": 16976 }, { "epoch": 0.37409311011585056, "grad_norm": 0.7070683240890503, "learning_rate": 2.1610376520546707e-05, "loss": 0.0574, "step": 16977 }, { "epoch": 0.3741151454053667, "grad_norm": 1.3322728872299194, "learning_rate": 2.1609415528683726e-05, "loss": 0.0849, "step": 16978 }, { "epoch": 0.37413718069488283, "grad_norm": 0.5846254229545593, "learning_rate": 2.160845450315545e-05, "loss": 0.1001, "step": 16979 }, { "epoch": 0.374159215984399, "grad_norm": 0.7086227536201477, "learning_rate": 2.160749344396677e-05, "loss": 0.0836, "step": 16980 }, { "epoch": 0.37418125127391516, "grad_norm": 0.5717924237251282, "learning_rate": 2.160653235112258e-05, "loss": 0.0898, "step": 16981 }, { "epoch": 0.3742032865634313, "grad_norm": 0.9056183099746704, "learning_rate": 2.1605571224627784e-05, "loss": 0.132, "step": 16982 }, { "epoch": 0.3742253218529475, "grad_norm": 0.5452899932861328, "learning_rate": 2.160461006448727e-05, "loss": 0.0918, "step": 16983 }, { "epoch": 0.37424735714246365, "grad_norm": 0.7637328505516052, "learning_rate": 2.1603648870705935e-05, "loss": 0.0646, "step": 16984 }, { "epoch": 0.3742693924319798, "grad_norm": 0.8888781666755676, "learning_rate": 2.1602687643288672e-05, "loss": 0.0903, "step": 16985 }, { "epoch": 0.374291427721496, "grad_norm": 1.074723243713379, "learning_rate": 2.1601726382240387e-05, "loss": 0.0926, "step": 16986 }, { "epoch": 0.37431346301101215, "grad_norm": 0.8005009889602661, "learning_rate": 2.160076508756596e-05, "loss": 0.104, "step": 16987 }, { "epoch": 0.3743354983005283, "grad_norm": 0.9669427871704102, "learning_rate": 2.1599803759270306e-05, "loss": 0.0497, "step": 16988 }, { "epoch": 0.3743575335900445, "grad_norm": 0.6566382646560669, "learning_rate": 2.1598842397358306e-05, "loss": 0.0848, "step": 16989 }, { "epoch": 0.37437956887956064, "grad_norm": 0.6848579049110413, "learning_rate": 2.1597881001834867e-05, "loss": 0.0905, "step": 16990 }, { "epoch": 0.37440160416907675, "grad_norm": 0.7920709848403931, "learning_rate": 2.1596919572704883e-05, "loss": 0.1186, "step": 16991 }, { "epoch": 0.3744236394585929, "grad_norm": 0.6634422540664673, "learning_rate": 2.1595958109973245e-05, "loss": 0.1149, "step": 16992 }, { "epoch": 0.3744456747481091, "grad_norm": 0.6787875294685364, "learning_rate": 2.1594996613644858e-05, "loss": 0.0875, "step": 16993 }, { "epoch": 0.37446771003762525, "grad_norm": 0.29243388772010803, "learning_rate": 2.159403508372462e-05, "loss": 0.0733, "step": 16994 }, { "epoch": 0.3744897453271414, "grad_norm": 0.6835233569145203, "learning_rate": 2.159307352021742e-05, "loss": 0.1087, "step": 16995 }, { "epoch": 0.3745117806166576, "grad_norm": 0.7451087832450867, "learning_rate": 2.159211192312816e-05, "loss": 0.1041, "step": 16996 }, { "epoch": 0.37453381590617374, "grad_norm": 0.5770570039749146, "learning_rate": 2.1591150292461744e-05, "loss": 0.1065, "step": 16997 }, { "epoch": 0.3745558511956899, "grad_norm": 0.5618364214897156, "learning_rate": 2.1590188628223062e-05, "loss": 0.12, "step": 16998 }, { "epoch": 0.37457788648520607, "grad_norm": 0.46856239438056946, "learning_rate": 2.1589226930417016e-05, "loss": 0.0413, "step": 16999 }, { "epoch": 0.37459992177472223, "grad_norm": 0.7800358533859253, "learning_rate": 2.1588265199048506e-05, "loss": 0.0876, "step": 17000 }, { "epoch": 0.3746219570642384, "grad_norm": 0.4243943989276886, "learning_rate": 2.1587303434122425e-05, "loss": 0.0735, "step": 17001 }, { "epoch": 0.37464399235375456, "grad_norm": 0.5635990500450134, "learning_rate": 2.1586341635643677e-05, "loss": 0.0654, "step": 17002 }, { "epoch": 0.37466602764327067, "grad_norm": 0.8870020508766174, "learning_rate": 2.1585379803617156e-05, "loss": 0.1015, "step": 17003 }, { "epoch": 0.37468806293278684, "grad_norm": 1.03389573097229, "learning_rate": 2.158441793804777e-05, "loss": 0.1268, "step": 17004 }, { "epoch": 0.374710098222303, "grad_norm": 0.6342195272445679, "learning_rate": 2.158345603894041e-05, "loss": 0.073, "step": 17005 }, { "epoch": 0.37473213351181917, "grad_norm": 0.6030488610267639, "learning_rate": 2.1582494106299978e-05, "loss": 0.0836, "step": 17006 }, { "epoch": 0.37475416880133533, "grad_norm": 0.6933826208114624, "learning_rate": 2.158153214013137e-05, "loss": 0.0651, "step": 17007 }, { "epoch": 0.3747762040908515, "grad_norm": 0.6570726633071899, "learning_rate": 2.1580570140439495e-05, "loss": 0.0936, "step": 17008 }, { "epoch": 0.37479823938036766, "grad_norm": 0.689467191696167, "learning_rate": 2.1579608107229246e-05, "loss": 0.1189, "step": 17009 }, { "epoch": 0.3748202746698838, "grad_norm": 0.7535380721092224, "learning_rate": 2.1578646040505525e-05, "loss": 0.1071, "step": 17010 }, { "epoch": 0.3748423099594, "grad_norm": 0.5554789304733276, "learning_rate": 2.157768394027323e-05, "loss": 0.0941, "step": 17011 }, { "epoch": 0.37486434524891615, "grad_norm": 0.7319984436035156, "learning_rate": 2.1576721806537265e-05, "loss": 0.1158, "step": 17012 }, { "epoch": 0.3748863805384323, "grad_norm": 0.3904477655887604, "learning_rate": 2.157575963930253e-05, "loss": 0.066, "step": 17013 }, { "epoch": 0.3749084158279485, "grad_norm": 1.1052483320236206, "learning_rate": 2.1574797438573925e-05, "loss": 0.1351, "step": 17014 }, { "epoch": 0.37493045111746465, "grad_norm": 0.5902799367904663, "learning_rate": 2.1573835204356347e-05, "loss": 0.074, "step": 17015 }, { "epoch": 0.37495248640698076, "grad_norm": 0.5488818883895874, "learning_rate": 2.1572872936654706e-05, "loss": 0.0704, "step": 17016 }, { "epoch": 0.3749745216964969, "grad_norm": 0.7619388103485107, "learning_rate": 2.15719106354739e-05, "loss": 0.1264, "step": 17017 }, { "epoch": 0.3749965569860131, "grad_norm": 0.7703987956047058, "learning_rate": 2.157094830081882e-05, "loss": 0.1046, "step": 17018 }, { "epoch": 0.37501859227552925, "grad_norm": 0.7876272797584534, "learning_rate": 2.1569985932694386e-05, "loss": 0.0902, "step": 17019 }, { "epoch": 0.3750406275650454, "grad_norm": 0.5435419082641602, "learning_rate": 2.1569023531105483e-05, "loss": 0.0993, "step": 17020 }, { "epoch": 0.3750626628545616, "grad_norm": 0.8470250368118286, "learning_rate": 2.1568061096057027e-05, "loss": 0.1008, "step": 17021 }, { "epoch": 0.37508469814407774, "grad_norm": 1.5200960636138916, "learning_rate": 2.1567098627553907e-05, "loss": 0.082, "step": 17022 }, { "epoch": 0.3751067334335939, "grad_norm": 1.0906710624694824, "learning_rate": 2.156613612560104e-05, "loss": 0.0925, "step": 17023 }, { "epoch": 0.3751287687231101, "grad_norm": 0.6927151083946228, "learning_rate": 2.1565173590203316e-05, "loss": 0.098, "step": 17024 }, { "epoch": 0.37515080401262624, "grad_norm": 0.5472874045372009, "learning_rate": 2.1564211021365643e-05, "loss": 0.083, "step": 17025 }, { "epoch": 0.3751728393021424, "grad_norm": 1.0424270629882812, "learning_rate": 2.1563248419092924e-05, "loss": 0.0891, "step": 17026 }, { "epoch": 0.37519487459165857, "grad_norm": 0.9464840888977051, "learning_rate": 2.156228578339006e-05, "loss": 0.087, "step": 17027 }, { "epoch": 0.3752169098811747, "grad_norm": 0.5521307587623596, "learning_rate": 2.1561323114261955e-05, "loss": 0.0618, "step": 17028 }, { "epoch": 0.37523894517069084, "grad_norm": 0.6554194688796997, "learning_rate": 2.1560360411713515e-05, "loss": 0.1315, "step": 17029 }, { "epoch": 0.375260980460207, "grad_norm": 0.6443668603897095, "learning_rate": 2.1559397675749642e-05, "loss": 0.0667, "step": 17030 }, { "epoch": 0.37528301574972317, "grad_norm": 1.7682145833969116, "learning_rate": 2.155843490637524e-05, "loss": 0.1068, "step": 17031 }, { "epoch": 0.37530505103923933, "grad_norm": 0.582603394985199, "learning_rate": 2.1557472103595207e-05, "loss": 0.1061, "step": 17032 }, { "epoch": 0.3753270863287555, "grad_norm": 0.456136018037796, "learning_rate": 2.155650926741446e-05, "loss": 0.0704, "step": 17033 }, { "epoch": 0.37534912161827166, "grad_norm": 0.42651209235191345, "learning_rate": 2.1555546397837887e-05, "loss": 0.0747, "step": 17034 }, { "epoch": 0.37537115690778783, "grad_norm": 0.7092471122741699, "learning_rate": 2.1554583494870402e-05, "loss": 0.0924, "step": 17035 }, { "epoch": 0.375393192197304, "grad_norm": 0.2621091604232788, "learning_rate": 2.155362055851691e-05, "loss": 0.069, "step": 17036 }, { "epoch": 0.37541522748682016, "grad_norm": 0.7146119475364685, "learning_rate": 2.1552657588782316e-05, "loss": 0.0737, "step": 17037 }, { "epoch": 0.3754372627763363, "grad_norm": 0.5855919718742371, "learning_rate": 2.1551694585671525e-05, "loss": 0.1116, "step": 17038 }, { "epoch": 0.3754592980658525, "grad_norm": 0.5577709078788757, "learning_rate": 2.155073154918944e-05, "loss": 0.1084, "step": 17039 }, { "epoch": 0.3754813333553686, "grad_norm": 0.5860586166381836, "learning_rate": 2.154976847934096e-05, "loss": 0.0743, "step": 17040 }, { "epoch": 0.37550336864488476, "grad_norm": 1.4053354263305664, "learning_rate": 2.1548805376131e-05, "loss": 0.1284, "step": 17041 }, { "epoch": 0.3755254039344009, "grad_norm": 0.8254926800727844, "learning_rate": 2.1547842239564464e-05, "loss": 0.1246, "step": 17042 }, { "epoch": 0.3755474392239171, "grad_norm": 1.2360776662826538, "learning_rate": 2.1546879069646255e-05, "loss": 0.1161, "step": 17043 }, { "epoch": 0.37556947451343325, "grad_norm": 0.7697697281837463, "learning_rate": 2.154591586638128e-05, "loss": 0.0884, "step": 17044 }, { "epoch": 0.3755915098029494, "grad_norm": 1.6338530778884888, "learning_rate": 2.1544952629774444e-05, "loss": 0.129, "step": 17045 }, { "epoch": 0.3756135450924656, "grad_norm": 0.9428921937942505, "learning_rate": 2.1543989359830657e-05, "loss": 0.1091, "step": 17046 }, { "epoch": 0.37563558038198175, "grad_norm": 0.9835083484649658, "learning_rate": 2.1543026056554823e-05, "loss": 0.0642, "step": 17047 }, { "epoch": 0.3756576156714979, "grad_norm": 0.5571909546852112, "learning_rate": 2.154206271995185e-05, "loss": 0.0813, "step": 17048 }, { "epoch": 0.3756796509610141, "grad_norm": 0.4399048388004303, "learning_rate": 2.1541099350026638e-05, "loss": 0.0945, "step": 17049 }, { "epoch": 0.37570168625053024, "grad_norm": 0.6149335503578186, "learning_rate": 2.1540135946784104e-05, "loss": 0.0575, "step": 17050 }, { "epoch": 0.3757237215400464, "grad_norm": 0.7495479583740234, "learning_rate": 2.153917251022915e-05, "loss": 0.0783, "step": 17051 }, { "epoch": 0.37574575682956257, "grad_norm": 0.6142324209213257, "learning_rate": 2.153820904036668e-05, "loss": 0.0687, "step": 17052 }, { "epoch": 0.3757677921190787, "grad_norm": 0.794097363948822, "learning_rate": 2.153724553720161e-05, "loss": 0.0885, "step": 17053 }, { "epoch": 0.37578982740859485, "grad_norm": 0.8026647567749023, "learning_rate": 2.153628200073884e-05, "loss": 0.0948, "step": 17054 }, { "epoch": 0.375811862698111, "grad_norm": 0.7120797634124756, "learning_rate": 2.1535318430983282e-05, "loss": 0.0633, "step": 17055 }, { "epoch": 0.3758338979876272, "grad_norm": 0.5106099247932434, "learning_rate": 2.1534354827939845e-05, "loss": 0.093, "step": 17056 }, { "epoch": 0.37585593327714334, "grad_norm": 1.0819907188415527, "learning_rate": 2.1533391191613437e-05, "loss": 0.0902, "step": 17057 }, { "epoch": 0.3758779685666595, "grad_norm": 0.9543290138244629, "learning_rate": 2.1532427522008954e-05, "loss": 0.1157, "step": 17058 }, { "epoch": 0.37590000385617567, "grad_norm": 0.8899629712104797, "learning_rate": 2.1531463819131324e-05, "loss": 0.0916, "step": 17059 }, { "epoch": 0.37592203914569183, "grad_norm": 0.3719918727874756, "learning_rate": 2.153050008298544e-05, "loss": 0.0621, "step": 17060 }, { "epoch": 0.375944074435208, "grad_norm": 0.490272581577301, "learning_rate": 2.1529536313576225e-05, "loss": 0.0661, "step": 17061 }, { "epoch": 0.37596610972472416, "grad_norm": 0.5589706897735596, "learning_rate": 2.152857251090857e-05, "loss": 0.0977, "step": 17062 }, { "epoch": 0.3759881450142403, "grad_norm": 0.8479093909263611, "learning_rate": 2.1527608674987402e-05, "loss": 0.111, "step": 17063 }, { "epoch": 0.3760101803037565, "grad_norm": 0.4247678816318512, "learning_rate": 2.152664480581762e-05, "loss": 0.1007, "step": 17064 }, { "epoch": 0.3760322155932726, "grad_norm": 0.47427892684936523, "learning_rate": 2.1525680903404137e-05, "loss": 0.0681, "step": 17065 }, { "epoch": 0.37605425088278877, "grad_norm": 0.8821510672569275, "learning_rate": 2.152471696775186e-05, "loss": 0.1027, "step": 17066 }, { "epoch": 0.37607628617230493, "grad_norm": 0.7596628665924072, "learning_rate": 2.15237529988657e-05, "loss": 0.0833, "step": 17067 }, { "epoch": 0.3760983214618211, "grad_norm": 0.4106292128562927, "learning_rate": 2.1522788996750568e-05, "loss": 0.09, "step": 17068 }, { "epoch": 0.37612035675133726, "grad_norm": 0.5648053288459778, "learning_rate": 2.1521824961411374e-05, "loss": 0.0849, "step": 17069 }, { "epoch": 0.3761423920408534, "grad_norm": 0.6305125951766968, "learning_rate": 2.1520860892853035e-05, "loss": 0.051, "step": 17070 }, { "epoch": 0.3761644273303696, "grad_norm": 0.6941003203392029, "learning_rate": 2.1519896791080447e-05, "loss": 0.1015, "step": 17071 }, { "epoch": 0.37618646261988575, "grad_norm": 1.321621060371399, "learning_rate": 2.1518932656098533e-05, "loss": 0.1237, "step": 17072 }, { "epoch": 0.3762084979094019, "grad_norm": 0.679874062538147, "learning_rate": 2.1517968487912193e-05, "loss": 0.0963, "step": 17073 }, { "epoch": 0.3762305331989181, "grad_norm": 1.1576027870178223, "learning_rate": 2.151700428652635e-05, "loss": 0.1326, "step": 17074 }, { "epoch": 0.37625256848843425, "grad_norm": 0.45510923862457275, "learning_rate": 2.1516040051945906e-05, "loss": 0.1058, "step": 17075 }, { "epoch": 0.3762746037779504, "grad_norm": 1.2857598066329956, "learning_rate": 2.1515075784175776e-05, "loss": 0.1179, "step": 17076 }, { "epoch": 0.3762966390674665, "grad_norm": 0.5696249008178711, "learning_rate": 2.1514111483220873e-05, "loss": 0.0885, "step": 17077 }, { "epoch": 0.3763186743569827, "grad_norm": 0.8207255601882935, "learning_rate": 2.1513147149086107e-05, "loss": 0.0842, "step": 17078 }, { "epoch": 0.37634070964649885, "grad_norm": 0.6687982082366943, "learning_rate": 2.1512182781776392e-05, "loss": 0.0812, "step": 17079 }, { "epoch": 0.376362744936015, "grad_norm": 0.773013710975647, "learning_rate": 2.1511218381296636e-05, "loss": 0.0972, "step": 17080 }, { "epoch": 0.3763847802255312, "grad_norm": 1.1254796981811523, "learning_rate": 2.151025394765175e-05, "loss": 0.0868, "step": 17081 }, { "epoch": 0.37640681551504734, "grad_norm": 0.8136423230171204, "learning_rate": 2.1509289480846656e-05, "loss": 0.0964, "step": 17082 }, { "epoch": 0.3764288508045635, "grad_norm": 0.6464848518371582, "learning_rate": 2.1508324980886255e-05, "loss": 0.0913, "step": 17083 }, { "epoch": 0.3764508860940797, "grad_norm": 0.6285816431045532, "learning_rate": 2.1507360447775467e-05, "loss": 0.0963, "step": 17084 }, { "epoch": 0.37647292138359584, "grad_norm": 0.7553257346153259, "learning_rate": 2.15063958815192e-05, "loss": 0.0785, "step": 17085 }, { "epoch": 0.376494956673112, "grad_norm": 0.6527366638183594, "learning_rate": 2.1505431282122372e-05, "loss": 0.0836, "step": 17086 }, { "epoch": 0.37651699196262817, "grad_norm": 0.6962355375289917, "learning_rate": 2.1504466649589897e-05, "loss": 0.0846, "step": 17087 }, { "epoch": 0.37653902725214433, "grad_norm": 0.5959759950637817, "learning_rate": 2.1503501983926683e-05, "loss": 0.1163, "step": 17088 }, { "epoch": 0.3765610625416605, "grad_norm": 0.8858786225318909, "learning_rate": 2.1502537285137644e-05, "loss": 0.0862, "step": 17089 }, { "epoch": 0.3765830978311766, "grad_norm": 0.8653730154037476, "learning_rate": 2.1501572553227695e-05, "loss": 0.0882, "step": 17090 }, { "epoch": 0.37660513312069277, "grad_norm": 0.48882097005844116, "learning_rate": 2.150060778820175e-05, "loss": 0.0836, "step": 17091 }, { "epoch": 0.37662716841020893, "grad_norm": 0.9182127714157104, "learning_rate": 2.1499642990064725e-05, "loss": 0.0602, "step": 17092 }, { "epoch": 0.3766492036997251, "grad_norm": 0.8464975357055664, "learning_rate": 2.1498678158821535e-05, "loss": 0.0846, "step": 17093 }, { "epoch": 0.37667123898924126, "grad_norm": 0.7135512828826904, "learning_rate": 2.1497713294477094e-05, "loss": 0.0713, "step": 17094 }, { "epoch": 0.37669327427875743, "grad_norm": 0.9226713180541992, "learning_rate": 2.1496748397036314e-05, "loss": 0.1021, "step": 17095 }, { "epoch": 0.3767153095682736, "grad_norm": 0.7122210264205933, "learning_rate": 2.149578346650411e-05, "loss": 0.0738, "step": 17096 }, { "epoch": 0.37673734485778976, "grad_norm": 0.8132309317588806, "learning_rate": 2.1494818502885394e-05, "loss": 0.1004, "step": 17097 }, { "epoch": 0.3767593801473059, "grad_norm": 0.6032670140266418, "learning_rate": 2.1493853506185086e-05, "loss": 0.0613, "step": 17098 }, { "epoch": 0.3767814154368221, "grad_norm": 1.2181792259216309, "learning_rate": 2.14928884764081e-05, "loss": 0.1103, "step": 17099 }, { "epoch": 0.37680345072633825, "grad_norm": 0.8371928930282593, "learning_rate": 2.149192341355935e-05, "loss": 0.0754, "step": 17100 }, { "epoch": 0.3768254860158544, "grad_norm": 0.5689893960952759, "learning_rate": 2.149095831764376e-05, "loss": 0.0632, "step": 17101 }, { "epoch": 0.3768475213053705, "grad_norm": 0.511742353439331, "learning_rate": 2.148999318866623e-05, "loss": 0.0812, "step": 17102 }, { "epoch": 0.3768695565948867, "grad_norm": 0.8472328782081604, "learning_rate": 2.1489028026631692e-05, "loss": 0.094, "step": 17103 }, { "epoch": 0.37689159188440285, "grad_norm": 0.9900967478752136, "learning_rate": 2.1488062831545046e-05, "loss": 0.0912, "step": 17104 }, { "epoch": 0.376913627173919, "grad_norm": 0.9630100131034851, "learning_rate": 2.1487097603411223e-05, "loss": 0.0918, "step": 17105 }, { "epoch": 0.3769356624634352, "grad_norm": 0.6656337380409241, "learning_rate": 2.148613234223513e-05, "loss": 0.0792, "step": 17106 }, { "epoch": 0.37695769775295135, "grad_norm": 0.5782677531242371, "learning_rate": 2.1485167048021688e-05, "loss": 0.1082, "step": 17107 }, { "epoch": 0.3769797330424675, "grad_norm": 0.7382770776748657, "learning_rate": 2.1484201720775808e-05, "loss": 0.1156, "step": 17108 }, { "epoch": 0.3770017683319837, "grad_norm": 0.9784314036369324, "learning_rate": 2.148323636050242e-05, "loss": 0.0793, "step": 17109 }, { "epoch": 0.37702380362149984, "grad_norm": 0.5811734199523926, "learning_rate": 2.1482270967206425e-05, "loss": 0.123, "step": 17110 }, { "epoch": 0.377045838911016, "grad_norm": 0.8877202272415161, "learning_rate": 2.1481305540892757e-05, "loss": 0.1323, "step": 17111 }, { "epoch": 0.37706787420053217, "grad_norm": 0.9152040481567383, "learning_rate": 2.1480340081566316e-05, "loss": 0.0987, "step": 17112 }, { "epoch": 0.37708990949004834, "grad_norm": 1.077877402305603, "learning_rate": 2.1479374589232034e-05, "loss": 0.0931, "step": 17113 }, { "epoch": 0.37711194477956445, "grad_norm": 0.7949652671813965, "learning_rate": 2.1478409063894818e-05, "loss": 0.1093, "step": 17114 }, { "epoch": 0.3771339800690806, "grad_norm": 0.29599621891975403, "learning_rate": 2.147744350555959e-05, "loss": 0.081, "step": 17115 }, { "epoch": 0.3771560153585968, "grad_norm": 0.9626181721687317, "learning_rate": 2.1476477914231273e-05, "loss": 0.0865, "step": 17116 }, { "epoch": 0.37717805064811294, "grad_norm": 0.49244531989097595, "learning_rate": 2.1475512289914777e-05, "loss": 0.0554, "step": 17117 }, { "epoch": 0.3772000859376291, "grad_norm": 0.6819325089454651, "learning_rate": 2.1474546632615025e-05, "loss": 0.0932, "step": 17118 }, { "epoch": 0.37722212122714527, "grad_norm": 0.4167216122150421, "learning_rate": 2.147358094233694e-05, "loss": 0.0579, "step": 17119 }, { "epoch": 0.37724415651666143, "grad_norm": 0.8321375250816345, "learning_rate": 2.1472615219085437e-05, "loss": 0.0966, "step": 17120 }, { "epoch": 0.3772661918061776, "grad_norm": 0.6453667879104614, "learning_rate": 2.1471649462865423e-05, "loss": 0.0396, "step": 17121 }, { "epoch": 0.37728822709569376, "grad_norm": 0.9688358306884766, "learning_rate": 2.1470683673681838e-05, "loss": 0.0736, "step": 17122 }, { "epoch": 0.3773102623852099, "grad_norm": 0.9378183484077454, "learning_rate": 2.1469717851539585e-05, "loss": 0.0973, "step": 17123 }, { "epoch": 0.3773322976747261, "grad_norm": 0.6065475344657898, "learning_rate": 2.1468751996443592e-05, "loss": 0.0882, "step": 17124 }, { "epoch": 0.37735433296424226, "grad_norm": 0.8000727891921997, "learning_rate": 2.1467786108398777e-05, "loss": 0.0956, "step": 17125 }, { "epoch": 0.3773763682537584, "grad_norm": 0.7980189323425293, "learning_rate": 2.1466820187410055e-05, "loss": 0.0806, "step": 17126 }, { "epoch": 0.37739840354327453, "grad_norm": 0.7876993417739868, "learning_rate": 2.1465854233482357e-05, "loss": 0.0953, "step": 17127 }, { "epoch": 0.3774204388327907, "grad_norm": 0.4686471223831177, "learning_rate": 2.1464888246620594e-05, "loss": 0.0944, "step": 17128 }, { "epoch": 0.37744247412230686, "grad_norm": 0.5623304843902588, "learning_rate": 2.1463922226829684e-05, "loss": 0.0898, "step": 17129 }, { "epoch": 0.377464509411823, "grad_norm": 0.44608765840530396, "learning_rate": 2.146295617411456e-05, "loss": 0.0674, "step": 17130 }, { "epoch": 0.3774865447013392, "grad_norm": 0.8031639456748962, "learning_rate": 2.1461990088480125e-05, "loss": 0.0862, "step": 17131 }, { "epoch": 0.37750857999085535, "grad_norm": 0.7706590890884399, "learning_rate": 2.1461023969931315e-05, "loss": 0.0829, "step": 17132 }, { "epoch": 0.3775306152803715, "grad_norm": 0.6984362602233887, "learning_rate": 2.146005781847304e-05, "loss": 0.0849, "step": 17133 }, { "epoch": 0.3775526505698877, "grad_norm": 0.5730251669883728, "learning_rate": 2.1459091634110235e-05, "loss": 0.0788, "step": 17134 }, { "epoch": 0.37757468585940385, "grad_norm": 0.7131014466285706, "learning_rate": 2.145812541684781e-05, "loss": 0.0853, "step": 17135 }, { "epoch": 0.37759672114892, "grad_norm": 0.6715791821479797, "learning_rate": 2.1457159166690683e-05, "loss": 0.0831, "step": 17136 }, { "epoch": 0.3776187564384362, "grad_norm": 0.5572408437728882, "learning_rate": 2.1456192883643786e-05, "loss": 0.0805, "step": 17137 }, { "epoch": 0.37764079172795234, "grad_norm": 0.5931612849235535, "learning_rate": 2.1455226567712034e-05, "loss": 0.0587, "step": 17138 }, { "epoch": 0.37766282701746845, "grad_norm": 1.2369654178619385, "learning_rate": 2.1454260218900357e-05, "loss": 0.0673, "step": 17139 }, { "epoch": 0.3776848623069846, "grad_norm": 1.1457722187042236, "learning_rate": 2.1453293837213665e-05, "loss": 0.1039, "step": 17140 }, { "epoch": 0.3777068975965008, "grad_norm": 0.5880216956138611, "learning_rate": 2.145232742265689e-05, "loss": 0.0788, "step": 17141 }, { "epoch": 0.37772893288601694, "grad_norm": 0.43236061930656433, "learning_rate": 2.1451360975234955e-05, "loss": 0.0613, "step": 17142 }, { "epoch": 0.3777509681755331, "grad_norm": 0.6923395991325378, "learning_rate": 2.1450394494952777e-05, "loss": 0.0936, "step": 17143 }, { "epoch": 0.3777730034650493, "grad_norm": 0.6978420615196228, "learning_rate": 2.1449427981815278e-05, "loss": 0.0813, "step": 17144 }, { "epoch": 0.37779503875456544, "grad_norm": 0.6470211744308472, "learning_rate": 2.144846143582739e-05, "loss": 0.0874, "step": 17145 }, { "epoch": 0.3778170740440816, "grad_norm": 0.7571940422058105, "learning_rate": 2.144749485699402e-05, "loss": 0.0775, "step": 17146 }, { "epoch": 0.37783910933359777, "grad_norm": 0.7872583270072937, "learning_rate": 2.144652824532011e-05, "loss": 0.1041, "step": 17147 }, { "epoch": 0.37786114462311393, "grad_norm": 1.0138717889785767, "learning_rate": 2.144556160081057e-05, "loss": 0.0961, "step": 17148 }, { "epoch": 0.3778831799126301, "grad_norm": 0.7918602228164673, "learning_rate": 2.144459492347033e-05, "loss": 0.0906, "step": 17149 }, { "epoch": 0.37790521520214626, "grad_norm": 0.976664125919342, "learning_rate": 2.1443628213304315e-05, "loss": 0.1164, "step": 17150 }, { "epoch": 0.37792725049166237, "grad_norm": 0.9340595602989197, "learning_rate": 2.1442661470317443e-05, "loss": 0.0582, "step": 17151 }, { "epoch": 0.37794928578117853, "grad_norm": 0.443598210811615, "learning_rate": 2.1441694694514638e-05, "loss": 0.0642, "step": 17152 }, { "epoch": 0.3779713210706947, "grad_norm": 0.7805333733558655, "learning_rate": 2.1440727885900833e-05, "loss": 0.0902, "step": 17153 }, { "epoch": 0.37799335636021086, "grad_norm": 0.7455296516418457, "learning_rate": 2.1439761044480947e-05, "loss": 0.0704, "step": 17154 }, { "epoch": 0.37801539164972703, "grad_norm": 0.5978743433952332, "learning_rate": 2.14387941702599e-05, "loss": 0.0672, "step": 17155 }, { "epoch": 0.3780374269392432, "grad_norm": 0.7373197078704834, "learning_rate": 2.1437827263242624e-05, "loss": 0.093, "step": 17156 }, { "epoch": 0.37805946222875936, "grad_norm": 0.6118744611740112, "learning_rate": 2.1436860323434044e-05, "loss": 0.0945, "step": 17157 }, { "epoch": 0.3780814975182755, "grad_norm": 0.5046663284301758, "learning_rate": 2.143589335083908e-05, "loss": 0.094, "step": 17158 }, { "epoch": 0.3781035328077917, "grad_norm": 0.8932536840438843, "learning_rate": 2.143492634546266e-05, "loss": 0.1147, "step": 17159 }, { "epoch": 0.37812556809730785, "grad_norm": 1.0666933059692383, "learning_rate": 2.143395930730971e-05, "loss": 0.1028, "step": 17160 }, { "epoch": 0.378147603386824, "grad_norm": 2.436619997024536, "learning_rate": 2.1432992236385155e-05, "loss": 0.1474, "step": 17161 }, { "epoch": 0.3781696386763402, "grad_norm": 0.7880847454071045, "learning_rate": 2.143202513269392e-05, "loss": 0.0981, "step": 17162 }, { "epoch": 0.37819167396585635, "grad_norm": 1.1262850761413574, "learning_rate": 2.1431057996240936e-05, "loss": 0.1327, "step": 17163 }, { "epoch": 0.37821370925537245, "grad_norm": 0.7539157271385193, "learning_rate": 2.143009082703112e-05, "loss": 0.0615, "step": 17164 }, { "epoch": 0.3782357445448886, "grad_norm": 0.6738946437835693, "learning_rate": 2.1429123625069404e-05, "loss": 0.0662, "step": 17165 }, { "epoch": 0.3782577798344048, "grad_norm": 0.7263525724411011, "learning_rate": 2.1428156390360716e-05, "loss": 0.0844, "step": 17166 }, { "epoch": 0.37827981512392095, "grad_norm": 0.7864629626274109, "learning_rate": 2.142718912290998e-05, "loss": 0.0804, "step": 17167 }, { "epoch": 0.3783018504134371, "grad_norm": 0.4231996536254883, "learning_rate": 2.142622182272212e-05, "loss": 0.0689, "step": 17168 }, { "epoch": 0.3783238857029533, "grad_norm": 0.5371891856193542, "learning_rate": 2.142525448980207e-05, "loss": 0.0765, "step": 17169 }, { "epoch": 0.37834592099246944, "grad_norm": 0.8072015047073364, "learning_rate": 2.1424287124154754e-05, "loss": 0.0867, "step": 17170 }, { "epoch": 0.3783679562819856, "grad_norm": 0.9018816947937012, "learning_rate": 2.1423319725785098e-05, "loss": 0.1092, "step": 17171 }, { "epoch": 0.37838999157150177, "grad_norm": 0.676726758480072, "learning_rate": 2.142235229469803e-05, "loss": 0.1048, "step": 17172 }, { "epoch": 0.37841202686101794, "grad_norm": 0.4980292320251465, "learning_rate": 2.1421384830898475e-05, "loss": 0.0682, "step": 17173 }, { "epoch": 0.3784340621505341, "grad_norm": 0.6056818962097168, "learning_rate": 2.1420417334391368e-05, "loss": 0.1086, "step": 17174 }, { "epoch": 0.37845609744005027, "grad_norm": 0.5999460816383362, "learning_rate": 2.141944980518163e-05, "loss": 0.1025, "step": 17175 }, { "epoch": 0.3784781327295664, "grad_norm": 1.0388106107711792, "learning_rate": 2.1418482243274194e-05, "loss": 0.083, "step": 17176 }, { "epoch": 0.37850016801908254, "grad_norm": 0.7348871827125549, "learning_rate": 2.1417514648673983e-05, "loss": 0.0839, "step": 17177 }, { "epoch": 0.3785222033085987, "grad_norm": 0.933952271938324, "learning_rate": 2.141654702138593e-05, "loss": 0.1147, "step": 17178 }, { "epoch": 0.37854423859811487, "grad_norm": 0.6311789751052856, "learning_rate": 2.141557936141496e-05, "loss": 0.0762, "step": 17179 }, { "epoch": 0.37856627388763103, "grad_norm": 0.5963493585586548, "learning_rate": 2.1414611668766007e-05, "loss": 0.0624, "step": 17180 }, { "epoch": 0.3785883091771472, "grad_norm": 0.8224615454673767, "learning_rate": 2.1413643943443994e-05, "loss": 0.1027, "step": 17181 }, { "epoch": 0.37861034446666336, "grad_norm": 0.9214668869972229, "learning_rate": 2.1412676185453856e-05, "loss": 0.0958, "step": 17182 }, { "epoch": 0.3786323797561795, "grad_norm": 0.49115845561027527, "learning_rate": 2.141170839480052e-05, "loss": 0.083, "step": 17183 }, { "epoch": 0.3786544150456957, "grad_norm": 0.5170478224754333, "learning_rate": 2.1410740571488912e-05, "loss": 0.0978, "step": 17184 }, { "epoch": 0.37867645033521186, "grad_norm": 0.8532876968383789, "learning_rate": 2.1409772715523967e-05, "loss": 0.0856, "step": 17185 }, { "epoch": 0.378698485624728, "grad_norm": 0.7752590179443359, "learning_rate": 2.140880482691061e-05, "loss": 0.0682, "step": 17186 }, { "epoch": 0.3787205209142442, "grad_norm": 0.5859456658363342, "learning_rate": 2.1407836905653777e-05, "loss": 0.0836, "step": 17187 }, { "epoch": 0.3787425562037603, "grad_norm": 0.4715133309364319, "learning_rate": 2.140686895175839e-05, "loss": 0.0981, "step": 17188 }, { "epoch": 0.37876459149327646, "grad_norm": 1.3965027332305908, "learning_rate": 2.1405900965229386e-05, "loss": 0.106, "step": 17189 }, { "epoch": 0.3787866267827926, "grad_norm": 0.8367109298706055, "learning_rate": 2.1404932946071695e-05, "loss": 0.0992, "step": 17190 }, { "epoch": 0.3788086620723088, "grad_norm": 1.1293905973434448, "learning_rate": 2.1403964894290245e-05, "loss": 0.0907, "step": 17191 }, { "epoch": 0.37883069736182495, "grad_norm": 0.6669299602508545, "learning_rate": 2.1402996809889967e-05, "loss": 0.0637, "step": 17192 }, { "epoch": 0.3788527326513411, "grad_norm": 0.7657347321510315, "learning_rate": 2.1402028692875793e-05, "loss": 0.1052, "step": 17193 }, { "epoch": 0.3788747679408573, "grad_norm": 0.8053663969039917, "learning_rate": 2.1401060543252653e-05, "loss": 0.121, "step": 17194 }, { "epoch": 0.37889680323037345, "grad_norm": 0.9661619663238525, "learning_rate": 2.140009236102548e-05, "loss": 0.0879, "step": 17195 }, { "epoch": 0.3789188385198896, "grad_norm": 0.7626832723617554, "learning_rate": 2.1399124146199203e-05, "loss": 0.105, "step": 17196 }, { "epoch": 0.3789408738094058, "grad_norm": 0.6473410725593567, "learning_rate": 2.1398155898778754e-05, "loss": 0.0834, "step": 17197 }, { "epoch": 0.37896290909892194, "grad_norm": 0.7294957041740417, "learning_rate": 2.1397187618769074e-05, "loss": 0.1383, "step": 17198 }, { "epoch": 0.3789849443884381, "grad_norm": 0.5916301608085632, "learning_rate": 2.139621930617508e-05, "loss": 0.0605, "step": 17199 }, { "epoch": 0.37900697967795427, "grad_norm": 1.1368159055709839, "learning_rate": 2.1395250961001713e-05, "loss": 0.0975, "step": 17200 }, { "epoch": 0.3790290149674704, "grad_norm": 0.7191513776779175, "learning_rate": 2.13942825832539e-05, "loss": 0.1092, "step": 17201 }, { "epoch": 0.37905105025698654, "grad_norm": 0.6222763657569885, "learning_rate": 2.139331417293658e-05, "loss": 0.0947, "step": 17202 }, { "epoch": 0.3790730855465027, "grad_norm": 0.5869060754776001, "learning_rate": 2.139234573005468e-05, "loss": 0.0854, "step": 17203 }, { "epoch": 0.3790951208360189, "grad_norm": 0.7530717849731445, "learning_rate": 2.139137725461314e-05, "loss": 0.0854, "step": 17204 }, { "epoch": 0.37911715612553504, "grad_norm": 0.7422190308570862, "learning_rate": 2.139040874661689e-05, "loss": 0.0595, "step": 17205 }, { "epoch": 0.3791391914150512, "grad_norm": 0.9157133102416992, "learning_rate": 2.138944020607086e-05, "loss": 0.0933, "step": 17206 }, { "epoch": 0.37916122670456737, "grad_norm": 0.5114741921424866, "learning_rate": 2.1388471632979977e-05, "loss": 0.061, "step": 17207 }, { "epoch": 0.37918326199408353, "grad_norm": 0.8482586741447449, "learning_rate": 2.1387503027349193e-05, "loss": 0.0871, "step": 17208 }, { "epoch": 0.3792052972835997, "grad_norm": 0.661442220211029, "learning_rate": 2.1386534389183422e-05, "loss": 0.0906, "step": 17209 }, { "epoch": 0.37922733257311586, "grad_norm": 0.5781726241111755, "learning_rate": 2.1385565718487608e-05, "loss": 0.103, "step": 17210 }, { "epoch": 0.379249367862632, "grad_norm": 0.7972607612609863, "learning_rate": 2.138459701526669e-05, "loss": 0.0817, "step": 17211 }, { "epoch": 0.3792714031521482, "grad_norm": 1.0993252992630005, "learning_rate": 2.138362827952559e-05, "loss": 0.1193, "step": 17212 }, { "epoch": 0.3792934384416643, "grad_norm": 0.743583619594574, "learning_rate": 2.138265951126925e-05, "loss": 0.1093, "step": 17213 }, { "epoch": 0.37931547373118046, "grad_norm": 0.6565268635749817, "learning_rate": 2.13816907105026e-05, "loss": 0.0869, "step": 17214 }, { "epoch": 0.37933750902069663, "grad_norm": 0.7126291990280151, "learning_rate": 2.1380721877230577e-05, "loss": 0.0891, "step": 17215 }, { "epoch": 0.3793595443102128, "grad_norm": 0.6244480609893799, "learning_rate": 2.137975301145812e-05, "loss": 0.0942, "step": 17216 }, { "epoch": 0.37938157959972896, "grad_norm": 0.7041870355606079, "learning_rate": 2.1378784113190153e-05, "loss": 0.091, "step": 17217 }, { "epoch": 0.3794036148892451, "grad_norm": 0.7211379408836365, "learning_rate": 2.1377815182431627e-05, "loss": 0.0713, "step": 17218 }, { "epoch": 0.3794256501787613, "grad_norm": 0.7530606389045715, "learning_rate": 2.137684621918746e-05, "loss": 0.1034, "step": 17219 }, { "epoch": 0.37944768546827745, "grad_norm": 0.6783199906349182, "learning_rate": 2.1375877223462603e-05, "loss": 0.0836, "step": 17220 }, { "epoch": 0.3794697207577936, "grad_norm": 0.4984530210494995, "learning_rate": 2.1374908195261978e-05, "loss": 0.0675, "step": 17221 }, { "epoch": 0.3794917560473098, "grad_norm": 0.8867143392562866, "learning_rate": 2.1373939134590532e-05, "loss": 0.1246, "step": 17222 }, { "epoch": 0.37951379133682595, "grad_norm": 0.7026920914649963, "learning_rate": 2.1372970041453194e-05, "loss": 0.1032, "step": 17223 }, { "epoch": 0.3795358266263421, "grad_norm": 0.6921208500862122, "learning_rate": 2.1372000915854904e-05, "loss": 0.1124, "step": 17224 }, { "epoch": 0.3795578619158583, "grad_norm": 0.5842906832695007, "learning_rate": 2.1371031757800592e-05, "loss": 0.0821, "step": 17225 }, { "epoch": 0.3795798972053744, "grad_norm": 0.7511463761329651, "learning_rate": 2.1370062567295197e-05, "loss": 0.0958, "step": 17226 }, { "epoch": 0.37960193249489055, "grad_norm": 0.6944553256034851, "learning_rate": 2.1369093344343663e-05, "loss": 0.0681, "step": 17227 }, { "epoch": 0.3796239677844067, "grad_norm": 0.547736644744873, "learning_rate": 2.136812408895092e-05, "loss": 0.1019, "step": 17228 }, { "epoch": 0.3796460030739229, "grad_norm": 1.3526605367660522, "learning_rate": 2.1367154801121907e-05, "loss": 0.0937, "step": 17229 }, { "epoch": 0.37966803836343904, "grad_norm": 0.45241737365722656, "learning_rate": 2.1366185480861557e-05, "loss": 0.0811, "step": 17230 }, { "epoch": 0.3796900736529552, "grad_norm": 0.5288161039352417, "learning_rate": 2.1365216128174815e-05, "loss": 0.0567, "step": 17231 }, { "epoch": 0.37971210894247137, "grad_norm": 0.6257076859474182, "learning_rate": 2.1364246743066613e-05, "loss": 0.0826, "step": 17232 }, { "epoch": 0.37973414423198754, "grad_norm": 0.7542537450790405, "learning_rate": 2.1363277325541887e-05, "loss": 0.0876, "step": 17233 }, { "epoch": 0.3797561795215037, "grad_norm": 0.8551121354103088, "learning_rate": 2.1362307875605577e-05, "loss": 0.0802, "step": 17234 }, { "epoch": 0.37977821481101987, "grad_norm": 0.6280112266540527, "learning_rate": 2.1361338393262624e-05, "loss": 0.0952, "step": 17235 }, { "epoch": 0.37980025010053603, "grad_norm": 0.6794717311859131, "learning_rate": 2.136036887851796e-05, "loss": 0.1197, "step": 17236 }, { "epoch": 0.3798222853900522, "grad_norm": 0.7481405138969421, "learning_rate": 2.1359399331376533e-05, "loss": 0.1468, "step": 17237 }, { "epoch": 0.3798443206795683, "grad_norm": 1.0021573305130005, "learning_rate": 2.135842975184327e-05, "loss": 0.0934, "step": 17238 }, { "epoch": 0.37986635596908447, "grad_norm": 0.7140572667121887, "learning_rate": 2.1357460139923118e-05, "loss": 0.1113, "step": 17239 }, { "epoch": 0.37988839125860063, "grad_norm": 0.6189897060394287, "learning_rate": 2.1356490495621008e-05, "loss": 0.0829, "step": 17240 }, { "epoch": 0.3799104265481168, "grad_norm": 0.8722425103187561, "learning_rate": 2.1355520818941883e-05, "loss": 0.0812, "step": 17241 }, { "epoch": 0.37993246183763296, "grad_norm": 0.5669612288475037, "learning_rate": 2.1354551109890683e-05, "loss": 0.098, "step": 17242 }, { "epoch": 0.3799544971271491, "grad_norm": 0.4900911748409271, "learning_rate": 2.135358136847235e-05, "loss": 0.0723, "step": 17243 }, { "epoch": 0.3799765324166653, "grad_norm": 0.8011273145675659, "learning_rate": 2.1352611594691817e-05, "loss": 0.0757, "step": 17244 }, { "epoch": 0.37999856770618146, "grad_norm": 1.007249116897583, "learning_rate": 2.135164178855403e-05, "loss": 0.1067, "step": 17245 }, { "epoch": 0.3800206029956976, "grad_norm": 0.7547706961631775, "learning_rate": 2.135067195006392e-05, "loss": 0.0885, "step": 17246 }, { "epoch": 0.3800426382852138, "grad_norm": 0.3358364999294281, "learning_rate": 2.134970207922644e-05, "loss": 0.0892, "step": 17247 }, { "epoch": 0.38006467357472995, "grad_norm": 0.8597936034202576, "learning_rate": 2.1348732176046513e-05, "loss": 0.1226, "step": 17248 }, { "epoch": 0.3800867088642461, "grad_norm": 0.9993793368339539, "learning_rate": 2.1347762240529092e-05, "loss": 0.112, "step": 17249 }, { "epoch": 0.3801087441537622, "grad_norm": 0.8929299116134644, "learning_rate": 2.1346792272679115e-05, "loss": 0.0831, "step": 17250 }, { "epoch": 0.3801307794432784, "grad_norm": 0.7374457120895386, "learning_rate": 2.1345822272501522e-05, "loss": 0.1143, "step": 17251 }, { "epoch": 0.38015281473279455, "grad_norm": 0.47926411032676697, "learning_rate": 2.1344852240001254e-05, "loss": 0.095, "step": 17252 }, { "epoch": 0.3801748500223107, "grad_norm": 0.8405956625938416, "learning_rate": 2.134388217518325e-05, "loss": 0.1136, "step": 17253 }, { "epoch": 0.3801968853118269, "grad_norm": 0.7135399580001831, "learning_rate": 2.1342912078052454e-05, "loss": 0.1132, "step": 17254 }, { "epoch": 0.38021892060134305, "grad_norm": 0.9123258590698242, "learning_rate": 2.1341941948613802e-05, "loss": 0.1205, "step": 17255 }, { "epoch": 0.3802409558908592, "grad_norm": 0.7760860919952393, "learning_rate": 2.134097178687224e-05, "loss": 0.0967, "step": 17256 }, { "epoch": 0.3802629911803754, "grad_norm": 0.7692577838897705, "learning_rate": 2.134000159283271e-05, "loss": 0.0772, "step": 17257 }, { "epoch": 0.38028502646989154, "grad_norm": 0.7125764489173889, "learning_rate": 2.1339031366500153e-05, "loss": 0.0711, "step": 17258 }, { "epoch": 0.3803070617594077, "grad_norm": 0.5717313885688782, "learning_rate": 2.1338061107879503e-05, "loss": 0.0562, "step": 17259 }, { "epoch": 0.38032909704892387, "grad_norm": 0.8329522013664246, "learning_rate": 2.1337090816975718e-05, "loss": 0.1127, "step": 17260 }, { "epoch": 0.38035113233844003, "grad_norm": 1.201458215713501, "learning_rate": 2.133612049379373e-05, "loss": 0.0877, "step": 17261 }, { "epoch": 0.3803731676279562, "grad_norm": 0.8328469395637512, "learning_rate": 2.133515013833848e-05, "loss": 0.0485, "step": 17262 }, { "epoch": 0.3803952029174723, "grad_norm": 0.8488159775733948, "learning_rate": 2.133417975061491e-05, "loss": 0.1059, "step": 17263 }, { "epoch": 0.3804172382069885, "grad_norm": 0.712762176990509, "learning_rate": 2.133320933062797e-05, "loss": 0.1244, "step": 17264 }, { "epoch": 0.38043927349650464, "grad_norm": 0.4435111880302429, "learning_rate": 2.1332238878382598e-05, "loss": 0.0798, "step": 17265 }, { "epoch": 0.3804613087860208, "grad_norm": 0.7487248182296753, "learning_rate": 2.1331268393883737e-05, "loss": 0.0831, "step": 17266 }, { "epoch": 0.38048334407553697, "grad_norm": 1.2102243900299072, "learning_rate": 2.133029787713633e-05, "loss": 0.067, "step": 17267 }, { "epoch": 0.38050537936505313, "grad_norm": 0.799776017665863, "learning_rate": 2.1329327328145324e-05, "loss": 0.1004, "step": 17268 }, { "epoch": 0.3805274146545693, "grad_norm": 3.0087621212005615, "learning_rate": 2.1328356746915662e-05, "loss": 0.0943, "step": 17269 }, { "epoch": 0.38054944994408546, "grad_norm": 1.4440702199935913, "learning_rate": 2.132738613345228e-05, "loss": 0.0803, "step": 17270 }, { "epoch": 0.3805714852336016, "grad_norm": 0.6202821731567383, "learning_rate": 2.132641548776013e-05, "loss": 0.1184, "step": 17271 }, { "epoch": 0.3805935205231178, "grad_norm": 0.5382614135742188, "learning_rate": 2.132544480984415e-05, "loss": 0.0885, "step": 17272 }, { "epoch": 0.38061555581263395, "grad_norm": 1.0481972694396973, "learning_rate": 2.1324474099709294e-05, "loss": 0.1133, "step": 17273 }, { "epoch": 0.3806375911021501, "grad_norm": 1.1779359579086304, "learning_rate": 2.132350335736049e-05, "loss": 0.0755, "step": 17274 }, { "epoch": 0.38065962639166623, "grad_norm": 0.7365292906761169, "learning_rate": 2.1322532582802702e-05, "loss": 0.0882, "step": 17275 }, { "epoch": 0.3806816616811824, "grad_norm": 0.7969805002212524, "learning_rate": 2.1321561776040862e-05, "loss": 0.0612, "step": 17276 }, { "epoch": 0.38070369697069856, "grad_norm": 0.9173728227615356, "learning_rate": 2.132059093707992e-05, "loss": 0.112, "step": 17277 }, { "epoch": 0.3807257322602147, "grad_norm": 0.7032520174980164, "learning_rate": 2.131962006592482e-05, "loss": 0.0837, "step": 17278 }, { "epoch": 0.3807477675497309, "grad_norm": 1.142282247543335, "learning_rate": 2.1318649162580504e-05, "loss": 0.0643, "step": 17279 }, { "epoch": 0.38076980283924705, "grad_norm": 0.7917296290397644, "learning_rate": 2.131767822705192e-05, "loss": 0.0674, "step": 17280 }, { "epoch": 0.3807918381287632, "grad_norm": 0.5774410963058472, "learning_rate": 2.1316707259344007e-05, "loss": 0.0973, "step": 17281 }, { "epoch": 0.3808138734182794, "grad_norm": 0.8998908400535583, "learning_rate": 2.131573625946172e-05, "loss": 0.0933, "step": 17282 }, { "epoch": 0.38083590870779555, "grad_norm": 0.7900334596633911, "learning_rate": 2.1314765227410005e-05, "loss": 0.1121, "step": 17283 }, { "epoch": 0.3808579439973117, "grad_norm": 0.6878535151481628, "learning_rate": 2.1313794163193802e-05, "loss": 0.0623, "step": 17284 }, { "epoch": 0.3808799792868279, "grad_norm": 0.5543951988220215, "learning_rate": 2.1312823066818062e-05, "loss": 0.096, "step": 17285 }, { "epoch": 0.38090201457634404, "grad_norm": 0.693149745464325, "learning_rate": 2.1311851938287728e-05, "loss": 0.1243, "step": 17286 }, { "epoch": 0.38092404986586015, "grad_norm": 0.5585781335830688, "learning_rate": 2.1310880777607747e-05, "loss": 0.091, "step": 17287 }, { "epoch": 0.3809460851553763, "grad_norm": 0.8429383039474487, "learning_rate": 2.1309909584783065e-05, "loss": 0.0824, "step": 17288 }, { "epoch": 0.3809681204448925, "grad_norm": 0.9773861765861511, "learning_rate": 2.130893835981863e-05, "loss": 0.1278, "step": 17289 }, { "epoch": 0.38099015573440864, "grad_norm": 0.7109647989273071, "learning_rate": 2.130796710271939e-05, "loss": 0.0967, "step": 17290 }, { "epoch": 0.3810121910239248, "grad_norm": 0.9763503074645996, "learning_rate": 2.130699581349029e-05, "loss": 0.0986, "step": 17291 }, { "epoch": 0.38103422631344097, "grad_norm": 0.6939453482627869, "learning_rate": 2.1306024492136274e-05, "loss": 0.0661, "step": 17292 }, { "epoch": 0.38105626160295714, "grad_norm": 0.6308698654174805, "learning_rate": 2.1305053138662304e-05, "loss": 0.0856, "step": 17293 }, { "epoch": 0.3810782968924733, "grad_norm": 0.5303246378898621, "learning_rate": 2.130408175307331e-05, "loss": 0.056, "step": 17294 }, { "epoch": 0.38110033218198947, "grad_norm": 0.7461785674095154, "learning_rate": 2.1303110335374246e-05, "loss": 0.0799, "step": 17295 }, { "epoch": 0.38112236747150563, "grad_norm": 0.9019518494606018, "learning_rate": 2.1302138885570064e-05, "loss": 0.0872, "step": 17296 }, { "epoch": 0.3811444027610218, "grad_norm": 0.6898455023765564, "learning_rate": 2.130116740366571e-05, "loss": 0.0803, "step": 17297 }, { "epoch": 0.38116643805053796, "grad_norm": 1.133370041847229, "learning_rate": 2.1300195889666127e-05, "loss": 0.1396, "step": 17298 }, { "epoch": 0.3811884733400541, "grad_norm": 0.5763731002807617, "learning_rate": 2.129922434357627e-05, "loss": 0.0575, "step": 17299 }, { "epoch": 0.38121050862957023, "grad_norm": 0.9716805815696716, "learning_rate": 2.129825276540109e-05, "loss": 0.0872, "step": 17300 }, { "epoch": 0.3812325439190864, "grad_norm": 0.8989391326904297, "learning_rate": 2.1297281155145528e-05, "loss": 0.0998, "step": 17301 }, { "epoch": 0.38125457920860256, "grad_norm": 0.596477746963501, "learning_rate": 2.129630951281454e-05, "loss": 0.0933, "step": 17302 }, { "epoch": 0.3812766144981187, "grad_norm": 0.4399525821208954, "learning_rate": 2.1295337838413063e-05, "loss": 0.076, "step": 17303 }, { "epoch": 0.3812986497876349, "grad_norm": 1.013915777206421, "learning_rate": 2.1294366131946057e-05, "loss": 0.0988, "step": 17304 }, { "epoch": 0.38132068507715106, "grad_norm": 0.6603627800941467, "learning_rate": 2.129339439341847e-05, "loss": 0.0713, "step": 17305 }, { "epoch": 0.3813427203666672, "grad_norm": 0.9384677410125732, "learning_rate": 2.1292422622835256e-05, "loss": 0.0782, "step": 17306 }, { "epoch": 0.3813647556561834, "grad_norm": 1.173818826675415, "learning_rate": 2.129145082020135e-05, "loss": 0.1037, "step": 17307 }, { "epoch": 0.38138679094569955, "grad_norm": 1.0319639444351196, "learning_rate": 2.1290478985521718e-05, "loss": 0.0928, "step": 17308 }, { "epoch": 0.3814088262352157, "grad_norm": 0.503481388092041, "learning_rate": 2.1289507118801303e-05, "loss": 0.0632, "step": 17309 }, { "epoch": 0.3814308615247319, "grad_norm": 0.9458233118057251, "learning_rate": 2.1288535220045055e-05, "loss": 0.0932, "step": 17310 }, { "epoch": 0.38145289681424804, "grad_norm": 0.6694425940513611, "learning_rate": 2.128756328925792e-05, "loss": 0.0812, "step": 17311 }, { "epoch": 0.38147493210376415, "grad_norm": 0.84220951795578, "learning_rate": 2.128659132644486e-05, "loss": 0.1504, "step": 17312 }, { "epoch": 0.3814969673932803, "grad_norm": 0.4904419779777527, "learning_rate": 2.1285619331610814e-05, "loss": 0.0725, "step": 17313 }, { "epoch": 0.3815190026827965, "grad_norm": 0.7385404706001282, "learning_rate": 2.128464730476074e-05, "loss": 0.0452, "step": 17314 }, { "epoch": 0.38154103797231265, "grad_norm": 0.6859337091445923, "learning_rate": 2.128367524589959e-05, "loss": 0.0694, "step": 17315 }, { "epoch": 0.3815630732618288, "grad_norm": 0.5406516194343567, "learning_rate": 2.128270315503231e-05, "loss": 0.0938, "step": 17316 }, { "epoch": 0.381585108551345, "grad_norm": 0.5276559591293335, "learning_rate": 2.1281731032163855e-05, "loss": 0.1132, "step": 17317 }, { "epoch": 0.38160714384086114, "grad_norm": 0.6421026587486267, "learning_rate": 2.1280758877299172e-05, "loss": 0.1114, "step": 17318 }, { "epoch": 0.3816291791303773, "grad_norm": 0.6258311867713928, "learning_rate": 2.127978669044322e-05, "loss": 0.0702, "step": 17319 }, { "epoch": 0.38165121441989347, "grad_norm": 0.8873285055160522, "learning_rate": 2.1278814471600944e-05, "loss": 0.1256, "step": 17320 }, { "epoch": 0.38167324970940963, "grad_norm": 0.6608837842941284, "learning_rate": 2.12778422207773e-05, "loss": 0.0612, "step": 17321 }, { "epoch": 0.3816952849989258, "grad_norm": 1.0209770202636719, "learning_rate": 2.1276869937977236e-05, "loss": 0.0853, "step": 17322 }, { "epoch": 0.38171732028844196, "grad_norm": 0.3878048360347748, "learning_rate": 2.1275897623205713e-05, "loss": 0.0543, "step": 17323 }, { "epoch": 0.3817393555779581, "grad_norm": 0.723792552947998, "learning_rate": 2.1274925276467675e-05, "loss": 0.1217, "step": 17324 }, { "epoch": 0.38176139086747424, "grad_norm": 0.6027804017066956, "learning_rate": 2.127395289776808e-05, "loss": 0.062, "step": 17325 }, { "epoch": 0.3817834261569904, "grad_norm": 0.6348294019699097, "learning_rate": 2.1272980487111876e-05, "loss": 0.107, "step": 17326 }, { "epoch": 0.38180546144650657, "grad_norm": 0.5289427638053894, "learning_rate": 2.1272008044504023e-05, "loss": 0.096, "step": 17327 }, { "epoch": 0.38182749673602273, "grad_norm": 0.5997738242149353, "learning_rate": 2.1271035569949462e-05, "loss": 0.0811, "step": 17328 }, { "epoch": 0.3818495320255389, "grad_norm": 0.9884282350540161, "learning_rate": 2.1270063063453163e-05, "loss": 0.0649, "step": 17329 }, { "epoch": 0.38187156731505506, "grad_norm": 0.8888514637947083, "learning_rate": 2.1269090525020062e-05, "loss": 0.0679, "step": 17330 }, { "epoch": 0.3818936026045712, "grad_norm": 0.820478618144989, "learning_rate": 2.1268117954655128e-05, "loss": 0.1106, "step": 17331 }, { "epoch": 0.3819156378940874, "grad_norm": 0.8674890995025635, "learning_rate": 2.1267145352363302e-05, "loss": 0.0574, "step": 17332 }, { "epoch": 0.38193767318360355, "grad_norm": 0.9753574132919312, "learning_rate": 2.126617271814955e-05, "loss": 0.0819, "step": 17333 }, { "epoch": 0.3819597084731197, "grad_norm": 0.5778481960296631, "learning_rate": 2.126520005201882e-05, "loss": 0.097, "step": 17334 }, { "epoch": 0.3819817437626359, "grad_norm": 0.6681706309318542, "learning_rate": 2.126422735397606e-05, "loss": 0.0711, "step": 17335 }, { "epoch": 0.38200377905215205, "grad_norm": 1.8580248355865479, "learning_rate": 2.1263254624026233e-05, "loss": 0.0926, "step": 17336 }, { "epoch": 0.38202581434166816, "grad_norm": 0.49327489733695984, "learning_rate": 2.126228186217429e-05, "loss": 0.0713, "step": 17337 }, { "epoch": 0.3820478496311843, "grad_norm": 0.8587260246276855, "learning_rate": 2.1261309068425192e-05, "loss": 0.0696, "step": 17338 }, { "epoch": 0.3820698849207005, "grad_norm": 0.7478148937225342, "learning_rate": 2.1260336242783887e-05, "loss": 0.0733, "step": 17339 }, { "epoch": 0.38209192021021665, "grad_norm": 0.6837975978851318, "learning_rate": 2.125936338525534e-05, "loss": 0.0708, "step": 17340 }, { "epoch": 0.3821139554997328, "grad_norm": 0.5158625245094299, "learning_rate": 2.125839049584449e-05, "loss": 0.0756, "step": 17341 }, { "epoch": 0.382135990789249, "grad_norm": 0.4676533639431, "learning_rate": 2.1257417574556303e-05, "loss": 0.0838, "step": 17342 }, { "epoch": 0.38215802607876515, "grad_norm": 0.7492944002151489, "learning_rate": 2.1256444621395732e-05, "loss": 0.0758, "step": 17343 }, { "epoch": 0.3821800613682813, "grad_norm": 0.70186448097229, "learning_rate": 2.1255471636367736e-05, "loss": 0.0845, "step": 17344 }, { "epoch": 0.3822020966577975, "grad_norm": 0.6894422173500061, "learning_rate": 2.1254498619477264e-05, "loss": 0.0789, "step": 17345 }, { "epoch": 0.38222413194731364, "grad_norm": 0.7396506667137146, "learning_rate": 2.1253525570729283e-05, "loss": 0.0648, "step": 17346 }, { "epoch": 0.3822461672368298, "grad_norm": 0.4636339247226715, "learning_rate": 2.125255249012874e-05, "loss": 0.064, "step": 17347 }, { "epoch": 0.38226820252634597, "grad_norm": 0.6122584939002991, "learning_rate": 2.1251579377680594e-05, "loss": 0.0579, "step": 17348 }, { "epoch": 0.3822902378158621, "grad_norm": 0.9575889110565186, "learning_rate": 2.12506062333898e-05, "loss": 0.0896, "step": 17349 }, { "epoch": 0.38231227310537824, "grad_norm": 0.6812251210212708, "learning_rate": 2.1249633057261322e-05, "loss": 0.0766, "step": 17350 }, { "epoch": 0.3823343083948944, "grad_norm": 0.6282948851585388, "learning_rate": 2.1248659849300105e-05, "loss": 0.0707, "step": 17351 }, { "epoch": 0.38235634368441057, "grad_norm": 0.5122347474098206, "learning_rate": 2.1247686609511117e-05, "loss": 0.0811, "step": 17352 }, { "epoch": 0.38237837897392674, "grad_norm": 0.9029929637908936, "learning_rate": 2.1246713337899308e-05, "loss": 0.0823, "step": 17353 }, { "epoch": 0.3824004142634429, "grad_norm": 0.5957341194152832, "learning_rate": 2.1245740034469638e-05, "loss": 0.0909, "step": 17354 }, { "epoch": 0.38242244955295907, "grad_norm": 0.7634414434432983, "learning_rate": 2.1244766699227064e-05, "loss": 0.089, "step": 17355 }, { "epoch": 0.38244448484247523, "grad_norm": 0.5602501034736633, "learning_rate": 2.124379333217655e-05, "loss": 0.0641, "step": 17356 }, { "epoch": 0.3824665201319914, "grad_norm": 0.532379686832428, "learning_rate": 2.1242819933323048e-05, "loss": 0.0866, "step": 17357 }, { "epoch": 0.38248855542150756, "grad_norm": 1.1027326583862305, "learning_rate": 2.1241846502671516e-05, "loss": 0.0983, "step": 17358 }, { "epoch": 0.3825105907110237, "grad_norm": 0.8005605340003967, "learning_rate": 2.1240873040226904e-05, "loss": 0.0692, "step": 17359 }, { "epoch": 0.3825326260005399, "grad_norm": 0.7839850187301636, "learning_rate": 2.1239899545994186e-05, "loss": 0.085, "step": 17360 }, { "epoch": 0.382554661290056, "grad_norm": 0.566889762878418, "learning_rate": 2.1238926019978314e-05, "loss": 0.1047, "step": 17361 }, { "epoch": 0.38257669657957216, "grad_norm": 0.46736395359039307, "learning_rate": 2.1237952462184244e-05, "loss": 0.0637, "step": 17362 }, { "epoch": 0.3825987318690883, "grad_norm": 0.7706587314605713, "learning_rate": 2.1236978872616937e-05, "loss": 0.102, "step": 17363 }, { "epoch": 0.3826207671586045, "grad_norm": 0.5536518096923828, "learning_rate": 2.1236005251281355e-05, "loss": 0.0629, "step": 17364 }, { "epoch": 0.38264280244812066, "grad_norm": 1.091005802154541, "learning_rate": 2.1235031598182454e-05, "loss": 0.1387, "step": 17365 }, { "epoch": 0.3826648377376368, "grad_norm": 0.7247713208198547, "learning_rate": 2.1234057913325188e-05, "loss": 0.0564, "step": 17366 }, { "epoch": 0.382686873027153, "grad_norm": 0.5020015835762024, "learning_rate": 2.1233084196714526e-05, "loss": 0.0696, "step": 17367 }, { "epoch": 0.38270890831666915, "grad_norm": 0.5685167908668518, "learning_rate": 2.1232110448355424e-05, "loss": 0.0869, "step": 17368 }, { "epoch": 0.3827309436061853, "grad_norm": 0.5078503489494324, "learning_rate": 2.1231136668252843e-05, "loss": 0.0712, "step": 17369 }, { "epoch": 0.3827529788957015, "grad_norm": 0.9827606678009033, "learning_rate": 2.1230162856411738e-05, "loss": 0.0763, "step": 17370 }, { "epoch": 0.38277501418521764, "grad_norm": 0.7494532465934753, "learning_rate": 2.1229189012837076e-05, "loss": 0.0721, "step": 17371 }, { "epoch": 0.3827970494747338, "grad_norm": 1.180198311805725, "learning_rate": 2.1228215137533816e-05, "loss": 0.1028, "step": 17372 }, { "epoch": 0.38281908476425, "grad_norm": 1.0441398620605469, "learning_rate": 2.1227241230506912e-05, "loss": 0.112, "step": 17373 }, { "epoch": 0.3828411200537661, "grad_norm": 0.5101728439331055, "learning_rate": 2.122626729176133e-05, "loss": 0.0617, "step": 17374 }, { "epoch": 0.38286315534328225, "grad_norm": 0.8162574172019958, "learning_rate": 2.122529332130203e-05, "loss": 0.0847, "step": 17375 }, { "epoch": 0.3828851906327984, "grad_norm": 0.44896960258483887, "learning_rate": 2.122431931913397e-05, "loss": 0.0498, "step": 17376 }, { "epoch": 0.3829072259223146, "grad_norm": 0.5292906761169434, "learning_rate": 2.1223345285262116e-05, "loss": 0.0665, "step": 17377 }, { "epoch": 0.38292926121183074, "grad_norm": 0.9515035152435303, "learning_rate": 2.1222371219691428e-05, "loss": 0.0917, "step": 17378 }, { "epoch": 0.3829512965013469, "grad_norm": 0.6205436587333679, "learning_rate": 2.1221397122426867e-05, "loss": 0.0785, "step": 17379 }, { "epoch": 0.38297333179086307, "grad_norm": 0.7281634211540222, "learning_rate": 2.122042299347339e-05, "loss": 0.0873, "step": 17380 }, { "epoch": 0.38299536708037923, "grad_norm": 0.6112515926361084, "learning_rate": 2.1219448832835974e-05, "loss": 0.0756, "step": 17381 }, { "epoch": 0.3830174023698954, "grad_norm": 0.6706395745277405, "learning_rate": 2.121847464051956e-05, "loss": 0.0816, "step": 17382 }, { "epoch": 0.38303943765941156, "grad_norm": 1.1623259782791138, "learning_rate": 2.1217500416529124e-05, "loss": 0.0795, "step": 17383 }, { "epoch": 0.38306147294892773, "grad_norm": 0.7956778407096863, "learning_rate": 2.1216526160869623e-05, "loss": 0.1172, "step": 17384 }, { "epoch": 0.3830835082384439, "grad_norm": 0.7117040157318115, "learning_rate": 2.121555187354602e-05, "loss": 0.0621, "step": 17385 }, { "epoch": 0.38310554352796, "grad_norm": 0.861177384853363, "learning_rate": 2.121457755456328e-05, "loss": 0.108, "step": 17386 }, { "epoch": 0.38312757881747617, "grad_norm": 0.8844444751739502, "learning_rate": 2.121360320392636e-05, "loss": 0.0823, "step": 17387 }, { "epoch": 0.38314961410699233, "grad_norm": 0.7816156148910522, "learning_rate": 2.1212628821640235e-05, "loss": 0.1091, "step": 17388 }, { "epoch": 0.3831716493965085, "grad_norm": 0.749965250492096, "learning_rate": 2.1211654407709853e-05, "loss": 0.0818, "step": 17389 }, { "epoch": 0.38319368468602466, "grad_norm": 0.7002550959587097, "learning_rate": 2.121067996214019e-05, "loss": 0.0893, "step": 17390 }, { "epoch": 0.3832157199755408, "grad_norm": 0.6751217842102051, "learning_rate": 2.1209705484936196e-05, "loss": 0.1092, "step": 17391 }, { "epoch": 0.383237755265057, "grad_norm": 0.7128788828849792, "learning_rate": 2.1208730976102846e-05, "loss": 0.0905, "step": 17392 }, { "epoch": 0.38325979055457315, "grad_norm": 0.6144430041313171, "learning_rate": 2.1207756435645098e-05, "loss": 0.0798, "step": 17393 }, { "epoch": 0.3832818258440893, "grad_norm": 0.5047809481620789, "learning_rate": 2.120678186356792e-05, "loss": 0.054, "step": 17394 }, { "epoch": 0.3833038611336055, "grad_norm": 0.8243920803070068, "learning_rate": 2.1205807259876273e-05, "loss": 0.0889, "step": 17395 }, { "epoch": 0.38332589642312165, "grad_norm": 1.215415596961975, "learning_rate": 2.1204832624575124e-05, "loss": 0.1173, "step": 17396 }, { "epoch": 0.3833479317126378, "grad_norm": 0.8441333174705505, "learning_rate": 2.1203857957669434e-05, "loss": 0.0603, "step": 17397 }, { "epoch": 0.3833699670021539, "grad_norm": 0.5398091673851013, "learning_rate": 2.1202883259164164e-05, "loss": 0.0616, "step": 17398 }, { "epoch": 0.3833920022916701, "grad_norm": 0.44462981820106506, "learning_rate": 2.1201908529064287e-05, "loss": 0.0766, "step": 17399 }, { "epoch": 0.38341403758118625, "grad_norm": 1.2197967767715454, "learning_rate": 2.1200933767374766e-05, "loss": 0.0971, "step": 17400 }, { "epoch": 0.3834360728707024, "grad_norm": 0.8495993614196777, "learning_rate": 2.119995897410056e-05, "loss": 0.079, "step": 17401 }, { "epoch": 0.3834581081602186, "grad_norm": 0.5091913938522339, "learning_rate": 2.119898414924664e-05, "loss": 0.1064, "step": 17402 }, { "epoch": 0.38348014344973474, "grad_norm": 0.738770604133606, "learning_rate": 2.119800929281797e-05, "loss": 0.105, "step": 17403 }, { "epoch": 0.3835021787392509, "grad_norm": 0.618211567401886, "learning_rate": 2.119703440481952e-05, "loss": 0.0887, "step": 17404 }, { "epoch": 0.3835242140287671, "grad_norm": 0.5934737920761108, "learning_rate": 2.119605948525624e-05, "loss": 0.0871, "step": 17405 }, { "epoch": 0.38354624931828324, "grad_norm": 0.9052784442901611, "learning_rate": 2.119508453413311e-05, "loss": 0.0966, "step": 17406 }, { "epoch": 0.3835682846077994, "grad_norm": 0.6416744589805603, "learning_rate": 2.11941095514551e-05, "loss": 0.0676, "step": 17407 }, { "epoch": 0.38359031989731557, "grad_norm": 0.7090700268745422, "learning_rate": 2.119313453722716e-05, "loss": 0.1264, "step": 17408 }, { "epoch": 0.38361235518683173, "grad_norm": 1.0139905214309692, "learning_rate": 2.1192159491454267e-05, "loss": 0.0898, "step": 17409 }, { "epoch": 0.3836343904763479, "grad_norm": 0.8716208934783936, "learning_rate": 2.1191184414141385e-05, "loss": 0.0846, "step": 17410 }, { "epoch": 0.383656425765864, "grad_norm": 0.559178352355957, "learning_rate": 2.1190209305293484e-05, "loss": 0.0684, "step": 17411 }, { "epoch": 0.38367846105538017, "grad_norm": 0.9289243221282959, "learning_rate": 2.1189234164915526e-05, "loss": 0.105, "step": 17412 }, { "epoch": 0.38370049634489634, "grad_norm": 0.5761090517044067, "learning_rate": 2.118825899301248e-05, "loss": 0.0802, "step": 17413 }, { "epoch": 0.3837225316344125, "grad_norm": 0.7980669736862183, "learning_rate": 2.1187283789589307e-05, "loss": 0.1034, "step": 17414 }, { "epoch": 0.38374456692392866, "grad_norm": 0.479749470949173, "learning_rate": 2.1186308554650987e-05, "loss": 0.1308, "step": 17415 }, { "epoch": 0.38376660221344483, "grad_norm": 0.5632535815238953, "learning_rate": 2.1185333288202475e-05, "loss": 0.0603, "step": 17416 }, { "epoch": 0.383788637502961, "grad_norm": 0.6499543190002441, "learning_rate": 2.118435799024875e-05, "loss": 0.0877, "step": 17417 }, { "epoch": 0.38381067279247716, "grad_norm": 0.770301103591919, "learning_rate": 2.1183382660794765e-05, "loss": 0.0724, "step": 17418 }, { "epoch": 0.3838327080819933, "grad_norm": 0.6120905876159668, "learning_rate": 2.1182407299845504e-05, "loss": 0.0742, "step": 17419 }, { "epoch": 0.3838547433715095, "grad_norm": 0.623030424118042, "learning_rate": 2.1181431907405924e-05, "loss": 0.113, "step": 17420 }, { "epoch": 0.38387677866102565, "grad_norm": 0.6130151152610779, "learning_rate": 2.1180456483480997e-05, "loss": 0.0807, "step": 17421 }, { "epoch": 0.3838988139505418, "grad_norm": 0.8161023259162903, "learning_rate": 2.1179481028075687e-05, "loss": 0.0668, "step": 17422 }, { "epoch": 0.3839208492400579, "grad_norm": 0.5529637336730957, "learning_rate": 2.1178505541194972e-05, "loss": 0.0737, "step": 17423 }, { "epoch": 0.3839428845295741, "grad_norm": 1.0752230882644653, "learning_rate": 2.117753002284381e-05, "loss": 0.0546, "step": 17424 }, { "epoch": 0.38396491981909026, "grad_norm": 0.6822798848152161, "learning_rate": 2.117655447302718e-05, "loss": 0.0764, "step": 17425 }, { "epoch": 0.3839869551086064, "grad_norm": 0.5710639357566833, "learning_rate": 2.1175578891750043e-05, "loss": 0.0777, "step": 17426 }, { "epoch": 0.3840089903981226, "grad_norm": 0.5652956366539001, "learning_rate": 2.1174603279017373e-05, "loss": 0.0752, "step": 17427 }, { "epoch": 0.38403102568763875, "grad_norm": 0.5224572420120239, "learning_rate": 2.117362763483414e-05, "loss": 0.0682, "step": 17428 }, { "epoch": 0.3840530609771549, "grad_norm": 0.8030896782875061, "learning_rate": 2.11726519592053e-05, "loss": 0.0935, "step": 17429 }, { "epoch": 0.3840750962666711, "grad_norm": 0.5196676850318909, "learning_rate": 2.1171676252135844e-05, "loss": 0.0636, "step": 17430 }, { "epoch": 0.38409713155618724, "grad_norm": 0.8451235294342041, "learning_rate": 2.1170700513630727e-05, "loss": 0.1081, "step": 17431 }, { "epoch": 0.3841191668457034, "grad_norm": 1.116021752357483, "learning_rate": 2.1169724743694925e-05, "loss": 0.0884, "step": 17432 }, { "epoch": 0.3841412021352196, "grad_norm": 0.638745903968811, "learning_rate": 2.1168748942333404e-05, "loss": 0.0788, "step": 17433 }, { "epoch": 0.38416323742473574, "grad_norm": 0.6006558537483215, "learning_rate": 2.116777310955114e-05, "loss": 0.0644, "step": 17434 }, { "epoch": 0.3841852727142519, "grad_norm": 0.8340122699737549, "learning_rate": 2.1166797245353095e-05, "loss": 0.0862, "step": 17435 }, { "epoch": 0.384207308003768, "grad_norm": 0.8153696656227112, "learning_rate": 2.116582134974425e-05, "loss": 0.0928, "step": 17436 }, { "epoch": 0.3842293432932842, "grad_norm": 0.7492247819900513, "learning_rate": 2.116484542272957e-05, "loss": 0.0684, "step": 17437 }, { "epoch": 0.38425137858280034, "grad_norm": 1.0413727760314941, "learning_rate": 2.1163869464314027e-05, "loss": 0.1228, "step": 17438 }, { "epoch": 0.3842734138723165, "grad_norm": 0.3636031746864319, "learning_rate": 2.116289347450259e-05, "loss": 0.0718, "step": 17439 }, { "epoch": 0.38429544916183267, "grad_norm": 0.4879469573497772, "learning_rate": 2.1161917453300234e-05, "loss": 0.0559, "step": 17440 }, { "epoch": 0.38431748445134883, "grad_norm": 0.7444091439247131, "learning_rate": 2.1160941400711922e-05, "loss": 0.1029, "step": 17441 }, { "epoch": 0.384339519740865, "grad_norm": 0.9064431190490723, "learning_rate": 2.1159965316742636e-05, "loss": 0.1013, "step": 17442 }, { "epoch": 0.38436155503038116, "grad_norm": 0.507900059223175, "learning_rate": 2.1158989201397342e-05, "loss": 0.0911, "step": 17443 }, { "epoch": 0.38438359031989733, "grad_norm": 0.6747925877571106, "learning_rate": 2.1158013054681013e-05, "loss": 0.0871, "step": 17444 }, { "epoch": 0.3844056256094135, "grad_norm": 0.701800525188446, "learning_rate": 2.1157036876598625e-05, "loss": 0.067, "step": 17445 }, { "epoch": 0.38442766089892966, "grad_norm": 1.010688304901123, "learning_rate": 2.1156060667155146e-05, "loss": 0.0652, "step": 17446 }, { "epoch": 0.3844496961884458, "grad_norm": 0.41092318296432495, "learning_rate": 2.115508442635554e-05, "loss": 0.0553, "step": 17447 }, { "epoch": 0.38447173147796193, "grad_norm": 0.9067981839179993, "learning_rate": 2.11541081542048e-05, "loss": 0.0917, "step": 17448 }, { "epoch": 0.3844937667674781, "grad_norm": 0.9682359099388123, "learning_rate": 2.115313185070788e-05, "loss": 0.1004, "step": 17449 }, { "epoch": 0.38451580205699426, "grad_norm": 0.8119693994522095, "learning_rate": 2.115215551586976e-05, "loss": 0.0905, "step": 17450 }, { "epoch": 0.3845378373465104, "grad_norm": 0.6391499638557434, "learning_rate": 2.115117914969542e-05, "loss": 0.0898, "step": 17451 }, { "epoch": 0.3845598726360266, "grad_norm": 0.6480546593666077, "learning_rate": 2.115020275218982e-05, "loss": 0.0727, "step": 17452 }, { "epoch": 0.38458190792554275, "grad_norm": 0.8627767562866211, "learning_rate": 2.114922632335794e-05, "loss": 0.0932, "step": 17453 }, { "epoch": 0.3846039432150589, "grad_norm": 0.8840965628623962, "learning_rate": 2.1148249863204754e-05, "loss": 0.0693, "step": 17454 }, { "epoch": 0.3846259785045751, "grad_norm": 1.0482795238494873, "learning_rate": 2.114727337173523e-05, "loss": 0.0985, "step": 17455 }, { "epoch": 0.38464801379409125, "grad_norm": 0.7883305549621582, "learning_rate": 2.1146296848954354e-05, "loss": 0.1291, "step": 17456 }, { "epoch": 0.3846700490836074, "grad_norm": 0.6619472503662109, "learning_rate": 2.1145320294867087e-05, "loss": 0.0481, "step": 17457 }, { "epoch": 0.3846920843731236, "grad_norm": 0.8319331407546997, "learning_rate": 2.114434370947841e-05, "loss": 0.1052, "step": 17458 }, { "epoch": 0.38471411966263974, "grad_norm": 0.9985330104827881, "learning_rate": 2.1143367092793294e-05, "loss": 0.1206, "step": 17459 }, { "epoch": 0.38473615495215585, "grad_norm": 0.8097489476203918, "learning_rate": 2.114239044481672e-05, "loss": 0.0986, "step": 17460 }, { "epoch": 0.384758190241672, "grad_norm": 0.6671568155288696, "learning_rate": 2.1141413765553652e-05, "loss": 0.1074, "step": 17461 }, { "epoch": 0.3847802255311882, "grad_norm": 0.6093481183052063, "learning_rate": 2.1140437055009074e-05, "loss": 0.1346, "step": 17462 }, { "epoch": 0.38480226082070434, "grad_norm": 0.9042543768882751, "learning_rate": 2.1139460313187956e-05, "loss": 0.0938, "step": 17463 }, { "epoch": 0.3848242961102205, "grad_norm": 0.8532770872116089, "learning_rate": 2.1138483540095273e-05, "loss": 0.1113, "step": 17464 }, { "epoch": 0.3848463313997367, "grad_norm": 1.053584337234497, "learning_rate": 2.1137506735736e-05, "loss": 0.0962, "step": 17465 }, { "epoch": 0.38486836668925284, "grad_norm": 0.6217700242996216, "learning_rate": 2.113652990011512e-05, "loss": 0.0605, "step": 17466 }, { "epoch": 0.384890401978769, "grad_norm": 0.7311344146728516, "learning_rate": 2.11355530332376e-05, "loss": 0.0729, "step": 17467 }, { "epoch": 0.38491243726828517, "grad_norm": 0.6335144639015198, "learning_rate": 2.113457613510842e-05, "loss": 0.09, "step": 17468 }, { "epoch": 0.38493447255780133, "grad_norm": 1.2405344247817993, "learning_rate": 2.113359920573255e-05, "loss": 0.0874, "step": 17469 }, { "epoch": 0.3849565078473175, "grad_norm": 0.7940754890441895, "learning_rate": 2.1132622245114972e-05, "loss": 0.1065, "step": 17470 }, { "epoch": 0.38497854313683366, "grad_norm": 0.5594837665557861, "learning_rate": 2.1131645253260658e-05, "loss": 0.0596, "step": 17471 }, { "epoch": 0.3850005784263498, "grad_norm": 0.5735739469528198, "learning_rate": 2.113066823017459e-05, "loss": 0.0886, "step": 17472 }, { "epoch": 0.38502261371586594, "grad_norm": 0.5680385828018188, "learning_rate": 2.1129691175861737e-05, "loss": 0.0763, "step": 17473 }, { "epoch": 0.3850446490053821, "grad_norm": 0.7334325909614563, "learning_rate": 2.1128714090327083e-05, "loss": 0.0833, "step": 17474 }, { "epoch": 0.38506668429489826, "grad_norm": 1.0238672494888306, "learning_rate": 2.1127736973575603e-05, "loss": 0.0832, "step": 17475 }, { "epoch": 0.38508871958441443, "grad_norm": 0.622223436832428, "learning_rate": 2.1126759825612274e-05, "loss": 0.0852, "step": 17476 }, { "epoch": 0.3851107548739306, "grad_norm": 0.722518801689148, "learning_rate": 2.112578264644207e-05, "loss": 0.0667, "step": 17477 }, { "epoch": 0.38513279016344676, "grad_norm": 0.7735962867736816, "learning_rate": 2.1124805436069964e-05, "loss": 0.0772, "step": 17478 }, { "epoch": 0.3851548254529629, "grad_norm": 0.8702720403671265, "learning_rate": 2.1123828194500946e-05, "loss": 0.1349, "step": 17479 }, { "epoch": 0.3851768607424791, "grad_norm": 0.984138011932373, "learning_rate": 2.1122850921739987e-05, "loss": 0.1031, "step": 17480 }, { "epoch": 0.38519889603199525, "grad_norm": 1.1155107021331787, "learning_rate": 2.112187361779206e-05, "loss": 0.1259, "step": 17481 }, { "epoch": 0.3852209313215114, "grad_norm": 0.6663952469825745, "learning_rate": 2.1120896282662154e-05, "loss": 0.1099, "step": 17482 }, { "epoch": 0.3852429666110276, "grad_norm": 0.7428945302963257, "learning_rate": 2.1119918916355242e-05, "loss": 0.0977, "step": 17483 }, { "epoch": 0.38526500190054375, "grad_norm": 0.9973055124282837, "learning_rate": 2.1118941518876294e-05, "loss": 0.1038, "step": 17484 }, { "epoch": 0.38528703719005986, "grad_norm": 0.47712206840515137, "learning_rate": 2.11179640902303e-05, "loss": 0.0734, "step": 17485 }, { "epoch": 0.385309072479576, "grad_norm": 0.7991025447845459, "learning_rate": 2.1116986630422234e-05, "loss": 0.0859, "step": 17486 }, { "epoch": 0.3853311077690922, "grad_norm": 0.7988693714141846, "learning_rate": 2.111600913945707e-05, "loss": 0.1024, "step": 17487 }, { "epoch": 0.38535314305860835, "grad_norm": 0.6211591958999634, "learning_rate": 2.11150316173398e-05, "loss": 0.0977, "step": 17488 }, { "epoch": 0.3853751783481245, "grad_norm": 0.6422823071479797, "learning_rate": 2.111405406407539e-05, "loss": 0.0976, "step": 17489 }, { "epoch": 0.3853972136376407, "grad_norm": 0.6942975521087646, "learning_rate": 2.1113076479668826e-05, "loss": 0.0816, "step": 17490 }, { "epoch": 0.38541924892715684, "grad_norm": 0.8058503866195679, "learning_rate": 2.1112098864125085e-05, "loss": 0.0869, "step": 17491 }, { "epoch": 0.385441284216673, "grad_norm": 0.905388593673706, "learning_rate": 2.1111121217449146e-05, "loss": 0.0855, "step": 17492 }, { "epoch": 0.3854633195061892, "grad_norm": 0.6658359169960022, "learning_rate": 2.111014353964599e-05, "loss": 0.0849, "step": 17493 }, { "epoch": 0.38548535479570534, "grad_norm": 0.5845731496810913, "learning_rate": 2.1109165830720596e-05, "loss": 0.074, "step": 17494 }, { "epoch": 0.3855073900852215, "grad_norm": 0.9323520660400391, "learning_rate": 2.1108188090677943e-05, "loss": 0.0897, "step": 17495 }, { "epoch": 0.38552942537473767, "grad_norm": 0.5360434651374817, "learning_rate": 2.1107210319523014e-05, "loss": 0.0502, "step": 17496 }, { "epoch": 0.3855514606642538, "grad_norm": 0.8311288356781006, "learning_rate": 2.110623251726079e-05, "loss": 0.0662, "step": 17497 }, { "epoch": 0.38557349595376994, "grad_norm": 1.045455813407898, "learning_rate": 2.110525468389625e-05, "loss": 0.0971, "step": 17498 }, { "epoch": 0.3855955312432861, "grad_norm": 0.3059646189212799, "learning_rate": 2.110427681943437e-05, "loss": 0.0695, "step": 17499 }, { "epoch": 0.38561756653280227, "grad_norm": 0.8131752014160156, "learning_rate": 2.1103298923880136e-05, "loss": 0.1196, "step": 17500 }, { "epoch": 0.38563960182231843, "grad_norm": 1.395420789718628, "learning_rate": 2.1102320997238527e-05, "loss": 0.0992, "step": 17501 }, { "epoch": 0.3856616371118346, "grad_norm": 0.9818911552429199, "learning_rate": 2.1101343039514526e-05, "loss": 0.1516, "step": 17502 }, { "epoch": 0.38568367240135076, "grad_norm": 0.5158225893974304, "learning_rate": 2.1100365050713112e-05, "loss": 0.0877, "step": 17503 }, { "epoch": 0.38570570769086693, "grad_norm": 0.5886615514755249, "learning_rate": 2.1099387030839268e-05, "loss": 0.0686, "step": 17504 }, { "epoch": 0.3857277429803831, "grad_norm": 0.9944347739219666, "learning_rate": 2.1098408979897976e-05, "loss": 0.1031, "step": 17505 }, { "epoch": 0.38574977826989926, "grad_norm": 0.8825163841247559, "learning_rate": 2.1097430897894217e-05, "loss": 0.107, "step": 17506 }, { "epoch": 0.3857718135594154, "grad_norm": 0.5654502511024475, "learning_rate": 2.1096452784832972e-05, "loss": 0.0617, "step": 17507 }, { "epoch": 0.3857938488489316, "grad_norm": 0.600179135799408, "learning_rate": 2.1095474640719225e-05, "loss": 0.0962, "step": 17508 }, { "epoch": 0.38581588413844775, "grad_norm": 0.8205534815788269, "learning_rate": 2.109449646555796e-05, "loss": 0.0822, "step": 17509 }, { "epoch": 0.38583791942796386, "grad_norm": 0.704669713973999, "learning_rate": 2.1093518259354146e-05, "loss": 0.0736, "step": 17510 }, { "epoch": 0.38585995471748, "grad_norm": 0.9728628993034363, "learning_rate": 2.1092540022112786e-05, "loss": 0.0991, "step": 17511 }, { "epoch": 0.3858819900069962, "grad_norm": 0.5156943202018738, "learning_rate": 2.109156175383885e-05, "loss": 0.0667, "step": 17512 }, { "epoch": 0.38590402529651235, "grad_norm": 0.9541309475898743, "learning_rate": 2.109058345453732e-05, "loss": 0.1038, "step": 17513 }, { "epoch": 0.3859260605860285, "grad_norm": 0.9977579116821289, "learning_rate": 2.1089605124213183e-05, "loss": 0.0934, "step": 17514 }, { "epoch": 0.3859480958755447, "grad_norm": 0.6639487743377686, "learning_rate": 2.1088626762871426e-05, "loss": 0.0811, "step": 17515 }, { "epoch": 0.38597013116506085, "grad_norm": 0.45188188552856445, "learning_rate": 2.1087648370517025e-05, "loss": 0.0708, "step": 17516 }, { "epoch": 0.385992166454577, "grad_norm": 0.6441569924354553, "learning_rate": 2.1086669947154967e-05, "loss": 0.0636, "step": 17517 }, { "epoch": 0.3860142017440932, "grad_norm": 1.8001015186309814, "learning_rate": 2.1085691492790236e-05, "loss": 0.0841, "step": 17518 }, { "epoch": 0.38603623703360934, "grad_norm": 0.5613628029823303, "learning_rate": 2.108471300742781e-05, "loss": 0.0644, "step": 17519 }, { "epoch": 0.3860582723231255, "grad_norm": 0.5471293330192566, "learning_rate": 2.1083734491072683e-05, "loss": 0.0548, "step": 17520 }, { "epoch": 0.38608030761264167, "grad_norm": 0.5270375609397888, "learning_rate": 2.108275594372983e-05, "loss": 0.0731, "step": 17521 }, { "epoch": 0.3861023429021578, "grad_norm": 0.6579224467277527, "learning_rate": 2.1081777365404246e-05, "loss": 0.0521, "step": 17522 }, { "epoch": 0.38612437819167394, "grad_norm": 0.8636817336082458, "learning_rate": 2.1080798756100902e-05, "loss": 0.0812, "step": 17523 }, { "epoch": 0.3861464134811901, "grad_norm": 0.8951284885406494, "learning_rate": 2.1079820115824795e-05, "loss": 0.1003, "step": 17524 }, { "epoch": 0.3861684487707063, "grad_norm": 0.9370294213294983, "learning_rate": 2.1078841444580897e-05, "loss": 0.0715, "step": 17525 }, { "epoch": 0.38619048406022244, "grad_norm": 0.9894893169403076, "learning_rate": 2.1077862742374203e-05, "loss": 0.081, "step": 17526 }, { "epoch": 0.3862125193497386, "grad_norm": 0.6427790522575378, "learning_rate": 2.1076884009209692e-05, "loss": 0.0875, "step": 17527 }, { "epoch": 0.38623455463925477, "grad_norm": 0.8699271082878113, "learning_rate": 2.1075905245092357e-05, "loss": 0.1141, "step": 17528 }, { "epoch": 0.38625658992877093, "grad_norm": 1.0162427425384521, "learning_rate": 2.1074926450027173e-05, "loss": 0.0958, "step": 17529 }, { "epoch": 0.3862786252182871, "grad_norm": 0.7704290747642517, "learning_rate": 2.1073947624019137e-05, "loss": 0.0682, "step": 17530 }, { "epoch": 0.38630066050780326, "grad_norm": 0.7618758678436279, "learning_rate": 2.1072968767073226e-05, "loss": 0.0852, "step": 17531 }, { "epoch": 0.3863226957973194, "grad_norm": 1.0890185832977295, "learning_rate": 2.107198987919443e-05, "loss": 0.1159, "step": 17532 }, { "epoch": 0.3863447310868356, "grad_norm": 0.6127341389656067, "learning_rate": 2.1071010960387727e-05, "loss": 0.0807, "step": 17533 }, { "epoch": 0.3863667663763517, "grad_norm": 0.5940712094306946, "learning_rate": 2.1070032010658118e-05, "loss": 0.0645, "step": 17534 }, { "epoch": 0.38638880166586786, "grad_norm": 1.070930004119873, "learning_rate": 2.1069053030010573e-05, "loss": 0.1193, "step": 17535 }, { "epoch": 0.38641083695538403, "grad_norm": 0.7546752691268921, "learning_rate": 2.1068074018450093e-05, "loss": 0.1195, "step": 17536 }, { "epoch": 0.3864328722449002, "grad_norm": 0.6037629842758179, "learning_rate": 2.1067094975981656e-05, "loss": 0.0863, "step": 17537 }, { "epoch": 0.38645490753441636, "grad_norm": 1.0258649587631226, "learning_rate": 2.106611590261025e-05, "loss": 0.0746, "step": 17538 }, { "epoch": 0.3864769428239325, "grad_norm": 0.7044416666030884, "learning_rate": 2.1065136798340866e-05, "loss": 0.0892, "step": 17539 }, { "epoch": 0.3864989781134487, "grad_norm": 0.6139906644821167, "learning_rate": 2.1064157663178486e-05, "loss": 0.0845, "step": 17540 }, { "epoch": 0.38652101340296485, "grad_norm": 0.9423325657844543, "learning_rate": 2.1063178497128097e-05, "loss": 0.1299, "step": 17541 }, { "epoch": 0.386543048692481, "grad_norm": 0.5163016319274902, "learning_rate": 2.1062199300194687e-05, "loss": 0.0624, "step": 17542 }, { "epoch": 0.3865650839819972, "grad_norm": 0.929754912853241, "learning_rate": 2.106122007238325e-05, "loss": 0.0894, "step": 17543 }, { "epoch": 0.38658711927151335, "grad_norm": 0.5310024619102478, "learning_rate": 2.1060240813698763e-05, "loss": 0.096, "step": 17544 }, { "epoch": 0.3866091545610295, "grad_norm": 0.6272175312042236, "learning_rate": 2.1059261524146227e-05, "loss": 0.0783, "step": 17545 }, { "epoch": 0.3866311898505457, "grad_norm": 0.6183730959892273, "learning_rate": 2.1058282203730616e-05, "loss": 0.0974, "step": 17546 }, { "epoch": 0.3866532251400618, "grad_norm": 1.036615252494812, "learning_rate": 2.105730285245693e-05, "loss": 0.1235, "step": 17547 }, { "epoch": 0.38667526042957795, "grad_norm": 0.4496268928050995, "learning_rate": 2.1056323470330155e-05, "loss": 0.0941, "step": 17548 }, { "epoch": 0.3866972957190941, "grad_norm": 0.5988109111785889, "learning_rate": 2.1055344057355273e-05, "loss": 0.084, "step": 17549 }, { "epoch": 0.3867193310086103, "grad_norm": 0.7778573036193848, "learning_rate": 2.1054364613537275e-05, "loss": 0.0571, "step": 17550 }, { "epoch": 0.38674136629812644, "grad_norm": 0.7876912355422974, "learning_rate": 2.1053385138881155e-05, "loss": 0.136, "step": 17551 }, { "epoch": 0.3867634015876426, "grad_norm": 0.9136098027229309, "learning_rate": 2.1052405633391897e-05, "loss": 0.1098, "step": 17552 }, { "epoch": 0.3867854368771588, "grad_norm": 0.4527372419834137, "learning_rate": 2.105142609707449e-05, "loss": 0.0775, "step": 17553 }, { "epoch": 0.38680747216667494, "grad_norm": 0.8625006675720215, "learning_rate": 2.1050446529933922e-05, "loss": 0.1235, "step": 17554 }, { "epoch": 0.3868295074561911, "grad_norm": 0.8894989490509033, "learning_rate": 2.1049466931975193e-05, "loss": 0.0754, "step": 17555 }, { "epoch": 0.38685154274570727, "grad_norm": 0.7601545453071594, "learning_rate": 2.104848730320328e-05, "loss": 0.1106, "step": 17556 }, { "epoch": 0.38687357803522343, "grad_norm": 0.5367418527603149, "learning_rate": 2.1047507643623183e-05, "loss": 0.0614, "step": 17557 }, { "epoch": 0.3868956133247396, "grad_norm": 1.1078426837921143, "learning_rate": 2.104652795323988e-05, "loss": 0.065, "step": 17558 }, { "epoch": 0.3869176486142557, "grad_norm": 0.8522916436195374, "learning_rate": 2.1045548232058372e-05, "loss": 0.1591, "step": 17559 }, { "epoch": 0.38693968390377187, "grad_norm": 0.5996394157409668, "learning_rate": 2.1044568480083642e-05, "loss": 0.0944, "step": 17560 }, { "epoch": 0.38696171919328803, "grad_norm": 0.7457226514816284, "learning_rate": 2.104358869732068e-05, "loss": 0.0809, "step": 17561 }, { "epoch": 0.3869837544828042, "grad_norm": 0.868063747882843, "learning_rate": 2.1042608883774488e-05, "loss": 0.066, "step": 17562 }, { "epoch": 0.38700578977232036, "grad_norm": 0.7342466115951538, "learning_rate": 2.1041629039450042e-05, "loss": 0.0727, "step": 17563 }, { "epoch": 0.38702782506183653, "grad_norm": 0.4675808548927307, "learning_rate": 2.1040649164352348e-05, "loss": 0.0654, "step": 17564 }, { "epoch": 0.3870498603513527, "grad_norm": 0.7421506643295288, "learning_rate": 2.1039669258486378e-05, "loss": 0.0952, "step": 17565 }, { "epoch": 0.38707189564086886, "grad_norm": 0.7829723358154297, "learning_rate": 2.1038689321857144e-05, "loss": 0.092, "step": 17566 }, { "epoch": 0.387093930930385, "grad_norm": 0.36390212178230286, "learning_rate": 2.103770935446962e-05, "loss": 0.0606, "step": 17567 }, { "epoch": 0.3871159662199012, "grad_norm": 0.3945404589176178, "learning_rate": 2.1036729356328808e-05, "loss": 0.0589, "step": 17568 }, { "epoch": 0.38713800150941735, "grad_norm": 1.0421459674835205, "learning_rate": 2.1035749327439688e-05, "loss": 0.0888, "step": 17569 }, { "epoch": 0.3871600367989335, "grad_norm": 0.6271584033966064, "learning_rate": 2.103476926780727e-05, "loss": 0.1197, "step": 17570 }, { "epoch": 0.3871820720884496, "grad_norm": 0.8784700036048889, "learning_rate": 2.1033789177436535e-05, "loss": 0.0824, "step": 17571 }, { "epoch": 0.3872041073779658, "grad_norm": 0.7664238810539246, "learning_rate": 2.1032809056332476e-05, "loss": 0.0836, "step": 17572 }, { "epoch": 0.38722614266748195, "grad_norm": 0.6154894828796387, "learning_rate": 2.103182890450008e-05, "loss": 0.0847, "step": 17573 }, { "epoch": 0.3872481779569981, "grad_norm": 0.8410546183586121, "learning_rate": 2.103084872194435e-05, "loss": 0.1159, "step": 17574 }, { "epoch": 0.3872702132465143, "grad_norm": 0.844925045967102, "learning_rate": 2.102986850867027e-05, "loss": 0.0713, "step": 17575 }, { "epoch": 0.38729224853603045, "grad_norm": 0.7869388461112976, "learning_rate": 2.1028888264682836e-05, "loss": 0.1147, "step": 17576 }, { "epoch": 0.3873142838255466, "grad_norm": 0.52109295129776, "learning_rate": 2.1027907989987045e-05, "loss": 0.0744, "step": 17577 }, { "epoch": 0.3873363191150628, "grad_norm": 0.5178923606872559, "learning_rate": 2.1026927684587885e-05, "loss": 0.0955, "step": 17578 }, { "epoch": 0.38735835440457894, "grad_norm": 0.751956582069397, "learning_rate": 2.102594734849035e-05, "loss": 0.0627, "step": 17579 }, { "epoch": 0.3873803896940951, "grad_norm": 1.8783849477767944, "learning_rate": 2.1024966981699436e-05, "loss": 0.0993, "step": 17580 }, { "epoch": 0.38740242498361127, "grad_norm": 0.591041624546051, "learning_rate": 2.102398658422013e-05, "loss": 0.1017, "step": 17581 }, { "epoch": 0.38742446027312744, "grad_norm": 0.8536674380302429, "learning_rate": 2.102300615605743e-05, "loss": 0.0653, "step": 17582 }, { "epoch": 0.3874464955626436, "grad_norm": 0.6015098690986633, "learning_rate": 2.102202569721633e-05, "loss": 0.1087, "step": 17583 }, { "epoch": 0.3874685308521597, "grad_norm": 0.47458240389823914, "learning_rate": 2.1021045207701827e-05, "loss": 0.0844, "step": 17584 }, { "epoch": 0.3874905661416759, "grad_norm": 1.0629162788391113, "learning_rate": 2.102006468751891e-05, "loss": 0.0647, "step": 17585 }, { "epoch": 0.38751260143119204, "grad_norm": 0.7811924815177917, "learning_rate": 2.101908413667258e-05, "loss": 0.0893, "step": 17586 }, { "epoch": 0.3875346367207082, "grad_norm": 0.7588554620742798, "learning_rate": 2.1018103555167822e-05, "loss": 0.099, "step": 17587 }, { "epoch": 0.38755667201022437, "grad_norm": 0.6333721876144409, "learning_rate": 2.1017122943009633e-05, "loss": 0.0686, "step": 17588 }, { "epoch": 0.38757870729974053, "grad_norm": 0.8915400505065918, "learning_rate": 2.1016142300203013e-05, "loss": 0.0836, "step": 17589 }, { "epoch": 0.3876007425892567, "grad_norm": 0.49512431025505066, "learning_rate": 2.1015161626752954e-05, "loss": 0.0728, "step": 17590 }, { "epoch": 0.38762277787877286, "grad_norm": 0.9069339632987976, "learning_rate": 2.1014180922664453e-05, "loss": 0.0907, "step": 17591 }, { "epoch": 0.387644813168289, "grad_norm": 0.47287431359291077, "learning_rate": 2.10132001879425e-05, "loss": 0.0742, "step": 17592 }, { "epoch": 0.3876668484578052, "grad_norm": 0.6351982355117798, "learning_rate": 2.10122194225921e-05, "loss": 0.0938, "step": 17593 }, { "epoch": 0.38768888374732136, "grad_norm": 0.8512473702430725, "learning_rate": 2.101123862661824e-05, "loss": 0.0775, "step": 17594 }, { "epoch": 0.3877109190368375, "grad_norm": 0.8455377817153931, "learning_rate": 2.101025780002592e-05, "loss": 0.0866, "step": 17595 }, { "epoch": 0.38773295432635363, "grad_norm": 0.7725833654403687, "learning_rate": 2.100927694282013e-05, "loss": 0.1044, "step": 17596 }, { "epoch": 0.3877549896158698, "grad_norm": 0.4718063473701477, "learning_rate": 2.1008296055005868e-05, "loss": 0.075, "step": 17597 }, { "epoch": 0.38777702490538596, "grad_norm": 0.8533941507339478, "learning_rate": 2.1007315136588134e-05, "loss": 0.0953, "step": 17598 }, { "epoch": 0.3877990601949021, "grad_norm": 0.6186190247535706, "learning_rate": 2.1006334187571925e-05, "loss": 0.0775, "step": 17599 }, { "epoch": 0.3878210954844183, "grad_norm": 0.9290505051612854, "learning_rate": 2.100535320796223e-05, "loss": 0.0662, "step": 17600 }, { "epoch": 0.38784313077393445, "grad_norm": 0.7656719088554382, "learning_rate": 2.1004372197764058e-05, "loss": 0.1173, "step": 17601 }, { "epoch": 0.3878651660634506, "grad_norm": 1.1885287761688232, "learning_rate": 2.1003391156982393e-05, "loss": 0.0848, "step": 17602 }, { "epoch": 0.3878872013529668, "grad_norm": 1.1849225759506226, "learning_rate": 2.1002410085622242e-05, "loss": 0.1021, "step": 17603 }, { "epoch": 0.38790923664248295, "grad_norm": 0.9732233285903931, "learning_rate": 2.1001428983688596e-05, "loss": 0.1399, "step": 17604 }, { "epoch": 0.3879312719319991, "grad_norm": 0.5675805807113647, "learning_rate": 2.1000447851186453e-05, "loss": 0.0819, "step": 17605 }, { "epoch": 0.3879533072215153, "grad_norm": 0.6234756112098694, "learning_rate": 2.0999466688120813e-05, "loss": 0.0725, "step": 17606 }, { "epoch": 0.38797534251103144, "grad_norm": 0.8828761577606201, "learning_rate": 2.099848549449667e-05, "loss": 0.0907, "step": 17607 }, { "epoch": 0.38799737780054755, "grad_norm": 0.6963325142860413, "learning_rate": 2.0997504270319023e-05, "loss": 0.0883, "step": 17608 }, { "epoch": 0.3880194130900637, "grad_norm": 0.4024451673030853, "learning_rate": 2.0996523015592875e-05, "loss": 0.0805, "step": 17609 }, { "epoch": 0.3880414483795799, "grad_norm": 0.6365494728088379, "learning_rate": 2.0995541730323216e-05, "loss": 0.1084, "step": 17610 }, { "epoch": 0.38806348366909604, "grad_norm": 0.8775617480278015, "learning_rate": 2.099456041451505e-05, "loss": 0.0939, "step": 17611 }, { "epoch": 0.3880855189586122, "grad_norm": 0.6687744855880737, "learning_rate": 2.0993579068173373e-05, "loss": 0.0852, "step": 17612 }, { "epoch": 0.3881075542481284, "grad_norm": 0.6332975029945374, "learning_rate": 2.0992597691303183e-05, "loss": 0.1075, "step": 17613 }, { "epoch": 0.38812958953764454, "grad_norm": 1.0786833763122559, "learning_rate": 2.0991616283909477e-05, "loss": 0.0816, "step": 17614 }, { "epoch": 0.3881516248271607, "grad_norm": 1.0254336595535278, "learning_rate": 2.0990634845997258e-05, "loss": 0.082, "step": 17615 }, { "epoch": 0.38817366011667687, "grad_norm": 0.444858193397522, "learning_rate": 2.0989653377571528e-05, "loss": 0.0831, "step": 17616 }, { "epoch": 0.38819569540619303, "grad_norm": 0.7659713625907898, "learning_rate": 2.0988671878637273e-05, "loss": 0.0813, "step": 17617 }, { "epoch": 0.3882177306957092, "grad_norm": 0.6280414462089539, "learning_rate": 2.0987690349199505e-05, "loss": 0.0749, "step": 17618 }, { "epoch": 0.38823976598522536, "grad_norm": 0.6525088548660278, "learning_rate": 2.0986708789263222e-05, "loss": 0.0567, "step": 17619 }, { "epoch": 0.3882618012747415, "grad_norm": 0.7356574535369873, "learning_rate": 2.098572719883342e-05, "loss": 0.0856, "step": 17620 }, { "epoch": 0.38828383656425763, "grad_norm": 1.0810977220535278, "learning_rate": 2.09847455779151e-05, "loss": 0.0857, "step": 17621 }, { "epoch": 0.3883058718537738, "grad_norm": 0.5601243376731873, "learning_rate": 2.098376392651326e-05, "loss": 0.0995, "step": 17622 }, { "epoch": 0.38832790714328996, "grad_norm": 0.7211548089981079, "learning_rate": 2.0982782244632896e-05, "loss": 0.0815, "step": 17623 }, { "epoch": 0.38834994243280613, "grad_norm": 0.6565713882446289, "learning_rate": 2.098180053227902e-05, "loss": 0.0611, "step": 17624 }, { "epoch": 0.3883719777223223, "grad_norm": 0.6072553396224976, "learning_rate": 2.0980818789456625e-05, "loss": 0.0641, "step": 17625 }, { "epoch": 0.38839401301183846, "grad_norm": 0.8953543901443481, "learning_rate": 2.0979837016170713e-05, "loss": 0.1328, "step": 17626 }, { "epoch": 0.3884160483013546, "grad_norm": 0.6893813610076904, "learning_rate": 2.0978855212426285e-05, "loss": 0.0619, "step": 17627 }, { "epoch": 0.3884380835908708, "grad_norm": 0.50217205286026, "learning_rate": 2.097787337822834e-05, "loss": 0.0764, "step": 17628 }, { "epoch": 0.38846011888038695, "grad_norm": 0.5288928151130676, "learning_rate": 2.097689151358188e-05, "loss": 0.0805, "step": 17629 }, { "epoch": 0.3884821541699031, "grad_norm": 0.42899301648139954, "learning_rate": 2.0975909618491903e-05, "loss": 0.0761, "step": 17630 }, { "epoch": 0.3885041894594193, "grad_norm": 0.434128075838089, "learning_rate": 2.0974927692963423e-05, "loss": 0.0853, "step": 17631 }, { "epoch": 0.38852622474893544, "grad_norm": 0.3921739459037781, "learning_rate": 2.0973945737001425e-05, "loss": 0.0653, "step": 17632 }, { "epoch": 0.38854826003845155, "grad_norm": 0.7067705392837524, "learning_rate": 2.097296375061092e-05, "loss": 0.1196, "step": 17633 }, { "epoch": 0.3885702953279677, "grad_norm": 0.5406570434570312, "learning_rate": 2.097198173379691e-05, "loss": 0.0903, "step": 17634 }, { "epoch": 0.3885923306174839, "grad_norm": 0.7733278870582581, "learning_rate": 2.0970999686564392e-05, "loss": 0.0798, "step": 17635 }, { "epoch": 0.38861436590700005, "grad_norm": 0.6151835918426514, "learning_rate": 2.097001760891837e-05, "loss": 0.0683, "step": 17636 }, { "epoch": 0.3886364011965162, "grad_norm": 0.8200811743736267, "learning_rate": 2.096903550086385e-05, "loss": 0.106, "step": 17637 }, { "epoch": 0.3886584364860324, "grad_norm": 1.1045315265655518, "learning_rate": 2.0968053362405826e-05, "loss": 0.0959, "step": 17638 }, { "epoch": 0.38868047177554854, "grad_norm": 0.659269392490387, "learning_rate": 2.096707119354931e-05, "loss": 0.0617, "step": 17639 }, { "epoch": 0.3887025070650647, "grad_norm": 0.8903753757476807, "learning_rate": 2.0966088994299298e-05, "loss": 0.0933, "step": 17640 }, { "epoch": 0.38872454235458087, "grad_norm": 0.4665539264678955, "learning_rate": 2.0965106764660802e-05, "loss": 0.0802, "step": 17641 }, { "epoch": 0.38874657764409704, "grad_norm": 0.6527777910232544, "learning_rate": 2.096412450463881e-05, "loss": 0.0724, "step": 17642 }, { "epoch": 0.3887686129336132, "grad_norm": 0.8904755115509033, "learning_rate": 2.0963142214238343e-05, "loss": 0.1159, "step": 17643 }, { "epoch": 0.38879064822312936, "grad_norm": 0.9615782499313354, "learning_rate": 2.0962159893464385e-05, "loss": 0.1083, "step": 17644 }, { "epoch": 0.38881268351264553, "grad_norm": 0.8265554308891296, "learning_rate": 2.0961177542321957e-05, "loss": 0.083, "step": 17645 }, { "epoch": 0.38883471880216164, "grad_norm": 0.9387245178222656, "learning_rate": 2.0960195160816052e-05, "loss": 0.1149, "step": 17646 }, { "epoch": 0.3888567540916778, "grad_norm": 0.6151728630065918, "learning_rate": 2.0959212748951675e-05, "loss": 0.1098, "step": 17647 }, { "epoch": 0.38887878938119397, "grad_norm": 0.8626678586006165, "learning_rate": 2.0958230306733833e-05, "loss": 0.1196, "step": 17648 }, { "epoch": 0.38890082467071013, "grad_norm": 0.5607280731201172, "learning_rate": 2.0957247834167528e-05, "loss": 0.058, "step": 17649 }, { "epoch": 0.3889228599602263, "grad_norm": 0.8487368822097778, "learning_rate": 2.0956265331257773e-05, "loss": 0.0699, "step": 17650 }, { "epoch": 0.38894489524974246, "grad_norm": 0.6296434998512268, "learning_rate": 2.0955282798009553e-05, "loss": 0.0624, "step": 17651 }, { "epoch": 0.3889669305392586, "grad_norm": 0.9486322402954102, "learning_rate": 2.0954300234427895e-05, "loss": 0.0667, "step": 17652 }, { "epoch": 0.3889889658287748, "grad_norm": 0.4400169253349304, "learning_rate": 2.0953317640517785e-05, "loss": 0.0612, "step": 17653 }, { "epoch": 0.38901100111829096, "grad_norm": 0.3839138150215149, "learning_rate": 2.0952335016284237e-05, "loss": 0.0643, "step": 17654 }, { "epoch": 0.3890330364078071, "grad_norm": 0.6584778428077698, "learning_rate": 2.0951352361732257e-05, "loss": 0.0451, "step": 17655 }, { "epoch": 0.3890550716973233, "grad_norm": 1.0300301313400269, "learning_rate": 2.095036967686685e-05, "loss": 0.1105, "step": 17656 }, { "epoch": 0.38907710698683945, "grad_norm": 1.325662612915039, "learning_rate": 2.0949386961693017e-05, "loss": 0.095, "step": 17657 }, { "epoch": 0.38909914227635556, "grad_norm": 1.1005887985229492, "learning_rate": 2.0948404216215763e-05, "loss": 0.1262, "step": 17658 }, { "epoch": 0.3891211775658717, "grad_norm": 1.0223897695541382, "learning_rate": 2.0947421440440103e-05, "loss": 0.1118, "step": 17659 }, { "epoch": 0.3891432128553879, "grad_norm": 0.6464389562606812, "learning_rate": 2.0946438634371035e-05, "loss": 0.0903, "step": 17660 }, { "epoch": 0.38916524814490405, "grad_norm": 1.376841425895691, "learning_rate": 2.0945455798013563e-05, "loss": 0.1233, "step": 17661 }, { "epoch": 0.3891872834344202, "grad_norm": 0.43975749611854553, "learning_rate": 2.0944472931372698e-05, "loss": 0.0724, "step": 17662 }, { "epoch": 0.3892093187239364, "grad_norm": 0.800813615322113, "learning_rate": 2.094349003445344e-05, "loss": 0.1029, "step": 17663 }, { "epoch": 0.38923135401345255, "grad_norm": 1.1466670036315918, "learning_rate": 2.094250710726081e-05, "loss": 0.0888, "step": 17664 }, { "epoch": 0.3892533893029687, "grad_norm": 0.6153247952461243, "learning_rate": 2.0941524149799794e-05, "loss": 0.1345, "step": 17665 }, { "epoch": 0.3892754245924849, "grad_norm": 1.025038719177246, "learning_rate": 2.0940541162075417e-05, "loss": 0.0975, "step": 17666 }, { "epoch": 0.38929745988200104, "grad_norm": 0.5687286853790283, "learning_rate": 2.0939558144092678e-05, "loss": 0.061, "step": 17667 }, { "epoch": 0.3893194951715172, "grad_norm": 0.8029957413673401, "learning_rate": 2.0938575095856582e-05, "loss": 0.0948, "step": 17668 }, { "epoch": 0.38934153046103337, "grad_norm": 0.4717203676700592, "learning_rate": 2.0937592017372137e-05, "loss": 0.1001, "step": 17669 }, { "epoch": 0.3893635657505495, "grad_norm": 0.6193360090255737, "learning_rate": 2.0936608908644356e-05, "loss": 0.0738, "step": 17670 }, { "epoch": 0.38938560104006564, "grad_norm": 0.5573828816413879, "learning_rate": 2.0935625769678238e-05, "loss": 0.0607, "step": 17671 }, { "epoch": 0.3894076363295818, "grad_norm": 0.9179137349128723, "learning_rate": 2.0934642600478796e-05, "loss": 0.1291, "step": 17672 }, { "epoch": 0.389429671619098, "grad_norm": 0.6084972023963928, "learning_rate": 2.093365940105104e-05, "loss": 0.0751, "step": 17673 }, { "epoch": 0.38945170690861414, "grad_norm": 0.8465416431427002, "learning_rate": 2.093267617139997e-05, "loss": 0.0884, "step": 17674 }, { "epoch": 0.3894737421981303, "grad_norm": 0.8439450860023499, "learning_rate": 2.0931692911530603e-05, "loss": 0.1097, "step": 17675 }, { "epoch": 0.38949577748764647, "grad_norm": 1.1246347427368164, "learning_rate": 2.093070962144794e-05, "loss": 0.1018, "step": 17676 }, { "epoch": 0.38951781277716263, "grad_norm": 0.48393693566322327, "learning_rate": 2.0929726301156996e-05, "loss": 0.0668, "step": 17677 }, { "epoch": 0.3895398480666788, "grad_norm": 0.890116274356842, "learning_rate": 2.0928742950662773e-05, "loss": 0.0944, "step": 17678 }, { "epoch": 0.38956188335619496, "grad_norm": 0.47152745723724365, "learning_rate": 2.0927759569970284e-05, "loss": 0.0542, "step": 17679 }, { "epoch": 0.3895839186457111, "grad_norm": 0.909408688545227, "learning_rate": 2.0926776159084536e-05, "loss": 0.097, "step": 17680 }, { "epoch": 0.3896059539352273, "grad_norm": 0.9370928406715393, "learning_rate": 2.092579271801054e-05, "loss": 0.0868, "step": 17681 }, { "epoch": 0.38962798922474345, "grad_norm": 0.5473717451095581, "learning_rate": 2.092480924675331e-05, "loss": 0.0795, "step": 17682 }, { "epoch": 0.38965002451425956, "grad_norm": 0.5143160820007324, "learning_rate": 2.0923825745317842e-05, "loss": 0.0591, "step": 17683 }, { "epoch": 0.3896720598037757, "grad_norm": 0.6978341341018677, "learning_rate": 2.092284221370915e-05, "loss": 0.0828, "step": 17684 }, { "epoch": 0.3896940950932919, "grad_norm": 0.6748080849647522, "learning_rate": 2.0921858651932253e-05, "loss": 0.08, "step": 17685 }, { "epoch": 0.38971613038280806, "grad_norm": 0.7165102958679199, "learning_rate": 2.0920875059992152e-05, "loss": 0.0811, "step": 17686 }, { "epoch": 0.3897381656723242, "grad_norm": 0.9361954927444458, "learning_rate": 2.091989143789386e-05, "loss": 0.0604, "step": 17687 }, { "epoch": 0.3897602009618404, "grad_norm": 0.8923050165176392, "learning_rate": 2.0918907785642384e-05, "loss": 0.0835, "step": 17688 }, { "epoch": 0.38978223625135655, "grad_norm": 0.6537447571754456, "learning_rate": 2.091792410324274e-05, "loss": 0.0891, "step": 17689 }, { "epoch": 0.3898042715408727, "grad_norm": 0.5947649478912354, "learning_rate": 2.0916940390699936e-05, "loss": 0.0713, "step": 17690 }, { "epoch": 0.3898263068303889, "grad_norm": 1.1500458717346191, "learning_rate": 2.091595664801898e-05, "loss": 0.102, "step": 17691 }, { "epoch": 0.38984834211990504, "grad_norm": 0.664725661277771, "learning_rate": 2.0914972875204882e-05, "loss": 0.0768, "step": 17692 }, { "epoch": 0.3898703774094212, "grad_norm": 0.6902870535850525, "learning_rate": 2.0913989072262657e-05, "loss": 0.0936, "step": 17693 }, { "epoch": 0.3898924126989374, "grad_norm": 0.5516257882118225, "learning_rate": 2.091300523919731e-05, "loss": 0.0736, "step": 17694 }, { "epoch": 0.3899144479884535, "grad_norm": 0.5972537398338318, "learning_rate": 2.0912021376013863e-05, "loss": 0.0914, "step": 17695 }, { "epoch": 0.38993648327796965, "grad_norm": 0.7802632451057434, "learning_rate": 2.091103748271732e-05, "loss": 0.076, "step": 17696 }, { "epoch": 0.3899585185674858, "grad_norm": 0.45827335119247437, "learning_rate": 2.091005355931269e-05, "loss": 0.0629, "step": 17697 }, { "epoch": 0.389980553857002, "grad_norm": 1.1017214059829712, "learning_rate": 2.0909069605804992e-05, "loss": 0.0869, "step": 17698 }, { "epoch": 0.39000258914651814, "grad_norm": 0.682215690612793, "learning_rate": 2.090808562219923e-05, "loss": 0.0672, "step": 17699 }, { "epoch": 0.3900246244360343, "grad_norm": 0.5476492047309875, "learning_rate": 2.0907101608500424e-05, "loss": 0.0641, "step": 17700 }, { "epoch": 0.39004665972555047, "grad_norm": 0.6459496021270752, "learning_rate": 2.0906117564713576e-05, "loss": 0.0756, "step": 17701 }, { "epoch": 0.39006869501506664, "grad_norm": 1.042953610420227, "learning_rate": 2.090513349084371e-05, "loss": 0.069, "step": 17702 }, { "epoch": 0.3900907303045828, "grad_norm": 1.2066237926483154, "learning_rate": 2.0904149386895827e-05, "loss": 0.1155, "step": 17703 }, { "epoch": 0.39011276559409896, "grad_norm": 0.6986823081970215, "learning_rate": 2.0903165252874952e-05, "loss": 0.1039, "step": 17704 }, { "epoch": 0.39013480088361513, "grad_norm": 1.1012158393859863, "learning_rate": 2.0902181088786084e-05, "loss": 0.1259, "step": 17705 }, { "epoch": 0.3901568361731313, "grad_norm": 0.9288212060928345, "learning_rate": 2.090119689463425e-05, "loss": 0.0957, "step": 17706 }, { "epoch": 0.3901788714626474, "grad_norm": 0.6017037034034729, "learning_rate": 2.0900212670424446e-05, "loss": 0.0738, "step": 17707 }, { "epoch": 0.39020090675216357, "grad_norm": 0.7818014025688171, "learning_rate": 2.0899228416161705e-05, "loss": 0.0716, "step": 17708 }, { "epoch": 0.39022294204167973, "grad_norm": 0.789886474609375, "learning_rate": 2.0898244131851025e-05, "loss": 0.0956, "step": 17709 }, { "epoch": 0.3902449773311959, "grad_norm": 0.6804556846618652, "learning_rate": 2.0897259817497425e-05, "loss": 0.0834, "step": 17710 }, { "epoch": 0.39026701262071206, "grad_norm": 0.8830960392951965, "learning_rate": 2.0896275473105916e-05, "loss": 0.0683, "step": 17711 }, { "epoch": 0.3902890479102282, "grad_norm": 0.9458062052726746, "learning_rate": 2.0895291098681522e-05, "loss": 0.1028, "step": 17712 }, { "epoch": 0.3903110831997444, "grad_norm": 0.7406428456306458, "learning_rate": 2.089430669422924e-05, "loss": 0.069, "step": 17713 }, { "epoch": 0.39033311848926056, "grad_norm": 0.6019325852394104, "learning_rate": 2.08933222597541e-05, "loss": 0.0643, "step": 17714 }, { "epoch": 0.3903551537787767, "grad_norm": 0.737726092338562, "learning_rate": 2.089233779526111e-05, "loss": 0.092, "step": 17715 }, { "epoch": 0.3903771890682929, "grad_norm": 0.8492122292518616, "learning_rate": 2.0891353300755276e-05, "loss": 0.0799, "step": 17716 }, { "epoch": 0.39039922435780905, "grad_norm": 0.8235628604888916, "learning_rate": 2.0890368776241625e-05, "loss": 0.1019, "step": 17717 }, { "epoch": 0.3904212596473252, "grad_norm": 1.0798441171646118, "learning_rate": 2.0889384221725168e-05, "loss": 0.1001, "step": 17718 }, { "epoch": 0.3904432949368414, "grad_norm": 0.8917486667633057, "learning_rate": 2.0888399637210915e-05, "loss": 0.0841, "step": 17719 }, { "epoch": 0.3904653302263575, "grad_norm": 1.0022673606872559, "learning_rate": 2.088741502270389e-05, "loss": 0.1347, "step": 17720 }, { "epoch": 0.39048736551587365, "grad_norm": 0.5525656938552856, "learning_rate": 2.0886430378209104e-05, "loss": 0.0813, "step": 17721 }, { "epoch": 0.3905094008053898, "grad_norm": 0.7675944566726685, "learning_rate": 2.0885445703731566e-05, "loss": 0.0897, "step": 17722 }, { "epoch": 0.390531436094906, "grad_norm": 0.7707091569900513, "learning_rate": 2.0884460999276305e-05, "loss": 0.0881, "step": 17723 }, { "epoch": 0.39055347138442215, "grad_norm": 0.5621751546859741, "learning_rate": 2.0883476264848322e-05, "loss": 0.1178, "step": 17724 }, { "epoch": 0.3905755066739383, "grad_norm": 0.6872168183326721, "learning_rate": 2.088249150045264e-05, "loss": 0.0558, "step": 17725 }, { "epoch": 0.3905975419634545, "grad_norm": 0.8286648392677307, "learning_rate": 2.0881506706094274e-05, "loss": 0.0964, "step": 17726 }, { "epoch": 0.39061957725297064, "grad_norm": 1.0233269929885864, "learning_rate": 2.0880521881778244e-05, "loss": 0.0933, "step": 17727 }, { "epoch": 0.3906416125424868, "grad_norm": 0.8871572017669678, "learning_rate": 2.0879537027509557e-05, "loss": 0.0819, "step": 17728 }, { "epoch": 0.39066364783200297, "grad_norm": 0.8280138969421387, "learning_rate": 2.087855214329324e-05, "loss": 0.0806, "step": 17729 }, { "epoch": 0.39068568312151913, "grad_norm": 0.589976966381073, "learning_rate": 2.0877567229134306e-05, "loss": 0.0839, "step": 17730 }, { "epoch": 0.3907077184110353, "grad_norm": 0.8746573328971863, "learning_rate": 2.087658228503777e-05, "loss": 0.1108, "step": 17731 }, { "epoch": 0.3907297537005514, "grad_norm": 0.5264793634414673, "learning_rate": 2.0875597311008644e-05, "loss": 0.0629, "step": 17732 }, { "epoch": 0.3907517889900676, "grad_norm": 0.5830940008163452, "learning_rate": 2.0874612307051955e-05, "loss": 0.0933, "step": 17733 }, { "epoch": 0.39077382427958374, "grad_norm": 0.7407110929489136, "learning_rate": 2.087362727317271e-05, "loss": 0.075, "step": 17734 }, { "epoch": 0.3907958595690999, "grad_norm": 1.2667471170425415, "learning_rate": 2.0872642209375935e-05, "loss": 0.0894, "step": 17735 }, { "epoch": 0.39081789485861607, "grad_norm": 0.5478833317756653, "learning_rate": 2.0871657115666643e-05, "loss": 0.0683, "step": 17736 }, { "epoch": 0.39083993014813223, "grad_norm": 0.5856515169143677, "learning_rate": 2.0870671992049857e-05, "loss": 0.0776, "step": 17737 }, { "epoch": 0.3908619654376484, "grad_norm": 0.9011715054512024, "learning_rate": 2.0869686838530582e-05, "loss": 0.1213, "step": 17738 }, { "epoch": 0.39088400072716456, "grad_norm": 0.5236151218414307, "learning_rate": 2.086870165511385e-05, "loss": 0.0968, "step": 17739 }, { "epoch": 0.3909060360166807, "grad_norm": 0.6993923783302307, "learning_rate": 2.0867716441804678e-05, "loss": 0.0901, "step": 17740 }, { "epoch": 0.3909280713061969, "grad_norm": 0.8276981711387634, "learning_rate": 2.086673119860807e-05, "loss": 0.0945, "step": 17741 }, { "epoch": 0.39095010659571305, "grad_norm": 0.634086549282074, "learning_rate": 2.0865745925529058e-05, "loss": 0.1034, "step": 17742 }, { "epoch": 0.3909721418852292, "grad_norm": 0.5530033111572266, "learning_rate": 2.0864760622572656e-05, "loss": 0.0651, "step": 17743 }, { "epoch": 0.3909941771747453, "grad_norm": 0.45789283514022827, "learning_rate": 2.0863775289743886e-05, "loss": 0.0585, "step": 17744 }, { "epoch": 0.3910162124642615, "grad_norm": 0.6411839127540588, "learning_rate": 2.0862789927047765e-05, "loss": 0.0753, "step": 17745 }, { "epoch": 0.39103824775377766, "grad_norm": 0.5602293610572815, "learning_rate": 2.0861804534489308e-05, "loss": 0.0965, "step": 17746 }, { "epoch": 0.3910602830432938, "grad_norm": 0.621116042137146, "learning_rate": 2.086081911207354e-05, "loss": 0.0803, "step": 17747 }, { "epoch": 0.39108231833281, "grad_norm": 0.6464610695838928, "learning_rate": 2.085983365980547e-05, "loss": 0.0846, "step": 17748 }, { "epoch": 0.39110435362232615, "grad_norm": 0.5898609757423401, "learning_rate": 2.085884817769013e-05, "loss": 0.0763, "step": 17749 }, { "epoch": 0.3911263889118423, "grad_norm": 0.7932270765304565, "learning_rate": 2.0857862665732536e-05, "loss": 0.0906, "step": 17750 }, { "epoch": 0.3911484242013585, "grad_norm": 0.572044312953949, "learning_rate": 2.0856877123937702e-05, "loss": 0.0794, "step": 17751 }, { "epoch": 0.39117045949087464, "grad_norm": 0.7380363941192627, "learning_rate": 2.0855891552310652e-05, "loss": 0.0919, "step": 17752 }, { "epoch": 0.3911924947803908, "grad_norm": 0.5556607246398926, "learning_rate": 2.085490595085641e-05, "loss": 0.0914, "step": 17753 }, { "epoch": 0.391214530069907, "grad_norm": 0.9692108035087585, "learning_rate": 2.0853920319579993e-05, "loss": 0.0557, "step": 17754 }, { "epoch": 0.39123656535942314, "grad_norm": 0.7154120802879333, "learning_rate": 2.085293465848642e-05, "loss": 0.0745, "step": 17755 }, { "epoch": 0.3912586006489393, "grad_norm": 0.8525329232215881, "learning_rate": 2.0851948967580712e-05, "loss": 0.1264, "step": 17756 }, { "epoch": 0.3912806359384554, "grad_norm": 0.47593989968299866, "learning_rate": 2.0850963246867884e-05, "loss": 0.0825, "step": 17757 }, { "epoch": 0.3913026712279716, "grad_norm": 1.0214120149612427, "learning_rate": 2.084997749635297e-05, "loss": 0.0959, "step": 17758 }, { "epoch": 0.39132470651748774, "grad_norm": 0.647633969783783, "learning_rate": 2.084899171604098e-05, "loss": 0.1132, "step": 17759 }, { "epoch": 0.3913467418070039, "grad_norm": 0.49582338333129883, "learning_rate": 2.084800590593694e-05, "loss": 0.048, "step": 17760 }, { "epoch": 0.39136877709652007, "grad_norm": 0.8453266024589539, "learning_rate": 2.0847020066045866e-05, "loss": 0.0618, "step": 17761 }, { "epoch": 0.39139081238603624, "grad_norm": 0.7663219571113586, "learning_rate": 2.084603419637279e-05, "loss": 0.1152, "step": 17762 }, { "epoch": 0.3914128476755524, "grad_norm": 0.614206850528717, "learning_rate": 2.0845048296922723e-05, "loss": 0.0953, "step": 17763 }, { "epoch": 0.39143488296506856, "grad_norm": 0.7301029562950134, "learning_rate": 2.0844062367700694e-05, "loss": 0.1173, "step": 17764 }, { "epoch": 0.39145691825458473, "grad_norm": 0.5107935667037964, "learning_rate": 2.0843076408711718e-05, "loss": 0.0862, "step": 17765 }, { "epoch": 0.3914789535441009, "grad_norm": 1.0177507400512695, "learning_rate": 2.0842090419960823e-05, "loss": 0.1068, "step": 17766 }, { "epoch": 0.39150098883361706, "grad_norm": 0.5541136860847473, "learning_rate": 2.0841104401453028e-05, "loss": 0.0746, "step": 17767 }, { "epoch": 0.3915230241231332, "grad_norm": 0.622262179851532, "learning_rate": 2.0840118353193352e-05, "loss": 0.0825, "step": 17768 }, { "epoch": 0.39154505941264933, "grad_norm": 0.5959774255752563, "learning_rate": 2.0839132275186828e-05, "loss": 0.1053, "step": 17769 }, { "epoch": 0.3915670947021655, "grad_norm": 1.5276833772659302, "learning_rate": 2.083814616743847e-05, "loss": 0.1072, "step": 17770 }, { "epoch": 0.39158912999168166, "grad_norm": 0.9248490929603577, "learning_rate": 2.08371600299533e-05, "loss": 0.1039, "step": 17771 }, { "epoch": 0.3916111652811978, "grad_norm": 0.7155601382255554, "learning_rate": 2.0836173862736347e-05, "loss": 0.0676, "step": 17772 }, { "epoch": 0.391633200570714, "grad_norm": 0.6893091797828674, "learning_rate": 2.083518766579263e-05, "loss": 0.0834, "step": 17773 }, { "epoch": 0.39165523586023016, "grad_norm": 0.7853916883468628, "learning_rate": 2.083420143912717e-05, "loss": 0.0898, "step": 17774 }, { "epoch": 0.3916772711497463, "grad_norm": 0.578374981880188, "learning_rate": 2.0833215182744997e-05, "loss": 0.1036, "step": 17775 }, { "epoch": 0.3916993064392625, "grad_norm": 1.1700470447540283, "learning_rate": 2.083222889665113e-05, "loss": 0.0811, "step": 17776 }, { "epoch": 0.39172134172877865, "grad_norm": 0.6888161897659302, "learning_rate": 2.0831242580850593e-05, "loss": 0.089, "step": 17777 }, { "epoch": 0.3917433770182948, "grad_norm": 0.8353070616722107, "learning_rate": 2.083025623534842e-05, "loss": 0.1104, "step": 17778 }, { "epoch": 0.391765412307811, "grad_norm": 0.5840854644775391, "learning_rate": 2.0829269860149614e-05, "loss": 0.1119, "step": 17779 }, { "epoch": 0.39178744759732714, "grad_norm": 0.6312697529792786, "learning_rate": 2.082828345525921e-05, "loss": 0.0825, "step": 17780 }, { "epoch": 0.39180948288684325, "grad_norm": 0.4897614121437073, "learning_rate": 2.0827297020682242e-05, "loss": 0.0668, "step": 17781 }, { "epoch": 0.3918315181763594, "grad_norm": 0.675786554813385, "learning_rate": 2.0826310556423717e-05, "loss": 0.1054, "step": 17782 }, { "epoch": 0.3918535534658756, "grad_norm": 0.6117755770683289, "learning_rate": 2.0825324062488667e-05, "loss": 0.0883, "step": 17783 }, { "epoch": 0.39187558875539175, "grad_norm": 0.49369916319847107, "learning_rate": 2.082433753888213e-05, "loss": 0.1011, "step": 17784 }, { "epoch": 0.3918976240449079, "grad_norm": 0.7917494773864746, "learning_rate": 2.0823350985609107e-05, "loss": 0.0735, "step": 17785 }, { "epoch": 0.3919196593344241, "grad_norm": 0.9347222447395325, "learning_rate": 2.082236440267464e-05, "loss": 0.0764, "step": 17786 }, { "epoch": 0.39194169462394024, "grad_norm": 0.8197381496429443, "learning_rate": 2.0821377790083745e-05, "loss": 0.1036, "step": 17787 }, { "epoch": 0.3919637299134564, "grad_norm": 0.23233580589294434, "learning_rate": 2.0820391147841456e-05, "loss": 0.0599, "step": 17788 }, { "epoch": 0.39198576520297257, "grad_norm": 0.880337119102478, "learning_rate": 2.081940447595279e-05, "loss": 0.0997, "step": 17789 }, { "epoch": 0.39200780049248873, "grad_norm": 1.0664881467819214, "learning_rate": 2.081841777442278e-05, "loss": 0.078, "step": 17790 }, { "epoch": 0.3920298357820049, "grad_norm": 0.48345082998275757, "learning_rate": 2.0817431043256444e-05, "loss": 0.113, "step": 17791 }, { "epoch": 0.39205187107152106, "grad_norm": 0.7795045971870422, "learning_rate": 2.0816444282458816e-05, "loss": 0.0861, "step": 17792 }, { "epoch": 0.3920739063610372, "grad_norm": 0.7647018432617188, "learning_rate": 2.0815457492034913e-05, "loss": 0.1134, "step": 17793 }, { "epoch": 0.39209594165055334, "grad_norm": 0.5954820513725281, "learning_rate": 2.0814470671989777e-05, "loss": 0.1008, "step": 17794 }, { "epoch": 0.3921179769400695, "grad_norm": 1.0585074424743652, "learning_rate": 2.0813483822328415e-05, "loss": 0.0913, "step": 17795 }, { "epoch": 0.39214001222958567, "grad_norm": 0.7539745569229126, "learning_rate": 2.081249694305586e-05, "loss": 0.1158, "step": 17796 }, { "epoch": 0.39216204751910183, "grad_norm": 0.6885445713996887, "learning_rate": 2.0811510034177145e-05, "loss": 0.0712, "step": 17797 }, { "epoch": 0.392184082808618, "grad_norm": 0.4778629243373871, "learning_rate": 2.0810523095697298e-05, "loss": 0.0554, "step": 17798 }, { "epoch": 0.39220611809813416, "grad_norm": 0.8018850684165955, "learning_rate": 2.0809536127621332e-05, "loss": 0.1188, "step": 17799 }, { "epoch": 0.3922281533876503, "grad_norm": 0.6295652985572815, "learning_rate": 2.0808549129954288e-05, "loss": 0.089, "step": 17800 }, { "epoch": 0.3922501886771665, "grad_norm": 0.655799150466919, "learning_rate": 2.0807562102701193e-05, "loss": 0.0736, "step": 17801 }, { "epoch": 0.39227222396668265, "grad_norm": 0.6013438701629639, "learning_rate": 2.0806575045867064e-05, "loss": 0.0466, "step": 17802 }, { "epoch": 0.3922942592561988, "grad_norm": 0.7512063980102539, "learning_rate": 2.0805587959456934e-05, "loss": 0.0656, "step": 17803 }, { "epoch": 0.392316294545715, "grad_norm": 0.6595950722694397, "learning_rate": 2.0804600843475833e-05, "loss": 0.0797, "step": 17804 }, { "epoch": 0.39233832983523115, "grad_norm": 0.9261695742607117, "learning_rate": 2.0803613697928786e-05, "loss": 0.0603, "step": 17805 }, { "epoch": 0.39236036512474726, "grad_norm": 0.6257733702659607, "learning_rate": 2.0802626522820825e-05, "loss": 0.0805, "step": 17806 }, { "epoch": 0.3923824004142634, "grad_norm": 0.5740891098976135, "learning_rate": 2.0801639318156977e-05, "loss": 0.0577, "step": 17807 }, { "epoch": 0.3924044357037796, "grad_norm": 0.9604031443595886, "learning_rate": 2.0800652083942264e-05, "loss": 0.0828, "step": 17808 }, { "epoch": 0.39242647099329575, "grad_norm": 1.0763219594955444, "learning_rate": 2.079966482018172e-05, "loss": 0.0866, "step": 17809 }, { "epoch": 0.3924485062828119, "grad_norm": 0.7456347942352295, "learning_rate": 2.0798677526880373e-05, "loss": 0.0879, "step": 17810 }, { "epoch": 0.3924705415723281, "grad_norm": 0.7310542464256287, "learning_rate": 2.0797690204043252e-05, "loss": 0.1227, "step": 17811 }, { "epoch": 0.39249257686184424, "grad_norm": 0.6451762318611145, "learning_rate": 2.0796702851675388e-05, "loss": 0.0652, "step": 17812 }, { "epoch": 0.3925146121513604, "grad_norm": 0.8673369288444519, "learning_rate": 2.079571546978181e-05, "loss": 0.0918, "step": 17813 }, { "epoch": 0.3925366474408766, "grad_norm": 0.9633846879005432, "learning_rate": 2.079472805836754e-05, "loss": 0.1048, "step": 17814 }, { "epoch": 0.39255868273039274, "grad_norm": 0.781030535697937, "learning_rate": 2.0793740617437616e-05, "loss": 0.0892, "step": 17815 }, { "epoch": 0.3925807180199089, "grad_norm": 0.6016314625740051, "learning_rate": 2.079275314699706e-05, "loss": 0.0892, "step": 17816 }, { "epoch": 0.39260275330942507, "grad_norm": 0.7844539284706116, "learning_rate": 2.079176564705091e-05, "loss": 0.0876, "step": 17817 }, { "epoch": 0.3926247885989412, "grad_norm": 1.121001958847046, "learning_rate": 2.0790778117604193e-05, "loss": 0.0559, "step": 17818 }, { "epoch": 0.39264682388845734, "grad_norm": 0.654716432094574, "learning_rate": 2.0789790558661937e-05, "loss": 0.0843, "step": 17819 }, { "epoch": 0.3926688591779735, "grad_norm": 0.7252532243728638, "learning_rate": 2.0788802970229172e-05, "loss": 0.0604, "step": 17820 }, { "epoch": 0.39269089446748967, "grad_norm": 0.9544902443885803, "learning_rate": 2.0787815352310926e-05, "loss": 0.1095, "step": 17821 }, { "epoch": 0.39271292975700584, "grad_norm": 0.30436745285987854, "learning_rate": 2.0786827704912233e-05, "loss": 0.0603, "step": 17822 }, { "epoch": 0.392734965046522, "grad_norm": 0.9687546491622925, "learning_rate": 2.078584002803813e-05, "loss": 0.0998, "step": 17823 }, { "epoch": 0.39275700033603816, "grad_norm": 0.36605632305145264, "learning_rate": 2.0784852321693634e-05, "loss": 0.0614, "step": 17824 }, { "epoch": 0.39277903562555433, "grad_norm": 0.9309653639793396, "learning_rate": 2.0783864585883786e-05, "loss": 0.0908, "step": 17825 }, { "epoch": 0.3928010709150705, "grad_norm": 0.3280067443847656, "learning_rate": 2.0782876820613615e-05, "loss": 0.0885, "step": 17826 }, { "epoch": 0.39282310620458666, "grad_norm": 0.783263087272644, "learning_rate": 2.078188902588815e-05, "loss": 0.0801, "step": 17827 }, { "epoch": 0.3928451414941028, "grad_norm": 0.3444576859474182, "learning_rate": 2.0780901201712425e-05, "loss": 0.0766, "step": 17828 }, { "epoch": 0.392867176783619, "grad_norm": 0.8523809313774109, "learning_rate": 2.077991334809147e-05, "loss": 0.0679, "step": 17829 }, { "epoch": 0.39288921207313515, "grad_norm": 0.756597101688385, "learning_rate": 2.0778925465030314e-05, "loss": 0.0754, "step": 17830 }, { "epoch": 0.39291124736265126, "grad_norm": 0.6135099530220032, "learning_rate": 2.0777937552533992e-05, "loss": 0.0622, "step": 17831 }, { "epoch": 0.3929332826521674, "grad_norm": 0.46763503551483154, "learning_rate": 2.0776949610607537e-05, "loss": 0.0613, "step": 17832 }, { "epoch": 0.3929553179416836, "grad_norm": 0.8618776202201843, "learning_rate": 2.077596163925598e-05, "loss": 0.1311, "step": 17833 }, { "epoch": 0.39297735323119976, "grad_norm": 0.5788809657096863, "learning_rate": 2.077497363848436e-05, "loss": 0.0818, "step": 17834 }, { "epoch": 0.3929993885207159, "grad_norm": 1.0521316528320312, "learning_rate": 2.077398560829769e-05, "loss": 0.1218, "step": 17835 }, { "epoch": 0.3930214238102321, "grad_norm": 0.863990843296051, "learning_rate": 2.0772997548701022e-05, "loss": 0.0995, "step": 17836 }, { "epoch": 0.39304345909974825, "grad_norm": 0.7947934865951538, "learning_rate": 2.077200945969938e-05, "loss": 0.1239, "step": 17837 }, { "epoch": 0.3930654943892644, "grad_norm": 0.6562746167182922, "learning_rate": 2.07710213412978e-05, "loss": 0.1383, "step": 17838 }, { "epoch": 0.3930875296787806, "grad_norm": 0.5855689644813538, "learning_rate": 2.0770033193501312e-05, "loss": 0.0627, "step": 17839 }, { "epoch": 0.39310956496829674, "grad_norm": 0.615980863571167, "learning_rate": 2.0769045016314953e-05, "loss": 0.0884, "step": 17840 }, { "epoch": 0.3931316002578129, "grad_norm": 1.095422625541687, "learning_rate": 2.0768056809743753e-05, "loss": 0.0862, "step": 17841 }, { "epoch": 0.3931536355473291, "grad_norm": 0.8068232536315918, "learning_rate": 2.0767068573792748e-05, "loss": 0.0933, "step": 17842 }, { "epoch": 0.3931756708368452, "grad_norm": 0.5949768424034119, "learning_rate": 2.076608030846697e-05, "loss": 0.0868, "step": 17843 }, { "epoch": 0.39319770612636135, "grad_norm": 0.8087986707687378, "learning_rate": 2.076509201377145e-05, "loss": 0.0851, "step": 17844 }, { "epoch": 0.3932197414158775, "grad_norm": 0.824069082736969, "learning_rate": 2.0764103689711222e-05, "loss": 0.0856, "step": 17845 }, { "epoch": 0.3932417767053937, "grad_norm": 0.9113011956214905, "learning_rate": 2.076311533629133e-05, "loss": 0.0816, "step": 17846 }, { "epoch": 0.39326381199490984, "grad_norm": 0.6095337271690369, "learning_rate": 2.0762126953516796e-05, "loss": 0.0666, "step": 17847 }, { "epoch": 0.393285847284426, "grad_norm": 0.9665021896362305, "learning_rate": 2.0761138541392663e-05, "loss": 0.1091, "step": 17848 }, { "epoch": 0.39330788257394217, "grad_norm": 0.8461431264877319, "learning_rate": 2.0760150099923967e-05, "loss": 0.0641, "step": 17849 }, { "epoch": 0.39332991786345833, "grad_norm": 0.7424440979957581, "learning_rate": 2.0759161629115727e-05, "loss": 0.079, "step": 17850 }, { "epoch": 0.3933519531529745, "grad_norm": 0.662411630153656, "learning_rate": 2.0758173128972997e-05, "loss": 0.0866, "step": 17851 }, { "epoch": 0.39337398844249066, "grad_norm": 0.612541913986206, "learning_rate": 2.0757184599500797e-05, "loss": 0.1203, "step": 17852 }, { "epoch": 0.3933960237320068, "grad_norm": 0.6932822465896606, "learning_rate": 2.0756196040704174e-05, "loss": 0.0788, "step": 17853 }, { "epoch": 0.393418059021523, "grad_norm": 0.47093841433525085, "learning_rate": 2.0755207452588152e-05, "loss": 0.0691, "step": 17854 }, { "epoch": 0.39344009431103916, "grad_norm": 0.6630224585533142, "learning_rate": 2.0754218835157774e-05, "loss": 0.0767, "step": 17855 }, { "epoch": 0.39346212960055527, "grad_norm": 0.4315376877784729, "learning_rate": 2.075323018841808e-05, "loss": 0.0386, "step": 17856 }, { "epoch": 0.39348416489007143, "grad_norm": 0.7771088480949402, "learning_rate": 2.0752241512374092e-05, "loss": 0.0945, "step": 17857 }, { "epoch": 0.3935062001795876, "grad_norm": 0.361556738615036, "learning_rate": 2.0751252807030855e-05, "loss": 0.0683, "step": 17858 }, { "epoch": 0.39352823546910376, "grad_norm": 0.9539405107498169, "learning_rate": 2.0750264072393407e-05, "loss": 0.1339, "step": 17859 }, { "epoch": 0.3935502707586199, "grad_norm": 1.0302553176879883, "learning_rate": 2.0749275308466776e-05, "loss": 0.0814, "step": 17860 }, { "epoch": 0.3935723060481361, "grad_norm": 0.574461042881012, "learning_rate": 2.0748286515256008e-05, "loss": 0.0873, "step": 17861 }, { "epoch": 0.39359434133765225, "grad_norm": 0.48154330253601074, "learning_rate": 2.074729769276613e-05, "loss": 0.0848, "step": 17862 }, { "epoch": 0.3936163766271684, "grad_norm": 0.683491587638855, "learning_rate": 2.0746308841002185e-05, "loss": 0.0927, "step": 17863 }, { "epoch": 0.3936384119166846, "grad_norm": 0.9926062226295471, "learning_rate": 2.0745319959969207e-05, "loss": 0.112, "step": 17864 }, { "epoch": 0.39366044720620075, "grad_norm": 0.7064897418022156, "learning_rate": 2.0744331049672237e-05, "loss": 0.0775, "step": 17865 }, { "epoch": 0.3936824824957169, "grad_norm": 0.5163894295692444, "learning_rate": 2.0743342110116302e-05, "loss": 0.0791, "step": 17866 }, { "epoch": 0.3937045177852331, "grad_norm": 0.9800403118133545, "learning_rate": 2.0742353141306452e-05, "loss": 0.0971, "step": 17867 }, { "epoch": 0.3937265530747492, "grad_norm": 0.563092052936554, "learning_rate": 2.074136414324771e-05, "loss": 0.0901, "step": 17868 }, { "epoch": 0.39374858836426535, "grad_norm": 0.9073442816734314, "learning_rate": 2.0740375115945126e-05, "loss": 0.082, "step": 17869 }, { "epoch": 0.3937706236537815, "grad_norm": 0.6554228067398071, "learning_rate": 2.0739386059403734e-05, "loss": 0.0692, "step": 17870 }, { "epoch": 0.3937926589432977, "grad_norm": 0.606513500213623, "learning_rate": 2.0738396973628574e-05, "loss": 0.0638, "step": 17871 }, { "epoch": 0.39381469423281384, "grad_norm": 0.661448061466217, "learning_rate": 2.0737407858624676e-05, "loss": 0.0901, "step": 17872 }, { "epoch": 0.39383672952233, "grad_norm": 0.9277927875518799, "learning_rate": 2.0736418714397087e-05, "loss": 0.096, "step": 17873 }, { "epoch": 0.3938587648118462, "grad_norm": 0.6852157711982727, "learning_rate": 2.073542954095084e-05, "loss": 0.1243, "step": 17874 }, { "epoch": 0.39388080010136234, "grad_norm": 0.5910547971725464, "learning_rate": 2.073444033829097e-05, "loss": 0.0736, "step": 17875 }, { "epoch": 0.3939028353908785, "grad_norm": 0.7035260796546936, "learning_rate": 2.0733451106422527e-05, "loss": 0.0793, "step": 17876 }, { "epoch": 0.39392487068039467, "grad_norm": 0.7375607490539551, "learning_rate": 2.073246184535054e-05, "loss": 0.0731, "step": 17877 }, { "epoch": 0.39394690596991083, "grad_norm": 0.664344310760498, "learning_rate": 2.0731472555080053e-05, "loss": 0.0682, "step": 17878 }, { "epoch": 0.393968941259427, "grad_norm": 0.6135240793228149, "learning_rate": 2.0730483235616096e-05, "loss": 0.0795, "step": 17879 }, { "epoch": 0.3939909765489431, "grad_norm": 0.732519268989563, "learning_rate": 2.0729493886963727e-05, "loss": 0.0738, "step": 17880 }, { "epoch": 0.39401301183845927, "grad_norm": 0.978336751461029, "learning_rate": 2.0728504509127968e-05, "loss": 0.0679, "step": 17881 }, { "epoch": 0.39403504712797544, "grad_norm": 0.4615446627140045, "learning_rate": 2.0727515102113863e-05, "loss": 0.0967, "step": 17882 }, { "epoch": 0.3940570824174916, "grad_norm": 0.7685093879699707, "learning_rate": 2.0726525665926448e-05, "loss": 0.054, "step": 17883 }, { "epoch": 0.39407911770700776, "grad_norm": 0.5037212371826172, "learning_rate": 2.072553620057077e-05, "loss": 0.0949, "step": 17884 }, { "epoch": 0.39410115299652393, "grad_norm": 1.1862794160842896, "learning_rate": 2.0724546706051866e-05, "loss": 0.0886, "step": 17885 }, { "epoch": 0.3941231882860401, "grad_norm": 0.5874757170677185, "learning_rate": 2.0723557182374777e-05, "loss": 0.0609, "step": 17886 }, { "epoch": 0.39414522357555626, "grad_norm": 0.5289356112480164, "learning_rate": 2.0722567629544536e-05, "loss": 0.0825, "step": 17887 }, { "epoch": 0.3941672588650724, "grad_norm": 1.1105320453643799, "learning_rate": 2.07215780475662e-05, "loss": 0.1056, "step": 17888 }, { "epoch": 0.3941892941545886, "grad_norm": 1.4518418312072754, "learning_rate": 2.0720588436444795e-05, "loss": 0.116, "step": 17889 }, { "epoch": 0.39421132944410475, "grad_norm": 0.6848915219306946, "learning_rate": 2.0719598796185365e-05, "loss": 0.104, "step": 17890 }, { "epoch": 0.3942333647336209, "grad_norm": 0.911122739315033, "learning_rate": 2.0718609126792948e-05, "loss": 0.0879, "step": 17891 }, { "epoch": 0.3942554000231371, "grad_norm": 0.42866548895835876, "learning_rate": 2.0717619428272592e-05, "loss": 0.0788, "step": 17892 }, { "epoch": 0.3942774353126532, "grad_norm": 0.31991294026374817, "learning_rate": 2.071662970062933e-05, "loss": 0.065, "step": 17893 }, { "epoch": 0.39429947060216936, "grad_norm": 0.4348328411579132, "learning_rate": 2.0715639943868212e-05, "loss": 0.0857, "step": 17894 }, { "epoch": 0.3943215058916855, "grad_norm": 1.1196762323379517, "learning_rate": 2.071465015799427e-05, "loss": 0.0885, "step": 17895 }, { "epoch": 0.3943435411812017, "grad_norm": 0.5019680857658386, "learning_rate": 2.0713660343012553e-05, "loss": 0.0899, "step": 17896 }, { "epoch": 0.39436557647071785, "grad_norm": 0.8294033408164978, "learning_rate": 2.0712670498928102e-05, "loss": 0.0977, "step": 17897 }, { "epoch": 0.394387611760234, "grad_norm": 0.7759199738502502, "learning_rate": 2.0711680625745955e-05, "loss": 0.0791, "step": 17898 }, { "epoch": 0.3944096470497502, "grad_norm": 0.6336545348167419, "learning_rate": 2.0710690723471153e-05, "loss": 0.1001, "step": 17899 }, { "epoch": 0.39443168233926634, "grad_norm": 0.4959147274494171, "learning_rate": 2.0709700792108745e-05, "loss": 0.0942, "step": 17900 }, { "epoch": 0.3944537176287825, "grad_norm": 0.7358671426773071, "learning_rate": 2.070871083166377e-05, "loss": 0.0934, "step": 17901 }, { "epoch": 0.39447575291829867, "grad_norm": 0.6721664071083069, "learning_rate": 2.0707720842141264e-05, "loss": 0.0973, "step": 17902 }, { "epoch": 0.39449778820781484, "grad_norm": 0.761971652507782, "learning_rate": 2.0706730823546277e-05, "loss": 0.1036, "step": 17903 }, { "epoch": 0.394519823497331, "grad_norm": 0.7594249248504639, "learning_rate": 2.0705740775883853e-05, "loss": 0.0648, "step": 17904 }, { "epoch": 0.3945418587868471, "grad_norm": 0.5330234169960022, "learning_rate": 2.070475069915903e-05, "loss": 0.0682, "step": 17905 }, { "epoch": 0.3945638940763633, "grad_norm": 0.8576955795288086, "learning_rate": 2.070376059337685e-05, "loss": 0.1232, "step": 17906 }, { "epoch": 0.39458592936587944, "grad_norm": 0.7913893461227417, "learning_rate": 2.070277045854236e-05, "loss": 0.1168, "step": 17907 }, { "epoch": 0.3946079646553956, "grad_norm": 0.6877694725990295, "learning_rate": 2.0701780294660605e-05, "loss": 0.0893, "step": 17908 }, { "epoch": 0.39462999994491177, "grad_norm": 0.6301702260971069, "learning_rate": 2.0700790101736622e-05, "loss": 0.1051, "step": 17909 }, { "epoch": 0.39465203523442793, "grad_norm": 0.765156090259552, "learning_rate": 2.069979987977546e-05, "loss": 0.0788, "step": 17910 }, { "epoch": 0.3946740705239441, "grad_norm": 0.6015766263008118, "learning_rate": 2.0698809628782156e-05, "loss": 0.085, "step": 17911 }, { "epoch": 0.39469610581346026, "grad_norm": 0.832149863243103, "learning_rate": 2.0697819348761763e-05, "loss": 0.0858, "step": 17912 }, { "epoch": 0.3947181411029764, "grad_norm": 0.5134787559509277, "learning_rate": 2.0696829039719324e-05, "loss": 0.0731, "step": 17913 }, { "epoch": 0.3947401763924926, "grad_norm": 0.7983025312423706, "learning_rate": 2.0695838701659874e-05, "loss": 0.0948, "step": 17914 }, { "epoch": 0.39476221168200876, "grad_norm": 0.7031692862510681, "learning_rate": 2.069484833458846e-05, "loss": 0.0833, "step": 17915 }, { "epoch": 0.3947842469715249, "grad_norm": 0.5137563347816467, "learning_rate": 2.0693857938510138e-05, "loss": 0.0735, "step": 17916 }, { "epoch": 0.39480628226104103, "grad_norm": 0.9995980858802795, "learning_rate": 2.069286751342994e-05, "loss": 0.0951, "step": 17917 }, { "epoch": 0.3948283175505572, "grad_norm": 0.3871174454689026, "learning_rate": 2.0691877059352918e-05, "loss": 0.0805, "step": 17918 }, { "epoch": 0.39485035284007336, "grad_norm": 0.7345985770225525, "learning_rate": 2.069088657628411e-05, "loss": 0.0743, "step": 17919 }, { "epoch": 0.3948723881295895, "grad_norm": 0.6885915398597717, "learning_rate": 2.0689896064228566e-05, "loss": 0.0667, "step": 17920 }, { "epoch": 0.3948944234191057, "grad_norm": 0.5644320845603943, "learning_rate": 2.068890552319133e-05, "loss": 0.1069, "step": 17921 }, { "epoch": 0.39491645870862185, "grad_norm": 1.0142911672592163, "learning_rate": 2.068791495317745e-05, "loss": 0.0638, "step": 17922 }, { "epoch": 0.394938493998138, "grad_norm": 1.0624550580978394, "learning_rate": 2.0686924354191966e-05, "loss": 0.0784, "step": 17923 }, { "epoch": 0.3949605292876542, "grad_norm": 0.9821957349777222, "learning_rate": 2.068593372623993e-05, "loss": 0.1118, "step": 17924 }, { "epoch": 0.39498256457717035, "grad_norm": 0.4684270918369293, "learning_rate": 2.0684943069326377e-05, "loss": 0.0814, "step": 17925 }, { "epoch": 0.3950045998666865, "grad_norm": 1.8881423473358154, "learning_rate": 2.0683952383456368e-05, "loss": 0.0595, "step": 17926 }, { "epoch": 0.3950266351562027, "grad_norm": 0.7139308452606201, "learning_rate": 2.068296166863494e-05, "loss": 0.1074, "step": 17927 }, { "epoch": 0.39504867044571884, "grad_norm": 0.9942402839660645, "learning_rate": 2.0681970924867144e-05, "loss": 0.1338, "step": 17928 }, { "epoch": 0.395070705735235, "grad_norm": 1.12400221824646, "learning_rate": 2.0680980152158016e-05, "loss": 0.1171, "step": 17929 }, { "epoch": 0.3950927410247511, "grad_norm": 0.8303423523902893, "learning_rate": 2.0679989350512617e-05, "loss": 0.0675, "step": 17930 }, { "epoch": 0.3951147763142673, "grad_norm": 0.7238925695419312, "learning_rate": 2.067899851993598e-05, "loss": 0.0991, "step": 17931 }, { "epoch": 0.39513681160378344, "grad_norm": 0.7291034460067749, "learning_rate": 2.067800766043316e-05, "loss": 0.093, "step": 17932 }, { "epoch": 0.3951588468932996, "grad_norm": 0.7526109218597412, "learning_rate": 2.0677016772009202e-05, "loss": 0.1209, "step": 17933 }, { "epoch": 0.3951808821828158, "grad_norm": 0.5525645017623901, "learning_rate": 2.0676025854669158e-05, "loss": 0.093, "step": 17934 }, { "epoch": 0.39520291747233194, "grad_norm": 0.725404679775238, "learning_rate": 2.0675034908418066e-05, "loss": 0.0906, "step": 17935 }, { "epoch": 0.3952249527618481, "grad_norm": 0.7808708548545837, "learning_rate": 2.067404393326098e-05, "loss": 0.084, "step": 17936 }, { "epoch": 0.39524698805136427, "grad_norm": 0.5265064239501953, "learning_rate": 2.067305292920294e-05, "loss": 0.0975, "step": 17937 }, { "epoch": 0.39526902334088043, "grad_norm": 0.9325913786888123, "learning_rate": 2.067206189624901e-05, "loss": 0.1053, "step": 17938 }, { "epoch": 0.3952910586303966, "grad_norm": 0.8051109313964844, "learning_rate": 2.067107083440422e-05, "loss": 0.074, "step": 17939 }, { "epoch": 0.39531309391991276, "grad_norm": 0.7624151110649109, "learning_rate": 2.0670079743673623e-05, "loss": 0.0825, "step": 17940 }, { "epoch": 0.3953351292094289, "grad_norm": 0.7052751779556274, "learning_rate": 2.066908862406227e-05, "loss": 0.1139, "step": 17941 }, { "epoch": 0.39535716449894504, "grad_norm": 1.126129150390625, "learning_rate": 2.0668097475575215e-05, "loss": 0.0702, "step": 17942 }, { "epoch": 0.3953791997884612, "grad_norm": 0.5817402005195618, "learning_rate": 2.0667106298217496e-05, "loss": 0.0843, "step": 17943 }, { "epoch": 0.39540123507797736, "grad_norm": 0.9196232557296753, "learning_rate": 2.0666115091994164e-05, "loss": 0.0908, "step": 17944 }, { "epoch": 0.39542327036749353, "grad_norm": 0.28227537870407104, "learning_rate": 2.0665123856910276e-05, "loss": 0.0645, "step": 17945 }, { "epoch": 0.3954453056570097, "grad_norm": 0.6073575019836426, "learning_rate": 2.0664132592970865e-05, "loss": 0.081, "step": 17946 }, { "epoch": 0.39546734094652586, "grad_norm": 0.698013424873352, "learning_rate": 2.0663141300180996e-05, "loss": 0.0867, "step": 17947 }, { "epoch": 0.395489376236042, "grad_norm": 0.4821208119392395, "learning_rate": 2.0662149978545706e-05, "loss": 0.063, "step": 17948 }, { "epoch": 0.3955114115255582, "grad_norm": 0.5797393321990967, "learning_rate": 2.0661158628070055e-05, "loss": 0.1049, "step": 17949 }, { "epoch": 0.39553344681507435, "grad_norm": 0.7108669877052307, "learning_rate": 2.0660167248759084e-05, "loss": 0.0856, "step": 17950 }, { "epoch": 0.3955554821045905, "grad_norm": 0.5352692604064941, "learning_rate": 2.065917584061785e-05, "loss": 0.074, "step": 17951 }, { "epoch": 0.3955775173941067, "grad_norm": 0.6714522838592529, "learning_rate": 2.0658184403651397e-05, "loss": 0.0859, "step": 17952 }, { "epoch": 0.39559955268362285, "grad_norm": 0.6174001097679138, "learning_rate": 2.0657192937864777e-05, "loss": 0.0886, "step": 17953 }, { "epoch": 0.39562158797313896, "grad_norm": 0.9174215793609619, "learning_rate": 2.0656201443263038e-05, "loss": 0.1057, "step": 17954 }, { "epoch": 0.3956436232626551, "grad_norm": 1.1042982339859009, "learning_rate": 2.065520991985123e-05, "loss": 0.1041, "step": 17955 }, { "epoch": 0.3956656585521713, "grad_norm": 0.5695433020591736, "learning_rate": 2.0654218367634412e-05, "loss": 0.0905, "step": 17956 }, { "epoch": 0.39568769384168745, "grad_norm": 1.3093563318252563, "learning_rate": 2.0653226786617623e-05, "loss": 0.1, "step": 17957 }, { "epoch": 0.3957097291312036, "grad_norm": 0.832331657409668, "learning_rate": 2.0652235176805918e-05, "loss": 0.0715, "step": 17958 }, { "epoch": 0.3957317644207198, "grad_norm": 0.6743043661117554, "learning_rate": 2.065124353820435e-05, "loss": 0.073, "step": 17959 }, { "epoch": 0.39575379971023594, "grad_norm": 0.7537397742271423, "learning_rate": 2.0650251870817968e-05, "loss": 0.1014, "step": 17960 }, { "epoch": 0.3957758349997521, "grad_norm": 0.9511464238166809, "learning_rate": 2.0649260174651827e-05, "loss": 0.1311, "step": 17961 }, { "epoch": 0.39579787028926827, "grad_norm": 0.5978109836578369, "learning_rate": 2.064826844971097e-05, "loss": 0.1091, "step": 17962 }, { "epoch": 0.39581990557878444, "grad_norm": 0.9647929668426514, "learning_rate": 2.064727669600045e-05, "loss": 0.1168, "step": 17963 }, { "epoch": 0.3958419408683006, "grad_norm": 0.22543799877166748, "learning_rate": 2.0646284913525327e-05, "loss": 0.071, "step": 17964 }, { "epoch": 0.39586397615781677, "grad_norm": 0.6804612874984741, "learning_rate": 2.0645293102290644e-05, "loss": 0.0638, "step": 17965 }, { "epoch": 0.39588601144733293, "grad_norm": 0.6486192941665649, "learning_rate": 2.0644301262301456e-05, "loss": 0.1095, "step": 17966 }, { "epoch": 0.39590804673684904, "grad_norm": 0.789120078086853, "learning_rate": 2.064330939356282e-05, "loss": 0.0982, "step": 17967 }, { "epoch": 0.3959300820263652, "grad_norm": 0.4930160641670227, "learning_rate": 2.0642317496079777e-05, "loss": 0.0708, "step": 17968 }, { "epoch": 0.39595211731588137, "grad_norm": 0.6225764751434326, "learning_rate": 2.064132556985739e-05, "loss": 0.0851, "step": 17969 }, { "epoch": 0.39597415260539753, "grad_norm": 0.7905788421630859, "learning_rate": 2.0640333614900705e-05, "loss": 0.0747, "step": 17970 }, { "epoch": 0.3959961878949137, "grad_norm": 0.5074189305305481, "learning_rate": 2.0639341631214773e-05, "loss": 0.118, "step": 17971 }, { "epoch": 0.39601822318442986, "grad_norm": 0.8388957977294922, "learning_rate": 2.0638349618804652e-05, "loss": 0.1043, "step": 17972 }, { "epoch": 0.396040258473946, "grad_norm": 0.6725770235061646, "learning_rate": 2.0637357577675395e-05, "loss": 0.1043, "step": 17973 }, { "epoch": 0.3960622937634622, "grad_norm": 0.4912108778953552, "learning_rate": 2.0636365507832053e-05, "loss": 0.0724, "step": 17974 }, { "epoch": 0.39608432905297836, "grad_norm": 0.7066033482551575, "learning_rate": 2.0635373409279678e-05, "loss": 0.0832, "step": 17975 }, { "epoch": 0.3961063643424945, "grad_norm": 0.7745345234870911, "learning_rate": 2.0634381282023328e-05, "loss": 0.1092, "step": 17976 }, { "epoch": 0.3961283996320107, "grad_norm": 0.9837652444839478, "learning_rate": 2.0633389126068045e-05, "loss": 0.0905, "step": 17977 }, { "epoch": 0.39615043492152685, "grad_norm": 0.5806244015693665, "learning_rate": 2.0632396941418894e-05, "loss": 0.0725, "step": 17978 }, { "epoch": 0.39617247021104296, "grad_norm": 0.6067631840705872, "learning_rate": 2.0631404728080924e-05, "loss": 0.0687, "step": 17979 }, { "epoch": 0.3961945055005591, "grad_norm": 0.4594058692455292, "learning_rate": 2.063041248605919e-05, "loss": 0.0506, "step": 17980 }, { "epoch": 0.3962165407900753, "grad_norm": 0.8813983201980591, "learning_rate": 2.0629420215358748e-05, "loss": 0.0764, "step": 17981 }, { "epoch": 0.39623857607959145, "grad_norm": 0.629213809967041, "learning_rate": 2.0628427915984646e-05, "loss": 0.0583, "step": 17982 }, { "epoch": 0.3962606113691076, "grad_norm": 0.7517106533050537, "learning_rate": 2.0627435587941948e-05, "loss": 0.1252, "step": 17983 }, { "epoch": 0.3962826466586238, "grad_norm": 0.8217512965202332, "learning_rate": 2.0626443231235702e-05, "loss": 0.0869, "step": 17984 }, { "epoch": 0.39630468194813995, "grad_norm": 0.7366949319839478, "learning_rate": 2.062545084587096e-05, "loss": 0.0751, "step": 17985 }, { "epoch": 0.3963267172376561, "grad_norm": 0.8128681182861328, "learning_rate": 2.062445843185278e-05, "loss": 0.0771, "step": 17986 }, { "epoch": 0.3963487525271723, "grad_norm": 1.3452436923980713, "learning_rate": 2.062346598918622e-05, "loss": 0.0768, "step": 17987 }, { "epoch": 0.39637078781668844, "grad_norm": 0.7064870595932007, "learning_rate": 2.062247351787633e-05, "loss": 0.0932, "step": 17988 }, { "epoch": 0.3963928231062046, "grad_norm": 0.48225146532058716, "learning_rate": 2.062148101792817e-05, "loss": 0.0967, "step": 17989 }, { "epoch": 0.39641485839572077, "grad_norm": 1.3613686561584473, "learning_rate": 2.0620488489346787e-05, "loss": 0.0976, "step": 17990 }, { "epoch": 0.3964368936852369, "grad_norm": 0.7261428236961365, "learning_rate": 2.0619495932137252e-05, "loss": 0.1028, "step": 17991 }, { "epoch": 0.39645892897475304, "grad_norm": 0.5472456812858582, "learning_rate": 2.0618503346304603e-05, "loss": 0.1129, "step": 17992 }, { "epoch": 0.3964809642642692, "grad_norm": 0.7888270020484924, "learning_rate": 2.061751073185391e-05, "loss": 0.0759, "step": 17993 }, { "epoch": 0.3965029995537854, "grad_norm": 0.9031119346618652, "learning_rate": 2.0616518088790213e-05, "loss": 0.125, "step": 17994 }, { "epoch": 0.39652503484330154, "grad_norm": 0.7995522618293762, "learning_rate": 2.0615525417118588e-05, "loss": 0.1074, "step": 17995 }, { "epoch": 0.3965470701328177, "grad_norm": 0.7628425359725952, "learning_rate": 2.0614532716844073e-05, "loss": 0.1291, "step": 17996 }, { "epoch": 0.39656910542233387, "grad_norm": 0.683907687664032, "learning_rate": 2.0613539987971737e-05, "loss": 0.0787, "step": 17997 }, { "epoch": 0.39659114071185003, "grad_norm": 0.7649326920509338, "learning_rate": 2.0612547230506625e-05, "loss": 0.0949, "step": 17998 }, { "epoch": 0.3966131760013662, "grad_norm": 0.630071759223938, "learning_rate": 2.061155444445381e-05, "loss": 0.113, "step": 17999 }, { "epoch": 0.39663521129088236, "grad_norm": 0.6420456767082214, "learning_rate": 2.061056162981833e-05, "loss": 0.0677, "step": 18000 }, { "epoch": 0.3966572465803985, "grad_norm": 0.6613651514053345, "learning_rate": 2.0609568786605257e-05, "loss": 0.1082, "step": 18001 }, { "epoch": 0.3966792818699147, "grad_norm": 1.9371916055679321, "learning_rate": 2.0608575914819638e-05, "loss": 0.058, "step": 18002 }, { "epoch": 0.39670131715943086, "grad_norm": 0.6485080718994141, "learning_rate": 2.0607583014466536e-05, "loss": 0.0756, "step": 18003 }, { "epoch": 0.39672335244894696, "grad_norm": 0.9652023911476135, "learning_rate": 2.0606590085551005e-05, "loss": 0.0837, "step": 18004 }, { "epoch": 0.39674538773846313, "grad_norm": 0.8277058601379395, "learning_rate": 2.060559712807811e-05, "loss": 0.0928, "step": 18005 }, { "epoch": 0.3967674230279793, "grad_norm": 0.540052056312561, "learning_rate": 2.0604604142052894e-05, "loss": 0.0955, "step": 18006 }, { "epoch": 0.39678945831749546, "grad_norm": 0.8277685046195984, "learning_rate": 2.060361112748043e-05, "loss": 0.0925, "step": 18007 }, { "epoch": 0.3968114936070116, "grad_norm": 1.1936770677566528, "learning_rate": 2.0602618084365773e-05, "loss": 0.0938, "step": 18008 }, { "epoch": 0.3968335288965278, "grad_norm": 0.5418371558189392, "learning_rate": 2.060162501271397e-05, "loss": 0.089, "step": 18009 }, { "epoch": 0.39685556418604395, "grad_norm": 0.799851655960083, "learning_rate": 2.0600631912530094e-05, "loss": 0.0756, "step": 18010 }, { "epoch": 0.3968775994755601, "grad_norm": 0.6691021919250488, "learning_rate": 2.0599638783819186e-05, "loss": 0.0984, "step": 18011 }, { "epoch": 0.3968996347650763, "grad_norm": 0.6480470895767212, "learning_rate": 2.0598645626586324e-05, "loss": 0.0722, "step": 18012 }, { "epoch": 0.39692167005459245, "grad_norm": 0.7734681963920593, "learning_rate": 2.0597652440836556e-05, "loss": 0.108, "step": 18013 }, { "epoch": 0.3969437053441086, "grad_norm": 0.8396872282028198, "learning_rate": 2.059665922657494e-05, "loss": 0.0722, "step": 18014 }, { "epoch": 0.3969657406336248, "grad_norm": 0.9098224639892578, "learning_rate": 2.059566598380654e-05, "loss": 0.0754, "step": 18015 }, { "epoch": 0.3969877759231409, "grad_norm": 0.8550655841827393, "learning_rate": 2.0594672712536412e-05, "loss": 0.0802, "step": 18016 }, { "epoch": 0.39700981121265705, "grad_norm": 0.7547065615653992, "learning_rate": 2.0593679412769613e-05, "loss": 0.108, "step": 18017 }, { "epoch": 0.3970318465021732, "grad_norm": 0.3707602024078369, "learning_rate": 2.0592686084511207e-05, "loss": 0.0899, "step": 18018 }, { "epoch": 0.3970538817916894, "grad_norm": 0.5330619812011719, "learning_rate": 2.059169272776625e-05, "loss": 0.0829, "step": 18019 }, { "epoch": 0.39707591708120554, "grad_norm": 0.704237699508667, "learning_rate": 2.0590699342539805e-05, "loss": 0.1065, "step": 18020 }, { "epoch": 0.3970979523707217, "grad_norm": 0.6600368022918701, "learning_rate": 2.058970592883693e-05, "loss": 0.075, "step": 18021 }, { "epoch": 0.39711998766023787, "grad_norm": 1.2213671207427979, "learning_rate": 2.0588712486662686e-05, "loss": 0.1135, "step": 18022 }, { "epoch": 0.39714202294975404, "grad_norm": 0.8118261098861694, "learning_rate": 2.0587719016022138e-05, "loss": 0.0867, "step": 18023 }, { "epoch": 0.3971640582392702, "grad_norm": 0.9298679232597351, "learning_rate": 2.0586725516920334e-05, "loss": 0.0926, "step": 18024 }, { "epoch": 0.39718609352878637, "grad_norm": 0.42046722769737244, "learning_rate": 2.058573198936234e-05, "loss": 0.082, "step": 18025 }, { "epoch": 0.39720812881830253, "grad_norm": 0.7637796401977539, "learning_rate": 2.058473843335322e-05, "loss": 0.1069, "step": 18026 }, { "epoch": 0.3972301641078187, "grad_norm": 1.0035322904586792, "learning_rate": 2.058374484889803e-05, "loss": 0.0996, "step": 18027 }, { "epoch": 0.3972521993973348, "grad_norm": 0.5603729486465454, "learning_rate": 2.058275123600184e-05, "loss": 0.1084, "step": 18028 }, { "epoch": 0.39727423468685097, "grad_norm": 0.587537944316864, "learning_rate": 2.0581757594669695e-05, "loss": 0.0544, "step": 18029 }, { "epoch": 0.39729626997636713, "grad_norm": 0.9133626818656921, "learning_rate": 2.058076392490667e-05, "loss": 0.087, "step": 18030 }, { "epoch": 0.3973183052658833, "grad_norm": 0.7713159918785095, "learning_rate": 2.0579770226717825e-05, "loss": 0.0728, "step": 18031 }, { "epoch": 0.39734034055539946, "grad_norm": 1.2474088668823242, "learning_rate": 2.0578776500108213e-05, "loss": 0.13, "step": 18032 }, { "epoch": 0.3973623758449156, "grad_norm": 0.5553929805755615, "learning_rate": 2.0577782745082902e-05, "loss": 0.0742, "step": 18033 }, { "epoch": 0.3973844111344318, "grad_norm": 0.5072131156921387, "learning_rate": 2.0576788961646954e-05, "loss": 0.0662, "step": 18034 }, { "epoch": 0.39740644642394796, "grad_norm": 0.5653300881385803, "learning_rate": 2.0575795149805424e-05, "loss": 0.0558, "step": 18035 }, { "epoch": 0.3974284817134641, "grad_norm": 0.49116191267967224, "learning_rate": 2.0574801309563384e-05, "loss": 0.0551, "step": 18036 }, { "epoch": 0.3974505170029803, "grad_norm": 0.44717416167259216, "learning_rate": 2.0573807440925893e-05, "loss": 0.0859, "step": 18037 }, { "epoch": 0.39747255229249645, "grad_norm": 0.7697303891181946, "learning_rate": 2.057281354389801e-05, "loss": 0.1138, "step": 18038 }, { "epoch": 0.3974945875820126, "grad_norm": 0.6857022643089294, "learning_rate": 2.05718196184848e-05, "loss": 0.0907, "step": 18039 }, { "epoch": 0.3975166228715288, "grad_norm": 0.7391141057014465, "learning_rate": 2.0570825664691322e-05, "loss": 0.0836, "step": 18040 }, { "epoch": 0.3975386581610449, "grad_norm": 0.9992356300354004, "learning_rate": 2.0569831682522644e-05, "loss": 0.1581, "step": 18041 }, { "epoch": 0.39756069345056105, "grad_norm": 0.5725916624069214, "learning_rate": 2.0568837671983824e-05, "loss": 0.0769, "step": 18042 }, { "epoch": 0.3975827287400772, "grad_norm": 0.4092424511909485, "learning_rate": 2.056784363307993e-05, "loss": 0.0933, "step": 18043 }, { "epoch": 0.3976047640295934, "grad_norm": 0.8129077553749084, "learning_rate": 2.0566849565816025e-05, "loss": 0.0901, "step": 18044 }, { "epoch": 0.39762679931910955, "grad_norm": 0.43387559056282043, "learning_rate": 2.0565855470197163e-05, "loss": 0.0787, "step": 18045 }, { "epoch": 0.3976488346086257, "grad_norm": 0.7959995269775391, "learning_rate": 2.056486134622842e-05, "loss": 0.1013, "step": 18046 }, { "epoch": 0.3976708698981419, "grad_norm": 0.5442610383033752, "learning_rate": 2.0563867193914852e-05, "loss": 0.1078, "step": 18047 }, { "epoch": 0.39769290518765804, "grad_norm": 0.8296104073524475, "learning_rate": 2.0562873013261526e-05, "loss": 0.0983, "step": 18048 }, { "epoch": 0.3977149404771742, "grad_norm": 0.7282569408416748, "learning_rate": 2.0561878804273505e-05, "loss": 0.1237, "step": 18049 }, { "epoch": 0.39773697576669037, "grad_norm": 0.4705082178115845, "learning_rate": 2.056088456695585e-05, "loss": 0.0476, "step": 18050 }, { "epoch": 0.39775901105620654, "grad_norm": 0.6771228313446045, "learning_rate": 2.055989030131363e-05, "loss": 0.0917, "step": 18051 }, { "epoch": 0.3977810463457227, "grad_norm": 0.8031463623046875, "learning_rate": 2.0558896007351907e-05, "loss": 0.0895, "step": 18052 }, { "epoch": 0.3978030816352388, "grad_norm": 0.4270029366016388, "learning_rate": 2.0557901685075743e-05, "loss": 0.0588, "step": 18053 }, { "epoch": 0.397825116924755, "grad_norm": 0.8466294407844543, "learning_rate": 2.055690733449021e-05, "loss": 0.077, "step": 18054 }, { "epoch": 0.39784715221427114, "grad_norm": 0.9553384184837341, "learning_rate": 2.0555912955600366e-05, "loss": 0.0981, "step": 18055 }, { "epoch": 0.3978691875037873, "grad_norm": 0.871482789516449, "learning_rate": 2.055491854841128e-05, "loss": 0.103, "step": 18056 }, { "epoch": 0.39789122279330347, "grad_norm": 0.9562103748321533, "learning_rate": 2.0553924112928014e-05, "loss": 0.0681, "step": 18057 }, { "epoch": 0.39791325808281963, "grad_norm": 0.6484475135803223, "learning_rate": 2.0552929649155633e-05, "loss": 0.0764, "step": 18058 }, { "epoch": 0.3979352933723358, "grad_norm": 1.2930493354797363, "learning_rate": 2.05519351570992e-05, "loss": 0.1196, "step": 18059 }, { "epoch": 0.39795732866185196, "grad_norm": 0.38257893919944763, "learning_rate": 2.055094063676379e-05, "loss": 0.0565, "step": 18060 }, { "epoch": 0.3979793639513681, "grad_norm": 0.9023085832595825, "learning_rate": 2.054994608815446e-05, "loss": 0.105, "step": 18061 }, { "epoch": 0.3980013992408843, "grad_norm": 0.7913432717323303, "learning_rate": 2.054895151127628e-05, "loss": 0.086, "step": 18062 }, { "epoch": 0.39802343453040046, "grad_norm": 0.6942925453186035, "learning_rate": 2.0547956906134312e-05, "loss": 0.0801, "step": 18063 }, { "epoch": 0.3980454698199166, "grad_norm": 0.8915741443634033, "learning_rate": 2.0546962272733632e-05, "loss": 0.1034, "step": 18064 }, { "epoch": 0.39806750510943273, "grad_norm": 0.49088993668556213, "learning_rate": 2.054596761107929e-05, "loss": 0.053, "step": 18065 }, { "epoch": 0.3980895403989489, "grad_norm": 0.4259903132915497, "learning_rate": 2.0544972921176364e-05, "loss": 0.063, "step": 18066 }, { "epoch": 0.39811157568846506, "grad_norm": 0.5596545934677124, "learning_rate": 2.0543978203029913e-05, "loss": 0.0783, "step": 18067 }, { "epoch": 0.3981336109779812, "grad_norm": 0.8954631686210632, "learning_rate": 2.0542983456645016e-05, "loss": 0.1064, "step": 18068 }, { "epoch": 0.3981556462674974, "grad_norm": 0.5748318433761597, "learning_rate": 2.0541988682026726e-05, "loss": 0.0865, "step": 18069 }, { "epoch": 0.39817768155701355, "grad_norm": 1.6354544162750244, "learning_rate": 2.0540993879180117e-05, "loss": 0.0798, "step": 18070 }, { "epoch": 0.3981997168465297, "grad_norm": 0.8490318655967712, "learning_rate": 2.0539999048110256e-05, "loss": 0.1382, "step": 18071 }, { "epoch": 0.3982217521360459, "grad_norm": 0.9750061631202698, "learning_rate": 2.0539004188822213e-05, "loss": 0.0792, "step": 18072 }, { "epoch": 0.39824378742556205, "grad_norm": 0.7755231857299805, "learning_rate": 2.0538009301321044e-05, "loss": 0.1139, "step": 18073 }, { "epoch": 0.3982658227150782, "grad_norm": 0.6451044082641602, "learning_rate": 2.0537014385611825e-05, "loss": 0.0815, "step": 18074 }, { "epoch": 0.3982878580045944, "grad_norm": 0.938209593296051, "learning_rate": 2.0536019441699628e-05, "loss": 0.1224, "step": 18075 }, { "epoch": 0.39830989329411054, "grad_norm": 0.9845672845840454, "learning_rate": 2.0535024469589507e-05, "loss": 0.0976, "step": 18076 }, { "epoch": 0.3983319285836267, "grad_norm": 0.7172842621803284, "learning_rate": 2.0534029469286546e-05, "loss": 0.0964, "step": 18077 }, { "epoch": 0.3983539638731428, "grad_norm": 0.7316780686378479, "learning_rate": 2.05330344407958e-05, "loss": 0.0817, "step": 18078 }, { "epoch": 0.398375999162659, "grad_norm": 0.9796819686889648, "learning_rate": 2.0532039384122346e-05, "loss": 0.075, "step": 18079 }, { "epoch": 0.39839803445217514, "grad_norm": 0.3780626058578491, "learning_rate": 2.053104429927125e-05, "loss": 0.0889, "step": 18080 }, { "epoch": 0.3984200697416913, "grad_norm": 0.7696143388748169, "learning_rate": 2.053004918624758e-05, "loss": 0.0823, "step": 18081 }, { "epoch": 0.39844210503120747, "grad_norm": 0.4940365254878998, "learning_rate": 2.0529054045056397e-05, "loss": 0.0853, "step": 18082 }, { "epoch": 0.39846414032072364, "grad_norm": 0.8587397336959839, "learning_rate": 2.0528058875702786e-05, "loss": 0.0952, "step": 18083 }, { "epoch": 0.3984861756102398, "grad_norm": 0.9217486381530762, "learning_rate": 2.0527063678191797e-05, "loss": 0.0954, "step": 18084 }, { "epoch": 0.39850821089975597, "grad_norm": 0.4155929386615753, "learning_rate": 2.0526068452528517e-05, "loss": 0.0598, "step": 18085 }, { "epoch": 0.39853024618927213, "grad_norm": 0.9644028544425964, "learning_rate": 2.0525073198718e-05, "loss": 0.0723, "step": 18086 }, { "epoch": 0.3985522814787883, "grad_norm": 0.46742987632751465, "learning_rate": 2.052407791676533e-05, "loss": 0.1245, "step": 18087 }, { "epoch": 0.39857431676830446, "grad_norm": 0.7596082091331482, "learning_rate": 2.0523082606675565e-05, "loss": 0.0754, "step": 18088 }, { "epoch": 0.3985963520578206, "grad_norm": 0.7492856979370117, "learning_rate": 2.052208726845378e-05, "loss": 0.072, "step": 18089 }, { "epoch": 0.39861838734733673, "grad_norm": 0.6847529411315918, "learning_rate": 2.052109190210504e-05, "loss": 0.082, "step": 18090 }, { "epoch": 0.3986404226368529, "grad_norm": 0.7566355466842651, "learning_rate": 2.0520096507634424e-05, "loss": 0.0804, "step": 18091 }, { "epoch": 0.39866245792636906, "grad_norm": 0.8765127658843994, "learning_rate": 2.0519101085046988e-05, "loss": 0.1064, "step": 18092 }, { "epoch": 0.3986844932158852, "grad_norm": 0.7833921909332275, "learning_rate": 2.0518105634347815e-05, "loss": 0.0655, "step": 18093 }, { "epoch": 0.3987065285054014, "grad_norm": 0.5663047432899475, "learning_rate": 2.051711015554197e-05, "loss": 0.0711, "step": 18094 }, { "epoch": 0.39872856379491756, "grad_norm": 0.6679219007492065, "learning_rate": 2.0516114648634527e-05, "loss": 0.0901, "step": 18095 }, { "epoch": 0.3987505990844337, "grad_norm": 0.6862311363220215, "learning_rate": 2.0515119113630556e-05, "loss": 0.0712, "step": 18096 }, { "epoch": 0.3987726343739499, "grad_norm": 0.8453536629676819, "learning_rate": 2.0514123550535122e-05, "loss": 0.0788, "step": 18097 }, { "epoch": 0.39879466966346605, "grad_norm": 0.80100417137146, "learning_rate": 2.05131279593533e-05, "loss": 0.0854, "step": 18098 }, { "epoch": 0.3988167049529822, "grad_norm": 1.0628573894500732, "learning_rate": 2.051213234009016e-05, "loss": 0.1004, "step": 18099 }, { "epoch": 0.3988387402424984, "grad_norm": 0.4295952320098877, "learning_rate": 2.0511136692750778e-05, "loss": 0.0587, "step": 18100 }, { "epoch": 0.39886077553201454, "grad_norm": 0.494720458984375, "learning_rate": 2.051014101734022e-05, "loss": 0.0987, "step": 18101 }, { "epoch": 0.3988828108215307, "grad_norm": 0.6990992426872253, "learning_rate": 2.0509145313863555e-05, "loss": 0.0858, "step": 18102 }, { "epoch": 0.3989048461110468, "grad_norm": 1.1879019737243652, "learning_rate": 2.0508149582325866e-05, "loss": 0.0988, "step": 18103 }, { "epoch": 0.398926881400563, "grad_norm": 0.7265468239784241, "learning_rate": 2.0507153822732213e-05, "loss": 0.0802, "step": 18104 }, { "epoch": 0.39894891669007915, "grad_norm": 0.4617844820022583, "learning_rate": 2.050615803508767e-05, "loss": 0.0613, "step": 18105 }, { "epoch": 0.3989709519795953, "grad_norm": 0.6704620718955994, "learning_rate": 2.0505162219397316e-05, "loss": 0.0859, "step": 18106 }, { "epoch": 0.3989929872691115, "grad_norm": 0.7267357110977173, "learning_rate": 2.0504166375666217e-05, "loss": 0.078, "step": 18107 }, { "epoch": 0.39901502255862764, "grad_norm": 0.6591634154319763, "learning_rate": 2.0503170503899448e-05, "loss": 0.0852, "step": 18108 }, { "epoch": 0.3990370578481438, "grad_norm": 0.6764671206474304, "learning_rate": 2.050217460410208e-05, "loss": 0.0839, "step": 18109 }, { "epoch": 0.39905909313765997, "grad_norm": 0.26314491033554077, "learning_rate": 2.0501178676279185e-05, "loss": 0.0601, "step": 18110 }, { "epoch": 0.39908112842717613, "grad_norm": 0.7668083310127258, "learning_rate": 2.0500182720435838e-05, "loss": 0.1027, "step": 18111 }, { "epoch": 0.3991031637166923, "grad_norm": 0.9151598811149597, "learning_rate": 2.0499186736577114e-05, "loss": 0.0712, "step": 18112 }, { "epoch": 0.39912519900620846, "grad_norm": 0.5629034638404846, "learning_rate": 2.0498190724708077e-05, "loss": 0.0683, "step": 18113 }, { "epoch": 0.39914723429572463, "grad_norm": 0.8173795342445374, "learning_rate": 2.0497194684833813e-05, "loss": 0.0997, "step": 18114 }, { "epoch": 0.39916926958524074, "grad_norm": 0.6191717386245728, "learning_rate": 2.049619861695938e-05, "loss": 0.1173, "step": 18115 }, { "epoch": 0.3991913048747569, "grad_norm": 0.5965124368667603, "learning_rate": 2.0495202521089867e-05, "loss": 0.0751, "step": 18116 }, { "epoch": 0.39921334016427307, "grad_norm": 0.9477732181549072, "learning_rate": 2.049420639723034e-05, "loss": 0.0889, "step": 18117 }, { "epoch": 0.39923537545378923, "grad_norm": 0.9700037240982056, "learning_rate": 2.049321024538587e-05, "loss": 0.1196, "step": 18118 }, { "epoch": 0.3992574107433054, "grad_norm": 0.9912576675415039, "learning_rate": 2.0492214065561538e-05, "loss": 0.1116, "step": 18119 }, { "epoch": 0.39927944603282156, "grad_norm": 0.9319270253181458, "learning_rate": 2.0491217857762413e-05, "loss": 0.1078, "step": 18120 }, { "epoch": 0.3993014813223377, "grad_norm": 0.7157377600669861, "learning_rate": 2.0490221621993573e-05, "loss": 0.1013, "step": 18121 }, { "epoch": 0.3993235166118539, "grad_norm": 0.48756155371665955, "learning_rate": 2.048922535826009e-05, "loss": 0.0925, "step": 18122 }, { "epoch": 0.39934555190137005, "grad_norm": 0.6064015626907349, "learning_rate": 2.0488229066567036e-05, "loss": 0.0935, "step": 18123 }, { "epoch": 0.3993675871908862, "grad_norm": 0.8247542977333069, "learning_rate": 2.0487232746919487e-05, "loss": 0.0991, "step": 18124 }, { "epoch": 0.3993896224804024, "grad_norm": 0.5099732279777527, "learning_rate": 2.0486236399322525e-05, "loss": 0.0776, "step": 18125 }, { "epoch": 0.39941165776991855, "grad_norm": 0.5210160613059998, "learning_rate": 2.0485240023781217e-05, "loss": 0.0742, "step": 18126 }, { "epoch": 0.39943369305943466, "grad_norm": 0.7102009057998657, "learning_rate": 2.048424362030064e-05, "loss": 0.0825, "step": 18127 }, { "epoch": 0.3994557283489508, "grad_norm": 0.868808388710022, "learning_rate": 2.0483247188885866e-05, "loss": 0.095, "step": 18128 }, { "epoch": 0.399477763638467, "grad_norm": 0.3998293876647949, "learning_rate": 2.0482250729541978e-05, "loss": 0.0487, "step": 18129 }, { "epoch": 0.39949979892798315, "grad_norm": 0.5094934701919556, "learning_rate": 2.0481254242274043e-05, "loss": 0.0867, "step": 18130 }, { "epoch": 0.3995218342174993, "grad_norm": 0.716947078704834, "learning_rate": 2.0480257727087146e-05, "loss": 0.0791, "step": 18131 }, { "epoch": 0.3995438695070155, "grad_norm": 0.7484541535377502, "learning_rate": 2.0479261183986353e-05, "loss": 0.0946, "step": 18132 }, { "epoch": 0.39956590479653165, "grad_norm": 0.49400776624679565, "learning_rate": 2.0478264612976746e-05, "loss": 0.0963, "step": 18133 }, { "epoch": 0.3995879400860478, "grad_norm": 0.8855500221252441, "learning_rate": 2.0477268014063404e-05, "loss": 0.0908, "step": 18134 }, { "epoch": 0.399609975375564, "grad_norm": 0.70207679271698, "learning_rate": 2.0476271387251395e-05, "loss": 0.1104, "step": 18135 }, { "epoch": 0.39963201066508014, "grad_norm": 0.6012163758277893, "learning_rate": 2.0475274732545798e-05, "loss": 0.0743, "step": 18136 }, { "epoch": 0.3996540459545963, "grad_norm": 0.7296782732009888, "learning_rate": 2.047427804995169e-05, "loss": 0.081, "step": 18137 }, { "epoch": 0.39967608124411247, "grad_norm": 1.27114737033844, "learning_rate": 2.0473281339474146e-05, "loss": 0.111, "step": 18138 }, { "epoch": 0.39969811653362863, "grad_norm": 0.6806459426879883, "learning_rate": 2.0472284601118256e-05, "loss": 0.0653, "step": 18139 }, { "epoch": 0.39972015182314474, "grad_norm": 0.5507895946502686, "learning_rate": 2.0471287834889073e-05, "loss": 0.0538, "step": 18140 }, { "epoch": 0.3997421871126609, "grad_norm": 0.6268736720085144, "learning_rate": 2.0470291040791695e-05, "loss": 0.1037, "step": 18141 }, { "epoch": 0.39976422240217707, "grad_norm": 0.7148224711418152, "learning_rate": 2.046929421883119e-05, "loss": 0.0909, "step": 18142 }, { "epoch": 0.39978625769169324, "grad_norm": 0.8595757484436035, "learning_rate": 2.0468297369012638e-05, "loss": 0.0499, "step": 18143 }, { "epoch": 0.3998082929812094, "grad_norm": 0.587636411190033, "learning_rate": 2.0467300491341115e-05, "loss": 0.087, "step": 18144 }, { "epoch": 0.39983032827072557, "grad_norm": 0.9994818568229675, "learning_rate": 2.0466303585821697e-05, "loss": 0.0887, "step": 18145 }, { "epoch": 0.39985236356024173, "grad_norm": 0.5707700252532959, "learning_rate": 2.0465306652459462e-05, "loss": 0.0997, "step": 18146 }, { "epoch": 0.3998743988497579, "grad_norm": 0.6991782188415527, "learning_rate": 2.0464309691259493e-05, "loss": 0.0868, "step": 18147 }, { "epoch": 0.39989643413927406, "grad_norm": 0.5102025866508484, "learning_rate": 2.0463312702226865e-05, "loss": 0.0916, "step": 18148 }, { "epoch": 0.3999184694287902, "grad_norm": 0.7488088607788086, "learning_rate": 2.046231568536665e-05, "loss": 0.0995, "step": 18149 }, { "epoch": 0.3999405047183064, "grad_norm": 0.5195083618164062, "learning_rate": 2.0461318640683937e-05, "loss": 0.0637, "step": 18150 }, { "epoch": 0.39996254000782255, "grad_norm": 0.9489569067955017, "learning_rate": 2.04603215681838e-05, "loss": 0.1572, "step": 18151 }, { "epoch": 0.39998457529733866, "grad_norm": 0.48432013392448425, "learning_rate": 2.0459324467871318e-05, "loss": 0.0613, "step": 18152 }, { "epoch": 0.4000066105868548, "grad_norm": 0.6314606666564941, "learning_rate": 2.0458327339751563e-05, "loss": 0.1086, "step": 18153 }, { "epoch": 0.400028645876371, "grad_norm": 0.7272760272026062, "learning_rate": 2.0457330183829626e-05, "loss": 0.0926, "step": 18154 }, { "epoch": 0.40005068116588716, "grad_norm": 0.8723968863487244, "learning_rate": 2.0456333000110575e-05, "loss": 0.0969, "step": 18155 }, { "epoch": 0.4000727164554033, "grad_norm": 0.5533886551856995, "learning_rate": 2.0455335788599498e-05, "loss": 0.0657, "step": 18156 }, { "epoch": 0.4000947517449195, "grad_norm": 0.40243417024612427, "learning_rate": 2.0454338549301465e-05, "loss": 0.075, "step": 18157 }, { "epoch": 0.40011678703443565, "grad_norm": 0.5196274518966675, "learning_rate": 2.0453341282221568e-05, "loss": 0.0856, "step": 18158 }, { "epoch": 0.4001388223239518, "grad_norm": 0.8769851326942444, "learning_rate": 2.045234398736488e-05, "loss": 0.1057, "step": 18159 }, { "epoch": 0.400160857613468, "grad_norm": 0.5018317699432373, "learning_rate": 2.0451346664736476e-05, "loss": 0.1111, "step": 18160 }, { "epoch": 0.40018289290298414, "grad_norm": 0.5158257484436035, "learning_rate": 2.045034931434144e-05, "loss": 0.0598, "step": 18161 }, { "epoch": 0.4002049281925003, "grad_norm": 0.6403974294662476, "learning_rate": 2.0449351936184853e-05, "loss": 0.0703, "step": 18162 }, { "epoch": 0.4002269634820165, "grad_norm": 1.1812546253204346, "learning_rate": 2.0448354530271793e-05, "loss": 0.1223, "step": 18163 }, { "epoch": 0.4002489987715326, "grad_norm": 0.7517534494400024, "learning_rate": 2.0447357096607346e-05, "loss": 0.0537, "step": 18164 }, { "epoch": 0.40027103406104875, "grad_norm": 1.2584166526794434, "learning_rate": 2.0446359635196583e-05, "loss": 0.0788, "step": 18165 }, { "epoch": 0.4002930693505649, "grad_norm": 0.546114444732666, "learning_rate": 2.0445362146044593e-05, "loss": 0.0781, "step": 18166 }, { "epoch": 0.4003151046400811, "grad_norm": 0.5266912579536438, "learning_rate": 2.0444364629156454e-05, "loss": 0.1035, "step": 18167 }, { "epoch": 0.40033713992959724, "grad_norm": 0.6702527403831482, "learning_rate": 2.0443367084537244e-05, "loss": 0.0867, "step": 18168 }, { "epoch": 0.4003591752191134, "grad_norm": 0.8131382465362549, "learning_rate": 2.0442369512192053e-05, "loss": 0.0748, "step": 18169 }, { "epoch": 0.40038121050862957, "grad_norm": 0.5048098564147949, "learning_rate": 2.0441371912125943e-05, "loss": 0.0802, "step": 18170 }, { "epoch": 0.40040324579814573, "grad_norm": 0.9213371872901917, "learning_rate": 2.0440374284344018e-05, "loss": 0.1141, "step": 18171 }, { "epoch": 0.4004252810876619, "grad_norm": 0.5288414359092712, "learning_rate": 2.043937662885135e-05, "loss": 0.0651, "step": 18172 }, { "epoch": 0.40044731637717806, "grad_norm": 0.769362211227417, "learning_rate": 2.0438378945653014e-05, "loss": 0.0622, "step": 18173 }, { "epoch": 0.40046935166669423, "grad_norm": 0.7578004002571106, "learning_rate": 2.04373812347541e-05, "loss": 0.0829, "step": 18174 }, { "epoch": 0.4004913869562104, "grad_norm": 0.7865351438522339, "learning_rate": 2.043638349615969e-05, "loss": 0.0864, "step": 18175 }, { "epoch": 0.40051342224572656, "grad_norm": 0.40602853894233704, "learning_rate": 2.0435385729874863e-05, "loss": 0.1048, "step": 18176 }, { "epoch": 0.40053545753524267, "grad_norm": 0.7605463862419128, "learning_rate": 2.04343879359047e-05, "loss": 0.0864, "step": 18177 }, { "epoch": 0.40055749282475883, "grad_norm": 0.7163793444633484, "learning_rate": 2.043339011425428e-05, "loss": 0.0778, "step": 18178 }, { "epoch": 0.400579528114275, "grad_norm": 1.4131724834442139, "learning_rate": 2.04323922649287e-05, "loss": 0.0792, "step": 18179 }, { "epoch": 0.40060156340379116, "grad_norm": 0.900075376033783, "learning_rate": 2.043139438793303e-05, "loss": 0.1092, "step": 18180 }, { "epoch": 0.4006235986933073, "grad_norm": 0.7781221270561218, "learning_rate": 2.0430396483272357e-05, "loss": 0.1256, "step": 18181 }, { "epoch": 0.4006456339828235, "grad_norm": 0.3667213022708893, "learning_rate": 2.042939855095176e-05, "loss": 0.0737, "step": 18182 }, { "epoch": 0.40066766927233965, "grad_norm": 0.6378761529922485, "learning_rate": 2.0428400590976326e-05, "loss": 0.075, "step": 18183 }, { "epoch": 0.4006897045618558, "grad_norm": 0.6982147693634033, "learning_rate": 2.0427402603351135e-05, "loss": 0.1195, "step": 18184 }, { "epoch": 0.400711739851372, "grad_norm": 0.630090057849884, "learning_rate": 2.0426404588081273e-05, "loss": 0.0991, "step": 18185 }, { "epoch": 0.40073377514088815, "grad_norm": 0.680691123008728, "learning_rate": 2.0425406545171824e-05, "loss": 0.0621, "step": 18186 }, { "epoch": 0.4007558104304043, "grad_norm": 0.422340452671051, "learning_rate": 2.0424408474627867e-05, "loss": 0.1008, "step": 18187 }, { "epoch": 0.4007778457199205, "grad_norm": 0.6862202882766724, "learning_rate": 2.0423410376454495e-05, "loss": 0.0737, "step": 18188 }, { "epoch": 0.4007998810094366, "grad_norm": 0.9200685024261475, "learning_rate": 2.042241225065678e-05, "loss": 0.0691, "step": 18189 }, { "epoch": 0.40082191629895275, "grad_norm": 0.7729028463363647, "learning_rate": 2.0421414097239815e-05, "loss": 0.0771, "step": 18190 }, { "epoch": 0.4008439515884689, "grad_norm": 0.5726724863052368, "learning_rate": 2.0420415916208677e-05, "loss": 0.0815, "step": 18191 }, { "epoch": 0.4008659868779851, "grad_norm": 0.6145832538604736, "learning_rate": 2.0419417707568457e-05, "loss": 0.0759, "step": 18192 }, { "epoch": 0.40088802216750125, "grad_norm": 0.741856575012207, "learning_rate": 2.0418419471324236e-05, "loss": 0.1131, "step": 18193 }, { "epoch": 0.4009100574570174, "grad_norm": 0.721126914024353, "learning_rate": 2.04174212074811e-05, "loss": 0.0869, "step": 18194 }, { "epoch": 0.4009320927465336, "grad_norm": 0.6616611480712891, "learning_rate": 2.041642291604413e-05, "loss": 0.0992, "step": 18195 }, { "epoch": 0.40095412803604974, "grad_norm": 0.678546130657196, "learning_rate": 2.0415424597018417e-05, "loss": 0.0631, "step": 18196 }, { "epoch": 0.4009761633255659, "grad_norm": 0.7679030299186707, "learning_rate": 2.0414426250409038e-05, "loss": 0.1166, "step": 18197 }, { "epoch": 0.40099819861508207, "grad_norm": 1.1564899682998657, "learning_rate": 2.041342787622109e-05, "loss": 0.0876, "step": 18198 }, { "epoch": 0.40102023390459823, "grad_norm": 0.8580482006072998, "learning_rate": 2.0412429474459648e-05, "loss": 0.114, "step": 18199 }, { "epoch": 0.4010422691941144, "grad_norm": 0.7379608750343323, "learning_rate": 2.0411431045129797e-05, "loss": 0.0557, "step": 18200 }, { "epoch": 0.4010643044836305, "grad_norm": 0.6707176566123962, "learning_rate": 2.0410432588236627e-05, "loss": 0.0803, "step": 18201 }, { "epoch": 0.40108633977314667, "grad_norm": 0.70763099193573, "learning_rate": 2.0409434103785224e-05, "loss": 0.0969, "step": 18202 }, { "epoch": 0.40110837506266284, "grad_norm": 0.6022711992263794, "learning_rate": 2.0408435591780668e-05, "loss": 0.0943, "step": 18203 }, { "epoch": 0.401130410352179, "grad_norm": 0.6207641363143921, "learning_rate": 2.0407437052228053e-05, "loss": 0.062, "step": 18204 }, { "epoch": 0.40115244564169517, "grad_norm": 0.8053705096244812, "learning_rate": 2.040643848513246e-05, "loss": 0.0978, "step": 18205 }, { "epoch": 0.40117448093121133, "grad_norm": 0.5208964943885803, "learning_rate": 2.0405439890498978e-05, "loss": 0.0526, "step": 18206 }, { "epoch": 0.4011965162207275, "grad_norm": 0.5160278677940369, "learning_rate": 2.0404441268332695e-05, "loss": 0.0931, "step": 18207 }, { "epoch": 0.40121855151024366, "grad_norm": 0.7932679057121277, "learning_rate": 2.040344261863869e-05, "loss": 0.1004, "step": 18208 }, { "epoch": 0.4012405867997598, "grad_norm": 0.984294056892395, "learning_rate": 2.0402443941422054e-05, "loss": 0.0872, "step": 18209 }, { "epoch": 0.401262622089276, "grad_norm": 0.9376326203346252, "learning_rate": 2.0401445236687877e-05, "loss": 0.0932, "step": 18210 }, { "epoch": 0.40128465737879215, "grad_norm": 0.7251954674720764, "learning_rate": 2.0400446504441244e-05, "loss": 0.0589, "step": 18211 }, { "epoch": 0.4013066926683083, "grad_norm": 0.9214912056922913, "learning_rate": 2.0399447744687232e-05, "loss": 0.072, "step": 18212 }, { "epoch": 0.4013287279578245, "grad_norm": 1.1640551090240479, "learning_rate": 2.0398448957430948e-05, "loss": 0.0858, "step": 18213 }, { "epoch": 0.4013507632473406, "grad_norm": 0.6831122040748596, "learning_rate": 2.0397450142677464e-05, "loss": 0.0943, "step": 18214 }, { "epoch": 0.40137279853685676, "grad_norm": 0.6331560611724854, "learning_rate": 2.0396451300431873e-05, "loss": 0.0798, "step": 18215 }, { "epoch": 0.4013948338263729, "grad_norm": 0.5792882442474365, "learning_rate": 2.0395452430699262e-05, "loss": 0.0603, "step": 18216 }, { "epoch": 0.4014168691158891, "grad_norm": 0.3559938669204712, "learning_rate": 2.0394453533484715e-05, "loss": 0.1179, "step": 18217 }, { "epoch": 0.40143890440540525, "grad_norm": 0.49448537826538086, "learning_rate": 2.0393454608793324e-05, "loss": 0.062, "step": 18218 }, { "epoch": 0.4014609396949214, "grad_norm": 0.6090081930160522, "learning_rate": 2.039245565663018e-05, "loss": 0.096, "step": 18219 }, { "epoch": 0.4014829749844376, "grad_norm": 0.8178851008415222, "learning_rate": 2.0391456677000366e-05, "loss": 0.0803, "step": 18220 }, { "epoch": 0.40150501027395374, "grad_norm": 0.5634338855743408, "learning_rate": 2.039045766990897e-05, "loss": 0.1013, "step": 18221 }, { "epoch": 0.4015270455634699, "grad_norm": 0.665651261806488, "learning_rate": 2.0389458635361088e-05, "loss": 0.0684, "step": 18222 }, { "epoch": 0.4015490808529861, "grad_norm": 0.46351271867752075, "learning_rate": 2.03884595733618e-05, "loss": 0.0687, "step": 18223 }, { "epoch": 0.40157111614250224, "grad_norm": 1.1545847654342651, "learning_rate": 2.03874604839162e-05, "loss": 0.104, "step": 18224 }, { "epoch": 0.4015931514320184, "grad_norm": 0.6335184574127197, "learning_rate": 2.038646136702937e-05, "loss": 0.074, "step": 18225 }, { "epoch": 0.4016151867215345, "grad_norm": 0.6473909616470337, "learning_rate": 2.0385462222706405e-05, "loss": 0.0743, "step": 18226 }, { "epoch": 0.4016372220110507, "grad_norm": 1.0073657035827637, "learning_rate": 2.0384463050952396e-05, "loss": 0.1225, "step": 18227 }, { "epoch": 0.40165925730056684, "grad_norm": 0.6276550889015198, "learning_rate": 2.0383463851772426e-05, "loss": 0.0769, "step": 18228 }, { "epoch": 0.401681292590083, "grad_norm": 0.5147889256477356, "learning_rate": 2.038246462517159e-05, "loss": 0.0862, "step": 18229 }, { "epoch": 0.40170332787959917, "grad_norm": 0.46038010716438293, "learning_rate": 2.0381465371154976e-05, "loss": 0.0942, "step": 18230 }, { "epoch": 0.40172536316911533, "grad_norm": 0.5563232898712158, "learning_rate": 2.0380466089727667e-05, "loss": 0.0794, "step": 18231 }, { "epoch": 0.4017473984586315, "grad_norm": 0.7411881685256958, "learning_rate": 2.037946678089477e-05, "loss": 0.1013, "step": 18232 }, { "epoch": 0.40176943374814766, "grad_norm": 0.48994147777557373, "learning_rate": 2.037846744466135e-05, "loss": 0.0748, "step": 18233 }, { "epoch": 0.40179146903766383, "grad_norm": 0.6235189437866211, "learning_rate": 2.0377468081032523e-05, "loss": 0.082, "step": 18234 }, { "epoch": 0.40181350432718, "grad_norm": 0.7897382974624634, "learning_rate": 2.037646869001336e-05, "loss": 0.0799, "step": 18235 }, { "epoch": 0.40183553961669616, "grad_norm": 0.9274945855140686, "learning_rate": 2.037546927160896e-05, "loss": 0.0884, "step": 18236 }, { "epoch": 0.4018575749062123, "grad_norm": 0.4762495756149292, "learning_rate": 2.037446982582442e-05, "loss": 0.0944, "step": 18237 }, { "epoch": 0.40187961019572843, "grad_norm": 0.5988138318061829, "learning_rate": 2.037347035266482e-05, "loss": 0.1268, "step": 18238 }, { "epoch": 0.4019016454852446, "grad_norm": 0.8945522308349609, "learning_rate": 2.037247085213525e-05, "loss": 0.1002, "step": 18239 }, { "epoch": 0.40192368077476076, "grad_norm": 0.6099647283554077, "learning_rate": 2.0371471324240804e-05, "loss": 0.0757, "step": 18240 }, { "epoch": 0.4019457160642769, "grad_norm": 0.7417981624603271, "learning_rate": 2.0370471768986578e-05, "loss": 0.0923, "step": 18241 }, { "epoch": 0.4019677513537931, "grad_norm": 0.5312272310256958, "learning_rate": 2.036947218637766e-05, "loss": 0.0539, "step": 18242 }, { "epoch": 0.40198978664330925, "grad_norm": 0.7908170819282532, "learning_rate": 2.0368472576419134e-05, "loss": 0.1117, "step": 18243 }, { "epoch": 0.4020118219328254, "grad_norm": 0.7624784708023071, "learning_rate": 2.0367472939116106e-05, "loss": 0.0884, "step": 18244 }, { "epoch": 0.4020338572223416, "grad_norm": 0.7397285103797913, "learning_rate": 2.036647327447366e-05, "loss": 0.0652, "step": 18245 }, { "epoch": 0.40205589251185775, "grad_norm": 0.6283496618270874, "learning_rate": 2.0365473582496887e-05, "loss": 0.0868, "step": 18246 }, { "epoch": 0.4020779278013739, "grad_norm": 0.43118739128112793, "learning_rate": 2.0364473863190874e-05, "loss": 0.0753, "step": 18247 }, { "epoch": 0.4020999630908901, "grad_norm": 0.41580238938331604, "learning_rate": 2.0363474116560724e-05, "loss": 0.0703, "step": 18248 }, { "epoch": 0.40212199838040624, "grad_norm": 0.378793329000473, "learning_rate": 2.036247434261152e-05, "loss": 0.0672, "step": 18249 }, { "epoch": 0.4021440336699224, "grad_norm": 0.9316888451576233, "learning_rate": 2.0361474541348366e-05, "loss": 0.0974, "step": 18250 }, { "epoch": 0.4021660689594385, "grad_norm": 1.0225765705108643, "learning_rate": 2.0360474712776343e-05, "loss": 0.0921, "step": 18251 }, { "epoch": 0.4021881042489547, "grad_norm": 0.5673684477806091, "learning_rate": 2.0359474856900545e-05, "loss": 0.0928, "step": 18252 }, { "epoch": 0.40221013953847085, "grad_norm": 0.6573984026908875, "learning_rate": 2.035847497372607e-05, "loss": 0.1106, "step": 18253 }, { "epoch": 0.402232174827987, "grad_norm": 0.4771776497364044, "learning_rate": 2.0357475063258007e-05, "loss": 0.0647, "step": 18254 }, { "epoch": 0.4022542101175032, "grad_norm": 0.8005005717277527, "learning_rate": 2.035647512550146e-05, "loss": 0.0917, "step": 18255 }, { "epoch": 0.40227624540701934, "grad_norm": 1.0275547504425049, "learning_rate": 2.03554751604615e-05, "loss": 0.1253, "step": 18256 }, { "epoch": 0.4022982806965355, "grad_norm": 0.6614736914634705, "learning_rate": 2.035447516814324e-05, "loss": 0.0741, "step": 18257 }, { "epoch": 0.40232031598605167, "grad_norm": 0.7624601125717163, "learning_rate": 2.0353475148551763e-05, "loss": 0.0661, "step": 18258 }, { "epoch": 0.40234235127556783, "grad_norm": 0.6826834082603455, "learning_rate": 2.035247510169217e-05, "loss": 0.0667, "step": 18259 }, { "epoch": 0.402364386565084, "grad_norm": 0.669730544090271, "learning_rate": 2.035147502756955e-05, "loss": 0.0782, "step": 18260 }, { "epoch": 0.40238642185460016, "grad_norm": 0.8112375736236572, "learning_rate": 2.0350474926188996e-05, "loss": 0.1109, "step": 18261 }, { "epoch": 0.4024084571441163, "grad_norm": 0.7375742197036743, "learning_rate": 2.034947479755561e-05, "loss": 0.0844, "step": 18262 }, { "epoch": 0.40243049243363244, "grad_norm": 0.6516304612159729, "learning_rate": 2.0348474641674474e-05, "loss": 0.1008, "step": 18263 }, { "epoch": 0.4024525277231486, "grad_norm": 0.608050525188446, "learning_rate": 2.0347474458550688e-05, "loss": 0.1076, "step": 18264 }, { "epoch": 0.40247456301266477, "grad_norm": 0.6629700064659119, "learning_rate": 2.0346474248189352e-05, "loss": 0.0895, "step": 18265 }, { "epoch": 0.40249659830218093, "grad_norm": 0.7032939791679382, "learning_rate": 2.034547401059555e-05, "loss": 0.0987, "step": 18266 }, { "epoch": 0.4025186335916971, "grad_norm": 0.40137121081352234, "learning_rate": 2.0344473745774388e-05, "loss": 0.0995, "step": 18267 }, { "epoch": 0.40254066888121326, "grad_norm": 0.5079524517059326, "learning_rate": 2.034347345373095e-05, "loss": 0.1099, "step": 18268 }, { "epoch": 0.4025627041707294, "grad_norm": 0.5418393015861511, "learning_rate": 2.034247313447034e-05, "loss": 0.0689, "step": 18269 }, { "epoch": 0.4025847394602456, "grad_norm": 0.7204862236976624, "learning_rate": 2.034147278799765e-05, "loss": 0.0896, "step": 18270 }, { "epoch": 0.40260677474976175, "grad_norm": 0.5345685482025146, "learning_rate": 2.0340472414317977e-05, "loss": 0.1058, "step": 18271 }, { "epoch": 0.4026288100392779, "grad_norm": 0.7958272695541382, "learning_rate": 2.033947201343641e-05, "loss": 0.076, "step": 18272 }, { "epoch": 0.4026508453287941, "grad_norm": 0.6817664504051208, "learning_rate": 2.033847158535805e-05, "loss": 0.0767, "step": 18273 }, { "epoch": 0.40267288061831025, "grad_norm": 1.8324637413024902, "learning_rate": 2.0337471130087988e-05, "loss": 0.0907, "step": 18274 }, { "epoch": 0.40269491590782636, "grad_norm": 1.1300667524337769, "learning_rate": 2.0336470647631324e-05, "loss": 0.1403, "step": 18275 }, { "epoch": 0.4027169511973425, "grad_norm": 0.8967615962028503, "learning_rate": 2.0335470137993156e-05, "loss": 0.125, "step": 18276 }, { "epoch": 0.4027389864868587, "grad_norm": 0.806688129901886, "learning_rate": 2.0334469601178577e-05, "loss": 0.0765, "step": 18277 }, { "epoch": 0.40276102177637485, "grad_norm": 0.723792314529419, "learning_rate": 2.0333469037192684e-05, "loss": 0.0825, "step": 18278 }, { "epoch": 0.402783057065891, "grad_norm": 0.4000653326511383, "learning_rate": 2.0332468446040572e-05, "loss": 0.0624, "step": 18279 }, { "epoch": 0.4028050923554072, "grad_norm": 0.7340521216392517, "learning_rate": 2.033146782772734e-05, "loss": 0.0827, "step": 18280 }, { "epoch": 0.40282712764492334, "grad_norm": 0.6979506015777588, "learning_rate": 2.0330467182258083e-05, "loss": 0.0868, "step": 18281 }, { "epoch": 0.4028491629344395, "grad_norm": 0.6492626070976257, "learning_rate": 2.03294665096379e-05, "loss": 0.0832, "step": 18282 }, { "epoch": 0.4028711982239557, "grad_norm": 1.3088891506195068, "learning_rate": 2.0328465809871882e-05, "loss": 0.0902, "step": 18283 }, { "epoch": 0.40289323351347184, "grad_norm": 0.6947830319404602, "learning_rate": 2.0327465082965135e-05, "loss": 0.0877, "step": 18284 }, { "epoch": 0.402915268802988, "grad_norm": 0.7955507040023804, "learning_rate": 2.032646432892275e-05, "loss": 0.0759, "step": 18285 }, { "epoch": 0.40293730409250417, "grad_norm": 1.0072778463363647, "learning_rate": 2.0325463547749826e-05, "loss": 0.1134, "step": 18286 }, { "epoch": 0.40295933938202033, "grad_norm": 0.8843215107917786, "learning_rate": 2.0324462739451456e-05, "loss": 0.1166, "step": 18287 }, { "epoch": 0.40298137467153644, "grad_norm": 0.9673469066619873, "learning_rate": 2.0323461904032745e-05, "loss": 0.1286, "step": 18288 }, { "epoch": 0.4030034099610526, "grad_norm": 0.6252342462539673, "learning_rate": 2.0322461041498787e-05, "loss": 0.0821, "step": 18289 }, { "epoch": 0.40302544525056877, "grad_norm": 0.6231021285057068, "learning_rate": 2.0321460151854687e-05, "loss": 0.0871, "step": 18290 }, { "epoch": 0.40304748054008493, "grad_norm": 0.9498467445373535, "learning_rate": 2.032045923510553e-05, "loss": 0.0698, "step": 18291 }, { "epoch": 0.4030695158296011, "grad_norm": 0.7212309241294861, "learning_rate": 2.0319458291256422e-05, "loss": 0.1028, "step": 18292 }, { "epoch": 0.40309155111911726, "grad_norm": 0.7383280992507935, "learning_rate": 2.0318457320312463e-05, "loss": 0.067, "step": 18293 }, { "epoch": 0.40311358640863343, "grad_norm": 0.3649156391620636, "learning_rate": 2.0317456322278746e-05, "loss": 0.0942, "step": 18294 }, { "epoch": 0.4031356216981496, "grad_norm": 1.047882080078125, "learning_rate": 2.0316455297160376e-05, "loss": 0.0945, "step": 18295 }, { "epoch": 0.40315765698766576, "grad_norm": 0.6641868948936462, "learning_rate": 2.0315454244962443e-05, "loss": 0.1054, "step": 18296 }, { "epoch": 0.4031796922771819, "grad_norm": 0.7294888496398926, "learning_rate": 2.0314453165690054e-05, "loss": 0.0834, "step": 18297 }, { "epoch": 0.4032017275666981, "grad_norm": 0.8268877863883972, "learning_rate": 2.0313452059348305e-05, "loss": 0.1038, "step": 18298 }, { "epoch": 0.40322376285621425, "grad_norm": 0.5024074912071228, "learning_rate": 2.03124509259423e-05, "loss": 0.0884, "step": 18299 }, { "epoch": 0.40324579814573036, "grad_norm": 0.668494462966919, "learning_rate": 2.031144976547713e-05, "loss": 0.1081, "step": 18300 }, { "epoch": 0.4032678334352465, "grad_norm": 0.821995735168457, "learning_rate": 2.0310448577957898e-05, "loss": 0.065, "step": 18301 }, { "epoch": 0.4032898687247627, "grad_norm": 0.6350433230400085, "learning_rate": 2.0309447363389702e-05, "loss": 0.0853, "step": 18302 }, { "epoch": 0.40331190401427885, "grad_norm": 0.6576060652732849, "learning_rate": 2.0308446121777645e-05, "loss": 0.0883, "step": 18303 }, { "epoch": 0.403333939303795, "grad_norm": 0.3297049105167389, "learning_rate": 2.0307444853126823e-05, "loss": 0.0459, "step": 18304 }, { "epoch": 0.4033559745933112, "grad_norm": 0.5823862552642822, "learning_rate": 2.030644355744234e-05, "loss": 0.0786, "step": 18305 }, { "epoch": 0.40337800988282735, "grad_norm": 0.7127134203910828, "learning_rate": 2.0305442234729295e-05, "loss": 0.0538, "step": 18306 }, { "epoch": 0.4034000451723435, "grad_norm": 0.6675224304199219, "learning_rate": 2.030444088499279e-05, "loss": 0.0476, "step": 18307 }, { "epoch": 0.4034220804618597, "grad_norm": 0.8654223680496216, "learning_rate": 2.0303439508237918e-05, "loss": 0.1047, "step": 18308 }, { "epoch": 0.40344411575137584, "grad_norm": 0.784919023513794, "learning_rate": 2.030243810446979e-05, "loss": 0.0747, "step": 18309 }, { "epoch": 0.403466151040892, "grad_norm": 0.5850110054016113, "learning_rate": 2.0301436673693496e-05, "loss": 0.0808, "step": 18310 }, { "epoch": 0.40348818633040817, "grad_norm": 0.6908378601074219, "learning_rate": 2.030043521591415e-05, "loss": 0.094, "step": 18311 }, { "epoch": 0.40351022161992434, "grad_norm": 0.6656070351600647, "learning_rate": 2.0299433731136835e-05, "loss": 0.0913, "step": 18312 }, { "epoch": 0.40353225690944045, "grad_norm": 0.7835074663162231, "learning_rate": 2.0298432219366666e-05, "loss": 0.0762, "step": 18313 }, { "epoch": 0.4035542921989566, "grad_norm": 0.7663061022758484, "learning_rate": 2.0297430680608742e-05, "loss": 0.1072, "step": 18314 }, { "epoch": 0.4035763274884728, "grad_norm": 0.78580641746521, "learning_rate": 2.029642911486816e-05, "loss": 0.1036, "step": 18315 }, { "epoch": 0.40359836277798894, "grad_norm": 0.8674687147140503, "learning_rate": 2.0295427522150028e-05, "loss": 0.1069, "step": 18316 }, { "epoch": 0.4036203980675051, "grad_norm": 0.5020698308944702, "learning_rate": 2.0294425902459444e-05, "loss": 0.0751, "step": 18317 }, { "epoch": 0.40364243335702127, "grad_norm": 0.9202415943145752, "learning_rate": 2.029342425580151e-05, "loss": 0.0764, "step": 18318 }, { "epoch": 0.40366446864653743, "grad_norm": 0.7785139083862305, "learning_rate": 2.0292422582181326e-05, "loss": 0.0696, "step": 18319 }, { "epoch": 0.4036865039360536, "grad_norm": 0.5260263085365295, "learning_rate": 2.0291420881603996e-05, "loss": 0.0777, "step": 18320 }, { "epoch": 0.40370853922556976, "grad_norm": 0.5548232197761536, "learning_rate": 2.029041915407462e-05, "loss": 0.0843, "step": 18321 }, { "epoch": 0.4037305745150859, "grad_norm": 1.048632264137268, "learning_rate": 2.0289417399598305e-05, "loss": 0.0931, "step": 18322 }, { "epoch": 0.4037526098046021, "grad_norm": 0.7663631439208984, "learning_rate": 2.028841561818015e-05, "loss": 0.0563, "step": 18323 }, { "epoch": 0.40377464509411826, "grad_norm": 0.4733446538448334, "learning_rate": 2.0287413809825257e-05, "loss": 0.0576, "step": 18324 }, { "epoch": 0.40379668038363437, "grad_norm": 0.978951096534729, "learning_rate": 2.0286411974538734e-05, "loss": 0.1091, "step": 18325 }, { "epoch": 0.40381871567315053, "grad_norm": 0.8464785218238831, "learning_rate": 2.0285410112325677e-05, "loss": 0.0895, "step": 18326 }, { "epoch": 0.4038407509626667, "grad_norm": 0.4913518726825714, "learning_rate": 2.0284408223191193e-05, "loss": 0.0765, "step": 18327 }, { "epoch": 0.40386278625218286, "grad_norm": 0.9404377937316895, "learning_rate": 2.0283406307140384e-05, "loss": 0.0948, "step": 18328 }, { "epoch": 0.403884821541699, "grad_norm": 0.578359067440033, "learning_rate": 2.028240436417835e-05, "loss": 0.0995, "step": 18329 }, { "epoch": 0.4039068568312152, "grad_norm": 0.5444090366363525, "learning_rate": 2.0281402394310206e-05, "loss": 0.0815, "step": 18330 }, { "epoch": 0.40392889212073135, "grad_norm": 0.9271597266197205, "learning_rate": 2.0280400397541043e-05, "loss": 0.1315, "step": 18331 }, { "epoch": 0.4039509274102475, "grad_norm": 0.5298058986663818, "learning_rate": 2.0279398373875964e-05, "loss": 0.0764, "step": 18332 }, { "epoch": 0.4039729626997637, "grad_norm": 0.8743347525596619, "learning_rate": 2.0278396323320087e-05, "loss": 0.0827, "step": 18333 }, { "epoch": 0.40399499798927985, "grad_norm": 0.956915557384491, "learning_rate": 2.0277394245878503e-05, "loss": 0.076, "step": 18334 }, { "epoch": 0.404017033278796, "grad_norm": 0.8007540702819824, "learning_rate": 2.0276392141556318e-05, "loss": 0.06, "step": 18335 }, { "epoch": 0.4040390685683122, "grad_norm": 0.7672736048698425, "learning_rate": 2.0275390010358642e-05, "loss": 0.0641, "step": 18336 }, { "epoch": 0.4040611038578283, "grad_norm": 0.8232859969139099, "learning_rate": 2.0274387852290574e-05, "loss": 0.0983, "step": 18337 }, { "epoch": 0.40408313914734445, "grad_norm": 0.7228586077690125, "learning_rate": 2.0273385667357222e-05, "loss": 0.086, "step": 18338 }, { "epoch": 0.4041051744368606, "grad_norm": 0.9005985856056213, "learning_rate": 2.0272383455563683e-05, "loss": 0.091, "step": 18339 }, { "epoch": 0.4041272097263768, "grad_norm": 0.4668474495410919, "learning_rate": 2.0271381216915073e-05, "loss": 0.0853, "step": 18340 }, { "epoch": 0.40414924501589294, "grad_norm": 0.4506261646747589, "learning_rate": 2.027037895141649e-05, "loss": 0.0739, "step": 18341 }, { "epoch": 0.4041712803054091, "grad_norm": 0.3349849581718445, "learning_rate": 2.0269376659073043e-05, "loss": 0.067, "step": 18342 }, { "epoch": 0.4041933155949253, "grad_norm": 0.6668404936790466, "learning_rate": 2.0268374339889835e-05, "loss": 0.0602, "step": 18343 }, { "epoch": 0.40421535088444144, "grad_norm": 0.9184284210205078, "learning_rate": 2.026737199387197e-05, "loss": 0.0946, "step": 18344 }, { "epoch": 0.4042373861739576, "grad_norm": 0.9520832300186157, "learning_rate": 2.026636962102456e-05, "loss": 0.1261, "step": 18345 }, { "epoch": 0.40425942146347377, "grad_norm": 1.1468786001205444, "learning_rate": 2.0265367221352695e-05, "loss": 0.0878, "step": 18346 }, { "epoch": 0.40428145675298993, "grad_norm": 0.5335069894790649, "learning_rate": 2.0264364794861498e-05, "loss": 0.0577, "step": 18347 }, { "epoch": 0.4043034920425061, "grad_norm": 0.7655817866325378, "learning_rate": 2.0263362341556067e-05, "loss": 0.0905, "step": 18348 }, { "epoch": 0.40432552733202226, "grad_norm": 0.9996103048324585, "learning_rate": 2.0262359861441512e-05, "loss": 0.0995, "step": 18349 }, { "epoch": 0.40434756262153837, "grad_norm": 0.40294066071510315, "learning_rate": 2.026135735452293e-05, "loss": 0.1084, "step": 18350 }, { "epoch": 0.40436959791105453, "grad_norm": 0.8681215047836304, "learning_rate": 2.0260354820805437e-05, "loss": 0.08, "step": 18351 }, { "epoch": 0.4043916332005707, "grad_norm": 0.6242876648902893, "learning_rate": 2.0259352260294136e-05, "loss": 0.0889, "step": 18352 }, { "epoch": 0.40441366849008686, "grad_norm": 0.48894432187080383, "learning_rate": 2.0258349672994134e-05, "loss": 0.1213, "step": 18353 }, { "epoch": 0.40443570377960303, "grad_norm": 0.5988180041313171, "learning_rate": 2.0257347058910537e-05, "loss": 0.0595, "step": 18354 }, { "epoch": 0.4044577390691192, "grad_norm": 0.6166224479675293, "learning_rate": 2.0256344418048454e-05, "loss": 0.0888, "step": 18355 }, { "epoch": 0.40447977435863536, "grad_norm": 0.4374200403690338, "learning_rate": 2.025534175041299e-05, "loss": 0.0818, "step": 18356 }, { "epoch": 0.4045018096481515, "grad_norm": 0.314729779958725, "learning_rate": 2.025433905600925e-05, "loss": 0.0538, "step": 18357 }, { "epoch": 0.4045238449376677, "grad_norm": 0.6826393008232117, "learning_rate": 2.0253336334842346e-05, "loss": 0.0811, "step": 18358 }, { "epoch": 0.40454588022718385, "grad_norm": 0.6654185652732849, "learning_rate": 2.0252333586917382e-05, "loss": 0.0779, "step": 18359 }, { "epoch": 0.4045679155167, "grad_norm": 0.7307196259498596, "learning_rate": 2.0251330812239462e-05, "loss": 0.0864, "step": 18360 }, { "epoch": 0.4045899508062162, "grad_norm": 0.4335775375366211, "learning_rate": 2.0250328010813705e-05, "loss": 0.074, "step": 18361 }, { "epoch": 0.4046119860957323, "grad_norm": 0.972527265548706, "learning_rate": 2.0249325182645207e-05, "loss": 0.1056, "step": 18362 }, { "epoch": 0.40463402138524845, "grad_norm": 0.6841124296188354, "learning_rate": 2.024832232773908e-05, "loss": 0.0606, "step": 18363 }, { "epoch": 0.4046560566747646, "grad_norm": 0.987606942653656, "learning_rate": 2.0247319446100436e-05, "loss": 0.0984, "step": 18364 }, { "epoch": 0.4046780919642808, "grad_norm": 0.4363139569759369, "learning_rate": 2.0246316537734384e-05, "loss": 0.0456, "step": 18365 }, { "epoch": 0.40470012725379695, "grad_norm": 0.34938952326774597, "learning_rate": 2.024531360264603e-05, "loss": 0.0734, "step": 18366 }, { "epoch": 0.4047221625433131, "grad_norm": 0.7293321490287781, "learning_rate": 2.024431064084047e-05, "loss": 0.0785, "step": 18367 }, { "epoch": 0.4047441978328293, "grad_norm": 0.44465169310569763, "learning_rate": 2.024330765232283e-05, "loss": 0.097, "step": 18368 }, { "epoch": 0.40476623312234544, "grad_norm": 0.9107537865638733, "learning_rate": 2.0242304637098212e-05, "loss": 0.0725, "step": 18369 }, { "epoch": 0.4047882684118616, "grad_norm": 0.5777431130409241, "learning_rate": 2.0241301595171727e-05, "loss": 0.0667, "step": 18370 }, { "epoch": 0.40481030370137777, "grad_norm": 0.7564352750778198, "learning_rate": 2.024029852654848e-05, "loss": 0.0918, "step": 18371 }, { "epoch": 0.40483233899089394, "grad_norm": 0.9425830245018005, "learning_rate": 2.023929543123358e-05, "loss": 0.1188, "step": 18372 }, { "epoch": 0.4048543742804101, "grad_norm": 0.6974992752075195, "learning_rate": 2.0238292309232146e-05, "loss": 0.115, "step": 18373 }, { "epoch": 0.4048764095699262, "grad_norm": 0.8040991425514221, "learning_rate": 2.0237289160549278e-05, "loss": 0.1102, "step": 18374 }, { "epoch": 0.4048984448594424, "grad_norm": 0.5436403751373291, "learning_rate": 2.0236285985190083e-05, "loss": 0.0696, "step": 18375 }, { "epoch": 0.40492048014895854, "grad_norm": 0.8169064521789551, "learning_rate": 2.023528278315968e-05, "loss": 0.0882, "step": 18376 }, { "epoch": 0.4049425154384747, "grad_norm": 0.5646091103553772, "learning_rate": 2.0234279554463173e-05, "loss": 0.0896, "step": 18377 }, { "epoch": 0.40496455072799087, "grad_norm": 0.5576003789901733, "learning_rate": 2.0233276299105673e-05, "loss": 0.1055, "step": 18378 }, { "epoch": 0.40498658601750703, "grad_norm": 0.6246188282966614, "learning_rate": 2.023227301709229e-05, "loss": 0.0805, "step": 18379 }, { "epoch": 0.4050086213070232, "grad_norm": 1.177937626838684, "learning_rate": 2.0231269708428137e-05, "loss": 0.1132, "step": 18380 }, { "epoch": 0.40503065659653936, "grad_norm": 0.6596569418907166, "learning_rate": 2.0230266373118324e-05, "loss": 0.0838, "step": 18381 }, { "epoch": 0.4050526918860555, "grad_norm": 0.5070107579231262, "learning_rate": 2.0229263011167955e-05, "loss": 0.0558, "step": 18382 }, { "epoch": 0.4050747271755717, "grad_norm": 0.7835826873779297, "learning_rate": 2.0228259622582147e-05, "loss": 0.1135, "step": 18383 }, { "epoch": 0.40509676246508786, "grad_norm": 0.6817857623100281, "learning_rate": 2.022725620736601e-05, "loss": 0.0817, "step": 18384 }, { "epoch": 0.405118797754604, "grad_norm": 0.4633432626724243, "learning_rate": 2.0226252765524652e-05, "loss": 0.0923, "step": 18385 }, { "epoch": 0.4051408330441202, "grad_norm": 0.9494194388389587, "learning_rate": 2.0225249297063184e-05, "loss": 0.1248, "step": 18386 }, { "epoch": 0.4051628683336363, "grad_norm": 0.9154391884803772, "learning_rate": 2.0224245801986724e-05, "loss": 0.1261, "step": 18387 }, { "epoch": 0.40518490362315246, "grad_norm": 0.9375894665718079, "learning_rate": 2.022324228030038e-05, "loss": 0.1108, "step": 18388 }, { "epoch": 0.4052069389126686, "grad_norm": 0.43824249505996704, "learning_rate": 2.022223873200926e-05, "loss": 0.0917, "step": 18389 }, { "epoch": 0.4052289742021848, "grad_norm": 0.7101054191589355, "learning_rate": 2.0221235157118477e-05, "loss": 0.0909, "step": 18390 }, { "epoch": 0.40525100949170095, "grad_norm": 0.5636857151985168, "learning_rate": 2.0220231555633144e-05, "loss": 0.1125, "step": 18391 }, { "epoch": 0.4052730447812171, "grad_norm": 0.9228859543800354, "learning_rate": 2.021922792755837e-05, "loss": 0.121, "step": 18392 }, { "epoch": 0.4052950800707333, "grad_norm": 0.6288066506385803, "learning_rate": 2.0218224272899272e-05, "loss": 0.0828, "step": 18393 }, { "epoch": 0.40531711536024945, "grad_norm": 0.7402573823928833, "learning_rate": 2.0217220591660955e-05, "loss": 0.0813, "step": 18394 }, { "epoch": 0.4053391506497656, "grad_norm": 0.5733414888381958, "learning_rate": 2.021621688384854e-05, "loss": 0.0745, "step": 18395 }, { "epoch": 0.4053611859392818, "grad_norm": 0.7368636131286621, "learning_rate": 2.0215213149467137e-05, "loss": 0.1047, "step": 18396 }, { "epoch": 0.40538322122879794, "grad_norm": 0.6014625430107117, "learning_rate": 2.021420938852186e-05, "loss": 0.0894, "step": 18397 }, { "epoch": 0.4054052565183141, "grad_norm": 0.5794697403907776, "learning_rate": 2.021320560101781e-05, "loss": 0.0606, "step": 18398 }, { "epoch": 0.4054272918078302, "grad_norm": 0.8918811678886414, "learning_rate": 2.0212201786960112e-05, "loss": 0.0752, "step": 18399 }, { "epoch": 0.4054493270973464, "grad_norm": 0.7709305286407471, "learning_rate": 2.0211197946353875e-05, "loss": 0.1129, "step": 18400 }, { "epoch": 0.40547136238686254, "grad_norm": 0.7290324568748474, "learning_rate": 2.0210194079204213e-05, "loss": 0.0829, "step": 18401 }, { "epoch": 0.4054933976763787, "grad_norm": 0.5431756377220154, "learning_rate": 2.0209190185516237e-05, "loss": 0.0955, "step": 18402 }, { "epoch": 0.4055154329658949, "grad_norm": 0.608088493347168, "learning_rate": 2.0208186265295065e-05, "loss": 0.0836, "step": 18403 }, { "epoch": 0.40553746825541104, "grad_norm": 0.6249108910560608, "learning_rate": 2.0207182318545808e-05, "loss": 0.0644, "step": 18404 }, { "epoch": 0.4055595035449272, "grad_norm": 0.6993687152862549, "learning_rate": 2.0206178345273577e-05, "loss": 0.1056, "step": 18405 }, { "epoch": 0.40558153883444337, "grad_norm": 0.5050885677337646, "learning_rate": 2.0205174345483485e-05, "loss": 0.069, "step": 18406 }, { "epoch": 0.40560357412395953, "grad_norm": 0.8558361530303955, "learning_rate": 2.0204170319180653e-05, "loss": 0.073, "step": 18407 }, { "epoch": 0.4056256094134757, "grad_norm": 0.9356086254119873, "learning_rate": 2.0203166266370185e-05, "loss": 0.0953, "step": 18408 }, { "epoch": 0.40564764470299186, "grad_norm": 1.324866771697998, "learning_rate": 2.0202162187057205e-05, "loss": 0.1183, "step": 18409 }, { "epoch": 0.405669679992508, "grad_norm": 0.5477520823478699, "learning_rate": 2.0201158081246825e-05, "loss": 0.0931, "step": 18410 }, { "epoch": 0.40569171528202413, "grad_norm": 0.53499436378479, "learning_rate": 2.0200153948944155e-05, "loss": 0.0615, "step": 18411 }, { "epoch": 0.4057137505715403, "grad_norm": 0.6151174902915955, "learning_rate": 2.0199149790154317e-05, "loss": 0.0781, "step": 18412 }, { "epoch": 0.40573578586105646, "grad_norm": 0.7278409600257874, "learning_rate": 2.0198145604882418e-05, "loss": 0.0759, "step": 18413 }, { "epoch": 0.40575782115057263, "grad_norm": 0.433207631111145, "learning_rate": 2.0197141393133577e-05, "loss": 0.0662, "step": 18414 }, { "epoch": 0.4057798564400888, "grad_norm": 0.6585749387741089, "learning_rate": 2.0196137154912906e-05, "loss": 0.0942, "step": 18415 }, { "epoch": 0.40580189172960496, "grad_norm": 0.8227319717407227, "learning_rate": 2.0195132890225526e-05, "loss": 0.1076, "step": 18416 }, { "epoch": 0.4058239270191211, "grad_norm": 0.6127170324325562, "learning_rate": 2.019412859907654e-05, "loss": 0.1126, "step": 18417 }, { "epoch": 0.4058459623086373, "grad_norm": 0.5510360598564148, "learning_rate": 2.019312428147108e-05, "loss": 0.0869, "step": 18418 }, { "epoch": 0.40586799759815345, "grad_norm": 0.8185858130455017, "learning_rate": 2.0192119937414247e-05, "loss": 0.076, "step": 18419 }, { "epoch": 0.4058900328876696, "grad_norm": 0.6337085366249084, "learning_rate": 2.019111556691117e-05, "loss": 0.0985, "step": 18420 }, { "epoch": 0.4059120681771858, "grad_norm": 0.6950251460075378, "learning_rate": 2.0190111169966954e-05, "loss": 0.0577, "step": 18421 }, { "epoch": 0.40593410346670195, "grad_norm": 0.8430370688438416, "learning_rate": 2.0189106746586718e-05, "loss": 0.1104, "step": 18422 }, { "epoch": 0.4059561387562181, "grad_norm": 0.6747764348983765, "learning_rate": 2.018810229677558e-05, "loss": 0.073, "step": 18423 }, { "epoch": 0.4059781740457342, "grad_norm": 0.489678293466568, "learning_rate": 2.0187097820538655e-05, "loss": 0.0543, "step": 18424 }, { "epoch": 0.4060002093352504, "grad_norm": 0.8133593797683716, "learning_rate": 2.0186093317881054e-05, "loss": 0.0873, "step": 18425 }, { "epoch": 0.40602224462476655, "grad_norm": 0.653887152671814, "learning_rate": 2.0185088788807907e-05, "loss": 0.0573, "step": 18426 }, { "epoch": 0.4060442799142827, "grad_norm": 0.9302366971969604, "learning_rate": 2.0184084233324317e-05, "loss": 0.0999, "step": 18427 }, { "epoch": 0.4060663152037989, "grad_norm": 0.9830083250999451, "learning_rate": 2.018307965143541e-05, "loss": 0.1043, "step": 18428 }, { "epoch": 0.40608835049331504, "grad_norm": 0.5197851657867432, "learning_rate": 2.0182075043146296e-05, "loss": 0.1095, "step": 18429 }, { "epoch": 0.4061103857828312, "grad_norm": 0.6064532995223999, "learning_rate": 2.0181070408462093e-05, "loss": 0.0969, "step": 18430 }, { "epoch": 0.40613242107234737, "grad_norm": 0.8225088715553284, "learning_rate": 2.0180065747387925e-05, "loss": 0.0853, "step": 18431 }, { "epoch": 0.40615445636186354, "grad_norm": 0.9189720153808594, "learning_rate": 2.01790610599289e-05, "loss": 0.0625, "step": 18432 }, { "epoch": 0.4061764916513797, "grad_norm": 0.4718473255634308, "learning_rate": 2.0178056346090143e-05, "loss": 0.0421, "step": 18433 }, { "epoch": 0.40619852694089587, "grad_norm": 0.49826958775520325, "learning_rate": 2.0177051605876765e-05, "loss": 0.0569, "step": 18434 }, { "epoch": 0.40622056223041203, "grad_norm": 0.8455778360366821, "learning_rate": 2.017604683929389e-05, "loss": 0.0938, "step": 18435 }, { "epoch": 0.40624259751992814, "grad_norm": 0.738211989402771, "learning_rate": 2.0175042046346635e-05, "loss": 0.0821, "step": 18436 }, { "epoch": 0.4062646328094443, "grad_norm": 0.7545273900032043, "learning_rate": 2.0174037227040115e-05, "loss": 0.0857, "step": 18437 }, { "epoch": 0.40628666809896047, "grad_norm": 0.7989412546157837, "learning_rate": 2.0173032381379443e-05, "loss": 0.0837, "step": 18438 }, { "epoch": 0.40630870338847663, "grad_norm": 0.929843008518219, "learning_rate": 2.017202750936975e-05, "loss": 0.1042, "step": 18439 }, { "epoch": 0.4063307386779928, "grad_norm": 0.9786946177482605, "learning_rate": 2.017102261101614e-05, "loss": 0.105, "step": 18440 }, { "epoch": 0.40635277396750896, "grad_norm": 0.6830589771270752, "learning_rate": 2.0170017686323744e-05, "loss": 0.0959, "step": 18441 }, { "epoch": 0.4063748092570251, "grad_norm": 0.7168923020362854, "learning_rate": 2.0169012735297672e-05, "loss": 0.0965, "step": 18442 }, { "epoch": 0.4063968445465413, "grad_norm": 0.6248716711997986, "learning_rate": 2.016800775794305e-05, "loss": 0.1128, "step": 18443 }, { "epoch": 0.40641887983605746, "grad_norm": 0.655241847038269, "learning_rate": 2.0167002754264995e-05, "loss": 0.0751, "step": 18444 }, { "epoch": 0.4064409151255736, "grad_norm": 0.5051968693733215, "learning_rate": 2.016599772426862e-05, "loss": 0.0974, "step": 18445 }, { "epoch": 0.4064629504150898, "grad_norm": 0.8840169310569763, "learning_rate": 2.016499266795905e-05, "loss": 0.1164, "step": 18446 }, { "epoch": 0.40648498570460595, "grad_norm": 0.5614838600158691, "learning_rate": 2.0163987585341403e-05, "loss": 0.061, "step": 18447 }, { "epoch": 0.40650702099412206, "grad_norm": 0.7111674547195435, "learning_rate": 2.0162982476420797e-05, "loss": 0.0974, "step": 18448 }, { "epoch": 0.4065290562836382, "grad_norm": 0.7293274998664856, "learning_rate": 2.0161977341202352e-05, "loss": 0.0816, "step": 18449 }, { "epoch": 0.4065510915731544, "grad_norm": 0.8813287615776062, "learning_rate": 2.0160972179691188e-05, "loss": 0.1025, "step": 18450 }, { "epoch": 0.40657312686267055, "grad_norm": 0.6377298831939697, "learning_rate": 2.015996699189243e-05, "loss": 0.0903, "step": 18451 }, { "epoch": 0.4065951621521867, "grad_norm": 0.3970177173614502, "learning_rate": 2.015896177781119e-05, "loss": 0.0483, "step": 18452 }, { "epoch": 0.4066171974417029, "grad_norm": 0.4196925461292267, "learning_rate": 2.0157956537452592e-05, "loss": 0.0485, "step": 18453 }, { "epoch": 0.40663923273121905, "grad_norm": 0.5520133972167969, "learning_rate": 2.0156951270821752e-05, "loss": 0.0905, "step": 18454 }, { "epoch": 0.4066612680207352, "grad_norm": 0.6490345001220703, "learning_rate": 2.0155945977923798e-05, "loss": 0.1241, "step": 18455 }, { "epoch": 0.4066833033102514, "grad_norm": 0.4818291664123535, "learning_rate": 2.0154940658763844e-05, "loss": 0.0783, "step": 18456 }, { "epoch": 0.40670533859976754, "grad_norm": 0.9304682612419128, "learning_rate": 2.015393531334701e-05, "loss": 0.1082, "step": 18457 }, { "epoch": 0.4067273738892837, "grad_norm": 1.1578912734985352, "learning_rate": 2.0152929941678424e-05, "loss": 0.0938, "step": 18458 }, { "epoch": 0.40674940917879987, "grad_norm": 0.8641079068183899, "learning_rate": 2.0151924543763204e-05, "loss": 0.0843, "step": 18459 }, { "epoch": 0.40677144446831603, "grad_norm": 1.8969578742980957, "learning_rate": 2.015091911960647e-05, "loss": 0.1254, "step": 18460 }, { "epoch": 0.40679347975783214, "grad_norm": 0.7963321208953857, "learning_rate": 2.0149913669213336e-05, "loss": 0.1119, "step": 18461 }, { "epoch": 0.4068155150473483, "grad_norm": 0.8897789120674133, "learning_rate": 2.0148908192588937e-05, "loss": 0.0915, "step": 18462 }, { "epoch": 0.4068375503368645, "grad_norm": 0.4745691418647766, "learning_rate": 2.014790268973838e-05, "loss": 0.0546, "step": 18463 }, { "epoch": 0.40685958562638064, "grad_norm": 0.7808311581611633, "learning_rate": 2.0146897160666802e-05, "loss": 0.1042, "step": 18464 }, { "epoch": 0.4068816209158968, "grad_norm": 1.1313642263412476, "learning_rate": 2.014589160537931e-05, "loss": 0.1448, "step": 18465 }, { "epoch": 0.40690365620541297, "grad_norm": 0.6070948243141174, "learning_rate": 2.0144886023881036e-05, "loss": 0.0567, "step": 18466 }, { "epoch": 0.40692569149492913, "grad_norm": 0.6655765175819397, "learning_rate": 2.0143880416177103e-05, "loss": 0.0855, "step": 18467 }, { "epoch": 0.4069477267844453, "grad_norm": 0.5187990665435791, "learning_rate": 2.0142874782272625e-05, "loss": 0.0943, "step": 18468 }, { "epoch": 0.40696976207396146, "grad_norm": 0.8697804808616638, "learning_rate": 2.0141869122172725e-05, "loss": 0.0755, "step": 18469 }, { "epoch": 0.4069917973634776, "grad_norm": 0.7181501984596252, "learning_rate": 2.0140863435882532e-05, "loss": 0.0775, "step": 18470 }, { "epoch": 0.4070138326529938, "grad_norm": 1.1732498407363892, "learning_rate": 2.013985772340716e-05, "loss": 0.0868, "step": 18471 }, { "epoch": 0.40703586794250995, "grad_norm": 0.5533844232559204, "learning_rate": 2.0138851984751743e-05, "loss": 0.1174, "step": 18472 }, { "epoch": 0.40705790323202606, "grad_norm": 0.8431556224822998, "learning_rate": 2.013784621992139e-05, "loss": 0.082, "step": 18473 }, { "epoch": 0.40707993852154223, "grad_norm": 0.31670913100242615, "learning_rate": 2.0136840428921238e-05, "loss": 0.0506, "step": 18474 }, { "epoch": 0.4071019738110584, "grad_norm": 0.9404345750808716, "learning_rate": 2.0135834611756398e-05, "loss": 0.093, "step": 18475 }, { "epoch": 0.40712400910057456, "grad_norm": 0.44302985072135925, "learning_rate": 2.0134828768432002e-05, "loss": 0.0997, "step": 18476 }, { "epoch": 0.4071460443900907, "grad_norm": 0.6728427410125732, "learning_rate": 2.0133822898953167e-05, "loss": 0.088, "step": 18477 }, { "epoch": 0.4071680796796069, "grad_norm": 0.5936959981918335, "learning_rate": 2.0132817003325022e-05, "loss": 0.0759, "step": 18478 }, { "epoch": 0.40719011496912305, "grad_norm": 0.5885763168334961, "learning_rate": 2.0131811081552687e-05, "loss": 0.0871, "step": 18479 }, { "epoch": 0.4072121502586392, "grad_norm": 0.9412815570831299, "learning_rate": 2.0130805133641278e-05, "loss": 0.0923, "step": 18480 }, { "epoch": 0.4072341855481554, "grad_norm": 0.6659957766532898, "learning_rate": 2.0129799159595938e-05, "loss": 0.1351, "step": 18481 }, { "epoch": 0.40725622083767155, "grad_norm": 0.7851887941360474, "learning_rate": 2.0128793159421772e-05, "loss": 0.0861, "step": 18482 }, { "epoch": 0.4072782561271877, "grad_norm": 0.5780546069145203, "learning_rate": 2.0127787133123918e-05, "loss": 0.102, "step": 18483 }, { "epoch": 0.4073002914167039, "grad_norm": 0.6924412846565247, "learning_rate": 2.0126781080707495e-05, "loss": 0.0998, "step": 18484 }, { "epoch": 0.40732232670622, "grad_norm": 1.0626249313354492, "learning_rate": 2.0125775002177625e-05, "loss": 0.0857, "step": 18485 }, { "epoch": 0.40734436199573615, "grad_norm": 0.46918654441833496, "learning_rate": 2.012476889753943e-05, "loss": 0.0826, "step": 18486 }, { "epoch": 0.4073663972852523, "grad_norm": 0.9528740048408508, "learning_rate": 2.0123762766798048e-05, "loss": 0.0909, "step": 18487 }, { "epoch": 0.4073884325747685, "grad_norm": 0.791216254234314, "learning_rate": 2.0122756609958586e-05, "loss": 0.0842, "step": 18488 }, { "epoch": 0.40741046786428464, "grad_norm": 0.5251543521881104, "learning_rate": 2.012175042702618e-05, "loss": 0.0601, "step": 18489 }, { "epoch": 0.4074325031538008, "grad_norm": 0.7010596394538879, "learning_rate": 2.012074421800595e-05, "loss": 0.1216, "step": 18490 }, { "epoch": 0.40745453844331697, "grad_norm": 0.5150327086448669, "learning_rate": 2.011973798290303e-05, "loss": 0.061, "step": 18491 }, { "epoch": 0.40747657373283314, "grad_norm": 0.5883998274803162, "learning_rate": 2.0118731721722536e-05, "loss": 0.0967, "step": 18492 }, { "epoch": 0.4074986090223493, "grad_norm": 0.6717656254768372, "learning_rate": 2.01177254344696e-05, "loss": 0.0511, "step": 18493 }, { "epoch": 0.40752064431186547, "grad_norm": 0.8626120686531067, "learning_rate": 2.0116719121149336e-05, "loss": 0.106, "step": 18494 }, { "epoch": 0.40754267960138163, "grad_norm": 0.4199315905570984, "learning_rate": 2.0115712781766884e-05, "loss": 0.0732, "step": 18495 }, { "epoch": 0.4075647148908978, "grad_norm": 0.6276677846908569, "learning_rate": 2.0114706416327358e-05, "loss": 0.0957, "step": 18496 }, { "epoch": 0.40758675018041396, "grad_norm": 0.7389002442359924, "learning_rate": 2.011370002483589e-05, "loss": 0.0884, "step": 18497 }, { "epoch": 0.40760878546993007, "grad_norm": 0.4550527036190033, "learning_rate": 2.0112693607297614e-05, "loss": 0.0924, "step": 18498 }, { "epoch": 0.40763082075944623, "grad_norm": 0.8777950406074524, "learning_rate": 2.0111687163717642e-05, "loss": 0.0843, "step": 18499 }, { "epoch": 0.4076528560489624, "grad_norm": 1.1276432275772095, "learning_rate": 2.011068069410111e-05, "loss": 0.0818, "step": 18500 }, { "epoch": 0.40767489133847856, "grad_norm": 0.7165576219558716, "learning_rate": 2.0109674198453132e-05, "loss": 0.077, "step": 18501 }, { "epoch": 0.4076969266279947, "grad_norm": 1.2918380498886108, "learning_rate": 2.0108667676778853e-05, "loss": 0.0734, "step": 18502 }, { "epoch": 0.4077189619175109, "grad_norm": 0.8054873943328857, "learning_rate": 2.0107661129083383e-05, "loss": 0.083, "step": 18503 }, { "epoch": 0.40774099720702706, "grad_norm": 0.8937056064605713, "learning_rate": 2.010665455537186e-05, "loss": 0.0911, "step": 18504 }, { "epoch": 0.4077630324965432, "grad_norm": 0.6557328701019287, "learning_rate": 2.0105647955649403e-05, "loss": 0.1071, "step": 18505 }, { "epoch": 0.4077850677860594, "grad_norm": 0.608206033706665, "learning_rate": 2.010464132992115e-05, "loss": 0.058, "step": 18506 }, { "epoch": 0.40780710307557555, "grad_norm": 0.5159275531768799, "learning_rate": 2.0103634678192215e-05, "loss": 0.0702, "step": 18507 }, { "epoch": 0.4078291383650917, "grad_norm": 0.7020046710968018, "learning_rate": 2.0102628000467735e-05, "loss": 0.0849, "step": 18508 }, { "epoch": 0.4078511736546079, "grad_norm": 0.9063080549240112, "learning_rate": 2.010162129675283e-05, "loss": 0.1212, "step": 18509 }, { "epoch": 0.407873208944124, "grad_norm": 1.0281449556350708, "learning_rate": 2.0100614567052637e-05, "loss": 0.1037, "step": 18510 }, { "epoch": 0.40789524423364015, "grad_norm": 0.6748901605606079, "learning_rate": 2.0099607811372276e-05, "loss": 0.0599, "step": 18511 }, { "epoch": 0.4079172795231563, "grad_norm": 0.5344855785369873, "learning_rate": 2.009860102971688e-05, "loss": 0.0828, "step": 18512 }, { "epoch": 0.4079393148126725, "grad_norm": 1.004786729812622, "learning_rate": 2.009759422209157e-05, "loss": 0.0811, "step": 18513 }, { "epoch": 0.40796135010218865, "grad_norm": 0.6457801461219788, "learning_rate": 2.0096587388501485e-05, "loss": 0.0934, "step": 18514 }, { "epoch": 0.4079833853917048, "grad_norm": 0.9071857929229736, "learning_rate": 2.009558052895175e-05, "loss": 0.0724, "step": 18515 }, { "epoch": 0.408005420681221, "grad_norm": 0.8819318413734436, "learning_rate": 2.0094573643447482e-05, "loss": 0.1146, "step": 18516 }, { "epoch": 0.40802745597073714, "grad_norm": 0.9170827269554138, "learning_rate": 2.0093566731993822e-05, "loss": 0.0889, "step": 18517 }, { "epoch": 0.4080494912602533, "grad_norm": 0.6023722290992737, "learning_rate": 2.0092559794595898e-05, "loss": 0.0728, "step": 18518 }, { "epoch": 0.40807152654976947, "grad_norm": 0.7264904975891113, "learning_rate": 2.009155283125883e-05, "loss": 0.0876, "step": 18519 }, { "epoch": 0.40809356183928563, "grad_norm": 0.6003546118736267, "learning_rate": 2.0090545841987758e-05, "loss": 0.1126, "step": 18520 }, { "epoch": 0.4081155971288018, "grad_norm": 0.8313159346580505, "learning_rate": 2.0089538826787808e-05, "loss": 0.122, "step": 18521 }, { "epoch": 0.40813763241831796, "grad_norm": 0.7043697834014893, "learning_rate": 2.00885317856641e-05, "loss": 0.0746, "step": 18522 }, { "epoch": 0.4081596677078341, "grad_norm": 0.6618818044662476, "learning_rate": 2.0087524718621783e-05, "loss": 0.0494, "step": 18523 }, { "epoch": 0.40818170299735024, "grad_norm": 0.4786447584629059, "learning_rate": 2.0086517625665963e-05, "loss": 0.0678, "step": 18524 }, { "epoch": 0.4082037382868664, "grad_norm": 1.0942533016204834, "learning_rate": 2.0085510506801784e-05, "loss": 0.0757, "step": 18525 }, { "epoch": 0.40822577357638257, "grad_norm": 0.874180018901825, "learning_rate": 2.0084503362034376e-05, "loss": 0.1025, "step": 18526 }, { "epoch": 0.40824780886589873, "grad_norm": 0.36584052443504333, "learning_rate": 2.0083496191368867e-05, "loss": 0.0861, "step": 18527 }, { "epoch": 0.4082698441554149, "grad_norm": 0.6093683242797852, "learning_rate": 2.0082488994810378e-05, "loss": 0.0669, "step": 18528 }, { "epoch": 0.40829187944493106, "grad_norm": 0.935997724533081, "learning_rate": 2.0081481772364058e-05, "loss": 0.0899, "step": 18529 }, { "epoch": 0.4083139147344472, "grad_norm": 0.8193721771240234, "learning_rate": 2.0080474524035016e-05, "loss": 0.0874, "step": 18530 }, { "epoch": 0.4083359500239634, "grad_norm": 0.5018566846847534, "learning_rate": 2.00794672498284e-05, "loss": 0.0652, "step": 18531 }, { "epoch": 0.40835798531347955, "grad_norm": 0.8094122409820557, "learning_rate": 2.007845994974933e-05, "loss": 0.078, "step": 18532 }, { "epoch": 0.4083800206029957, "grad_norm": 0.9290745258331299, "learning_rate": 2.007745262380294e-05, "loss": 0.1154, "step": 18533 }, { "epoch": 0.4084020558925119, "grad_norm": 0.6147425770759583, "learning_rate": 2.0076445271994364e-05, "loss": 0.0741, "step": 18534 }, { "epoch": 0.408424091182028, "grad_norm": 0.2886723279953003, "learning_rate": 2.0075437894328728e-05, "loss": 0.0773, "step": 18535 }, { "epoch": 0.40844612647154416, "grad_norm": 0.6176381707191467, "learning_rate": 2.0074430490811162e-05, "loss": 0.073, "step": 18536 }, { "epoch": 0.4084681617610603, "grad_norm": 0.4811510145664215, "learning_rate": 2.0073423061446805e-05, "loss": 0.0867, "step": 18537 }, { "epoch": 0.4084901970505765, "grad_norm": 0.504109799861908, "learning_rate": 2.0072415606240785e-05, "loss": 0.067, "step": 18538 }, { "epoch": 0.40851223234009265, "grad_norm": 0.5790340900421143, "learning_rate": 2.007140812519823e-05, "loss": 0.0905, "step": 18539 }, { "epoch": 0.4085342676296088, "grad_norm": 0.5100900530815125, "learning_rate": 2.0070400618324273e-05, "loss": 0.0966, "step": 18540 }, { "epoch": 0.408556302919125, "grad_norm": 0.5911635756492615, "learning_rate": 2.006939308562405e-05, "loss": 0.1055, "step": 18541 }, { "epoch": 0.40857833820864115, "grad_norm": 0.7703285813331604, "learning_rate": 2.0068385527102686e-05, "loss": 0.0904, "step": 18542 }, { "epoch": 0.4086003734981573, "grad_norm": 0.7457807660102844, "learning_rate": 2.0067377942765313e-05, "loss": 0.079, "step": 18543 }, { "epoch": 0.4086224087876735, "grad_norm": 0.750616192817688, "learning_rate": 2.0066370332617075e-05, "loss": 0.0923, "step": 18544 }, { "epoch": 0.40864444407718964, "grad_norm": 0.7470536828041077, "learning_rate": 2.006536269666309e-05, "loss": 0.0851, "step": 18545 }, { "epoch": 0.4086664793667058, "grad_norm": 0.5298578143119812, "learning_rate": 2.0064355034908502e-05, "loss": 0.0915, "step": 18546 }, { "epoch": 0.4086885146562219, "grad_norm": 0.8397691249847412, "learning_rate": 2.006334734735843e-05, "loss": 0.1125, "step": 18547 }, { "epoch": 0.4087105499457381, "grad_norm": 0.6533049941062927, "learning_rate": 2.0062339634018025e-05, "loss": 0.0638, "step": 18548 }, { "epoch": 0.40873258523525424, "grad_norm": 0.49698376655578613, "learning_rate": 2.00613318948924e-05, "loss": 0.0605, "step": 18549 }, { "epoch": 0.4087546205247704, "grad_norm": 0.810387372970581, "learning_rate": 2.0060324129986707e-05, "loss": 0.1056, "step": 18550 }, { "epoch": 0.40877665581428657, "grad_norm": 0.6754451394081116, "learning_rate": 2.005931633930606e-05, "loss": 0.081, "step": 18551 }, { "epoch": 0.40879869110380274, "grad_norm": 0.8256098031997681, "learning_rate": 2.005830852285561e-05, "loss": 0.0909, "step": 18552 }, { "epoch": 0.4088207263933189, "grad_norm": 0.8986025452613831, "learning_rate": 2.0057300680640476e-05, "loss": 0.1038, "step": 18553 }, { "epoch": 0.40884276168283507, "grad_norm": 0.6335261464118958, "learning_rate": 2.00562928126658e-05, "loss": 0.0726, "step": 18554 }, { "epoch": 0.40886479697235123, "grad_norm": 0.756942868232727, "learning_rate": 2.0055284918936717e-05, "loss": 0.0946, "step": 18555 }, { "epoch": 0.4088868322618674, "grad_norm": 0.6539678573608398, "learning_rate": 2.0054276999458355e-05, "loss": 0.0755, "step": 18556 }, { "epoch": 0.40890886755138356, "grad_norm": 0.8742861151695251, "learning_rate": 2.0053269054235847e-05, "loss": 0.0957, "step": 18557 }, { "epoch": 0.4089309028408997, "grad_norm": 0.7049883604049683, "learning_rate": 2.0052261083274334e-05, "loss": 0.0746, "step": 18558 }, { "epoch": 0.4089529381304159, "grad_norm": 0.2945817708969116, "learning_rate": 2.005125308657894e-05, "loss": 0.055, "step": 18559 }, { "epoch": 0.408974973419932, "grad_norm": 0.5229482650756836, "learning_rate": 2.005024506415481e-05, "loss": 0.0769, "step": 18560 }, { "epoch": 0.40899700870944816, "grad_norm": 0.5425983667373657, "learning_rate": 2.0049237016007072e-05, "loss": 0.083, "step": 18561 }, { "epoch": 0.4090190439989643, "grad_norm": 0.702573835849762, "learning_rate": 2.004822894214086e-05, "loss": 0.0755, "step": 18562 }, { "epoch": 0.4090410792884805, "grad_norm": 0.7077562808990479, "learning_rate": 2.004722084256132e-05, "loss": 0.1009, "step": 18563 }, { "epoch": 0.40906311457799666, "grad_norm": 0.49558088183403015, "learning_rate": 2.004621271727357e-05, "loss": 0.0564, "step": 18564 }, { "epoch": 0.4090851498675128, "grad_norm": 0.5554923415184021, "learning_rate": 2.0045204566282755e-05, "loss": 0.0619, "step": 18565 }, { "epoch": 0.409107185157029, "grad_norm": 0.6160633563995361, "learning_rate": 2.0044196389594006e-05, "loss": 0.0786, "step": 18566 }, { "epoch": 0.40912922044654515, "grad_norm": 0.6464425325393677, "learning_rate": 2.004318818721246e-05, "loss": 0.0992, "step": 18567 }, { "epoch": 0.4091512557360613, "grad_norm": 0.535196840763092, "learning_rate": 2.0042179959143252e-05, "loss": 0.0693, "step": 18568 }, { "epoch": 0.4091732910255775, "grad_norm": 0.7330126166343689, "learning_rate": 2.004117170539152e-05, "loss": 0.0719, "step": 18569 }, { "epoch": 0.40919532631509364, "grad_norm": 0.8335781693458557, "learning_rate": 2.0040163425962397e-05, "loss": 0.0867, "step": 18570 }, { "epoch": 0.4092173616046098, "grad_norm": 0.9202608466148376, "learning_rate": 2.0039155120861017e-05, "loss": 0.1382, "step": 18571 }, { "epoch": 0.4092393968941259, "grad_norm": 0.7186369895935059, "learning_rate": 2.003814679009252e-05, "loss": 0.1115, "step": 18572 }, { "epoch": 0.4092614321836421, "grad_norm": 1.089564323425293, "learning_rate": 2.003713843366204e-05, "loss": 0.0951, "step": 18573 }, { "epoch": 0.40928346747315825, "grad_norm": 0.9360558986663818, "learning_rate": 2.003613005157471e-05, "loss": 0.083, "step": 18574 }, { "epoch": 0.4093055027626744, "grad_norm": 0.5466418862342834, "learning_rate": 2.003512164383567e-05, "loss": 0.0674, "step": 18575 }, { "epoch": 0.4093275380521906, "grad_norm": 0.9865919351577759, "learning_rate": 2.0034113210450053e-05, "loss": 0.104, "step": 18576 }, { "epoch": 0.40934957334170674, "grad_norm": 0.7610729932785034, "learning_rate": 2.0033104751423003e-05, "loss": 0.0767, "step": 18577 }, { "epoch": 0.4093716086312229, "grad_norm": 0.8393807411193848, "learning_rate": 2.0032096266759647e-05, "loss": 0.0831, "step": 18578 }, { "epoch": 0.40939364392073907, "grad_norm": 1.1178213357925415, "learning_rate": 2.003108775646513e-05, "loss": 0.0882, "step": 18579 }, { "epoch": 0.40941567921025523, "grad_norm": 0.7281147837638855, "learning_rate": 2.0030079220544583e-05, "loss": 0.0776, "step": 18580 }, { "epoch": 0.4094377144997714, "grad_norm": 0.8821378350257874, "learning_rate": 2.0029070659003148e-05, "loss": 0.0687, "step": 18581 }, { "epoch": 0.40945974978928756, "grad_norm": 0.5019983053207397, "learning_rate": 2.0028062071845956e-05, "loss": 0.0762, "step": 18582 }, { "epoch": 0.40948178507880373, "grad_norm": 0.7941427826881409, "learning_rate": 2.0027053459078147e-05, "loss": 0.1494, "step": 18583 }, { "epoch": 0.40950382036831984, "grad_norm": 1.0822309255599976, "learning_rate": 2.002604482070486e-05, "loss": 0.0668, "step": 18584 }, { "epoch": 0.409525855657836, "grad_norm": 0.5661423206329346, "learning_rate": 2.0025036156731233e-05, "loss": 0.0842, "step": 18585 }, { "epoch": 0.40954789094735217, "grad_norm": 0.6395455002784729, "learning_rate": 2.0024027467162398e-05, "loss": 0.0777, "step": 18586 }, { "epoch": 0.40956992623686833, "grad_norm": 0.5036717057228088, "learning_rate": 2.0023018752003505e-05, "loss": 0.0846, "step": 18587 }, { "epoch": 0.4095919615263845, "grad_norm": 0.8011009693145752, "learning_rate": 2.002201001125968e-05, "loss": 0.1067, "step": 18588 }, { "epoch": 0.40961399681590066, "grad_norm": 0.6792635917663574, "learning_rate": 2.0021001244936063e-05, "loss": 0.0801, "step": 18589 }, { "epoch": 0.4096360321054168, "grad_norm": 0.5489262938499451, "learning_rate": 2.0019992453037794e-05, "loss": 0.1188, "step": 18590 }, { "epoch": 0.409658067394933, "grad_norm": 0.5684620141983032, "learning_rate": 2.0018983635570013e-05, "loss": 0.098, "step": 18591 }, { "epoch": 0.40968010268444915, "grad_norm": 0.5992302298545837, "learning_rate": 2.0017974792537855e-05, "loss": 0.0753, "step": 18592 }, { "epoch": 0.4097021379739653, "grad_norm": 0.56497722864151, "learning_rate": 2.0016965923946462e-05, "loss": 0.0736, "step": 18593 }, { "epoch": 0.4097241732634815, "grad_norm": 0.8328118324279785, "learning_rate": 2.0015957029800976e-05, "loss": 0.0972, "step": 18594 }, { "epoch": 0.40974620855299765, "grad_norm": 0.4016176164150238, "learning_rate": 2.0014948110106526e-05, "loss": 0.1036, "step": 18595 }, { "epoch": 0.4097682438425138, "grad_norm": 0.8390660285949707, "learning_rate": 2.001393916486826e-05, "loss": 0.0944, "step": 18596 }, { "epoch": 0.4097902791320299, "grad_norm": 0.6751203536987305, "learning_rate": 2.0012930194091306e-05, "loss": 0.1145, "step": 18597 }, { "epoch": 0.4098123144215461, "grad_norm": 0.5630851984024048, "learning_rate": 2.001192119778082e-05, "loss": 0.0891, "step": 18598 }, { "epoch": 0.40983434971106225, "grad_norm": 0.5737805962562561, "learning_rate": 2.001091217594192e-05, "loss": 0.0962, "step": 18599 }, { "epoch": 0.4098563850005784, "grad_norm": 0.9916462898254395, "learning_rate": 2.0009903128579765e-05, "loss": 0.0929, "step": 18600 }, { "epoch": 0.4098784202900946, "grad_norm": 0.4479377269744873, "learning_rate": 2.000889405569948e-05, "loss": 0.0634, "step": 18601 }, { "epoch": 0.40990045557961075, "grad_norm": 0.7917107939720154, "learning_rate": 2.0007884957306223e-05, "loss": 0.0808, "step": 18602 }, { "epoch": 0.4099224908691269, "grad_norm": 0.671769917011261, "learning_rate": 2.0006875833405117e-05, "loss": 0.1136, "step": 18603 }, { "epoch": 0.4099445261586431, "grad_norm": 0.5689795613288879, "learning_rate": 2.000586668400131e-05, "loss": 0.0679, "step": 18604 }, { "epoch": 0.40996656144815924, "grad_norm": 0.6824914813041687, "learning_rate": 2.000485750909993e-05, "loss": 0.0446, "step": 18605 }, { "epoch": 0.4099885967376754, "grad_norm": 0.7120692729949951, "learning_rate": 2.0003848308706136e-05, "loss": 0.0842, "step": 18606 }, { "epoch": 0.41001063202719157, "grad_norm": 0.5139671564102173, "learning_rate": 2.0002839082825058e-05, "loss": 0.1004, "step": 18607 }, { "epoch": 0.41003266731670773, "grad_norm": 0.5854390263557434, "learning_rate": 2.0001829831461834e-05, "loss": 0.068, "step": 18608 }, { "epoch": 0.41005470260622384, "grad_norm": 0.4570560157299042, "learning_rate": 2.000082055462161e-05, "loss": 0.0741, "step": 18609 }, { "epoch": 0.41007673789574, "grad_norm": 0.7366166710853577, "learning_rate": 1.999981125230953e-05, "loss": 0.0673, "step": 18610 }, { "epoch": 0.41009877318525617, "grad_norm": 0.5953187346458435, "learning_rate": 1.9998801924530728e-05, "loss": 0.1007, "step": 18611 }, { "epoch": 0.41012080847477234, "grad_norm": 0.6320078372955322, "learning_rate": 1.999779257129034e-05, "loss": 0.0889, "step": 18612 }, { "epoch": 0.4101428437642885, "grad_norm": 0.7488765716552734, "learning_rate": 1.999678319259352e-05, "loss": 0.075, "step": 18613 }, { "epoch": 0.41016487905380467, "grad_norm": 0.6364790201187134, "learning_rate": 1.9995773788445405e-05, "loss": 0.1315, "step": 18614 }, { "epoch": 0.41018691434332083, "grad_norm": 0.5634326338768005, "learning_rate": 1.9994764358851134e-05, "loss": 0.0745, "step": 18615 }, { "epoch": 0.410208949632837, "grad_norm": 0.6575042009353638, "learning_rate": 1.999375490381585e-05, "loss": 0.111, "step": 18616 }, { "epoch": 0.41023098492235316, "grad_norm": 1.4477263689041138, "learning_rate": 1.9992745423344694e-05, "loss": 0.1092, "step": 18617 }, { "epoch": 0.4102530202118693, "grad_norm": 0.742972731590271, "learning_rate": 1.9991735917442808e-05, "loss": 0.0577, "step": 18618 }, { "epoch": 0.4102750555013855, "grad_norm": 0.47150176763534546, "learning_rate": 1.9990726386115333e-05, "loss": 0.0416, "step": 18619 }, { "epoch": 0.41029709079090165, "grad_norm": 0.7178549766540527, "learning_rate": 1.9989716829367414e-05, "loss": 0.1109, "step": 18620 }, { "epoch": 0.41031912608041776, "grad_norm": 0.9019567370414734, "learning_rate": 1.9988707247204194e-05, "loss": 0.1114, "step": 18621 }, { "epoch": 0.4103411613699339, "grad_norm": 0.7975254654884338, "learning_rate": 1.9987697639630808e-05, "loss": 0.0697, "step": 18622 }, { "epoch": 0.4103631966594501, "grad_norm": 0.6398864388465881, "learning_rate": 1.9986688006652407e-05, "loss": 0.084, "step": 18623 }, { "epoch": 0.41038523194896626, "grad_norm": 0.5531312823295593, "learning_rate": 1.9985678348274124e-05, "loss": 0.0878, "step": 18624 }, { "epoch": 0.4104072672384824, "grad_norm": 0.6053537726402283, "learning_rate": 1.9984668664501112e-05, "loss": 0.0903, "step": 18625 }, { "epoch": 0.4104293025279986, "grad_norm": 1.174507737159729, "learning_rate": 1.9983658955338512e-05, "loss": 0.0975, "step": 18626 }, { "epoch": 0.41045133781751475, "grad_norm": 0.8432832956314087, "learning_rate": 1.9982649220791465e-05, "loss": 0.0877, "step": 18627 }, { "epoch": 0.4104733731070309, "grad_norm": 0.7501423358917236, "learning_rate": 1.998163946086511e-05, "loss": 0.0747, "step": 18628 }, { "epoch": 0.4104954083965471, "grad_norm": 1.2403078079223633, "learning_rate": 1.9980629675564594e-05, "loss": 0.108, "step": 18629 }, { "epoch": 0.41051744368606324, "grad_norm": 0.6277379989624023, "learning_rate": 1.997961986489506e-05, "loss": 0.0899, "step": 18630 }, { "epoch": 0.4105394789755794, "grad_norm": 0.8169013857841492, "learning_rate": 1.9978610028861648e-05, "loss": 0.0635, "step": 18631 }, { "epoch": 0.4105615142650956, "grad_norm": 0.6929333806037903, "learning_rate": 1.997760016746951e-05, "loss": 0.0877, "step": 18632 }, { "epoch": 0.41058354955461174, "grad_norm": 0.6248698830604553, "learning_rate": 1.9976590280723785e-05, "loss": 0.0747, "step": 18633 }, { "epoch": 0.41060558484412785, "grad_norm": 0.546805202960968, "learning_rate": 1.9975580368629617e-05, "loss": 0.0762, "step": 18634 }, { "epoch": 0.410627620133644, "grad_norm": 0.5526686906814575, "learning_rate": 1.9974570431192152e-05, "loss": 0.1086, "step": 18635 }, { "epoch": 0.4106496554231602, "grad_norm": 0.8167657256126404, "learning_rate": 1.997356046841653e-05, "loss": 0.1055, "step": 18636 }, { "epoch": 0.41067169071267634, "grad_norm": 0.5626472234725952, "learning_rate": 1.9972550480307897e-05, "loss": 0.0594, "step": 18637 }, { "epoch": 0.4106937260021925, "grad_norm": 0.6855483651161194, "learning_rate": 1.99715404668714e-05, "loss": 0.1103, "step": 18638 }, { "epoch": 0.41071576129170867, "grad_norm": 0.846245527267456, "learning_rate": 1.997053042811218e-05, "loss": 0.0655, "step": 18639 }, { "epoch": 0.41073779658122483, "grad_norm": 1.0895805358886719, "learning_rate": 1.996952036403538e-05, "loss": 0.0834, "step": 18640 }, { "epoch": 0.410759831870741, "grad_norm": 0.38554835319519043, "learning_rate": 1.996851027464615e-05, "loss": 0.0514, "step": 18641 }, { "epoch": 0.41078186716025716, "grad_norm": 0.40616080164909363, "learning_rate": 1.9967500159949638e-05, "loss": 0.0711, "step": 18642 }, { "epoch": 0.41080390244977333, "grad_norm": 0.7483254075050354, "learning_rate": 1.996649001995098e-05, "loss": 0.1034, "step": 18643 }, { "epoch": 0.4108259377392895, "grad_norm": 0.7430527210235596, "learning_rate": 1.9965479854655325e-05, "loss": 0.0941, "step": 18644 }, { "epoch": 0.41084797302880566, "grad_norm": 0.6540340185165405, "learning_rate": 1.9964469664067817e-05, "loss": 0.0794, "step": 18645 }, { "epoch": 0.41087000831832177, "grad_norm": 0.33125072717666626, "learning_rate": 1.9963459448193604e-05, "loss": 0.0592, "step": 18646 }, { "epoch": 0.41089204360783793, "grad_norm": 0.6769810914993286, "learning_rate": 1.996244920703783e-05, "loss": 0.0823, "step": 18647 }, { "epoch": 0.4109140788973541, "grad_norm": 0.5329456329345703, "learning_rate": 1.996143894060564e-05, "loss": 0.0852, "step": 18648 }, { "epoch": 0.41093611418687026, "grad_norm": 0.5847759246826172, "learning_rate": 1.9960428648902183e-05, "loss": 0.0772, "step": 18649 }, { "epoch": 0.4109581494763864, "grad_norm": 0.9346272349357605, "learning_rate": 1.99594183319326e-05, "loss": 0.111, "step": 18650 }, { "epoch": 0.4109801847659026, "grad_norm": 0.540050745010376, "learning_rate": 1.9958407989702048e-05, "loss": 0.0883, "step": 18651 }, { "epoch": 0.41100222005541875, "grad_norm": 0.5589573979377747, "learning_rate": 1.995739762221566e-05, "loss": 0.0784, "step": 18652 }, { "epoch": 0.4110242553449349, "grad_norm": 0.6595600843429565, "learning_rate": 1.9956387229478585e-05, "loss": 0.082, "step": 18653 }, { "epoch": 0.4110462906344511, "grad_norm": 0.7269915342330933, "learning_rate": 1.9955376811495974e-05, "loss": 0.1076, "step": 18654 }, { "epoch": 0.41106832592396725, "grad_norm": 0.5683762431144714, "learning_rate": 1.995436636827297e-05, "loss": 0.0713, "step": 18655 }, { "epoch": 0.4110903612134834, "grad_norm": 0.41302284598350525, "learning_rate": 1.9953355899814723e-05, "loss": 0.0754, "step": 18656 }, { "epoch": 0.4111123965029996, "grad_norm": 0.910103440284729, "learning_rate": 1.9952345406126377e-05, "loss": 0.0815, "step": 18657 }, { "epoch": 0.4111344317925157, "grad_norm": 0.7306451797485352, "learning_rate": 1.9951334887213086e-05, "loss": 0.087, "step": 18658 }, { "epoch": 0.41115646708203185, "grad_norm": 0.9037591814994812, "learning_rate": 1.995032434307999e-05, "loss": 0.0911, "step": 18659 }, { "epoch": 0.411178502371548, "grad_norm": 0.6990617513656616, "learning_rate": 1.994931377373223e-05, "loss": 0.0728, "step": 18660 }, { "epoch": 0.4112005376610642, "grad_norm": 1.0885224342346191, "learning_rate": 1.9948303179174968e-05, "loss": 0.0682, "step": 18661 }, { "epoch": 0.41122257295058035, "grad_norm": 0.9080096483230591, "learning_rate": 1.9947292559413338e-05, "loss": 0.0955, "step": 18662 }, { "epoch": 0.4112446082400965, "grad_norm": 0.7697116732597351, "learning_rate": 1.9946281914452498e-05, "loss": 0.0645, "step": 18663 }, { "epoch": 0.4112666435296127, "grad_norm": 0.5643327832221985, "learning_rate": 1.994527124429759e-05, "loss": 0.0951, "step": 18664 }, { "epoch": 0.41128867881912884, "grad_norm": 0.6359620690345764, "learning_rate": 1.9944260548953768e-05, "loss": 0.0867, "step": 18665 }, { "epoch": 0.411310714108645, "grad_norm": 0.5896664261817932, "learning_rate": 1.9943249828426174e-05, "loss": 0.1074, "step": 18666 }, { "epoch": 0.41133274939816117, "grad_norm": 0.5334587097167969, "learning_rate": 1.9942239082719957e-05, "loss": 0.0505, "step": 18667 }, { "epoch": 0.41135478468767733, "grad_norm": 0.600823163986206, "learning_rate": 1.9941228311840266e-05, "loss": 0.0985, "step": 18668 }, { "epoch": 0.4113768199771935, "grad_norm": 0.5242127776145935, "learning_rate": 1.9940217515792252e-05, "loss": 0.0453, "step": 18669 }, { "epoch": 0.41139885526670966, "grad_norm": 0.7174397706985474, "learning_rate": 1.9939206694581054e-05, "loss": 0.0977, "step": 18670 }, { "epoch": 0.41142089055622577, "grad_norm": 0.34464067220687866, "learning_rate": 1.9938195848211835e-05, "loss": 0.0539, "step": 18671 }, { "epoch": 0.41144292584574194, "grad_norm": 0.6517727375030518, "learning_rate": 1.993718497668973e-05, "loss": 0.1095, "step": 18672 }, { "epoch": 0.4114649611352581, "grad_norm": 0.5321625471115112, "learning_rate": 1.99361740800199e-05, "loss": 0.0873, "step": 18673 }, { "epoch": 0.41148699642477427, "grad_norm": 0.5829885005950928, "learning_rate": 1.9935163158207486e-05, "loss": 0.0874, "step": 18674 }, { "epoch": 0.41150903171429043, "grad_norm": 0.5642194747924805, "learning_rate": 1.9934152211257642e-05, "loss": 0.0607, "step": 18675 }, { "epoch": 0.4115310670038066, "grad_norm": 0.8297379016876221, "learning_rate": 1.993314123917551e-05, "loss": 0.0973, "step": 18676 }, { "epoch": 0.41155310229332276, "grad_norm": 0.892122745513916, "learning_rate": 1.9932130241966248e-05, "loss": 0.0913, "step": 18677 }, { "epoch": 0.4115751375828389, "grad_norm": 0.6200963258743286, "learning_rate": 1.9931119219635e-05, "loss": 0.0891, "step": 18678 }, { "epoch": 0.4115971728723551, "grad_norm": 0.5695409774780273, "learning_rate": 1.9930108172186917e-05, "loss": 0.0719, "step": 18679 }, { "epoch": 0.41161920816187125, "grad_norm": 0.8467015624046326, "learning_rate": 1.9929097099627152e-05, "loss": 0.0975, "step": 18680 }, { "epoch": 0.4116412434513874, "grad_norm": 0.7135372757911682, "learning_rate": 1.9928086001960852e-05, "loss": 0.0877, "step": 18681 }, { "epoch": 0.4116632787409036, "grad_norm": 1.0276938676834106, "learning_rate": 1.992707487919317e-05, "loss": 0.0682, "step": 18682 }, { "epoch": 0.4116853140304197, "grad_norm": 0.5035634636878967, "learning_rate": 1.9926063731329247e-05, "loss": 0.056, "step": 18683 }, { "epoch": 0.41170734931993586, "grad_norm": 0.9307029843330383, "learning_rate": 1.9925052558374245e-05, "loss": 0.0773, "step": 18684 }, { "epoch": 0.411729384609452, "grad_norm": 0.8750956654548645, "learning_rate": 1.9924041360333304e-05, "loss": 0.107, "step": 18685 }, { "epoch": 0.4117514198989682, "grad_norm": 0.767214298248291, "learning_rate": 1.9923030137211583e-05, "loss": 0.056, "step": 18686 }, { "epoch": 0.41177345518848435, "grad_norm": 0.4608094096183777, "learning_rate": 1.992201888901423e-05, "loss": 0.0774, "step": 18687 }, { "epoch": 0.4117954904780005, "grad_norm": 0.7296714186668396, "learning_rate": 1.9921007615746397e-05, "loss": 0.0981, "step": 18688 }, { "epoch": 0.4118175257675167, "grad_norm": 0.6894634962081909, "learning_rate": 1.991999631741323e-05, "loss": 0.0798, "step": 18689 }, { "epoch": 0.41183956105703284, "grad_norm": 0.9581598043441772, "learning_rate": 1.9918984994019886e-05, "loss": 0.0854, "step": 18690 }, { "epoch": 0.411861596346549, "grad_norm": 0.8546404838562012, "learning_rate": 1.991797364557151e-05, "loss": 0.0655, "step": 18691 }, { "epoch": 0.4118836316360652, "grad_norm": 0.8567067980766296, "learning_rate": 1.9916962272073266e-05, "loss": 0.0909, "step": 18692 }, { "epoch": 0.41190566692558134, "grad_norm": 0.797490656375885, "learning_rate": 1.9915950873530286e-05, "loss": 0.0811, "step": 18693 }, { "epoch": 0.4119277022150975, "grad_norm": 0.727347195148468, "learning_rate": 1.991493944994774e-05, "loss": 0.073, "step": 18694 }, { "epoch": 0.4119497375046136, "grad_norm": 0.7213855981826782, "learning_rate": 1.9913928001330766e-05, "loss": 0.0992, "step": 18695 }, { "epoch": 0.4119717727941298, "grad_norm": 0.7633474469184875, "learning_rate": 1.9912916527684522e-05, "loss": 0.101, "step": 18696 }, { "epoch": 0.41199380808364594, "grad_norm": 0.6364170908927917, "learning_rate": 1.991190502901416e-05, "loss": 0.0976, "step": 18697 }, { "epoch": 0.4120158433731621, "grad_norm": 0.7505594491958618, "learning_rate": 1.9910893505324834e-05, "loss": 0.1164, "step": 18698 }, { "epoch": 0.41203787866267827, "grad_norm": 0.6266939043998718, "learning_rate": 1.9909881956621692e-05, "loss": 0.0704, "step": 18699 }, { "epoch": 0.41205991395219443, "grad_norm": 0.8182982802391052, "learning_rate": 1.990887038290989e-05, "loss": 0.1, "step": 18700 }, { "epoch": 0.4120819492417106, "grad_norm": 0.8585044741630554, "learning_rate": 1.9907858784194576e-05, "loss": 0.1081, "step": 18701 }, { "epoch": 0.41210398453122676, "grad_norm": 0.7272797226905823, "learning_rate": 1.9906847160480908e-05, "loss": 0.0698, "step": 18702 }, { "epoch": 0.41212601982074293, "grad_norm": 0.7500369548797607, "learning_rate": 1.9905835511774038e-05, "loss": 0.0587, "step": 18703 }, { "epoch": 0.4121480551102591, "grad_norm": 0.7601677179336548, "learning_rate": 1.9904823838079114e-05, "loss": 0.117, "step": 18704 }, { "epoch": 0.41217009039977526, "grad_norm": 0.8284595012664795, "learning_rate": 1.990381213940129e-05, "loss": 0.0789, "step": 18705 }, { "epoch": 0.4121921256892914, "grad_norm": 0.6230422258377075, "learning_rate": 1.9902800415745728e-05, "loss": 0.0632, "step": 18706 }, { "epoch": 0.4122141609788076, "grad_norm": 0.44151201844215393, "learning_rate": 1.990178866711757e-05, "loss": 0.0963, "step": 18707 }, { "epoch": 0.4122361962683237, "grad_norm": 0.9453781843185425, "learning_rate": 1.990077689352197e-05, "loss": 0.0818, "step": 18708 }, { "epoch": 0.41225823155783986, "grad_norm": 0.7671673893928528, "learning_rate": 1.9899765094964092e-05, "loss": 0.0893, "step": 18709 }, { "epoch": 0.412280266847356, "grad_norm": 1.151113748550415, "learning_rate": 1.9898753271449077e-05, "loss": 0.1179, "step": 18710 }, { "epoch": 0.4123023021368722, "grad_norm": 1.0662575960159302, "learning_rate": 1.989774142298209e-05, "loss": 0.0835, "step": 18711 }, { "epoch": 0.41232433742638835, "grad_norm": 0.6480686068534851, "learning_rate": 1.9896729549568276e-05, "loss": 0.1061, "step": 18712 }, { "epoch": 0.4123463727159045, "grad_norm": 0.5200278162956238, "learning_rate": 1.9895717651212794e-05, "loss": 0.09, "step": 18713 }, { "epoch": 0.4123684080054207, "grad_norm": 1.1538804769515991, "learning_rate": 1.9894705727920797e-05, "loss": 0.052, "step": 18714 }, { "epoch": 0.41239044329493685, "grad_norm": 0.5005618929862976, "learning_rate": 1.989369377969744e-05, "loss": 0.0588, "step": 18715 }, { "epoch": 0.412412478584453, "grad_norm": 0.6384637355804443, "learning_rate": 1.9892681806547874e-05, "loss": 0.0919, "step": 18716 }, { "epoch": 0.4124345138739692, "grad_norm": 0.924461841583252, "learning_rate": 1.9891669808477258e-05, "loss": 0.0778, "step": 18717 }, { "epoch": 0.41245654916348534, "grad_norm": 0.7752528190612793, "learning_rate": 1.989065778549074e-05, "loss": 0.0839, "step": 18718 }, { "epoch": 0.4124785844530015, "grad_norm": 0.6509976983070374, "learning_rate": 1.9889645737593485e-05, "loss": 0.0715, "step": 18719 }, { "epoch": 0.4125006197425176, "grad_norm": 0.6813528537750244, "learning_rate": 1.988863366479064e-05, "loss": 0.1016, "step": 18720 }, { "epoch": 0.4125226550320338, "grad_norm": 0.7884361743927002, "learning_rate": 1.9887621567087362e-05, "loss": 0.1015, "step": 18721 }, { "epoch": 0.41254469032154995, "grad_norm": 0.6751656532287598, "learning_rate": 1.9886609444488808e-05, "loss": 0.0911, "step": 18722 }, { "epoch": 0.4125667256110661, "grad_norm": 0.47910964488983154, "learning_rate": 1.988559729700013e-05, "loss": 0.0874, "step": 18723 }, { "epoch": 0.4125887609005823, "grad_norm": 0.8332417011260986, "learning_rate": 1.9884585124626483e-05, "loss": 0.0611, "step": 18724 }, { "epoch": 0.41261079619009844, "grad_norm": 0.5301007628440857, "learning_rate": 1.9883572927373027e-05, "loss": 0.0772, "step": 18725 }, { "epoch": 0.4126328314796146, "grad_norm": 0.8014103770256042, "learning_rate": 1.9882560705244916e-05, "loss": 0.0697, "step": 18726 }, { "epoch": 0.41265486676913077, "grad_norm": 0.841636598110199, "learning_rate": 1.9881548458247302e-05, "loss": 0.0806, "step": 18727 }, { "epoch": 0.41267690205864693, "grad_norm": 0.9130160212516785, "learning_rate": 1.9880536186385347e-05, "loss": 0.1184, "step": 18728 }, { "epoch": 0.4126989373481631, "grad_norm": 0.8094120621681213, "learning_rate": 1.9879523889664205e-05, "loss": 0.0933, "step": 18729 }, { "epoch": 0.41272097263767926, "grad_norm": 0.6369256973266602, "learning_rate": 1.987851156808903e-05, "loss": 0.0871, "step": 18730 }, { "epoch": 0.4127430079271954, "grad_norm": 0.6560153365135193, "learning_rate": 1.9877499221664976e-05, "loss": 0.0797, "step": 18731 }, { "epoch": 0.4127650432167116, "grad_norm": 0.5051912665367126, "learning_rate": 1.987648685039721e-05, "loss": 0.0816, "step": 18732 }, { "epoch": 0.4127870785062277, "grad_norm": 0.7805588245391846, "learning_rate": 1.9875474454290877e-05, "loss": 0.1127, "step": 18733 }, { "epoch": 0.41280911379574386, "grad_norm": 0.7586953639984131, "learning_rate": 1.987446203335114e-05, "loss": 0.0956, "step": 18734 }, { "epoch": 0.41283114908526003, "grad_norm": 0.8709399104118347, "learning_rate": 1.987344958758315e-05, "loss": 0.0969, "step": 18735 }, { "epoch": 0.4128531843747762, "grad_norm": 0.9560621976852417, "learning_rate": 1.987243711699207e-05, "loss": 0.1216, "step": 18736 }, { "epoch": 0.41287521966429236, "grad_norm": 0.8700999617576599, "learning_rate": 1.987142462158306e-05, "loss": 0.0994, "step": 18737 }, { "epoch": 0.4128972549538085, "grad_norm": 0.6392633318901062, "learning_rate": 1.9870412101361266e-05, "loss": 0.0952, "step": 18738 }, { "epoch": 0.4129192902433247, "grad_norm": 0.7995656728744507, "learning_rate": 1.986939955633185e-05, "loss": 0.1158, "step": 18739 }, { "epoch": 0.41294132553284085, "grad_norm": 0.506130039691925, "learning_rate": 1.9868386986499974e-05, "loss": 0.082, "step": 18740 }, { "epoch": 0.412963360822357, "grad_norm": 0.723191499710083, "learning_rate": 1.9867374391870792e-05, "loss": 0.0804, "step": 18741 }, { "epoch": 0.4129853961118732, "grad_norm": 0.6818109750747681, "learning_rate": 1.9866361772449464e-05, "loss": 0.0837, "step": 18742 }, { "epoch": 0.41300743140138935, "grad_norm": 0.4555368721485138, "learning_rate": 1.9865349128241142e-05, "loss": 0.094, "step": 18743 }, { "epoch": 0.4130294666909055, "grad_norm": 0.7496731877326965, "learning_rate": 1.9864336459250992e-05, "loss": 0.0845, "step": 18744 }, { "epoch": 0.4130515019804216, "grad_norm": 0.41166314482688904, "learning_rate": 1.9863323765484165e-05, "loss": 0.0506, "step": 18745 }, { "epoch": 0.4130735372699378, "grad_norm": 0.7692071199417114, "learning_rate": 1.9862311046945823e-05, "loss": 0.0898, "step": 18746 }, { "epoch": 0.41309557255945395, "grad_norm": 0.9184482097625732, "learning_rate": 1.9861298303641125e-05, "loss": 0.118, "step": 18747 }, { "epoch": 0.4131176078489701, "grad_norm": 0.7575452327728271, "learning_rate": 1.986028553557523e-05, "loss": 0.0834, "step": 18748 }, { "epoch": 0.4131396431384863, "grad_norm": 0.5868671536445618, "learning_rate": 1.985927274275329e-05, "loss": 0.0817, "step": 18749 }, { "epoch": 0.41316167842800244, "grad_norm": 1.062086820602417, "learning_rate": 1.9858259925180468e-05, "loss": 0.0839, "step": 18750 }, { "epoch": 0.4131837137175186, "grad_norm": 0.7899259924888611, "learning_rate": 1.9857247082861925e-05, "loss": 0.0783, "step": 18751 }, { "epoch": 0.4132057490070348, "grad_norm": 0.6607916951179504, "learning_rate": 1.9856234215802817e-05, "loss": 0.0771, "step": 18752 }, { "epoch": 0.41322778429655094, "grad_norm": 0.5002885460853577, "learning_rate": 1.9855221324008307e-05, "loss": 0.0554, "step": 18753 }, { "epoch": 0.4132498195860671, "grad_norm": 0.5934819579124451, "learning_rate": 1.9854208407483545e-05, "loss": 0.0791, "step": 18754 }, { "epoch": 0.41327185487558327, "grad_norm": 0.7533759474754333, "learning_rate": 1.9853195466233705e-05, "loss": 0.0939, "step": 18755 }, { "epoch": 0.41329389016509943, "grad_norm": 1.5528404712677002, "learning_rate": 1.985218250026393e-05, "loss": 0.094, "step": 18756 }, { "epoch": 0.41331592545461554, "grad_norm": 0.45779407024383545, "learning_rate": 1.985116950957939e-05, "loss": 0.0708, "step": 18757 }, { "epoch": 0.4133379607441317, "grad_norm": 0.6613157391548157, "learning_rate": 1.9850156494185244e-05, "loss": 0.078, "step": 18758 }, { "epoch": 0.41335999603364787, "grad_norm": 0.8594681620597839, "learning_rate": 1.984914345408665e-05, "loss": 0.0786, "step": 18759 }, { "epoch": 0.41338203132316403, "grad_norm": 1.0596166849136353, "learning_rate": 1.9848130389288766e-05, "loss": 0.0766, "step": 18760 }, { "epoch": 0.4134040666126802, "grad_norm": 1.1302354335784912, "learning_rate": 1.9847117299796756e-05, "loss": 0.1503, "step": 18761 }, { "epoch": 0.41342610190219636, "grad_norm": 0.6747064590454102, "learning_rate": 1.9846104185615782e-05, "loss": 0.1144, "step": 18762 }, { "epoch": 0.41344813719171253, "grad_norm": 0.3504595458507538, "learning_rate": 1.9845091046750994e-05, "loss": 0.0763, "step": 18763 }, { "epoch": 0.4134701724812287, "grad_norm": 0.846153199672699, "learning_rate": 1.984407788320756e-05, "loss": 0.0723, "step": 18764 }, { "epoch": 0.41349220777074486, "grad_norm": 1.109676480293274, "learning_rate": 1.984306469499064e-05, "loss": 0.0636, "step": 18765 }, { "epoch": 0.413514243060261, "grad_norm": 0.6091367602348328, "learning_rate": 1.9842051482105397e-05, "loss": 0.0872, "step": 18766 }, { "epoch": 0.4135362783497772, "grad_norm": 0.7595846652984619, "learning_rate": 1.9841038244556988e-05, "loss": 0.0782, "step": 18767 }, { "epoch": 0.41355831363929335, "grad_norm": 0.9836556315422058, "learning_rate": 1.9840024982350576e-05, "loss": 0.0608, "step": 18768 }, { "epoch": 0.4135803489288095, "grad_norm": 0.4991207420825958, "learning_rate": 1.9839011695491324e-05, "loss": 0.0972, "step": 18769 }, { "epoch": 0.4136023842183256, "grad_norm": 2.0120949745178223, "learning_rate": 1.9837998383984385e-05, "loss": 0.071, "step": 18770 }, { "epoch": 0.4136244195078418, "grad_norm": 1.002441644668579, "learning_rate": 1.9836985047834928e-05, "loss": 0.1367, "step": 18771 }, { "epoch": 0.41364645479735795, "grad_norm": 0.7078791856765747, "learning_rate": 1.983597168704811e-05, "loss": 0.0714, "step": 18772 }, { "epoch": 0.4136684900868741, "grad_norm": 0.52421635389328, "learning_rate": 1.9834958301629097e-05, "loss": 0.0751, "step": 18773 }, { "epoch": 0.4136905253763903, "grad_norm": 0.9704180955886841, "learning_rate": 1.9833944891583052e-05, "loss": 0.0534, "step": 18774 }, { "epoch": 0.41371256066590645, "grad_norm": 0.7512491345405579, "learning_rate": 1.9832931456915128e-05, "loss": 0.128, "step": 18775 }, { "epoch": 0.4137345959554226, "grad_norm": 1.2093514204025269, "learning_rate": 1.9831917997630498e-05, "loss": 0.1048, "step": 18776 }, { "epoch": 0.4137566312449388, "grad_norm": 0.8758106827735901, "learning_rate": 1.9830904513734313e-05, "loss": 0.0871, "step": 18777 }, { "epoch": 0.41377866653445494, "grad_norm": 0.5654400587081909, "learning_rate": 1.9829891005231746e-05, "loss": 0.0698, "step": 18778 }, { "epoch": 0.4138007018239711, "grad_norm": 0.6893940567970276, "learning_rate": 1.9828877472127948e-05, "loss": 0.075, "step": 18779 }, { "epoch": 0.41382273711348727, "grad_norm": 0.7089169025421143, "learning_rate": 1.9827863914428095e-05, "loss": 0.0637, "step": 18780 }, { "epoch": 0.41384477240300344, "grad_norm": 0.8585398197174072, "learning_rate": 1.9826850332137334e-05, "loss": 0.0833, "step": 18781 }, { "epoch": 0.41386680769251954, "grad_norm": 0.5712350606918335, "learning_rate": 1.9825836725260842e-05, "loss": 0.0734, "step": 18782 }, { "epoch": 0.4138888429820357, "grad_norm": 0.6472415328025818, "learning_rate": 1.9824823093803773e-05, "loss": 0.0721, "step": 18783 }, { "epoch": 0.4139108782715519, "grad_norm": 0.7640251517295837, "learning_rate": 1.9823809437771296e-05, "loss": 0.0944, "step": 18784 }, { "epoch": 0.41393291356106804, "grad_norm": 0.6253200769424438, "learning_rate": 1.9822795757168568e-05, "loss": 0.0996, "step": 18785 }, { "epoch": 0.4139549488505842, "grad_norm": 0.8643928170204163, "learning_rate": 1.9821782052000757e-05, "loss": 0.0836, "step": 18786 }, { "epoch": 0.41397698414010037, "grad_norm": 0.3314763307571411, "learning_rate": 1.982076832227302e-05, "loss": 0.054, "step": 18787 }, { "epoch": 0.41399901942961653, "grad_norm": 0.9887129068374634, "learning_rate": 1.9819754567990527e-05, "loss": 0.0726, "step": 18788 }, { "epoch": 0.4140210547191327, "grad_norm": 0.5755300521850586, "learning_rate": 1.9818740789158442e-05, "loss": 0.1033, "step": 18789 }, { "epoch": 0.41404309000864886, "grad_norm": 0.9215511679649353, "learning_rate": 1.9817726985781924e-05, "loss": 0.104, "step": 18790 }, { "epoch": 0.414065125298165, "grad_norm": 0.7880574464797974, "learning_rate": 1.9816713157866137e-05, "loss": 0.103, "step": 18791 }, { "epoch": 0.4140871605876812, "grad_norm": 0.4731694757938385, "learning_rate": 1.981569930541625e-05, "loss": 0.0562, "step": 18792 }, { "epoch": 0.41410919587719736, "grad_norm": 0.6667773723602295, "learning_rate": 1.9814685428437428e-05, "loss": 0.1025, "step": 18793 }, { "epoch": 0.41413123116671346, "grad_norm": 0.8572505116462708, "learning_rate": 1.9813671526934823e-05, "loss": 0.0753, "step": 18794 }, { "epoch": 0.41415326645622963, "grad_norm": 0.6781762838363647, "learning_rate": 1.9812657600913613e-05, "loss": 0.0908, "step": 18795 }, { "epoch": 0.4141753017457458, "grad_norm": 0.5099777579307556, "learning_rate": 1.981164365037895e-05, "loss": 0.0679, "step": 18796 }, { "epoch": 0.41419733703526196, "grad_norm": 0.3619520366191864, "learning_rate": 1.9810629675336017e-05, "loss": 0.0504, "step": 18797 }, { "epoch": 0.4142193723247781, "grad_norm": 0.5764018297195435, "learning_rate": 1.980961567578996e-05, "loss": 0.0557, "step": 18798 }, { "epoch": 0.4142414076142943, "grad_norm": 0.6476197838783264, "learning_rate": 1.9808601651745952e-05, "loss": 0.1042, "step": 18799 }, { "epoch": 0.41426344290381045, "grad_norm": 1.1614314317703247, "learning_rate": 1.9807587603209154e-05, "loss": 0.0875, "step": 18800 }, { "epoch": 0.4142854781933266, "grad_norm": 0.5981976985931396, "learning_rate": 1.9806573530184743e-05, "loss": 0.082, "step": 18801 }, { "epoch": 0.4143075134828428, "grad_norm": 0.5535058975219727, "learning_rate": 1.980555943267787e-05, "loss": 0.0504, "step": 18802 }, { "epoch": 0.41432954877235895, "grad_norm": 0.8596422076225281, "learning_rate": 1.9804545310693707e-05, "loss": 0.097, "step": 18803 }, { "epoch": 0.4143515840618751, "grad_norm": 0.5944843292236328, "learning_rate": 1.9803531164237414e-05, "loss": 0.0851, "step": 18804 }, { "epoch": 0.4143736193513913, "grad_norm": 0.8026271462440491, "learning_rate": 1.9802516993314168e-05, "loss": 0.0612, "step": 18805 }, { "epoch": 0.41439565464090744, "grad_norm": 1.0263649225234985, "learning_rate": 1.980150279792912e-05, "loss": 0.103, "step": 18806 }, { "epoch": 0.41441768993042355, "grad_norm": 1.0313584804534912, "learning_rate": 1.9800488578087452e-05, "loss": 0.0808, "step": 18807 }, { "epoch": 0.4144397252199397, "grad_norm": 0.7307754158973694, "learning_rate": 1.9799474333794313e-05, "loss": 0.126, "step": 18808 }, { "epoch": 0.4144617605094559, "grad_norm": 0.7066910862922668, "learning_rate": 1.9798460065054886e-05, "loss": 0.0686, "step": 18809 }, { "epoch": 0.41448379579897204, "grad_norm": 0.7547584176063538, "learning_rate": 1.9797445771874326e-05, "loss": 0.0818, "step": 18810 }, { "epoch": 0.4145058310884882, "grad_norm": 0.5631446242332458, "learning_rate": 1.9796431454257797e-05, "loss": 0.0763, "step": 18811 }, { "epoch": 0.4145278663780044, "grad_norm": 0.9277334213256836, "learning_rate": 1.9795417112210476e-05, "loss": 0.095, "step": 18812 }, { "epoch": 0.41454990166752054, "grad_norm": 0.6194234490394592, "learning_rate": 1.9794402745737517e-05, "loss": 0.082, "step": 18813 }, { "epoch": 0.4145719369570367, "grad_norm": 0.769414484500885, "learning_rate": 1.97933883548441e-05, "loss": 0.0873, "step": 18814 }, { "epoch": 0.41459397224655287, "grad_norm": 0.804416835308075, "learning_rate": 1.979237393953539e-05, "loss": 0.0814, "step": 18815 }, { "epoch": 0.41461600753606903, "grad_norm": 0.8797301650047302, "learning_rate": 1.979135949981654e-05, "loss": 0.0929, "step": 18816 }, { "epoch": 0.4146380428255852, "grad_norm": 0.5507203936576843, "learning_rate": 1.9790345035692737e-05, "loss": 0.0681, "step": 18817 }, { "epoch": 0.41466007811510136, "grad_norm": 0.7260928153991699, "learning_rate": 1.978933054716913e-05, "loss": 0.0987, "step": 18818 }, { "epoch": 0.41468211340461747, "grad_norm": 0.8187366724014282, "learning_rate": 1.9788316034250894e-05, "loss": 0.0779, "step": 18819 }, { "epoch": 0.41470414869413363, "grad_norm": 0.6643666625022888, "learning_rate": 1.9787301496943204e-05, "loss": 0.0972, "step": 18820 }, { "epoch": 0.4147261839836498, "grad_norm": 0.44665923714637756, "learning_rate": 1.978628693525121e-05, "loss": 0.0699, "step": 18821 }, { "epoch": 0.41474821927316596, "grad_norm": 0.6209990978240967, "learning_rate": 1.97852723491801e-05, "loss": 0.0788, "step": 18822 }, { "epoch": 0.41477025456268213, "grad_norm": 0.40917664766311646, "learning_rate": 1.9784257738735023e-05, "loss": 0.0512, "step": 18823 }, { "epoch": 0.4147922898521983, "grad_norm": 0.41723012924194336, "learning_rate": 1.9783243103921163e-05, "loss": 0.0635, "step": 18824 }, { "epoch": 0.41481432514171446, "grad_norm": 0.3381248414516449, "learning_rate": 1.978222844474368e-05, "loss": 0.0893, "step": 18825 }, { "epoch": 0.4148363604312306, "grad_norm": 0.6015183329582214, "learning_rate": 1.9781213761207742e-05, "loss": 0.072, "step": 18826 }, { "epoch": 0.4148583957207468, "grad_norm": 0.7855379581451416, "learning_rate": 1.9780199053318516e-05, "loss": 0.0777, "step": 18827 }, { "epoch": 0.41488043101026295, "grad_norm": 0.6688635945320129, "learning_rate": 1.9779184321081176e-05, "loss": 0.0749, "step": 18828 }, { "epoch": 0.4149024662997791, "grad_norm": 0.7584642767906189, "learning_rate": 1.9778169564500884e-05, "loss": 0.0699, "step": 18829 }, { "epoch": 0.4149245015892953, "grad_norm": 1.0726269483566284, "learning_rate": 1.9777154783582816e-05, "loss": 0.0959, "step": 18830 }, { "epoch": 0.4149465368788114, "grad_norm": 0.6524889469146729, "learning_rate": 1.9776139978332133e-05, "loss": 0.0783, "step": 18831 }, { "epoch": 0.41496857216832755, "grad_norm": 0.5519238114356995, "learning_rate": 1.977512514875401e-05, "loss": 0.081, "step": 18832 }, { "epoch": 0.4149906074578437, "grad_norm": 0.7274309396743774, "learning_rate": 1.9774110294853614e-05, "loss": 0.0633, "step": 18833 }, { "epoch": 0.4150126427473599, "grad_norm": 1.2631468772888184, "learning_rate": 1.9773095416636115e-05, "loss": 0.0826, "step": 18834 }, { "epoch": 0.41503467803687605, "grad_norm": 0.6592695116996765, "learning_rate": 1.977208051410668e-05, "loss": 0.0899, "step": 18835 }, { "epoch": 0.4150567133263922, "grad_norm": 0.5362138152122498, "learning_rate": 1.977106558727048e-05, "loss": 0.0878, "step": 18836 }, { "epoch": 0.4150787486159084, "grad_norm": 0.4801577031612396, "learning_rate": 1.9770050636132686e-05, "loss": 0.0729, "step": 18837 }, { "epoch": 0.41510078390542454, "grad_norm": 1.0088732242584229, "learning_rate": 1.976903566069846e-05, "loss": 0.1492, "step": 18838 }, { "epoch": 0.4151228191949407, "grad_norm": 0.6592192649841309, "learning_rate": 1.9768020660972988e-05, "loss": 0.0562, "step": 18839 }, { "epoch": 0.41514485448445687, "grad_norm": 0.5238017439842224, "learning_rate": 1.9767005636961427e-05, "loss": 0.0644, "step": 18840 }, { "epoch": 0.41516688977397304, "grad_norm": 0.8005557656288147, "learning_rate": 1.9765990588668948e-05, "loss": 0.0932, "step": 18841 }, { "epoch": 0.4151889250634892, "grad_norm": 0.7366046905517578, "learning_rate": 1.9764975516100725e-05, "loss": 0.072, "step": 18842 }, { "epoch": 0.41521096035300536, "grad_norm": 0.4428929090499878, "learning_rate": 1.9763960419261925e-05, "loss": 0.0691, "step": 18843 }, { "epoch": 0.4152329956425215, "grad_norm": 0.475772887468338, "learning_rate": 1.9762945298157718e-05, "loss": 0.0552, "step": 18844 }, { "epoch": 0.41525503093203764, "grad_norm": 0.9521058201789856, "learning_rate": 1.976193015279328e-05, "loss": 0.154, "step": 18845 }, { "epoch": 0.4152770662215538, "grad_norm": 0.9486891031265259, "learning_rate": 1.9760914983173772e-05, "loss": 0.0649, "step": 18846 }, { "epoch": 0.41529910151106997, "grad_norm": 0.5655761957168579, "learning_rate": 1.975989978930438e-05, "loss": 0.0826, "step": 18847 }, { "epoch": 0.41532113680058613, "grad_norm": 0.5030190944671631, "learning_rate": 1.9758884571190262e-05, "loss": 0.089, "step": 18848 }, { "epoch": 0.4153431720901023, "grad_norm": 0.3764486312866211, "learning_rate": 1.9757869328836592e-05, "loss": 0.0754, "step": 18849 }, { "epoch": 0.41536520737961846, "grad_norm": 0.4621852934360504, "learning_rate": 1.9756854062248544e-05, "loss": 0.0781, "step": 18850 }, { "epoch": 0.4153872426691346, "grad_norm": 0.7635056376457214, "learning_rate": 1.9755838771431288e-05, "loss": 0.1644, "step": 18851 }, { "epoch": 0.4154092779586508, "grad_norm": 0.9256405234336853, "learning_rate": 1.9754823456389987e-05, "loss": 0.0742, "step": 18852 }, { "epoch": 0.41543131324816696, "grad_norm": 0.7357524037361145, "learning_rate": 1.975380811712983e-05, "loss": 0.0743, "step": 18853 }, { "epoch": 0.4154533485376831, "grad_norm": 0.7686912417411804, "learning_rate": 1.9752792753655974e-05, "loss": 0.0922, "step": 18854 }, { "epoch": 0.4154753838271993, "grad_norm": 0.9245861172676086, "learning_rate": 1.9751777365973595e-05, "loss": 0.0805, "step": 18855 }, { "epoch": 0.4154974191167154, "grad_norm": 0.5768342018127441, "learning_rate": 1.9750761954087867e-05, "loss": 0.0797, "step": 18856 }, { "epoch": 0.41551945440623156, "grad_norm": 0.6095295548439026, "learning_rate": 1.9749746518003965e-05, "loss": 0.099, "step": 18857 }, { "epoch": 0.4155414896957477, "grad_norm": 0.6212149262428284, "learning_rate": 1.9748731057727055e-05, "loss": 0.1101, "step": 18858 }, { "epoch": 0.4155635249852639, "grad_norm": 0.4577164649963379, "learning_rate": 1.974771557326231e-05, "loss": 0.0713, "step": 18859 }, { "epoch": 0.41558556027478005, "grad_norm": 0.7646320462226868, "learning_rate": 1.97467000646149e-05, "loss": 0.1059, "step": 18860 }, { "epoch": 0.4156075955642962, "grad_norm": 1.032586932182312, "learning_rate": 1.9745684531790004e-05, "loss": 0.0742, "step": 18861 }, { "epoch": 0.4156296308538124, "grad_norm": 0.6950590014457703, "learning_rate": 1.97446689747928e-05, "loss": 0.0669, "step": 18862 }, { "epoch": 0.41565166614332855, "grad_norm": 0.780531644821167, "learning_rate": 1.9743653393628442e-05, "loss": 0.0909, "step": 18863 }, { "epoch": 0.4156737014328447, "grad_norm": 0.7300507426261902, "learning_rate": 1.9742637788302116e-05, "loss": 0.0793, "step": 18864 }, { "epoch": 0.4156957367223609, "grad_norm": 0.5838617086410522, "learning_rate": 1.9741622158818994e-05, "loss": 0.0719, "step": 18865 }, { "epoch": 0.41571777201187704, "grad_norm": 1.1596192121505737, "learning_rate": 1.974060650518425e-05, "loss": 0.1096, "step": 18866 }, { "epoch": 0.4157398073013932, "grad_norm": 1.2945529222488403, "learning_rate": 1.973959082740305e-05, "loss": 0.0783, "step": 18867 }, { "epoch": 0.4157618425909093, "grad_norm": 1.1506718397140503, "learning_rate": 1.9738575125480578e-05, "loss": 0.1053, "step": 18868 }, { "epoch": 0.4157838778804255, "grad_norm": 0.755179226398468, "learning_rate": 1.9737559399421995e-05, "loss": 0.0652, "step": 18869 }, { "epoch": 0.41580591316994164, "grad_norm": 0.7631291747093201, "learning_rate": 1.9736543649232483e-05, "loss": 0.0795, "step": 18870 }, { "epoch": 0.4158279484594578, "grad_norm": 1.1213427782058716, "learning_rate": 1.9735527874917216e-05, "loss": 0.0944, "step": 18871 }, { "epoch": 0.415849983748974, "grad_norm": 1.260750651359558, "learning_rate": 1.973451207648137e-05, "loss": 0.0857, "step": 18872 }, { "epoch": 0.41587201903849014, "grad_norm": 0.4249156713485718, "learning_rate": 1.9733496253930112e-05, "loss": 0.0615, "step": 18873 }, { "epoch": 0.4158940543280063, "grad_norm": 0.5939552783966064, "learning_rate": 1.973248040726862e-05, "loss": 0.1061, "step": 18874 }, { "epoch": 0.41591608961752247, "grad_norm": 0.6270262598991394, "learning_rate": 1.9731464536502064e-05, "loss": 0.0409, "step": 18875 }, { "epoch": 0.41593812490703863, "grad_norm": 0.7065950036048889, "learning_rate": 1.973044864163562e-05, "loss": 0.0807, "step": 18876 }, { "epoch": 0.4159601601965548, "grad_norm": 0.6029729247093201, "learning_rate": 1.9729432722674472e-05, "loss": 0.08, "step": 18877 }, { "epoch": 0.41598219548607096, "grad_norm": 0.6035619974136353, "learning_rate": 1.9728416779623778e-05, "loss": 0.1061, "step": 18878 }, { "epoch": 0.4160042307755871, "grad_norm": 0.6575719714164734, "learning_rate": 1.972740081248873e-05, "loss": 0.0948, "step": 18879 }, { "epoch": 0.4160262660651033, "grad_norm": 0.7471852898597717, "learning_rate": 1.9726384821274497e-05, "loss": 0.0765, "step": 18880 }, { "epoch": 0.4160483013546194, "grad_norm": 0.5175542235374451, "learning_rate": 1.9725368805986244e-05, "loss": 0.0705, "step": 18881 }, { "epoch": 0.41607033664413556, "grad_norm": 0.490193247795105, "learning_rate": 1.9724352766629154e-05, "loss": 0.081, "step": 18882 }, { "epoch": 0.41609237193365173, "grad_norm": 0.5997306704521179, "learning_rate": 1.972333670320841e-05, "loss": 0.1029, "step": 18883 }, { "epoch": 0.4161144072231679, "grad_norm": 0.5825260877609253, "learning_rate": 1.9722320615729168e-05, "loss": 0.0917, "step": 18884 }, { "epoch": 0.41613644251268406, "grad_norm": 0.4830235540866852, "learning_rate": 1.972130450419662e-05, "loss": 0.0736, "step": 18885 }, { "epoch": 0.4161584778022002, "grad_norm": 0.5451270937919617, "learning_rate": 1.9720288368615937e-05, "loss": 0.0648, "step": 18886 }, { "epoch": 0.4161805130917164, "grad_norm": 0.8722227215766907, "learning_rate": 1.9719272208992294e-05, "loss": 0.078, "step": 18887 }, { "epoch": 0.41620254838123255, "grad_norm": 1.003669023513794, "learning_rate": 1.971825602533087e-05, "loss": 0.1059, "step": 18888 }, { "epoch": 0.4162245836707487, "grad_norm": 0.8100221753120422, "learning_rate": 1.9717239817636836e-05, "loss": 0.094, "step": 18889 }, { "epoch": 0.4162466189602649, "grad_norm": 0.6045483946800232, "learning_rate": 1.9716223585915365e-05, "loss": 0.0571, "step": 18890 }, { "epoch": 0.41626865424978104, "grad_norm": 0.6514573097229004, "learning_rate": 1.9715207330171643e-05, "loss": 0.1007, "step": 18891 }, { "epoch": 0.4162906895392972, "grad_norm": 0.6477335095405579, "learning_rate": 1.9714191050410838e-05, "loss": 0.0763, "step": 18892 }, { "epoch": 0.4163127248288133, "grad_norm": 0.9254291653633118, "learning_rate": 1.9713174746638135e-05, "loss": 0.0935, "step": 18893 }, { "epoch": 0.4163347601183295, "grad_norm": 0.40598264336586, "learning_rate": 1.9712158418858703e-05, "loss": 0.0459, "step": 18894 }, { "epoch": 0.41635679540784565, "grad_norm": 0.4345364570617676, "learning_rate": 1.971114206707772e-05, "loss": 0.0319, "step": 18895 }, { "epoch": 0.4163788306973618, "grad_norm": 0.5272321105003357, "learning_rate": 1.9710125691300368e-05, "loss": 0.0952, "step": 18896 }, { "epoch": 0.416400865986878, "grad_norm": 0.5641011595726013, "learning_rate": 1.970910929153182e-05, "loss": 0.0953, "step": 18897 }, { "epoch": 0.41642290127639414, "grad_norm": 0.6904227137565613, "learning_rate": 1.970809286777725e-05, "loss": 0.102, "step": 18898 }, { "epoch": 0.4164449365659103, "grad_norm": 0.8359028697013855, "learning_rate": 1.9707076420041837e-05, "loss": 0.0717, "step": 18899 }, { "epoch": 0.41646697185542647, "grad_norm": 0.9101126790046692, "learning_rate": 1.970605994833076e-05, "loss": 0.119, "step": 18900 }, { "epoch": 0.41648900714494264, "grad_norm": 1.1564245223999023, "learning_rate": 1.97050434526492e-05, "loss": 0.0743, "step": 18901 }, { "epoch": 0.4165110424344588, "grad_norm": 0.9588249325752258, "learning_rate": 1.970402693300233e-05, "loss": 0.1014, "step": 18902 }, { "epoch": 0.41653307772397496, "grad_norm": 0.8633124232292175, "learning_rate": 1.970301038939532e-05, "loss": 0.107, "step": 18903 }, { "epoch": 0.41655511301349113, "grad_norm": 1.3745944499969482, "learning_rate": 1.970199382183337e-05, "loss": 0.1051, "step": 18904 }, { "epoch": 0.41657714830300724, "grad_norm": 1.020352840423584, "learning_rate": 1.9700977230321635e-05, "loss": 0.0832, "step": 18905 }, { "epoch": 0.4165991835925234, "grad_norm": 1.3376437425613403, "learning_rate": 1.9699960614865308e-05, "loss": 0.1335, "step": 18906 }, { "epoch": 0.41662121888203957, "grad_norm": 0.9444661140441895, "learning_rate": 1.9698943975469556e-05, "loss": 0.0721, "step": 18907 }, { "epoch": 0.41664325417155573, "grad_norm": 0.6711654663085938, "learning_rate": 1.9697927312139566e-05, "loss": 0.1037, "step": 18908 }, { "epoch": 0.4166652894610719, "grad_norm": 1.1908555030822754, "learning_rate": 1.9696910624880507e-05, "loss": 0.1154, "step": 18909 }, { "epoch": 0.41668732475058806, "grad_norm": 1.0933401584625244, "learning_rate": 1.969589391369757e-05, "loss": 0.106, "step": 18910 }, { "epoch": 0.4167093600401042, "grad_norm": 1.2060049772262573, "learning_rate": 1.969487717859592e-05, "loss": 0.0781, "step": 18911 }, { "epoch": 0.4167313953296204, "grad_norm": 0.7252524495124817, "learning_rate": 1.969386041958075e-05, "loss": 0.0692, "step": 18912 }, { "epoch": 0.41675343061913656, "grad_norm": 0.5156814455986023, "learning_rate": 1.969284363665723e-05, "loss": 0.1318, "step": 18913 }, { "epoch": 0.4167754659086527, "grad_norm": 0.8420446515083313, "learning_rate": 1.9691826829830543e-05, "loss": 0.0824, "step": 18914 }, { "epoch": 0.4167975011981689, "grad_norm": 0.8591108918190002, "learning_rate": 1.969080999910586e-05, "loss": 0.0946, "step": 18915 }, { "epoch": 0.41681953648768505, "grad_norm": 0.7573718428611755, "learning_rate": 1.968979314448837e-05, "loss": 0.0966, "step": 18916 }, { "epoch": 0.4168415717772012, "grad_norm": 0.8000622391700745, "learning_rate": 1.968877626598325e-05, "loss": 0.0711, "step": 18917 }, { "epoch": 0.4168636070667173, "grad_norm": 0.799297034740448, "learning_rate": 1.968775936359568e-05, "loss": 0.0682, "step": 18918 }, { "epoch": 0.4168856423562335, "grad_norm": 1.0289422273635864, "learning_rate": 1.968674243733083e-05, "loss": 0.094, "step": 18919 }, { "epoch": 0.41690767764574965, "grad_norm": 0.8826175928115845, "learning_rate": 1.9685725487193892e-05, "loss": 0.1292, "step": 18920 }, { "epoch": 0.4169297129352658, "grad_norm": 0.742322564125061, "learning_rate": 1.9684708513190042e-05, "loss": 0.0913, "step": 18921 }, { "epoch": 0.416951748224782, "grad_norm": 1.0812406539916992, "learning_rate": 1.968369151532446e-05, "loss": 0.0484, "step": 18922 }, { "epoch": 0.41697378351429815, "grad_norm": 0.5931770205497742, "learning_rate": 1.968267449360232e-05, "loss": 0.0827, "step": 18923 }, { "epoch": 0.4169958188038143, "grad_norm": 0.9113203287124634, "learning_rate": 1.968165744802881e-05, "loss": 0.0718, "step": 18924 }, { "epoch": 0.4170178540933305, "grad_norm": 0.9478129744529724, "learning_rate": 1.9680640378609113e-05, "loss": 0.1076, "step": 18925 }, { "epoch": 0.41703988938284664, "grad_norm": 0.8339720964431763, "learning_rate": 1.9679623285348398e-05, "loss": 0.0931, "step": 18926 }, { "epoch": 0.4170619246723628, "grad_norm": 0.476940780878067, "learning_rate": 1.9678606168251858e-05, "loss": 0.0612, "step": 18927 }, { "epoch": 0.41708395996187897, "grad_norm": 0.6833168268203735, "learning_rate": 1.9677589027324666e-05, "loss": 0.1066, "step": 18928 }, { "epoch": 0.41710599525139513, "grad_norm": 0.8906972408294678, "learning_rate": 1.967657186257201e-05, "loss": 0.0791, "step": 18929 }, { "epoch": 0.41712803054091124, "grad_norm": 0.6293403506278992, "learning_rate": 1.9675554673999057e-05, "loss": 0.0772, "step": 18930 }, { "epoch": 0.4171500658304274, "grad_norm": 0.7885281443595886, "learning_rate": 1.9674537461611002e-05, "loss": 0.0874, "step": 18931 }, { "epoch": 0.4171721011199436, "grad_norm": 0.7843695878982544, "learning_rate": 1.9673520225413016e-05, "loss": 0.0603, "step": 18932 }, { "epoch": 0.41719413640945974, "grad_norm": 0.8808053135871887, "learning_rate": 1.9672502965410288e-05, "loss": 0.0653, "step": 18933 }, { "epoch": 0.4172161716989759, "grad_norm": 0.5901324152946472, "learning_rate": 1.9671485681607997e-05, "loss": 0.1002, "step": 18934 }, { "epoch": 0.41723820698849207, "grad_norm": 0.9642691016197205, "learning_rate": 1.9670468374011325e-05, "loss": 0.0791, "step": 18935 }, { "epoch": 0.41726024227800823, "grad_norm": 0.9658722877502441, "learning_rate": 1.9669451042625454e-05, "loss": 0.0771, "step": 18936 }, { "epoch": 0.4172822775675244, "grad_norm": 0.861720860004425, "learning_rate": 1.9668433687455565e-05, "loss": 0.0967, "step": 18937 }, { "epoch": 0.41730431285704056, "grad_norm": 0.9493446946144104, "learning_rate": 1.9667416308506833e-05, "loss": 0.0603, "step": 18938 }, { "epoch": 0.4173263481465567, "grad_norm": 0.44819486141204834, "learning_rate": 1.9666398905784457e-05, "loss": 0.0768, "step": 18939 }, { "epoch": 0.4173483834360729, "grad_norm": 0.4697491526603699, "learning_rate": 1.96653814792936e-05, "loss": 0.0727, "step": 18940 }, { "epoch": 0.41737041872558905, "grad_norm": 0.8073601722717285, "learning_rate": 1.966436402903946e-05, "loss": 0.0986, "step": 18941 }, { "epoch": 0.4173924540151052, "grad_norm": 0.8186956644058228, "learning_rate": 1.966334655502721e-05, "loss": 0.1039, "step": 18942 }, { "epoch": 0.41741448930462133, "grad_norm": 0.7091030478477478, "learning_rate": 1.9662329057262035e-05, "loss": 0.0971, "step": 18943 }, { "epoch": 0.4174365245941375, "grad_norm": 0.5206831693649292, "learning_rate": 1.9661311535749123e-05, "loss": 0.0758, "step": 18944 }, { "epoch": 0.41745855988365366, "grad_norm": 0.4600551128387451, "learning_rate": 1.9660293990493643e-05, "loss": 0.072, "step": 18945 }, { "epoch": 0.4174805951731698, "grad_norm": 0.9406875371932983, "learning_rate": 1.9659276421500794e-05, "loss": 0.0658, "step": 18946 }, { "epoch": 0.417502630462686, "grad_norm": 0.4701470136642456, "learning_rate": 1.9658258828775745e-05, "loss": 0.0703, "step": 18947 }, { "epoch": 0.41752466575220215, "grad_norm": 0.5896719098091125, "learning_rate": 1.965724121232369e-05, "loss": 0.0761, "step": 18948 }, { "epoch": 0.4175467010417183, "grad_norm": 0.628913402557373, "learning_rate": 1.9656223572149807e-05, "loss": 0.0566, "step": 18949 }, { "epoch": 0.4175687363312345, "grad_norm": 0.4545150101184845, "learning_rate": 1.9655205908259285e-05, "loss": 0.0865, "step": 18950 }, { "epoch": 0.41759077162075064, "grad_norm": 0.7194129824638367, "learning_rate": 1.9654188220657297e-05, "loss": 0.0605, "step": 18951 }, { "epoch": 0.4176128069102668, "grad_norm": 0.5127097964286804, "learning_rate": 1.9653170509349034e-05, "loss": 0.1115, "step": 18952 }, { "epoch": 0.417634842199783, "grad_norm": 0.7512078285217285, "learning_rate": 1.965215277433968e-05, "loss": 0.0729, "step": 18953 }, { "epoch": 0.41765687748929914, "grad_norm": 0.696915864944458, "learning_rate": 1.9651135015634417e-05, "loss": 0.0734, "step": 18954 }, { "epoch": 0.41767891277881525, "grad_norm": 0.583446741104126, "learning_rate": 1.9650117233238426e-05, "loss": 0.0871, "step": 18955 }, { "epoch": 0.4177009480683314, "grad_norm": 0.642299234867096, "learning_rate": 1.9649099427156898e-05, "loss": 0.0767, "step": 18956 }, { "epoch": 0.4177229833578476, "grad_norm": 0.5517116785049438, "learning_rate": 1.964808159739501e-05, "loss": 0.0569, "step": 18957 }, { "epoch": 0.41774501864736374, "grad_norm": 0.8031482100486755, "learning_rate": 1.9647063743957954e-05, "loss": 0.0981, "step": 18958 }, { "epoch": 0.4177670539368799, "grad_norm": 0.8490927219390869, "learning_rate": 1.9646045866850907e-05, "loss": 0.1057, "step": 18959 }, { "epoch": 0.41778908922639607, "grad_norm": 0.71186763048172, "learning_rate": 1.9645027966079064e-05, "loss": 0.094, "step": 18960 }, { "epoch": 0.41781112451591224, "grad_norm": 0.544914186000824, "learning_rate": 1.9644010041647595e-05, "loss": 0.0736, "step": 18961 }, { "epoch": 0.4178331598054284, "grad_norm": 0.5212929248809814, "learning_rate": 1.9642992093561697e-05, "loss": 0.0543, "step": 18962 }, { "epoch": 0.41785519509494456, "grad_norm": 0.6848970055580139, "learning_rate": 1.9641974121826545e-05, "loss": 0.1053, "step": 18963 }, { "epoch": 0.41787723038446073, "grad_norm": 0.5589648485183716, "learning_rate": 1.9640956126447335e-05, "loss": 0.0859, "step": 18964 }, { "epoch": 0.4178992656739769, "grad_norm": 0.8546769022941589, "learning_rate": 1.9639938107429245e-05, "loss": 0.0929, "step": 18965 }, { "epoch": 0.41792130096349306, "grad_norm": 0.8094162344932556, "learning_rate": 1.963892006477746e-05, "loss": 0.096, "step": 18966 }, { "epoch": 0.41794333625300917, "grad_norm": 0.6817566752433777, "learning_rate": 1.963790199849717e-05, "loss": 0.082, "step": 18967 }, { "epoch": 0.41796537154252533, "grad_norm": 1.0645103454589844, "learning_rate": 1.963688390859356e-05, "loss": 0.0979, "step": 18968 }, { "epoch": 0.4179874068320415, "grad_norm": 0.5809648633003235, "learning_rate": 1.9635865795071813e-05, "loss": 0.0808, "step": 18969 }, { "epoch": 0.41800944212155766, "grad_norm": 0.41298621892929077, "learning_rate": 1.9634847657937113e-05, "loss": 0.0385, "step": 18970 }, { "epoch": 0.4180314774110738, "grad_norm": 0.750678300857544, "learning_rate": 1.963382949719465e-05, "loss": 0.0889, "step": 18971 }, { "epoch": 0.41805351270059, "grad_norm": 0.5607503652572632, "learning_rate": 1.9632811312849606e-05, "loss": 0.0826, "step": 18972 }, { "epoch": 0.41807554799010616, "grad_norm": 0.34469175338745117, "learning_rate": 1.9631793104907173e-05, "loss": 0.0865, "step": 18973 }, { "epoch": 0.4180975832796223, "grad_norm": 0.6846281290054321, "learning_rate": 1.963077487337253e-05, "loss": 0.0715, "step": 18974 }, { "epoch": 0.4181196185691385, "grad_norm": 0.7515108585357666, "learning_rate": 1.962975661825087e-05, "loss": 0.0818, "step": 18975 }, { "epoch": 0.41814165385865465, "grad_norm": 0.7186272740364075, "learning_rate": 1.9628738339547378e-05, "loss": 0.0663, "step": 18976 }, { "epoch": 0.4181636891481708, "grad_norm": 0.6637461185455322, "learning_rate": 1.962772003726724e-05, "loss": 0.0635, "step": 18977 }, { "epoch": 0.418185724437687, "grad_norm": 0.5127208828926086, "learning_rate": 1.9626701711415636e-05, "loss": 0.0704, "step": 18978 }, { "epoch": 0.41820775972720314, "grad_norm": 0.568686306476593, "learning_rate": 1.9625683361997766e-05, "loss": 0.074, "step": 18979 }, { "epoch": 0.41822979501671925, "grad_norm": 0.6166147589683533, "learning_rate": 1.9624664989018807e-05, "loss": 0.1021, "step": 18980 }, { "epoch": 0.4182518303062354, "grad_norm": 0.8750518560409546, "learning_rate": 1.962364659248395e-05, "loss": 0.1025, "step": 18981 }, { "epoch": 0.4182738655957516, "grad_norm": 0.6715894341468811, "learning_rate": 1.962262817239838e-05, "loss": 0.0707, "step": 18982 }, { "epoch": 0.41829590088526775, "grad_norm": 0.8094280958175659, "learning_rate": 1.9621609728767287e-05, "loss": 0.0889, "step": 18983 }, { "epoch": 0.4183179361747839, "grad_norm": 0.7732394337654114, "learning_rate": 1.962059126159586e-05, "loss": 0.1005, "step": 18984 }, { "epoch": 0.4183399714643001, "grad_norm": 0.8606061339378357, "learning_rate": 1.9619572770889284e-05, "loss": 0.085, "step": 18985 }, { "epoch": 0.41836200675381624, "grad_norm": 0.700383186340332, "learning_rate": 1.961855425665274e-05, "loss": 0.077, "step": 18986 }, { "epoch": 0.4183840420433324, "grad_norm": 0.7936561703681946, "learning_rate": 1.961753571889143e-05, "loss": 0.0961, "step": 18987 }, { "epoch": 0.41840607733284857, "grad_norm": 0.457540899515152, "learning_rate": 1.961651715761053e-05, "loss": 0.092, "step": 18988 }, { "epoch": 0.41842811262236473, "grad_norm": 0.4471103847026825, "learning_rate": 1.9615498572815233e-05, "loss": 0.071, "step": 18989 }, { "epoch": 0.4184501479118809, "grad_norm": 0.6571667194366455, "learning_rate": 1.9614479964510728e-05, "loss": 0.0738, "step": 18990 }, { "epoch": 0.41847218320139706, "grad_norm": 0.8446227312088013, "learning_rate": 1.9613461332702203e-05, "loss": 0.0756, "step": 18991 }, { "epoch": 0.4184942184909132, "grad_norm": 1.0124456882476807, "learning_rate": 1.9612442677394847e-05, "loss": 0.132, "step": 18992 }, { "epoch": 0.41851625378042934, "grad_norm": 0.4469935894012451, "learning_rate": 1.961142399859384e-05, "loss": 0.0774, "step": 18993 }, { "epoch": 0.4185382890699455, "grad_norm": 0.8676022291183472, "learning_rate": 1.9610405296304385e-05, "loss": 0.0962, "step": 18994 }, { "epoch": 0.41856032435946167, "grad_norm": 0.6169683933258057, "learning_rate": 1.9609386570531656e-05, "loss": 0.0626, "step": 18995 }, { "epoch": 0.41858235964897783, "grad_norm": 0.6351307034492493, "learning_rate": 1.9608367821280855e-05, "loss": 0.0883, "step": 18996 }, { "epoch": 0.418604394938494, "grad_norm": 0.9625479578971863, "learning_rate": 1.9607349048557165e-05, "loss": 0.1069, "step": 18997 }, { "epoch": 0.41862643022801016, "grad_norm": 1.0746420621871948, "learning_rate": 1.9606330252365773e-05, "loss": 0.0932, "step": 18998 }, { "epoch": 0.4186484655175263, "grad_norm": 0.5892770290374756, "learning_rate": 1.9605311432711877e-05, "loss": 0.0715, "step": 18999 }, { "epoch": 0.4186705008070425, "grad_norm": 0.4908897578716278, "learning_rate": 1.9604292589600657e-05, "loss": 0.0498, "step": 19000 }, { "epoch": 0.41869253609655865, "grad_norm": 0.8544020056724548, "learning_rate": 1.9603273723037304e-05, "loss": 0.0793, "step": 19001 }, { "epoch": 0.4187145713860748, "grad_norm": 0.6079861521720886, "learning_rate": 1.9602254833027015e-05, "loss": 0.086, "step": 19002 }, { "epoch": 0.418736606675591, "grad_norm": 0.5683678388595581, "learning_rate": 1.9601235919574967e-05, "loss": 0.0916, "step": 19003 }, { "epoch": 0.4187586419651071, "grad_norm": 0.48115429282188416, "learning_rate": 1.9600216982686358e-05, "loss": 0.0531, "step": 19004 }, { "epoch": 0.41878067725462326, "grad_norm": 0.8604596853256226, "learning_rate": 1.9599198022366377e-05, "loss": 0.0763, "step": 19005 }, { "epoch": 0.4188027125441394, "grad_norm": 0.7998619079589844, "learning_rate": 1.9598179038620217e-05, "loss": 0.0593, "step": 19006 }, { "epoch": 0.4188247478336556, "grad_norm": 0.6375460624694824, "learning_rate": 1.9597160031453067e-05, "loss": 0.0689, "step": 19007 }, { "epoch": 0.41884678312317175, "grad_norm": 0.5185886025428772, "learning_rate": 1.9596141000870112e-05, "loss": 0.0655, "step": 19008 }, { "epoch": 0.4188688184126879, "grad_norm": 0.5291204452514648, "learning_rate": 1.9595121946876548e-05, "loss": 0.0826, "step": 19009 }, { "epoch": 0.4188908537022041, "grad_norm": 0.8519740104675293, "learning_rate": 1.9594102869477563e-05, "loss": 0.0768, "step": 19010 }, { "epoch": 0.41891288899172024, "grad_norm": 0.6647956967353821, "learning_rate": 1.9593083768678348e-05, "loss": 0.0832, "step": 19011 }, { "epoch": 0.4189349242812364, "grad_norm": 0.6341022849082947, "learning_rate": 1.9592064644484093e-05, "loss": 0.0653, "step": 19012 }, { "epoch": 0.4189569595707526, "grad_norm": 0.9353637099266052, "learning_rate": 1.9591045496899995e-05, "loss": 0.0943, "step": 19013 }, { "epoch": 0.41897899486026874, "grad_norm": 0.534690797328949, "learning_rate": 1.9590026325931236e-05, "loss": 0.1061, "step": 19014 }, { "epoch": 0.4190010301497849, "grad_norm": 0.7744578719139099, "learning_rate": 1.9589007131583015e-05, "loss": 0.084, "step": 19015 }, { "epoch": 0.41902306543930107, "grad_norm": 0.9840517640113831, "learning_rate": 1.958798791386052e-05, "loss": 0.112, "step": 19016 }, { "epoch": 0.4190451007288172, "grad_norm": 0.937726616859436, "learning_rate": 1.9586968672768942e-05, "loss": 0.1109, "step": 19017 }, { "epoch": 0.41906713601833334, "grad_norm": 0.46848776936531067, "learning_rate": 1.9585949408313468e-05, "loss": 0.0647, "step": 19018 }, { "epoch": 0.4190891713078495, "grad_norm": 0.6113278865814209, "learning_rate": 1.95849301204993e-05, "loss": 0.0501, "step": 19019 }, { "epoch": 0.41911120659736567, "grad_norm": 0.9357254505157471, "learning_rate": 1.958391080933162e-05, "loss": 0.0977, "step": 19020 }, { "epoch": 0.41913324188688184, "grad_norm": 0.9127336740493774, "learning_rate": 1.958289147481563e-05, "loss": 0.0959, "step": 19021 }, { "epoch": 0.419155277176398, "grad_norm": 0.5537729859352112, "learning_rate": 1.9581872116956507e-05, "loss": 0.0774, "step": 19022 }, { "epoch": 0.41917731246591416, "grad_norm": 0.890524685382843, "learning_rate": 1.9580852735759462e-05, "loss": 0.109, "step": 19023 }, { "epoch": 0.41919934775543033, "grad_norm": 0.9838366508483887, "learning_rate": 1.9579833331229675e-05, "loss": 0.0671, "step": 19024 }, { "epoch": 0.4192213830449465, "grad_norm": 0.879508376121521, "learning_rate": 1.9578813903372343e-05, "loss": 0.0803, "step": 19025 }, { "epoch": 0.41924341833446266, "grad_norm": 0.5190492272377014, "learning_rate": 1.9577794452192646e-05, "loss": 0.0981, "step": 19026 }, { "epoch": 0.4192654536239788, "grad_norm": 0.31828656792640686, "learning_rate": 1.9576774977695797e-05, "loss": 0.0755, "step": 19027 }, { "epoch": 0.419287488913495, "grad_norm": 0.9128928184509277, "learning_rate": 1.957575547988697e-05, "loss": 0.0903, "step": 19028 }, { "epoch": 0.4193095242030111, "grad_norm": 0.6117068529129028, "learning_rate": 1.957473595877138e-05, "loss": 0.0506, "step": 19029 }, { "epoch": 0.41933155949252726, "grad_norm": 0.4285101294517517, "learning_rate": 1.9573716414354195e-05, "loss": 0.0675, "step": 19030 }, { "epoch": 0.4193535947820434, "grad_norm": 0.6183709502220154, "learning_rate": 1.9572696846640624e-05, "loss": 0.0822, "step": 19031 }, { "epoch": 0.4193756300715596, "grad_norm": 0.45010703802108765, "learning_rate": 1.9571677255635856e-05, "loss": 0.0673, "step": 19032 }, { "epoch": 0.41939766536107576, "grad_norm": 0.6141971349716187, "learning_rate": 1.9570657641345085e-05, "loss": 0.0919, "step": 19033 }, { "epoch": 0.4194197006505919, "grad_norm": 0.5814757943153381, "learning_rate": 1.9569638003773502e-05, "loss": 0.0871, "step": 19034 }, { "epoch": 0.4194417359401081, "grad_norm": 0.720923900604248, "learning_rate": 1.95686183429263e-05, "loss": 0.1033, "step": 19035 }, { "epoch": 0.41946377122962425, "grad_norm": 0.5225251913070679, "learning_rate": 1.9567598658808677e-05, "loss": 0.0752, "step": 19036 }, { "epoch": 0.4194858065191404, "grad_norm": 0.45598945021629333, "learning_rate": 1.9566578951425824e-05, "loss": 0.0879, "step": 19037 }, { "epoch": 0.4195078418086566, "grad_norm": 0.6556071043014526, "learning_rate": 1.9565559220782938e-05, "loss": 0.0979, "step": 19038 }, { "epoch": 0.41952987709817274, "grad_norm": 0.8332560062408447, "learning_rate": 1.956453946688521e-05, "loss": 0.108, "step": 19039 }, { "epoch": 0.4195519123876889, "grad_norm": 0.8709008097648621, "learning_rate": 1.9563519689737837e-05, "loss": 0.0897, "step": 19040 }, { "epoch": 0.419573947677205, "grad_norm": 0.6844491958618164, "learning_rate": 1.9562499889346006e-05, "loss": 0.0714, "step": 19041 }, { "epoch": 0.4195959829667212, "grad_norm": 0.42944249510765076, "learning_rate": 1.9561480065714918e-05, "loss": 0.072, "step": 19042 }, { "epoch": 0.41961801825623735, "grad_norm": 0.7795167565345764, "learning_rate": 1.9560460218849767e-05, "loss": 0.1008, "step": 19043 }, { "epoch": 0.4196400535457535, "grad_norm": 0.7759252786636353, "learning_rate": 1.9559440348755747e-05, "loss": 0.103, "step": 19044 }, { "epoch": 0.4196620888352697, "grad_norm": 0.41624340415000916, "learning_rate": 1.9558420455438052e-05, "loss": 0.0748, "step": 19045 }, { "epoch": 0.41968412412478584, "grad_norm": 0.8986605405807495, "learning_rate": 1.9557400538901877e-05, "loss": 0.1009, "step": 19046 }, { "epoch": 0.419706159414302, "grad_norm": 0.5177430510520935, "learning_rate": 1.9556380599152414e-05, "loss": 0.0792, "step": 19047 }, { "epoch": 0.41972819470381817, "grad_norm": 0.5482355356216431, "learning_rate": 1.955536063619487e-05, "loss": 0.0892, "step": 19048 }, { "epoch": 0.41975022999333433, "grad_norm": 0.8567795157432556, "learning_rate": 1.9554340650034422e-05, "loss": 0.0978, "step": 19049 }, { "epoch": 0.4197722652828505, "grad_norm": 1.3248738050460815, "learning_rate": 1.955332064067628e-05, "loss": 0.055, "step": 19050 }, { "epoch": 0.41979430057236666, "grad_norm": 0.61418616771698, "learning_rate": 1.955230060812563e-05, "loss": 0.0812, "step": 19051 }, { "epoch": 0.41981633586188283, "grad_norm": 1.132692813873291, "learning_rate": 1.9551280552387677e-05, "loss": 0.1057, "step": 19052 }, { "epoch": 0.419838371151399, "grad_norm": 0.5202658176422119, "learning_rate": 1.9550260473467608e-05, "loss": 0.0705, "step": 19053 }, { "epoch": 0.4198604064409151, "grad_norm": 0.5363191962242126, "learning_rate": 1.9549240371370623e-05, "loss": 0.0906, "step": 19054 }, { "epoch": 0.41988244173043127, "grad_norm": 0.5040485262870789, "learning_rate": 1.954822024610192e-05, "loss": 0.0904, "step": 19055 }, { "epoch": 0.41990447701994743, "grad_norm": 0.49214309453964233, "learning_rate": 1.9547200097666688e-05, "loss": 0.0621, "step": 19056 }, { "epoch": 0.4199265123094636, "grad_norm": 0.7744629979133606, "learning_rate": 1.9546179926070132e-05, "loss": 0.0693, "step": 19057 }, { "epoch": 0.41994854759897976, "grad_norm": 0.6346650719642639, "learning_rate": 1.954515973131744e-05, "loss": 0.0811, "step": 19058 }, { "epoch": 0.4199705828884959, "grad_norm": 0.6707025766372681, "learning_rate": 1.9544139513413813e-05, "loss": 0.0915, "step": 19059 }, { "epoch": 0.4199926181780121, "grad_norm": 0.7958505153656006, "learning_rate": 1.954311927236445e-05, "loss": 0.0649, "step": 19060 }, { "epoch": 0.42001465346752825, "grad_norm": 0.9126911759376526, "learning_rate": 1.954209900817454e-05, "loss": 0.0951, "step": 19061 }, { "epoch": 0.4200366887570444, "grad_norm": 0.5896832346916199, "learning_rate": 1.9541078720849285e-05, "loss": 0.0692, "step": 19062 }, { "epoch": 0.4200587240465606, "grad_norm": 0.5510501265525818, "learning_rate": 1.9540058410393885e-05, "loss": 0.0983, "step": 19063 }, { "epoch": 0.42008075933607675, "grad_norm": 0.7434924840927124, "learning_rate": 1.953903807681353e-05, "loss": 0.0806, "step": 19064 }, { "epoch": 0.4201027946255929, "grad_norm": 0.772346019744873, "learning_rate": 1.953801772011342e-05, "loss": 0.0723, "step": 19065 }, { "epoch": 0.420124829915109, "grad_norm": 0.717871904373169, "learning_rate": 1.9536997340298747e-05, "loss": 0.0915, "step": 19066 }, { "epoch": 0.4201468652046252, "grad_norm": 0.5381901264190674, "learning_rate": 1.9535976937374725e-05, "loss": 0.0947, "step": 19067 }, { "epoch": 0.42016890049414135, "grad_norm": 0.677216649055481, "learning_rate": 1.9534956511346528e-05, "loss": 0.0892, "step": 19068 }, { "epoch": 0.4201909357836575, "grad_norm": 0.6008116602897644, "learning_rate": 1.9533936062219376e-05, "loss": 0.1055, "step": 19069 }, { "epoch": 0.4202129710731737, "grad_norm": 0.4057231843471527, "learning_rate": 1.9532915589998446e-05, "loss": 0.0674, "step": 19070 }, { "epoch": 0.42023500636268984, "grad_norm": 0.37339040637016296, "learning_rate": 1.9531895094688957e-05, "loss": 0.0517, "step": 19071 }, { "epoch": 0.420257041652206, "grad_norm": 1.1020257472991943, "learning_rate": 1.9530874576296093e-05, "loss": 0.1045, "step": 19072 }, { "epoch": 0.4202790769417222, "grad_norm": 0.4901919960975647, "learning_rate": 1.952985403482505e-05, "loss": 0.0892, "step": 19073 }, { "epoch": 0.42030111223123834, "grad_norm": 0.7324414253234863, "learning_rate": 1.9528833470281033e-05, "loss": 0.0995, "step": 19074 }, { "epoch": 0.4203231475207545, "grad_norm": 0.6293976902961731, "learning_rate": 1.9527812882669242e-05, "loss": 0.0535, "step": 19075 }, { "epoch": 0.42034518281027067, "grad_norm": 0.41248711943626404, "learning_rate": 1.952679227199487e-05, "loss": 0.0718, "step": 19076 }, { "epoch": 0.42036721809978683, "grad_norm": 0.9359279274940491, "learning_rate": 1.9525771638263113e-05, "loss": 0.0813, "step": 19077 }, { "epoch": 0.42038925338930294, "grad_norm": 0.5450677275657654, "learning_rate": 1.952475098147918e-05, "loss": 0.0624, "step": 19078 }, { "epoch": 0.4204112886788191, "grad_norm": 0.8476296067237854, "learning_rate": 1.9523730301648263e-05, "loss": 0.1075, "step": 19079 }, { "epoch": 0.42043332396833527, "grad_norm": 0.66986483335495, "learning_rate": 1.9522709598775566e-05, "loss": 0.1189, "step": 19080 }, { "epoch": 0.42045535925785144, "grad_norm": 0.7606486082077026, "learning_rate": 1.9521688872866277e-05, "loss": 0.0876, "step": 19081 }, { "epoch": 0.4204773945473676, "grad_norm": 0.4838635325431824, "learning_rate": 1.9520668123925603e-05, "loss": 0.1279, "step": 19082 }, { "epoch": 0.42049942983688376, "grad_norm": 0.9336710572242737, "learning_rate": 1.951964735195874e-05, "loss": 0.0756, "step": 19083 }, { "epoch": 0.42052146512639993, "grad_norm": 0.717925488948822, "learning_rate": 1.9518626556970896e-05, "loss": 0.0841, "step": 19084 }, { "epoch": 0.4205435004159161, "grad_norm": 0.7478471994400024, "learning_rate": 1.951760573896726e-05, "loss": 0.0961, "step": 19085 }, { "epoch": 0.42056553570543226, "grad_norm": 0.9745314717292786, "learning_rate": 1.9516584897953033e-05, "loss": 0.0855, "step": 19086 }, { "epoch": 0.4205875709949484, "grad_norm": 0.8183925747871399, "learning_rate": 1.9515564033933422e-05, "loss": 0.0945, "step": 19087 }, { "epoch": 0.4206096062844646, "grad_norm": 0.7149696350097656, "learning_rate": 1.951454314691362e-05, "loss": 0.1315, "step": 19088 }, { "epoch": 0.42063164157398075, "grad_norm": 0.7093861699104309, "learning_rate": 1.9513522236898828e-05, "loss": 0.0719, "step": 19089 }, { "epoch": 0.4206536768634969, "grad_norm": 0.7007019519805908, "learning_rate": 1.951250130389425e-05, "loss": 0.0782, "step": 19090 }, { "epoch": 0.420675712153013, "grad_norm": 0.5573181509971619, "learning_rate": 1.9511480347905078e-05, "loss": 0.0946, "step": 19091 }, { "epoch": 0.4206977474425292, "grad_norm": 0.7006693482398987, "learning_rate": 1.951045936893652e-05, "loss": 0.1017, "step": 19092 }, { "epoch": 0.42071978273204536, "grad_norm": 0.6838459372520447, "learning_rate": 1.9509438366993776e-05, "loss": 0.0904, "step": 19093 }, { "epoch": 0.4207418180215615, "grad_norm": 0.6420247554779053, "learning_rate": 1.950841734208204e-05, "loss": 0.0489, "step": 19094 }, { "epoch": 0.4207638533110777, "grad_norm": 0.7189649939537048, "learning_rate": 1.950739629420652e-05, "loss": 0.0936, "step": 19095 }, { "epoch": 0.42078588860059385, "grad_norm": 1.3231843709945679, "learning_rate": 1.9506375223372412e-05, "loss": 0.1661, "step": 19096 }, { "epoch": 0.42080792389011, "grad_norm": 0.8768414855003357, "learning_rate": 1.950535412958492e-05, "loss": 0.0776, "step": 19097 }, { "epoch": 0.4208299591796262, "grad_norm": 0.8361608386039734, "learning_rate": 1.950433301284924e-05, "loss": 0.1056, "step": 19098 }, { "epoch": 0.42085199446914234, "grad_norm": 1.5834521055221558, "learning_rate": 1.950331187317058e-05, "loss": 0.1167, "step": 19099 }, { "epoch": 0.4208740297586585, "grad_norm": 0.547773003578186, "learning_rate": 1.9502290710554134e-05, "loss": 0.0749, "step": 19100 }, { "epoch": 0.4208960650481747, "grad_norm": 0.7374962568283081, "learning_rate": 1.9501269525005113e-05, "loss": 0.1041, "step": 19101 }, { "epoch": 0.42091810033769084, "grad_norm": 0.4772791564464569, "learning_rate": 1.9500248316528706e-05, "loss": 0.0932, "step": 19102 }, { "epoch": 0.42094013562720695, "grad_norm": 0.46321606636047363, "learning_rate": 1.9499227085130127e-05, "loss": 0.1023, "step": 19103 }, { "epoch": 0.4209621709167231, "grad_norm": 1.3848456144332886, "learning_rate": 1.9498205830814566e-05, "loss": 0.1111, "step": 19104 }, { "epoch": 0.4209842062062393, "grad_norm": 0.6527562737464905, "learning_rate": 1.9497184553587236e-05, "loss": 0.1019, "step": 19105 }, { "epoch": 0.42100624149575544, "grad_norm": 0.8522407412528992, "learning_rate": 1.9496163253453332e-05, "loss": 0.0918, "step": 19106 }, { "epoch": 0.4210282767852716, "grad_norm": 0.6710302829742432, "learning_rate": 1.9495141930418057e-05, "loss": 0.0732, "step": 19107 }, { "epoch": 0.42105031207478777, "grad_norm": 0.8037934303283691, "learning_rate": 1.949412058448661e-05, "loss": 0.0732, "step": 19108 }, { "epoch": 0.42107234736430393, "grad_norm": 0.8924348950386047, "learning_rate": 1.9493099215664203e-05, "loss": 0.0766, "step": 19109 }, { "epoch": 0.4210943826538201, "grad_norm": 0.4351576566696167, "learning_rate": 1.9492077823956032e-05, "loss": 0.0561, "step": 19110 }, { "epoch": 0.42111641794333626, "grad_norm": 0.4347231686115265, "learning_rate": 1.9491056409367297e-05, "loss": 0.0577, "step": 19111 }, { "epoch": 0.4211384532328524, "grad_norm": 0.5783697366714478, "learning_rate": 1.9490034971903203e-05, "loss": 0.0829, "step": 19112 }, { "epoch": 0.4211604885223686, "grad_norm": 0.5956349968910217, "learning_rate": 1.9489013511568955e-05, "loss": 0.0815, "step": 19113 }, { "epoch": 0.42118252381188476, "grad_norm": 0.5404037833213806, "learning_rate": 1.948799202836975e-05, "loss": 0.0699, "step": 19114 }, { "epoch": 0.42120455910140087, "grad_norm": 0.7647721767425537, "learning_rate": 1.94869705223108e-05, "loss": 0.1014, "step": 19115 }, { "epoch": 0.42122659439091703, "grad_norm": 0.7794087529182434, "learning_rate": 1.9485948993397297e-05, "loss": 0.0999, "step": 19116 }, { "epoch": 0.4212486296804332, "grad_norm": 0.6300182342529297, "learning_rate": 1.948492744163446e-05, "loss": 0.0933, "step": 19117 }, { "epoch": 0.42127066496994936, "grad_norm": 0.4798794686794281, "learning_rate": 1.9483905867027477e-05, "loss": 0.0804, "step": 19118 }, { "epoch": 0.4212927002594655, "grad_norm": 0.865230917930603, "learning_rate": 1.9482884269581553e-05, "loss": 0.0738, "step": 19119 }, { "epoch": 0.4213147355489817, "grad_norm": 0.5843232870101929, "learning_rate": 1.9481862649301898e-05, "loss": 0.0739, "step": 19120 }, { "epoch": 0.42133677083849785, "grad_norm": 0.7780106663703918, "learning_rate": 1.948084100619372e-05, "loss": 0.1017, "step": 19121 }, { "epoch": 0.421358806128014, "grad_norm": 0.705619215965271, "learning_rate": 1.9479819340262203e-05, "loss": 0.0896, "step": 19122 }, { "epoch": 0.4213808414175302, "grad_norm": 0.6014420390129089, "learning_rate": 1.947879765151257e-05, "loss": 0.0432, "step": 19123 }, { "epoch": 0.42140287670704635, "grad_norm": 0.5261484384536743, "learning_rate": 1.9477775939950022e-05, "loss": 0.1052, "step": 19124 }, { "epoch": 0.4214249119965625, "grad_norm": 0.9081809520721436, "learning_rate": 1.9476754205579755e-05, "loss": 0.0686, "step": 19125 }, { "epoch": 0.4214469472860787, "grad_norm": 0.45028844475746155, "learning_rate": 1.9475732448406982e-05, "loss": 0.0915, "step": 19126 }, { "epoch": 0.42146898257559484, "grad_norm": 0.5403754711151123, "learning_rate": 1.9474710668436907e-05, "loss": 0.08, "step": 19127 }, { "epoch": 0.42149101786511095, "grad_norm": 0.7242993712425232, "learning_rate": 1.9473688865674726e-05, "loss": 0.0858, "step": 19128 }, { "epoch": 0.4215130531546271, "grad_norm": 0.5066205859184265, "learning_rate": 1.947266704012565e-05, "loss": 0.0696, "step": 19129 }, { "epoch": 0.4215350884441433, "grad_norm": 0.6078226566314697, "learning_rate": 1.947164519179488e-05, "loss": 0.0663, "step": 19130 }, { "epoch": 0.42155712373365944, "grad_norm": 0.6306192278862, "learning_rate": 1.947062332068762e-05, "loss": 0.1136, "step": 19131 }, { "epoch": 0.4215791590231756, "grad_norm": 0.4505317807197571, "learning_rate": 1.946960142680909e-05, "loss": 0.0653, "step": 19132 }, { "epoch": 0.4216011943126918, "grad_norm": 0.8224807381629944, "learning_rate": 1.946857951016447e-05, "loss": 0.0731, "step": 19133 }, { "epoch": 0.42162322960220794, "grad_norm": 0.6727166771888733, "learning_rate": 1.9467557570758988e-05, "loss": 0.0643, "step": 19134 }, { "epoch": 0.4216452648917241, "grad_norm": 0.6488553881645203, "learning_rate": 1.9466535608597834e-05, "loss": 0.0929, "step": 19135 }, { "epoch": 0.42166730018124027, "grad_norm": 0.8082767724990845, "learning_rate": 1.9465513623686223e-05, "loss": 0.1029, "step": 19136 }, { "epoch": 0.42168933547075643, "grad_norm": 1.253862977027893, "learning_rate": 1.9464491616029355e-05, "loss": 0.1013, "step": 19137 }, { "epoch": 0.4217113707602726, "grad_norm": 0.7421794533729553, "learning_rate": 1.9463469585632436e-05, "loss": 0.1256, "step": 19138 }, { "epoch": 0.42173340604978876, "grad_norm": 0.7981996536254883, "learning_rate": 1.9462447532500673e-05, "loss": 0.0893, "step": 19139 }, { "epoch": 0.42175544133930487, "grad_norm": 0.8068335056304932, "learning_rate": 1.9461425456639275e-05, "loss": 0.0834, "step": 19140 }, { "epoch": 0.42177747662882104, "grad_norm": 0.7777596116065979, "learning_rate": 1.946040335805344e-05, "loss": 0.0862, "step": 19141 }, { "epoch": 0.4217995119183372, "grad_norm": 0.6676574349403381, "learning_rate": 1.945938123674838e-05, "loss": 0.0631, "step": 19142 }, { "epoch": 0.42182154720785336, "grad_norm": 0.7728487253189087, "learning_rate": 1.9458359092729302e-05, "loss": 0.0893, "step": 19143 }, { "epoch": 0.42184358249736953, "grad_norm": 0.6118229627609253, "learning_rate": 1.9457336926001408e-05, "loss": 0.0544, "step": 19144 }, { "epoch": 0.4218656177868857, "grad_norm": 0.748377799987793, "learning_rate": 1.945631473656991e-05, "loss": 0.1198, "step": 19145 }, { "epoch": 0.42188765307640186, "grad_norm": 0.7483944296836853, "learning_rate": 1.9455292524440007e-05, "loss": 0.0806, "step": 19146 }, { "epoch": 0.421909688365918, "grad_norm": 0.6229264736175537, "learning_rate": 1.9454270289616915e-05, "loss": 0.0885, "step": 19147 }, { "epoch": 0.4219317236554342, "grad_norm": 0.9979203939437866, "learning_rate": 1.9453248032105832e-05, "loss": 0.0656, "step": 19148 }, { "epoch": 0.42195375894495035, "grad_norm": 0.6306220293045044, "learning_rate": 1.9452225751911972e-05, "loss": 0.0753, "step": 19149 }, { "epoch": 0.4219757942344665, "grad_norm": 0.7617928981781006, "learning_rate": 1.9451203449040533e-05, "loss": 0.0799, "step": 19150 }, { "epoch": 0.4219978295239827, "grad_norm": 0.7523384094238281, "learning_rate": 1.9450181123496736e-05, "loss": 0.0727, "step": 19151 }, { "epoch": 0.4220198648134988, "grad_norm": 0.9120492935180664, "learning_rate": 1.9449158775285772e-05, "loss": 0.0609, "step": 19152 }, { "epoch": 0.42204190010301496, "grad_norm": 0.9066243767738342, "learning_rate": 1.944813640441286e-05, "loss": 0.0889, "step": 19153 }, { "epoch": 0.4220639353925311, "grad_norm": 0.5864694714546204, "learning_rate": 1.9447114010883205e-05, "loss": 0.0919, "step": 19154 }, { "epoch": 0.4220859706820473, "grad_norm": 0.7002386450767517, "learning_rate": 1.9446091594702014e-05, "loss": 0.0965, "step": 19155 }, { "epoch": 0.42210800597156345, "grad_norm": 1.0839403867721558, "learning_rate": 1.9445069155874492e-05, "loss": 0.1119, "step": 19156 }, { "epoch": 0.4221300412610796, "grad_norm": 0.4877156913280487, "learning_rate": 1.944404669440585e-05, "loss": 0.0578, "step": 19157 }, { "epoch": 0.4221520765505958, "grad_norm": 0.5728464722633362, "learning_rate": 1.9443024210301302e-05, "loss": 0.0565, "step": 19158 }, { "epoch": 0.42217411184011194, "grad_norm": 0.5892117023468018, "learning_rate": 1.944200170356604e-05, "loss": 0.1111, "step": 19159 }, { "epoch": 0.4221961471296281, "grad_norm": 0.9372115731239319, "learning_rate": 1.9440979174205283e-05, "loss": 0.0965, "step": 19160 }, { "epoch": 0.4222181824191443, "grad_norm": 0.8641167283058167, "learning_rate": 1.943995662222424e-05, "loss": 0.0931, "step": 19161 }, { "epoch": 0.42224021770866044, "grad_norm": 0.7855311036109924, "learning_rate": 1.9438934047628115e-05, "loss": 0.0943, "step": 19162 }, { "epoch": 0.4222622529981766, "grad_norm": 0.46376264095306396, "learning_rate": 1.9437911450422122e-05, "loss": 0.095, "step": 19163 }, { "epoch": 0.42228428828769277, "grad_norm": 0.49134111404418945, "learning_rate": 1.943688883061146e-05, "loss": 0.0503, "step": 19164 }, { "epoch": 0.4223063235772089, "grad_norm": 0.9095204472541809, "learning_rate": 1.943586618820135e-05, "loss": 0.1131, "step": 19165 }, { "epoch": 0.42232835886672504, "grad_norm": 0.5951917767524719, "learning_rate": 1.9434843523196995e-05, "loss": 0.0522, "step": 19166 }, { "epoch": 0.4223503941562412, "grad_norm": 0.7514339685440063, "learning_rate": 1.9433820835603598e-05, "loss": 0.0976, "step": 19167 }, { "epoch": 0.42237242944575737, "grad_norm": 0.6403438448905945, "learning_rate": 1.9432798125426378e-05, "loss": 0.0855, "step": 19168 }, { "epoch": 0.42239446473527353, "grad_norm": 0.5326747298240662, "learning_rate": 1.9431775392670536e-05, "loss": 0.0829, "step": 19169 }, { "epoch": 0.4224165000247897, "grad_norm": 0.6801785826683044, "learning_rate": 1.943075263734129e-05, "loss": 0.0855, "step": 19170 }, { "epoch": 0.42243853531430586, "grad_norm": 0.9026721715927124, "learning_rate": 1.942972985944384e-05, "loss": 0.0969, "step": 19171 }, { "epoch": 0.422460570603822, "grad_norm": 1.0442545413970947, "learning_rate": 1.942870705898341e-05, "loss": 0.0848, "step": 19172 }, { "epoch": 0.4224826058933382, "grad_norm": 0.8547264337539673, "learning_rate": 1.942768423596519e-05, "loss": 0.0733, "step": 19173 }, { "epoch": 0.42250464118285436, "grad_norm": 0.9807504415512085, "learning_rate": 1.9426661390394404e-05, "loss": 0.1173, "step": 19174 }, { "epoch": 0.4225266764723705, "grad_norm": 0.8332765698432922, "learning_rate": 1.942563852227626e-05, "loss": 0.1099, "step": 19175 }, { "epoch": 0.4225487117618867, "grad_norm": 0.592697024345398, "learning_rate": 1.9424615631615963e-05, "loss": 0.0673, "step": 19176 }, { "epoch": 0.4225707470514028, "grad_norm": 0.5128248929977417, "learning_rate": 1.9423592718418726e-05, "loss": 0.0887, "step": 19177 }, { "epoch": 0.42259278234091896, "grad_norm": 0.7726108431816101, "learning_rate": 1.9422569782689758e-05, "loss": 0.0835, "step": 19178 }, { "epoch": 0.4226148176304351, "grad_norm": 0.5714388489723206, "learning_rate": 1.942154682443427e-05, "loss": 0.1091, "step": 19179 }, { "epoch": 0.4226368529199513, "grad_norm": 0.4809466004371643, "learning_rate": 1.9420523843657478e-05, "loss": 0.0516, "step": 19180 }, { "epoch": 0.42265888820946745, "grad_norm": 0.7510364651679993, "learning_rate": 1.941950084036458e-05, "loss": 0.095, "step": 19181 }, { "epoch": 0.4226809234989836, "grad_norm": 0.5968038439750671, "learning_rate": 1.9418477814560802e-05, "loss": 0.0673, "step": 19182 }, { "epoch": 0.4227029587884998, "grad_norm": 1.1395208835601807, "learning_rate": 1.9417454766251343e-05, "loss": 0.0955, "step": 19183 }, { "epoch": 0.42272499407801595, "grad_norm": 0.5035428404808044, "learning_rate": 1.941643169544142e-05, "loss": 0.0699, "step": 19184 }, { "epoch": 0.4227470293675321, "grad_norm": 0.27272939682006836, "learning_rate": 1.941540860213624e-05, "loss": 0.0617, "step": 19185 }, { "epoch": 0.4227690646570483, "grad_norm": 0.8322455286979675, "learning_rate": 1.941438548634102e-05, "loss": 0.0629, "step": 19186 }, { "epoch": 0.42279109994656444, "grad_norm": 0.458575576543808, "learning_rate": 1.9413362348060965e-05, "loss": 0.1023, "step": 19187 }, { "epoch": 0.4228131352360806, "grad_norm": 0.6931776404380798, "learning_rate": 1.9412339187301286e-05, "loss": 0.1065, "step": 19188 }, { "epoch": 0.42283517052559677, "grad_norm": 0.3651268780231476, "learning_rate": 1.94113160040672e-05, "loss": 0.0832, "step": 19189 }, { "epoch": 0.4228572058151129, "grad_norm": 0.6666631698608398, "learning_rate": 1.9410292798363916e-05, "loss": 0.052, "step": 19190 }, { "epoch": 0.42287924110462904, "grad_norm": 0.8014907836914062, "learning_rate": 1.940926957019665e-05, "loss": 0.0921, "step": 19191 }, { "epoch": 0.4229012763941452, "grad_norm": 0.9024266600608826, "learning_rate": 1.94082463195706e-05, "loss": 0.0955, "step": 19192 }, { "epoch": 0.4229233116836614, "grad_norm": 0.8823651671409607, "learning_rate": 1.9407223046490998e-05, "loss": 0.0731, "step": 19193 }, { "epoch": 0.42294534697317754, "grad_norm": 1.1975971460342407, "learning_rate": 1.9406199750963037e-05, "loss": 0.0917, "step": 19194 }, { "epoch": 0.4229673822626937, "grad_norm": 0.447442889213562, "learning_rate": 1.9405176432991946e-05, "loss": 0.0796, "step": 19195 }, { "epoch": 0.42298941755220987, "grad_norm": 0.5805772542953491, "learning_rate": 1.9404153092582918e-05, "loss": 0.0685, "step": 19196 }, { "epoch": 0.42301145284172603, "grad_norm": 0.6154714822769165, "learning_rate": 1.9403129729741188e-05, "loss": 0.069, "step": 19197 }, { "epoch": 0.4230334881312422, "grad_norm": 0.49260368943214417, "learning_rate": 1.9402106344471953e-05, "loss": 0.0716, "step": 19198 }, { "epoch": 0.42305552342075836, "grad_norm": 0.4847375154495239, "learning_rate": 1.9401082936780432e-05, "loss": 0.084, "step": 19199 }, { "epoch": 0.4230775587102745, "grad_norm": 0.6905861496925354, "learning_rate": 1.940005950667183e-05, "loss": 0.0734, "step": 19200 }, { "epoch": 0.4230995939997907, "grad_norm": 0.7033131718635559, "learning_rate": 1.939903605415137e-05, "loss": 0.0926, "step": 19201 }, { "epoch": 0.4231216292893068, "grad_norm": 0.9610552787780762, "learning_rate": 1.939801257922426e-05, "loss": 0.1034, "step": 19202 }, { "epoch": 0.42314366457882296, "grad_norm": 0.8423336744308472, "learning_rate": 1.9396989081895713e-05, "loss": 0.0988, "step": 19203 }, { "epoch": 0.42316569986833913, "grad_norm": 0.687811017036438, "learning_rate": 1.9395965562170942e-05, "loss": 0.0898, "step": 19204 }, { "epoch": 0.4231877351578553, "grad_norm": 0.6578058004379272, "learning_rate": 1.9394942020055156e-05, "loss": 0.085, "step": 19205 }, { "epoch": 0.42320977044737146, "grad_norm": 0.8457533121109009, "learning_rate": 1.9393918455553584e-05, "loss": 0.1088, "step": 19206 }, { "epoch": 0.4232318057368876, "grad_norm": 0.6606312394142151, "learning_rate": 1.9392894868671426e-05, "loss": 0.1351, "step": 19207 }, { "epoch": 0.4232538410264038, "grad_norm": 0.6813065409660339, "learning_rate": 1.9391871259413893e-05, "loss": 0.0853, "step": 19208 }, { "epoch": 0.42327587631591995, "grad_norm": 0.8398933410644531, "learning_rate": 1.939084762778621e-05, "loss": 0.0949, "step": 19209 }, { "epoch": 0.4232979116054361, "grad_norm": 0.8597308993339539, "learning_rate": 1.938982397379358e-05, "loss": 0.0885, "step": 19210 }, { "epoch": 0.4233199468949523, "grad_norm": 0.6306807994842529, "learning_rate": 1.9388800297441227e-05, "loss": 0.0951, "step": 19211 }, { "epoch": 0.42334198218446845, "grad_norm": 0.400441974401474, "learning_rate": 1.938777659873436e-05, "loss": 0.0543, "step": 19212 }, { "epoch": 0.4233640174739846, "grad_norm": 0.6024095416069031, "learning_rate": 1.9386752877678197e-05, "loss": 0.0609, "step": 19213 }, { "epoch": 0.4233860527635007, "grad_norm": 0.5896760821342468, "learning_rate": 1.9385729134277947e-05, "loss": 0.0703, "step": 19214 }, { "epoch": 0.4234080880530169, "grad_norm": 0.8972271680831909, "learning_rate": 1.9384705368538822e-05, "loss": 0.0958, "step": 19215 }, { "epoch": 0.42343012334253305, "grad_norm": 1.204798936843872, "learning_rate": 1.9383681580466048e-05, "loss": 0.0561, "step": 19216 }, { "epoch": 0.4234521586320492, "grad_norm": 0.49599239230155945, "learning_rate": 1.9382657770064828e-05, "loss": 0.0459, "step": 19217 }, { "epoch": 0.4234741939215654, "grad_norm": 0.9545397162437439, "learning_rate": 1.9381633937340386e-05, "loss": 0.0835, "step": 19218 }, { "epoch": 0.42349622921108154, "grad_norm": 1.009229302406311, "learning_rate": 1.9380610082297927e-05, "loss": 0.0873, "step": 19219 }, { "epoch": 0.4235182645005977, "grad_norm": 0.6072333455085754, "learning_rate": 1.937958620494268e-05, "loss": 0.0616, "step": 19220 }, { "epoch": 0.42354029979011387, "grad_norm": 0.9667598009109497, "learning_rate": 1.9378562305279846e-05, "loss": 0.1037, "step": 19221 }, { "epoch": 0.42356233507963004, "grad_norm": 0.5197320580482483, "learning_rate": 1.937753838331465e-05, "loss": 0.0836, "step": 19222 }, { "epoch": 0.4235843703691462, "grad_norm": 0.8675599098205566, "learning_rate": 1.93765144390523e-05, "loss": 0.1116, "step": 19223 }, { "epoch": 0.42360640565866237, "grad_norm": 0.4930175840854645, "learning_rate": 1.9375490472498014e-05, "loss": 0.0894, "step": 19224 }, { "epoch": 0.42362844094817853, "grad_norm": 0.6921042203903198, "learning_rate": 1.937446648365701e-05, "loss": 0.0727, "step": 19225 }, { "epoch": 0.4236504762376947, "grad_norm": 0.543755054473877, "learning_rate": 1.9373442472534506e-05, "loss": 0.0697, "step": 19226 }, { "epoch": 0.4236725115272108, "grad_norm": 0.8318962454795837, "learning_rate": 1.9372418439135707e-05, "loss": 0.0792, "step": 19227 }, { "epoch": 0.42369454681672697, "grad_norm": 0.8621059060096741, "learning_rate": 1.9371394383465845e-05, "loss": 0.0687, "step": 19228 }, { "epoch": 0.42371658210624313, "grad_norm": 0.9355663657188416, "learning_rate": 1.9370370305530122e-05, "loss": 0.0998, "step": 19229 }, { "epoch": 0.4237386173957593, "grad_norm": 0.4316185712814331, "learning_rate": 1.9369346205333763e-05, "loss": 0.0478, "step": 19230 }, { "epoch": 0.42376065268527546, "grad_norm": 0.6427627801895142, "learning_rate": 1.9368322082881976e-05, "loss": 0.056, "step": 19231 }, { "epoch": 0.4237826879747916, "grad_norm": 0.7091750502586365, "learning_rate": 1.9367297938179986e-05, "loss": 0.0729, "step": 19232 }, { "epoch": 0.4238047232643078, "grad_norm": 0.6472374200820923, "learning_rate": 1.9366273771233003e-05, "loss": 0.0755, "step": 19233 }, { "epoch": 0.42382675855382396, "grad_norm": 1.088453769683838, "learning_rate": 1.936524958204624e-05, "loss": 0.1301, "step": 19234 }, { "epoch": 0.4238487938433401, "grad_norm": 0.872833788394928, "learning_rate": 1.9364225370624933e-05, "loss": 0.103, "step": 19235 }, { "epoch": 0.4238708291328563, "grad_norm": 0.5934451222419739, "learning_rate": 1.9363201136974278e-05, "loss": 0.0953, "step": 19236 }, { "epoch": 0.42389286442237245, "grad_norm": 1.1043354272842407, "learning_rate": 1.9362176881099505e-05, "loss": 0.106, "step": 19237 }, { "epoch": 0.4239148997118886, "grad_norm": 0.5146858096122742, "learning_rate": 1.936115260300582e-05, "loss": 0.076, "step": 19238 }, { "epoch": 0.4239369350014047, "grad_norm": 0.6008111238479614, "learning_rate": 1.9360128302698453e-05, "loss": 0.0917, "step": 19239 }, { "epoch": 0.4239589702909209, "grad_norm": 0.7236118316650391, "learning_rate": 1.935910398018261e-05, "loss": 0.0901, "step": 19240 }, { "epoch": 0.42398100558043705, "grad_norm": 0.6766867637634277, "learning_rate": 1.9358079635463514e-05, "loss": 0.09, "step": 19241 }, { "epoch": 0.4240030408699532, "grad_norm": 0.7220675945281982, "learning_rate": 1.935705526854638e-05, "loss": 0.0964, "step": 19242 }, { "epoch": 0.4240250761594694, "grad_norm": 0.49508988857269287, "learning_rate": 1.9356030879436428e-05, "loss": 0.0835, "step": 19243 }, { "epoch": 0.42404711144898555, "grad_norm": 0.8887604475021362, "learning_rate": 1.9355006468138874e-05, "loss": 0.0726, "step": 19244 }, { "epoch": 0.4240691467385017, "grad_norm": 0.6537437438964844, "learning_rate": 1.935398203465894e-05, "loss": 0.0667, "step": 19245 }, { "epoch": 0.4240911820280179, "grad_norm": 0.9146932363510132, "learning_rate": 1.935295757900184e-05, "loss": 0.1069, "step": 19246 }, { "epoch": 0.42411321731753404, "grad_norm": 0.8296297788619995, "learning_rate": 1.935193310117279e-05, "loss": 0.0755, "step": 19247 }, { "epoch": 0.4241352526070502, "grad_norm": 0.9997521042823792, "learning_rate": 1.935090860117701e-05, "loss": 0.068, "step": 19248 }, { "epoch": 0.42415728789656637, "grad_norm": 0.9577459096908569, "learning_rate": 1.9349884079019726e-05, "loss": 0.1116, "step": 19249 }, { "epoch": 0.42417932318608254, "grad_norm": 0.8338272571563721, "learning_rate": 1.934885953470614e-05, "loss": 0.0954, "step": 19250 }, { "epoch": 0.42420135847559864, "grad_norm": 1.0637720823287964, "learning_rate": 1.934783496824149e-05, "loss": 0.093, "step": 19251 }, { "epoch": 0.4242233937651148, "grad_norm": 0.7250145673751831, "learning_rate": 1.9346810379630978e-05, "loss": 0.0937, "step": 19252 }, { "epoch": 0.424245429054631, "grad_norm": 0.49037179350852966, "learning_rate": 1.9345785768879836e-05, "loss": 0.0754, "step": 19253 }, { "epoch": 0.42426746434414714, "grad_norm": 0.8497461676597595, "learning_rate": 1.9344761135993276e-05, "loss": 0.0634, "step": 19254 }, { "epoch": 0.4242894996336633, "grad_norm": 0.6964835524559021, "learning_rate": 1.9343736480976513e-05, "loss": 0.0769, "step": 19255 }, { "epoch": 0.42431153492317947, "grad_norm": 0.3934862017631531, "learning_rate": 1.934271180383477e-05, "loss": 0.0801, "step": 19256 }, { "epoch": 0.42433357021269563, "grad_norm": 0.9566774964332581, "learning_rate": 1.9341687104573267e-05, "loss": 0.1154, "step": 19257 }, { "epoch": 0.4243556055022118, "grad_norm": 0.5748909711837769, "learning_rate": 1.9340662383197226e-05, "loss": 0.0759, "step": 19258 }, { "epoch": 0.42437764079172796, "grad_norm": 0.8517153263092041, "learning_rate": 1.9339637639711867e-05, "loss": 0.1051, "step": 19259 }, { "epoch": 0.4243996760812441, "grad_norm": 0.459555059671402, "learning_rate": 1.93386128741224e-05, "loss": 0.0461, "step": 19260 }, { "epoch": 0.4244217113707603, "grad_norm": 0.7370684742927551, "learning_rate": 1.933758808643406e-05, "loss": 0.0845, "step": 19261 }, { "epoch": 0.42444374666027646, "grad_norm": 0.9157721400260925, "learning_rate": 1.933656327665205e-05, "loss": 0.0673, "step": 19262 }, { "epoch": 0.4244657819497926, "grad_norm": 0.620225191116333, "learning_rate": 1.93355384447816e-05, "loss": 0.0551, "step": 19263 }, { "epoch": 0.42448781723930873, "grad_norm": 0.5025067329406738, "learning_rate": 1.933451359082793e-05, "loss": 0.0552, "step": 19264 }, { "epoch": 0.4245098525288249, "grad_norm": 1.2711126804351807, "learning_rate": 1.9333488714796255e-05, "loss": 0.0793, "step": 19265 }, { "epoch": 0.42453188781834106, "grad_norm": 0.8928893208503723, "learning_rate": 1.93324638166918e-05, "loss": 0.0831, "step": 19266 }, { "epoch": 0.4245539231078572, "grad_norm": 0.7373504638671875, "learning_rate": 1.9331438896519776e-05, "loss": 0.0663, "step": 19267 }, { "epoch": 0.4245759583973734, "grad_norm": 0.6062486171722412, "learning_rate": 1.9330413954285424e-05, "loss": 0.0835, "step": 19268 }, { "epoch": 0.42459799368688955, "grad_norm": 0.9815699458122253, "learning_rate": 1.9329388989993947e-05, "loss": 0.0949, "step": 19269 }, { "epoch": 0.4246200289764057, "grad_norm": 0.7760957479476929, "learning_rate": 1.9328364003650567e-05, "loss": 0.0746, "step": 19270 }, { "epoch": 0.4246420642659219, "grad_norm": 0.6180217862129211, "learning_rate": 1.9327338995260506e-05, "loss": 0.0898, "step": 19271 }, { "epoch": 0.42466409955543805, "grad_norm": 0.9370294213294983, "learning_rate": 1.9326313964828993e-05, "loss": 0.0699, "step": 19272 }, { "epoch": 0.4246861348449542, "grad_norm": 0.8012592196464539, "learning_rate": 1.9325288912361237e-05, "loss": 0.0982, "step": 19273 }, { "epoch": 0.4247081701344704, "grad_norm": 0.7488945722579956, "learning_rate": 1.932426383786247e-05, "loss": 0.0624, "step": 19274 }, { "epoch": 0.42473020542398654, "grad_norm": 0.922731339931488, "learning_rate": 1.9323238741337908e-05, "loss": 0.0927, "step": 19275 }, { "epoch": 0.42475224071350265, "grad_norm": 0.5730782151222229, "learning_rate": 1.9322213622792774e-05, "loss": 0.0801, "step": 19276 }, { "epoch": 0.4247742760030188, "grad_norm": 0.886668860912323, "learning_rate": 1.9321188482232287e-05, "loss": 0.0703, "step": 19277 }, { "epoch": 0.424796311292535, "grad_norm": 0.7315869331359863, "learning_rate": 1.9320163319661666e-05, "loss": 0.0752, "step": 19278 }, { "epoch": 0.42481834658205114, "grad_norm": 0.4752531051635742, "learning_rate": 1.931913813508614e-05, "loss": 0.0781, "step": 19279 }, { "epoch": 0.4248403818715673, "grad_norm": 0.9965113401412964, "learning_rate": 1.9318112928510927e-05, "loss": 0.1121, "step": 19280 }, { "epoch": 0.42486241716108347, "grad_norm": 0.54923415184021, "learning_rate": 1.931708769994125e-05, "loss": 0.0759, "step": 19281 }, { "epoch": 0.42488445245059964, "grad_norm": 0.5569338202476501, "learning_rate": 1.9316062449382328e-05, "loss": 0.0779, "step": 19282 }, { "epoch": 0.4249064877401158, "grad_norm": 0.42319998145103455, "learning_rate": 1.931503717683939e-05, "loss": 0.0929, "step": 19283 }, { "epoch": 0.42492852302963197, "grad_norm": 0.8862945437431335, "learning_rate": 1.9314011882317648e-05, "loss": 0.0685, "step": 19284 }, { "epoch": 0.42495055831914813, "grad_norm": 0.6786395907402039, "learning_rate": 1.9312986565822333e-05, "loss": 0.0575, "step": 19285 }, { "epoch": 0.4249725936086643, "grad_norm": 1.3382571935653687, "learning_rate": 1.9311961227358667e-05, "loss": 0.1258, "step": 19286 }, { "epoch": 0.42499462889818046, "grad_norm": 0.6313140392303467, "learning_rate": 1.931093586693187e-05, "loss": 0.0656, "step": 19287 }, { "epoch": 0.42501666418769657, "grad_norm": 0.7816596031188965, "learning_rate": 1.930991048454716e-05, "loss": 0.0816, "step": 19288 }, { "epoch": 0.42503869947721273, "grad_norm": 0.5664879083633423, "learning_rate": 1.9308885080209768e-05, "loss": 0.0597, "step": 19289 }, { "epoch": 0.4250607347667289, "grad_norm": 0.8940879106521606, "learning_rate": 1.9307859653924912e-05, "loss": 0.1314, "step": 19290 }, { "epoch": 0.42508277005624506, "grad_norm": 1.0220253467559814, "learning_rate": 1.930683420569782e-05, "loss": 0.0817, "step": 19291 }, { "epoch": 0.4251048053457612, "grad_norm": 0.6194120049476624, "learning_rate": 1.9305808735533707e-05, "loss": 0.0822, "step": 19292 }, { "epoch": 0.4251268406352774, "grad_norm": 0.7507917881011963, "learning_rate": 1.9304783243437803e-05, "loss": 0.1133, "step": 19293 }, { "epoch": 0.42514887592479356, "grad_norm": 0.44099971652030945, "learning_rate": 1.9303757729415333e-05, "loss": 0.0849, "step": 19294 }, { "epoch": 0.4251709112143097, "grad_norm": 0.5677380561828613, "learning_rate": 1.9302732193471515e-05, "loss": 0.0563, "step": 19295 }, { "epoch": 0.4251929465038259, "grad_norm": 0.8597143888473511, "learning_rate": 1.930170663561157e-05, "loss": 0.1156, "step": 19296 }, { "epoch": 0.42521498179334205, "grad_norm": 0.5940836071968079, "learning_rate": 1.9300681055840733e-05, "loss": 0.0882, "step": 19297 }, { "epoch": 0.4252370170828582, "grad_norm": 0.8124358654022217, "learning_rate": 1.9299655454164217e-05, "loss": 0.0886, "step": 19298 }, { "epoch": 0.4252590523723744, "grad_norm": 0.6724879741668701, "learning_rate": 1.9298629830587255e-05, "loss": 0.1161, "step": 19299 }, { "epoch": 0.42528108766189054, "grad_norm": 0.6570402979850769, "learning_rate": 1.929760418511506e-05, "loss": 0.0973, "step": 19300 }, { "epoch": 0.42530312295140665, "grad_norm": 0.4355489909648895, "learning_rate": 1.9296578517752867e-05, "loss": 0.0611, "step": 19301 }, { "epoch": 0.4253251582409228, "grad_norm": 0.9744718074798584, "learning_rate": 1.9295552828505895e-05, "loss": 0.1215, "step": 19302 }, { "epoch": 0.425347193530439, "grad_norm": 0.86829674243927, "learning_rate": 1.9294527117379366e-05, "loss": 0.0878, "step": 19303 }, { "epoch": 0.42536922881995515, "grad_norm": 0.5083625912666321, "learning_rate": 1.929350138437851e-05, "loss": 0.0616, "step": 19304 }, { "epoch": 0.4253912641094713, "grad_norm": 0.7060609459877014, "learning_rate": 1.9292475629508548e-05, "loss": 0.0929, "step": 19305 }, { "epoch": 0.4254132993989875, "grad_norm": 0.7253303527832031, "learning_rate": 1.9291449852774706e-05, "loss": 0.0639, "step": 19306 }, { "epoch": 0.42543533468850364, "grad_norm": 0.5455753803253174, "learning_rate": 1.9290424054182207e-05, "loss": 0.0776, "step": 19307 }, { "epoch": 0.4254573699780198, "grad_norm": 0.7723244428634644, "learning_rate": 1.9289398233736282e-05, "loss": 0.0861, "step": 19308 }, { "epoch": 0.42547940526753597, "grad_norm": 0.575023889541626, "learning_rate": 1.928837239144215e-05, "loss": 0.0861, "step": 19309 }, { "epoch": 0.42550144055705214, "grad_norm": 0.5734806060791016, "learning_rate": 1.928734652730504e-05, "loss": 0.059, "step": 19310 }, { "epoch": 0.4255234758465683, "grad_norm": 0.843204915523529, "learning_rate": 1.928632064133017e-05, "loss": 0.1086, "step": 19311 }, { "epoch": 0.42554551113608446, "grad_norm": 0.41446053981781006, "learning_rate": 1.928529473352277e-05, "loss": 0.0793, "step": 19312 }, { "epoch": 0.4255675464256006, "grad_norm": 2.180021286010742, "learning_rate": 1.9284268803888067e-05, "loss": 0.0985, "step": 19313 }, { "epoch": 0.42558958171511674, "grad_norm": 0.7112267017364502, "learning_rate": 1.9283242852431285e-05, "loss": 0.0845, "step": 19314 }, { "epoch": 0.4256116170046329, "grad_norm": 0.7757961750030518, "learning_rate": 1.9282216879157655e-05, "loss": 0.1126, "step": 19315 }, { "epoch": 0.42563365229414907, "grad_norm": 0.7532503604888916, "learning_rate": 1.9281190884072394e-05, "loss": 0.0738, "step": 19316 }, { "epoch": 0.42565568758366523, "grad_norm": 1.512675166130066, "learning_rate": 1.9280164867180738e-05, "loss": 0.0795, "step": 19317 }, { "epoch": 0.4256777228731814, "grad_norm": 0.7244101762771606, "learning_rate": 1.92791388284879e-05, "loss": 0.0732, "step": 19318 }, { "epoch": 0.42569975816269756, "grad_norm": 0.9222845435142517, "learning_rate": 1.9278112767999116e-05, "loss": 0.1285, "step": 19319 }, { "epoch": 0.4257217934522137, "grad_norm": 1.0363150835037231, "learning_rate": 1.9277086685719612e-05, "loss": 0.0887, "step": 19320 }, { "epoch": 0.4257438287417299, "grad_norm": 0.8746337294578552, "learning_rate": 1.9276060581654605e-05, "loss": 0.0796, "step": 19321 }, { "epoch": 0.42576586403124606, "grad_norm": 0.4815629720687866, "learning_rate": 1.9275034455809334e-05, "loss": 0.075, "step": 19322 }, { "epoch": 0.4257878993207622, "grad_norm": 0.7537646889686584, "learning_rate": 1.927400830818902e-05, "loss": 0.0834, "step": 19323 }, { "epoch": 0.4258099346102784, "grad_norm": 0.6408596038818359, "learning_rate": 1.927298213879889e-05, "loss": 0.0648, "step": 19324 }, { "epoch": 0.4258319698997945, "grad_norm": 0.8619550466537476, "learning_rate": 1.9271955947644166e-05, "loss": 0.1212, "step": 19325 }, { "epoch": 0.42585400518931066, "grad_norm": 0.4461231529712677, "learning_rate": 1.9270929734730085e-05, "loss": 0.0818, "step": 19326 }, { "epoch": 0.4258760404788268, "grad_norm": 0.6141111254692078, "learning_rate": 1.9269903500061867e-05, "loss": 0.0597, "step": 19327 }, { "epoch": 0.425898075768343, "grad_norm": 0.6663206815719604, "learning_rate": 1.9268877243644736e-05, "loss": 0.0624, "step": 19328 }, { "epoch": 0.42592011105785915, "grad_norm": 0.8312159180641174, "learning_rate": 1.9267850965483927e-05, "loss": 0.0581, "step": 19329 }, { "epoch": 0.4259421463473753, "grad_norm": 0.8044581413269043, "learning_rate": 1.9266824665584665e-05, "loss": 0.0941, "step": 19330 }, { "epoch": 0.4259641816368915, "grad_norm": 1.0105310678482056, "learning_rate": 1.926579834395218e-05, "loss": 0.0645, "step": 19331 }, { "epoch": 0.42598621692640765, "grad_norm": 0.8628654479980469, "learning_rate": 1.9264772000591696e-05, "loss": 0.0765, "step": 19332 }, { "epoch": 0.4260082522159238, "grad_norm": 0.9218652844429016, "learning_rate": 1.926374563550844e-05, "loss": 0.0942, "step": 19333 }, { "epoch": 0.42603028750544, "grad_norm": 0.757799506187439, "learning_rate": 1.926271924870764e-05, "loss": 0.0912, "step": 19334 }, { "epoch": 0.42605232279495614, "grad_norm": 0.6359637379646301, "learning_rate": 1.9261692840194525e-05, "loss": 0.0765, "step": 19335 }, { "epoch": 0.4260743580844723, "grad_norm": 0.9359044432640076, "learning_rate": 1.926066640997432e-05, "loss": 0.0916, "step": 19336 }, { "epoch": 0.42609639337398847, "grad_norm": 0.8544549942016602, "learning_rate": 1.9259639958052266e-05, "loss": 0.0993, "step": 19337 }, { "epoch": 0.4261184286635046, "grad_norm": 0.6647583842277527, "learning_rate": 1.925861348443357e-05, "loss": 0.0592, "step": 19338 }, { "epoch": 0.42614046395302074, "grad_norm": 0.701265811920166, "learning_rate": 1.9257586989123478e-05, "loss": 0.0702, "step": 19339 }, { "epoch": 0.4261624992425369, "grad_norm": 0.4821349084377289, "learning_rate": 1.9256560472127212e-05, "loss": 0.0796, "step": 19340 }, { "epoch": 0.42618453453205307, "grad_norm": 1.2077608108520508, "learning_rate": 1.9255533933450006e-05, "loss": 0.0958, "step": 19341 }, { "epoch": 0.42620656982156924, "grad_norm": 0.4393267035484314, "learning_rate": 1.9254507373097074e-05, "loss": 0.052, "step": 19342 }, { "epoch": 0.4262286051110854, "grad_norm": 0.3910575807094574, "learning_rate": 1.9253480791073658e-05, "loss": 0.1152, "step": 19343 }, { "epoch": 0.42625064040060157, "grad_norm": 0.5990947484970093, "learning_rate": 1.9252454187384982e-05, "loss": 0.0896, "step": 19344 }, { "epoch": 0.42627267569011773, "grad_norm": 0.8976319432258606, "learning_rate": 1.925142756203628e-05, "loss": 0.0953, "step": 19345 }, { "epoch": 0.4262947109796339, "grad_norm": 0.6106569170951843, "learning_rate": 1.9250400915032777e-05, "loss": 0.0921, "step": 19346 }, { "epoch": 0.42631674626915006, "grad_norm": 0.5677838325500488, "learning_rate": 1.9249374246379697e-05, "loss": 0.0586, "step": 19347 }, { "epoch": 0.4263387815586662, "grad_norm": 0.9553132653236389, "learning_rate": 1.9248347556082285e-05, "loss": 0.0866, "step": 19348 }, { "epoch": 0.4263608168481824, "grad_norm": 0.7004659175872803, "learning_rate": 1.9247320844145757e-05, "loss": 0.0526, "step": 19349 }, { "epoch": 0.4263828521376985, "grad_norm": 0.6186507344245911, "learning_rate": 1.9246294110575346e-05, "loss": 0.0849, "step": 19350 }, { "epoch": 0.42640488742721466, "grad_norm": 0.8733474612236023, "learning_rate": 1.924526735537628e-05, "loss": 0.0614, "step": 19351 }, { "epoch": 0.4264269227167308, "grad_norm": 0.6424747109413147, "learning_rate": 1.9244240578553793e-05, "loss": 0.1132, "step": 19352 }, { "epoch": 0.426448958006247, "grad_norm": 0.6439513564109802, "learning_rate": 1.924321378011311e-05, "loss": 0.1081, "step": 19353 }, { "epoch": 0.42647099329576316, "grad_norm": 0.8812785148620605, "learning_rate": 1.9242186960059467e-05, "loss": 0.0709, "step": 19354 }, { "epoch": 0.4264930285852793, "grad_norm": 0.6771366596221924, "learning_rate": 1.924116011839809e-05, "loss": 0.0543, "step": 19355 }, { "epoch": 0.4265150638747955, "grad_norm": 0.9455088973045349, "learning_rate": 1.924013325513421e-05, "loss": 0.0855, "step": 19356 }, { "epoch": 0.42653709916431165, "grad_norm": 0.5920175909996033, "learning_rate": 1.923910637027306e-05, "loss": 0.0891, "step": 19357 }, { "epoch": 0.4265591344538278, "grad_norm": 0.42464497685432434, "learning_rate": 1.9238079463819867e-05, "loss": 0.0961, "step": 19358 }, { "epoch": 0.426581169743344, "grad_norm": 0.5350375771522522, "learning_rate": 1.923705253577986e-05, "loss": 0.0524, "step": 19359 }, { "epoch": 0.42660320503286014, "grad_norm": 0.7804339528083801, "learning_rate": 1.9236025586158277e-05, "loss": 0.0724, "step": 19360 }, { "epoch": 0.4266252403223763, "grad_norm": 0.8400323987007141, "learning_rate": 1.923499861496034e-05, "loss": 0.0872, "step": 19361 }, { "epoch": 0.4266472756118924, "grad_norm": 0.7753351926803589, "learning_rate": 1.9233971622191284e-05, "loss": 0.0862, "step": 19362 }, { "epoch": 0.4266693109014086, "grad_norm": 0.7051174640655518, "learning_rate": 1.923294460785634e-05, "loss": 0.086, "step": 19363 }, { "epoch": 0.42669134619092475, "grad_norm": 0.415903240442276, "learning_rate": 1.923191757196074e-05, "loss": 0.088, "step": 19364 }, { "epoch": 0.4267133814804409, "grad_norm": 0.5587881803512573, "learning_rate": 1.9230890514509718e-05, "loss": 0.0729, "step": 19365 }, { "epoch": 0.4267354167699571, "grad_norm": 0.7899201512336731, "learning_rate": 1.9229863435508498e-05, "loss": 0.0931, "step": 19366 }, { "epoch": 0.42675745205947324, "grad_norm": 0.3854272961616516, "learning_rate": 1.9228836334962316e-05, "loss": 0.0768, "step": 19367 }, { "epoch": 0.4267794873489894, "grad_norm": 0.5921732187271118, "learning_rate": 1.92278092128764e-05, "loss": 0.088, "step": 19368 }, { "epoch": 0.42680152263850557, "grad_norm": 0.7136440277099609, "learning_rate": 1.9226782069255986e-05, "loss": 0.0876, "step": 19369 }, { "epoch": 0.42682355792802174, "grad_norm": 0.8847838044166565, "learning_rate": 1.9225754904106307e-05, "loss": 0.1131, "step": 19370 }, { "epoch": 0.4268455932175379, "grad_norm": 0.9151880741119385, "learning_rate": 1.922472771743259e-05, "loss": 0.0975, "step": 19371 }, { "epoch": 0.42686762850705406, "grad_norm": 0.8074215054512024, "learning_rate": 1.9223700509240074e-05, "loss": 0.0607, "step": 19372 }, { "epoch": 0.42688966379657023, "grad_norm": 0.7082821726799011, "learning_rate": 1.9222673279533982e-05, "loss": 0.0753, "step": 19373 }, { "epoch": 0.4269116990860864, "grad_norm": 0.6849303841590881, "learning_rate": 1.922164602831955e-05, "loss": 0.0935, "step": 19374 }, { "epoch": 0.4269337343756025, "grad_norm": 1.0278502702713013, "learning_rate": 1.922061875560201e-05, "loss": 0.1009, "step": 19375 }, { "epoch": 0.42695576966511867, "grad_norm": 0.7413655519485474, "learning_rate": 1.9219591461386595e-05, "loss": 0.0946, "step": 19376 }, { "epoch": 0.42697780495463483, "grad_norm": 0.6810984015464783, "learning_rate": 1.9218564145678544e-05, "loss": 0.0821, "step": 19377 }, { "epoch": 0.426999840244151, "grad_norm": 0.6694087982177734, "learning_rate": 1.921753680848308e-05, "loss": 0.1001, "step": 19378 }, { "epoch": 0.42702187553366716, "grad_norm": 0.6789119243621826, "learning_rate": 1.921650944980544e-05, "loss": 0.0836, "step": 19379 }, { "epoch": 0.4270439108231833, "grad_norm": 0.4724894165992737, "learning_rate": 1.921548206965086e-05, "loss": 0.0603, "step": 19380 }, { "epoch": 0.4270659461126995, "grad_norm": 0.6376519799232483, "learning_rate": 1.9214454668024566e-05, "loss": 0.0684, "step": 19381 }, { "epoch": 0.42708798140221566, "grad_norm": 0.5114627480506897, "learning_rate": 1.921342724493179e-05, "loss": 0.0684, "step": 19382 }, { "epoch": 0.4271100166917318, "grad_norm": 0.5233885049819946, "learning_rate": 1.9212399800377773e-05, "loss": 0.0918, "step": 19383 }, { "epoch": 0.427132051981248, "grad_norm": 0.8345179557800293, "learning_rate": 1.9211372334367744e-05, "loss": 0.0945, "step": 19384 }, { "epoch": 0.42715408727076415, "grad_norm": 0.48998549580574036, "learning_rate": 1.921034484690694e-05, "loss": 0.1006, "step": 19385 }, { "epoch": 0.4271761225602803, "grad_norm": 0.553484320640564, "learning_rate": 1.920931733800059e-05, "loss": 0.0751, "step": 19386 }, { "epoch": 0.4271981578497964, "grad_norm": 0.6302621960639954, "learning_rate": 1.9208289807653932e-05, "loss": 0.0492, "step": 19387 }, { "epoch": 0.4272201931393126, "grad_norm": 0.5759218335151672, "learning_rate": 1.9207262255872195e-05, "loss": 0.1097, "step": 19388 }, { "epoch": 0.42724222842882875, "grad_norm": 0.5196929574012756, "learning_rate": 1.9206234682660616e-05, "loss": 0.0973, "step": 19389 }, { "epoch": 0.4272642637183449, "grad_norm": 0.8749048709869385, "learning_rate": 1.9205207088024426e-05, "loss": 0.0766, "step": 19390 }, { "epoch": 0.4272862990078611, "grad_norm": 0.9257980585098267, "learning_rate": 1.9204179471968863e-05, "loss": 0.0655, "step": 19391 }, { "epoch": 0.42730833429737725, "grad_norm": 0.5804263353347778, "learning_rate": 1.920315183449916e-05, "loss": 0.0693, "step": 19392 }, { "epoch": 0.4273303695868934, "grad_norm": 0.7697457075119019, "learning_rate": 1.9202124175620548e-05, "loss": 0.0697, "step": 19393 }, { "epoch": 0.4273524048764096, "grad_norm": 0.8484922051429749, "learning_rate": 1.9201096495338268e-05, "loss": 0.1075, "step": 19394 }, { "epoch": 0.42737444016592574, "grad_norm": 0.7473506331443787, "learning_rate": 1.9200068793657547e-05, "loss": 0.0625, "step": 19395 }, { "epoch": 0.4273964754554419, "grad_norm": 0.6287477612495422, "learning_rate": 1.9199041070583624e-05, "loss": 0.086, "step": 19396 }, { "epoch": 0.42741851074495807, "grad_norm": 0.6004077792167664, "learning_rate": 1.9198013326121736e-05, "loss": 0.0661, "step": 19397 }, { "epoch": 0.42744054603447423, "grad_norm": 0.3334251046180725, "learning_rate": 1.9196985560277117e-05, "loss": 0.0739, "step": 19398 }, { "epoch": 0.4274625813239904, "grad_norm": 0.8508285880088806, "learning_rate": 1.9195957773054997e-05, "loss": 0.1068, "step": 19399 }, { "epoch": 0.4274846166135065, "grad_norm": 0.6289334297180176, "learning_rate": 1.9194929964460612e-05, "loss": 0.082, "step": 19400 }, { "epoch": 0.42750665190302267, "grad_norm": 0.7018419504165649, "learning_rate": 1.91939021344992e-05, "loss": 0.0907, "step": 19401 }, { "epoch": 0.42752868719253884, "grad_norm": 0.6249426603317261, "learning_rate": 1.9192874283176002e-05, "loss": 0.0689, "step": 19402 }, { "epoch": 0.427550722482055, "grad_norm": 0.5322931408882141, "learning_rate": 1.9191846410496236e-05, "loss": 0.1071, "step": 19403 }, { "epoch": 0.42757275777157117, "grad_norm": 0.35169538855552673, "learning_rate": 1.9190818516465155e-05, "loss": 0.0702, "step": 19404 }, { "epoch": 0.42759479306108733, "grad_norm": 1.0834660530090332, "learning_rate": 1.918979060108799e-05, "loss": 0.1194, "step": 19405 }, { "epoch": 0.4276168283506035, "grad_norm": 0.5115889310836792, "learning_rate": 1.918876266436997e-05, "loss": 0.0668, "step": 19406 }, { "epoch": 0.42763886364011966, "grad_norm": 0.7596473693847656, "learning_rate": 1.918773470631634e-05, "loss": 0.0855, "step": 19407 }, { "epoch": 0.4276608989296358, "grad_norm": 0.8376138806343079, "learning_rate": 1.9186706726932325e-05, "loss": 0.0671, "step": 19408 }, { "epoch": 0.427682934219152, "grad_norm": 0.7045204043388367, "learning_rate": 1.918567872622317e-05, "loss": 0.0982, "step": 19409 }, { "epoch": 0.42770496950866815, "grad_norm": 0.82108473777771, "learning_rate": 1.918465070419411e-05, "loss": 0.0834, "step": 19410 }, { "epoch": 0.4277270047981843, "grad_norm": 0.8637518882751465, "learning_rate": 1.918362266085038e-05, "loss": 0.0959, "step": 19411 }, { "epoch": 0.4277490400877004, "grad_norm": 0.6573891043663025, "learning_rate": 1.9182594596197213e-05, "loss": 0.0797, "step": 19412 }, { "epoch": 0.4277710753772166, "grad_norm": 0.5257464647293091, "learning_rate": 1.9181566510239854e-05, "loss": 0.0635, "step": 19413 }, { "epoch": 0.42779311066673276, "grad_norm": 0.636223554611206, "learning_rate": 1.9180538402983532e-05, "loss": 0.0776, "step": 19414 }, { "epoch": 0.4278151459562489, "grad_norm": 0.5673820376396179, "learning_rate": 1.9179510274433487e-05, "loss": 0.0898, "step": 19415 }, { "epoch": 0.4278371812457651, "grad_norm": 0.734308123588562, "learning_rate": 1.9178482124594955e-05, "loss": 0.0876, "step": 19416 }, { "epoch": 0.42785921653528125, "grad_norm": 0.6275847554206848, "learning_rate": 1.9177453953473173e-05, "loss": 0.065, "step": 19417 }, { "epoch": 0.4278812518247974, "grad_norm": 0.6234079599380493, "learning_rate": 1.9176425761073374e-05, "loss": 0.103, "step": 19418 }, { "epoch": 0.4279032871143136, "grad_norm": 0.6364330053329468, "learning_rate": 1.9175397547400807e-05, "loss": 0.0826, "step": 19419 }, { "epoch": 0.42792532240382974, "grad_norm": 0.7314937710762024, "learning_rate": 1.9174369312460692e-05, "loss": 0.0675, "step": 19420 }, { "epoch": 0.4279473576933459, "grad_norm": 0.6816607713699341, "learning_rate": 1.9173341056258282e-05, "loss": 0.0948, "step": 19421 }, { "epoch": 0.4279693929828621, "grad_norm": 0.6214107871055603, "learning_rate": 1.9172312778798804e-05, "loss": 0.1298, "step": 19422 }, { "epoch": 0.42799142827237824, "grad_norm": 0.533327043056488, "learning_rate": 1.9171284480087504e-05, "loss": 0.0657, "step": 19423 }, { "epoch": 0.42801346356189435, "grad_norm": 0.6423655152320862, "learning_rate": 1.917025616012961e-05, "loss": 0.0623, "step": 19424 }, { "epoch": 0.4280354988514105, "grad_norm": 1.764227271080017, "learning_rate": 1.9169227818930373e-05, "loss": 0.0724, "step": 19425 }, { "epoch": 0.4280575341409267, "grad_norm": 0.9678840637207031, "learning_rate": 1.9168199456495015e-05, "loss": 0.0812, "step": 19426 }, { "epoch": 0.42807956943044284, "grad_norm": 0.5498115420341492, "learning_rate": 1.9167171072828786e-05, "loss": 0.0937, "step": 19427 }, { "epoch": 0.428101604719959, "grad_norm": 1.0256080627441406, "learning_rate": 1.9166142667936922e-05, "loss": 0.1003, "step": 19428 }, { "epoch": 0.42812364000947517, "grad_norm": 0.5500313639640808, "learning_rate": 1.916511424182466e-05, "loss": 0.0662, "step": 19429 }, { "epoch": 0.42814567529899134, "grad_norm": 1.2610126733779907, "learning_rate": 1.9164085794497227e-05, "loss": 0.1041, "step": 19430 }, { "epoch": 0.4281677105885075, "grad_norm": 0.5729873180389404, "learning_rate": 1.9163057325959883e-05, "loss": 0.0753, "step": 19431 }, { "epoch": 0.42818974587802366, "grad_norm": 0.68478924036026, "learning_rate": 1.9162028836217848e-05, "loss": 0.0719, "step": 19432 }, { "epoch": 0.42821178116753983, "grad_norm": 1.5121299028396606, "learning_rate": 1.9161000325276374e-05, "loss": 0.1273, "step": 19433 }, { "epoch": 0.428233816457056, "grad_norm": 0.6651380658149719, "learning_rate": 1.9159971793140693e-05, "loss": 0.0814, "step": 19434 }, { "epoch": 0.42825585174657216, "grad_norm": 0.548126220703125, "learning_rate": 1.9158943239816045e-05, "loss": 0.0529, "step": 19435 }, { "epoch": 0.4282778870360883, "grad_norm": 0.7878984212875366, "learning_rate": 1.915791466530767e-05, "loss": 0.0715, "step": 19436 }, { "epoch": 0.42829992232560443, "grad_norm": 0.6436640024185181, "learning_rate": 1.91568860696208e-05, "loss": 0.0955, "step": 19437 }, { "epoch": 0.4283219576151206, "grad_norm": 0.9486755728721619, "learning_rate": 1.9155857452760686e-05, "loss": 0.0915, "step": 19438 }, { "epoch": 0.42834399290463676, "grad_norm": 0.4877314567565918, "learning_rate": 1.9154828814732557e-05, "loss": 0.0684, "step": 19439 }, { "epoch": 0.4283660281941529, "grad_norm": 0.7482308745384216, "learning_rate": 1.9153800155541662e-05, "loss": 0.1126, "step": 19440 }, { "epoch": 0.4283880634836691, "grad_norm": 1.0296239852905273, "learning_rate": 1.915277147519323e-05, "loss": 0.0963, "step": 19441 }, { "epoch": 0.42841009877318526, "grad_norm": 0.7679303288459778, "learning_rate": 1.915174277369251e-05, "loss": 0.0912, "step": 19442 }, { "epoch": 0.4284321340627014, "grad_norm": 0.8678827285766602, "learning_rate": 1.9150714051044737e-05, "loss": 0.1121, "step": 19443 }, { "epoch": 0.4284541693522176, "grad_norm": 0.40313780307769775, "learning_rate": 1.9149685307255153e-05, "loss": 0.0588, "step": 19444 }, { "epoch": 0.42847620464173375, "grad_norm": 0.7497815489768982, "learning_rate": 1.9148656542328994e-05, "loss": 0.0587, "step": 19445 }, { "epoch": 0.4284982399312499, "grad_norm": 0.31721264123916626, "learning_rate": 1.9147627756271507e-05, "loss": 0.0634, "step": 19446 }, { "epoch": 0.4285202752207661, "grad_norm": 0.833808183670044, "learning_rate": 1.914659894908792e-05, "loss": 0.095, "step": 19447 }, { "epoch": 0.42854231051028224, "grad_norm": 0.545376181602478, "learning_rate": 1.9145570120783486e-05, "loss": 0.0908, "step": 19448 }, { "epoch": 0.42856434579979835, "grad_norm": 0.6625285744667053, "learning_rate": 1.914454127136344e-05, "loss": 0.0957, "step": 19449 }, { "epoch": 0.4285863810893145, "grad_norm": 0.8297489285469055, "learning_rate": 1.914351240083302e-05, "loss": 0.0997, "step": 19450 }, { "epoch": 0.4286084163788307, "grad_norm": 0.3493861258029938, "learning_rate": 1.914248350919747e-05, "loss": 0.0595, "step": 19451 }, { "epoch": 0.42863045166834685, "grad_norm": 0.8751657605171204, "learning_rate": 1.914145459646204e-05, "loss": 0.0757, "step": 19452 }, { "epoch": 0.428652486957863, "grad_norm": 0.9182853102684021, "learning_rate": 1.9140425662631946e-05, "loss": 0.0881, "step": 19453 }, { "epoch": 0.4286745222473792, "grad_norm": 0.5823982954025269, "learning_rate": 1.913939670771245e-05, "loss": 0.0737, "step": 19454 }, { "epoch": 0.42869655753689534, "grad_norm": 1.4390391111373901, "learning_rate": 1.9138367731708787e-05, "loss": 0.0551, "step": 19455 }, { "epoch": 0.4287185928264115, "grad_norm": 0.6907930374145508, "learning_rate": 1.9137338734626198e-05, "loss": 0.0821, "step": 19456 }, { "epoch": 0.42874062811592767, "grad_norm": 0.5093914270401001, "learning_rate": 1.9136309716469926e-05, "loss": 0.0881, "step": 19457 }, { "epoch": 0.42876266340544383, "grad_norm": 0.6635039448738098, "learning_rate": 1.9135280677245205e-05, "loss": 0.0666, "step": 19458 }, { "epoch": 0.42878469869496, "grad_norm": 0.5994870066642761, "learning_rate": 1.9134251616957288e-05, "loss": 0.0732, "step": 19459 }, { "epoch": 0.42880673398447616, "grad_norm": 1.071045160293579, "learning_rate": 1.913322253561141e-05, "loss": 0.0879, "step": 19460 }, { "epoch": 0.42882876927399227, "grad_norm": 0.6743356585502625, "learning_rate": 1.9132193433212813e-05, "loss": 0.0977, "step": 19461 }, { "epoch": 0.42885080456350844, "grad_norm": 0.8332881331443787, "learning_rate": 1.9131164309766734e-05, "loss": 0.0916, "step": 19462 }, { "epoch": 0.4288728398530246, "grad_norm": 0.6704974174499512, "learning_rate": 1.9130135165278426e-05, "loss": 0.099, "step": 19463 }, { "epoch": 0.42889487514254077, "grad_norm": 0.5126529932022095, "learning_rate": 1.912910599975312e-05, "loss": 0.0944, "step": 19464 }, { "epoch": 0.42891691043205693, "grad_norm": 0.6103360056877136, "learning_rate": 1.9128076813196066e-05, "loss": 0.1042, "step": 19465 }, { "epoch": 0.4289389457215731, "grad_norm": 0.97482830286026, "learning_rate": 1.91270476056125e-05, "loss": 0.1, "step": 19466 }, { "epoch": 0.42896098101108926, "grad_norm": 0.5889822244644165, "learning_rate": 1.9126018377007672e-05, "loss": 0.0764, "step": 19467 }, { "epoch": 0.4289830163006054, "grad_norm": 0.44589686393737793, "learning_rate": 1.912498912738682e-05, "loss": 0.0589, "step": 19468 }, { "epoch": 0.4290050515901216, "grad_norm": 0.645915687084198, "learning_rate": 1.9123959856755184e-05, "loss": 0.0698, "step": 19469 }, { "epoch": 0.42902708687963775, "grad_norm": 0.748540461063385, "learning_rate": 1.912293056511801e-05, "loss": 0.1065, "step": 19470 }, { "epoch": 0.4290491221691539, "grad_norm": 1.9656351804733276, "learning_rate": 1.912190125248054e-05, "loss": 0.1322, "step": 19471 }, { "epoch": 0.4290711574586701, "grad_norm": 0.6267585754394531, "learning_rate": 1.912087191884801e-05, "loss": 0.0961, "step": 19472 }, { "epoch": 0.42909319274818625, "grad_norm": 0.6109200119972229, "learning_rate": 1.9119842564225676e-05, "loss": 0.0761, "step": 19473 }, { "epoch": 0.42911522803770236, "grad_norm": 1.1625845432281494, "learning_rate": 1.9118813188618773e-05, "loss": 0.0768, "step": 19474 }, { "epoch": 0.4291372633272185, "grad_norm": 1.0249319076538086, "learning_rate": 1.911778379203255e-05, "loss": 0.1131, "step": 19475 }, { "epoch": 0.4291592986167347, "grad_norm": 0.5061123371124268, "learning_rate": 1.9116754374472243e-05, "loss": 0.0906, "step": 19476 }, { "epoch": 0.42918133390625085, "grad_norm": 0.6532756090164185, "learning_rate": 1.91157249359431e-05, "loss": 0.0859, "step": 19477 }, { "epoch": 0.429203369195767, "grad_norm": 0.8914830684661865, "learning_rate": 1.9114695476450356e-05, "loss": 0.1082, "step": 19478 }, { "epoch": 0.4292254044852832, "grad_norm": 0.3712247610092163, "learning_rate": 1.9113665995999263e-05, "loss": 0.087, "step": 19479 }, { "epoch": 0.42924743977479934, "grad_norm": 0.5595368146896362, "learning_rate": 1.911263649459507e-05, "loss": 0.1136, "step": 19480 }, { "epoch": 0.4292694750643155, "grad_norm": 0.5984638333320618, "learning_rate": 1.911160697224301e-05, "loss": 0.0556, "step": 19481 }, { "epoch": 0.4292915103538317, "grad_norm": 0.5393499135971069, "learning_rate": 1.911057742894833e-05, "loss": 0.067, "step": 19482 }, { "epoch": 0.42931354564334784, "grad_norm": 0.5364243388175964, "learning_rate": 1.9109547864716274e-05, "loss": 0.0577, "step": 19483 }, { "epoch": 0.429335580932864, "grad_norm": 0.6218652725219727, "learning_rate": 1.9108518279552092e-05, "loss": 0.0867, "step": 19484 }, { "epoch": 0.42935761622238017, "grad_norm": 0.8639167547225952, "learning_rate": 1.910748867346102e-05, "loss": 0.0749, "step": 19485 }, { "epoch": 0.4293796515118963, "grad_norm": 0.2853609323501587, "learning_rate": 1.9106459046448307e-05, "loss": 0.0576, "step": 19486 }, { "epoch": 0.42940168680141244, "grad_norm": 0.4109005033969879, "learning_rate": 1.9105429398519194e-05, "loss": 0.0892, "step": 19487 }, { "epoch": 0.4294237220909286, "grad_norm": 0.4440452754497528, "learning_rate": 1.9104399729678928e-05, "loss": 0.0861, "step": 19488 }, { "epoch": 0.42944575738044477, "grad_norm": 0.6289401054382324, "learning_rate": 1.910337003993275e-05, "loss": 0.0767, "step": 19489 }, { "epoch": 0.42946779266996093, "grad_norm": 0.7559375166893005, "learning_rate": 1.9102340329285916e-05, "loss": 0.0895, "step": 19490 }, { "epoch": 0.4294898279594771, "grad_norm": 0.9920634627342224, "learning_rate": 1.9101310597743657e-05, "loss": 0.0861, "step": 19491 }, { "epoch": 0.42951186324899326, "grad_norm": 0.9300756454467773, "learning_rate": 1.910028084531123e-05, "loss": 0.1277, "step": 19492 }, { "epoch": 0.42953389853850943, "grad_norm": 0.7154580354690552, "learning_rate": 1.9099251071993865e-05, "loss": 0.0806, "step": 19493 }, { "epoch": 0.4295559338280256, "grad_norm": 0.7431346774101257, "learning_rate": 1.9098221277796822e-05, "loss": 0.0732, "step": 19494 }, { "epoch": 0.42957796911754176, "grad_norm": 0.559821367263794, "learning_rate": 1.9097191462725337e-05, "loss": 0.0871, "step": 19495 }, { "epoch": 0.4296000044070579, "grad_norm": 0.7726936936378479, "learning_rate": 1.9096161626784657e-05, "loss": 0.1401, "step": 19496 }, { "epoch": 0.4296220396965741, "grad_norm": 0.3232065737247467, "learning_rate": 1.9095131769980035e-05, "loss": 0.041, "step": 19497 }, { "epoch": 0.4296440749860902, "grad_norm": 0.7060723900794983, "learning_rate": 1.909410189231671e-05, "loss": 0.0947, "step": 19498 }, { "epoch": 0.42966611027560636, "grad_norm": 0.4568691551685333, "learning_rate": 1.9093071993799927e-05, "loss": 0.0638, "step": 19499 }, { "epoch": 0.4296881455651225, "grad_norm": 0.8525811433792114, "learning_rate": 1.9092042074434936e-05, "loss": 0.0919, "step": 19500 }, { "epoch": 0.4297101808546387, "grad_norm": 0.4303250312805176, "learning_rate": 1.9091012134226977e-05, "loss": 0.0942, "step": 19501 }, { "epoch": 0.42973221614415485, "grad_norm": 0.5738986134529114, "learning_rate": 1.90899821731813e-05, "loss": 0.078, "step": 19502 }, { "epoch": 0.429754251433671, "grad_norm": 0.5678343772888184, "learning_rate": 1.908895219130315e-05, "loss": 0.079, "step": 19503 }, { "epoch": 0.4297762867231872, "grad_norm": 1.261800765991211, "learning_rate": 1.9087922188597772e-05, "loss": 0.1354, "step": 19504 }, { "epoch": 0.42979832201270335, "grad_norm": 0.5630200505256653, "learning_rate": 1.9086892165070418e-05, "loss": 0.0917, "step": 19505 }, { "epoch": 0.4298203573022195, "grad_norm": 0.8920369148254395, "learning_rate": 1.9085862120726328e-05, "loss": 0.0864, "step": 19506 }, { "epoch": 0.4298423925917357, "grad_norm": 0.5061798095703125, "learning_rate": 1.9084832055570753e-05, "loss": 0.0814, "step": 19507 }, { "epoch": 0.42986442788125184, "grad_norm": 0.7409302592277527, "learning_rate": 1.9083801969608943e-05, "loss": 0.0916, "step": 19508 }, { "epoch": 0.429886463170768, "grad_norm": 0.9221582412719727, "learning_rate": 1.9082771862846135e-05, "loss": 0.0678, "step": 19509 }, { "epoch": 0.42990849846028417, "grad_norm": 0.854324221611023, "learning_rate": 1.9081741735287576e-05, "loss": 0.1219, "step": 19510 }, { "epoch": 0.4299305337498003, "grad_norm": 0.40553995966911316, "learning_rate": 1.9080711586938523e-05, "loss": 0.0643, "step": 19511 }, { "epoch": 0.42995256903931645, "grad_norm": 0.9068768620491028, "learning_rate": 1.9079681417804213e-05, "loss": 0.0882, "step": 19512 }, { "epoch": 0.4299746043288326, "grad_norm": 0.8772487044334412, "learning_rate": 1.9078651227889905e-05, "loss": 0.133, "step": 19513 }, { "epoch": 0.4299966396183488, "grad_norm": 0.5923371315002441, "learning_rate": 1.9077621017200833e-05, "loss": 0.0841, "step": 19514 }, { "epoch": 0.43001867490786494, "grad_norm": 0.6316279172897339, "learning_rate": 1.9076590785742252e-05, "loss": 0.0813, "step": 19515 }, { "epoch": 0.4300407101973811, "grad_norm": 0.42820921540260315, "learning_rate": 1.907556053351941e-05, "loss": 0.0628, "step": 19516 }, { "epoch": 0.43006274548689727, "grad_norm": 0.5379087328910828, "learning_rate": 1.9074530260537554e-05, "loss": 0.1074, "step": 19517 }, { "epoch": 0.43008478077641343, "grad_norm": 1.0175141096115112, "learning_rate": 1.9073499966801926e-05, "loss": 0.1119, "step": 19518 }, { "epoch": 0.4301068160659296, "grad_norm": 0.7046679854393005, "learning_rate": 1.9072469652317782e-05, "loss": 0.0835, "step": 19519 }, { "epoch": 0.43012885135544576, "grad_norm": 0.4681876599788666, "learning_rate": 1.9071439317090363e-05, "loss": 0.0821, "step": 19520 }, { "epoch": 0.4301508866449619, "grad_norm": 0.553246796131134, "learning_rate": 1.9070408961124922e-05, "loss": 0.0689, "step": 19521 }, { "epoch": 0.4301729219344781, "grad_norm": 0.5324656963348389, "learning_rate": 1.9069378584426703e-05, "loss": 0.0828, "step": 19522 }, { "epoch": 0.4301949572239942, "grad_norm": 0.3426104187965393, "learning_rate": 1.9068348187000964e-05, "loss": 0.0645, "step": 19523 }, { "epoch": 0.43021699251351037, "grad_norm": 0.6877561807632446, "learning_rate": 1.906731776885294e-05, "loss": 0.0741, "step": 19524 }, { "epoch": 0.43023902780302653, "grad_norm": 0.8903016448020935, "learning_rate": 1.9066287329987883e-05, "loss": 0.0935, "step": 19525 }, { "epoch": 0.4302610630925427, "grad_norm": 0.620051383972168, "learning_rate": 1.906525687041105e-05, "loss": 0.0877, "step": 19526 }, { "epoch": 0.43028309838205886, "grad_norm": 0.8386802673339844, "learning_rate": 1.906422639012768e-05, "loss": 0.0921, "step": 19527 }, { "epoch": 0.430305133671575, "grad_norm": 0.7190266251564026, "learning_rate": 1.9063195889143028e-05, "loss": 0.1224, "step": 19528 }, { "epoch": 0.4303271689610912, "grad_norm": 0.46717849373817444, "learning_rate": 1.9062165367462336e-05, "loss": 0.0871, "step": 19529 }, { "epoch": 0.43034920425060735, "grad_norm": 0.9449363350868225, "learning_rate": 1.9061134825090866e-05, "loss": 0.0859, "step": 19530 }, { "epoch": 0.4303712395401235, "grad_norm": 1.032808780670166, "learning_rate": 1.9060104262033852e-05, "loss": 0.0743, "step": 19531 }, { "epoch": 0.4303932748296397, "grad_norm": 0.5537463426589966, "learning_rate": 1.9059073678296552e-05, "loss": 0.0786, "step": 19532 }, { "epoch": 0.43041531011915585, "grad_norm": 0.3198679983615875, "learning_rate": 1.905804307388421e-05, "loss": 0.093, "step": 19533 }, { "epoch": 0.430437345408672, "grad_norm": 0.3696041405200958, "learning_rate": 1.905701244880208e-05, "loss": 0.0867, "step": 19534 }, { "epoch": 0.4304593806981881, "grad_norm": 0.5641444325447083, "learning_rate": 1.9055981803055413e-05, "loss": 0.0964, "step": 19535 }, { "epoch": 0.4304814159877043, "grad_norm": 0.5489187836647034, "learning_rate": 1.9054951136649456e-05, "loss": 0.0993, "step": 19536 }, { "epoch": 0.43050345127722045, "grad_norm": 0.562867283821106, "learning_rate": 1.9053920449589454e-05, "loss": 0.0861, "step": 19537 }, { "epoch": 0.4305254865667366, "grad_norm": 0.5177410840988159, "learning_rate": 1.9052889741880666e-05, "loss": 0.0704, "step": 19538 }, { "epoch": 0.4305475218562528, "grad_norm": 0.522618293762207, "learning_rate": 1.9051859013528333e-05, "loss": 0.0964, "step": 19539 }, { "epoch": 0.43056955714576894, "grad_norm": 0.8184006214141846, "learning_rate": 1.9050828264537714e-05, "loss": 0.1135, "step": 19540 }, { "epoch": 0.4305915924352851, "grad_norm": 0.6337719559669495, "learning_rate": 1.9049797494914047e-05, "loss": 0.084, "step": 19541 }, { "epoch": 0.4306136277248013, "grad_norm": 0.7918204069137573, "learning_rate": 1.9048766704662596e-05, "loss": 0.0803, "step": 19542 }, { "epoch": 0.43063566301431744, "grad_norm": 0.7217676043510437, "learning_rate": 1.90477358937886e-05, "loss": 0.1097, "step": 19543 }, { "epoch": 0.4306576983038336, "grad_norm": 0.5943143367767334, "learning_rate": 1.904670506229732e-05, "loss": 0.0923, "step": 19544 }, { "epoch": 0.43067973359334977, "grad_norm": 0.48199859261512756, "learning_rate": 1.9045674210194e-05, "loss": 0.078, "step": 19545 }, { "epoch": 0.43070176888286593, "grad_norm": 0.9384893178939819, "learning_rate": 1.904464333748389e-05, "loss": 0.0599, "step": 19546 }, { "epoch": 0.4307238041723821, "grad_norm": 0.4721405506134033, "learning_rate": 1.9043612444172244e-05, "loss": 0.0904, "step": 19547 }, { "epoch": 0.4307458394618982, "grad_norm": 0.44318294525146484, "learning_rate": 1.9042581530264307e-05, "loss": 0.0727, "step": 19548 }, { "epoch": 0.43076787475141437, "grad_norm": 1.1703705787658691, "learning_rate": 1.9041550595765344e-05, "loss": 0.0875, "step": 19549 }, { "epoch": 0.43078991004093053, "grad_norm": 0.5209753513336182, "learning_rate": 1.9040519640680587e-05, "loss": 0.0701, "step": 19550 }, { "epoch": 0.4308119453304467, "grad_norm": 0.49447405338287354, "learning_rate": 1.90394886650153e-05, "loss": 0.0878, "step": 19551 }, { "epoch": 0.43083398061996286, "grad_norm": 0.6047772169113159, "learning_rate": 1.903845766877473e-05, "loss": 0.0647, "step": 19552 }, { "epoch": 0.43085601590947903, "grad_norm": 0.556664228439331, "learning_rate": 1.9037426651964132e-05, "loss": 0.0658, "step": 19553 }, { "epoch": 0.4308780511989952, "grad_norm": 0.9646574258804321, "learning_rate": 1.9036395614588756e-05, "loss": 0.1081, "step": 19554 }, { "epoch": 0.43090008648851136, "grad_norm": 0.5528080463409424, "learning_rate": 1.903536455665385e-05, "loss": 0.0586, "step": 19555 }, { "epoch": 0.4309221217780275, "grad_norm": 0.34640324115753174, "learning_rate": 1.903433347816467e-05, "loss": 0.0793, "step": 19556 }, { "epoch": 0.4309441570675437, "grad_norm": 0.966579794883728, "learning_rate": 1.9033302379126464e-05, "loss": 0.1027, "step": 19557 }, { "epoch": 0.43096619235705985, "grad_norm": 0.5145645141601562, "learning_rate": 1.9032271259544486e-05, "loss": 0.0794, "step": 19558 }, { "epoch": 0.430988227646576, "grad_norm": 0.8438788056373596, "learning_rate": 1.903124011942399e-05, "loss": 0.0692, "step": 19559 }, { "epoch": 0.4310102629360921, "grad_norm": 0.68446284532547, "learning_rate": 1.9030208958770225e-05, "loss": 0.0796, "step": 19560 }, { "epoch": 0.4310322982256083, "grad_norm": 0.900406002998352, "learning_rate": 1.9029177777588447e-05, "loss": 0.0748, "step": 19561 }, { "epoch": 0.43105433351512445, "grad_norm": 0.5654412508010864, "learning_rate": 1.9028146575883902e-05, "loss": 0.0723, "step": 19562 }, { "epoch": 0.4310763688046406, "grad_norm": 0.45314016938209534, "learning_rate": 1.902711535366185e-05, "loss": 0.0798, "step": 19563 }, { "epoch": 0.4310984040941568, "grad_norm": 0.35865485668182373, "learning_rate": 1.902608411092754e-05, "loss": 0.0571, "step": 19564 }, { "epoch": 0.43112043938367295, "grad_norm": 0.5984081625938416, "learning_rate": 1.9025052847686224e-05, "loss": 0.1027, "step": 19565 }, { "epoch": 0.4311424746731891, "grad_norm": 0.8376408815383911, "learning_rate": 1.9024021563943156e-05, "loss": 0.0859, "step": 19566 }, { "epoch": 0.4311645099627053, "grad_norm": 0.7694987654685974, "learning_rate": 1.9022990259703588e-05, "loss": 0.0987, "step": 19567 }, { "epoch": 0.43118654525222144, "grad_norm": 0.8187200427055359, "learning_rate": 1.902195893497277e-05, "loss": 0.085, "step": 19568 }, { "epoch": 0.4312085805417376, "grad_norm": 0.7354122400283813, "learning_rate": 1.9020927589755956e-05, "loss": 0.0422, "step": 19569 }, { "epoch": 0.43123061583125377, "grad_norm": 0.7931056618690491, "learning_rate": 1.9019896224058412e-05, "loss": 0.1043, "step": 19570 }, { "epoch": 0.43125265112076994, "grad_norm": 0.799856424331665, "learning_rate": 1.9018864837885374e-05, "loss": 0.08, "step": 19571 }, { "epoch": 0.43127468641028605, "grad_norm": 0.9603537917137146, "learning_rate": 1.9017833431242105e-05, "loss": 0.078, "step": 19572 }, { "epoch": 0.4312967216998022, "grad_norm": 1.13177490234375, "learning_rate": 1.9016802004133852e-05, "loss": 0.1165, "step": 19573 }, { "epoch": 0.4313187569893184, "grad_norm": 0.7996910810470581, "learning_rate": 1.9015770556565874e-05, "loss": 0.0818, "step": 19574 }, { "epoch": 0.43134079227883454, "grad_norm": 0.5776373744010925, "learning_rate": 1.9014739088543423e-05, "loss": 0.0788, "step": 19575 }, { "epoch": 0.4313628275683507, "grad_norm": 0.7653857469558716, "learning_rate": 1.9013707600071757e-05, "loss": 0.0896, "step": 19576 }, { "epoch": 0.43138486285786687, "grad_norm": 0.5120125412940979, "learning_rate": 1.901267609115612e-05, "loss": 0.0464, "step": 19577 }, { "epoch": 0.43140689814738303, "grad_norm": 0.6890912652015686, "learning_rate": 1.9011644561801778e-05, "loss": 0.0675, "step": 19578 }, { "epoch": 0.4314289334368992, "grad_norm": 0.5009579062461853, "learning_rate": 1.9010613012013976e-05, "loss": 0.0797, "step": 19579 }, { "epoch": 0.43145096872641536, "grad_norm": 0.7846515774726868, "learning_rate": 1.9009581441797975e-05, "loss": 0.0964, "step": 19580 }, { "epoch": 0.4314730040159315, "grad_norm": 0.7173853516578674, "learning_rate": 1.9008549851159018e-05, "loss": 0.0859, "step": 19581 }, { "epoch": 0.4314950393054477, "grad_norm": 0.3536513149738312, "learning_rate": 1.900751824010237e-05, "loss": 0.0548, "step": 19582 }, { "epoch": 0.43151707459496386, "grad_norm": 0.8853411078453064, "learning_rate": 1.9006486608633285e-05, "loss": 0.0763, "step": 19583 }, { "epoch": 0.43153910988448, "grad_norm": 0.9048513770103455, "learning_rate": 1.9005454956757013e-05, "loss": 0.0859, "step": 19584 }, { "epoch": 0.43156114517399613, "grad_norm": 0.788834273815155, "learning_rate": 1.900442328447881e-05, "loss": 0.0782, "step": 19585 }, { "epoch": 0.4315831804635123, "grad_norm": 0.5869349837303162, "learning_rate": 1.9003391591803936e-05, "loss": 0.0624, "step": 19586 }, { "epoch": 0.43160521575302846, "grad_norm": 0.6880519986152649, "learning_rate": 1.900235987873764e-05, "loss": 0.0993, "step": 19587 }, { "epoch": 0.4316272510425446, "grad_norm": 0.7194599509239197, "learning_rate": 1.900132814528518e-05, "loss": 0.0883, "step": 19588 }, { "epoch": 0.4316492863320608, "grad_norm": 0.759860098361969, "learning_rate": 1.9000296391451805e-05, "loss": 0.0711, "step": 19589 }, { "epoch": 0.43167132162157695, "grad_norm": 0.7748228311538696, "learning_rate": 1.8999264617242776e-05, "loss": 0.0576, "step": 19590 }, { "epoch": 0.4316933569110931, "grad_norm": 0.8996946811676025, "learning_rate": 1.8998232822663353e-05, "loss": 0.1009, "step": 19591 }, { "epoch": 0.4317153922006093, "grad_norm": 0.6714535355567932, "learning_rate": 1.8997201007718782e-05, "loss": 0.0804, "step": 19592 }, { "epoch": 0.43173742749012545, "grad_norm": 0.4704703092575073, "learning_rate": 1.8996169172414324e-05, "loss": 0.0744, "step": 19593 }, { "epoch": 0.4317594627796416, "grad_norm": 1.1962034702301025, "learning_rate": 1.8995137316755236e-05, "loss": 0.084, "step": 19594 }, { "epoch": 0.4317814980691578, "grad_norm": 0.8445980548858643, "learning_rate": 1.899410544074677e-05, "loss": 0.0792, "step": 19595 }, { "epoch": 0.43180353335867394, "grad_norm": 0.9184519052505493, "learning_rate": 1.8993073544394183e-05, "loss": 0.1104, "step": 19596 }, { "epoch": 0.43182556864819005, "grad_norm": 0.8749151825904846, "learning_rate": 1.8992041627702733e-05, "loss": 0.0664, "step": 19597 }, { "epoch": 0.4318476039377062, "grad_norm": 0.523535966873169, "learning_rate": 1.8991009690677668e-05, "loss": 0.0458, "step": 19598 }, { "epoch": 0.4318696392272224, "grad_norm": 0.8763347864151001, "learning_rate": 1.8989977733324253e-05, "loss": 0.0865, "step": 19599 }, { "epoch": 0.43189167451673854, "grad_norm": 0.5930756330490112, "learning_rate": 1.8988945755647745e-05, "loss": 0.061, "step": 19600 }, { "epoch": 0.4319137098062547, "grad_norm": 0.7529799342155457, "learning_rate": 1.8987913757653394e-05, "loss": 0.0664, "step": 19601 }, { "epoch": 0.4319357450957709, "grad_norm": 0.8102133870124817, "learning_rate": 1.8986881739346465e-05, "loss": 0.0602, "step": 19602 }, { "epoch": 0.43195778038528704, "grad_norm": 0.7658849954605103, "learning_rate": 1.89858497007322e-05, "loss": 0.0657, "step": 19603 }, { "epoch": 0.4319798156748032, "grad_norm": 1.091429352760315, "learning_rate": 1.8984817641815876e-05, "loss": 0.1139, "step": 19604 }, { "epoch": 0.43200185096431937, "grad_norm": 0.3467572033405304, "learning_rate": 1.898378556260273e-05, "loss": 0.06, "step": 19605 }, { "epoch": 0.43202388625383553, "grad_norm": 0.9255666136741638, "learning_rate": 1.898275346309803e-05, "loss": 0.0966, "step": 19606 }, { "epoch": 0.4320459215433517, "grad_norm": 0.6747176051139832, "learning_rate": 1.8981721343307033e-05, "loss": 0.0827, "step": 19607 }, { "epoch": 0.43206795683286786, "grad_norm": 0.7550774812698364, "learning_rate": 1.8980689203234994e-05, "loss": 0.0863, "step": 19608 }, { "epoch": 0.432089992122384, "grad_norm": 0.7156953811645508, "learning_rate": 1.8979657042887168e-05, "loss": 0.0845, "step": 19609 }, { "epoch": 0.43211202741190013, "grad_norm": 0.9067752957344055, "learning_rate": 1.897862486226882e-05, "loss": 0.0878, "step": 19610 }, { "epoch": 0.4321340627014163, "grad_norm": 0.6883910894393921, "learning_rate": 1.89775926613852e-05, "loss": 0.1023, "step": 19611 }, { "epoch": 0.43215609799093246, "grad_norm": 0.7900739908218384, "learning_rate": 1.8976560440241564e-05, "loss": 0.0898, "step": 19612 }, { "epoch": 0.43217813328044863, "grad_norm": 1.066686987876892, "learning_rate": 1.897552819884317e-05, "loss": 0.0757, "step": 19613 }, { "epoch": 0.4322001685699648, "grad_norm": 0.4893118143081665, "learning_rate": 1.8974495937195287e-05, "loss": 0.0471, "step": 19614 }, { "epoch": 0.43222220385948096, "grad_norm": 1.0771180391311646, "learning_rate": 1.897346365530316e-05, "loss": 0.0906, "step": 19615 }, { "epoch": 0.4322442391489971, "grad_norm": 0.6544468998908997, "learning_rate": 1.8972431353172057e-05, "loss": 0.0868, "step": 19616 }, { "epoch": 0.4322662744385133, "grad_norm": 1.1161243915557861, "learning_rate": 1.8971399030807225e-05, "loss": 0.1145, "step": 19617 }, { "epoch": 0.43228830972802945, "grad_norm": 1.0135936737060547, "learning_rate": 1.897036668821393e-05, "loss": 0.1003, "step": 19618 }, { "epoch": 0.4323103450175456, "grad_norm": 1.0575789213180542, "learning_rate": 1.8969334325397432e-05, "loss": 0.1144, "step": 19619 }, { "epoch": 0.4323323803070618, "grad_norm": 1.338070273399353, "learning_rate": 1.8968301942362984e-05, "loss": 0.1251, "step": 19620 }, { "epoch": 0.43235441559657795, "grad_norm": 1.0232807397842407, "learning_rate": 1.8967269539115843e-05, "loss": 0.1137, "step": 19621 }, { "epoch": 0.43237645088609405, "grad_norm": 0.5590515732765198, "learning_rate": 1.8966237115661274e-05, "loss": 0.0602, "step": 19622 }, { "epoch": 0.4323984861756102, "grad_norm": 0.46799805760383606, "learning_rate": 1.8965204672004533e-05, "loss": 0.1141, "step": 19623 }, { "epoch": 0.4324205214651264, "grad_norm": 0.39994725584983826, "learning_rate": 1.8964172208150877e-05, "loss": 0.071, "step": 19624 }, { "epoch": 0.43244255675464255, "grad_norm": 0.7773271203041077, "learning_rate": 1.8963139724105564e-05, "loss": 0.0893, "step": 19625 }, { "epoch": 0.4324645920441587, "grad_norm": 0.748055100440979, "learning_rate": 1.896210721987386e-05, "loss": 0.0924, "step": 19626 }, { "epoch": 0.4324866273336749, "grad_norm": 0.5692700147628784, "learning_rate": 1.896107469546102e-05, "loss": 0.0695, "step": 19627 }, { "epoch": 0.43250866262319104, "grad_norm": 0.592090368270874, "learning_rate": 1.8960042150872295e-05, "loss": 0.0824, "step": 19628 }, { "epoch": 0.4325306979127072, "grad_norm": 1.086327075958252, "learning_rate": 1.8959009586112953e-05, "loss": 0.1033, "step": 19629 }, { "epoch": 0.43255273320222337, "grad_norm": 0.770516037940979, "learning_rate": 1.895797700118826e-05, "loss": 0.0899, "step": 19630 }, { "epoch": 0.43257476849173954, "grad_norm": 0.8602572083473206, "learning_rate": 1.895694439610346e-05, "loss": 0.1155, "step": 19631 }, { "epoch": 0.4325968037812557, "grad_norm": 0.8573782444000244, "learning_rate": 1.8955911770863824e-05, "loss": 0.117, "step": 19632 }, { "epoch": 0.43261883907077187, "grad_norm": 0.6104395985603333, "learning_rate": 1.8954879125474605e-05, "loss": 0.0845, "step": 19633 }, { "epoch": 0.432640874360288, "grad_norm": 0.6093587875366211, "learning_rate": 1.895384645994107e-05, "loss": 0.0631, "step": 19634 }, { "epoch": 0.43266290964980414, "grad_norm": 0.47571906447410583, "learning_rate": 1.895281377426847e-05, "loss": 0.0584, "step": 19635 }, { "epoch": 0.4326849449393203, "grad_norm": 0.6024608016014099, "learning_rate": 1.8951781068462073e-05, "loss": 0.0774, "step": 19636 }, { "epoch": 0.43270698022883647, "grad_norm": 0.8766686916351318, "learning_rate": 1.8950748342527136e-05, "loss": 0.0831, "step": 19637 }, { "epoch": 0.43272901551835263, "grad_norm": 0.9717397093772888, "learning_rate": 1.894971559646892e-05, "loss": 0.0965, "step": 19638 }, { "epoch": 0.4327510508078688, "grad_norm": 0.4378182888031006, "learning_rate": 1.894868283029268e-05, "loss": 0.0304, "step": 19639 }, { "epoch": 0.43277308609738496, "grad_norm": 0.758905291557312, "learning_rate": 1.8947650044003685e-05, "loss": 0.0639, "step": 19640 }, { "epoch": 0.4327951213869011, "grad_norm": 0.40725094079971313, "learning_rate": 1.894661723760719e-05, "loss": 0.0683, "step": 19641 }, { "epoch": 0.4328171566764173, "grad_norm": 1.1582399606704712, "learning_rate": 1.8945584411108457e-05, "loss": 0.1063, "step": 19642 }, { "epoch": 0.43283919196593346, "grad_norm": 0.7839481830596924, "learning_rate": 1.8944551564512752e-05, "loss": 0.1005, "step": 19643 }, { "epoch": 0.4328612272554496, "grad_norm": 0.4373505115509033, "learning_rate": 1.8943518697825324e-05, "loss": 0.0879, "step": 19644 }, { "epoch": 0.4328832625449658, "grad_norm": 0.9517388939857483, "learning_rate": 1.8942485811051444e-05, "loss": 0.0649, "step": 19645 }, { "epoch": 0.43290529783448195, "grad_norm": 0.5861859321594238, "learning_rate": 1.8941452904196365e-05, "loss": 0.0912, "step": 19646 }, { "epoch": 0.43292733312399806, "grad_norm": 0.7927149534225464, "learning_rate": 1.894041997726536e-05, "loss": 0.1088, "step": 19647 }, { "epoch": 0.4329493684135142, "grad_norm": 0.5759027004241943, "learning_rate": 1.8939387030263675e-05, "loss": 0.077, "step": 19648 }, { "epoch": 0.4329714037030304, "grad_norm": 0.5150131583213806, "learning_rate": 1.893835406319658e-05, "loss": 0.071, "step": 19649 }, { "epoch": 0.43299343899254655, "grad_norm": 0.813713550567627, "learning_rate": 1.8937321076069345e-05, "loss": 0.0666, "step": 19650 }, { "epoch": 0.4330154742820627, "grad_norm": 0.7132299542427063, "learning_rate": 1.893628806888722e-05, "loss": 0.0834, "step": 19651 }, { "epoch": 0.4330375095715789, "grad_norm": 0.5803778767585754, "learning_rate": 1.8935255041655463e-05, "loss": 0.0739, "step": 19652 }, { "epoch": 0.43305954486109505, "grad_norm": 0.8241205811500549, "learning_rate": 1.8934221994379343e-05, "loss": 0.076, "step": 19653 }, { "epoch": 0.4330815801506112, "grad_norm": 0.46486157178878784, "learning_rate": 1.893318892706412e-05, "loss": 0.0486, "step": 19654 }, { "epoch": 0.4331036154401274, "grad_norm": 1.0411289930343628, "learning_rate": 1.893215583971506e-05, "loss": 0.1106, "step": 19655 }, { "epoch": 0.43312565072964354, "grad_norm": 0.6585443615913391, "learning_rate": 1.8931122732337418e-05, "loss": 0.1254, "step": 19656 }, { "epoch": 0.4331476860191597, "grad_norm": 0.8601697087287903, "learning_rate": 1.8930089604936464e-05, "loss": 0.0862, "step": 19657 }, { "epoch": 0.43316972130867587, "grad_norm": 0.6033734083175659, "learning_rate": 1.8929056457517455e-05, "loss": 0.1083, "step": 19658 }, { "epoch": 0.433191756598192, "grad_norm": 0.7034724354743958, "learning_rate": 1.8928023290085652e-05, "loss": 0.0814, "step": 19659 }, { "epoch": 0.43321379188770814, "grad_norm": 0.7335402965545654, "learning_rate": 1.8926990102646325e-05, "loss": 0.0594, "step": 19660 }, { "epoch": 0.4332358271772243, "grad_norm": 0.7808897495269775, "learning_rate": 1.8925956895204725e-05, "loss": 0.0889, "step": 19661 }, { "epoch": 0.4332578624667405, "grad_norm": 0.7299380302429199, "learning_rate": 1.8924923667766127e-05, "loss": 0.068, "step": 19662 }, { "epoch": 0.43327989775625664, "grad_norm": 0.9543676376342773, "learning_rate": 1.8923890420335783e-05, "loss": 0.0784, "step": 19663 }, { "epoch": 0.4333019330457728, "grad_norm": 0.9796997308731079, "learning_rate": 1.8922857152918965e-05, "loss": 0.1078, "step": 19664 }, { "epoch": 0.43332396833528897, "grad_norm": 0.6480220556259155, "learning_rate": 1.892182386552093e-05, "loss": 0.0731, "step": 19665 }, { "epoch": 0.43334600362480513, "grad_norm": 0.8548622727394104, "learning_rate": 1.892079055814695e-05, "loss": 0.0932, "step": 19666 }, { "epoch": 0.4333680389143213, "grad_norm": 0.43456539511680603, "learning_rate": 1.891975723080227e-05, "loss": 0.0799, "step": 19667 }, { "epoch": 0.43339007420383746, "grad_norm": 0.8041388988494873, "learning_rate": 1.891872388349217e-05, "loss": 0.113, "step": 19668 }, { "epoch": 0.4334121094933536, "grad_norm": 0.6831251978874207, "learning_rate": 1.8917690516221908e-05, "loss": 0.0807, "step": 19669 }, { "epoch": 0.4334341447828698, "grad_norm": 0.7638205885887146, "learning_rate": 1.8916657128996745e-05, "loss": 0.0808, "step": 19670 }, { "epoch": 0.4334561800723859, "grad_norm": 0.5596970319747925, "learning_rate": 1.8915623721821947e-05, "loss": 0.0736, "step": 19671 }, { "epoch": 0.43347821536190206, "grad_norm": 0.6537156701087952, "learning_rate": 1.891459029470278e-05, "loss": 0.0873, "step": 19672 }, { "epoch": 0.43350025065141823, "grad_norm": 0.6356256604194641, "learning_rate": 1.8913556847644503e-05, "loss": 0.0887, "step": 19673 }, { "epoch": 0.4335222859409344, "grad_norm": 0.6830151677131653, "learning_rate": 1.8912523380652387e-05, "loss": 0.0651, "step": 19674 }, { "epoch": 0.43354432123045056, "grad_norm": 0.8107836842536926, "learning_rate": 1.891148989373169e-05, "loss": 0.074, "step": 19675 }, { "epoch": 0.4335663565199667, "grad_norm": 0.7446072101593018, "learning_rate": 1.8910456386887677e-05, "loss": 0.081, "step": 19676 }, { "epoch": 0.4335883918094829, "grad_norm": 0.8402604460716248, "learning_rate": 1.8909422860125613e-05, "loss": 0.0932, "step": 19677 }, { "epoch": 0.43361042709899905, "grad_norm": 0.7463235855102539, "learning_rate": 1.890838931345076e-05, "loss": 0.0879, "step": 19678 }, { "epoch": 0.4336324623885152, "grad_norm": 0.8007459044456482, "learning_rate": 1.890735574686838e-05, "loss": 0.0815, "step": 19679 }, { "epoch": 0.4336544976780314, "grad_norm": 0.713677167892456, "learning_rate": 1.8906322160383746e-05, "loss": 0.0659, "step": 19680 }, { "epoch": 0.43367653296754755, "grad_norm": 0.5772672295570374, "learning_rate": 1.8905288554002123e-05, "loss": 0.0883, "step": 19681 }, { "epoch": 0.4336985682570637, "grad_norm": 0.5840647220611572, "learning_rate": 1.8904254927728765e-05, "loss": 0.0649, "step": 19682 }, { "epoch": 0.4337206035465799, "grad_norm": 0.7181220650672913, "learning_rate": 1.8903221281568947e-05, "loss": 0.0586, "step": 19683 }, { "epoch": 0.433742638836096, "grad_norm": 0.5354883074760437, "learning_rate": 1.8902187615527927e-05, "loss": 0.0969, "step": 19684 }, { "epoch": 0.43376467412561215, "grad_norm": 0.6154065132141113, "learning_rate": 1.8901153929610978e-05, "loss": 0.0968, "step": 19685 }, { "epoch": 0.4337867094151283, "grad_norm": 0.6403517723083496, "learning_rate": 1.8900120223823354e-05, "loss": 0.0627, "step": 19686 }, { "epoch": 0.4338087447046445, "grad_norm": 0.5869688987731934, "learning_rate": 1.889908649817033e-05, "loss": 0.0804, "step": 19687 }, { "epoch": 0.43383077999416064, "grad_norm": 0.5695790648460388, "learning_rate": 1.8898052752657163e-05, "loss": 0.0718, "step": 19688 }, { "epoch": 0.4338528152836768, "grad_norm": 0.8198609948158264, "learning_rate": 1.889701898728913e-05, "loss": 0.0947, "step": 19689 }, { "epoch": 0.43387485057319297, "grad_norm": 0.5287544131278992, "learning_rate": 1.8895985202071486e-05, "loss": 0.0793, "step": 19690 }, { "epoch": 0.43389688586270914, "grad_norm": 0.5217995643615723, "learning_rate": 1.8894951397009503e-05, "loss": 0.0958, "step": 19691 }, { "epoch": 0.4339189211522253, "grad_norm": 0.6579933762550354, "learning_rate": 1.889391757210844e-05, "loss": 0.1075, "step": 19692 }, { "epoch": 0.43394095644174147, "grad_norm": 0.91813063621521, "learning_rate": 1.889288372737357e-05, "loss": 0.1094, "step": 19693 }, { "epoch": 0.43396299173125763, "grad_norm": 0.7906877398490906, "learning_rate": 1.8891849862810147e-05, "loss": 0.1089, "step": 19694 }, { "epoch": 0.4339850270207738, "grad_norm": 0.7507782578468323, "learning_rate": 1.8890815978423454e-05, "loss": 0.0559, "step": 19695 }, { "epoch": 0.4340070623102899, "grad_norm": 1.2478711605072021, "learning_rate": 1.8889782074218745e-05, "loss": 0.1009, "step": 19696 }, { "epoch": 0.43402909759980607, "grad_norm": 0.6663655042648315, "learning_rate": 1.8888748150201295e-05, "loss": 0.0936, "step": 19697 }, { "epoch": 0.43405113288932223, "grad_norm": 0.6061151027679443, "learning_rate": 1.8887714206376363e-05, "loss": 0.0848, "step": 19698 }, { "epoch": 0.4340731681788384, "grad_norm": 0.6004737019538879, "learning_rate": 1.888668024274922e-05, "loss": 0.0669, "step": 19699 }, { "epoch": 0.43409520346835456, "grad_norm": 0.8375561237335205, "learning_rate": 1.8885646259325124e-05, "loss": 0.083, "step": 19700 }, { "epoch": 0.4341172387578707, "grad_norm": 1.076749563217163, "learning_rate": 1.8884612256109354e-05, "loss": 0.104, "step": 19701 }, { "epoch": 0.4341392740473869, "grad_norm": 0.8811576962471008, "learning_rate": 1.8883578233107163e-05, "loss": 0.065, "step": 19702 }, { "epoch": 0.43416130933690306, "grad_norm": 0.4648202657699585, "learning_rate": 1.888254419032383e-05, "loss": 0.0734, "step": 19703 }, { "epoch": 0.4341833446264192, "grad_norm": 0.47111648321151733, "learning_rate": 1.888151012776462e-05, "loss": 0.0788, "step": 19704 }, { "epoch": 0.4342053799159354, "grad_norm": 0.9639028906822205, "learning_rate": 1.8880476045434797e-05, "loss": 0.1247, "step": 19705 }, { "epoch": 0.43422741520545155, "grad_norm": 0.6931335926055908, "learning_rate": 1.8879441943339627e-05, "loss": 0.0689, "step": 19706 }, { "epoch": 0.4342494504949677, "grad_norm": 0.8519006967544556, "learning_rate": 1.8878407821484378e-05, "loss": 0.0667, "step": 19707 }, { "epoch": 0.4342714857844838, "grad_norm": 0.6874947547912598, "learning_rate": 1.887737367987432e-05, "loss": 0.0852, "step": 19708 }, { "epoch": 0.434293521074, "grad_norm": 0.8907867670059204, "learning_rate": 1.8876339518514716e-05, "loss": 0.104, "step": 19709 }, { "epoch": 0.43431555636351615, "grad_norm": 1.13979971408844, "learning_rate": 1.887530533741084e-05, "loss": 0.1122, "step": 19710 }, { "epoch": 0.4343375916530323, "grad_norm": 0.757536768913269, "learning_rate": 1.887427113656795e-05, "loss": 0.1157, "step": 19711 }, { "epoch": 0.4343596269425485, "grad_norm": 0.42123478651046753, "learning_rate": 1.8873236915991325e-05, "loss": 0.0583, "step": 19712 }, { "epoch": 0.43438166223206465, "grad_norm": 0.5899537205696106, "learning_rate": 1.8872202675686226e-05, "loss": 0.0675, "step": 19713 }, { "epoch": 0.4344036975215808, "grad_norm": 0.8292330503463745, "learning_rate": 1.8871168415657928e-05, "loss": 0.0757, "step": 19714 }, { "epoch": 0.434425732811097, "grad_norm": 0.6789239645004272, "learning_rate": 1.8870134135911685e-05, "loss": 0.0882, "step": 19715 }, { "epoch": 0.43444776810061314, "grad_norm": 1.0350421667099, "learning_rate": 1.8869099836452777e-05, "loss": 0.0755, "step": 19716 }, { "epoch": 0.4344698033901293, "grad_norm": 0.3690941035747528, "learning_rate": 1.8868065517286464e-05, "loss": 0.088, "step": 19717 }, { "epoch": 0.43449183867964547, "grad_norm": 0.6117200255393982, "learning_rate": 1.8867031178418026e-05, "loss": 0.0848, "step": 19718 }, { "epoch": 0.43451387396916163, "grad_norm": 0.5823532938957214, "learning_rate": 1.8865996819852718e-05, "loss": 0.0626, "step": 19719 }, { "epoch": 0.4345359092586778, "grad_norm": 0.423632949590683, "learning_rate": 1.8864962441595822e-05, "loss": 0.0691, "step": 19720 }, { "epoch": 0.4345579445481939, "grad_norm": 0.7682319283485413, "learning_rate": 1.8863928043652592e-05, "loss": 0.0941, "step": 19721 }, { "epoch": 0.4345799798377101, "grad_norm": 0.8768391013145447, "learning_rate": 1.8862893626028312e-05, "loss": 0.0702, "step": 19722 }, { "epoch": 0.43460201512722624, "grad_norm": 0.685753345489502, "learning_rate": 1.886185918872824e-05, "loss": 0.097, "step": 19723 }, { "epoch": 0.4346240504167424, "grad_norm": 0.5851603150367737, "learning_rate": 1.8860824731757645e-05, "loss": 0.0809, "step": 19724 }, { "epoch": 0.43464608570625857, "grad_norm": 0.6892154216766357, "learning_rate": 1.8859790255121805e-05, "loss": 0.0832, "step": 19725 }, { "epoch": 0.43466812099577473, "grad_norm": 0.5412160754203796, "learning_rate": 1.885875575882598e-05, "loss": 0.0928, "step": 19726 }, { "epoch": 0.4346901562852909, "grad_norm": 0.1612929105758667, "learning_rate": 1.8857721242875444e-05, "loss": 0.0751, "step": 19727 }, { "epoch": 0.43471219157480706, "grad_norm": 0.6066175103187561, "learning_rate": 1.8856686707275462e-05, "loss": 0.0855, "step": 19728 }, { "epoch": 0.4347342268643232, "grad_norm": 1.2670557498931885, "learning_rate": 1.8855652152031313e-05, "loss": 0.0675, "step": 19729 }, { "epoch": 0.4347562621538394, "grad_norm": 0.8565493226051331, "learning_rate": 1.8854617577148257e-05, "loss": 0.0749, "step": 19730 }, { "epoch": 0.43477829744335555, "grad_norm": 0.8123986721038818, "learning_rate": 1.8853582982631564e-05, "loss": 0.1371, "step": 19731 }, { "epoch": 0.4348003327328717, "grad_norm": 0.6912162899971008, "learning_rate": 1.885254836848651e-05, "loss": 0.1148, "step": 19732 }, { "epoch": 0.43482236802238783, "grad_norm": 0.6333045959472656, "learning_rate": 1.885151373471836e-05, "loss": 0.0776, "step": 19733 }, { "epoch": 0.434844403311904, "grad_norm": 0.7436322569847107, "learning_rate": 1.8850479081332383e-05, "loss": 0.0951, "step": 19734 }, { "epoch": 0.43486643860142016, "grad_norm": 0.8087795376777649, "learning_rate": 1.8849444408333852e-05, "loss": 0.0684, "step": 19735 }, { "epoch": 0.4348884738909363, "grad_norm": 0.5036383271217346, "learning_rate": 1.8848409715728036e-05, "loss": 0.0697, "step": 19736 }, { "epoch": 0.4349105091804525, "grad_norm": 0.33348995447158813, "learning_rate": 1.8847375003520208e-05, "loss": 0.0668, "step": 19737 }, { "epoch": 0.43493254446996865, "grad_norm": 0.5773749947547913, "learning_rate": 1.884634027171564e-05, "loss": 0.0616, "step": 19738 }, { "epoch": 0.4349545797594848, "grad_norm": 0.745330274105072, "learning_rate": 1.8845305520319594e-05, "loss": 0.0824, "step": 19739 }, { "epoch": 0.434976615049001, "grad_norm": 0.8743148446083069, "learning_rate": 1.884427074933734e-05, "loss": 0.0703, "step": 19740 }, { "epoch": 0.43499865033851715, "grad_norm": 0.5426576733589172, "learning_rate": 1.884323595877416e-05, "loss": 0.0914, "step": 19741 }, { "epoch": 0.4350206856280333, "grad_norm": 0.7322063446044922, "learning_rate": 1.8842201148635315e-05, "loss": 0.0675, "step": 19742 }, { "epoch": 0.4350427209175495, "grad_norm": 0.5198688507080078, "learning_rate": 1.884116631892608e-05, "loss": 0.0773, "step": 19743 }, { "epoch": 0.43506475620706564, "grad_norm": 0.8021381497383118, "learning_rate": 1.8840131469651727e-05, "loss": 0.0676, "step": 19744 }, { "epoch": 0.43508679149658175, "grad_norm": 0.9203630685806274, "learning_rate": 1.883909660081752e-05, "loss": 0.1012, "step": 19745 }, { "epoch": 0.4351088267860979, "grad_norm": 0.5624995231628418, "learning_rate": 1.8838061712428744e-05, "loss": 0.077, "step": 19746 }, { "epoch": 0.4351308620756141, "grad_norm": 0.808157205581665, "learning_rate": 1.8837026804490654e-05, "loss": 0.0983, "step": 19747 }, { "epoch": 0.43515289736513024, "grad_norm": 0.47109729051589966, "learning_rate": 1.8835991877008535e-05, "loss": 0.111, "step": 19748 }, { "epoch": 0.4351749326546464, "grad_norm": 0.5129961371421814, "learning_rate": 1.883495692998765e-05, "loss": 0.0785, "step": 19749 }, { "epoch": 0.43519696794416257, "grad_norm": 0.83721524477005, "learning_rate": 1.883392196343327e-05, "loss": 0.0962, "step": 19750 }, { "epoch": 0.43521900323367874, "grad_norm": 0.9999495148658752, "learning_rate": 1.883288697735067e-05, "loss": 0.0842, "step": 19751 }, { "epoch": 0.4352410385231949, "grad_norm": 0.724102258682251, "learning_rate": 1.8831851971745123e-05, "loss": 0.1055, "step": 19752 }, { "epoch": 0.43526307381271107, "grad_norm": 0.8774630427360535, "learning_rate": 1.8830816946621904e-05, "loss": 0.1082, "step": 19753 }, { "epoch": 0.43528510910222723, "grad_norm": 0.5493260025978088, "learning_rate": 1.882978190198627e-05, "loss": 0.063, "step": 19754 }, { "epoch": 0.4353071443917434, "grad_norm": 1.0344510078430176, "learning_rate": 1.882874683784351e-05, "loss": 0.0981, "step": 19755 }, { "epoch": 0.43532917968125956, "grad_norm": 0.6545920372009277, "learning_rate": 1.8827711754198888e-05, "loss": 0.1006, "step": 19756 }, { "epoch": 0.4353512149707757, "grad_norm": 0.9078889489173889, "learning_rate": 1.8826676651057673e-05, "loss": 0.0699, "step": 19757 }, { "epoch": 0.43537325026029183, "grad_norm": 0.5788447856903076, "learning_rate": 1.8825641528425147e-05, "loss": 0.0686, "step": 19758 }, { "epoch": 0.435395285549808, "grad_norm": 0.7134201526641846, "learning_rate": 1.8824606386306573e-05, "loss": 0.0836, "step": 19759 }, { "epoch": 0.43541732083932416, "grad_norm": 0.6204076409339905, "learning_rate": 1.8823571224707234e-05, "loss": 0.0566, "step": 19760 }, { "epoch": 0.4354393561288403, "grad_norm": 1.3319603204727173, "learning_rate": 1.8822536043632395e-05, "loss": 0.0887, "step": 19761 }, { "epoch": 0.4354613914183565, "grad_norm": 0.4509134292602539, "learning_rate": 1.8821500843087328e-05, "loss": 0.07, "step": 19762 }, { "epoch": 0.43548342670787266, "grad_norm": 0.8577189445495605, "learning_rate": 1.882046562307731e-05, "loss": 0.1228, "step": 19763 }, { "epoch": 0.4355054619973888, "grad_norm": 0.834928572177887, "learning_rate": 1.8819430383607605e-05, "loss": 0.0687, "step": 19764 }, { "epoch": 0.435527497286905, "grad_norm": 0.7021815776824951, "learning_rate": 1.88183951246835e-05, "loss": 0.0737, "step": 19765 }, { "epoch": 0.43554953257642115, "grad_norm": 0.40077677369117737, "learning_rate": 1.8817359846310257e-05, "loss": 0.0927, "step": 19766 }, { "epoch": 0.4355715678659373, "grad_norm": 0.6669295430183411, "learning_rate": 1.8816324548493153e-05, "loss": 0.058, "step": 19767 }, { "epoch": 0.4355936031554535, "grad_norm": 0.7422541975975037, "learning_rate": 1.8815289231237466e-05, "loss": 0.0905, "step": 19768 }, { "epoch": 0.43561563844496964, "grad_norm": 0.7414654493331909, "learning_rate": 1.8814253894548465e-05, "loss": 0.082, "step": 19769 }, { "epoch": 0.43563767373448575, "grad_norm": 0.920674741268158, "learning_rate": 1.8813218538431418e-05, "loss": 0.1039, "step": 19770 }, { "epoch": 0.4356597090240019, "grad_norm": 0.6531094312667847, "learning_rate": 1.8812183162891608e-05, "loss": 0.0622, "step": 19771 }, { "epoch": 0.4356817443135181, "grad_norm": 0.7272183895111084, "learning_rate": 1.8811147767934302e-05, "loss": 0.0972, "step": 19772 }, { "epoch": 0.43570377960303425, "grad_norm": 0.7639836072921753, "learning_rate": 1.8810112353564784e-05, "loss": 0.1075, "step": 19773 }, { "epoch": 0.4357258148925504, "grad_norm": 0.41592174768447876, "learning_rate": 1.8809076919788314e-05, "loss": 0.0894, "step": 19774 }, { "epoch": 0.4357478501820666, "grad_norm": 0.84074866771698, "learning_rate": 1.880804146661017e-05, "loss": 0.0778, "step": 19775 }, { "epoch": 0.43576988547158274, "grad_norm": 0.6184937953948975, "learning_rate": 1.8807005994035636e-05, "loss": 0.0709, "step": 19776 }, { "epoch": 0.4357919207610989, "grad_norm": 1.2197787761688232, "learning_rate": 1.8805970502069978e-05, "loss": 0.1004, "step": 19777 }, { "epoch": 0.43581395605061507, "grad_norm": 0.8469704389572144, "learning_rate": 1.880493499071847e-05, "loss": 0.1012, "step": 19778 }, { "epoch": 0.43583599134013123, "grad_norm": 0.6390593647956848, "learning_rate": 1.8803899459986387e-05, "loss": 0.0888, "step": 19779 }, { "epoch": 0.4358580266296474, "grad_norm": 0.5501484274864197, "learning_rate": 1.8802863909879002e-05, "loss": 0.0609, "step": 19780 }, { "epoch": 0.43588006191916356, "grad_norm": 0.8353363275527954, "learning_rate": 1.8801828340401596e-05, "loss": 0.1135, "step": 19781 }, { "epoch": 0.4359020972086797, "grad_norm": 0.9372314214706421, "learning_rate": 1.8800792751559435e-05, "loss": 0.1073, "step": 19782 }, { "epoch": 0.43592413249819584, "grad_norm": 0.5100812315940857, "learning_rate": 1.87997571433578e-05, "loss": 0.0665, "step": 19783 }, { "epoch": 0.435946167787712, "grad_norm": 0.8227962255477905, "learning_rate": 1.8798721515801968e-05, "loss": 0.1329, "step": 19784 }, { "epoch": 0.43596820307722817, "grad_norm": 0.32406148314476013, "learning_rate": 1.8797685868897206e-05, "loss": 0.0723, "step": 19785 }, { "epoch": 0.43599023836674433, "grad_norm": 0.3888138234615326, "learning_rate": 1.8796650202648796e-05, "loss": 0.048, "step": 19786 }, { "epoch": 0.4360122736562605, "grad_norm": 0.7561486959457397, "learning_rate": 1.879561451706201e-05, "loss": 0.0868, "step": 19787 }, { "epoch": 0.43603430894577666, "grad_norm": 0.9026308655738831, "learning_rate": 1.8794578812142123e-05, "loss": 0.0949, "step": 19788 }, { "epoch": 0.4360563442352928, "grad_norm": 0.8540955185890198, "learning_rate": 1.8793543087894413e-05, "loss": 0.093, "step": 19789 }, { "epoch": 0.436078379524809, "grad_norm": 0.8110577464103699, "learning_rate": 1.879250734432415e-05, "loss": 0.1206, "step": 19790 }, { "epoch": 0.43610041481432515, "grad_norm": 0.6132630705833435, "learning_rate": 1.8791471581436617e-05, "loss": 0.1108, "step": 19791 }, { "epoch": 0.4361224501038413, "grad_norm": 1.0689082145690918, "learning_rate": 1.879043579923709e-05, "loss": 0.1006, "step": 19792 }, { "epoch": 0.4361444853933575, "grad_norm": 0.5576449632644653, "learning_rate": 1.8789399997730833e-05, "loss": 0.0945, "step": 19793 }, { "epoch": 0.43616652068287365, "grad_norm": 0.7743450999259949, "learning_rate": 1.8788364176923136e-05, "loss": 0.096, "step": 19794 }, { "epoch": 0.43618855597238976, "grad_norm": 0.6555290222167969, "learning_rate": 1.8787328336819262e-05, "loss": 0.0814, "step": 19795 }, { "epoch": 0.4362105912619059, "grad_norm": 0.65064537525177, "learning_rate": 1.8786292477424495e-05, "loss": 0.0776, "step": 19796 }, { "epoch": 0.4362326265514221, "grad_norm": 0.7343613505363464, "learning_rate": 1.878525659874411e-05, "loss": 0.0837, "step": 19797 }, { "epoch": 0.43625466184093825, "grad_norm": 0.483023464679718, "learning_rate": 1.8784220700783392e-05, "loss": 0.0528, "step": 19798 }, { "epoch": 0.4362766971304544, "grad_norm": 0.5948441624641418, "learning_rate": 1.87831847835476e-05, "loss": 0.1179, "step": 19799 }, { "epoch": 0.4362987324199706, "grad_norm": 0.5756195783615112, "learning_rate": 1.8782148847042023e-05, "loss": 0.0495, "step": 19800 }, { "epoch": 0.43632076770948675, "grad_norm": 0.5679880380630493, "learning_rate": 1.8781112891271935e-05, "loss": 0.086, "step": 19801 }, { "epoch": 0.4363428029990029, "grad_norm": 0.6669378280639648, "learning_rate": 1.8780076916242604e-05, "loss": 0.0692, "step": 19802 }, { "epoch": 0.4363648382885191, "grad_norm": 0.5170981884002686, "learning_rate": 1.877904092195932e-05, "loss": 0.0821, "step": 19803 }, { "epoch": 0.43638687357803524, "grad_norm": 0.6591261625289917, "learning_rate": 1.8778004908427355e-05, "loss": 0.0829, "step": 19804 }, { "epoch": 0.4364089088675514, "grad_norm": 0.6511885523796082, "learning_rate": 1.877696887565198e-05, "loss": 0.074, "step": 19805 }, { "epoch": 0.43643094415706757, "grad_norm": 1.0031167268753052, "learning_rate": 1.8775932823638485e-05, "loss": 0.1202, "step": 19806 }, { "epoch": 0.4364529794465837, "grad_norm": 0.5433199405670166, "learning_rate": 1.877489675239213e-05, "loss": 0.0711, "step": 19807 }, { "epoch": 0.43647501473609984, "grad_norm": 0.6365828514099121, "learning_rate": 1.8773860661918207e-05, "loss": 0.0828, "step": 19808 }, { "epoch": 0.436497050025616, "grad_norm": 0.2572646141052246, "learning_rate": 1.877282455222199e-05, "loss": 0.0539, "step": 19809 }, { "epoch": 0.43651908531513217, "grad_norm": 0.763075053691864, "learning_rate": 1.8771788423308753e-05, "loss": 0.0921, "step": 19810 }, { "epoch": 0.43654112060464834, "grad_norm": 0.5187652111053467, "learning_rate": 1.877075227518377e-05, "loss": 0.0862, "step": 19811 }, { "epoch": 0.4365631558941645, "grad_norm": 0.7543107867240906, "learning_rate": 1.876971610785233e-05, "loss": 0.0902, "step": 19812 }, { "epoch": 0.43658519118368067, "grad_norm": 0.9136228561401367, "learning_rate": 1.87686799213197e-05, "loss": 0.0682, "step": 19813 }, { "epoch": 0.43660722647319683, "grad_norm": 0.6623810529708862, "learning_rate": 1.876764371559116e-05, "loss": 0.1099, "step": 19814 }, { "epoch": 0.436629261762713, "grad_norm": 0.5097702145576477, "learning_rate": 1.8766607490672e-05, "loss": 0.0706, "step": 19815 }, { "epoch": 0.43665129705222916, "grad_norm": 0.9188487529754639, "learning_rate": 1.8765571246567482e-05, "loss": 0.1132, "step": 19816 }, { "epoch": 0.4366733323417453, "grad_norm": 0.7810351848602295, "learning_rate": 1.8764534983282892e-05, "loss": 0.0981, "step": 19817 }, { "epoch": 0.4366953676312615, "grad_norm": 0.8592840433120728, "learning_rate": 1.8763498700823502e-05, "loss": 0.0997, "step": 19818 }, { "epoch": 0.43671740292077765, "grad_norm": 0.8483101725578308, "learning_rate": 1.8762462399194598e-05, "loss": 0.0888, "step": 19819 }, { "epoch": 0.43673943821029376, "grad_norm": 0.8932023048400879, "learning_rate": 1.876142607840146e-05, "loss": 0.0548, "step": 19820 }, { "epoch": 0.4367614734998099, "grad_norm": 0.6889813542366028, "learning_rate": 1.8760389738449355e-05, "loss": 0.0823, "step": 19821 }, { "epoch": 0.4367835087893261, "grad_norm": 0.6390479207038879, "learning_rate": 1.875935337934357e-05, "loss": 0.1137, "step": 19822 }, { "epoch": 0.43680554407884226, "grad_norm": 0.4743596017360687, "learning_rate": 1.8758317001089385e-05, "loss": 0.0643, "step": 19823 }, { "epoch": 0.4368275793683584, "grad_norm": 0.5877402424812317, "learning_rate": 1.8757280603692077e-05, "loss": 0.079, "step": 19824 }, { "epoch": 0.4368496146578746, "grad_norm": 0.9602163434028625, "learning_rate": 1.8756244187156923e-05, "loss": 0.0825, "step": 19825 }, { "epoch": 0.43687164994739075, "grad_norm": 0.5545939803123474, "learning_rate": 1.8755207751489198e-05, "loss": 0.0661, "step": 19826 }, { "epoch": 0.4368936852369069, "grad_norm": 0.7199000716209412, "learning_rate": 1.8754171296694193e-05, "loss": 0.0877, "step": 19827 }, { "epoch": 0.4369157205264231, "grad_norm": 0.37440529465675354, "learning_rate": 1.8753134822777174e-05, "loss": 0.0685, "step": 19828 }, { "epoch": 0.43693775581593924, "grad_norm": 0.7474768757820129, "learning_rate": 1.8752098329743432e-05, "loss": 0.0495, "step": 19829 }, { "epoch": 0.4369597911054554, "grad_norm": 0.5738281607627869, "learning_rate": 1.8751061817598238e-05, "loss": 0.0767, "step": 19830 }, { "epoch": 0.4369818263949716, "grad_norm": 0.635632336139679, "learning_rate": 1.8750025286346877e-05, "loss": 0.0922, "step": 19831 }, { "epoch": 0.4370038616844877, "grad_norm": 0.9513305425643921, "learning_rate": 1.8748988735994622e-05, "loss": 0.1117, "step": 19832 }, { "epoch": 0.43702589697400385, "grad_norm": 0.8559678792953491, "learning_rate": 1.8747952166546765e-05, "loss": 0.0981, "step": 19833 }, { "epoch": 0.43704793226352, "grad_norm": 0.7735841274261475, "learning_rate": 1.8746915578008574e-05, "loss": 0.1131, "step": 19834 }, { "epoch": 0.4370699675530362, "grad_norm": 0.6119497418403625, "learning_rate": 1.874587897038533e-05, "loss": 0.0763, "step": 19835 }, { "epoch": 0.43709200284255234, "grad_norm": 0.681017279624939, "learning_rate": 1.8744842343682316e-05, "loss": 0.0854, "step": 19836 }, { "epoch": 0.4371140381320685, "grad_norm": 0.3568834662437439, "learning_rate": 1.874380569790481e-05, "loss": 0.0457, "step": 19837 }, { "epoch": 0.43713607342158467, "grad_norm": 0.6904649138450623, "learning_rate": 1.87427690330581e-05, "loss": 0.0815, "step": 19838 }, { "epoch": 0.43715810871110083, "grad_norm": 1.13533353805542, "learning_rate": 1.8741732349147457e-05, "loss": 0.1009, "step": 19839 }, { "epoch": 0.437180144000617, "grad_norm": 0.5101318359375, "learning_rate": 1.8740695646178167e-05, "loss": 0.0527, "step": 19840 }, { "epoch": 0.43720217929013316, "grad_norm": 0.7714985609054565, "learning_rate": 1.873965892415551e-05, "loss": 0.0836, "step": 19841 }, { "epoch": 0.43722421457964933, "grad_norm": 0.7454736828804016, "learning_rate": 1.8738622183084763e-05, "loss": 0.0949, "step": 19842 }, { "epoch": 0.4372462498691655, "grad_norm": 0.4919555187225342, "learning_rate": 1.8737585422971202e-05, "loss": 0.0633, "step": 19843 }, { "epoch": 0.4372682851586816, "grad_norm": 0.5203378200531006, "learning_rate": 1.873654864382012e-05, "loss": 0.0563, "step": 19844 }, { "epoch": 0.43729032044819777, "grad_norm": 0.5988035202026367, "learning_rate": 1.873551184563679e-05, "loss": 0.1331, "step": 19845 }, { "epoch": 0.43731235573771393, "grad_norm": 0.9520304203033447, "learning_rate": 1.8734475028426497e-05, "loss": 0.0842, "step": 19846 }, { "epoch": 0.4373343910272301, "grad_norm": 0.4791048467159271, "learning_rate": 1.8733438192194517e-05, "loss": 0.088, "step": 19847 }, { "epoch": 0.43735642631674626, "grad_norm": 0.7569254040718079, "learning_rate": 1.8732401336946135e-05, "loss": 0.0759, "step": 19848 }, { "epoch": 0.4373784616062624, "grad_norm": 0.6440072655677795, "learning_rate": 1.8731364462686638e-05, "loss": 0.0864, "step": 19849 }, { "epoch": 0.4374004968957786, "grad_norm": 0.6531131267547607, "learning_rate": 1.8730327569421293e-05, "loss": 0.0802, "step": 19850 }, { "epoch": 0.43742253218529475, "grad_norm": 0.8003441095352173, "learning_rate": 1.8729290657155394e-05, "loss": 0.1156, "step": 19851 }, { "epoch": 0.4374445674748109, "grad_norm": 0.5563454627990723, "learning_rate": 1.8728253725894215e-05, "loss": 0.076, "step": 19852 }, { "epoch": 0.4374666027643271, "grad_norm": 0.5254711508750916, "learning_rate": 1.8727216775643038e-05, "loss": 0.0749, "step": 19853 }, { "epoch": 0.43748863805384325, "grad_norm": 0.42121511697769165, "learning_rate": 1.8726179806407153e-05, "loss": 0.1018, "step": 19854 }, { "epoch": 0.4375106733433594, "grad_norm": 0.4826483428478241, "learning_rate": 1.8725142818191834e-05, "loss": 0.0659, "step": 19855 }, { "epoch": 0.4375327086328756, "grad_norm": 0.6354314088821411, "learning_rate": 1.8724105811002364e-05, "loss": 0.0727, "step": 19856 }, { "epoch": 0.4375547439223917, "grad_norm": 0.590878427028656, "learning_rate": 1.872306878484403e-05, "loss": 0.0831, "step": 19857 }, { "epoch": 0.43757677921190785, "grad_norm": 0.6023603081703186, "learning_rate": 1.8722031739722105e-05, "loss": 0.0835, "step": 19858 }, { "epoch": 0.437598814501424, "grad_norm": 0.6885054111480713, "learning_rate": 1.8720994675641878e-05, "loss": 0.0751, "step": 19859 }, { "epoch": 0.4376208497909402, "grad_norm": 0.5227469801902771, "learning_rate": 1.871995759260863e-05, "loss": 0.0893, "step": 19860 }, { "epoch": 0.43764288508045635, "grad_norm": 0.8401712775230408, "learning_rate": 1.8718920490627646e-05, "loss": 0.0874, "step": 19861 }, { "epoch": 0.4376649203699725, "grad_norm": 0.7820581197738647, "learning_rate": 1.87178833697042e-05, "loss": 0.1005, "step": 19862 }, { "epoch": 0.4376869556594887, "grad_norm": 0.6605895161628723, "learning_rate": 1.8716846229843585e-05, "loss": 0.0682, "step": 19863 }, { "epoch": 0.43770899094900484, "grad_norm": 0.6424611210823059, "learning_rate": 1.871580907105108e-05, "loss": 0.0755, "step": 19864 }, { "epoch": 0.437731026238521, "grad_norm": 0.5414587259292603, "learning_rate": 1.8714771893331964e-05, "loss": 0.0795, "step": 19865 }, { "epoch": 0.43775306152803717, "grad_norm": 0.8032054305076599, "learning_rate": 1.871373469669152e-05, "loss": 0.0594, "step": 19866 }, { "epoch": 0.43777509681755333, "grad_norm": 0.8434147238731384, "learning_rate": 1.871269748113504e-05, "loss": 0.0429, "step": 19867 }, { "epoch": 0.4377971321070695, "grad_norm": 0.6483018398284912, "learning_rate": 1.8711660246667798e-05, "loss": 0.0472, "step": 19868 }, { "epoch": 0.4378191673965856, "grad_norm": 0.49514126777648926, "learning_rate": 1.8710622993295077e-05, "loss": 0.0733, "step": 19869 }, { "epoch": 0.43784120268610177, "grad_norm": 0.6505089998245239, "learning_rate": 1.8709585721022167e-05, "loss": 0.0971, "step": 19870 }, { "epoch": 0.43786323797561794, "grad_norm": 0.5145529508590698, "learning_rate": 1.8708548429854344e-05, "loss": 0.0818, "step": 19871 }, { "epoch": 0.4378852732651341, "grad_norm": 0.5503087639808655, "learning_rate": 1.8707511119796903e-05, "loss": 0.0692, "step": 19872 }, { "epoch": 0.43790730855465027, "grad_norm": 0.5936728715896606, "learning_rate": 1.8706473790855114e-05, "loss": 0.088, "step": 19873 }, { "epoch": 0.43792934384416643, "grad_norm": 0.6153205037117004, "learning_rate": 1.8705436443034265e-05, "loss": 0.1137, "step": 19874 }, { "epoch": 0.4379513791336826, "grad_norm": 0.6005422472953796, "learning_rate": 1.8704399076339648e-05, "loss": 0.0578, "step": 19875 }, { "epoch": 0.43797341442319876, "grad_norm": 0.8635044693946838, "learning_rate": 1.8703361690776532e-05, "loss": 0.0835, "step": 19876 }, { "epoch": 0.4379954497127149, "grad_norm": 0.8390501141548157, "learning_rate": 1.8702324286350212e-05, "loss": 0.0868, "step": 19877 }, { "epoch": 0.4380174850022311, "grad_norm": 0.7480851411819458, "learning_rate": 1.8701286863065968e-05, "loss": 0.1033, "step": 19878 }, { "epoch": 0.43803952029174725, "grad_norm": 0.7262271046638489, "learning_rate": 1.8700249420929088e-05, "loss": 0.0767, "step": 19879 }, { "epoch": 0.4380615555812634, "grad_norm": 0.772004246711731, "learning_rate": 1.869921195994485e-05, "loss": 0.0895, "step": 19880 }, { "epoch": 0.4380835908707795, "grad_norm": 0.74895840883255, "learning_rate": 1.869817448011854e-05, "loss": 0.135, "step": 19881 }, { "epoch": 0.4381056261602957, "grad_norm": 0.6295298337936401, "learning_rate": 1.8697136981455452e-05, "loss": 0.0695, "step": 19882 }, { "epoch": 0.43812766144981186, "grad_norm": 0.7141674160957336, "learning_rate": 1.8696099463960856e-05, "loss": 0.0632, "step": 19883 }, { "epoch": 0.438149696739328, "grad_norm": 0.4534914791584015, "learning_rate": 1.8695061927640048e-05, "loss": 0.0479, "step": 19884 }, { "epoch": 0.4381717320288442, "grad_norm": 0.685903012752533, "learning_rate": 1.8694024372498307e-05, "loss": 0.0732, "step": 19885 }, { "epoch": 0.43819376731836035, "grad_norm": 0.8267034292221069, "learning_rate": 1.8692986798540918e-05, "loss": 0.1047, "step": 19886 }, { "epoch": 0.4382158026078765, "grad_norm": 0.6924993991851807, "learning_rate": 1.8691949205773167e-05, "loss": 0.0899, "step": 19887 }, { "epoch": 0.4382378378973927, "grad_norm": 1.035134196281433, "learning_rate": 1.869091159420034e-05, "loss": 0.0947, "step": 19888 }, { "epoch": 0.43825987318690884, "grad_norm": 0.7474132776260376, "learning_rate": 1.8689873963827723e-05, "loss": 0.0662, "step": 19889 }, { "epoch": 0.438281908476425, "grad_norm": 1.1262415647506714, "learning_rate": 1.86888363146606e-05, "loss": 0.1215, "step": 19890 }, { "epoch": 0.4383039437659412, "grad_norm": 0.5975713729858398, "learning_rate": 1.8687798646704246e-05, "loss": 0.1002, "step": 19891 }, { "epoch": 0.43832597905545734, "grad_norm": 0.6771396994590759, "learning_rate": 1.8686760959963963e-05, "loss": 0.0962, "step": 19892 }, { "epoch": 0.4383480143449735, "grad_norm": 0.592661440372467, "learning_rate": 1.8685723254445027e-05, "loss": 0.0656, "step": 19893 }, { "epoch": 0.4383700496344896, "grad_norm": 0.791922926902771, "learning_rate": 1.8684685530152727e-05, "loss": 0.0535, "step": 19894 }, { "epoch": 0.4383920849240058, "grad_norm": 0.6724552512168884, "learning_rate": 1.868364778709235e-05, "loss": 0.0728, "step": 19895 }, { "epoch": 0.43841412021352194, "grad_norm": 0.5872463583946228, "learning_rate": 1.8682610025269175e-05, "loss": 0.0612, "step": 19896 }, { "epoch": 0.4384361555030381, "grad_norm": 0.7169902920722961, "learning_rate": 1.8681572244688497e-05, "loss": 0.0717, "step": 19897 }, { "epoch": 0.43845819079255427, "grad_norm": 0.6995439529418945, "learning_rate": 1.86805344453556e-05, "loss": 0.0842, "step": 19898 }, { "epoch": 0.43848022608207043, "grad_norm": 0.9636476039886475, "learning_rate": 1.8679496627275757e-05, "loss": 0.0797, "step": 19899 }, { "epoch": 0.4385022613715866, "grad_norm": 0.5814294219017029, "learning_rate": 1.8678458790454273e-05, "loss": 0.0631, "step": 19900 }, { "epoch": 0.43852429666110276, "grad_norm": 0.7705302238464355, "learning_rate": 1.867742093489642e-05, "loss": 0.062, "step": 19901 }, { "epoch": 0.43854633195061893, "grad_norm": 0.5777220726013184, "learning_rate": 1.8676383060607498e-05, "loss": 0.0666, "step": 19902 }, { "epoch": 0.4385683672401351, "grad_norm": 0.5869941711425781, "learning_rate": 1.8675345167592777e-05, "loss": 0.0625, "step": 19903 }, { "epoch": 0.43859040252965126, "grad_norm": 0.6553857326507568, "learning_rate": 1.8674307255857558e-05, "loss": 0.0615, "step": 19904 }, { "epoch": 0.4386124378191674, "grad_norm": 1.3557612895965576, "learning_rate": 1.8673269325407122e-05, "loss": 0.1458, "step": 19905 }, { "epoch": 0.43863447310868353, "grad_norm": 0.8231424689292908, "learning_rate": 1.8672231376246752e-05, "loss": 0.1049, "step": 19906 }, { "epoch": 0.4386565083981997, "grad_norm": 0.4724602997303009, "learning_rate": 1.867119340838174e-05, "loss": 0.0977, "step": 19907 }, { "epoch": 0.43867854368771586, "grad_norm": 1.0747483968734741, "learning_rate": 1.867015542181737e-05, "loss": 0.0895, "step": 19908 }, { "epoch": 0.438700578977232, "grad_norm": 0.6656607985496521, "learning_rate": 1.8669117416558932e-05, "loss": 0.0811, "step": 19909 }, { "epoch": 0.4387226142667482, "grad_norm": 0.7098685503005981, "learning_rate": 1.866807939261171e-05, "loss": 0.0735, "step": 19910 }, { "epoch": 0.43874464955626435, "grad_norm": 0.40080296993255615, "learning_rate": 1.8667041349980995e-05, "loss": 0.0672, "step": 19911 }, { "epoch": 0.4387666848457805, "grad_norm": 0.800073504447937, "learning_rate": 1.866600328867207e-05, "loss": 0.089, "step": 19912 }, { "epoch": 0.4387887201352967, "grad_norm": 0.920097827911377, "learning_rate": 1.8664965208690225e-05, "loss": 0.101, "step": 19913 }, { "epoch": 0.43881075542481285, "grad_norm": 0.4591526985168457, "learning_rate": 1.8663927110040747e-05, "loss": 0.0725, "step": 19914 }, { "epoch": 0.438832790714329, "grad_norm": 0.6946183443069458, "learning_rate": 1.8662888992728924e-05, "loss": 0.1012, "step": 19915 }, { "epoch": 0.4388548260038452, "grad_norm": 0.47537732124328613, "learning_rate": 1.866185085676004e-05, "loss": 0.0743, "step": 19916 }, { "epoch": 0.43887686129336134, "grad_norm": 0.5829397439956665, "learning_rate": 1.8660812702139392e-05, "loss": 0.1074, "step": 19917 }, { "epoch": 0.43889889658287745, "grad_norm": 0.9717912077903748, "learning_rate": 1.8659774528872254e-05, "loss": 0.0951, "step": 19918 }, { "epoch": 0.4389209318723936, "grad_norm": 0.6222028136253357, "learning_rate": 1.865873633696393e-05, "loss": 0.0891, "step": 19919 }, { "epoch": 0.4389429671619098, "grad_norm": 0.5307531356811523, "learning_rate": 1.86576981264197e-05, "loss": 0.0736, "step": 19920 }, { "epoch": 0.43896500245142595, "grad_norm": 0.9200195670127869, "learning_rate": 1.8656659897244845e-05, "loss": 0.0746, "step": 19921 }, { "epoch": 0.4389870377409421, "grad_norm": 0.5462670922279358, "learning_rate": 1.8655621649444662e-05, "loss": 0.0521, "step": 19922 }, { "epoch": 0.4390090730304583, "grad_norm": 0.7224186062812805, "learning_rate": 1.865458338302444e-05, "loss": 0.0679, "step": 19923 }, { "epoch": 0.43903110831997444, "grad_norm": 0.7366620302200317, "learning_rate": 1.8653545097989464e-05, "loss": 0.0754, "step": 19924 }, { "epoch": 0.4390531436094906, "grad_norm": 0.6403992176055908, "learning_rate": 1.8652506794345022e-05, "loss": 0.0935, "step": 19925 }, { "epoch": 0.43907517889900677, "grad_norm": 0.5378026962280273, "learning_rate": 1.8651468472096407e-05, "loss": 0.0647, "step": 19926 }, { "epoch": 0.43909721418852293, "grad_norm": 0.5645443797111511, "learning_rate": 1.8650430131248903e-05, "loss": 0.0629, "step": 19927 }, { "epoch": 0.4391192494780391, "grad_norm": 0.9635828733444214, "learning_rate": 1.8649391771807802e-05, "loss": 0.0835, "step": 19928 }, { "epoch": 0.43914128476755526, "grad_norm": 0.6590983271598816, "learning_rate": 1.864835339377839e-05, "loss": 0.0515, "step": 19929 }, { "epoch": 0.4391633200570714, "grad_norm": 0.2810477614402771, "learning_rate": 1.8647314997165965e-05, "loss": 0.0598, "step": 19930 }, { "epoch": 0.43918535534658754, "grad_norm": 0.6170405745506287, "learning_rate": 1.86462765819758e-05, "loss": 0.0756, "step": 19931 }, { "epoch": 0.4392073906361037, "grad_norm": 0.298715740442276, "learning_rate": 1.8645238148213194e-05, "loss": 0.0893, "step": 19932 }, { "epoch": 0.43922942592561987, "grad_norm": 0.5161367654800415, "learning_rate": 1.864419969588344e-05, "loss": 0.0633, "step": 19933 }, { "epoch": 0.43925146121513603, "grad_norm": 0.5677582025527954, "learning_rate": 1.864316122499182e-05, "loss": 0.0894, "step": 19934 }, { "epoch": 0.4392734965046522, "grad_norm": 0.49860522150993347, "learning_rate": 1.8642122735543627e-05, "loss": 0.0838, "step": 19935 }, { "epoch": 0.43929553179416836, "grad_norm": 0.7077555656433105, "learning_rate": 1.8641084227544152e-05, "loss": 0.0971, "step": 19936 }, { "epoch": 0.4393175670836845, "grad_norm": 0.4863804280757904, "learning_rate": 1.8640045700998678e-05, "loss": 0.0722, "step": 19937 }, { "epoch": 0.4393396023732007, "grad_norm": 0.43970584869384766, "learning_rate": 1.8639007155912502e-05, "loss": 0.042, "step": 19938 }, { "epoch": 0.43936163766271685, "grad_norm": 0.7334752082824707, "learning_rate": 1.8637968592290908e-05, "loss": 0.077, "step": 19939 }, { "epoch": 0.439383672952233, "grad_norm": 0.5189051628112793, "learning_rate": 1.8636930010139194e-05, "loss": 0.0384, "step": 19940 }, { "epoch": 0.4394057082417492, "grad_norm": 0.6083871722221375, "learning_rate": 1.863589140946264e-05, "loss": 0.107, "step": 19941 }, { "epoch": 0.43942774353126535, "grad_norm": 0.6864745616912842, "learning_rate": 1.8634852790266546e-05, "loss": 0.0621, "step": 19942 }, { "epoch": 0.43944977882078146, "grad_norm": 0.6297682523727417, "learning_rate": 1.8633814152556196e-05, "loss": 0.0795, "step": 19943 }, { "epoch": 0.4394718141102976, "grad_norm": 1.001558780670166, "learning_rate": 1.863277549633688e-05, "loss": 0.0798, "step": 19944 }, { "epoch": 0.4394938493998138, "grad_norm": 0.6736436486244202, "learning_rate": 1.8631736821613897e-05, "loss": 0.0703, "step": 19945 }, { "epoch": 0.43951588468932995, "grad_norm": 0.7651368975639343, "learning_rate": 1.8630698128392525e-05, "loss": 0.0992, "step": 19946 }, { "epoch": 0.4395379199788461, "grad_norm": 0.8072057366371155, "learning_rate": 1.8629659416678058e-05, "loss": 0.095, "step": 19947 }, { "epoch": 0.4395599552683623, "grad_norm": 0.45970726013183594, "learning_rate": 1.862862068647579e-05, "loss": 0.0854, "step": 19948 }, { "epoch": 0.43958199055787844, "grad_norm": 0.6827650666236877, "learning_rate": 1.8627581937791015e-05, "loss": 0.1272, "step": 19949 }, { "epoch": 0.4396040258473946, "grad_norm": 0.8933460712432861, "learning_rate": 1.862654317062902e-05, "loss": 0.0855, "step": 19950 }, { "epoch": 0.4396260611369108, "grad_norm": 0.8747389316558838, "learning_rate": 1.8625504384995093e-05, "loss": 0.0815, "step": 19951 }, { "epoch": 0.43964809642642694, "grad_norm": 0.6038064956665039, "learning_rate": 1.8624465580894533e-05, "loss": 0.0864, "step": 19952 }, { "epoch": 0.4396701317159431, "grad_norm": 0.6809461116790771, "learning_rate": 1.8623426758332622e-05, "loss": 0.0697, "step": 19953 }, { "epoch": 0.43969216700545927, "grad_norm": 0.5742230415344238, "learning_rate": 1.8622387917314657e-05, "loss": 0.0761, "step": 19954 }, { "epoch": 0.4397142022949754, "grad_norm": 1.1164718866348267, "learning_rate": 1.8621349057845924e-05, "loss": 0.1077, "step": 19955 }, { "epoch": 0.43973623758449154, "grad_norm": 2.0115301609039307, "learning_rate": 1.8620310179931722e-05, "loss": 0.0909, "step": 19956 }, { "epoch": 0.4397582728740077, "grad_norm": 0.508054792881012, "learning_rate": 1.861927128357734e-05, "loss": 0.0938, "step": 19957 }, { "epoch": 0.43978030816352387, "grad_norm": 0.7769423127174377, "learning_rate": 1.8618232368788067e-05, "loss": 0.0728, "step": 19958 }, { "epoch": 0.43980234345304003, "grad_norm": 0.6664407253265381, "learning_rate": 1.8617193435569193e-05, "loss": 0.1089, "step": 19959 }, { "epoch": 0.4398243787425562, "grad_norm": 0.571974515914917, "learning_rate": 1.861615448392602e-05, "loss": 0.0733, "step": 19960 }, { "epoch": 0.43984641403207236, "grad_norm": 0.6218301057815552, "learning_rate": 1.861511551386383e-05, "loss": 0.0866, "step": 19961 }, { "epoch": 0.43986844932158853, "grad_norm": 0.7555128931999207, "learning_rate": 1.8614076525387916e-05, "loss": 0.1222, "step": 19962 }, { "epoch": 0.4398904846111047, "grad_norm": 0.49304094910621643, "learning_rate": 1.8613037518503572e-05, "loss": 0.095, "step": 19963 }, { "epoch": 0.43991251990062086, "grad_norm": 0.4825284481048584, "learning_rate": 1.8611998493216093e-05, "loss": 0.0672, "step": 19964 }, { "epoch": 0.439934555190137, "grad_norm": 0.7841684222221375, "learning_rate": 1.861095944953077e-05, "loss": 0.0877, "step": 19965 }, { "epoch": 0.4399565904796532, "grad_norm": 0.6126728057861328, "learning_rate": 1.8609920387452888e-05, "loss": 0.1212, "step": 19966 }, { "epoch": 0.43997862576916935, "grad_norm": 0.509203314781189, "learning_rate": 1.860888130698775e-05, "loss": 0.0614, "step": 19967 }, { "epoch": 0.44000066105868546, "grad_norm": 0.6972029805183411, "learning_rate": 1.8607842208140647e-05, "loss": 0.0941, "step": 19968 }, { "epoch": 0.4400226963482016, "grad_norm": 0.6874213814735413, "learning_rate": 1.860680309091687e-05, "loss": 0.0886, "step": 19969 }, { "epoch": 0.4400447316377178, "grad_norm": 0.8072725534439087, "learning_rate": 1.8605763955321704e-05, "loss": 0.102, "step": 19970 }, { "epoch": 0.44006676692723395, "grad_norm": 0.5616666674613953, "learning_rate": 1.8604724801360447e-05, "loss": 0.0785, "step": 19971 }, { "epoch": 0.4400888022167501, "grad_norm": 0.7082749009132385, "learning_rate": 1.86036856290384e-05, "loss": 0.0571, "step": 19972 }, { "epoch": 0.4401108375062663, "grad_norm": 0.7728520631790161, "learning_rate": 1.8602646438360844e-05, "loss": 0.106, "step": 19973 }, { "epoch": 0.44013287279578245, "grad_norm": 0.7089045643806458, "learning_rate": 1.8601607229333084e-05, "loss": 0.0871, "step": 19974 }, { "epoch": 0.4401549080852986, "grad_norm": 0.7472171783447266, "learning_rate": 1.8600568001960405e-05, "loss": 0.1284, "step": 19975 }, { "epoch": 0.4401769433748148, "grad_norm": 0.9424959421157837, "learning_rate": 1.8599528756248102e-05, "loss": 0.09, "step": 19976 }, { "epoch": 0.44019897866433094, "grad_norm": 0.20010288059711456, "learning_rate": 1.859848949220147e-05, "loss": 0.0943, "step": 19977 }, { "epoch": 0.4402210139538471, "grad_norm": 0.47300803661346436, "learning_rate": 1.85974502098258e-05, "loss": 0.058, "step": 19978 }, { "epoch": 0.44024304924336327, "grad_norm": 0.5960679650306702, "learning_rate": 1.8596410909126386e-05, "loss": 0.0913, "step": 19979 }, { "epoch": 0.4402650845328794, "grad_norm": 0.8644468188285828, "learning_rate": 1.8595371590108524e-05, "loss": 0.0981, "step": 19980 }, { "epoch": 0.44028711982239555, "grad_norm": 0.8249015808105469, "learning_rate": 1.85943322527775e-05, "loss": 0.0826, "step": 19981 }, { "epoch": 0.4403091551119117, "grad_norm": 0.45562219619750977, "learning_rate": 1.8593292897138624e-05, "loss": 0.0659, "step": 19982 }, { "epoch": 0.4403311904014279, "grad_norm": 1.2456049919128418, "learning_rate": 1.859225352319718e-05, "loss": 0.1284, "step": 19983 }, { "epoch": 0.44035322569094404, "grad_norm": 0.7692012786865234, "learning_rate": 1.859121413095846e-05, "loss": 0.0913, "step": 19984 }, { "epoch": 0.4403752609804602, "grad_norm": 0.9198360443115234, "learning_rate": 1.859017472042776e-05, "loss": 0.1114, "step": 19985 }, { "epoch": 0.44039729626997637, "grad_norm": 0.8356338143348694, "learning_rate": 1.8589135291610374e-05, "loss": 0.0722, "step": 19986 }, { "epoch": 0.44041933155949253, "grad_norm": 0.4866257309913635, "learning_rate": 1.8588095844511597e-05, "loss": 0.0646, "step": 19987 }, { "epoch": 0.4404413668490087, "grad_norm": 0.5603232383728027, "learning_rate": 1.8587056379136728e-05, "loss": 0.0629, "step": 19988 }, { "epoch": 0.44046340213852486, "grad_norm": 0.5235940217971802, "learning_rate": 1.858601689549105e-05, "loss": 0.0721, "step": 19989 }, { "epoch": 0.440485437428041, "grad_norm": 0.6847132444381714, "learning_rate": 1.8584977393579872e-05, "loss": 0.0873, "step": 19990 }, { "epoch": 0.4405074727175572, "grad_norm": 0.7360690832138062, "learning_rate": 1.858393787340848e-05, "loss": 0.0807, "step": 19991 }, { "epoch": 0.4405295080070733, "grad_norm": 0.7599875330924988, "learning_rate": 1.8582898334982167e-05, "loss": 0.0955, "step": 19992 }, { "epoch": 0.44055154329658947, "grad_norm": 0.5040141940116882, "learning_rate": 1.8581858778306236e-05, "loss": 0.0931, "step": 19993 }, { "epoch": 0.44057357858610563, "grad_norm": 0.6267973780632019, "learning_rate": 1.8580819203385972e-05, "loss": 0.0783, "step": 19994 }, { "epoch": 0.4405956138756218, "grad_norm": 0.6626583933830261, "learning_rate": 1.8579779610226683e-05, "loss": 0.1106, "step": 19995 }, { "epoch": 0.44061764916513796, "grad_norm": 0.7706981301307678, "learning_rate": 1.857873999883365e-05, "loss": 0.0718, "step": 19996 }, { "epoch": 0.4406396844546541, "grad_norm": 0.5484051704406738, "learning_rate": 1.857770036921218e-05, "loss": 0.0696, "step": 19997 }, { "epoch": 0.4406617197441703, "grad_norm": 0.532798707485199, "learning_rate": 1.857666072136756e-05, "loss": 0.0698, "step": 19998 }, { "epoch": 0.44068375503368645, "grad_norm": 0.6711916327476501, "learning_rate": 1.857562105530509e-05, "loss": 0.0621, "step": 19999 }, { "epoch": 0.4407057903232026, "grad_norm": 0.4812670946121216, "learning_rate": 1.8574581371030068e-05, "loss": 0.075, "step": 20000 }, { "epoch": 0.4407278256127188, "grad_norm": 0.509330689907074, "learning_rate": 1.8573541668547784e-05, "loss": 0.0648, "step": 20001 }, { "epoch": 0.44074986090223495, "grad_norm": 0.779998779296875, "learning_rate": 1.8572501947863534e-05, "loss": 0.079, "step": 20002 }, { "epoch": 0.4407718961917511, "grad_norm": 0.5589069724082947, "learning_rate": 1.8571462208982618e-05, "loss": 0.0794, "step": 20003 }, { "epoch": 0.4407939314812673, "grad_norm": 1.0456891059875488, "learning_rate": 1.8570422451910327e-05, "loss": 0.1174, "step": 20004 }, { "epoch": 0.4408159667707834, "grad_norm": 0.6890438199043274, "learning_rate": 1.8569382676651965e-05, "loss": 0.1096, "step": 20005 }, { "epoch": 0.44083800206029955, "grad_norm": 0.7475040555000305, "learning_rate": 1.8568342883212816e-05, "loss": 0.0749, "step": 20006 }, { "epoch": 0.4408600373498157, "grad_norm": 0.8759974241256714, "learning_rate": 1.856730307159819e-05, "loss": 0.1005, "step": 20007 }, { "epoch": 0.4408820726393319, "grad_norm": 0.5959205627441406, "learning_rate": 1.8566263241813376e-05, "loss": 0.0916, "step": 20008 }, { "epoch": 0.44090410792884804, "grad_norm": 0.7631168365478516, "learning_rate": 1.856522339386367e-05, "loss": 0.0923, "step": 20009 }, { "epoch": 0.4409261432183642, "grad_norm": 0.6881048679351807, "learning_rate": 1.8564183527754365e-05, "loss": 0.0584, "step": 20010 }, { "epoch": 0.4409481785078804, "grad_norm": 1.3336085081100464, "learning_rate": 1.8563143643490767e-05, "loss": 0.0727, "step": 20011 }, { "epoch": 0.44097021379739654, "grad_norm": 0.7908903956413269, "learning_rate": 1.8562103741078163e-05, "loss": 0.0696, "step": 20012 }, { "epoch": 0.4409922490869127, "grad_norm": 1.0692375898361206, "learning_rate": 1.856106382052186e-05, "loss": 0.0927, "step": 20013 }, { "epoch": 0.44101428437642887, "grad_norm": 0.7584648132324219, "learning_rate": 1.8560023881827144e-05, "loss": 0.0651, "step": 20014 }, { "epoch": 0.44103631966594503, "grad_norm": 0.40789663791656494, "learning_rate": 1.855898392499932e-05, "loss": 0.0662, "step": 20015 }, { "epoch": 0.4410583549554612, "grad_norm": 0.4927372634410858, "learning_rate": 1.8557943950043685e-05, "loss": 0.0849, "step": 20016 }, { "epoch": 0.4410803902449773, "grad_norm": 1.0874435901641846, "learning_rate": 1.8556903956965525e-05, "loss": 0.1131, "step": 20017 }, { "epoch": 0.44110242553449347, "grad_norm": 0.6688526272773743, "learning_rate": 1.8555863945770157e-05, "loss": 0.0778, "step": 20018 }, { "epoch": 0.44112446082400963, "grad_norm": 0.6005560159683228, "learning_rate": 1.8554823916462856e-05, "loss": 0.094, "step": 20019 }, { "epoch": 0.4411464961135258, "grad_norm": 0.6381493806838989, "learning_rate": 1.8553783869048938e-05, "loss": 0.0793, "step": 20020 }, { "epoch": 0.44116853140304196, "grad_norm": 0.6681985855102539, "learning_rate": 1.855274380353369e-05, "loss": 0.0734, "step": 20021 }, { "epoch": 0.44119056669255813, "grad_norm": 0.5923818349838257, "learning_rate": 1.8551703719922418e-05, "loss": 0.0889, "step": 20022 }, { "epoch": 0.4412126019820743, "grad_norm": 0.8325834274291992, "learning_rate": 1.855066361822041e-05, "loss": 0.0937, "step": 20023 }, { "epoch": 0.44123463727159046, "grad_norm": 0.5946558117866516, "learning_rate": 1.854962349843297e-05, "loss": 0.0716, "step": 20024 }, { "epoch": 0.4412566725611066, "grad_norm": 0.5802578926086426, "learning_rate": 1.854858336056539e-05, "loss": 0.0618, "step": 20025 }, { "epoch": 0.4412787078506228, "grad_norm": 0.5679544806480408, "learning_rate": 1.854754320462298e-05, "loss": 0.0608, "step": 20026 }, { "epoch": 0.44130074314013895, "grad_norm": 0.7457404732704163, "learning_rate": 1.8546503030611023e-05, "loss": 0.0793, "step": 20027 }, { "epoch": 0.4413227784296551, "grad_norm": 0.508583128452301, "learning_rate": 1.8545462838534825e-05, "loss": 0.0555, "step": 20028 }, { "epoch": 0.4413448137191712, "grad_norm": 1.0241297483444214, "learning_rate": 1.8544422628399685e-05, "loss": 0.076, "step": 20029 }, { "epoch": 0.4413668490086874, "grad_norm": 1.245823621749878, "learning_rate": 1.85433824002109e-05, "loss": 0.0895, "step": 20030 }, { "epoch": 0.44138888429820355, "grad_norm": 0.9492751955986023, "learning_rate": 1.8542342153973766e-05, "loss": 0.1033, "step": 20031 }, { "epoch": 0.4414109195877197, "grad_norm": 0.897909939289093, "learning_rate": 1.8541301889693586e-05, "loss": 0.087, "step": 20032 }, { "epoch": 0.4414329548772359, "grad_norm": 0.7804138660430908, "learning_rate": 1.8540261607375657e-05, "loss": 0.0754, "step": 20033 }, { "epoch": 0.44145499016675205, "grad_norm": 1.1048046350479126, "learning_rate": 1.8539221307025277e-05, "loss": 0.104, "step": 20034 }, { "epoch": 0.4414770254562682, "grad_norm": 1.0688531398773193, "learning_rate": 1.8538180988647742e-05, "loss": 0.0934, "step": 20035 }, { "epoch": 0.4414990607457844, "grad_norm": 0.559253990650177, "learning_rate": 1.8537140652248358e-05, "loss": 0.1027, "step": 20036 }, { "epoch": 0.44152109603530054, "grad_norm": 0.8339971303939819, "learning_rate": 1.8536100297832417e-05, "loss": 0.0932, "step": 20037 }, { "epoch": 0.4415431313248167, "grad_norm": 0.40531715750694275, "learning_rate": 1.8535059925405222e-05, "loss": 0.0575, "step": 20038 }, { "epoch": 0.44156516661433287, "grad_norm": 0.719072163105011, "learning_rate": 1.8534019534972075e-05, "loss": 0.0905, "step": 20039 }, { "epoch": 0.44158720190384904, "grad_norm": 0.7026600241661072, "learning_rate": 1.8532979126538265e-05, "loss": 0.1058, "step": 20040 }, { "epoch": 0.4416092371933652, "grad_norm": 0.5231570601463318, "learning_rate": 1.85319387001091e-05, "loss": 0.0807, "step": 20041 }, { "epoch": 0.4416312724828813, "grad_norm": 0.7626478672027588, "learning_rate": 1.853089825568988e-05, "loss": 0.0702, "step": 20042 }, { "epoch": 0.4416533077723975, "grad_norm": 0.3939283788204193, "learning_rate": 1.8529857793285897e-05, "loss": 0.0756, "step": 20043 }, { "epoch": 0.44167534306191364, "grad_norm": 0.4150907099246979, "learning_rate": 1.8528817312902456e-05, "loss": 0.0522, "step": 20044 }, { "epoch": 0.4416973783514298, "grad_norm": 0.6571722626686096, "learning_rate": 1.852777681454486e-05, "loss": 0.1267, "step": 20045 }, { "epoch": 0.44171941364094597, "grad_norm": 0.5158995389938354, "learning_rate": 1.8526736298218405e-05, "loss": 0.0976, "step": 20046 }, { "epoch": 0.44174144893046213, "grad_norm": 0.5143165588378906, "learning_rate": 1.852569576392839e-05, "loss": 0.0851, "step": 20047 }, { "epoch": 0.4417634842199783, "grad_norm": 0.7228632569313049, "learning_rate": 1.852465521168011e-05, "loss": 0.1013, "step": 20048 }, { "epoch": 0.44178551950949446, "grad_norm": 0.34566426277160645, "learning_rate": 1.852361464147888e-05, "loss": 0.0607, "step": 20049 }, { "epoch": 0.4418075547990106, "grad_norm": 0.6478070020675659, "learning_rate": 1.8522574053329984e-05, "loss": 0.0683, "step": 20050 }, { "epoch": 0.4418295900885268, "grad_norm": 0.795834481716156, "learning_rate": 1.8521533447238735e-05, "loss": 0.0641, "step": 20051 }, { "epoch": 0.44185162537804296, "grad_norm": 0.6073036193847656, "learning_rate": 1.852049282321042e-05, "loss": 0.0805, "step": 20052 }, { "epoch": 0.4418736606675591, "grad_norm": 0.4424857497215271, "learning_rate": 1.8519452181250356e-05, "loss": 0.0528, "step": 20053 }, { "epoch": 0.44189569595707523, "grad_norm": 0.4515048563480377, "learning_rate": 1.8518411521363828e-05, "loss": 0.0925, "step": 20054 }, { "epoch": 0.4419177312465914, "grad_norm": 0.8374944925308228, "learning_rate": 1.8517370843556146e-05, "loss": 0.1087, "step": 20055 }, { "epoch": 0.44193976653610756, "grad_norm": 0.7553701996803284, "learning_rate": 1.8516330147832612e-05, "loss": 0.0571, "step": 20056 }, { "epoch": 0.4419618018256237, "grad_norm": 0.7077287435531616, "learning_rate": 1.8515289434198517e-05, "loss": 0.0646, "step": 20057 }, { "epoch": 0.4419838371151399, "grad_norm": 1.1184216737747192, "learning_rate": 1.851424870265917e-05, "loss": 0.0627, "step": 20058 }, { "epoch": 0.44200587240465605, "grad_norm": 0.7363449931144714, "learning_rate": 1.8513207953219867e-05, "loss": 0.0627, "step": 20059 }, { "epoch": 0.4420279076941722, "grad_norm": 0.577309250831604, "learning_rate": 1.851216718588592e-05, "loss": 0.0556, "step": 20060 }, { "epoch": 0.4420499429836884, "grad_norm": 0.9434697031974792, "learning_rate": 1.8511126400662614e-05, "loss": 0.0732, "step": 20061 }, { "epoch": 0.44207197827320455, "grad_norm": 0.5387662649154663, "learning_rate": 1.851008559755526e-05, "loss": 0.0608, "step": 20062 }, { "epoch": 0.4420940135627207, "grad_norm": 0.41429784893989563, "learning_rate": 1.8509044776569163e-05, "loss": 0.0569, "step": 20063 }, { "epoch": 0.4421160488522369, "grad_norm": 1.3330377340316772, "learning_rate": 1.8508003937709614e-05, "loss": 0.0876, "step": 20064 }, { "epoch": 0.44213808414175304, "grad_norm": 0.9781768321990967, "learning_rate": 1.8506963080981923e-05, "loss": 0.0712, "step": 20065 }, { "epoch": 0.4421601194312692, "grad_norm": 0.7837865948677063, "learning_rate": 1.8505922206391387e-05, "loss": 0.0953, "step": 20066 }, { "epoch": 0.4421821547207853, "grad_norm": 0.7093529105186462, "learning_rate": 1.8504881313943307e-05, "loss": 0.0831, "step": 20067 }, { "epoch": 0.4422041900103015, "grad_norm": 0.7102766036987305, "learning_rate": 1.850384040364299e-05, "loss": 0.08, "step": 20068 }, { "epoch": 0.44222622529981764, "grad_norm": 0.48653069138526917, "learning_rate": 1.8502799475495734e-05, "loss": 0.0923, "step": 20069 }, { "epoch": 0.4422482605893338, "grad_norm": 0.36797118186950684, "learning_rate": 1.850175852950684e-05, "loss": 0.0711, "step": 20070 }, { "epoch": 0.44227029587885, "grad_norm": 0.6613513827323914, "learning_rate": 1.8500717565681617e-05, "loss": 0.0769, "step": 20071 }, { "epoch": 0.44229233116836614, "grad_norm": 0.6984138488769531, "learning_rate": 1.849967658402536e-05, "loss": 0.0798, "step": 20072 }, { "epoch": 0.4423143664578823, "grad_norm": 0.5623219013214111, "learning_rate": 1.849863558454337e-05, "loss": 0.11, "step": 20073 }, { "epoch": 0.44233640174739847, "grad_norm": 1.0211775302886963, "learning_rate": 1.8497594567240957e-05, "loss": 0.0974, "step": 20074 }, { "epoch": 0.44235843703691463, "grad_norm": 1.1765949726104736, "learning_rate": 1.8496553532123417e-05, "loss": 0.066, "step": 20075 }, { "epoch": 0.4423804723264308, "grad_norm": 1.3953540325164795, "learning_rate": 1.8495512479196055e-05, "loss": 0.0784, "step": 20076 }, { "epoch": 0.44240250761594696, "grad_norm": 0.6614883542060852, "learning_rate": 1.8494471408464174e-05, "loss": 0.0805, "step": 20077 }, { "epoch": 0.4424245429054631, "grad_norm": 0.7662847638130188, "learning_rate": 1.8493430319933076e-05, "loss": 0.0623, "step": 20078 }, { "epoch": 0.44244657819497923, "grad_norm": 0.6421592831611633, "learning_rate": 1.8492389213608065e-05, "loss": 0.1069, "step": 20079 }, { "epoch": 0.4424686134844954, "grad_norm": 0.36083269119262695, "learning_rate": 1.8491348089494443e-05, "loss": 0.0504, "step": 20080 }, { "epoch": 0.44249064877401156, "grad_norm": 0.8731822371482849, "learning_rate": 1.849030694759751e-05, "loss": 0.1056, "step": 20081 }, { "epoch": 0.44251268406352773, "grad_norm": 1.1882829666137695, "learning_rate": 1.8489265787922577e-05, "loss": 0.1218, "step": 20082 }, { "epoch": 0.4425347193530439, "grad_norm": 0.6962294578552246, "learning_rate": 1.848822461047494e-05, "loss": 0.142, "step": 20083 }, { "epoch": 0.44255675464256006, "grad_norm": 0.6783772110939026, "learning_rate": 1.8487183415259903e-05, "loss": 0.0811, "step": 20084 }, { "epoch": 0.4425787899320762, "grad_norm": 0.8937355875968933, "learning_rate": 1.848614220228277e-05, "loss": 0.1039, "step": 20085 }, { "epoch": 0.4426008252215924, "grad_norm": 0.5551090836524963, "learning_rate": 1.848510097154885e-05, "loss": 0.0834, "step": 20086 }, { "epoch": 0.44262286051110855, "grad_norm": 0.7107053995132446, "learning_rate": 1.848405972306344e-05, "loss": 0.0647, "step": 20087 }, { "epoch": 0.4426448958006247, "grad_norm": 0.4508099853992462, "learning_rate": 1.8483018456831846e-05, "loss": 0.0688, "step": 20088 }, { "epoch": 0.4426669310901409, "grad_norm": 0.6901352405548096, "learning_rate": 1.8481977172859367e-05, "loss": 0.0704, "step": 20089 }, { "epoch": 0.44268896637965705, "grad_norm": 0.7280227541923523, "learning_rate": 1.8480935871151316e-05, "loss": 0.0775, "step": 20090 }, { "epoch": 0.44271100166917315, "grad_norm": 0.7142467498779297, "learning_rate": 1.8479894551712992e-05, "loss": 0.1008, "step": 20091 }, { "epoch": 0.4427330369586893, "grad_norm": 0.4341110587120056, "learning_rate": 1.8478853214549693e-05, "loss": 0.071, "step": 20092 }, { "epoch": 0.4427550722482055, "grad_norm": 0.5993574857711792, "learning_rate": 1.8477811859666733e-05, "loss": 0.0965, "step": 20093 }, { "epoch": 0.44277710753772165, "grad_norm": 0.8778685331344604, "learning_rate": 1.8476770487069417e-05, "loss": 0.1271, "step": 20094 }, { "epoch": 0.4427991428272378, "grad_norm": 1.0194191932678223, "learning_rate": 1.847572909676304e-05, "loss": 0.0787, "step": 20095 }, { "epoch": 0.442821178116754, "grad_norm": 1.095739483833313, "learning_rate": 1.847468768875291e-05, "loss": 0.0764, "step": 20096 }, { "epoch": 0.44284321340627014, "grad_norm": 0.6172202229499817, "learning_rate": 1.8473646263044335e-05, "loss": 0.1027, "step": 20097 }, { "epoch": 0.4428652486957863, "grad_norm": 1.0677481889724731, "learning_rate": 1.8472604819642613e-05, "loss": 0.1347, "step": 20098 }, { "epoch": 0.44288728398530247, "grad_norm": 0.7185034155845642, "learning_rate": 1.8471563358553055e-05, "loss": 0.0713, "step": 20099 }, { "epoch": 0.44290931927481864, "grad_norm": 0.7978706955909729, "learning_rate": 1.8470521879780962e-05, "loss": 0.1029, "step": 20100 }, { "epoch": 0.4429313545643348, "grad_norm": 0.8699564933776855, "learning_rate": 1.846948038333164e-05, "loss": 0.0761, "step": 20101 }, { "epoch": 0.44295338985385097, "grad_norm": 0.6948645114898682, "learning_rate": 1.84684388692104e-05, "loss": 0.0949, "step": 20102 }, { "epoch": 0.44297542514336713, "grad_norm": 0.4993647336959839, "learning_rate": 1.8467397337422528e-05, "loss": 0.0923, "step": 20103 }, { "epoch": 0.44299746043288324, "grad_norm": 1.4194107055664062, "learning_rate": 1.8466355787973355e-05, "loss": 0.0809, "step": 20104 }, { "epoch": 0.4430194957223994, "grad_norm": 0.8177316188812256, "learning_rate": 1.8465314220868163e-05, "loss": 0.1034, "step": 20105 }, { "epoch": 0.44304153101191557, "grad_norm": 0.7725547552108765, "learning_rate": 1.8464272636112276e-05, "loss": 0.0799, "step": 20106 }, { "epoch": 0.44306356630143173, "grad_norm": 0.5741235613822937, "learning_rate": 1.8463231033710984e-05, "loss": 0.0764, "step": 20107 }, { "epoch": 0.4430856015909479, "grad_norm": 0.370484858751297, "learning_rate": 1.84621894136696e-05, "loss": 0.0706, "step": 20108 }, { "epoch": 0.44310763688046406, "grad_norm": 0.5106983184814453, "learning_rate": 1.846114777599343e-05, "loss": 0.0729, "step": 20109 }, { "epoch": 0.4431296721699802, "grad_norm": 0.7104876637458801, "learning_rate": 1.8460106120687785e-05, "loss": 0.1037, "step": 20110 }, { "epoch": 0.4431517074594964, "grad_norm": 1.1524910926818848, "learning_rate": 1.8459064447757956e-05, "loss": 0.0829, "step": 20111 }, { "epoch": 0.44317374274901256, "grad_norm": 0.6890961527824402, "learning_rate": 1.8458022757209262e-05, "loss": 0.0707, "step": 20112 }, { "epoch": 0.4431957780385287, "grad_norm": 0.6319032907485962, "learning_rate": 1.8456981049047e-05, "loss": 0.1103, "step": 20113 }, { "epoch": 0.4432178133280449, "grad_norm": 0.9894335865974426, "learning_rate": 1.845593932327648e-05, "loss": 0.1002, "step": 20114 }, { "epoch": 0.44323984861756105, "grad_norm": 0.5444884896278381, "learning_rate": 1.8454897579903005e-05, "loss": 0.068, "step": 20115 }, { "epoch": 0.44326188390707716, "grad_norm": 0.9906008839607239, "learning_rate": 1.845385581893189e-05, "loss": 0.0758, "step": 20116 }, { "epoch": 0.4432839191965933, "grad_norm": 0.46062037348747253, "learning_rate": 1.8452814040368434e-05, "loss": 0.0767, "step": 20117 }, { "epoch": 0.4433059544861095, "grad_norm": 0.586543083190918, "learning_rate": 1.8451772244217944e-05, "loss": 0.0786, "step": 20118 }, { "epoch": 0.44332798977562565, "grad_norm": 0.641529381275177, "learning_rate": 1.8450730430485727e-05, "loss": 0.1142, "step": 20119 }, { "epoch": 0.4433500250651418, "grad_norm": 1.0911223888397217, "learning_rate": 1.8449688599177088e-05, "loss": 0.0758, "step": 20120 }, { "epoch": 0.443372060354658, "grad_norm": 0.8304886817932129, "learning_rate": 1.8448646750297333e-05, "loss": 0.1144, "step": 20121 }, { "epoch": 0.44339409564417415, "grad_norm": 0.7919175028800964, "learning_rate": 1.844760488385178e-05, "loss": 0.0903, "step": 20122 }, { "epoch": 0.4434161309336903, "grad_norm": 0.6477362513542175, "learning_rate": 1.8446562999845714e-05, "loss": 0.0626, "step": 20123 }, { "epoch": 0.4434381662232065, "grad_norm": 1.020322322845459, "learning_rate": 1.8445521098284464e-05, "loss": 0.075, "step": 20124 }, { "epoch": 0.44346020151272264, "grad_norm": 0.4703027904033661, "learning_rate": 1.8444479179173322e-05, "loss": 0.079, "step": 20125 }, { "epoch": 0.4434822368022388, "grad_norm": 0.6440389752388, "learning_rate": 1.8443437242517603e-05, "loss": 0.0529, "step": 20126 }, { "epoch": 0.44350427209175497, "grad_norm": 0.7473979592323303, "learning_rate": 1.8442395288322613e-05, "loss": 0.0887, "step": 20127 }, { "epoch": 0.4435263073812711, "grad_norm": 0.9297314882278442, "learning_rate": 1.8441353316593652e-05, "loss": 0.1088, "step": 20128 }, { "epoch": 0.44354834267078724, "grad_norm": 0.6410040259361267, "learning_rate": 1.844031132733604e-05, "loss": 0.1043, "step": 20129 }, { "epoch": 0.4435703779603034, "grad_norm": 0.9784277677536011, "learning_rate": 1.8439269320555072e-05, "loss": 0.1246, "step": 20130 }, { "epoch": 0.4435924132498196, "grad_norm": 1.2629146575927734, "learning_rate": 1.8438227296256065e-05, "loss": 0.0928, "step": 20131 }, { "epoch": 0.44361444853933574, "grad_norm": 0.7938876152038574, "learning_rate": 1.843718525444432e-05, "loss": 0.0685, "step": 20132 }, { "epoch": 0.4436364838288519, "grad_norm": 0.6527875065803528, "learning_rate": 1.8436143195125148e-05, "loss": 0.0813, "step": 20133 }, { "epoch": 0.44365851911836807, "grad_norm": 0.7746332883834839, "learning_rate": 1.843510111830386e-05, "loss": 0.0669, "step": 20134 }, { "epoch": 0.44368055440788423, "grad_norm": 0.6941356658935547, "learning_rate": 1.843405902398576e-05, "loss": 0.102, "step": 20135 }, { "epoch": 0.4437025896974004, "grad_norm": 0.6096571087837219, "learning_rate": 1.843301691217615e-05, "loss": 0.1068, "step": 20136 }, { "epoch": 0.44372462498691656, "grad_norm": 0.7536024451255798, "learning_rate": 1.8431974782880347e-05, "loss": 0.1063, "step": 20137 }, { "epoch": 0.4437466602764327, "grad_norm": 0.6716538667678833, "learning_rate": 1.8430932636103655e-05, "loss": 0.0959, "step": 20138 }, { "epoch": 0.4437686955659489, "grad_norm": 0.8389776349067688, "learning_rate": 1.8429890471851386e-05, "loss": 0.077, "step": 20139 }, { "epoch": 0.44379073085546505, "grad_norm": 0.7702388167381287, "learning_rate": 1.842884829012884e-05, "loss": 0.1212, "step": 20140 }, { "epoch": 0.44381276614498116, "grad_norm": 0.4126059412956238, "learning_rate": 1.8427806090941336e-05, "loss": 0.0837, "step": 20141 }, { "epoch": 0.44383480143449733, "grad_norm": 0.5655410289764404, "learning_rate": 1.8426763874294174e-05, "loss": 0.0568, "step": 20142 }, { "epoch": 0.4438568367240135, "grad_norm": 0.872700035572052, "learning_rate": 1.842572164019267e-05, "loss": 0.081, "step": 20143 }, { "epoch": 0.44387887201352966, "grad_norm": 0.706486165523529, "learning_rate": 1.8424679388642127e-05, "loss": 0.0677, "step": 20144 }, { "epoch": 0.4439009073030458, "grad_norm": 0.6017420887947083, "learning_rate": 1.8423637119647855e-05, "loss": 0.0658, "step": 20145 }, { "epoch": 0.443922942592562, "grad_norm": 0.6323201060295105, "learning_rate": 1.8422594833215162e-05, "loss": 0.0854, "step": 20146 }, { "epoch": 0.44394497788207815, "grad_norm": 0.46621280908584595, "learning_rate": 1.842155252934936e-05, "loss": 0.0776, "step": 20147 }, { "epoch": 0.4439670131715943, "grad_norm": 0.6869781017303467, "learning_rate": 1.8420510208055756e-05, "loss": 0.0969, "step": 20148 }, { "epoch": 0.4439890484611105, "grad_norm": 0.5342942476272583, "learning_rate": 1.8419467869339657e-05, "loss": 0.083, "step": 20149 }, { "epoch": 0.44401108375062665, "grad_norm": 0.4984596073627472, "learning_rate": 1.841842551320638e-05, "loss": 0.0688, "step": 20150 }, { "epoch": 0.4440331190401428, "grad_norm": 0.5631101131439209, "learning_rate": 1.8417383139661223e-05, "loss": 0.0744, "step": 20151 }, { "epoch": 0.444055154329659, "grad_norm": 0.5995911955833435, "learning_rate": 1.8416340748709503e-05, "loss": 0.0904, "step": 20152 }, { "epoch": 0.4440771896191751, "grad_norm": 0.4158749580383301, "learning_rate": 1.8415298340356528e-05, "loss": 0.0871, "step": 20153 }, { "epoch": 0.44409922490869125, "grad_norm": 0.6341301202774048, "learning_rate": 1.8414255914607607e-05, "loss": 0.0747, "step": 20154 }, { "epoch": 0.4441212601982074, "grad_norm": 0.6889550685882568, "learning_rate": 1.8413213471468046e-05, "loss": 0.0639, "step": 20155 }, { "epoch": 0.4441432954877236, "grad_norm": 0.9212427735328674, "learning_rate": 1.8412171010943165e-05, "loss": 0.0941, "step": 20156 }, { "epoch": 0.44416533077723974, "grad_norm": 0.5747195482254028, "learning_rate": 1.8411128533038265e-05, "loss": 0.0718, "step": 20157 }, { "epoch": 0.4441873660667559, "grad_norm": 0.47176840901374817, "learning_rate": 1.841008603775866e-05, "loss": 0.066, "step": 20158 }, { "epoch": 0.44420940135627207, "grad_norm": 0.800238311290741, "learning_rate": 1.8409043525109653e-05, "loss": 0.0695, "step": 20159 }, { "epoch": 0.44423143664578824, "grad_norm": 0.9140990376472473, "learning_rate": 1.8408000995096564e-05, "loss": 0.0761, "step": 20160 }, { "epoch": 0.4442534719353044, "grad_norm": 0.911750078201294, "learning_rate": 1.8406958447724696e-05, "loss": 0.0919, "step": 20161 }, { "epoch": 0.44427550722482056, "grad_norm": 0.3884781301021576, "learning_rate": 1.8405915882999363e-05, "loss": 0.0839, "step": 20162 }, { "epoch": 0.44429754251433673, "grad_norm": 0.5607826709747314, "learning_rate": 1.840487330092587e-05, "loss": 0.0643, "step": 20163 }, { "epoch": 0.4443195778038529, "grad_norm": 0.7478349208831787, "learning_rate": 1.840383070150954e-05, "loss": 0.058, "step": 20164 }, { "epoch": 0.444341613093369, "grad_norm": 0.5586510896682739, "learning_rate": 1.8402788084755665e-05, "loss": 0.0729, "step": 20165 }, { "epoch": 0.44436364838288517, "grad_norm": 0.6814385652542114, "learning_rate": 1.8401745450669572e-05, "loss": 0.0576, "step": 20166 }, { "epoch": 0.44438568367240133, "grad_norm": 0.5721435546875, "learning_rate": 1.8400702799256562e-05, "loss": 0.0946, "step": 20167 }, { "epoch": 0.4444077189619175, "grad_norm": 0.9104688167572021, "learning_rate": 1.8399660130521956e-05, "loss": 0.084, "step": 20168 }, { "epoch": 0.44442975425143366, "grad_norm": 0.7446753978729248, "learning_rate": 1.839861744447105e-05, "loss": 0.0846, "step": 20169 }, { "epoch": 0.4444517895409498, "grad_norm": 0.5976375937461853, "learning_rate": 1.839757474110916e-05, "loss": 0.0933, "step": 20170 }, { "epoch": 0.444473824830466, "grad_norm": 0.6351041197776794, "learning_rate": 1.839653202044161e-05, "loss": 0.0993, "step": 20171 }, { "epoch": 0.44449586011998216, "grad_norm": 0.6190674901008606, "learning_rate": 1.8395489282473694e-05, "loss": 0.0777, "step": 20172 }, { "epoch": 0.4445178954094983, "grad_norm": 0.6140050888061523, "learning_rate": 1.8394446527210732e-05, "loss": 0.0938, "step": 20173 }, { "epoch": 0.4445399306990145, "grad_norm": 0.3460763394832611, "learning_rate": 1.839340375465804e-05, "loss": 0.0699, "step": 20174 }, { "epoch": 0.44456196598853065, "grad_norm": 0.43301424384117126, "learning_rate": 1.8392360964820913e-05, "loss": 0.0745, "step": 20175 }, { "epoch": 0.4445840012780468, "grad_norm": 0.8331366777420044, "learning_rate": 1.8391318157704674e-05, "loss": 0.0728, "step": 20176 }, { "epoch": 0.444606036567563, "grad_norm": 0.7451027631759644, "learning_rate": 1.8390275333314638e-05, "loss": 0.0809, "step": 20177 }, { "epoch": 0.4446280718570791, "grad_norm": 0.7291796803474426, "learning_rate": 1.8389232491656108e-05, "loss": 0.1065, "step": 20178 }, { "epoch": 0.44465010714659525, "grad_norm": 0.6686379313468933, "learning_rate": 1.8388189632734402e-05, "loss": 0.0738, "step": 20179 }, { "epoch": 0.4446721424361114, "grad_norm": 0.6207400560379028, "learning_rate": 1.8387146756554827e-05, "loss": 0.0937, "step": 20180 }, { "epoch": 0.4446941777256276, "grad_norm": 0.790386974811554, "learning_rate": 1.83861038631227e-05, "loss": 0.0949, "step": 20181 }, { "epoch": 0.44471621301514375, "grad_norm": 0.48612385988235474, "learning_rate": 1.838506095244333e-05, "loss": 0.0584, "step": 20182 }, { "epoch": 0.4447382483046599, "grad_norm": 0.557010293006897, "learning_rate": 1.838401802452203e-05, "loss": 0.0954, "step": 20183 }, { "epoch": 0.4447602835941761, "grad_norm": 0.9604889154434204, "learning_rate": 1.8382975079364106e-05, "loss": 0.1101, "step": 20184 }, { "epoch": 0.44478231888369224, "grad_norm": 0.9018853902816772, "learning_rate": 1.838193211697488e-05, "loss": 0.077, "step": 20185 }, { "epoch": 0.4448043541732084, "grad_norm": 0.8764233589172363, "learning_rate": 1.838088913735966e-05, "loss": 0.0769, "step": 20186 }, { "epoch": 0.44482638946272457, "grad_norm": 0.7716899514198303, "learning_rate": 1.8379846140523758e-05, "loss": 0.0599, "step": 20187 }, { "epoch": 0.44484842475224073, "grad_norm": 0.6932815313339233, "learning_rate": 1.8378803126472484e-05, "loss": 0.1175, "step": 20188 }, { "epoch": 0.4448704600417569, "grad_norm": 0.6477869153022766, "learning_rate": 1.837776009521116e-05, "loss": 0.0502, "step": 20189 }, { "epoch": 0.444892495331273, "grad_norm": 0.8085346817970276, "learning_rate": 1.837671704674509e-05, "loss": 0.076, "step": 20190 }, { "epoch": 0.4449145306207892, "grad_norm": 0.513022243976593, "learning_rate": 1.837567398107959e-05, "loss": 0.055, "step": 20191 }, { "epoch": 0.44493656591030534, "grad_norm": 0.8696961402893066, "learning_rate": 1.8374630898219972e-05, "loss": 0.0987, "step": 20192 }, { "epoch": 0.4449586011998215, "grad_norm": 0.5679016709327698, "learning_rate": 1.8373587798171544e-05, "loss": 0.059, "step": 20193 }, { "epoch": 0.44498063648933767, "grad_norm": 0.740221381187439, "learning_rate": 1.837254468093963e-05, "loss": 0.0945, "step": 20194 }, { "epoch": 0.44500267177885383, "grad_norm": 0.8639959692955017, "learning_rate": 1.8371501546529535e-05, "loss": 0.1003, "step": 20195 }, { "epoch": 0.44502470706837, "grad_norm": 0.7725703120231628, "learning_rate": 1.8370458394946576e-05, "loss": 0.1127, "step": 20196 }, { "epoch": 0.44504674235788616, "grad_norm": 0.6978785991668701, "learning_rate": 1.8369415226196068e-05, "loss": 0.0738, "step": 20197 }, { "epoch": 0.4450687776474023, "grad_norm": 0.7493317723274231, "learning_rate": 1.836837204028332e-05, "loss": 0.064, "step": 20198 }, { "epoch": 0.4450908129369185, "grad_norm": 0.6251401901245117, "learning_rate": 1.8367328837213643e-05, "loss": 0.0782, "step": 20199 }, { "epoch": 0.44511284822643465, "grad_norm": 0.7130855917930603, "learning_rate": 1.836628561699236e-05, "loss": 0.0859, "step": 20200 }, { "epoch": 0.4451348835159508, "grad_norm": 0.7728335857391357, "learning_rate": 1.8365242379624775e-05, "loss": 0.0906, "step": 20201 }, { "epoch": 0.44515691880546693, "grad_norm": 0.4201262891292572, "learning_rate": 1.8364199125116206e-05, "loss": 0.0476, "step": 20202 }, { "epoch": 0.4451789540949831, "grad_norm": 0.9764958620071411, "learning_rate": 1.8363155853471964e-05, "loss": 0.141, "step": 20203 }, { "epoch": 0.44520098938449926, "grad_norm": 0.61836838722229, "learning_rate": 1.8362112564697372e-05, "loss": 0.067, "step": 20204 }, { "epoch": 0.4452230246740154, "grad_norm": 0.7531495094299316, "learning_rate": 1.836106925879774e-05, "loss": 0.0952, "step": 20205 }, { "epoch": 0.4452450599635316, "grad_norm": 0.8282369375228882, "learning_rate": 1.8360025935778373e-05, "loss": 0.0665, "step": 20206 }, { "epoch": 0.44526709525304775, "grad_norm": 0.8126421570777893, "learning_rate": 1.8358982595644592e-05, "loss": 0.0709, "step": 20207 }, { "epoch": 0.4452891305425639, "grad_norm": 0.6786821484565735, "learning_rate": 1.835793923840172e-05, "loss": 0.1063, "step": 20208 }, { "epoch": 0.4453111658320801, "grad_norm": 0.45883435010910034, "learning_rate": 1.835689586405505e-05, "loss": 0.13, "step": 20209 }, { "epoch": 0.44533320112159624, "grad_norm": 0.746855616569519, "learning_rate": 1.835585247260992e-05, "loss": 0.0814, "step": 20210 }, { "epoch": 0.4453552364111124, "grad_norm": 0.5438206195831299, "learning_rate": 1.8354809064071626e-05, "loss": 0.0907, "step": 20211 }, { "epoch": 0.4453772717006286, "grad_norm": 0.6981831789016724, "learning_rate": 1.8353765638445495e-05, "loss": 0.0912, "step": 20212 }, { "epoch": 0.44539930699014474, "grad_norm": 0.6358029246330261, "learning_rate": 1.8352722195736834e-05, "loss": 0.0745, "step": 20213 }, { "epoch": 0.4454213422796609, "grad_norm": 0.6311675906181335, "learning_rate": 1.835167873595097e-05, "loss": 0.0737, "step": 20214 }, { "epoch": 0.445443377569177, "grad_norm": 0.6039573550224304, "learning_rate": 1.8350635259093202e-05, "loss": 0.095, "step": 20215 }, { "epoch": 0.4454654128586932, "grad_norm": 0.8486197590827942, "learning_rate": 1.834959176516885e-05, "loss": 0.0887, "step": 20216 }, { "epoch": 0.44548744814820934, "grad_norm": 0.6678850054740906, "learning_rate": 1.834854825418323e-05, "loss": 0.0644, "step": 20217 }, { "epoch": 0.4455094834377255, "grad_norm": 0.6094793081283569, "learning_rate": 1.8347504726141658e-05, "loss": 0.1075, "step": 20218 }, { "epoch": 0.44553151872724167, "grad_norm": 0.942649781703949, "learning_rate": 1.834646118104946e-05, "loss": 0.0991, "step": 20219 }, { "epoch": 0.44555355401675784, "grad_norm": 0.9622889161109924, "learning_rate": 1.8345417618911927e-05, "loss": 0.1013, "step": 20220 }, { "epoch": 0.445575589306274, "grad_norm": 0.5151284337043762, "learning_rate": 1.8344374039734396e-05, "loss": 0.1111, "step": 20221 }, { "epoch": 0.44559762459579016, "grad_norm": 1.0124508142471313, "learning_rate": 1.8343330443522175e-05, "loss": 0.0729, "step": 20222 }, { "epoch": 0.44561965988530633, "grad_norm": 0.772680401802063, "learning_rate": 1.8342286830280578e-05, "loss": 0.0889, "step": 20223 }, { "epoch": 0.4456416951748225, "grad_norm": 0.46428826451301575, "learning_rate": 1.834124320001492e-05, "loss": 0.0603, "step": 20224 }, { "epoch": 0.44566373046433866, "grad_norm": 0.3129291832447052, "learning_rate": 1.8340199552730518e-05, "loss": 0.0531, "step": 20225 }, { "epoch": 0.4456857657538548, "grad_norm": 0.77605140209198, "learning_rate": 1.833915588843269e-05, "loss": 0.0805, "step": 20226 }, { "epoch": 0.44570780104337093, "grad_norm": 0.602607011795044, "learning_rate": 1.833811220712675e-05, "loss": 0.0615, "step": 20227 }, { "epoch": 0.4457298363328871, "grad_norm": 0.6529698371887207, "learning_rate": 1.8337068508818014e-05, "loss": 0.0728, "step": 20228 }, { "epoch": 0.44575187162240326, "grad_norm": 0.7007632851600647, "learning_rate": 1.8336024793511803e-05, "loss": 0.0767, "step": 20229 }, { "epoch": 0.4457739069119194, "grad_norm": 0.709424614906311, "learning_rate": 1.8334981061213426e-05, "loss": 0.0959, "step": 20230 }, { "epoch": 0.4457959422014356, "grad_norm": 0.7616944313049316, "learning_rate": 1.8333937311928203e-05, "loss": 0.0612, "step": 20231 }, { "epoch": 0.44581797749095176, "grad_norm": 0.38638606667518616, "learning_rate": 1.8332893545661446e-05, "loss": 0.0758, "step": 20232 }, { "epoch": 0.4458400127804679, "grad_norm": 0.3256131410598755, "learning_rate": 1.833184976241848e-05, "loss": 0.06, "step": 20233 }, { "epoch": 0.4458620480699841, "grad_norm": 0.5605111718177795, "learning_rate": 1.8330805962204608e-05, "loss": 0.1176, "step": 20234 }, { "epoch": 0.44588408335950025, "grad_norm": 1.199013113975525, "learning_rate": 1.8329762145025164e-05, "loss": 0.1299, "step": 20235 }, { "epoch": 0.4459061186490164, "grad_norm": 0.7541505098342896, "learning_rate": 1.8328718310885454e-05, "loss": 0.1054, "step": 20236 }, { "epoch": 0.4459281539385326, "grad_norm": 0.709755539894104, "learning_rate": 1.8327674459790795e-05, "loss": 0.0872, "step": 20237 }, { "epoch": 0.44595018922804874, "grad_norm": 0.37640446424484253, "learning_rate": 1.8326630591746508e-05, "loss": 0.0696, "step": 20238 }, { "epoch": 0.44597222451756485, "grad_norm": 0.6859794855117798, "learning_rate": 1.8325586706757902e-05, "loss": 0.0799, "step": 20239 }, { "epoch": 0.445994259807081, "grad_norm": 0.6635652184486389, "learning_rate": 1.8324542804830305e-05, "loss": 0.0885, "step": 20240 }, { "epoch": 0.4460162950965972, "grad_norm": 0.6016507148742676, "learning_rate": 1.8323498885969022e-05, "loss": 0.0789, "step": 20241 }, { "epoch": 0.44603833038611335, "grad_norm": 0.9369806051254272, "learning_rate": 1.8322454950179385e-05, "loss": 0.0901, "step": 20242 }, { "epoch": 0.4460603656756295, "grad_norm": 0.6314859986305237, "learning_rate": 1.8321410997466694e-05, "loss": 0.0684, "step": 20243 }, { "epoch": 0.4460824009651457, "grad_norm": 0.9313660860061646, "learning_rate": 1.8320367027836286e-05, "loss": 0.0803, "step": 20244 }, { "epoch": 0.44610443625466184, "grad_norm": 0.8129127025604248, "learning_rate": 1.831932304129346e-05, "loss": 0.0921, "step": 20245 }, { "epoch": 0.446126471544178, "grad_norm": 0.47484269738197327, "learning_rate": 1.8318279037843548e-05, "loss": 0.0986, "step": 20246 }, { "epoch": 0.44614850683369417, "grad_norm": 0.5179340243339539, "learning_rate": 1.8317235017491858e-05, "loss": 0.0741, "step": 20247 }, { "epoch": 0.44617054212321033, "grad_norm": 0.8615170121192932, "learning_rate": 1.8316190980243713e-05, "loss": 0.0819, "step": 20248 }, { "epoch": 0.4461925774127265, "grad_norm": 0.6930680871009827, "learning_rate": 1.8315146926104428e-05, "loss": 0.0729, "step": 20249 }, { "epoch": 0.44621461270224266, "grad_norm": 1.3542596101760864, "learning_rate": 1.8314102855079323e-05, "loss": 0.1159, "step": 20250 }, { "epoch": 0.44623664799175883, "grad_norm": 0.9323481917381287, "learning_rate": 1.8313058767173713e-05, "loss": 0.1139, "step": 20251 }, { "epoch": 0.44625868328127494, "grad_norm": 0.5647695064544678, "learning_rate": 1.8312014662392923e-05, "loss": 0.0703, "step": 20252 }, { "epoch": 0.4462807185707911, "grad_norm": 0.6216089725494385, "learning_rate": 1.8310970540742262e-05, "loss": 0.0906, "step": 20253 }, { "epoch": 0.44630275386030727, "grad_norm": 0.6836988925933838, "learning_rate": 1.8309926402227053e-05, "loss": 0.0708, "step": 20254 }, { "epoch": 0.44632478914982343, "grad_norm": 0.8110004663467407, "learning_rate": 1.8308882246852612e-05, "loss": 0.0719, "step": 20255 }, { "epoch": 0.4463468244393396, "grad_norm": 0.8224332928657532, "learning_rate": 1.8307838074624265e-05, "loss": 0.1144, "step": 20256 }, { "epoch": 0.44636885972885576, "grad_norm": 0.8390206694602966, "learning_rate": 1.830679388554732e-05, "loss": 0.0915, "step": 20257 }, { "epoch": 0.4463908950183719, "grad_norm": 0.9843921065330505, "learning_rate": 1.83057496796271e-05, "loss": 0.084, "step": 20258 }, { "epoch": 0.4464129303078881, "grad_norm": 0.7505814433097839, "learning_rate": 1.8304705456868926e-05, "loss": 0.1225, "step": 20259 }, { "epoch": 0.44643496559740425, "grad_norm": 0.5761159658432007, "learning_rate": 1.8303661217278112e-05, "loss": 0.0603, "step": 20260 }, { "epoch": 0.4464570008869204, "grad_norm": 0.6765622496604919, "learning_rate": 1.8302616960859986e-05, "loss": 0.0871, "step": 20261 }, { "epoch": 0.4464790361764366, "grad_norm": 0.7795286178588867, "learning_rate": 1.8301572687619857e-05, "loss": 0.0988, "step": 20262 }, { "epoch": 0.44650107146595275, "grad_norm": 1.2546778917312622, "learning_rate": 1.830052839756305e-05, "loss": 0.0926, "step": 20263 }, { "epoch": 0.44652310675546886, "grad_norm": 0.407265305519104, "learning_rate": 1.829948409069488e-05, "loss": 0.0509, "step": 20264 }, { "epoch": 0.446545142044985, "grad_norm": 0.6944836378097534, "learning_rate": 1.829843976702067e-05, "loss": 0.0785, "step": 20265 }, { "epoch": 0.4465671773345012, "grad_norm": 0.7842546701431274, "learning_rate": 1.829739542654573e-05, "loss": 0.0644, "step": 20266 }, { "epoch": 0.44658921262401735, "grad_norm": 0.5930261015892029, "learning_rate": 1.82963510692754e-05, "loss": 0.0465, "step": 20267 }, { "epoch": 0.4466112479135335, "grad_norm": 0.7126737833023071, "learning_rate": 1.8295306695214974e-05, "loss": 0.1064, "step": 20268 }, { "epoch": 0.4466332832030497, "grad_norm": 0.5075037479400635, "learning_rate": 1.829426230436979e-05, "loss": 0.0769, "step": 20269 }, { "epoch": 0.44665531849256584, "grad_norm": 0.5950624942779541, "learning_rate": 1.8293217896745165e-05, "loss": 0.0659, "step": 20270 }, { "epoch": 0.446677353782082, "grad_norm": 0.945408046245575, "learning_rate": 1.8292173472346412e-05, "loss": 0.0943, "step": 20271 }, { "epoch": 0.4466993890715982, "grad_norm": 0.5536565780639648, "learning_rate": 1.829112903117885e-05, "loss": 0.0741, "step": 20272 }, { "epoch": 0.44672142436111434, "grad_norm": 0.7908421158790588, "learning_rate": 1.8290084573247808e-05, "loss": 0.0641, "step": 20273 }, { "epoch": 0.4467434596506305, "grad_norm": 0.3626081645488739, "learning_rate": 1.82890400985586e-05, "loss": 0.0822, "step": 20274 }, { "epoch": 0.44676549494014667, "grad_norm": 0.7214751839637756, "learning_rate": 1.8287995607116545e-05, "loss": 0.0809, "step": 20275 }, { "epoch": 0.44678753022966283, "grad_norm": 1.09281587600708, "learning_rate": 1.8286951098926966e-05, "loss": 0.1089, "step": 20276 }, { "epoch": 0.44680956551917894, "grad_norm": 0.5849865674972534, "learning_rate": 1.8285906573995184e-05, "loss": 0.0679, "step": 20277 }, { "epoch": 0.4468316008086951, "grad_norm": 0.6021073460578918, "learning_rate": 1.8284862032326515e-05, "loss": 0.0949, "step": 20278 }, { "epoch": 0.44685363609821127, "grad_norm": 0.6671695113182068, "learning_rate": 1.8283817473926287e-05, "loss": 0.0583, "step": 20279 }, { "epoch": 0.44687567138772744, "grad_norm": 0.3884183466434479, "learning_rate": 1.828277289879981e-05, "loss": 0.0892, "step": 20280 }, { "epoch": 0.4468977066772436, "grad_norm": 0.7002959847450256, "learning_rate": 1.8281728306952415e-05, "loss": 0.1025, "step": 20281 }, { "epoch": 0.44691974196675976, "grad_norm": 0.6338390707969666, "learning_rate": 1.828068369838941e-05, "loss": 0.0801, "step": 20282 }, { "epoch": 0.44694177725627593, "grad_norm": 0.5502347350120544, "learning_rate": 1.827963907311613e-05, "loss": 0.0854, "step": 20283 }, { "epoch": 0.4469638125457921, "grad_norm": 0.43141329288482666, "learning_rate": 1.827859443113789e-05, "loss": 0.0738, "step": 20284 }, { "epoch": 0.44698584783530826, "grad_norm": 0.665074348449707, "learning_rate": 1.8277549772460014e-05, "loss": 0.0599, "step": 20285 }, { "epoch": 0.4470078831248244, "grad_norm": 0.6047274470329285, "learning_rate": 1.8276505097087815e-05, "loss": 0.0589, "step": 20286 }, { "epoch": 0.4470299184143406, "grad_norm": 0.5917474031448364, "learning_rate": 1.8275460405026617e-05, "loss": 0.0882, "step": 20287 }, { "epoch": 0.44705195370385675, "grad_norm": 0.8489373922348022, "learning_rate": 1.8274415696281744e-05, "loss": 0.0737, "step": 20288 }, { "epoch": 0.44707398899337286, "grad_norm": 0.6561049222946167, "learning_rate": 1.8273370970858513e-05, "loss": 0.1082, "step": 20289 }, { "epoch": 0.447096024282889, "grad_norm": 0.4247170090675354, "learning_rate": 1.8272326228762253e-05, "loss": 0.0531, "step": 20290 }, { "epoch": 0.4471180595724052, "grad_norm": 0.4661140441894531, "learning_rate": 1.8271281469998278e-05, "loss": 0.0625, "step": 20291 }, { "epoch": 0.44714009486192136, "grad_norm": 0.7614215016365051, "learning_rate": 1.8270236694571914e-05, "loss": 0.0916, "step": 20292 }, { "epoch": 0.4471621301514375, "grad_norm": 0.5825321674346924, "learning_rate": 1.8269191902488482e-05, "loss": 0.0841, "step": 20293 }, { "epoch": 0.4471841654409537, "grad_norm": 0.6582595109939575, "learning_rate": 1.8268147093753304e-05, "loss": 0.09, "step": 20294 }, { "epoch": 0.44720620073046985, "grad_norm": 0.5843479037284851, "learning_rate": 1.8267102268371695e-05, "loss": 0.1026, "step": 20295 }, { "epoch": 0.447228236019986, "grad_norm": 0.6019613742828369, "learning_rate": 1.8266057426348986e-05, "loss": 0.0849, "step": 20296 }, { "epoch": 0.4472502713095022, "grad_norm": 0.5635198354721069, "learning_rate": 1.8265012567690492e-05, "loss": 0.0527, "step": 20297 }, { "epoch": 0.44727230659901834, "grad_norm": 0.830839991569519, "learning_rate": 1.826396769240154e-05, "loss": 0.0747, "step": 20298 }, { "epoch": 0.4472943418885345, "grad_norm": 0.7910706996917725, "learning_rate": 1.8262922800487448e-05, "loss": 0.0767, "step": 20299 }, { "epoch": 0.4473163771780507, "grad_norm": 0.714610755443573, "learning_rate": 1.8261877891953543e-05, "loss": 0.0652, "step": 20300 }, { "epoch": 0.4473384124675668, "grad_norm": 0.6072184443473816, "learning_rate": 1.8260832966805145e-05, "loss": 0.0768, "step": 20301 }, { "epoch": 0.44736044775708295, "grad_norm": 0.4800008237361908, "learning_rate": 1.8259788025047575e-05, "loss": 0.0408, "step": 20302 }, { "epoch": 0.4473824830465991, "grad_norm": 0.8347657322883606, "learning_rate": 1.8258743066686158e-05, "loss": 0.0609, "step": 20303 }, { "epoch": 0.4474045183361153, "grad_norm": 0.6059018969535828, "learning_rate": 1.8257698091726215e-05, "loss": 0.0887, "step": 20304 }, { "epoch": 0.44742655362563144, "grad_norm": 0.6624377965927124, "learning_rate": 1.8256653100173063e-05, "loss": 0.0681, "step": 20305 }, { "epoch": 0.4474485889151476, "grad_norm": 0.6620619893074036, "learning_rate": 1.8255608092032038e-05, "loss": 0.079, "step": 20306 }, { "epoch": 0.44747062420466377, "grad_norm": 0.8700022101402283, "learning_rate": 1.8254563067308452e-05, "loss": 0.0864, "step": 20307 }, { "epoch": 0.44749265949417993, "grad_norm": 0.41823604702949524, "learning_rate": 1.825351802600763e-05, "loss": 0.0538, "step": 20308 }, { "epoch": 0.4475146947836961, "grad_norm": 0.4054865539073944, "learning_rate": 1.82524729681349e-05, "loss": 0.0947, "step": 20309 }, { "epoch": 0.44753673007321226, "grad_norm": 0.5489331483840942, "learning_rate": 1.825142789369558e-05, "loss": 0.0917, "step": 20310 }, { "epoch": 0.44755876536272843, "grad_norm": 0.6416481733322144, "learning_rate": 1.8250382802694992e-05, "loss": 0.0648, "step": 20311 }, { "epoch": 0.4475808006522446, "grad_norm": 0.6764264702796936, "learning_rate": 1.824933769513846e-05, "loss": 0.0654, "step": 20312 }, { "epoch": 0.44760283594176076, "grad_norm": 0.7745157480239868, "learning_rate": 1.824829257103131e-05, "loss": 0.0934, "step": 20313 }, { "epoch": 0.44762487123127687, "grad_norm": 1.0614796876907349, "learning_rate": 1.8247247430378866e-05, "loss": 0.1023, "step": 20314 }, { "epoch": 0.44764690652079303, "grad_norm": 0.5296831727027893, "learning_rate": 1.8246202273186445e-05, "loss": 0.0798, "step": 20315 }, { "epoch": 0.4476689418103092, "grad_norm": 0.5653221607208252, "learning_rate": 1.824515709945938e-05, "loss": 0.0749, "step": 20316 }, { "epoch": 0.44769097709982536, "grad_norm": 0.7450246810913086, "learning_rate": 1.824411190920299e-05, "loss": 0.0619, "step": 20317 }, { "epoch": 0.4477130123893415, "grad_norm": 0.7449973225593567, "learning_rate": 1.8243066702422595e-05, "loss": 0.0901, "step": 20318 }, { "epoch": 0.4477350476788577, "grad_norm": 0.48284995555877686, "learning_rate": 1.8242021479123526e-05, "loss": 0.0682, "step": 20319 }, { "epoch": 0.44775708296837385, "grad_norm": 1.2653244733810425, "learning_rate": 1.82409762393111e-05, "loss": 0.0711, "step": 20320 }, { "epoch": 0.44777911825789, "grad_norm": 0.7534224390983582, "learning_rate": 1.8239930982990646e-05, "loss": 0.0741, "step": 20321 }, { "epoch": 0.4478011535474062, "grad_norm": 0.49619749188423157, "learning_rate": 1.823888571016748e-05, "loss": 0.0629, "step": 20322 }, { "epoch": 0.44782318883692235, "grad_norm": 0.492192804813385, "learning_rate": 1.8237840420846937e-05, "loss": 0.0852, "step": 20323 }, { "epoch": 0.4478452241264385, "grad_norm": 0.518750786781311, "learning_rate": 1.8236795115034336e-05, "loss": 0.071, "step": 20324 }, { "epoch": 0.4478672594159547, "grad_norm": 0.5606734156608582, "learning_rate": 1.8235749792735004e-05, "loss": 0.0953, "step": 20325 }, { "epoch": 0.4478892947054708, "grad_norm": 0.531151294708252, "learning_rate": 1.8234704453954265e-05, "loss": 0.0613, "step": 20326 }, { "epoch": 0.44791132999498695, "grad_norm": 0.7238869667053223, "learning_rate": 1.8233659098697437e-05, "loss": 0.0753, "step": 20327 }, { "epoch": 0.4479333652845031, "grad_norm": 0.9380331039428711, "learning_rate": 1.823261372696985e-05, "loss": 0.0797, "step": 20328 }, { "epoch": 0.4479554005740193, "grad_norm": 0.7438759803771973, "learning_rate": 1.8231568338776823e-05, "loss": 0.1086, "step": 20329 }, { "epoch": 0.44797743586353544, "grad_norm": 1.0189481973648071, "learning_rate": 1.8230522934123694e-05, "loss": 0.0713, "step": 20330 }, { "epoch": 0.4479994711530516, "grad_norm": 0.5690658092498779, "learning_rate": 1.8229477513015777e-05, "loss": 0.0696, "step": 20331 }, { "epoch": 0.4480215064425678, "grad_norm": 0.589147686958313, "learning_rate": 1.8228432075458395e-05, "loss": 0.066, "step": 20332 }, { "epoch": 0.44804354173208394, "grad_norm": 0.882097601890564, "learning_rate": 1.8227386621456885e-05, "loss": 0.09, "step": 20333 }, { "epoch": 0.4480655770216001, "grad_norm": 0.5624496936798096, "learning_rate": 1.822634115101656e-05, "loss": 0.0754, "step": 20334 }, { "epoch": 0.44808761231111627, "grad_norm": 0.4167492389678955, "learning_rate": 1.8225295664142748e-05, "loss": 0.0733, "step": 20335 }, { "epoch": 0.44810964760063243, "grad_norm": 0.4298940896987915, "learning_rate": 1.8224250160840777e-05, "loss": 0.093, "step": 20336 }, { "epoch": 0.4481316828901486, "grad_norm": 0.8212524652481079, "learning_rate": 1.822320464111597e-05, "loss": 0.084, "step": 20337 }, { "epoch": 0.4481537181796647, "grad_norm": 0.8681704998016357, "learning_rate": 1.8222159104973655e-05, "loss": 0.0685, "step": 20338 }, { "epoch": 0.44817575346918087, "grad_norm": 0.9505451321601868, "learning_rate": 1.8221113552419153e-05, "loss": 0.0744, "step": 20339 }, { "epoch": 0.44819778875869704, "grad_norm": 0.5535507798194885, "learning_rate": 1.8220067983457797e-05, "loss": 0.0739, "step": 20340 }, { "epoch": 0.4482198240482132, "grad_norm": 0.951340913772583, "learning_rate": 1.8219022398094907e-05, "loss": 0.0975, "step": 20341 }, { "epoch": 0.44824185933772936, "grad_norm": 0.708602249622345, "learning_rate": 1.8217976796335808e-05, "loss": 0.0601, "step": 20342 }, { "epoch": 0.44826389462724553, "grad_norm": 1.3550130128860474, "learning_rate": 1.8216931178185825e-05, "loss": 0.1029, "step": 20343 }, { "epoch": 0.4482859299167617, "grad_norm": 0.8135166168212891, "learning_rate": 1.821588554365029e-05, "loss": 0.079, "step": 20344 }, { "epoch": 0.44830796520627786, "grad_norm": 0.5308411121368408, "learning_rate": 1.8214839892734522e-05, "loss": 0.0991, "step": 20345 }, { "epoch": 0.448330000495794, "grad_norm": 0.8453260660171509, "learning_rate": 1.821379422544386e-05, "loss": 0.1179, "step": 20346 }, { "epoch": 0.4483520357853102, "grad_norm": 0.6496376991271973, "learning_rate": 1.821274854178361e-05, "loss": 0.1174, "step": 20347 }, { "epoch": 0.44837407107482635, "grad_norm": 0.46291592717170715, "learning_rate": 1.8211702841759113e-05, "loss": 0.0881, "step": 20348 }, { "epoch": 0.4483961063643425, "grad_norm": 0.9602694511413574, "learning_rate": 1.8210657125375693e-05, "loss": 0.1233, "step": 20349 }, { "epoch": 0.4484181416538587, "grad_norm": 0.5640436410903931, "learning_rate": 1.820961139263867e-05, "loss": 0.0827, "step": 20350 }, { "epoch": 0.4484401769433748, "grad_norm": 0.4244081676006317, "learning_rate": 1.8208565643553378e-05, "loss": 0.0798, "step": 20351 }, { "epoch": 0.44846221223289096, "grad_norm": 0.557933509349823, "learning_rate": 1.8207519878125138e-05, "loss": 0.0488, "step": 20352 }, { "epoch": 0.4484842475224071, "grad_norm": 0.8180322051048279, "learning_rate": 1.8206474096359286e-05, "loss": 0.0949, "step": 20353 }, { "epoch": 0.4485062828119233, "grad_norm": 0.42394694685935974, "learning_rate": 1.820542829826113e-05, "loss": 0.0669, "step": 20354 }, { "epoch": 0.44852831810143945, "grad_norm": 0.7751814126968384, "learning_rate": 1.820438248383602e-05, "loss": 0.0744, "step": 20355 }, { "epoch": 0.4485503533909556, "grad_norm": 0.6321399807929993, "learning_rate": 1.820333665308927e-05, "loss": 0.0921, "step": 20356 }, { "epoch": 0.4485723886804718, "grad_norm": 0.4493257999420166, "learning_rate": 1.8202290806026208e-05, "loss": 0.0883, "step": 20357 }, { "epoch": 0.44859442396998794, "grad_norm": 0.9274534583091736, "learning_rate": 1.8201244942652157e-05, "loss": 0.0784, "step": 20358 }, { "epoch": 0.4486164592595041, "grad_norm": 0.5067104697227478, "learning_rate": 1.8200199062972454e-05, "loss": 0.0844, "step": 20359 }, { "epoch": 0.4486384945490203, "grad_norm": 0.7445939779281616, "learning_rate": 1.819915316699242e-05, "loss": 0.0832, "step": 20360 }, { "epoch": 0.44866052983853644, "grad_norm": 0.7074973583221436, "learning_rate": 1.819810725471738e-05, "loss": 0.0596, "step": 20361 }, { "epoch": 0.4486825651280526, "grad_norm": 1.1135082244873047, "learning_rate": 1.8197061326152664e-05, "loss": 0.0945, "step": 20362 }, { "epoch": 0.4487046004175687, "grad_norm": 0.5924645066261292, "learning_rate": 1.8196015381303605e-05, "loss": 0.0656, "step": 20363 }, { "epoch": 0.4487266357070849, "grad_norm": 0.7011825442314148, "learning_rate": 1.8194969420175525e-05, "loss": 0.0859, "step": 20364 }, { "epoch": 0.44874867099660104, "grad_norm": 0.4668091833591461, "learning_rate": 1.8193923442773752e-05, "loss": 0.0614, "step": 20365 }, { "epoch": 0.4487707062861172, "grad_norm": 0.8356955051422119, "learning_rate": 1.8192877449103615e-05, "loss": 0.0665, "step": 20366 }, { "epoch": 0.44879274157563337, "grad_norm": 0.59224534034729, "learning_rate": 1.819183143917044e-05, "loss": 0.0653, "step": 20367 }, { "epoch": 0.44881477686514953, "grad_norm": 0.492127388715744, "learning_rate": 1.8190785412979554e-05, "loss": 0.0982, "step": 20368 }, { "epoch": 0.4488368121546657, "grad_norm": 0.6648678779602051, "learning_rate": 1.818973937053629e-05, "loss": 0.0666, "step": 20369 }, { "epoch": 0.44885884744418186, "grad_norm": 0.7297644019126892, "learning_rate": 1.818869331184597e-05, "loss": 0.0773, "step": 20370 }, { "epoch": 0.44888088273369803, "grad_norm": 0.9816092848777771, "learning_rate": 1.8187647236913927e-05, "loss": 0.0786, "step": 20371 }, { "epoch": 0.4489029180232142, "grad_norm": 0.7563652396202087, "learning_rate": 1.818660114574549e-05, "loss": 0.1091, "step": 20372 }, { "epoch": 0.44892495331273036, "grad_norm": 0.8205816149711609, "learning_rate": 1.818555503834598e-05, "loss": 0.086, "step": 20373 }, { "epoch": 0.4489469886022465, "grad_norm": 1.1004542112350464, "learning_rate": 1.818450891472073e-05, "loss": 0.1248, "step": 20374 }, { "epoch": 0.44896902389176263, "grad_norm": 1.1406846046447754, "learning_rate": 1.818346277487507e-05, "loss": 0.0823, "step": 20375 }, { "epoch": 0.4489910591812788, "grad_norm": 0.7325742244720459, "learning_rate": 1.8182416618814327e-05, "loss": 0.0922, "step": 20376 }, { "epoch": 0.44901309447079496, "grad_norm": 1.1658552885055542, "learning_rate": 1.8181370446543826e-05, "loss": 0.0652, "step": 20377 }, { "epoch": 0.4490351297603111, "grad_norm": 0.5695787668228149, "learning_rate": 1.8180324258068904e-05, "loss": 0.0789, "step": 20378 }, { "epoch": 0.4490571650498273, "grad_norm": 0.6654224395751953, "learning_rate": 1.8179278053394884e-05, "loss": 0.0768, "step": 20379 }, { "epoch": 0.44907920033934345, "grad_norm": 1.0806326866149902, "learning_rate": 1.8178231832527098e-05, "loss": 0.1069, "step": 20380 }, { "epoch": 0.4491012356288596, "grad_norm": 1.089505672454834, "learning_rate": 1.8177185595470872e-05, "loss": 0.106, "step": 20381 }, { "epoch": 0.4491232709183758, "grad_norm": 0.45426464080810547, "learning_rate": 1.8176139342231537e-05, "loss": 0.0816, "step": 20382 }, { "epoch": 0.44914530620789195, "grad_norm": 0.6107221841812134, "learning_rate": 1.8175093072814417e-05, "loss": 0.0893, "step": 20383 }, { "epoch": 0.4491673414974081, "grad_norm": 0.5181110501289368, "learning_rate": 1.8174046787224847e-05, "loss": 0.0557, "step": 20384 }, { "epoch": 0.4491893767869243, "grad_norm": 0.7589728832244873, "learning_rate": 1.8173000485468152e-05, "loss": 0.0944, "step": 20385 }, { "epoch": 0.44921141207644044, "grad_norm": 0.31985312700271606, "learning_rate": 1.817195416754967e-05, "loss": 0.0975, "step": 20386 }, { "epoch": 0.4492334473659566, "grad_norm": 0.525061309337616, "learning_rate": 1.817090783347472e-05, "loss": 0.0442, "step": 20387 }, { "epoch": 0.4492554826554727, "grad_norm": 0.6162818074226379, "learning_rate": 1.8169861483248638e-05, "loss": 0.1045, "step": 20388 }, { "epoch": 0.4492775179449889, "grad_norm": 0.6757727861404419, "learning_rate": 1.8168815116876753e-05, "loss": 0.0895, "step": 20389 }, { "epoch": 0.44929955323450504, "grad_norm": 0.4847106337547302, "learning_rate": 1.8167768734364394e-05, "loss": 0.092, "step": 20390 }, { "epoch": 0.4493215885240212, "grad_norm": 0.678238034248352, "learning_rate": 1.8166722335716884e-05, "loss": 0.107, "step": 20391 }, { "epoch": 0.4493436238135374, "grad_norm": 0.6275375485420227, "learning_rate": 1.8165675920939563e-05, "loss": 0.0719, "step": 20392 }, { "epoch": 0.44936565910305354, "grad_norm": 0.48335862159729004, "learning_rate": 1.8164629490037757e-05, "loss": 0.096, "step": 20393 }, { "epoch": 0.4493876943925697, "grad_norm": 1.121080994606018, "learning_rate": 1.8163583043016795e-05, "loss": 0.112, "step": 20394 }, { "epoch": 0.44940972968208587, "grad_norm": 0.8647212386131287, "learning_rate": 1.8162536579882008e-05, "loss": 0.0808, "step": 20395 }, { "epoch": 0.44943176497160203, "grad_norm": 0.7695612907409668, "learning_rate": 1.816149010063873e-05, "loss": 0.1124, "step": 20396 }, { "epoch": 0.4494538002611182, "grad_norm": 0.5186548233032227, "learning_rate": 1.8160443605292286e-05, "loss": 0.0701, "step": 20397 }, { "epoch": 0.44947583555063436, "grad_norm": 0.6770976185798645, "learning_rate": 1.8159397093848004e-05, "loss": 0.0777, "step": 20398 }, { "epoch": 0.4494978708401505, "grad_norm": 0.5830792188644409, "learning_rate": 1.815835056631122e-05, "loss": 0.0844, "step": 20399 }, { "epoch": 0.44951990612966664, "grad_norm": 0.5355975031852722, "learning_rate": 1.8157304022687263e-05, "loss": 0.0523, "step": 20400 }, { "epoch": 0.4495419414191828, "grad_norm": 0.6254972815513611, "learning_rate": 1.8156257462981467e-05, "loss": 0.1055, "step": 20401 }, { "epoch": 0.44956397670869896, "grad_norm": 0.6016308069229126, "learning_rate": 1.8155210887199157e-05, "loss": 0.0766, "step": 20402 }, { "epoch": 0.44958601199821513, "grad_norm": 0.8011147975921631, "learning_rate": 1.815416429534567e-05, "loss": 0.0516, "step": 20403 }, { "epoch": 0.4496080472877313, "grad_norm": 0.6735875606536865, "learning_rate": 1.8153117687426326e-05, "loss": 0.0674, "step": 20404 }, { "epoch": 0.44963008257724746, "grad_norm": 0.5685247778892517, "learning_rate": 1.8152071063446467e-05, "loss": 0.074, "step": 20405 }, { "epoch": 0.4496521178667636, "grad_norm": 0.6732449531555176, "learning_rate": 1.8151024423411416e-05, "loss": 0.1018, "step": 20406 }, { "epoch": 0.4496741531562798, "grad_norm": 0.8927270174026489, "learning_rate": 1.814997776732651e-05, "loss": 0.0958, "step": 20407 }, { "epoch": 0.44969618844579595, "grad_norm": 0.5571802258491516, "learning_rate": 1.8148931095197077e-05, "loss": 0.1126, "step": 20408 }, { "epoch": 0.4497182237353121, "grad_norm": 0.5613957643508911, "learning_rate": 1.814788440702845e-05, "loss": 0.0718, "step": 20409 }, { "epoch": 0.4497402590248283, "grad_norm": 0.7265819311141968, "learning_rate": 1.814683770282596e-05, "loss": 0.1447, "step": 20410 }, { "epoch": 0.44976229431434445, "grad_norm": 0.5092628598213196, "learning_rate": 1.814579098259494e-05, "loss": 0.1075, "step": 20411 }, { "epoch": 0.44978432960386056, "grad_norm": 0.9832219481468201, "learning_rate": 1.814474424634072e-05, "loss": 0.0836, "step": 20412 }, { "epoch": 0.4498063648933767, "grad_norm": 0.619408130645752, "learning_rate": 1.814369749406863e-05, "loss": 0.0698, "step": 20413 }, { "epoch": 0.4498284001828929, "grad_norm": 0.8018124103546143, "learning_rate": 1.8142650725784e-05, "loss": 0.1094, "step": 20414 }, { "epoch": 0.44985043547240905, "grad_norm": 0.6281996965408325, "learning_rate": 1.8141603941492172e-05, "loss": 0.0806, "step": 20415 }, { "epoch": 0.4498724707619252, "grad_norm": 0.7397772669792175, "learning_rate": 1.814055714119846e-05, "loss": 0.0503, "step": 20416 }, { "epoch": 0.4498945060514414, "grad_norm": 0.7645944952964783, "learning_rate": 1.813951032490821e-05, "loss": 0.0767, "step": 20417 }, { "epoch": 0.44991654134095754, "grad_norm": 0.6839975118637085, "learning_rate": 1.8138463492626754e-05, "loss": 0.0772, "step": 20418 }, { "epoch": 0.4499385766304737, "grad_norm": 0.4260097146034241, "learning_rate": 1.8137416644359417e-05, "loss": 0.075, "step": 20419 }, { "epoch": 0.4499606119199899, "grad_norm": 0.5985180735588074, "learning_rate": 1.813636978011154e-05, "loss": 0.0743, "step": 20420 }, { "epoch": 0.44998264720950604, "grad_norm": 0.49081212282180786, "learning_rate": 1.8135322899888443e-05, "loss": 0.0852, "step": 20421 }, { "epoch": 0.4500046824990222, "grad_norm": 0.8084460496902466, "learning_rate": 1.813427600369547e-05, "loss": 0.0836, "step": 20422 }, { "epoch": 0.45002671778853837, "grad_norm": 0.8115832805633545, "learning_rate": 1.8133229091537946e-05, "loss": 0.061, "step": 20423 }, { "epoch": 0.45004875307805453, "grad_norm": 0.6028499603271484, "learning_rate": 1.8132182163421207e-05, "loss": 0.0465, "step": 20424 }, { "epoch": 0.45007078836757064, "grad_norm": 0.6155337691307068, "learning_rate": 1.8131135219350582e-05, "loss": 0.0818, "step": 20425 }, { "epoch": 0.4500928236570868, "grad_norm": 1.012212872505188, "learning_rate": 1.813008825933141e-05, "loss": 0.0773, "step": 20426 }, { "epoch": 0.45011485894660297, "grad_norm": 0.712575376033783, "learning_rate": 1.812904128336902e-05, "loss": 0.0716, "step": 20427 }, { "epoch": 0.45013689423611913, "grad_norm": 0.6803061366081238, "learning_rate": 1.8127994291468743e-05, "loss": 0.0405, "step": 20428 }, { "epoch": 0.4501589295256353, "grad_norm": 0.5757352113723755, "learning_rate": 1.812694728363591e-05, "loss": 0.1196, "step": 20429 }, { "epoch": 0.45018096481515146, "grad_norm": 0.37406671047210693, "learning_rate": 1.8125900259875867e-05, "loss": 0.0914, "step": 20430 }, { "epoch": 0.4502030001046676, "grad_norm": 0.779267430305481, "learning_rate": 1.812485322019393e-05, "loss": 0.0884, "step": 20431 }, { "epoch": 0.4502250353941838, "grad_norm": 0.7824183106422424, "learning_rate": 1.8123806164595443e-05, "loss": 0.0647, "step": 20432 }, { "epoch": 0.45024707068369996, "grad_norm": 0.9969483017921448, "learning_rate": 1.8122759093085733e-05, "loss": 0.0786, "step": 20433 }, { "epoch": 0.4502691059732161, "grad_norm": 0.5995392203330994, "learning_rate": 1.8121712005670143e-05, "loss": 0.0986, "step": 20434 }, { "epoch": 0.4502911412627323, "grad_norm": 0.803352415561676, "learning_rate": 1.8120664902353993e-05, "loss": 0.1162, "step": 20435 }, { "epoch": 0.45031317655224845, "grad_norm": 1.074015736579895, "learning_rate": 1.8119617783142626e-05, "loss": 0.1083, "step": 20436 }, { "epoch": 0.45033521184176456, "grad_norm": 0.648407518863678, "learning_rate": 1.8118570648041375e-05, "loss": 0.069, "step": 20437 }, { "epoch": 0.4503572471312807, "grad_norm": 0.7086424231529236, "learning_rate": 1.8117523497055566e-05, "loss": 0.0902, "step": 20438 }, { "epoch": 0.4503792824207969, "grad_norm": 0.7232579588890076, "learning_rate": 1.8116476330190542e-05, "loss": 0.0939, "step": 20439 }, { "epoch": 0.45040131771031305, "grad_norm": 0.6900907754898071, "learning_rate": 1.8115429147451633e-05, "loss": 0.0763, "step": 20440 }, { "epoch": 0.4504233529998292, "grad_norm": 0.9147356152534485, "learning_rate": 1.8114381948844168e-05, "loss": 0.0963, "step": 20441 }, { "epoch": 0.4504453882893454, "grad_norm": 0.4943678081035614, "learning_rate": 1.811333473437349e-05, "loss": 0.0758, "step": 20442 }, { "epoch": 0.45046742357886155, "grad_norm": 0.4280124306678772, "learning_rate": 1.811228750404493e-05, "loss": 0.0628, "step": 20443 }, { "epoch": 0.4504894588683777, "grad_norm": 0.604546070098877, "learning_rate": 1.811124025786382e-05, "loss": 0.0769, "step": 20444 }, { "epoch": 0.4505114941578939, "grad_norm": 0.8298752307891846, "learning_rate": 1.8110192995835494e-05, "loss": 0.1216, "step": 20445 }, { "epoch": 0.45053352944741004, "grad_norm": 0.6528780460357666, "learning_rate": 1.8109145717965284e-05, "loss": 0.07, "step": 20446 }, { "epoch": 0.4505555647369262, "grad_norm": 0.28696897625923157, "learning_rate": 1.8108098424258534e-05, "loss": 0.0809, "step": 20447 }, { "epoch": 0.45057760002644237, "grad_norm": 0.8756070137023926, "learning_rate": 1.8107051114720568e-05, "loss": 0.0905, "step": 20448 }, { "epoch": 0.4505996353159585, "grad_norm": 0.5682886242866516, "learning_rate": 1.8106003789356726e-05, "loss": 0.0829, "step": 20449 }, { "epoch": 0.45062167060547464, "grad_norm": 0.8482112288475037, "learning_rate": 1.810495644817234e-05, "loss": 0.1057, "step": 20450 }, { "epoch": 0.4506437058949908, "grad_norm": 0.5919806361198425, "learning_rate": 1.8103909091172745e-05, "loss": 0.089, "step": 20451 }, { "epoch": 0.450665741184507, "grad_norm": 1.0068697929382324, "learning_rate": 1.8102861718363278e-05, "loss": 0.1241, "step": 20452 }, { "epoch": 0.45068777647402314, "grad_norm": 0.5605911612510681, "learning_rate": 1.8101814329749274e-05, "loss": 0.0615, "step": 20453 }, { "epoch": 0.4507098117635393, "grad_norm": 1.2082042694091797, "learning_rate": 1.8100766925336063e-05, "loss": 0.064, "step": 20454 }, { "epoch": 0.45073184705305547, "grad_norm": 0.3379831910133362, "learning_rate": 1.8099719505128987e-05, "loss": 0.0988, "step": 20455 }, { "epoch": 0.45075388234257163, "grad_norm": 0.6348570585250854, "learning_rate": 1.8098672069133372e-05, "loss": 0.0623, "step": 20456 }, { "epoch": 0.4507759176320878, "grad_norm": 0.7795338034629822, "learning_rate": 1.809762461735456e-05, "loss": 0.0925, "step": 20457 }, { "epoch": 0.45079795292160396, "grad_norm": 0.8562353253364563, "learning_rate": 1.8096577149797884e-05, "loss": 0.0942, "step": 20458 }, { "epoch": 0.4508199882111201, "grad_norm": 0.7627873420715332, "learning_rate": 1.8095529666468687e-05, "loss": 0.0963, "step": 20459 }, { "epoch": 0.4508420235006363, "grad_norm": 0.7577337622642517, "learning_rate": 1.8094482167372296e-05, "loss": 0.1019, "step": 20460 }, { "epoch": 0.45086405879015246, "grad_norm": 0.5624366998672485, "learning_rate": 1.809343465251404e-05, "loss": 0.0741, "step": 20461 }, { "epoch": 0.45088609407966856, "grad_norm": 0.6982957720756531, "learning_rate": 1.809238712189927e-05, "loss": 0.0744, "step": 20462 }, { "epoch": 0.45090812936918473, "grad_norm": 0.35695135593414307, "learning_rate": 1.8091339575533308e-05, "loss": 0.0563, "step": 20463 }, { "epoch": 0.4509301646587009, "grad_norm": 0.6608408093452454, "learning_rate": 1.80902920134215e-05, "loss": 0.0701, "step": 20464 }, { "epoch": 0.45095219994821706, "grad_norm": 0.526265561580658, "learning_rate": 1.8089244435569176e-05, "loss": 0.0751, "step": 20465 }, { "epoch": 0.4509742352377332, "grad_norm": 0.7527025938034058, "learning_rate": 1.8088196841981676e-05, "loss": 0.0776, "step": 20466 }, { "epoch": 0.4509962705272494, "grad_norm": 0.5286436080932617, "learning_rate": 1.8087149232664334e-05, "loss": 0.0814, "step": 20467 }, { "epoch": 0.45101830581676555, "grad_norm": 0.40135490894317627, "learning_rate": 1.8086101607622482e-05, "loss": 0.0865, "step": 20468 }, { "epoch": 0.4510403411062817, "grad_norm": 0.6752382516860962, "learning_rate": 1.808505396686146e-05, "loss": 0.1082, "step": 20469 }, { "epoch": 0.4510623763957979, "grad_norm": 0.8468894362449646, "learning_rate": 1.8084006310386606e-05, "loss": 0.1009, "step": 20470 }, { "epoch": 0.45108441168531405, "grad_norm": 0.6855751872062683, "learning_rate": 1.808295863820325e-05, "loss": 0.0717, "step": 20471 }, { "epoch": 0.4511064469748302, "grad_norm": 0.48267000913619995, "learning_rate": 1.8081910950316738e-05, "loss": 0.079, "step": 20472 }, { "epoch": 0.4511284822643464, "grad_norm": 0.5222563147544861, "learning_rate": 1.8080863246732394e-05, "loss": 0.0706, "step": 20473 }, { "epoch": 0.4511505175538625, "grad_norm": 0.6952348947525024, "learning_rate": 1.807981552745557e-05, "loss": 0.0545, "step": 20474 }, { "epoch": 0.45117255284337865, "grad_norm": 0.5504271984100342, "learning_rate": 1.8078767792491588e-05, "loss": 0.0621, "step": 20475 }, { "epoch": 0.4511945881328948, "grad_norm": 0.9181898236274719, "learning_rate": 1.8077720041845795e-05, "loss": 0.0746, "step": 20476 }, { "epoch": 0.451216623422411, "grad_norm": 0.7656020522117615, "learning_rate": 1.8076672275523517e-05, "loss": 0.067, "step": 20477 }, { "epoch": 0.45123865871192714, "grad_norm": 0.6419122815132141, "learning_rate": 1.8075624493530102e-05, "loss": 0.0748, "step": 20478 }, { "epoch": 0.4512606940014433, "grad_norm": 1.1986433267593384, "learning_rate": 1.807457669587088e-05, "loss": 0.0575, "step": 20479 }, { "epoch": 0.4512827292909595, "grad_norm": 0.5499619841575623, "learning_rate": 1.8073528882551193e-05, "loss": 0.077, "step": 20480 }, { "epoch": 0.45130476458047564, "grad_norm": 0.5865623950958252, "learning_rate": 1.807248105357637e-05, "loss": 0.0915, "step": 20481 }, { "epoch": 0.4513267998699918, "grad_norm": 0.64503413438797, "learning_rate": 1.807143320895176e-05, "loss": 0.0652, "step": 20482 }, { "epoch": 0.45134883515950797, "grad_norm": 0.7526378631591797, "learning_rate": 1.8070385348682693e-05, "loss": 0.0908, "step": 20483 }, { "epoch": 0.45137087044902413, "grad_norm": 0.9118422865867615, "learning_rate": 1.80693374727745e-05, "loss": 0.077, "step": 20484 }, { "epoch": 0.4513929057385403, "grad_norm": 0.5001431107521057, "learning_rate": 1.8068289581232532e-05, "loss": 0.0756, "step": 20485 }, { "epoch": 0.45141494102805646, "grad_norm": 0.6201049089431763, "learning_rate": 1.8067241674062115e-05, "loss": 0.0618, "step": 20486 }, { "epoch": 0.45143697631757257, "grad_norm": 0.6324297189712524, "learning_rate": 1.8066193751268596e-05, "loss": 0.0783, "step": 20487 }, { "epoch": 0.45145901160708873, "grad_norm": 0.5716443061828613, "learning_rate": 1.8065145812857303e-05, "loss": 0.0862, "step": 20488 }, { "epoch": 0.4514810468966049, "grad_norm": 0.911817729473114, "learning_rate": 1.8064097858833584e-05, "loss": 0.0733, "step": 20489 }, { "epoch": 0.45150308218612106, "grad_norm": 0.8106176853179932, "learning_rate": 1.8063049889202764e-05, "loss": 0.1109, "step": 20490 }, { "epoch": 0.4515251174756372, "grad_norm": 0.46887633204460144, "learning_rate": 1.8062001903970197e-05, "loss": 0.0654, "step": 20491 }, { "epoch": 0.4515471527651534, "grad_norm": 0.7334064841270447, "learning_rate": 1.806095390314121e-05, "loss": 0.0808, "step": 20492 }, { "epoch": 0.45156918805466956, "grad_norm": 0.5199616551399231, "learning_rate": 1.8059905886721147e-05, "loss": 0.0634, "step": 20493 }, { "epoch": 0.4515912233441857, "grad_norm": 0.6026516556739807, "learning_rate": 1.8058857854715333e-05, "loss": 0.0789, "step": 20494 }, { "epoch": 0.4516132586337019, "grad_norm": 0.6797528266906738, "learning_rate": 1.805780980712912e-05, "loss": 0.1113, "step": 20495 }, { "epoch": 0.45163529392321805, "grad_norm": 0.5020534992218018, "learning_rate": 1.8056761743967845e-05, "loss": 0.0792, "step": 20496 }, { "epoch": 0.4516573292127342, "grad_norm": 0.5935609936714172, "learning_rate": 1.805571366523684e-05, "loss": 0.0954, "step": 20497 }, { "epoch": 0.4516793645022504, "grad_norm": 0.6420249938964844, "learning_rate": 1.8054665570941443e-05, "loss": 0.0835, "step": 20498 }, { "epoch": 0.4517013997917665, "grad_norm": 1.269564151763916, "learning_rate": 1.8053617461087005e-05, "loss": 0.0723, "step": 20499 }, { "epoch": 0.45172343508128265, "grad_norm": 0.48627617955207825, "learning_rate": 1.805256933567885e-05, "loss": 0.0866, "step": 20500 }, { "epoch": 0.4517454703707988, "grad_norm": 0.5014544129371643, "learning_rate": 1.8051521194722326e-05, "loss": 0.0693, "step": 20501 }, { "epoch": 0.451767505660315, "grad_norm": 0.6617690324783325, "learning_rate": 1.8050473038222764e-05, "loss": 0.0892, "step": 20502 }, { "epoch": 0.45178954094983115, "grad_norm": 0.5290900468826294, "learning_rate": 1.804942486618551e-05, "loss": 0.0586, "step": 20503 }, { "epoch": 0.4518115762393473, "grad_norm": 0.5841773748397827, "learning_rate": 1.8048376678615898e-05, "loss": 0.076, "step": 20504 }, { "epoch": 0.4518336115288635, "grad_norm": 0.6741701364517212, "learning_rate": 1.8047328475519264e-05, "loss": 0.0931, "step": 20505 }, { "epoch": 0.45185564681837964, "grad_norm": 1.007045865058899, "learning_rate": 1.8046280256900962e-05, "loss": 0.0833, "step": 20506 }, { "epoch": 0.4518776821078958, "grad_norm": 0.6438674330711365, "learning_rate": 1.8045232022766317e-05, "loss": 0.0968, "step": 20507 }, { "epoch": 0.45189971739741197, "grad_norm": 0.5617098808288574, "learning_rate": 1.8044183773120672e-05, "loss": 0.0636, "step": 20508 }, { "epoch": 0.45192175268692814, "grad_norm": 0.8983497619628906, "learning_rate": 1.8043135507969368e-05, "loss": 0.0821, "step": 20509 }, { "epoch": 0.4519437879764443, "grad_norm": 1.0460491180419922, "learning_rate": 1.804208722731774e-05, "loss": 0.071, "step": 20510 }, { "epoch": 0.4519658232659604, "grad_norm": 0.48586854338645935, "learning_rate": 1.804103893117113e-05, "loss": 0.0518, "step": 20511 }, { "epoch": 0.4519878585554766, "grad_norm": 1.0494343042373657, "learning_rate": 1.8039990619534883e-05, "loss": 0.1168, "step": 20512 }, { "epoch": 0.45200989384499274, "grad_norm": 0.5668768882751465, "learning_rate": 1.803894229241433e-05, "loss": 0.0675, "step": 20513 }, { "epoch": 0.4520319291345089, "grad_norm": 0.48768508434295654, "learning_rate": 1.8037893949814815e-05, "loss": 0.0664, "step": 20514 }, { "epoch": 0.45205396442402507, "grad_norm": 0.6591085195541382, "learning_rate": 1.803684559174168e-05, "loss": 0.0571, "step": 20515 }, { "epoch": 0.45207599971354123, "grad_norm": 0.6603829264640808, "learning_rate": 1.803579721820026e-05, "loss": 0.0714, "step": 20516 }, { "epoch": 0.4520980350030574, "grad_norm": 0.8447957038879395, "learning_rate": 1.8034748829195894e-05, "loss": 0.1148, "step": 20517 }, { "epoch": 0.45212007029257356, "grad_norm": 0.7122458815574646, "learning_rate": 1.8033700424733927e-05, "loss": 0.1126, "step": 20518 }, { "epoch": 0.4521421055820897, "grad_norm": 0.4424610137939453, "learning_rate": 1.8032652004819695e-05, "loss": 0.0542, "step": 20519 }, { "epoch": 0.4521641408716059, "grad_norm": 0.5530814528465271, "learning_rate": 1.8031603569458546e-05, "loss": 0.09, "step": 20520 }, { "epoch": 0.45218617616112206, "grad_norm": 0.828469455242157, "learning_rate": 1.8030555118655807e-05, "loss": 0.0677, "step": 20521 }, { "epoch": 0.4522082114506382, "grad_norm": 0.6825725436210632, "learning_rate": 1.802950665241683e-05, "loss": 0.0972, "step": 20522 }, { "epoch": 0.4522302467401544, "grad_norm": 1.0211360454559326, "learning_rate": 1.802845817074695e-05, "loss": 0.0828, "step": 20523 }, { "epoch": 0.4522522820296705, "grad_norm": 0.6991977095603943, "learning_rate": 1.802740967365151e-05, "loss": 0.0761, "step": 20524 }, { "epoch": 0.45227431731918666, "grad_norm": 1.0726372003555298, "learning_rate": 1.8026361161135845e-05, "loss": 0.087, "step": 20525 }, { "epoch": 0.4522963526087028, "grad_norm": 0.5114942193031311, "learning_rate": 1.8025312633205305e-05, "loss": 0.0604, "step": 20526 }, { "epoch": 0.452318387898219, "grad_norm": 1.1058591604232788, "learning_rate": 1.802426408986522e-05, "loss": 0.0814, "step": 20527 }, { "epoch": 0.45234042318773515, "grad_norm": 0.7733250260353088, "learning_rate": 1.802321553112094e-05, "loss": 0.0715, "step": 20528 }, { "epoch": 0.4523624584772513, "grad_norm": 0.7799130082130432, "learning_rate": 1.80221669569778e-05, "loss": 0.105, "step": 20529 }, { "epoch": 0.4523844937667675, "grad_norm": 0.7352358102798462, "learning_rate": 1.8021118367441146e-05, "loss": 0.0871, "step": 20530 }, { "epoch": 0.45240652905628365, "grad_norm": 0.6808153390884399, "learning_rate": 1.8020069762516314e-05, "loss": 0.1089, "step": 20531 }, { "epoch": 0.4524285643457998, "grad_norm": 0.9661800861358643, "learning_rate": 1.8019021142208645e-05, "loss": 0.1115, "step": 20532 }, { "epoch": 0.452450599635316, "grad_norm": 0.44846683740615845, "learning_rate": 1.801797250652349e-05, "loss": 0.0983, "step": 20533 }, { "epoch": 0.45247263492483214, "grad_norm": 0.6364306211471558, "learning_rate": 1.8016923855466173e-05, "loss": 0.0841, "step": 20534 }, { "epoch": 0.4524946702143483, "grad_norm": 0.5508006811141968, "learning_rate": 1.8015875189042053e-05, "loss": 0.0761, "step": 20535 }, { "epoch": 0.4525167055038644, "grad_norm": 0.5421656370162964, "learning_rate": 1.8014826507256458e-05, "loss": 0.0956, "step": 20536 }, { "epoch": 0.4525387407933806, "grad_norm": 0.813061535358429, "learning_rate": 1.801377781011474e-05, "loss": 0.0981, "step": 20537 }, { "epoch": 0.45256077608289674, "grad_norm": 0.7148510813713074, "learning_rate": 1.8012729097622228e-05, "loss": 0.1034, "step": 20538 }, { "epoch": 0.4525828113724129, "grad_norm": 0.5885075926780701, "learning_rate": 1.801168036978428e-05, "loss": 0.0835, "step": 20539 }, { "epoch": 0.45260484666192907, "grad_norm": 0.8147019147872925, "learning_rate": 1.801063162660622e-05, "loss": 0.0803, "step": 20540 }, { "epoch": 0.45262688195144524, "grad_norm": 0.5364090204238892, "learning_rate": 1.8009582868093407e-05, "loss": 0.0699, "step": 20541 }, { "epoch": 0.4526489172409614, "grad_norm": 0.8104365468025208, "learning_rate": 1.8008534094251167e-05, "loss": 0.078, "step": 20542 }, { "epoch": 0.45267095253047757, "grad_norm": 0.8285868167877197, "learning_rate": 1.8007485305084854e-05, "loss": 0.0724, "step": 20543 }, { "epoch": 0.45269298781999373, "grad_norm": 0.6123335957527161, "learning_rate": 1.8006436500599805e-05, "loss": 0.0679, "step": 20544 }, { "epoch": 0.4527150231095099, "grad_norm": 0.7261815667152405, "learning_rate": 1.8005387680801362e-05, "loss": 0.0804, "step": 20545 }, { "epoch": 0.45273705839902606, "grad_norm": 0.7565650343894958, "learning_rate": 1.800433884569487e-05, "loss": 0.0965, "step": 20546 }, { "epoch": 0.4527590936885422, "grad_norm": 0.6133122444152832, "learning_rate": 1.800328999528567e-05, "loss": 0.0463, "step": 20547 }, { "epoch": 0.45278112897805833, "grad_norm": 0.5513667464256287, "learning_rate": 1.80022411295791e-05, "loss": 0.0703, "step": 20548 }, { "epoch": 0.4528031642675745, "grad_norm": 0.41964298486709595, "learning_rate": 1.800119224858051e-05, "loss": 0.0755, "step": 20549 }, { "epoch": 0.45282519955709066, "grad_norm": 0.5384341478347778, "learning_rate": 1.800014335229523e-05, "loss": 0.0776, "step": 20550 }, { "epoch": 0.4528472348466068, "grad_norm": 0.615334689617157, "learning_rate": 1.799909444072862e-05, "loss": 0.0766, "step": 20551 }, { "epoch": 0.452869270136123, "grad_norm": 0.9356498122215271, "learning_rate": 1.799804551388601e-05, "loss": 0.1099, "step": 20552 }, { "epoch": 0.45289130542563916, "grad_norm": 0.913526713848114, "learning_rate": 1.7996996571772745e-05, "loss": 0.104, "step": 20553 }, { "epoch": 0.4529133407151553, "grad_norm": 0.3287845551967621, "learning_rate": 1.799594761439417e-05, "loss": 0.0678, "step": 20554 }, { "epoch": 0.4529353760046715, "grad_norm": 0.5797958970069885, "learning_rate": 1.799489864175563e-05, "loss": 0.0828, "step": 20555 }, { "epoch": 0.45295741129418765, "grad_norm": 0.7880843281745911, "learning_rate": 1.7993849653862467e-05, "loss": 0.0786, "step": 20556 }, { "epoch": 0.4529794465837038, "grad_norm": 0.8169234991073608, "learning_rate": 1.7992800650720016e-05, "loss": 0.0488, "step": 20557 }, { "epoch": 0.45300148187322, "grad_norm": 0.6162373423576355, "learning_rate": 1.7991751632333635e-05, "loss": 0.0829, "step": 20558 }, { "epoch": 0.45302351716273614, "grad_norm": 0.94593745470047, "learning_rate": 1.799070259870865e-05, "loss": 0.1162, "step": 20559 }, { "epoch": 0.4530455524522523, "grad_norm": 0.6978381276130676, "learning_rate": 1.7989653549850414e-05, "loss": 0.0562, "step": 20560 }, { "epoch": 0.4530675877417684, "grad_norm": 0.9918615221977234, "learning_rate": 1.798860448576427e-05, "loss": 0.0757, "step": 20561 }, { "epoch": 0.4530896230312846, "grad_norm": 0.7033194899559021, "learning_rate": 1.7987555406455564e-05, "loss": 0.0993, "step": 20562 }, { "epoch": 0.45311165832080075, "grad_norm": 0.4819530248641968, "learning_rate": 1.798650631192964e-05, "loss": 0.0573, "step": 20563 }, { "epoch": 0.4531336936103169, "grad_norm": 0.7446261048316956, "learning_rate": 1.798545720219183e-05, "loss": 0.0548, "step": 20564 }, { "epoch": 0.4531557288998331, "grad_norm": 0.5013412237167358, "learning_rate": 1.798440807724749e-05, "loss": 0.0761, "step": 20565 }, { "epoch": 0.45317776418934924, "grad_norm": 0.6558533310890198, "learning_rate": 1.7983358937101955e-05, "loss": 0.085, "step": 20566 }, { "epoch": 0.4531997994788654, "grad_norm": 0.6164870858192444, "learning_rate": 1.7982309781760575e-05, "loss": 0.0835, "step": 20567 }, { "epoch": 0.45322183476838157, "grad_norm": 0.7550675272941589, "learning_rate": 1.7981260611228693e-05, "loss": 0.0554, "step": 20568 }, { "epoch": 0.45324387005789774, "grad_norm": 0.4533933401107788, "learning_rate": 1.798021142551165e-05, "loss": 0.0992, "step": 20569 }, { "epoch": 0.4532659053474139, "grad_norm": 0.649441659450531, "learning_rate": 1.7979162224614793e-05, "loss": 0.094, "step": 20570 }, { "epoch": 0.45328794063693006, "grad_norm": 0.7114881277084351, "learning_rate": 1.7978113008543468e-05, "loss": 0.064, "step": 20571 }, { "epoch": 0.45330997592644623, "grad_norm": 0.9194995164871216, "learning_rate": 1.7977063777303013e-05, "loss": 0.0884, "step": 20572 }, { "epoch": 0.45333201121596234, "grad_norm": 0.8045961856842041, "learning_rate": 1.797601453089878e-05, "loss": 0.0638, "step": 20573 }, { "epoch": 0.4533540465054785, "grad_norm": 0.7100371718406677, "learning_rate": 1.7974965269336102e-05, "loss": 0.1006, "step": 20574 }, { "epoch": 0.45337608179499467, "grad_norm": 0.4388979971408844, "learning_rate": 1.7973915992620335e-05, "loss": 0.1272, "step": 20575 }, { "epoch": 0.45339811708451083, "grad_norm": 0.5842623710632324, "learning_rate": 1.797286670075682e-05, "loss": 0.1038, "step": 20576 }, { "epoch": 0.453420152374027, "grad_norm": 0.5698167085647583, "learning_rate": 1.79718173937509e-05, "loss": 0.0772, "step": 20577 }, { "epoch": 0.45344218766354316, "grad_norm": 0.441170334815979, "learning_rate": 1.797076807160792e-05, "loss": 0.0631, "step": 20578 }, { "epoch": 0.4534642229530593, "grad_norm": 0.5332009792327881, "learning_rate": 1.7969718734333226e-05, "loss": 0.0864, "step": 20579 }, { "epoch": 0.4534862582425755, "grad_norm": 0.6036114692687988, "learning_rate": 1.796866938193216e-05, "loss": 0.0566, "step": 20580 }, { "epoch": 0.45350829353209166, "grad_norm": 0.5778084397315979, "learning_rate": 1.7967620014410072e-05, "loss": 0.0698, "step": 20581 }, { "epoch": 0.4535303288216078, "grad_norm": 0.792736291885376, "learning_rate": 1.79665706317723e-05, "loss": 0.0881, "step": 20582 }, { "epoch": 0.453552364111124, "grad_norm": 0.5686808824539185, "learning_rate": 1.7965521234024195e-05, "loss": 0.0755, "step": 20583 }, { "epoch": 0.45357439940064015, "grad_norm": 0.7439868450164795, "learning_rate": 1.79644718211711e-05, "loss": 0.1145, "step": 20584 }, { "epoch": 0.45359643469015626, "grad_norm": 0.7993792295455933, "learning_rate": 1.796342239321836e-05, "loss": 0.0651, "step": 20585 }, { "epoch": 0.4536184699796724, "grad_norm": 0.6444565653800964, "learning_rate": 1.7962372950171324e-05, "loss": 0.0627, "step": 20586 }, { "epoch": 0.4536405052691886, "grad_norm": 0.8730538487434387, "learning_rate": 1.796132349203533e-05, "loss": 0.0789, "step": 20587 }, { "epoch": 0.45366254055870475, "grad_norm": 0.5417951345443726, "learning_rate": 1.796027401881573e-05, "loss": 0.0716, "step": 20588 }, { "epoch": 0.4536845758482209, "grad_norm": 0.42178845405578613, "learning_rate": 1.7959224530517865e-05, "loss": 0.0764, "step": 20589 }, { "epoch": 0.4537066111377371, "grad_norm": 1.1266720294952393, "learning_rate": 1.795817502714708e-05, "loss": 0.0956, "step": 20590 }, { "epoch": 0.45372864642725325, "grad_norm": 1.1506823301315308, "learning_rate": 1.7957125508708726e-05, "loss": 0.0927, "step": 20591 }, { "epoch": 0.4537506817167694, "grad_norm": 0.5910564661026001, "learning_rate": 1.7956075975208145e-05, "loss": 0.0852, "step": 20592 }, { "epoch": 0.4537727170062856, "grad_norm": 0.9494805335998535, "learning_rate": 1.795502642665069e-05, "loss": 0.0735, "step": 20593 }, { "epoch": 0.45379475229580174, "grad_norm": 0.6616581678390503, "learning_rate": 1.7953976863041693e-05, "loss": 0.0774, "step": 20594 }, { "epoch": 0.4538167875853179, "grad_norm": 0.5987884402275085, "learning_rate": 1.795292728438651e-05, "loss": 0.0754, "step": 20595 }, { "epoch": 0.45383882287483407, "grad_norm": 0.5613626837730408, "learning_rate": 1.795187769069049e-05, "loss": 0.1129, "step": 20596 }, { "epoch": 0.45386085816435023, "grad_norm": 0.5634943246841431, "learning_rate": 1.7950828081958966e-05, "loss": 0.0822, "step": 20597 }, { "epoch": 0.45388289345386634, "grad_norm": 0.9400965571403503, "learning_rate": 1.7949778458197298e-05, "loss": 0.0941, "step": 20598 }, { "epoch": 0.4539049287433825, "grad_norm": 0.8563012480735779, "learning_rate": 1.794872881941082e-05, "loss": 0.0887, "step": 20599 }, { "epoch": 0.45392696403289867, "grad_norm": 0.9702771902084351, "learning_rate": 1.7947679165604892e-05, "loss": 0.1364, "step": 20600 }, { "epoch": 0.45394899932241484, "grad_norm": 0.3683619201183319, "learning_rate": 1.794662949678485e-05, "loss": 0.0455, "step": 20601 }, { "epoch": 0.453971034611931, "grad_norm": 0.49558785557746887, "learning_rate": 1.794557981295604e-05, "loss": 0.0532, "step": 20602 }, { "epoch": 0.45399306990144717, "grad_norm": 0.9945743083953857, "learning_rate": 1.7944530114123825e-05, "loss": 0.1047, "step": 20603 }, { "epoch": 0.45401510519096333, "grad_norm": 0.5811468958854675, "learning_rate": 1.794348040029353e-05, "loss": 0.0608, "step": 20604 }, { "epoch": 0.4540371404804795, "grad_norm": 0.5935214161872864, "learning_rate": 1.7942430671470514e-05, "loss": 0.0662, "step": 20605 }, { "epoch": 0.45405917576999566, "grad_norm": 1.0081039667129517, "learning_rate": 1.794138092766012e-05, "loss": 0.0875, "step": 20606 }, { "epoch": 0.4540812110595118, "grad_norm": 0.5001773238182068, "learning_rate": 1.794033116886769e-05, "loss": 0.092, "step": 20607 }, { "epoch": 0.454103246349028, "grad_norm": 0.7084507346153259, "learning_rate": 1.7939281395098583e-05, "loss": 0.0838, "step": 20608 }, { "epoch": 0.45412528163854415, "grad_norm": 0.7931987047195435, "learning_rate": 1.7938231606358136e-05, "loss": 0.0544, "step": 20609 }, { "epoch": 0.45414731692806026, "grad_norm": 0.9096724987030029, "learning_rate": 1.7937181802651704e-05, "loss": 0.0757, "step": 20610 }, { "epoch": 0.4541693522175764, "grad_norm": 0.8414073586463928, "learning_rate": 1.7936131983984626e-05, "loss": 0.0907, "step": 20611 }, { "epoch": 0.4541913875070926, "grad_norm": 0.5507727265357971, "learning_rate": 1.7935082150362258e-05, "loss": 0.1082, "step": 20612 }, { "epoch": 0.45421342279660876, "grad_norm": 0.7566161751747131, "learning_rate": 1.7934032301789937e-05, "loss": 0.0881, "step": 20613 }, { "epoch": 0.4542354580861249, "grad_norm": 0.6119067072868347, "learning_rate": 1.7932982438273017e-05, "loss": 0.0879, "step": 20614 }, { "epoch": 0.4542574933756411, "grad_norm": 0.7984426617622375, "learning_rate": 1.7931932559816846e-05, "loss": 0.0865, "step": 20615 }, { "epoch": 0.45427952866515725, "grad_norm": 0.7034547924995422, "learning_rate": 1.7930882666426772e-05, "loss": 0.0621, "step": 20616 }, { "epoch": 0.4543015639546734, "grad_norm": 0.588149905204773, "learning_rate": 1.792983275810814e-05, "loss": 0.0919, "step": 20617 }, { "epoch": 0.4543235992441896, "grad_norm": 0.9044322371482849, "learning_rate": 1.79287828348663e-05, "loss": 0.0764, "step": 20618 }, { "epoch": 0.45434563453370574, "grad_norm": 0.4195568263530731, "learning_rate": 1.7927732896706593e-05, "loss": 0.0742, "step": 20619 }, { "epoch": 0.4543676698232219, "grad_norm": 0.6019039750099182, "learning_rate": 1.7926682943634375e-05, "loss": 0.0484, "step": 20620 }, { "epoch": 0.4543897051127381, "grad_norm": 0.6600867509841919, "learning_rate": 1.7925632975654996e-05, "loss": 0.05, "step": 20621 }, { "epoch": 0.4544117404022542, "grad_norm": 0.3544066250324249, "learning_rate": 1.7924582992773794e-05, "loss": 0.0743, "step": 20622 }, { "epoch": 0.45443377569177035, "grad_norm": 0.41017386317253113, "learning_rate": 1.7923532994996128e-05, "loss": 0.0747, "step": 20623 }, { "epoch": 0.4544558109812865, "grad_norm": 0.9110977053642273, "learning_rate": 1.7922482982327335e-05, "loss": 0.1134, "step": 20624 }, { "epoch": 0.4544778462708027, "grad_norm": 0.8960710167884827, "learning_rate": 1.7921432954772772e-05, "loss": 0.098, "step": 20625 }, { "epoch": 0.45449988156031884, "grad_norm": 0.5881921052932739, "learning_rate": 1.7920382912337784e-05, "loss": 0.0568, "step": 20626 }, { "epoch": 0.454521916849835, "grad_norm": 0.6042410135269165, "learning_rate": 1.791933285502772e-05, "loss": 0.092, "step": 20627 }, { "epoch": 0.45454395213935117, "grad_norm": 0.9399417638778687, "learning_rate": 1.7918282782847926e-05, "loss": 0.1088, "step": 20628 }, { "epoch": 0.45456598742886734, "grad_norm": 0.4695952534675598, "learning_rate": 1.7917232695803755e-05, "loss": 0.0751, "step": 20629 }, { "epoch": 0.4545880227183835, "grad_norm": 0.5144773125648499, "learning_rate": 1.7916182593900553e-05, "loss": 0.0721, "step": 20630 }, { "epoch": 0.45461005800789966, "grad_norm": 0.6107617020606995, "learning_rate": 1.7915132477143666e-05, "loss": 0.0948, "step": 20631 }, { "epoch": 0.45463209329741583, "grad_norm": 0.6326300501823425, "learning_rate": 1.791408234553845e-05, "loss": 0.0954, "step": 20632 }, { "epoch": 0.454654128586932, "grad_norm": 0.7304591536521912, "learning_rate": 1.791303219909025e-05, "loss": 0.07, "step": 20633 }, { "epoch": 0.45467616387644816, "grad_norm": 0.5066652894020081, "learning_rate": 1.7911982037804417e-05, "loss": 0.0858, "step": 20634 }, { "epoch": 0.45469819916596427, "grad_norm": 0.9019275903701782, "learning_rate": 1.7910931861686295e-05, "loss": 0.0787, "step": 20635 }, { "epoch": 0.45472023445548043, "grad_norm": 0.6413495540618896, "learning_rate": 1.7909881670741234e-05, "loss": 0.0639, "step": 20636 }, { "epoch": 0.4547422697449966, "grad_norm": 0.5426884889602661, "learning_rate": 1.790883146497459e-05, "loss": 0.061, "step": 20637 }, { "epoch": 0.45476430503451276, "grad_norm": 1.286981225013733, "learning_rate": 1.7907781244391702e-05, "loss": 0.0766, "step": 20638 }, { "epoch": 0.4547863403240289, "grad_norm": 1.0310581922531128, "learning_rate": 1.7906731008997927e-05, "loss": 0.1046, "step": 20639 }, { "epoch": 0.4548083756135451, "grad_norm": 0.9134783744812012, "learning_rate": 1.790568075879861e-05, "loss": 0.1057, "step": 20640 }, { "epoch": 0.45483041090306126, "grad_norm": 0.6240931153297424, "learning_rate": 1.790463049379911e-05, "loss": 0.0974, "step": 20641 }, { "epoch": 0.4548524461925774, "grad_norm": 0.8086602091789246, "learning_rate": 1.790358021400477e-05, "loss": 0.0689, "step": 20642 }, { "epoch": 0.4548744814820936, "grad_norm": 0.6018587350845337, "learning_rate": 1.790252991942093e-05, "loss": 0.0914, "step": 20643 }, { "epoch": 0.45489651677160975, "grad_norm": 0.825421929359436, "learning_rate": 1.7901479610052953e-05, "loss": 0.0748, "step": 20644 }, { "epoch": 0.4549185520611259, "grad_norm": 0.7393214106559753, "learning_rate": 1.7900429285906185e-05, "loss": 0.0867, "step": 20645 }, { "epoch": 0.4549405873506421, "grad_norm": 0.34157794713974, "learning_rate": 1.7899378946985976e-05, "loss": 0.0819, "step": 20646 }, { "epoch": 0.4549626226401582, "grad_norm": 0.64055335521698, "learning_rate": 1.789832859329767e-05, "loss": 0.0852, "step": 20647 }, { "epoch": 0.45498465792967435, "grad_norm": 0.4509441554546356, "learning_rate": 1.789727822484663e-05, "loss": 0.0851, "step": 20648 }, { "epoch": 0.4550066932191905, "grad_norm": 0.8983412384986877, "learning_rate": 1.789622784163819e-05, "loss": 0.0792, "step": 20649 }, { "epoch": 0.4550287285087067, "grad_norm": 0.9603497385978699, "learning_rate": 1.7895177443677713e-05, "loss": 0.1106, "step": 20650 }, { "epoch": 0.45505076379822285, "grad_norm": 0.6299542188644409, "learning_rate": 1.7894127030970546e-05, "loss": 0.0656, "step": 20651 }, { "epoch": 0.455072799087739, "grad_norm": 0.6353113651275635, "learning_rate": 1.789307660352204e-05, "loss": 0.0674, "step": 20652 }, { "epoch": 0.4550948343772552, "grad_norm": 0.9211886525154114, "learning_rate": 1.789202616133754e-05, "loss": 0.1045, "step": 20653 }, { "epoch": 0.45511686966677134, "grad_norm": 0.3341885805130005, "learning_rate": 1.7890975704422398e-05, "loss": 0.0675, "step": 20654 }, { "epoch": 0.4551389049562875, "grad_norm": 0.5483335852622986, "learning_rate": 1.788992523278197e-05, "loss": 0.0939, "step": 20655 }, { "epoch": 0.45516094024580367, "grad_norm": 0.5481037497520447, "learning_rate": 1.78888747464216e-05, "loss": 0.0643, "step": 20656 }, { "epoch": 0.45518297553531983, "grad_norm": 1.2432737350463867, "learning_rate": 1.7887824245346645e-05, "loss": 0.0921, "step": 20657 }, { "epoch": 0.455205010824836, "grad_norm": 0.464938223361969, "learning_rate": 1.788677372956245e-05, "loss": 0.0628, "step": 20658 }, { "epoch": 0.4552270461143521, "grad_norm": 0.8630722165107727, "learning_rate": 1.7885723199074373e-05, "loss": 0.1097, "step": 20659 }, { "epoch": 0.45524908140386827, "grad_norm": 0.7718603610992432, "learning_rate": 1.7884672653887756e-05, "loss": 0.0587, "step": 20660 }, { "epoch": 0.45527111669338444, "grad_norm": 0.6843281388282776, "learning_rate": 1.7883622094007955e-05, "loss": 0.0806, "step": 20661 }, { "epoch": 0.4552931519829006, "grad_norm": 0.771711528301239, "learning_rate": 1.7882571519440317e-05, "loss": 0.1012, "step": 20662 }, { "epoch": 0.45531518727241677, "grad_norm": 0.4317021667957306, "learning_rate": 1.78815209301902e-05, "loss": 0.0814, "step": 20663 }, { "epoch": 0.45533722256193293, "grad_norm": 0.9103549718856812, "learning_rate": 1.788047032626295e-05, "loss": 0.0968, "step": 20664 }, { "epoch": 0.4553592578514491, "grad_norm": 0.5792232155799866, "learning_rate": 1.7879419707663924e-05, "loss": 0.1235, "step": 20665 }, { "epoch": 0.45538129314096526, "grad_norm": 0.5520865321159363, "learning_rate": 1.7878369074398465e-05, "loss": 0.092, "step": 20666 }, { "epoch": 0.4554033284304814, "grad_norm": 0.4307548999786377, "learning_rate": 1.7877318426471933e-05, "loss": 0.086, "step": 20667 }, { "epoch": 0.4554253637199976, "grad_norm": 0.7057916522026062, "learning_rate": 1.787626776388967e-05, "loss": 0.0845, "step": 20668 }, { "epoch": 0.45544739900951375, "grad_norm": 0.9086583256721497, "learning_rate": 1.787521708665704e-05, "loss": 0.1001, "step": 20669 }, { "epoch": 0.4554694342990299, "grad_norm": 0.8274835348129272, "learning_rate": 1.7874166394779383e-05, "loss": 0.0944, "step": 20670 }, { "epoch": 0.4554914695885461, "grad_norm": 1.0322471857070923, "learning_rate": 1.7873115688262054e-05, "loss": 0.1103, "step": 20671 }, { "epoch": 0.4555135048780622, "grad_norm": 0.7955276370048523, "learning_rate": 1.7872064967110407e-05, "loss": 0.0905, "step": 20672 }, { "epoch": 0.45553554016757836, "grad_norm": 0.8583513498306274, "learning_rate": 1.7871014231329794e-05, "loss": 0.0755, "step": 20673 }, { "epoch": 0.4555575754570945, "grad_norm": 0.5975017547607422, "learning_rate": 1.786996348092557e-05, "loss": 0.1018, "step": 20674 }, { "epoch": 0.4555796107466107, "grad_norm": 0.6075700521469116, "learning_rate": 1.786891271590308e-05, "loss": 0.0829, "step": 20675 }, { "epoch": 0.45560164603612685, "grad_norm": 0.9545812010765076, "learning_rate": 1.7867861936267674e-05, "loss": 0.093, "step": 20676 }, { "epoch": 0.455623681325643, "grad_norm": 0.8342587351799011, "learning_rate": 1.7866811142024714e-05, "loss": 0.1107, "step": 20677 }, { "epoch": 0.4556457166151592, "grad_norm": 0.5863644480705261, "learning_rate": 1.7865760333179547e-05, "loss": 0.079, "step": 20678 }, { "epoch": 0.45566775190467534, "grad_norm": 0.680013120174408, "learning_rate": 1.7864709509737526e-05, "loss": 0.0792, "step": 20679 }, { "epoch": 0.4556897871941915, "grad_norm": 0.3866581618785858, "learning_rate": 1.7863658671704e-05, "loss": 0.0709, "step": 20680 }, { "epoch": 0.4557118224837077, "grad_norm": 0.5871514678001404, "learning_rate": 1.7862607819084333e-05, "loss": 0.0737, "step": 20681 }, { "epoch": 0.45573385777322384, "grad_norm": 0.5430788397789001, "learning_rate": 1.7861556951883864e-05, "loss": 0.0793, "step": 20682 }, { "epoch": 0.45575589306274, "grad_norm": 0.574974000453949, "learning_rate": 1.786050607010795e-05, "loss": 0.0515, "step": 20683 }, { "epoch": 0.4557779283522561, "grad_norm": 0.7023074626922607, "learning_rate": 1.7859455173761946e-05, "loss": 0.0856, "step": 20684 }, { "epoch": 0.4557999636417723, "grad_norm": 0.8059524893760681, "learning_rate": 1.78584042628512e-05, "loss": 0.0795, "step": 20685 }, { "epoch": 0.45582199893128844, "grad_norm": 1.118021011352539, "learning_rate": 1.7857353337381074e-05, "loss": 0.0868, "step": 20686 }, { "epoch": 0.4558440342208046, "grad_norm": 0.7319002747535706, "learning_rate": 1.785630239735691e-05, "loss": 0.0576, "step": 20687 }, { "epoch": 0.45586606951032077, "grad_norm": 0.539772093296051, "learning_rate": 1.785525144278407e-05, "loss": 0.0533, "step": 20688 }, { "epoch": 0.45588810479983694, "grad_norm": 0.6143520474433899, "learning_rate": 1.7854200473667905e-05, "loss": 0.0815, "step": 20689 }, { "epoch": 0.4559101400893531, "grad_norm": 0.7762891054153442, "learning_rate": 1.7853149490013765e-05, "loss": 0.0439, "step": 20690 }, { "epoch": 0.45593217537886926, "grad_norm": 0.6060381531715393, "learning_rate": 1.7852098491827e-05, "loss": 0.0664, "step": 20691 }, { "epoch": 0.45595421066838543, "grad_norm": 0.7565370202064514, "learning_rate": 1.7851047479112973e-05, "loss": 0.06, "step": 20692 }, { "epoch": 0.4559762459579016, "grad_norm": 0.6712396144866943, "learning_rate": 1.7849996451877027e-05, "loss": 0.0757, "step": 20693 }, { "epoch": 0.45599828124741776, "grad_norm": 0.6523838639259338, "learning_rate": 1.7848945410124528e-05, "loss": 0.0544, "step": 20694 }, { "epoch": 0.4560203165369339, "grad_norm": 0.927362859249115, "learning_rate": 1.7847894353860817e-05, "loss": 0.0676, "step": 20695 }, { "epoch": 0.4560423518264501, "grad_norm": 0.699970006942749, "learning_rate": 1.7846843283091256e-05, "loss": 0.0766, "step": 20696 }, { "epoch": 0.4560643871159662, "grad_norm": 0.8018875122070312, "learning_rate": 1.7845792197821197e-05, "loss": 0.0813, "step": 20697 }, { "epoch": 0.45608642240548236, "grad_norm": 0.7378299832344055, "learning_rate": 1.7844741098055987e-05, "loss": 0.0945, "step": 20698 }, { "epoch": 0.4561084576949985, "grad_norm": 0.3679056167602539, "learning_rate": 1.784368998380099e-05, "loss": 0.0872, "step": 20699 }, { "epoch": 0.4561304929845147, "grad_norm": 0.6264843344688416, "learning_rate": 1.784263885506155e-05, "loss": 0.0779, "step": 20700 }, { "epoch": 0.45615252827403086, "grad_norm": 1.25372314453125, "learning_rate": 1.7841587711843026e-05, "loss": 0.1019, "step": 20701 }, { "epoch": 0.456174563563547, "grad_norm": 0.8241446018218994, "learning_rate": 1.784053655415078e-05, "loss": 0.0788, "step": 20702 }, { "epoch": 0.4561965988530632, "grad_norm": 0.9109811186790466, "learning_rate": 1.7839485381990148e-05, "loss": 0.0881, "step": 20703 }, { "epoch": 0.45621863414257935, "grad_norm": 1.3138072490692139, "learning_rate": 1.7838434195366497e-05, "loss": 0.0921, "step": 20704 }, { "epoch": 0.4562406694320955, "grad_norm": 0.46898576617240906, "learning_rate": 1.783738299428518e-05, "loss": 0.0525, "step": 20705 }, { "epoch": 0.4562627047216117, "grad_norm": 0.5110928416252136, "learning_rate": 1.7836331778751553e-05, "loss": 0.0791, "step": 20706 }, { "epoch": 0.45628474001112784, "grad_norm": 0.6350840926170349, "learning_rate": 1.7835280548770966e-05, "loss": 0.0498, "step": 20707 }, { "epoch": 0.456306775300644, "grad_norm": 0.7533991932868958, "learning_rate": 1.783422930434877e-05, "loss": 0.0939, "step": 20708 }, { "epoch": 0.4563288105901601, "grad_norm": 1.034209966659546, "learning_rate": 1.783317804549033e-05, "loss": 0.0853, "step": 20709 }, { "epoch": 0.4563508458796763, "grad_norm": 0.6867057085037231, "learning_rate": 1.783212677220099e-05, "loss": 0.0886, "step": 20710 }, { "epoch": 0.45637288116919245, "grad_norm": 0.6387248635292053, "learning_rate": 1.7831075484486113e-05, "loss": 0.1141, "step": 20711 }, { "epoch": 0.4563949164587086, "grad_norm": 0.7121782302856445, "learning_rate": 1.783002418235105e-05, "loss": 0.0809, "step": 20712 }, { "epoch": 0.4564169517482248, "grad_norm": 0.9732263088226318, "learning_rate": 1.7828972865801156e-05, "loss": 0.1004, "step": 20713 }, { "epoch": 0.45643898703774094, "grad_norm": 0.5547388792037964, "learning_rate": 1.7827921534841787e-05, "loss": 0.0683, "step": 20714 }, { "epoch": 0.4564610223272571, "grad_norm": 0.5344622135162354, "learning_rate": 1.78268701894783e-05, "loss": 0.064, "step": 20715 }, { "epoch": 0.45648305761677327, "grad_norm": 0.28081220388412476, "learning_rate": 1.782581882971604e-05, "loss": 0.0675, "step": 20716 }, { "epoch": 0.45650509290628943, "grad_norm": 0.7757047414779663, "learning_rate": 1.7824767455560376e-05, "loss": 0.0815, "step": 20717 }, { "epoch": 0.4565271281958056, "grad_norm": 0.6398807764053345, "learning_rate": 1.782371606701665e-05, "loss": 0.0796, "step": 20718 }, { "epoch": 0.45654916348532176, "grad_norm": 0.7293726205825806, "learning_rate": 1.782266466409023e-05, "loss": 0.0748, "step": 20719 }, { "epoch": 0.4565711987748379, "grad_norm": 0.9572288393974304, "learning_rate": 1.7821613246786462e-05, "loss": 0.073, "step": 20720 }, { "epoch": 0.45659323406435404, "grad_norm": 0.5696176886558533, "learning_rate": 1.78205618151107e-05, "loss": 0.0593, "step": 20721 }, { "epoch": 0.4566152693538702, "grad_norm": 0.804821789264679, "learning_rate": 1.7819510369068317e-05, "loss": 0.092, "step": 20722 }, { "epoch": 0.45663730464338637, "grad_norm": 0.8171449303627014, "learning_rate": 1.7818458908664647e-05, "loss": 0.0858, "step": 20723 }, { "epoch": 0.45665933993290253, "grad_norm": 0.4380129873752594, "learning_rate": 1.7817407433905054e-05, "loss": 0.0865, "step": 20724 }, { "epoch": 0.4566813752224187, "grad_norm": 0.7052661180496216, "learning_rate": 1.78163559447949e-05, "loss": 0.0762, "step": 20725 }, { "epoch": 0.45670341051193486, "grad_norm": 0.49578341841697693, "learning_rate": 1.7815304441339527e-05, "loss": 0.0734, "step": 20726 }, { "epoch": 0.456725445801451, "grad_norm": 0.7718030214309692, "learning_rate": 1.7814252923544304e-05, "loss": 0.0677, "step": 20727 }, { "epoch": 0.4567474810909672, "grad_norm": 0.6134281158447266, "learning_rate": 1.781320139141458e-05, "loss": 0.0543, "step": 20728 }, { "epoch": 0.45676951638048335, "grad_norm": 0.5119152069091797, "learning_rate": 1.781214984495571e-05, "loss": 0.081, "step": 20729 }, { "epoch": 0.4567915516699995, "grad_norm": 0.7598370313644409, "learning_rate": 1.7811098284173056e-05, "loss": 0.0695, "step": 20730 }, { "epoch": 0.4568135869595157, "grad_norm": 0.7881470918655396, "learning_rate": 1.781004670907197e-05, "loss": 0.0757, "step": 20731 }, { "epoch": 0.45683562224903185, "grad_norm": 0.7777543067932129, "learning_rate": 1.7808995119657815e-05, "loss": 0.0857, "step": 20732 }, { "epoch": 0.456857657538548, "grad_norm": 0.6010079979896545, "learning_rate": 1.780794351593593e-05, "loss": 0.0566, "step": 20733 }, { "epoch": 0.4568796928280641, "grad_norm": 0.6690614819526672, "learning_rate": 1.7806891897911695e-05, "loss": 0.0983, "step": 20734 }, { "epoch": 0.4569017281175803, "grad_norm": 0.38581809401512146, "learning_rate": 1.7805840265590444e-05, "loss": 0.0616, "step": 20735 }, { "epoch": 0.45692376340709645, "grad_norm": 0.5882669687271118, "learning_rate": 1.7804788618977552e-05, "loss": 0.0898, "step": 20736 }, { "epoch": 0.4569457986966126, "grad_norm": 0.7167249917984009, "learning_rate": 1.7803736958078366e-05, "loss": 0.0811, "step": 20737 }, { "epoch": 0.4569678339861288, "grad_norm": 0.6231369972229004, "learning_rate": 1.7802685282898244e-05, "loss": 0.0849, "step": 20738 }, { "epoch": 0.45698986927564494, "grad_norm": 0.6089907884597778, "learning_rate": 1.780163359344254e-05, "loss": 0.0841, "step": 20739 }, { "epoch": 0.4570119045651611, "grad_norm": 0.5051931738853455, "learning_rate": 1.7800581889716616e-05, "loss": 0.0565, "step": 20740 }, { "epoch": 0.4570339398546773, "grad_norm": 0.45892587304115295, "learning_rate": 1.7799530171725823e-05, "loss": 0.0911, "step": 20741 }, { "epoch": 0.45705597514419344, "grad_norm": 0.719082772731781, "learning_rate": 1.7798478439475525e-05, "loss": 0.1056, "step": 20742 }, { "epoch": 0.4570780104337096, "grad_norm": 0.7943315505981445, "learning_rate": 1.7797426692971076e-05, "loss": 0.0769, "step": 20743 }, { "epoch": 0.45710004572322577, "grad_norm": 0.5681342482566833, "learning_rate": 1.779637493221783e-05, "loss": 0.0637, "step": 20744 }, { "epoch": 0.45712208101274193, "grad_norm": 0.7183270454406738, "learning_rate": 1.779532315722115e-05, "loss": 0.07, "step": 20745 }, { "epoch": 0.45714411630225804, "grad_norm": 0.9794594645500183, "learning_rate": 1.7794271367986394e-05, "loss": 0.0982, "step": 20746 }, { "epoch": 0.4571661515917742, "grad_norm": 0.8435964584350586, "learning_rate": 1.7793219564518907e-05, "loss": 0.0495, "step": 20747 }, { "epoch": 0.45718818688129037, "grad_norm": 0.7444538474082947, "learning_rate": 1.779216774682406e-05, "loss": 0.0926, "step": 20748 }, { "epoch": 0.45721022217080654, "grad_norm": 0.8582331538200378, "learning_rate": 1.7791115914907202e-05, "loss": 0.0905, "step": 20749 }, { "epoch": 0.4572322574603227, "grad_norm": 0.4891318380832672, "learning_rate": 1.7790064068773694e-05, "loss": 0.0669, "step": 20750 }, { "epoch": 0.45725429274983886, "grad_norm": 0.6032590270042419, "learning_rate": 1.7789012208428897e-05, "loss": 0.0791, "step": 20751 }, { "epoch": 0.45727632803935503, "grad_norm": 0.5089893937110901, "learning_rate": 1.7787960333878165e-05, "loss": 0.1119, "step": 20752 }, { "epoch": 0.4572983633288712, "grad_norm": 0.9463332891464233, "learning_rate": 1.778690844512686e-05, "loss": 0.0995, "step": 20753 }, { "epoch": 0.45732039861838736, "grad_norm": 0.6969837546348572, "learning_rate": 1.7785856542180323e-05, "loss": 0.1106, "step": 20754 }, { "epoch": 0.4573424339079035, "grad_norm": 0.8125128746032715, "learning_rate": 1.7784804625043932e-05, "loss": 0.1269, "step": 20755 }, { "epoch": 0.4573644691974197, "grad_norm": 0.8003854751586914, "learning_rate": 1.7783752693723036e-05, "loss": 0.1068, "step": 20756 }, { "epoch": 0.45738650448693585, "grad_norm": 0.577949583530426, "learning_rate": 1.7782700748223e-05, "loss": 0.0743, "step": 20757 }, { "epoch": 0.45740853977645196, "grad_norm": 0.7398689389228821, "learning_rate": 1.7781648788549172e-05, "loss": 0.0755, "step": 20758 }, { "epoch": 0.4574305750659681, "grad_norm": 0.8032994866371155, "learning_rate": 1.778059681470692e-05, "loss": 0.1025, "step": 20759 }, { "epoch": 0.4574526103554843, "grad_norm": 1.065187931060791, "learning_rate": 1.7779544826701587e-05, "loss": 0.0846, "step": 20760 }, { "epoch": 0.45747464564500046, "grad_norm": 0.5037046670913696, "learning_rate": 1.777849282453855e-05, "loss": 0.1029, "step": 20761 }, { "epoch": 0.4574966809345166, "grad_norm": 0.6490790843963623, "learning_rate": 1.777744080822316e-05, "loss": 0.0773, "step": 20762 }, { "epoch": 0.4575187162240328, "grad_norm": 0.6602675318717957, "learning_rate": 1.7776388777760775e-05, "loss": 0.0618, "step": 20763 }, { "epoch": 0.45754075151354895, "grad_norm": 0.8621196150779724, "learning_rate": 1.777533673315675e-05, "loss": 0.0809, "step": 20764 }, { "epoch": 0.4575627868030651, "grad_norm": 0.4476863145828247, "learning_rate": 1.777428467441645e-05, "loss": 0.0921, "step": 20765 }, { "epoch": 0.4575848220925813, "grad_norm": 0.7622203826904297, "learning_rate": 1.7773232601545224e-05, "loss": 0.0977, "step": 20766 }, { "epoch": 0.45760685738209744, "grad_norm": 0.6854865550994873, "learning_rate": 1.777218051454844e-05, "loss": 0.0942, "step": 20767 }, { "epoch": 0.4576288926716136, "grad_norm": 0.8362800478935242, "learning_rate": 1.7771128413431452e-05, "loss": 0.1151, "step": 20768 }, { "epoch": 0.45765092796112977, "grad_norm": 0.5655115842819214, "learning_rate": 1.777007629819963e-05, "loss": 0.0654, "step": 20769 }, { "epoch": 0.45767296325064594, "grad_norm": 0.4798150658607483, "learning_rate": 1.7769024168858316e-05, "loss": 0.0937, "step": 20770 }, { "epoch": 0.45769499854016205, "grad_norm": 0.4720142185688019, "learning_rate": 1.7767972025412882e-05, "loss": 0.0834, "step": 20771 }, { "epoch": 0.4577170338296782, "grad_norm": 0.6069284081459045, "learning_rate": 1.7766919867868677e-05, "loss": 0.105, "step": 20772 }, { "epoch": 0.4577390691191944, "grad_norm": 0.7698283195495605, "learning_rate": 1.7765867696231066e-05, "loss": 0.0758, "step": 20773 }, { "epoch": 0.45776110440871054, "grad_norm": 1.0359090566635132, "learning_rate": 1.7764815510505406e-05, "loss": 0.0979, "step": 20774 }, { "epoch": 0.4577831396982267, "grad_norm": 0.5638728141784668, "learning_rate": 1.7763763310697062e-05, "loss": 0.0814, "step": 20775 }, { "epoch": 0.45780517498774287, "grad_norm": 0.8700628876686096, "learning_rate": 1.776271109681139e-05, "loss": 0.1248, "step": 20776 }, { "epoch": 0.45782721027725903, "grad_norm": 0.5581732392311096, "learning_rate": 1.7761658868853747e-05, "loss": 0.0632, "step": 20777 }, { "epoch": 0.4578492455667752, "grad_norm": 0.5815843343734741, "learning_rate": 1.7760606626829498e-05, "loss": 0.118, "step": 20778 }, { "epoch": 0.45787128085629136, "grad_norm": 0.5944650173187256, "learning_rate": 1.7759554370743993e-05, "loss": 0.077, "step": 20779 }, { "epoch": 0.4578933161458075, "grad_norm": 0.632455050945282, "learning_rate": 1.7758502100602606e-05, "loss": 0.0846, "step": 20780 }, { "epoch": 0.4579153514353237, "grad_norm": 0.526111900806427, "learning_rate": 1.7757449816410678e-05, "loss": 0.0996, "step": 20781 }, { "epoch": 0.45793738672483986, "grad_norm": 0.6202639937400818, "learning_rate": 1.7756397518173588e-05, "loss": 0.101, "step": 20782 }, { "epoch": 0.45795942201435597, "grad_norm": 0.4239776134490967, "learning_rate": 1.7755345205896683e-05, "loss": 0.0601, "step": 20783 }, { "epoch": 0.45798145730387213, "grad_norm": 0.7848002910614014, "learning_rate": 1.7754292879585327e-05, "loss": 0.1062, "step": 20784 }, { "epoch": 0.4580034925933883, "grad_norm": 0.22803999483585358, "learning_rate": 1.7753240539244885e-05, "loss": 0.1002, "step": 20785 }, { "epoch": 0.45802552788290446, "grad_norm": 0.9682605266571045, "learning_rate": 1.775218818488071e-05, "loss": 0.0833, "step": 20786 }, { "epoch": 0.4580475631724206, "grad_norm": 0.7631315588951111, "learning_rate": 1.775113581649816e-05, "loss": 0.0504, "step": 20787 }, { "epoch": 0.4580695984619368, "grad_norm": 0.5490714907646179, "learning_rate": 1.7750083434102608e-05, "loss": 0.0718, "step": 20788 }, { "epoch": 0.45809163375145295, "grad_norm": 0.5373690724372864, "learning_rate": 1.77490310376994e-05, "loss": 0.0648, "step": 20789 }, { "epoch": 0.4581136690409691, "grad_norm": 0.7915061712265015, "learning_rate": 1.7747978627293905e-05, "loss": 0.088, "step": 20790 }, { "epoch": 0.4581357043304853, "grad_norm": 0.726222574710846, "learning_rate": 1.774692620289148e-05, "loss": 0.0942, "step": 20791 }, { "epoch": 0.45815773962000145, "grad_norm": 0.5405734181404114, "learning_rate": 1.7745873764497493e-05, "loss": 0.0644, "step": 20792 }, { "epoch": 0.4581797749095176, "grad_norm": 0.6434244513511658, "learning_rate": 1.7744821312117293e-05, "loss": 0.0555, "step": 20793 }, { "epoch": 0.4582018101990338, "grad_norm": 0.9045289754867554, "learning_rate": 1.774376884575625e-05, "loss": 0.0693, "step": 20794 }, { "epoch": 0.4582238454885499, "grad_norm": 0.626811146736145, "learning_rate": 1.7742716365419716e-05, "loss": 0.1102, "step": 20795 }, { "epoch": 0.45824588077806605, "grad_norm": 0.792610228061676, "learning_rate": 1.7741663871113056e-05, "loss": 0.0859, "step": 20796 }, { "epoch": 0.4582679160675822, "grad_norm": 0.6570454239845276, "learning_rate": 1.7740611362841638e-05, "loss": 0.0718, "step": 20797 }, { "epoch": 0.4582899513570984, "grad_norm": 0.7589679956436157, "learning_rate": 1.773955884061081e-05, "loss": 0.0741, "step": 20798 }, { "epoch": 0.45831198664661454, "grad_norm": 0.3622760772705078, "learning_rate": 1.7738506304425945e-05, "loss": 0.101, "step": 20799 }, { "epoch": 0.4583340219361307, "grad_norm": 0.6884192228317261, "learning_rate": 1.7737453754292396e-05, "loss": 0.0869, "step": 20800 }, { "epoch": 0.4583560572256469, "grad_norm": 0.46525290608406067, "learning_rate": 1.773640119021553e-05, "loss": 0.0769, "step": 20801 }, { "epoch": 0.45837809251516304, "grad_norm": 0.5832286477088928, "learning_rate": 1.7735348612200704e-05, "loss": 0.0859, "step": 20802 }, { "epoch": 0.4584001278046792, "grad_norm": 0.7984188795089722, "learning_rate": 1.773429602025328e-05, "loss": 0.092, "step": 20803 }, { "epoch": 0.45842216309419537, "grad_norm": 1.5924712419509888, "learning_rate": 1.7733243414378617e-05, "loss": 0.0794, "step": 20804 }, { "epoch": 0.45844419838371153, "grad_norm": 0.2859908640384674, "learning_rate": 1.7732190794582084e-05, "loss": 0.0765, "step": 20805 }, { "epoch": 0.4584662336732277, "grad_norm": 1.3360810279846191, "learning_rate": 1.7731138160869034e-05, "loss": 0.08, "step": 20806 }, { "epoch": 0.45848826896274386, "grad_norm": 1.197931170463562, "learning_rate": 1.773008551324484e-05, "loss": 0.1054, "step": 20807 }, { "epoch": 0.45851030425225997, "grad_norm": 0.598748505115509, "learning_rate": 1.772903285171485e-05, "loss": 0.0666, "step": 20808 }, { "epoch": 0.45853233954177613, "grad_norm": 0.8121552467346191, "learning_rate": 1.772798017628444e-05, "loss": 0.1076, "step": 20809 }, { "epoch": 0.4585543748312923, "grad_norm": 0.870503842830658, "learning_rate": 1.7726927486958955e-05, "loss": 0.0691, "step": 20810 }, { "epoch": 0.45857641012080846, "grad_norm": 0.6031832098960876, "learning_rate": 1.7725874783743768e-05, "loss": 0.0581, "step": 20811 }, { "epoch": 0.45859844541032463, "grad_norm": 0.361881822347641, "learning_rate": 1.772482206664424e-05, "loss": 0.0644, "step": 20812 }, { "epoch": 0.4586204806998408, "grad_norm": 0.5073134303092957, "learning_rate": 1.7723769335665733e-05, "loss": 0.0797, "step": 20813 }, { "epoch": 0.45864251598935696, "grad_norm": 0.7288784384727478, "learning_rate": 1.7722716590813608e-05, "loss": 0.1017, "step": 20814 }, { "epoch": 0.4586645512788731, "grad_norm": 0.7497878670692444, "learning_rate": 1.772166383209323e-05, "loss": 0.0504, "step": 20815 }, { "epoch": 0.4586865865683893, "grad_norm": 1.0787067413330078, "learning_rate": 1.772061105950995e-05, "loss": 0.0612, "step": 20816 }, { "epoch": 0.45870862185790545, "grad_norm": 0.7005499601364136, "learning_rate": 1.771955827306915e-05, "loss": 0.0659, "step": 20817 }, { "epoch": 0.4587306571474216, "grad_norm": 1.012732982635498, "learning_rate": 1.7718505472776176e-05, "loss": 0.0806, "step": 20818 }, { "epoch": 0.4587526924369378, "grad_norm": 0.8437239527702332, "learning_rate": 1.771745265863639e-05, "loss": 0.0707, "step": 20819 }, { "epoch": 0.4587747277264539, "grad_norm": 0.6023803353309631, "learning_rate": 1.771639983065517e-05, "loss": 0.1066, "step": 20820 }, { "epoch": 0.45879676301597005, "grad_norm": 0.6606020331382751, "learning_rate": 1.7715346988837862e-05, "loss": 0.072, "step": 20821 }, { "epoch": 0.4588187983054862, "grad_norm": 0.6436362266540527, "learning_rate": 1.7714294133189844e-05, "loss": 0.0775, "step": 20822 }, { "epoch": 0.4588408335950024, "grad_norm": 0.5761427283287048, "learning_rate": 1.771324126371646e-05, "loss": 0.083, "step": 20823 }, { "epoch": 0.45886286888451855, "grad_norm": 0.5666745901107788, "learning_rate": 1.7712188380423093e-05, "loss": 0.0791, "step": 20824 }, { "epoch": 0.4588849041740347, "grad_norm": 0.5706934332847595, "learning_rate": 1.7711135483315092e-05, "loss": 0.0611, "step": 20825 }, { "epoch": 0.4589069394635509, "grad_norm": 1.0456980466842651, "learning_rate": 1.7710082572397824e-05, "loss": 0.1288, "step": 20826 }, { "epoch": 0.45892897475306704, "grad_norm": 0.610298752784729, "learning_rate": 1.7709029647676654e-05, "loss": 0.0723, "step": 20827 }, { "epoch": 0.4589510100425832, "grad_norm": 0.8292615413665771, "learning_rate": 1.770797670915694e-05, "loss": 0.0578, "step": 20828 }, { "epoch": 0.45897304533209937, "grad_norm": 0.8768405914306641, "learning_rate": 1.770692375684405e-05, "loss": 0.0718, "step": 20829 }, { "epoch": 0.45899508062161554, "grad_norm": 0.5705541372299194, "learning_rate": 1.770587079074335e-05, "loss": 0.0576, "step": 20830 }, { "epoch": 0.4590171159111317, "grad_norm": 0.7321467399597168, "learning_rate": 1.7704817810860192e-05, "loss": 0.0709, "step": 20831 }, { "epoch": 0.4590391512006478, "grad_norm": 0.7853466868400574, "learning_rate": 1.770376481719995e-05, "loss": 0.0787, "step": 20832 }, { "epoch": 0.459061186490164, "grad_norm": 0.6682742834091187, "learning_rate": 1.7702711809767983e-05, "loss": 0.0838, "step": 20833 }, { "epoch": 0.45908322177968014, "grad_norm": 0.8624625205993652, "learning_rate": 1.7701658788569655e-05, "loss": 0.093, "step": 20834 }, { "epoch": 0.4591052570691963, "grad_norm": 0.6832988858222961, "learning_rate": 1.770060575361033e-05, "loss": 0.0704, "step": 20835 }, { "epoch": 0.45912729235871247, "grad_norm": 0.5953266024589539, "learning_rate": 1.7699552704895375e-05, "loss": 0.1077, "step": 20836 }, { "epoch": 0.45914932764822863, "grad_norm": 0.5668149590492249, "learning_rate": 1.7698499642430147e-05, "loss": 0.0923, "step": 20837 }, { "epoch": 0.4591713629377448, "grad_norm": 0.6399188041687012, "learning_rate": 1.7697446566220014e-05, "loss": 0.0797, "step": 20838 }, { "epoch": 0.45919339822726096, "grad_norm": 0.8866316676139832, "learning_rate": 1.7696393476270336e-05, "loss": 0.1253, "step": 20839 }, { "epoch": 0.4592154335167771, "grad_norm": 0.61826491355896, "learning_rate": 1.7695340372586486e-05, "loss": 0.0484, "step": 20840 }, { "epoch": 0.4592374688062933, "grad_norm": 0.363209068775177, "learning_rate": 1.769428725517382e-05, "loss": 0.1019, "step": 20841 }, { "epoch": 0.45925950409580946, "grad_norm": 2.9530038833618164, "learning_rate": 1.7693234124037704e-05, "loss": 0.1123, "step": 20842 }, { "epoch": 0.4592815393853256, "grad_norm": 0.5361714959144592, "learning_rate": 1.7692180979183502e-05, "loss": 0.0718, "step": 20843 }, { "epoch": 0.4593035746748418, "grad_norm": 1.0842947959899902, "learning_rate": 1.769112782061658e-05, "loss": 0.0962, "step": 20844 }, { "epoch": 0.4593256099643579, "grad_norm": 2.264627695083618, "learning_rate": 1.76900746483423e-05, "loss": 0.0672, "step": 20845 }, { "epoch": 0.45934764525387406, "grad_norm": 0.6309788823127747, "learning_rate": 1.7689021462366026e-05, "loss": 0.0451, "step": 20846 }, { "epoch": 0.4593696805433902, "grad_norm": 0.9497610330581665, "learning_rate": 1.7687968262693126e-05, "loss": 0.0947, "step": 20847 }, { "epoch": 0.4593917158329064, "grad_norm": 1.4027622938156128, "learning_rate": 1.768691504932896e-05, "loss": 0.069, "step": 20848 }, { "epoch": 0.45941375112242255, "grad_norm": 0.5477818846702576, "learning_rate": 1.76858618222789e-05, "loss": 0.0757, "step": 20849 }, { "epoch": 0.4594357864119387, "grad_norm": 0.9487165212631226, "learning_rate": 1.7684808581548297e-05, "loss": 0.077, "step": 20850 }, { "epoch": 0.4594578217014549, "grad_norm": 0.6845039129257202, "learning_rate": 1.768375532714253e-05, "loss": 0.0895, "step": 20851 }, { "epoch": 0.45947985699097105, "grad_norm": 0.7237663269042969, "learning_rate": 1.7682702059066953e-05, "loss": 0.0631, "step": 20852 }, { "epoch": 0.4595018922804872, "grad_norm": 1.0093945264816284, "learning_rate": 1.768164877732694e-05, "loss": 0.0776, "step": 20853 }, { "epoch": 0.4595239275700034, "grad_norm": 0.8714850544929504, "learning_rate": 1.7680595481927853e-05, "loss": 0.1037, "step": 20854 }, { "epoch": 0.45954596285951954, "grad_norm": 0.5743021368980408, "learning_rate": 1.7679542172875054e-05, "loss": 0.0675, "step": 20855 }, { "epoch": 0.4595679981490357, "grad_norm": 1.0328783988952637, "learning_rate": 1.767848885017391e-05, "loss": 0.0756, "step": 20856 }, { "epoch": 0.4595900334385518, "grad_norm": 0.5668699145317078, "learning_rate": 1.767743551382979e-05, "loss": 0.0748, "step": 20857 }, { "epoch": 0.459612068728068, "grad_norm": 0.9469271898269653, "learning_rate": 1.7676382163848047e-05, "loss": 0.1312, "step": 20858 }, { "epoch": 0.45963410401758414, "grad_norm": 0.8549495935440063, "learning_rate": 1.767532880023406e-05, "loss": 0.1011, "step": 20859 }, { "epoch": 0.4596561393071003, "grad_norm": 0.7515885233879089, "learning_rate": 1.7674275422993186e-05, "loss": 0.0977, "step": 20860 }, { "epoch": 0.4596781745966165, "grad_norm": 0.5674294829368591, "learning_rate": 1.7673222032130795e-05, "loss": 0.0654, "step": 20861 }, { "epoch": 0.45970020988613264, "grad_norm": 0.6042819619178772, "learning_rate": 1.7672168627652246e-05, "loss": 0.0787, "step": 20862 }, { "epoch": 0.4597222451756488, "grad_norm": 0.5747553110122681, "learning_rate": 1.7671115209562912e-05, "loss": 0.0793, "step": 20863 }, { "epoch": 0.45974428046516497, "grad_norm": 0.7342395782470703, "learning_rate": 1.767006177786816e-05, "loss": 0.0586, "step": 20864 }, { "epoch": 0.45976631575468113, "grad_norm": 0.4609973728656769, "learning_rate": 1.7669008332573346e-05, "loss": 0.0842, "step": 20865 }, { "epoch": 0.4597883510441973, "grad_norm": 0.5898104906082153, "learning_rate": 1.7667954873683845e-05, "loss": 0.0923, "step": 20866 }, { "epoch": 0.45981038633371346, "grad_norm": 0.7586257457733154, "learning_rate": 1.7666901401205017e-05, "loss": 0.0802, "step": 20867 }, { "epoch": 0.4598324216232296, "grad_norm": 0.7014052867889404, "learning_rate": 1.766584791514223e-05, "loss": 0.0712, "step": 20868 }, { "epoch": 0.45985445691274573, "grad_norm": 0.8247600197792053, "learning_rate": 1.7664794415500853e-05, "loss": 0.0601, "step": 20869 }, { "epoch": 0.4598764922022619, "grad_norm": 0.5591195821762085, "learning_rate": 1.766374090228625e-05, "loss": 0.0865, "step": 20870 }, { "epoch": 0.45989852749177806, "grad_norm": 0.7453742623329163, "learning_rate": 1.766268737550378e-05, "loss": 0.0634, "step": 20871 }, { "epoch": 0.45992056278129423, "grad_norm": 0.6379135847091675, "learning_rate": 1.766163383515882e-05, "loss": 0.0666, "step": 20872 }, { "epoch": 0.4599425980708104, "grad_norm": 0.5487503409385681, "learning_rate": 1.766058028125673e-05, "loss": 0.0901, "step": 20873 }, { "epoch": 0.45996463336032656, "grad_norm": 0.6273348927497864, "learning_rate": 1.765952671380288e-05, "loss": 0.0817, "step": 20874 }, { "epoch": 0.4599866686498427, "grad_norm": 0.6360996961593628, "learning_rate": 1.765847313280263e-05, "loss": 0.0901, "step": 20875 }, { "epoch": 0.4600087039393589, "grad_norm": 0.8551756739616394, "learning_rate": 1.7657419538261357e-05, "loss": 0.0798, "step": 20876 }, { "epoch": 0.46003073922887505, "grad_norm": 0.49089470505714417, "learning_rate": 1.7656365930184413e-05, "loss": 0.0734, "step": 20877 }, { "epoch": 0.4600527745183912, "grad_norm": 0.40952420234680176, "learning_rate": 1.7655312308577182e-05, "loss": 0.0607, "step": 20878 }, { "epoch": 0.4600748098079074, "grad_norm": 0.8297253847122192, "learning_rate": 1.7654258673445017e-05, "loss": 0.0963, "step": 20879 }, { "epoch": 0.46009684509742355, "grad_norm": 0.43796002864837646, "learning_rate": 1.7653205024793287e-05, "loss": 0.0814, "step": 20880 }, { "epoch": 0.4601188803869397, "grad_norm": 1.0050697326660156, "learning_rate": 1.7652151362627367e-05, "loss": 0.1428, "step": 20881 }, { "epoch": 0.4601409156764558, "grad_norm": 0.7047889232635498, "learning_rate": 1.7651097686952614e-05, "loss": 0.0622, "step": 20882 }, { "epoch": 0.460162950965972, "grad_norm": 0.7316436171531677, "learning_rate": 1.7650043997774396e-05, "loss": 0.0599, "step": 20883 }, { "epoch": 0.46018498625548815, "grad_norm": 0.8328173160552979, "learning_rate": 1.7648990295098086e-05, "loss": 0.1044, "step": 20884 }, { "epoch": 0.4602070215450043, "grad_norm": 0.6195109486579895, "learning_rate": 1.7647936578929048e-05, "loss": 0.0808, "step": 20885 }, { "epoch": 0.4602290568345205, "grad_norm": 0.6784327626228333, "learning_rate": 1.7646882849272648e-05, "loss": 0.0706, "step": 20886 }, { "epoch": 0.46025109212403664, "grad_norm": 0.6097723841667175, "learning_rate": 1.7645829106134254e-05, "loss": 0.0972, "step": 20887 }, { "epoch": 0.4602731274135528, "grad_norm": 0.37188923358917236, "learning_rate": 1.7644775349519237e-05, "loss": 0.0673, "step": 20888 }, { "epoch": 0.46029516270306897, "grad_norm": 0.7440856695175171, "learning_rate": 1.764372157943296e-05, "loss": 0.0796, "step": 20889 }, { "epoch": 0.46031719799258514, "grad_norm": 1.063607931137085, "learning_rate": 1.764266779588079e-05, "loss": 0.0623, "step": 20890 }, { "epoch": 0.4603392332821013, "grad_norm": 0.5547653436660767, "learning_rate": 1.7641613998868095e-05, "loss": 0.0455, "step": 20891 }, { "epoch": 0.46036126857161747, "grad_norm": 0.6720789670944214, "learning_rate": 1.7640560188400242e-05, "loss": 0.0587, "step": 20892 }, { "epoch": 0.46038330386113363, "grad_norm": 0.4318050146102905, "learning_rate": 1.7639506364482603e-05, "loss": 0.0739, "step": 20893 }, { "epoch": 0.46040533915064974, "grad_norm": 0.7642654180526733, "learning_rate": 1.763845252712054e-05, "loss": 0.0871, "step": 20894 }, { "epoch": 0.4604273744401659, "grad_norm": 0.727780282497406, "learning_rate": 1.7637398676319426e-05, "loss": 0.0905, "step": 20895 }, { "epoch": 0.46044940972968207, "grad_norm": 0.7308921813964844, "learning_rate": 1.7636344812084628e-05, "loss": 0.1074, "step": 20896 }, { "epoch": 0.46047144501919823, "grad_norm": 0.6822029948234558, "learning_rate": 1.7635290934421512e-05, "loss": 0.0991, "step": 20897 }, { "epoch": 0.4604934803087144, "grad_norm": 0.674673855304718, "learning_rate": 1.763423704333544e-05, "loss": 0.0758, "step": 20898 }, { "epoch": 0.46051551559823056, "grad_norm": 0.5944401621818542, "learning_rate": 1.763318313883179e-05, "loss": 0.0601, "step": 20899 }, { "epoch": 0.4605375508877467, "grad_norm": 0.9642715454101562, "learning_rate": 1.7632129220915926e-05, "loss": 0.1469, "step": 20900 }, { "epoch": 0.4605595861772629, "grad_norm": 0.7616928219795227, "learning_rate": 1.7631075289593215e-05, "loss": 0.0682, "step": 20901 }, { "epoch": 0.46058162146677906, "grad_norm": 0.9019409418106079, "learning_rate": 1.763002134486903e-05, "loss": 0.0904, "step": 20902 }, { "epoch": 0.4606036567562952, "grad_norm": 0.8124837279319763, "learning_rate": 1.7628967386748735e-05, "loss": 0.1028, "step": 20903 }, { "epoch": 0.4606256920458114, "grad_norm": 0.65763920545578, "learning_rate": 1.76279134152377e-05, "loss": 0.0676, "step": 20904 }, { "epoch": 0.46064772733532755, "grad_norm": 0.785403311252594, "learning_rate": 1.7626859430341293e-05, "loss": 0.0703, "step": 20905 }, { "epoch": 0.46066976262484366, "grad_norm": 0.6079809665679932, "learning_rate": 1.7625805432064876e-05, "loss": 0.0851, "step": 20906 }, { "epoch": 0.4606917979143598, "grad_norm": 0.7290301322937012, "learning_rate": 1.762475142041383e-05, "loss": 0.0831, "step": 20907 }, { "epoch": 0.460713833203876, "grad_norm": 0.3920864760875702, "learning_rate": 1.7623697395393517e-05, "loss": 0.0799, "step": 20908 }, { "epoch": 0.46073586849339215, "grad_norm": 0.8061102628707886, "learning_rate": 1.76226433570093e-05, "loss": 0.0439, "step": 20909 }, { "epoch": 0.4607579037829083, "grad_norm": 0.46920153498649597, "learning_rate": 1.7621589305266562e-05, "loss": 0.0643, "step": 20910 }, { "epoch": 0.4607799390724245, "grad_norm": 0.5436801910400391, "learning_rate": 1.7620535240170664e-05, "loss": 0.0717, "step": 20911 }, { "epoch": 0.46080197436194065, "grad_norm": 0.7806904315948486, "learning_rate": 1.761948116172697e-05, "loss": 0.0807, "step": 20912 }, { "epoch": 0.4608240096514568, "grad_norm": 0.661754310131073, "learning_rate": 1.7618427069940853e-05, "loss": 0.1069, "step": 20913 }, { "epoch": 0.460846044940973, "grad_norm": 0.9198547601699829, "learning_rate": 1.7617372964817688e-05, "loss": 0.0907, "step": 20914 }, { "epoch": 0.46086808023048914, "grad_norm": 0.8565139770507812, "learning_rate": 1.7616318846362834e-05, "loss": 0.088, "step": 20915 }, { "epoch": 0.4608901155200053, "grad_norm": 0.6503551602363586, "learning_rate": 1.761526471458167e-05, "loss": 0.08, "step": 20916 }, { "epoch": 0.46091215080952147, "grad_norm": 0.6114568114280701, "learning_rate": 1.7614210569479555e-05, "loss": 0.1081, "step": 20917 }, { "epoch": 0.46093418609903763, "grad_norm": 0.9127776026725769, "learning_rate": 1.761315641106187e-05, "loss": 0.0985, "step": 20918 }, { "epoch": 0.46095622138855374, "grad_norm": 0.7042858600616455, "learning_rate": 1.7612102239333973e-05, "loss": 0.108, "step": 20919 }, { "epoch": 0.4609782566780699, "grad_norm": 1.263724446296692, "learning_rate": 1.7611048054301245e-05, "loss": 0.0969, "step": 20920 }, { "epoch": 0.4610002919675861, "grad_norm": 0.6167650818824768, "learning_rate": 1.760999385596904e-05, "loss": 0.0747, "step": 20921 }, { "epoch": 0.46102232725710224, "grad_norm": 0.8144837021827698, "learning_rate": 1.760893964434274e-05, "loss": 0.11, "step": 20922 }, { "epoch": 0.4610443625466184, "grad_norm": 0.4233250916004181, "learning_rate": 1.7607885419427714e-05, "loss": 0.0944, "step": 20923 }, { "epoch": 0.46106639783613457, "grad_norm": 0.8073349595069885, "learning_rate": 1.7606831181229327e-05, "loss": 0.0911, "step": 20924 }, { "epoch": 0.46108843312565073, "grad_norm": 0.5693525075912476, "learning_rate": 1.760577692975295e-05, "loss": 0.1131, "step": 20925 }, { "epoch": 0.4611104684151669, "grad_norm": 0.5248075723648071, "learning_rate": 1.760472266500396e-05, "loss": 0.0655, "step": 20926 }, { "epoch": 0.46113250370468306, "grad_norm": 0.8475046753883362, "learning_rate": 1.760366838698771e-05, "loss": 0.0841, "step": 20927 }, { "epoch": 0.4611545389941992, "grad_norm": 0.760213315486908, "learning_rate": 1.760261409570959e-05, "loss": 0.0874, "step": 20928 }, { "epoch": 0.4611765742837154, "grad_norm": 0.8484673500061035, "learning_rate": 1.760155979117496e-05, "loss": 0.0739, "step": 20929 }, { "epoch": 0.46119860957323155, "grad_norm": 0.5167576670646667, "learning_rate": 1.7600505473389183e-05, "loss": 0.0831, "step": 20930 }, { "epoch": 0.46122064486274766, "grad_norm": 0.4824463129043579, "learning_rate": 1.7599451142357645e-05, "loss": 0.0952, "step": 20931 }, { "epoch": 0.46124268015226383, "grad_norm": 0.4134066700935364, "learning_rate": 1.75983967980857e-05, "loss": 0.0547, "step": 20932 }, { "epoch": 0.46126471544178, "grad_norm": 0.6496190428733826, "learning_rate": 1.7597342440578736e-05, "loss": 0.0857, "step": 20933 }, { "epoch": 0.46128675073129616, "grad_norm": 0.5767701268196106, "learning_rate": 1.7596288069842112e-05, "loss": 0.0897, "step": 20934 }, { "epoch": 0.4613087860208123, "grad_norm": 0.7869106531143188, "learning_rate": 1.75952336858812e-05, "loss": 0.0753, "step": 20935 }, { "epoch": 0.4613308213103285, "grad_norm": 0.5549393892288208, "learning_rate": 1.759417928870137e-05, "loss": 0.0829, "step": 20936 }, { "epoch": 0.46135285659984465, "grad_norm": 0.7036856412887573, "learning_rate": 1.7593124878307997e-05, "loss": 0.1142, "step": 20937 }, { "epoch": 0.4613748918893608, "grad_norm": 0.3541906177997589, "learning_rate": 1.7592070454706444e-05, "loss": 0.0947, "step": 20938 }, { "epoch": 0.461396927178877, "grad_norm": 0.7469185590744019, "learning_rate": 1.7591016017902087e-05, "loss": 0.0705, "step": 20939 }, { "epoch": 0.46141896246839315, "grad_norm": 0.8974850177764893, "learning_rate": 1.7589961567900296e-05, "loss": 0.0634, "step": 20940 }, { "epoch": 0.4614409977579093, "grad_norm": 0.8156068921089172, "learning_rate": 1.7588907104706445e-05, "loss": 0.0746, "step": 20941 }, { "epoch": 0.4614630330474255, "grad_norm": 0.3691776990890503, "learning_rate": 1.7587852628325896e-05, "loss": 0.0515, "step": 20942 }, { "epoch": 0.46148506833694164, "grad_norm": 0.5512831211090088, "learning_rate": 1.758679813876403e-05, "loss": 0.0766, "step": 20943 }, { "epoch": 0.46150710362645775, "grad_norm": 0.8644803762435913, "learning_rate": 1.7585743636026216e-05, "loss": 0.0673, "step": 20944 }, { "epoch": 0.4615291389159739, "grad_norm": 0.8636846542358398, "learning_rate": 1.758468912011782e-05, "loss": 0.1022, "step": 20945 }, { "epoch": 0.4615511742054901, "grad_norm": 1.0913546085357666, "learning_rate": 1.7583634591044214e-05, "loss": 0.0556, "step": 20946 }, { "epoch": 0.46157320949500624, "grad_norm": 0.6357080340385437, "learning_rate": 1.758258004881078e-05, "loss": 0.0669, "step": 20947 }, { "epoch": 0.4615952447845224, "grad_norm": 0.8446091413497925, "learning_rate": 1.758152549342287e-05, "loss": 0.0859, "step": 20948 }, { "epoch": 0.46161728007403857, "grad_norm": 0.4644794464111328, "learning_rate": 1.7580470924885877e-05, "loss": 0.0673, "step": 20949 }, { "epoch": 0.46163931536355474, "grad_norm": 0.7742396593093872, "learning_rate": 1.757941634320515e-05, "loss": 0.1034, "step": 20950 }, { "epoch": 0.4616613506530709, "grad_norm": 0.6982198357582092, "learning_rate": 1.7578361748386082e-05, "loss": 0.0741, "step": 20951 }, { "epoch": 0.46168338594258707, "grad_norm": 1.012609839439392, "learning_rate": 1.7577307140434032e-05, "loss": 0.0721, "step": 20952 }, { "epoch": 0.46170542123210323, "grad_norm": 0.7684047818183899, "learning_rate": 1.7576252519354374e-05, "loss": 0.1153, "step": 20953 }, { "epoch": 0.4617274565216194, "grad_norm": 1.0113883018493652, "learning_rate": 1.7575197885152482e-05, "loss": 0.1237, "step": 20954 }, { "epoch": 0.46174949181113556, "grad_norm": 0.8233517408370972, "learning_rate": 1.7574143237833725e-05, "loss": 0.0777, "step": 20955 }, { "epoch": 0.46177152710065167, "grad_norm": 0.9617699980735779, "learning_rate": 1.7573088577403475e-05, "loss": 0.0987, "step": 20956 }, { "epoch": 0.46179356239016783, "grad_norm": 0.6458677053451538, "learning_rate": 1.7572033903867104e-05, "loss": 0.0772, "step": 20957 }, { "epoch": 0.461815597679684, "grad_norm": 1.0444597005844116, "learning_rate": 1.757097921722999e-05, "loss": 0.0751, "step": 20958 }, { "epoch": 0.46183763296920016, "grad_norm": 0.7083505988121033, "learning_rate": 1.7569924517497498e-05, "loss": 0.117, "step": 20959 }, { "epoch": 0.4618596682587163, "grad_norm": 0.7909060716629028, "learning_rate": 1.7568869804675004e-05, "loss": 0.0946, "step": 20960 }, { "epoch": 0.4618817035482325, "grad_norm": 0.9429800510406494, "learning_rate": 1.7567815078767873e-05, "loss": 0.0922, "step": 20961 }, { "epoch": 0.46190373883774866, "grad_norm": 0.42184174060821533, "learning_rate": 1.756676033978149e-05, "loss": 0.0822, "step": 20962 }, { "epoch": 0.4619257741272648, "grad_norm": 0.8708839416503906, "learning_rate": 1.7565705587721214e-05, "loss": 0.0903, "step": 20963 }, { "epoch": 0.461947809416781, "grad_norm": 0.6923931241035461, "learning_rate": 1.7564650822592426e-05, "loss": 0.0944, "step": 20964 }, { "epoch": 0.46196984470629715, "grad_norm": 0.43561291694641113, "learning_rate": 1.7563596044400493e-05, "loss": 0.0724, "step": 20965 }, { "epoch": 0.4619918799958133, "grad_norm": 0.3682190775871277, "learning_rate": 1.7562541253150798e-05, "loss": 0.0722, "step": 20966 }, { "epoch": 0.4620139152853295, "grad_norm": 0.6520646214485168, "learning_rate": 1.7561486448848704e-05, "loss": 0.0591, "step": 20967 }, { "epoch": 0.4620359505748456, "grad_norm": 0.7371220588684082, "learning_rate": 1.7560431631499583e-05, "loss": 0.0614, "step": 20968 }, { "epoch": 0.46205798586436175, "grad_norm": 0.5355307459831238, "learning_rate": 1.7559376801108814e-05, "loss": 0.0671, "step": 20969 }, { "epoch": 0.4620800211538779, "grad_norm": 0.7235140800476074, "learning_rate": 1.7558321957681764e-05, "loss": 0.0791, "step": 20970 }, { "epoch": 0.4621020564433941, "grad_norm": 0.7788603901863098, "learning_rate": 1.7557267101223803e-05, "loss": 0.0899, "step": 20971 }, { "epoch": 0.46212409173291025, "grad_norm": 0.5860984921455383, "learning_rate": 1.7556212231740316e-05, "loss": 0.0878, "step": 20972 }, { "epoch": 0.4621461270224264, "grad_norm": 0.6032348275184631, "learning_rate": 1.7555157349236668e-05, "loss": 0.0929, "step": 20973 }, { "epoch": 0.4621681623119426, "grad_norm": 0.7728356719017029, "learning_rate": 1.755410245371823e-05, "loss": 0.0659, "step": 20974 }, { "epoch": 0.46219019760145874, "grad_norm": 0.7113247513771057, "learning_rate": 1.7553047545190386e-05, "loss": 0.094, "step": 20975 }, { "epoch": 0.4622122328909749, "grad_norm": 0.7624645233154297, "learning_rate": 1.7551992623658503e-05, "loss": 0.11, "step": 20976 }, { "epoch": 0.46223426818049107, "grad_norm": 0.4500616490840912, "learning_rate": 1.7550937689127947e-05, "loss": 0.0629, "step": 20977 }, { "epoch": 0.46225630347000723, "grad_norm": 0.3764078617095947, "learning_rate": 1.7549882741604095e-05, "loss": 0.0792, "step": 20978 }, { "epoch": 0.4622783387595234, "grad_norm": 0.8277013301849365, "learning_rate": 1.7548827781092328e-05, "loss": 0.0704, "step": 20979 }, { "epoch": 0.46230037404903956, "grad_norm": 0.5938454866409302, "learning_rate": 1.7547772807598012e-05, "loss": 0.1085, "step": 20980 }, { "epoch": 0.4623224093385557, "grad_norm": 0.37535908818244934, "learning_rate": 1.7546717821126525e-05, "loss": 0.0756, "step": 20981 }, { "epoch": 0.46234444462807184, "grad_norm": 0.7711655497550964, "learning_rate": 1.7545662821683236e-05, "loss": 0.073, "step": 20982 }, { "epoch": 0.462366479917588, "grad_norm": 0.5149252414703369, "learning_rate": 1.7544607809273523e-05, "loss": 0.0781, "step": 20983 }, { "epoch": 0.46238851520710417, "grad_norm": 0.4819685220718384, "learning_rate": 1.754355278390276e-05, "loss": 0.0898, "step": 20984 }, { "epoch": 0.46241055049662033, "grad_norm": 0.6176177859306335, "learning_rate": 1.7542497745576315e-05, "loss": 0.1103, "step": 20985 }, { "epoch": 0.4624325857861365, "grad_norm": 0.7551988363265991, "learning_rate": 1.7541442694299564e-05, "loss": 0.1, "step": 20986 }, { "epoch": 0.46245462107565266, "grad_norm": 0.9378745555877686, "learning_rate": 1.7540387630077886e-05, "loss": 0.1063, "step": 20987 }, { "epoch": 0.4624766563651688, "grad_norm": 0.5414949059486389, "learning_rate": 1.7539332552916647e-05, "loss": 0.0885, "step": 20988 }, { "epoch": 0.462498691654685, "grad_norm": 0.8863575458526611, "learning_rate": 1.7538277462821233e-05, "loss": 0.0907, "step": 20989 }, { "epoch": 0.46252072694420115, "grad_norm": 0.7159016728401184, "learning_rate": 1.7537222359797006e-05, "loss": 0.0906, "step": 20990 }, { "epoch": 0.4625427622337173, "grad_norm": 0.5068133473396301, "learning_rate": 1.7536167243849347e-05, "loss": 0.0672, "step": 20991 }, { "epoch": 0.4625647975232335, "grad_norm": 0.8238105177879333, "learning_rate": 1.753511211498363e-05, "loss": 0.0403, "step": 20992 }, { "epoch": 0.4625868328127496, "grad_norm": 0.3544410765171051, "learning_rate": 1.7534056973205225e-05, "loss": 0.0711, "step": 20993 }, { "epoch": 0.46260886810226576, "grad_norm": 0.6113430261611938, "learning_rate": 1.7533001818519507e-05, "loss": 0.0434, "step": 20994 }, { "epoch": 0.4626309033917819, "grad_norm": 0.7115038633346558, "learning_rate": 1.753194665093185e-05, "loss": 0.0809, "step": 20995 }, { "epoch": 0.4626529386812981, "grad_norm": 0.6523310542106628, "learning_rate": 1.7530891470447636e-05, "loss": 0.0763, "step": 20996 }, { "epoch": 0.46267497397081425, "grad_norm": 0.6586915254592896, "learning_rate": 1.7529836277072234e-05, "loss": 0.0838, "step": 20997 }, { "epoch": 0.4626970092603304, "grad_norm": 0.7753264904022217, "learning_rate": 1.7528781070811018e-05, "loss": 0.0558, "step": 20998 }, { "epoch": 0.4627190445498466, "grad_norm": 0.591895341873169, "learning_rate": 1.7527725851669365e-05, "loss": 0.0704, "step": 20999 }, { "epoch": 0.46274107983936275, "grad_norm": 0.630048394203186, "learning_rate": 1.7526670619652652e-05, "loss": 0.1213, "step": 21000 }, { "epoch": 0.4627631151288789, "grad_norm": 0.5804614424705505, "learning_rate": 1.7525615374766246e-05, "loss": 0.075, "step": 21001 }, { "epoch": 0.4627851504183951, "grad_norm": 0.3796023726463318, "learning_rate": 1.7524560117015527e-05, "loss": 0.1027, "step": 21002 }, { "epoch": 0.46280718570791124, "grad_norm": 0.47226467728614807, "learning_rate": 1.752350484640587e-05, "loss": 0.0638, "step": 21003 }, { "epoch": 0.4628292209974274, "grad_norm": 0.8060568571090698, "learning_rate": 1.7522449562942648e-05, "loss": 0.0947, "step": 21004 }, { "epoch": 0.4628512562869435, "grad_norm": 0.8732883334159851, "learning_rate": 1.752139426663124e-05, "loss": 0.0996, "step": 21005 }, { "epoch": 0.4628732915764597, "grad_norm": 0.8576896786689758, "learning_rate": 1.752033895747702e-05, "loss": 0.0688, "step": 21006 }, { "epoch": 0.46289532686597584, "grad_norm": 1.1078952550888062, "learning_rate": 1.751928363548536e-05, "loss": 0.0944, "step": 21007 }, { "epoch": 0.462917362155492, "grad_norm": 0.9734174013137817, "learning_rate": 1.751822830066164e-05, "loss": 0.0967, "step": 21008 }, { "epoch": 0.46293939744500817, "grad_norm": 0.9549413919448853, "learning_rate": 1.751717295301123e-05, "loss": 0.0635, "step": 21009 }, { "epoch": 0.46296143273452434, "grad_norm": 0.704740583896637, "learning_rate": 1.7516117592539513e-05, "loss": 0.0738, "step": 21010 }, { "epoch": 0.4629834680240405, "grad_norm": 0.8139148950576782, "learning_rate": 1.7515062219251853e-05, "loss": 0.0943, "step": 21011 }, { "epoch": 0.46300550331355667, "grad_norm": 0.5032416582107544, "learning_rate": 1.7514006833153637e-05, "loss": 0.079, "step": 21012 }, { "epoch": 0.46302753860307283, "grad_norm": 0.7449196577072144, "learning_rate": 1.751295143425023e-05, "loss": 0.1154, "step": 21013 }, { "epoch": 0.463049573892589, "grad_norm": 0.7591410875320435, "learning_rate": 1.7511896022547024e-05, "loss": 0.0903, "step": 21014 }, { "epoch": 0.46307160918210516, "grad_norm": 0.78851318359375, "learning_rate": 1.751084059804938e-05, "loss": 0.0799, "step": 21015 }, { "epoch": 0.4630936444716213, "grad_norm": 0.7301257252693176, "learning_rate": 1.7509785160762678e-05, "loss": 0.095, "step": 21016 }, { "epoch": 0.4631156797611375, "grad_norm": 0.41422176361083984, "learning_rate": 1.750872971069229e-05, "loss": 0.0887, "step": 21017 }, { "epoch": 0.4631377150506536, "grad_norm": 0.5506097674369812, "learning_rate": 1.7507674247843603e-05, "loss": 0.0978, "step": 21018 }, { "epoch": 0.46315975034016976, "grad_norm": 0.6494148969650269, "learning_rate": 1.7506618772221983e-05, "loss": 0.0952, "step": 21019 }, { "epoch": 0.4631817856296859, "grad_norm": 0.4125519394874573, "learning_rate": 1.7505563283832805e-05, "loss": 0.073, "step": 21020 }, { "epoch": 0.4632038209192021, "grad_norm": 0.466654896736145, "learning_rate": 1.7504507782681456e-05, "loss": 0.0546, "step": 21021 }, { "epoch": 0.46322585620871826, "grad_norm": 0.6390888690948486, "learning_rate": 1.7503452268773307e-05, "loss": 0.0887, "step": 21022 }, { "epoch": 0.4632478914982344, "grad_norm": 0.6696680784225464, "learning_rate": 1.7502396742113733e-05, "loss": 0.088, "step": 21023 }, { "epoch": 0.4632699267877506, "grad_norm": 0.937748372554779, "learning_rate": 1.7501341202708107e-05, "loss": 0.0674, "step": 21024 }, { "epoch": 0.46329196207726675, "grad_norm": 0.759630560874939, "learning_rate": 1.750028565056181e-05, "loss": 0.0767, "step": 21025 }, { "epoch": 0.4633139973667829, "grad_norm": 0.974180281162262, "learning_rate": 1.7499230085680214e-05, "loss": 0.1224, "step": 21026 }, { "epoch": 0.4633360326562991, "grad_norm": 0.7918250560760498, "learning_rate": 1.7498174508068705e-05, "loss": 0.0558, "step": 21027 }, { "epoch": 0.46335806794581524, "grad_norm": 0.6067824363708496, "learning_rate": 1.749711891773265e-05, "loss": 0.0801, "step": 21028 }, { "epoch": 0.4633801032353314, "grad_norm": 0.5352611541748047, "learning_rate": 1.7496063314677428e-05, "loss": 0.0898, "step": 21029 }, { "epoch": 0.4634021385248475, "grad_norm": 0.740767240524292, "learning_rate": 1.749500769890842e-05, "loss": 0.1216, "step": 21030 }, { "epoch": 0.4634241738143637, "grad_norm": 1.124330997467041, "learning_rate": 1.7493952070431e-05, "loss": 0.115, "step": 21031 }, { "epoch": 0.46344620910387985, "grad_norm": 0.6350289583206177, "learning_rate": 1.7492896429250547e-05, "loss": 0.0596, "step": 21032 }, { "epoch": 0.463468244393396, "grad_norm": 1.0461419820785522, "learning_rate": 1.7491840775372432e-05, "loss": 0.0645, "step": 21033 }, { "epoch": 0.4634902796829122, "grad_norm": 1.5097516775131226, "learning_rate": 1.7490785108802034e-05, "loss": 0.0791, "step": 21034 }, { "epoch": 0.46351231497242834, "grad_norm": 0.8439461588859558, "learning_rate": 1.7489729429544735e-05, "loss": 0.0628, "step": 21035 }, { "epoch": 0.4635343502619445, "grad_norm": 0.5909620523452759, "learning_rate": 1.748867373760591e-05, "loss": 0.0555, "step": 21036 }, { "epoch": 0.46355638555146067, "grad_norm": 0.57671719789505, "learning_rate": 1.748761803299093e-05, "loss": 0.0672, "step": 21037 }, { "epoch": 0.46357842084097683, "grad_norm": 0.6301868557929993, "learning_rate": 1.748656231570518e-05, "loss": 0.0457, "step": 21038 }, { "epoch": 0.463600456130493, "grad_norm": 0.7199620604515076, "learning_rate": 1.7485506585754034e-05, "loss": 0.0811, "step": 21039 }, { "epoch": 0.46362249142000916, "grad_norm": 0.5076056122779846, "learning_rate": 1.7484450843142876e-05, "loss": 0.0799, "step": 21040 }, { "epoch": 0.46364452670952533, "grad_norm": 1.037041425704956, "learning_rate": 1.7483395087877072e-05, "loss": 0.1281, "step": 21041 }, { "epoch": 0.46366656199904144, "grad_norm": 0.9028689861297607, "learning_rate": 1.7482339319962004e-05, "loss": 0.086, "step": 21042 }, { "epoch": 0.4636885972885576, "grad_norm": 0.6133513450622559, "learning_rate": 1.7481283539403054e-05, "loss": 0.0811, "step": 21043 }, { "epoch": 0.46371063257807377, "grad_norm": 0.5989586114883423, "learning_rate": 1.7480227746205594e-05, "loss": 0.0867, "step": 21044 }, { "epoch": 0.46373266786758993, "grad_norm": 0.5050894618034363, "learning_rate": 1.7479171940375007e-05, "loss": 0.0677, "step": 21045 }, { "epoch": 0.4637547031571061, "grad_norm": 0.727604329586029, "learning_rate": 1.7478116121916662e-05, "loss": 0.0879, "step": 21046 }, { "epoch": 0.46377673844662226, "grad_norm": 0.5777870416641235, "learning_rate": 1.7477060290835952e-05, "loss": 0.051, "step": 21047 }, { "epoch": 0.4637987737361384, "grad_norm": 0.5289290547370911, "learning_rate": 1.7476004447138238e-05, "loss": 0.1115, "step": 21048 }, { "epoch": 0.4638208090256546, "grad_norm": 0.41525816917419434, "learning_rate": 1.747494859082891e-05, "loss": 0.0919, "step": 21049 }, { "epoch": 0.46384284431517075, "grad_norm": 0.5028367042541504, "learning_rate": 1.747389272191334e-05, "loss": 0.063, "step": 21050 }, { "epoch": 0.4638648796046869, "grad_norm": 0.5512672066688538, "learning_rate": 1.7472836840396907e-05, "loss": 0.1082, "step": 21051 }, { "epoch": 0.4638869148942031, "grad_norm": 0.8806699514389038, "learning_rate": 1.747178094628499e-05, "loss": 0.1109, "step": 21052 }, { "epoch": 0.46390895018371925, "grad_norm": 1.0043243169784546, "learning_rate": 1.7470725039582968e-05, "loss": 0.0851, "step": 21053 }, { "epoch": 0.4639309854732354, "grad_norm": 0.7931501865386963, "learning_rate": 1.7469669120296217e-05, "loss": 0.1109, "step": 21054 }, { "epoch": 0.4639530207627515, "grad_norm": 0.49002498388290405, "learning_rate": 1.746861318843012e-05, "loss": 0.0733, "step": 21055 }, { "epoch": 0.4639750560522677, "grad_norm": 0.6747048497200012, "learning_rate": 1.746755724399005e-05, "loss": 0.0646, "step": 21056 }, { "epoch": 0.46399709134178385, "grad_norm": 1.2526538372039795, "learning_rate": 1.7466501286981385e-05, "loss": 0.0755, "step": 21057 }, { "epoch": 0.4640191266313, "grad_norm": 0.7406550049781799, "learning_rate": 1.7465445317409514e-05, "loss": 0.0881, "step": 21058 }, { "epoch": 0.4640411619208162, "grad_norm": 0.7590042948722839, "learning_rate": 1.74643893352798e-05, "loss": 0.0904, "step": 21059 }, { "epoch": 0.46406319721033235, "grad_norm": 0.596724808216095, "learning_rate": 1.7463333340597633e-05, "loss": 0.0581, "step": 21060 }, { "epoch": 0.4640852324998485, "grad_norm": 0.7875155806541443, "learning_rate": 1.7462277333368388e-05, "loss": 0.1095, "step": 21061 }, { "epoch": 0.4641072677893647, "grad_norm": 0.5665094256401062, "learning_rate": 1.7461221313597444e-05, "loss": 0.0842, "step": 21062 }, { "epoch": 0.46412930307888084, "grad_norm": 0.5045598745346069, "learning_rate": 1.746016528129018e-05, "loss": 0.0674, "step": 21063 }, { "epoch": 0.464151338368397, "grad_norm": 0.8663880825042725, "learning_rate": 1.7459109236451973e-05, "loss": 0.0777, "step": 21064 }, { "epoch": 0.46417337365791317, "grad_norm": 0.5133585333824158, "learning_rate": 1.745805317908821e-05, "loss": 0.0549, "step": 21065 }, { "epoch": 0.46419540894742933, "grad_norm": 0.5876259803771973, "learning_rate": 1.7456997109204256e-05, "loss": 0.0555, "step": 21066 }, { "epoch": 0.46421744423694544, "grad_norm": 0.6332186460494995, "learning_rate": 1.7455941026805505e-05, "loss": 0.0748, "step": 21067 }, { "epoch": 0.4642394795264616, "grad_norm": 0.5146456956863403, "learning_rate": 1.7454884931897325e-05, "loss": 0.0478, "step": 21068 }, { "epoch": 0.46426151481597777, "grad_norm": 0.750738799571991, "learning_rate": 1.7453828824485102e-05, "loss": 0.0823, "step": 21069 }, { "epoch": 0.46428355010549394, "grad_norm": 0.849138617515564, "learning_rate": 1.7452772704574216e-05, "loss": 0.0727, "step": 21070 }, { "epoch": 0.4643055853950101, "grad_norm": 0.45332255959510803, "learning_rate": 1.745171657217004e-05, "loss": 0.0718, "step": 21071 }, { "epoch": 0.46432762068452627, "grad_norm": 0.6435077786445618, "learning_rate": 1.7450660427277953e-05, "loss": 0.0776, "step": 21072 }, { "epoch": 0.46434965597404243, "grad_norm": 0.8195372819900513, "learning_rate": 1.7449604269903346e-05, "loss": 0.0712, "step": 21073 }, { "epoch": 0.4643716912635586, "grad_norm": 0.8505797982215881, "learning_rate": 1.744854810005158e-05, "loss": 0.0925, "step": 21074 }, { "epoch": 0.46439372655307476, "grad_norm": 0.5824430584907532, "learning_rate": 1.7447491917728056e-05, "loss": 0.0849, "step": 21075 }, { "epoch": 0.4644157618425909, "grad_norm": 0.8617141842842102, "learning_rate": 1.7446435722938137e-05, "loss": 0.0811, "step": 21076 }, { "epoch": 0.4644377971321071, "grad_norm": 0.3878397047519684, "learning_rate": 1.7445379515687214e-05, "loss": 0.048, "step": 21077 }, { "epoch": 0.46445983242162325, "grad_norm": 0.5120579600334167, "learning_rate": 1.7444323295980662e-05, "loss": 0.0626, "step": 21078 }, { "epoch": 0.46448186771113936, "grad_norm": 0.7880133390426636, "learning_rate": 1.7443267063823857e-05, "loss": 0.0838, "step": 21079 }, { "epoch": 0.4645039030006555, "grad_norm": 0.955966055393219, "learning_rate": 1.744221081922218e-05, "loss": 0.0797, "step": 21080 }, { "epoch": 0.4645259382901717, "grad_norm": 0.3697828948497772, "learning_rate": 1.7441154562181018e-05, "loss": 0.0688, "step": 21081 }, { "epoch": 0.46454797357968786, "grad_norm": 0.4507928192615509, "learning_rate": 1.7440098292705747e-05, "loss": 0.0559, "step": 21082 }, { "epoch": 0.464570008869204, "grad_norm": 0.8278132081031799, "learning_rate": 1.7439042010801745e-05, "loss": 0.1157, "step": 21083 }, { "epoch": 0.4645920441587202, "grad_norm": 0.7324445247650146, "learning_rate": 1.7437985716474396e-05, "loss": 0.0835, "step": 21084 }, { "epoch": 0.46461407944823635, "grad_norm": 0.3519720733165741, "learning_rate": 1.7436929409729073e-05, "loss": 0.0683, "step": 21085 }, { "epoch": 0.4646361147377525, "grad_norm": 0.7582288384437561, "learning_rate": 1.743587309057117e-05, "loss": 0.0744, "step": 21086 }, { "epoch": 0.4646581500272687, "grad_norm": 0.6058526039123535, "learning_rate": 1.7434816759006053e-05, "loss": 0.0767, "step": 21087 }, { "epoch": 0.46468018531678484, "grad_norm": 0.6639015674591064, "learning_rate": 1.7433760415039113e-05, "loss": 0.0677, "step": 21088 }, { "epoch": 0.464702220606301, "grad_norm": 0.3855421841144562, "learning_rate": 1.743270405867572e-05, "loss": 0.0514, "step": 21089 }, { "epoch": 0.4647242558958172, "grad_norm": 0.6530061364173889, "learning_rate": 1.7431647689921268e-05, "loss": 0.0421, "step": 21090 }, { "epoch": 0.46474629118533334, "grad_norm": 0.6882175207138062, "learning_rate": 1.7430591308781125e-05, "loss": 0.0709, "step": 21091 }, { "epoch": 0.46476832647484945, "grad_norm": 0.7253458499908447, "learning_rate": 1.742953491526068e-05, "loss": 0.0804, "step": 21092 }, { "epoch": 0.4647903617643656, "grad_norm": 0.537460207939148, "learning_rate": 1.742847850936531e-05, "loss": 0.0675, "step": 21093 }, { "epoch": 0.4648123970538818, "grad_norm": 0.8426030278205872, "learning_rate": 1.7427422091100398e-05, "loss": 0.0901, "step": 21094 }, { "epoch": 0.46483443234339794, "grad_norm": 0.7058531641960144, "learning_rate": 1.7426365660471322e-05, "loss": 0.0923, "step": 21095 }, { "epoch": 0.4648564676329141, "grad_norm": 0.6772050261497498, "learning_rate": 1.7425309217483467e-05, "loss": 0.1009, "step": 21096 }, { "epoch": 0.46487850292243027, "grad_norm": 0.49442049860954285, "learning_rate": 1.742425276214221e-05, "loss": 0.0779, "step": 21097 }, { "epoch": 0.46490053821194643, "grad_norm": 0.4669567048549652, "learning_rate": 1.7423196294452934e-05, "loss": 0.0902, "step": 21098 }, { "epoch": 0.4649225735014626, "grad_norm": 0.5613266825675964, "learning_rate": 1.7422139814421014e-05, "loss": 0.0698, "step": 21099 }, { "epoch": 0.46494460879097876, "grad_norm": 0.5572763085365295, "learning_rate": 1.7421083322051845e-05, "loss": 0.0776, "step": 21100 }, { "epoch": 0.46496664408049493, "grad_norm": 0.6492993831634521, "learning_rate": 1.7420026817350792e-05, "loss": 0.0554, "step": 21101 }, { "epoch": 0.4649886793700111, "grad_norm": 0.49954405426979065, "learning_rate": 1.7418970300323255e-05, "loss": 0.065, "step": 21102 }, { "epoch": 0.46501071465952726, "grad_norm": 0.6414949297904968, "learning_rate": 1.74179137709746e-05, "loss": 0.0924, "step": 21103 }, { "epoch": 0.46503274994904337, "grad_norm": 0.4948025345802307, "learning_rate": 1.7416857229310218e-05, "loss": 0.0833, "step": 21104 }, { "epoch": 0.46505478523855953, "grad_norm": 0.3518473207950592, "learning_rate": 1.741580067533548e-05, "loss": 0.0723, "step": 21105 }, { "epoch": 0.4650768205280757, "grad_norm": 0.8035306334495544, "learning_rate": 1.7414744109055778e-05, "loss": 0.1175, "step": 21106 }, { "epoch": 0.46509885581759186, "grad_norm": 0.7003640532493591, "learning_rate": 1.741368753047648e-05, "loss": 0.0741, "step": 21107 }, { "epoch": 0.465120891107108, "grad_norm": 0.5344144105911255, "learning_rate": 1.7412630939602985e-05, "loss": 0.0825, "step": 21108 }, { "epoch": 0.4651429263966242, "grad_norm": 0.7495095133781433, "learning_rate": 1.7411574336440667e-05, "loss": 0.086, "step": 21109 }, { "epoch": 0.46516496168614035, "grad_norm": 0.6468122005462646, "learning_rate": 1.741051772099491e-05, "loss": 0.078, "step": 21110 }, { "epoch": 0.4651869969756565, "grad_norm": 0.6782209277153015, "learning_rate": 1.740946109327109e-05, "loss": 0.0804, "step": 21111 }, { "epoch": 0.4652090322651727, "grad_norm": 0.952068567276001, "learning_rate": 1.740840445327459e-05, "loss": 0.0853, "step": 21112 }, { "epoch": 0.46523106755468885, "grad_norm": 0.6770637631416321, "learning_rate": 1.74073478010108e-05, "loss": 0.0969, "step": 21113 }, { "epoch": 0.465253102844205, "grad_norm": 0.7001745104789734, "learning_rate": 1.740629113648509e-05, "loss": 0.0798, "step": 21114 }, { "epoch": 0.4652751381337212, "grad_norm": 0.6288313269615173, "learning_rate": 1.7405234459702853e-05, "loss": 0.0721, "step": 21115 }, { "epoch": 0.4652971734232373, "grad_norm": 0.5643140077590942, "learning_rate": 1.7404177770669467e-05, "loss": 0.0886, "step": 21116 }, { "epoch": 0.46531920871275345, "grad_norm": 1.1555265188217163, "learning_rate": 1.7403121069390318e-05, "loss": 0.0976, "step": 21117 }, { "epoch": 0.4653412440022696, "grad_norm": 0.503402829170227, "learning_rate": 1.740206435587078e-05, "loss": 0.0579, "step": 21118 }, { "epoch": 0.4653632792917858, "grad_norm": 0.6773263216018677, "learning_rate": 1.7401007630116243e-05, "loss": 0.0895, "step": 21119 }, { "epoch": 0.46538531458130195, "grad_norm": 0.40921488404273987, "learning_rate": 1.7399950892132084e-05, "loss": 0.053, "step": 21120 }, { "epoch": 0.4654073498708181, "grad_norm": 0.7824615836143494, "learning_rate": 1.739889414192369e-05, "loss": 0.0801, "step": 21121 }, { "epoch": 0.4654293851603343, "grad_norm": 0.5779725909233093, "learning_rate": 1.7397837379496438e-05, "loss": 0.0631, "step": 21122 }, { "epoch": 0.46545142044985044, "grad_norm": 0.5960108637809753, "learning_rate": 1.7396780604855722e-05, "loss": 0.081, "step": 21123 }, { "epoch": 0.4654734557393666, "grad_norm": 0.8061549067497253, "learning_rate": 1.739572381800691e-05, "loss": 0.0879, "step": 21124 }, { "epoch": 0.46549549102888277, "grad_norm": 0.7269067764282227, "learning_rate": 1.7394667018955396e-05, "loss": 0.0905, "step": 21125 }, { "epoch": 0.46551752631839893, "grad_norm": 0.6039006114006042, "learning_rate": 1.7393610207706556e-05, "loss": 0.0947, "step": 21126 }, { "epoch": 0.4655395616079151, "grad_norm": 0.9717870354652405, "learning_rate": 1.739255338426578e-05, "loss": 0.1102, "step": 21127 }, { "epoch": 0.46556159689743126, "grad_norm": 0.4157796800136566, "learning_rate": 1.7391496548638443e-05, "loss": 0.1116, "step": 21128 }, { "epoch": 0.46558363218694737, "grad_norm": 0.5416911840438843, "learning_rate": 1.7390439700829933e-05, "loss": 0.0898, "step": 21129 }, { "epoch": 0.46560566747646354, "grad_norm": 0.14973169565200806, "learning_rate": 1.738938284084563e-05, "loss": 0.0608, "step": 21130 }, { "epoch": 0.4656277027659797, "grad_norm": 3.1323585510253906, "learning_rate": 1.7388325968690917e-05, "loss": 0.1279, "step": 21131 }, { "epoch": 0.46564973805549587, "grad_norm": 1.449053168296814, "learning_rate": 1.7387269084371184e-05, "loss": 0.0715, "step": 21132 }, { "epoch": 0.46567177334501203, "grad_norm": 0.9154915809631348, "learning_rate": 1.738621218789181e-05, "loss": 0.0943, "step": 21133 }, { "epoch": 0.4656938086345282, "grad_norm": 0.5645501613616943, "learning_rate": 1.738515527925817e-05, "loss": 0.0909, "step": 21134 }, { "epoch": 0.46571584392404436, "grad_norm": 1.458918809890747, "learning_rate": 1.7384098358475663e-05, "loss": 0.092, "step": 21135 }, { "epoch": 0.4657378792135605, "grad_norm": 0.42795729637145996, "learning_rate": 1.7383041425549663e-05, "loss": 0.0584, "step": 21136 }, { "epoch": 0.4657599145030767, "grad_norm": 0.7469528317451477, "learning_rate": 1.738198448048555e-05, "loss": 0.1203, "step": 21137 }, { "epoch": 0.46578194979259285, "grad_norm": 0.5663614273071289, "learning_rate": 1.738092752328872e-05, "loss": 0.0782, "step": 21138 }, { "epoch": 0.465803985082109, "grad_norm": 0.811976969242096, "learning_rate": 1.7379870553964546e-05, "loss": 0.0746, "step": 21139 }, { "epoch": 0.4658260203716252, "grad_norm": 0.39525681734085083, "learning_rate": 1.737881357251841e-05, "loss": 0.0904, "step": 21140 }, { "epoch": 0.4658480556611413, "grad_norm": 0.7622613906860352, "learning_rate": 1.7377756578955708e-05, "loss": 0.0805, "step": 21141 }, { "epoch": 0.46587009095065746, "grad_norm": 0.7572849988937378, "learning_rate": 1.7376699573281816e-05, "loss": 0.0729, "step": 21142 }, { "epoch": 0.4658921262401736, "grad_norm": 0.5026209950447083, "learning_rate": 1.7375642555502118e-05, "loss": 0.0853, "step": 21143 }, { "epoch": 0.4659141615296898, "grad_norm": 1.4562795162200928, "learning_rate": 1.7374585525622e-05, "loss": 0.1024, "step": 21144 }, { "epoch": 0.46593619681920595, "grad_norm": 1.1299582719802856, "learning_rate": 1.737352848364684e-05, "loss": 0.0848, "step": 21145 }, { "epoch": 0.4659582321087221, "grad_norm": 0.6233546733856201, "learning_rate": 1.737247142958203e-05, "loss": 0.1248, "step": 21146 }, { "epoch": 0.4659802673982383, "grad_norm": 0.7083666920661926, "learning_rate": 1.7371414363432946e-05, "loss": 0.0738, "step": 21147 }, { "epoch": 0.46600230268775444, "grad_norm": 0.5000813603401184, "learning_rate": 1.7370357285204983e-05, "loss": 0.0775, "step": 21148 }, { "epoch": 0.4660243379772706, "grad_norm": 0.7303935885429382, "learning_rate": 1.7369300194903514e-05, "loss": 0.0886, "step": 21149 }, { "epoch": 0.4660463732667868, "grad_norm": 0.8617031574249268, "learning_rate": 1.7368243092533937e-05, "loss": 0.0823, "step": 21150 }, { "epoch": 0.46606840855630294, "grad_norm": 0.45036211609840393, "learning_rate": 1.736718597810162e-05, "loss": 0.0743, "step": 21151 }, { "epoch": 0.4660904438458191, "grad_norm": 0.627091646194458, "learning_rate": 1.736612885161196e-05, "loss": 0.0692, "step": 21152 }, { "epoch": 0.46611247913533527, "grad_norm": 0.617477297782898, "learning_rate": 1.7365071713070334e-05, "loss": 0.0959, "step": 21153 }, { "epoch": 0.4661345144248514, "grad_norm": 0.7575014233589172, "learning_rate": 1.736401456248213e-05, "loss": 0.0991, "step": 21154 }, { "epoch": 0.46615654971436754, "grad_norm": 0.7659196853637695, "learning_rate": 1.7362957399852733e-05, "loss": 0.0731, "step": 21155 }, { "epoch": 0.4661785850038837, "grad_norm": 1.064983606338501, "learning_rate": 1.7361900225187525e-05, "loss": 0.0992, "step": 21156 }, { "epoch": 0.46620062029339987, "grad_norm": 0.9717488288879395, "learning_rate": 1.7360843038491897e-05, "loss": 0.0922, "step": 21157 }, { "epoch": 0.46622265558291603, "grad_norm": 0.7708578705787659, "learning_rate": 1.7359785839771227e-05, "loss": 0.1091, "step": 21158 }, { "epoch": 0.4662446908724322, "grad_norm": 0.7203068137168884, "learning_rate": 1.7358728629030902e-05, "loss": 0.0944, "step": 21159 }, { "epoch": 0.46626672616194836, "grad_norm": 0.5515111088752747, "learning_rate": 1.7357671406276303e-05, "loss": 0.0744, "step": 21160 }, { "epoch": 0.46628876145146453, "grad_norm": 0.7686465978622437, "learning_rate": 1.735661417151283e-05, "loss": 0.0788, "step": 21161 }, { "epoch": 0.4663107967409807, "grad_norm": 0.7099869847297668, "learning_rate": 1.7355556924745845e-05, "loss": 0.0729, "step": 21162 }, { "epoch": 0.46633283203049686, "grad_norm": 0.6513515114784241, "learning_rate": 1.735449966598075e-05, "loss": 0.0736, "step": 21163 }, { "epoch": 0.466354867320013, "grad_norm": 0.5996073484420776, "learning_rate": 1.7353442395222928e-05, "loss": 0.0937, "step": 21164 }, { "epoch": 0.4663769026095292, "grad_norm": 0.5335054397583008, "learning_rate": 1.735238511247776e-05, "loss": 0.0495, "step": 21165 }, { "epoch": 0.4663989378990453, "grad_norm": 0.9177796244621277, "learning_rate": 1.7351327817750637e-05, "loss": 0.0645, "step": 21166 }, { "epoch": 0.46642097318856146, "grad_norm": 0.8742750287055969, "learning_rate": 1.7350270511046937e-05, "loss": 0.0841, "step": 21167 }, { "epoch": 0.4664430084780776, "grad_norm": 0.6123591065406799, "learning_rate": 1.7349213192372047e-05, "loss": 0.0742, "step": 21168 }, { "epoch": 0.4664650437675938, "grad_norm": 1.0702900886535645, "learning_rate": 1.734815586173136e-05, "loss": 0.0799, "step": 21169 }, { "epoch": 0.46648707905710995, "grad_norm": 0.7251120805740356, "learning_rate": 1.734709851913025e-05, "loss": 0.093, "step": 21170 }, { "epoch": 0.4665091143466261, "grad_norm": 0.8272598385810852, "learning_rate": 1.7346041164574112e-05, "loss": 0.0828, "step": 21171 }, { "epoch": 0.4665311496361423, "grad_norm": 0.7111504077911377, "learning_rate": 1.7344983798068325e-05, "loss": 0.0872, "step": 21172 }, { "epoch": 0.46655318492565845, "grad_norm": 0.5473603010177612, "learning_rate": 1.7343926419618282e-05, "loss": 0.0916, "step": 21173 }, { "epoch": 0.4665752202151746, "grad_norm": 0.3468349575996399, "learning_rate": 1.7342869029229367e-05, "loss": 0.0539, "step": 21174 }, { "epoch": 0.4665972555046908, "grad_norm": 0.5842129588127136, "learning_rate": 1.7341811626906957e-05, "loss": 0.0396, "step": 21175 }, { "epoch": 0.46661929079420694, "grad_norm": 1.143644094467163, "learning_rate": 1.7340754212656447e-05, "loss": 0.0699, "step": 21176 }, { "epoch": 0.4666413260837231, "grad_norm": 0.8647234439849854, "learning_rate": 1.7339696786483222e-05, "loss": 0.0854, "step": 21177 }, { "epoch": 0.4666633613732392, "grad_norm": 0.5267392992973328, "learning_rate": 1.7338639348392668e-05, "loss": 0.0452, "step": 21178 }, { "epoch": 0.4666853966627554, "grad_norm": 0.3907906115055084, "learning_rate": 1.7337581898390164e-05, "loss": 0.0615, "step": 21179 }, { "epoch": 0.46670743195227155, "grad_norm": 0.671680748462677, "learning_rate": 1.7336524436481107e-05, "loss": 0.0996, "step": 21180 }, { "epoch": 0.4667294672417877, "grad_norm": 0.7367717623710632, "learning_rate": 1.733546696267088e-05, "loss": 0.1376, "step": 21181 }, { "epoch": 0.4667515025313039, "grad_norm": 0.5768091678619385, "learning_rate": 1.733440947696486e-05, "loss": 0.0702, "step": 21182 }, { "epoch": 0.46677353782082004, "grad_norm": 0.9053099751472473, "learning_rate": 1.7333351979368444e-05, "loss": 0.0789, "step": 21183 }, { "epoch": 0.4667955731103362, "grad_norm": 0.5776754021644592, "learning_rate": 1.7332294469887018e-05, "loss": 0.0563, "step": 21184 }, { "epoch": 0.46681760839985237, "grad_norm": 0.6533389687538147, "learning_rate": 1.7331236948525963e-05, "loss": 0.0645, "step": 21185 }, { "epoch": 0.46683964368936853, "grad_norm": 0.5327346920967102, "learning_rate": 1.7330179415290667e-05, "loss": 0.0607, "step": 21186 }, { "epoch": 0.4668616789788847, "grad_norm": 0.8935671448707581, "learning_rate": 1.732912187018652e-05, "loss": 0.0629, "step": 21187 }, { "epoch": 0.46688371426840086, "grad_norm": 0.5865496397018433, "learning_rate": 1.732806431321891e-05, "loss": 0.0788, "step": 21188 }, { "epoch": 0.466905749557917, "grad_norm": 0.609463632106781, "learning_rate": 1.7327006744393213e-05, "loss": 0.086, "step": 21189 }, { "epoch": 0.4669277848474332, "grad_norm": 0.3485634922981262, "learning_rate": 1.732594916371483e-05, "loss": 0.0867, "step": 21190 }, { "epoch": 0.4669498201369493, "grad_norm": 0.8889688849449158, "learning_rate": 1.7324891571189132e-05, "loss": 0.0752, "step": 21191 }, { "epoch": 0.46697185542646547, "grad_norm": 0.4722980856895447, "learning_rate": 1.7323833966821518e-05, "loss": 0.0757, "step": 21192 }, { "epoch": 0.46699389071598163, "grad_norm": 0.7276052832603455, "learning_rate": 1.732277635061737e-05, "loss": 0.0837, "step": 21193 }, { "epoch": 0.4670159260054978, "grad_norm": 0.5726567506790161, "learning_rate": 1.732171872258208e-05, "loss": 0.0882, "step": 21194 }, { "epoch": 0.46703796129501396, "grad_norm": 0.5044587850570679, "learning_rate": 1.732066108272103e-05, "loss": 0.0631, "step": 21195 }, { "epoch": 0.4670599965845301, "grad_norm": 0.7452778220176697, "learning_rate": 1.731960343103961e-05, "loss": 0.0755, "step": 21196 }, { "epoch": 0.4670820318740463, "grad_norm": 0.45103350281715393, "learning_rate": 1.7318545767543205e-05, "loss": 0.0894, "step": 21197 }, { "epoch": 0.46710406716356245, "grad_norm": 0.5199083685874939, "learning_rate": 1.7317488092237204e-05, "loss": 0.0876, "step": 21198 }, { "epoch": 0.4671261024530786, "grad_norm": 0.3085377514362335, "learning_rate": 1.731643040512699e-05, "loss": 0.092, "step": 21199 }, { "epoch": 0.4671481377425948, "grad_norm": 0.6811707615852356, "learning_rate": 1.7315372706217955e-05, "loss": 0.0541, "step": 21200 }, { "epoch": 0.46717017303211095, "grad_norm": 1.0083132982254028, "learning_rate": 1.731431499551549e-05, "loss": 0.1067, "step": 21201 }, { "epoch": 0.4671922083216271, "grad_norm": 0.2768044173717499, "learning_rate": 1.7313257273024968e-05, "loss": 0.0715, "step": 21202 }, { "epoch": 0.4672142436111432, "grad_norm": 1.0039433240890503, "learning_rate": 1.7312199538751793e-05, "loss": 0.1044, "step": 21203 }, { "epoch": 0.4672362789006594, "grad_norm": 0.7673051357269287, "learning_rate": 1.7311141792701347e-05, "loss": 0.0707, "step": 21204 }, { "epoch": 0.46725831419017555, "grad_norm": 0.9512272477149963, "learning_rate": 1.7310084034879016e-05, "loss": 0.0665, "step": 21205 }, { "epoch": 0.4672803494796917, "grad_norm": 2.36923885345459, "learning_rate": 1.7309026265290185e-05, "loss": 0.1007, "step": 21206 }, { "epoch": 0.4673023847692079, "grad_norm": 0.7907613515853882, "learning_rate": 1.730796848394025e-05, "loss": 0.0865, "step": 21207 }, { "epoch": 0.46732442005872404, "grad_norm": 0.9901692867279053, "learning_rate": 1.730691069083459e-05, "loss": 0.1291, "step": 21208 }, { "epoch": 0.4673464553482402, "grad_norm": 0.5704020857810974, "learning_rate": 1.73058528859786e-05, "loss": 0.0528, "step": 21209 }, { "epoch": 0.4673684906377564, "grad_norm": 1.031843662261963, "learning_rate": 1.7304795069377657e-05, "loss": 0.1379, "step": 21210 }, { "epoch": 0.46739052592727254, "grad_norm": 0.7727524042129517, "learning_rate": 1.7303737241037163e-05, "loss": 0.0873, "step": 21211 }, { "epoch": 0.4674125612167887, "grad_norm": 0.4672892987728119, "learning_rate": 1.7302679400962496e-05, "loss": 0.0674, "step": 21212 }, { "epoch": 0.46743459650630487, "grad_norm": 0.6495088338851929, "learning_rate": 1.7301621549159054e-05, "loss": 0.0506, "step": 21213 }, { "epoch": 0.46745663179582103, "grad_norm": 0.5343289971351624, "learning_rate": 1.730056368563222e-05, "loss": 0.0757, "step": 21214 }, { "epoch": 0.46747866708533714, "grad_norm": 0.6653407216072083, "learning_rate": 1.7299505810387374e-05, "loss": 0.0938, "step": 21215 }, { "epoch": 0.4675007023748533, "grad_norm": 0.6190937757492065, "learning_rate": 1.7298447923429914e-05, "loss": 0.0757, "step": 21216 }, { "epoch": 0.46752273766436947, "grad_norm": 0.7619786858558655, "learning_rate": 1.729739002476523e-05, "loss": 0.0691, "step": 21217 }, { "epoch": 0.46754477295388563, "grad_norm": 0.5225757956504822, "learning_rate": 1.7296332114398704e-05, "loss": 0.0998, "step": 21218 }, { "epoch": 0.4675668082434018, "grad_norm": 0.4774675667285919, "learning_rate": 1.7295274192335723e-05, "loss": 0.0746, "step": 21219 }, { "epoch": 0.46758884353291796, "grad_norm": 0.512006938457489, "learning_rate": 1.7294216258581688e-05, "loss": 0.1034, "step": 21220 }, { "epoch": 0.46761087882243413, "grad_norm": 0.7548922896385193, "learning_rate": 1.7293158313141973e-05, "loss": 0.0806, "step": 21221 }, { "epoch": 0.4676329141119503, "grad_norm": 0.879082977771759, "learning_rate": 1.7292100356021978e-05, "loss": 0.0892, "step": 21222 }, { "epoch": 0.46765494940146646, "grad_norm": 0.6097425222396851, "learning_rate": 1.7291042387227084e-05, "loss": 0.0582, "step": 21223 }, { "epoch": 0.4676769846909826, "grad_norm": 0.8538530468940735, "learning_rate": 1.7289984406762682e-05, "loss": 0.0691, "step": 21224 }, { "epoch": 0.4676990199804988, "grad_norm": 0.905109167098999, "learning_rate": 1.7288926414634162e-05, "loss": 0.087, "step": 21225 }, { "epoch": 0.46772105527001495, "grad_norm": 0.4855910837650299, "learning_rate": 1.7287868410846914e-05, "loss": 0.0773, "step": 21226 }, { "epoch": 0.4677430905595311, "grad_norm": 0.8393347859382629, "learning_rate": 1.728681039540632e-05, "loss": 0.077, "step": 21227 }, { "epoch": 0.4677651258490472, "grad_norm": 0.8475931882858276, "learning_rate": 1.728575236831778e-05, "loss": 0.0855, "step": 21228 }, { "epoch": 0.4677871611385634, "grad_norm": 0.6506531238555908, "learning_rate": 1.7284694329586677e-05, "loss": 0.0948, "step": 21229 }, { "epoch": 0.46780919642807955, "grad_norm": 1.1539325714111328, "learning_rate": 1.72836362792184e-05, "loss": 0.1105, "step": 21230 }, { "epoch": 0.4678312317175957, "grad_norm": 0.5206668972969055, "learning_rate": 1.728257821721834e-05, "loss": 0.083, "step": 21231 }, { "epoch": 0.4678532670071119, "grad_norm": 0.4620960056781769, "learning_rate": 1.728152014359188e-05, "loss": 0.077, "step": 21232 }, { "epoch": 0.46787530229662805, "grad_norm": 0.6964071393013, "learning_rate": 1.728046205834442e-05, "loss": 0.0898, "step": 21233 }, { "epoch": 0.4678973375861442, "grad_norm": 0.5970384478569031, "learning_rate": 1.7279403961481343e-05, "loss": 0.0959, "step": 21234 }, { "epoch": 0.4679193728756604, "grad_norm": 0.5262629389762878, "learning_rate": 1.7278345853008036e-05, "loss": 0.0685, "step": 21235 }, { "epoch": 0.46794140816517654, "grad_norm": 0.6160334944725037, "learning_rate": 1.727728773292989e-05, "loss": 0.0792, "step": 21236 }, { "epoch": 0.4679634434546927, "grad_norm": 0.6847153902053833, "learning_rate": 1.7276229601252302e-05, "loss": 0.0607, "step": 21237 }, { "epoch": 0.46798547874420887, "grad_norm": 0.9295803904533386, "learning_rate": 1.7275171457980656e-05, "loss": 0.0992, "step": 21238 }, { "epoch": 0.46800751403372504, "grad_norm": 0.5177538394927979, "learning_rate": 1.727411330312034e-05, "loss": 0.0743, "step": 21239 }, { "epoch": 0.46802954932324115, "grad_norm": 0.6124892234802246, "learning_rate": 1.7273055136676748e-05, "loss": 0.065, "step": 21240 }, { "epoch": 0.4680515846127573, "grad_norm": 0.49561432003974915, "learning_rate": 1.727199695865526e-05, "loss": 0.0686, "step": 21241 }, { "epoch": 0.4680736199022735, "grad_norm": 0.3944946825504303, "learning_rate": 1.727093876906128e-05, "loss": 0.0674, "step": 21242 }, { "epoch": 0.46809565519178964, "grad_norm": 0.7166404724121094, "learning_rate": 1.7269880567900186e-05, "loss": 0.0937, "step": 21243 }, { "epoch": 0.4681176904813058, "grad_norm": 0.4106083810329437, "learning_rate": 1.7268822355177374e-05, "loss": 0.0502, "step": 21244 }, { "epoch": 0.46813972577082197, "grad_norm": 0.6772885918617249, "learning_rate": 1.726776413089824e-05, "loss": 0.0852, "step": 21245 }, { "epoch": 0.46816176106033813, "grad_norm": 0.66423499584198, "learning_rate": 1.726670589506816e-05, "loss": 0.0734, "step": 21246 }, { "epoch": 0.4681837963498543, "grad_norm": 0.9043197631835938, "learning_rate": 1.7265647647692537e-05, "loss": 0.0815, "step": 21247 }, { "epoch": 0.46820583163937046, "grad_norm": 0.8352684378623962, "learning_rate": 1.726458938877675e-05, "loss": 0.0726, "step": 21248 }, { "epoch": 0.4682278669288866, "grad_norm": 0.4731176793575287, "learning_rate": 1.7263531118326196e-05, "loss": 0.0964, "step": 21249 }, { "epoch": 0.4682499022184028, "grad_norm": 0.7846262454986572, "learning_rate": 1.7262472836346264e-05, "loss": 0.1101, "step": 21250 }, { "epoch": 0.46827193750791896, "grad_norm": 0.6570519208908081, "learning_rate": 1.7261414542842347e-05, "loss": 0.0863, "step": 21251 }, { "epoch": 0.46829397279743507, "grad_norm": 0.6235235333442688, "learning_rate": 1.7260356237819833e-05, "loss": 0.0741, "step": 21252 }, { "epoch": 0.46831600808695123, "grad_norm": 1.0310310125350952, "learning_rate": 1.725929792128411e-05, "loss": 0.1113, "step": 21253 }, { "epoch": 0.4683380433764674, "grad_norm": 0.7541408538818359, "learning_rate": 1.7258239593240575e-05, "loss": 0.1013, "step": 21254 }, { "epoch": 0.46836007866598356, "grad_norm": 0.4449937641620636, "learning_rate": 1.7257181253694614e-05, "loss": 0.0866, "step": 21255 }, { "epoch": 0.4683821139554997, "grad_norm": 0.575690507888794, "learning_rate": 1.7256122902651613e-05, "loss": 0.0596, "step": 21256 }, { "epoch": 0.4684041492450159, "grad_norm": 0.6667022705078125, "learning_rate": 1.7255064540116978e-05, "loss": 0.0763, "step": 21257 }, { "epoch": 0.46842618453453205, "grad_norm": 0.6920623779296875, "learning_rate": 1.725400616609608e-05, "loss": 0.0646, "step": 21258 }, { "epoch": 0.4684482198240482, "grad_norm": 0.8127211928367615, "learning_rate": 1.7252947780594326e-05, "loss": 0.0785, "step": 21259 }, { "epoch": 0.4684702551135644, "grad_norm": 0.7566481828689575, "learning_rate": 1.7251889383617095e-05, "loss": 0.1344, "step": 21260 }, { "epoch": 0.46849229040308055, "grad_norm": 0.7995114922523499, "learning_rate": 1.725083097516979e-05, "loss": 0.0855, "step": 21261 }, { "epoch": 0.4685143256925967, "grad_norm": 0.8254455327987671, "learning_rate": 1.7249772555257797e-05, "loss": 0.1245, "step": 21262 }, { "epoch": 0.4685363609821129, "grad_norm": 0.8990391492843628, "learning_rate": 1.7248714123886505e-05, "loss": 0.0868, "step": 21263 }, { "epoch": 0.46855839627162904, "grad_norm": 0.7290114760398865, "learning_rate": 1.7247655681061298e-05, "loss": 0.0832, "step": 21264 }, { "epoch": 0.46858043156114515, "grad_norm": 0.7493895292282104, "learning_rate": 1.724659722678758e-05, "loss": 0.0934, "step": 21265 }, { "epoch": 0.4686024668506613, "grad_norm": 0.871567964553833, "learning_rate": 1.7245538761070738e-05, "loss": 0.1323, "step": 21266 }, { "epoch": 0.4686245021401775, "grad_norm": 0.4599335193634033, "learning_rate": 1.7244480283916163e-05, "loss": 0.0762, "step": 21267 }, { "epoch": 0.46864653742969364, "grad_norm": 0.9568089246749878, "learning_rate": 1.7243421795329252e-05, "loss": 0.0677, "step": 21268 }, { "epoch": 0.4686685727192098, "grad_norm": 0.7155205607414246, "learning_rate": 1.7242363295315386e-05, "loss": 0.0731, "step": 21269 }, { "epoch": 0.468690608008726, "grad_norm": 0.6229762434959412, "learning_rate": 1.724130478387996e-05, "loss": 0.0887, "step": 21270 }, { "epoch": 0.46871264329824214, "grad_norm": 1.0708184242248535, "learning_rate": 1.7240246261028365e-05, "loss": 0.081, "step": 21271 }, { "epoch": 0.4687346785877583, "grad_norm": 0.3344012498855591, "learning_rate": 1.7239187726766e-05, "loss": 0.0477, "step": 21272 }, { "epoch": 0.46875671387727447, "grad_norm": 0.5093310475349426, "learning_rate": 1.7238129181098246e-05, "loss": 0.0524, "step": 21273 }, { "epoch": 0.46877874916679063, "grad_norm": 0.6161854863166809, "learning_rate": 1.7237070624030503e-05, "loss": 0.1171, "step": 21274 }, { "epoch": 0.4688007844563068, "grad_norm": 0.6565623879432678, "learning_rate": 1.723601205556816e-05, "loss": 0.0792, "step": 21275 }, { "epoch": 0.46882281974582296, "grad_norm": 0.5967853665351868, "learning_rate": 1.723495347571661e-05, "loss": 0.1124, "step": 21276 }, { "epoch": 0.46884485503533907, "grad_norm": 0.7980779409408569, "learning_rate": 1.723389488448124e-05, "loss": 0.0937, "step": 21277 }, { "epoch": 0.46886689032485523, "grad_norm": 0.3910321891307831, "learning_rate": 1.7232836281867446e-05, "loss": 0.0288, "step": 21278 }, { "epoch": 0.4688889256143714, "grad_norm": 0.6305122971534729, "learning_rate": 1.723177766788062e-05, "loss": 0.075, "step": 21279 }, { "epoch": 0.46891096090388756, "grad_norm": 0.8679752349853516, "learning_rate": 1.7230719042526156e-05, "loss": 0.1159, "step": 21280 }, { "epoch": 0.46893299619340373, "grad_norm": 1.0567806959152222, "learning_rate": 1.722966040580944e-05, "loss": 0.1169, "step": 21281 }, { "epoch": 0.4689550314829199, "grad_norm": 0.7505201101303101, "learning_rate": 1.722860175773587e-05, "loss": 0.0747, "step": 21282 }, { "epoch": 0.46897706677243606, "grad_norm": 0.431660920381546, "learning_rate": 1.7227543098310833e-05, "loss": 0.0598, "step": 21283 }, { "epoch": 0.4689991020619522, "grad_norm": 1.0190105438232422, "learning_rate": 1.7226484427539726e-05, "loss": 0.086, "step": 21284 }, { "epoch": 0.4690211373514684, "grad_norm": 0.6881341934204102, "learning_rate": 1.7225425745427945e-05, "loss": 0.0986, "step": 21285 }, { "epoch": 0.46904317264098455, "grad_norm": 0.6762824058532715, "learning_rate": 1.7224367051980867e-05, "loss": 0.0442, "step": 21286 }, { "epoch": 0.4690652079305007, "grad_norm": 0.45245039463043213, "learning_rate": 1.7223308347203904e-05, "loss": 0.0645, "step": 21287 }, { "epoch": 0.4690872432200169, "grad_norm": 0.7518368363380432, "learning_rate": 1.7222249631102436e-05, "loss": 0.0989, "step": 21288 }, { "epoch": 0.469109278509533, "grad_norm": 0.646468460559845, "learning_rate": 1.7221190903681857e-05, "loss": 0.0744, "step": 21289 }, { "epoch": 0.46913131379904915, "grad_norm": 0.5410069823265076, "learning_rate": 1.7220132164947566e-05, "loss": 0.0648, "step": 21290 }, { "epoch": 0.4691533490885653, "grad_norm": 1.041408896446228, "learning_rate": 1.721907341490495e-05, "loss": 0.0829, "step": 21291 }, { "epoch": 0.4691753843780815, "grad_norm": 0.5512236952781677, "learning_rate": 1.7218014653559402e-05, "loss": 0.0695, "step": 21292 }, { "epoch": 0.46919741966759765, "grad_norm": 0.495353639125824, "learning_rate": 1.721695588091632e-05, "loss": 0.0511, "step": 21293 }, { "epoch": 0.4692194549571138, "grad_norm": 0.35371920466423035, "learning_rate": 1.7215897096981082e-05, "loss": 0.0474, "step": 21294 }, { "epoch": 0.46924149024663, "grad_norm": 1.0533931255340576, "learning_rate": 1.7214838301759103e-05, "loss": 0.0879, "step": 21295 }, { "epoch": 0.46926352553614614, "grad_norm": 0.6592926979064941, "learning_rate": 1.721377949525576e-05, "loss": 0.0985, "step": 21296 }, { "epoch": 0.4692855608256623, "grad_norm": 0.5852813124656677, "learning_rate": 1.7212720677476453e-05, "loss": 0.0638, "step": 21297 }, { "epoch": 0.46930759611517847, "grad_norm": 0.46550223231315613, "learning_rate": 1.721166184842657e-05, "loss": 0.0599, "step": 21298 }, { "epoch": 0.46932963140469464, "grad_norm": 0.5267477035522461, "learning_rate": 1.7210603008111512e-05, "loss": 0.0878, "step": 21299 }, { "epoch": 0.4693516666942108, "grad_norm": 1.0700016021728516, "learning_rate": 1.720954415653666e-05, "loss": 0.0734, "step": 21300 }, { "epoch": 0.46937370198372697, "grad_norm": 0.741922914981842, "learning_rate": 1.7208485293707423e-05, "loss": 0.0581, "step": 21301 }, { "epoch": 0.4693957372732431, "grad_norm": 1.3578190803527832, "learning_rate": 1.720742641962918e-05, "loss": 0.0971, "step": 21302 }, { "epoch": 0.46941777256275924, "grad_norm": 0.7495657205581665, "learning_rate": 1.7206367534307334e-05, "loss": 0.0871, "step": 21303 }, { "epoch": 0.4694398078522754, "grad_norm": 0.6614553928375244, "learning_rate": 1.7205308637747272e-05, "loss": 0.0721, "step": 21304 }, { "epoch": 0.46946184314179157, "grad_norm": 0.6598101258277893, "learning_rate": 1.7204249729954398e-05, "loss": 0.0825, "step": 21305 }, { "epoch": 0.46948387843130773, "grad_norm": 0.5773374438285828, "learning_rate": 1.7203190810934088e-05, "loss": 0.0887, "step": 21306 }, { "epoch": 0.4695059137208239, "grad_norm": 0.9428718090057373, "learning_rate": 1.720213188069175e-05, "loss": 0.1038, "step": 21307 }, { "epoch": 0.46952794901034006, "grad_norm": 0.633456826210022, "learning_rate": 1.7201072939232772e-05, "loss": 0.0832, "step": 21308 }, { "epoch": 0.4695499842998562, "grad_norm": 0.7000666260719299, "learning_rate": 1.7200013986562554e-05, "loss": 0.0599, "step": 21309 }, { "epoch": 0.4695720195893724, "grad_norm": 0.8184004426002502, "learning_rate": 1.7198955022686486e-05, "loss": 0.0703, "step": 21310 }, { "epoch": 0.46959405487888856, "grad_norm": 0.9219838976860046, "learning_rate": 1.7197896047609953e-05, "loss": 0.0664, "step": 21311 }, { "epoch": 0.4696160901684047, "grad_norm": 0.4990423917770386, "learning_rate": 1.719683706133836e-05, "loss": 0.0941, "step": 21312 }, { "epoch": 0.4696381254579209, "grad_norm": 0.8068021535873413, "learning_rate": 1.7195778063877094e-05, "loss": 0.0785, "step": 21313 }, { "epoch": 0.469660160747437, "grad_norm": 0.6563674807548523, "learning_rate": 1.719471905523156e-05, "loss": 0.0858, "step": 21314 }, { "epoch": 0.46968219603695316, "grad_norm": 0.5022425055503845, "learning_rate": 1.719366003540714e-05, "loss": 0.086, "step": 21315 }, { "epoch": 0.4697042313264693, "grad_norm": 0.7531207799911499, "learning_rate": 1.7192601004409238e-05, "loss": 0.0956, "step": 21316 }, { "epoch": 0.4697262666159855, "grad_norm": 0.6420771479606628, "learning_rate": 1.7191541962243243e-05, "loss": 0.086, "step": 21317 }, { "epoch": 0.46974830190550165, "grad_norm": 1.0294883251190186, "learning_rate": 1.719048290891455e-05, "loss": 0.0842, "step": 21318 }, { "epoch": 0.4697703371950178, "grad_norm": 0.6391622424125671, "learning_rate": 1.7189423844428546e-05, "loss": 0.05, "step": 21319 }, { "epoch": 0.469792372484534, "grad_norm": 0.7166113257408142, "learning_rate": 1.7188364768790638e-05, "loss": 0.0954, "step": 21320 }, { "epoch": 0.46981440777405015, "grad_norm": 0.6848463416099548, "learning_rate": 1.718730568200621e-05, "loss": 0.0702, "step": 21321 }, { "epoch": 0.4698364430635663, "grad_norm": 0.8161794543266296, "learning_rate": 1.7186246584080667e-05, "loss": 0.0999, "step": 21322 }, { "epoch": 0.4698584783530825, "grad_norm": 0.5777004957199097, "learning_rate": 1.7185187475019394e-05, "loss": 0.0922, "step": 21323 }, { "epoch": 0.46988051364259864, "grad_norm": 0.5983704924583435, "learning_rate": 1.7184128354827794e-05, "loss": 0.0576, "step": 21324 }, { "epoch": 0.4699025489321148, "grad_norm": 0.8132219910621643, "learning_rate": 1.7183069223511253e-05, "loss": 0.1146, "step": 21325 }, { "epoch": 0.4699245842216309, "grad_norm": 0.5814628601074219, "learning_rate": 1.718201008107517e-05, "loss": 0.0693, "step": 21326 }, { "epoch": 0.4699466195111471, "grad_norm": 0.6367022395133972, "learning_rate": 1.718095092752494e-05, "loss": 0.0769, "step": 21327 }, { "epoch": 0.46996865480066324, "grad_norm": 0.4850620627403259, "learning_rate": 1.7179891762865957e-05, "loss": 0.0741, "step": 21328 }, { "epoch": 0.4699906900901794, "grad_norm": 0.8279767632484436, "learning_rate": 1.7178832587103614e-05, "loss": 0.0912, "step": 21329 }, { "epoch": 0.4700127253796956, "grad_norm": 0.8304344415664673, "learning_rate": 1.7177773400243315e-05, "loss": 0.0764, "step": 21330 }, { "epoch": 0.47003476066921174, "grad_norm": 1.035416603088379, "learning_rate": 1.717671420229044e-05, "loss": 0.0775, "step": 21331 }, { "epoch": 0.4700567959587279, "grad_norm": 0.3191133141517639, "learning_rate": 1.7175654993250397e-05, "loss": 0.0467, "step": 21332 }, { "epoch": 0.47007883124824407, "grad_norm": 0.19386081397533417, "learning_rate": 1.717459577312858e-05, "loss": 0.0789, "step": 21333 }, { "epoch": 0.47010086653776023, "grad_norm": 0.9538947343826294, "learning_rate": 1.7173536541930375e-05, "loss": 0.0911, "step": 21334 }, { "epoch": 0.4701229018272764, "grad_norm": 0.7577367424964905, "learning_rate": 1.7172477299661184e-05, "loss": 0.0717, "step": 21335 }, { "epoch": 0.47014493711679256, "grad_norm": 0.7349011301994324, "learning_rate": 1.71714180463264e-05, "loss": 0.0976, "step": 21336 }, { "epoch": 0.4701669724063087, "grad_norm": 0.6348708271980286, "learning_rate": 1.7170358781931424e-05, "loss": 0.0616, "step": 21337 }, { "epoch": 0.4701890076958249, "grad_norm": 0.9207468032836914, "learning_rate": 1.7169299506481642e-05, "loss": 0.0831, "step": 21338 }, { "epoch": 0.470211042985341, "grad_norm": 0.34427037835121155, "learning_rate": 1.7168240219982458e-05, "loss": 0.0756, "step": 21339 }, { "epoch": 0.47023307827485716, "grad_norm": 0.5282368063926697, "learning_rate": 1.7167180922439265e-05, "loss": 0.0726, "step": 21340 }, { "epoch": 0.47025511356437333, "grad_norm": 0.7134007215499878, "learning_rate": 1.7166121613857454e-05, "loss": 0.0903, "step": 21341 }, { "epoch": 0.4702771488538895, "grad_norm": 0.683067798614502, "learning_rate": 1.7165062294242423e-05, "loss": 0.1052, "step": 21342 }, { "epoch": 0.47029918414340566, "grad_norm": 0.43875810503959656, "learning_rate": 1.716400296359957e-05, "loss": 0.0619, "step": 21343 }, { "epoch": 0.4703212194329218, "grad_norm": 0.7070804238319397, "learning_rate": 1.716294362193429e-05, "loss": 0.0998, "step": 21344 }, { "epoch": 0.470343254722438, "grad_norm": 0.42879509925842285, "learning_rate": 1.716188426925198e-05, "loss": 0.0595, "step": 21345 }, { "epoch": 0.47036529001195415, "grad_norm": 0.6115875840187073, "learning_rate": 1.716082490555803e-05, "loss": 0.062, "step": 21346 }, { "epoch": 0.4703873253014703, "grad_norm": 0.8383585810661316, "learning_rate": 1.7159765530857844e-05, "loss": 0.1013, "step": 21347 }, { "epoch": 0.4704093605909865, "grad_norm": 0.5691666007041931, "learning_rate": 1.7158706145156814e-05, "loss": 0.0654, "step": 21348 }, { "epoch": 0.47043139588050265, "grad_norm": 0.620597779750824, "learning_rate": 1.7157646748460337e-05, "loss": 0.0666, "step": 21349 }, { "epoch": 0.4704534311700188, "grad_norm": 0.8063513040542603, "learning_rate": 1.7156587340773798e-05, "loss": 0.0793, "step": 21350 }, { "epoch": 0.4704754664595349, "grad_norm": 0.5183194279670715, "learning_rate": 1.7155527922102617e-05, "loss": 0.0886, "step": 21351 }, { "epoch": 0.4704975017490511, "grad_norm": 1.1037333011627197, "learning_rate": 1.7154468492452164e-05, "loss": 0.0839, "step": 21352 }, { "epoch": 0.47051953703856725, "grad_norm": 1.0071204900741577, "learning_rate": 1.7153409051827854e-05, "loss": 0.0961, "step": 21353 }, { "epoch": 0.4705415723280834, "grad_norm": 0.62674480676651, "learning_rate": 1.7152349600235077e-05, "loss": 0.0709, "step": 21354 }, { "epoch": 0.4705636076175996, "grad_norm": 0.49087628722190857, "learning_rate": 1.7151290137679228e-05, "loss": 0.066, "step": 21355 }, { "epoch": 0.47058564290711574, "grad_norm": 0.7467635869979858, "learning_rate": 1.7150230664165713e-05, "loss": 0.1062, "step": 21356 }, { "epoch": 0.4706076781966319, "grad_norm": 0.6518107652664185, "learning_rate": 1.714917117969991e-05, "loss": 0.094, "step": 21357 }, { "epoch": 0.47062971348614807, "grad_norm": 0.5258719325065613, "learning_rate": 1.7148111684287228e-05, "loss": 0.0721, "step": 21358 }, { "epoch": 0.47065174877566424, "grad_norm": 0.7826094627380371, "learning_rate": 1.7147052177933063e-05, "loss": 0.072, "step": 21359 }, { "epoch": 0.4706737840651804, "grad_norm": 0.44536280632019043, "learning_rate": 1.7145992660642808e-05, "loss": 0.0575, "step": 21360 }, { "epoch": 0.47069581935469657, "grad_norm": 0.8790168762207031, "learning_rate": 1.714493313242186e-05, "loss": 0.0812, "step": 21361 }, { "epoch": 0.47071785464421273, "grad_norm": 0.5755337476730347, "learning_rate": 1.7143873593275618e-05, "loss": 0.1198, "step": 21362 }, { "epoch": 0.4707398899337289, "grad_norm": 0.6270096302032471, "learning_rate": 1.714281404320948e-05, "loss": 0.0549, "step": 21363 }, { "epoch": 0.470761925223245, "grad_norm": 0.6494163870811462, "learning_rate": 1.7141754482228844e-05, "loss": 0.0505, "step": 21364 }, { "epoch": 0.47078396051276117, "grad_norm": 1.075105905532837, "learning_rate": 1.71406949103391e-05, "loss": 0.0844, "step": 21365 }, { "epoch": 0.47080599580227733, "grad_norm": 0.6755539774894714, "learning_rate": 1.7139635327545652e-05, "loss": 0.0542, "step": 21366 }, { "epoch": 0.4708280310917935, "grad_norm": 0.8623366355895996, "learning_rate": 1.713857573385389e-05, "loss": 0.0829, "step": 21367 }, { "epoch": 0.47085006638130966, "grad_norm": 0.6545858979225159, "learning_rate": 1.713751612926922e-05, "loss": 0.0872, "step": 21368 }, { "epoch": 0.4708721016708258, "grad_norm": 0.7485346794128418, "learning_rate": 1.7136456513797027e-05, "loss": 0.1074, "step": 21369 }, { "epoch": 0.470894136960342, "grad_norm": 0.5053463578224182, "learning_rate": 1.713539688744272e-05, "loss": 0.0924, "step": 21370 }, { "epoch": 0.47091617224985816, "grad_norm": 0.36893174052238464, "learning_rate": 1.713433725021169e-05, "loss": 0.0782, "step": 21371 }, { "epoch": 0.4709382075393743, "grad_norm": 0.519371509552002, "learning_rate": 1.7133277602109336e-05, "loss": 0.0666, "step": 21372 }, { "epoch": 0.4709602428288905, "grad_norm": 0.6602961421012878, "learning_rate": 1.7132217943141058e-05, "loss": 0.0744, "step": 21373 }, { "epoch": 0.47098227811840665, "grad_norm": 0.5998294353485107, "learning_rate": 1.7131158273312248e-05, "loss": 0.0647, "step": 21374 }, { "epoch": 0.4710043134079228, "grad_norm": 0.5582316517829895, "learning_rate": 1.7130098592628308e-05, "loss": 0.0862, "step": 21375 }, { "epoch": 0.4710263486974389, "grad_norm": 1.0028451681137085, "learning_rate": 1.7129038901094627e-05, "loss": 0.1069, "step": 21376 }, { "epoch": 0.4710483839869551, "grad_norm": 0.744652509689331, "learning_rate": 1.7127979198716618e-05, "loss": 0.0923, "step": 21377 }, { "epoch": 0.47107041927647125, "grad_norm": 0.4182317852973938, "learning_rate": 1.7126919485499664e-05, "loss": 0.0636, "step": 21378 }, { "epoch": 0.4710924545659874, "grad_norm": 0.671379566192627, "learning_rate": 1.712585976144917e-05, "loss": 0.0963, "step": 21379 }, { "epoch": 0.4711144898555036, "grad_norm": 0.9197500944137573, "learning_rate": 1.7124800026570535e-05, "loss": 0.0871, "step": 21380 }, { "epoch": 0.47113652514501975, "grad_norm": 0.3177510201931, "learning_rate": 1.7123740280869152e-05, "loss": 0.0557, "step": 21381 }, { "epoch": 0.4711585604345359, "grad_norm": 0.8172820806503296, "learning_rate": 1.712268052435042e-05, "loss": 0.0862, "step": 21382 }, { "epoch": 0.4711805957240521, "grad_norm": 0.3919954299926758, "learning_rate": 1.712162075701974e-05, "loss": 0.0508, "step": 21383 }, { "epoch": 0.47120263101356824, "grad_norm": 1.2527165412902832, "learning_rate": 1.7120560978882503e-05, "loss": 0.1104, "step": 21384 }, { "epoch": 0.4712246663030844, "grad_norm": 0.8630945086479187, "learning_rate": 1.7119501189944117e-05, "loss": 0.0987, "step": 21385 }, { "epoch": 0.47124670159260057, "grad_norm": 0.7486100196838379, "learning_rate": 1.7118441390209973e-05, "loss": 0.0747, "step": 21386 }, { "epoch": 0.47126873688211673, "grad_norm": 1.097191572189331, "learning_rate": 1.711738157968547e-05, "loss": 0.0645, "step": 21387 }, { "epoch": 0.47129077217163284, "grad_norm": 0.47064775228500366, "learning_rate": 1.711632175837601e-05, "loss": 0.052, "step": 21388 }, { "epoch": 0.471312807461149, "grad_norm": 0.9923319816589355, "learning_rate": 1.7115261926286986e-05, "loss": 0.0828, "step": 21389 }, { "epoch": 0.4713348427506652, "grad_norm": 0.6419829726219177, "learning_rate": 1.71142020834238e-05, "loss": 0.1053, "step": 21390 }, { "epoch": 0.47135687804018134, "grad_norm": 0.5759817361831665, "learning_rate": 1.711314222979185e-05, "loss": 0.0625, "step": 21391 }, { "epoch": 0.4713789133296975, "grad_norm": 0.7616994380950928, "learning_rate": 1.711208236539653e-05, "loss": 0.065, "step": 21392 }, { "epoch": 0.47140094861921367, "grad_norm": 0.4060729444026947, "learning_rate": 1.7111022490243245e-05, "loss": 0.0707, "step": 21393 }, { "epoch": 0.47142298390872983, "grad_norm": 0.616221010684967, "learning_rate": 1.7109962604337388e-05, "loss": 0.0858, "step": 21394 }, { "epoch": 0.471445019198246, "grad_norm": 0.9782793521881104, "learning_rate": 1.7108902707684362e-05, "loss": 0.0875, "step": 21395 }, { "epoch": 0.47146705448776216, "grad_norm": 1.04188871383667, "learning_rate": 1.7107842800289567e-05, "loss": 0.0852, "step": 21396 }, { "epoch": 0.4714890897772783, "grad_norm": 0.5197518467903137, "learning_rate": 1.710678288215839e-05, "loss": 0.0795, "step": 21397 }, { "epoch": 0.4715111250667945, "grad_norm": 0.5707454085350037, "learning_rate": 1.7105722953296244e-05, "loss": 0.081, "step": 21398 }, { "epoch": 0.47153316035631065, "grad_norm": 0.6017771363258362, "learning_rate": 1.7104663013708518e-05, "loss": 0.0984, "step": 21399 }, { "epoch": 0.4715551956458268, "grad_norm": 0.569506824016571, "learning_rate": 1.7103603063400618e-05, "loss": 0.0543, "step": 21400 }, { "epoch": 0.47157723093534293, "grad_norm": 0.7274591326713562, "learning_rate": 1.7102543102377937e-05, "loss": 0.0793, "step": 21401 }, { "epoch": 0.4715992662248591, "grad_norm": 0.37071406841278076, "learning_rate": 1.710148313064588e-05, "loss": 0.0824, "step": 21402 }, { "epoch": 0.47162130151437526, "grad_norm": 0.5497329831123352, "learning_rate": 1.710042314820984e-05, "loss": 0.0636, "step": 21403 }, { "epoch": 0.4716433368038914, "grad_norm": 0.5714479684829712, "learning_rate": 1.7099363155075222e-05, "loss": 0.0659, "step": 21404 }, { "epoch": 0.4716653720934076, "grad_norm": 0.756668746471405, "learning_rate": 1.7098303151247413e-05, "loss": 0.107, "step": 21405 }, { "epoch": 0.47168740738292375, "grad_norm": 0.5442076921463013, "learning_rate": 1.709724313673183e-05, "loss": 0.0717, "step": 21406 }, { "epoch": 0.4717094426724399, "grad_norm": 0.6878395080566406, "learning_rate": 1.7096183111533854e-05, "loss": 0.0949, "step": 21407 }, { "epoch": 0.4717314779619561, "grad_norm": 0.5304660797119141, "learning_rate": 1.7095123075658905e-05, "loss": 0.0735, "step": 21408 }, { "epoch": 0.47175351325147225, "grad_norm": 0.6791108250617981, "learning_rate": 1.7094063029112358e-05, "loss": 0.0722, "step": 21409 }, { "epoch": 0.4717755485409884, "grad_norm": 0.8085973262786865, "learning_rate": 1.7093002971899637e-05, "loss": 0.0907, "step": 21410 }, { "epoch": 0.4717975838305046, "grad_norm": 0.6239739656448364, "learning_rate": 1.709194290402612e-05, "loss": 0.0767, "step": 21411 }, { "epoch": 0.47181961912002074, "grad_norm": 0.5700383186340332, "learning_rate": 1.7090882825497226e-05, "loss": 0.0675, "step": 21412 }, { "epoch": 0.47184165440953685, "grad_norm": 0.6558968424797058, "learning_rate": 1.708982273631834e-05, "loss": 0.0758, "step": 21413 }, { "epoch": 0.471863689699053, "grad_norm": 0.9748091101646423, "learning_rate": 1.7088762636494862e-05, "loss": 0.0788, "step": 21414 }, { "epoch": 0.4718857249885692, "grad_norm": 0.7140902280807495, "learning_rate": 1.7087702526032198e-05, "loss": 0.0616, "step": 21415 }, { "epoch": 0.47190776027808534, "grad_norm": 0.7740921974182129, "learning_rate": 1.7086642404935747e-05, "loss": 0.0971, "step": 21416 }, { "epoch": 0.4719297955676015, "grad_norm": 1.1058446168899536, "learning_rate": 1.7085582273210906e-05, "loss": 0.0887, "step": 21417 }, { "epoch": 0.47195183085711767, "grad_norm": 0.47211146354675293, "learning_rate": 1.7084522130863076e-05, "loss": 0.0747, "step": 21418 }, { "epoch": 0.47197386614663384, "grad_norm": 0.8224731087684631, "learning_rate": 1.7083461977897654e-05, "loss": 0.093, "step": 21419 }, { "epoch": 0.47199590143615, "grad_norm": 0.389374703168869, "learning_rate": 1.7082401814320047e-05, "loss": 0.0691, "step": 21420 }, { "epoch": 0.47201793672566617, "grad_norm": 1.0384321212768555, "learning_rate": 1.7081341640135654e-05, "loss": 0.0976, "step": 21421 }, { "epoch": 0.47203997201518233, "grad_norm": 0.5933343172073364, "learning_rate": 1.7080281455349864e-05, "loss": 0.0537, "step": 21422 }, { "epoch": 0.4720620073046985, "grad_norm": 0.5976381301879883, "learning_rate": 1.707922125996809e-05, "loss": 0.0945, "step": 21423 }, { "epoch": 0.47208404259421466, "grad_norm": 0.5993373394012451, "learning_rate": 1.7078161053995726e-05, "loss": 0.0824, "step": 21424 }, { "epoch": 0.47210607788373077, "grad_norm": 0.6150861978530884, "learning_rate": 1.7077100837438174e-05, "loss": 0.0795, "step": 21425 }, { "epoch": 0.47212811317324693, "grad_norm": 0.3776963949203491, "learning_rate": 1.7076040610300835e-05, "loss": 0.0606, "step": 21426 }, { "epoch": 0.4721501484627631, "grad_norm": 0.646878719329834, "learning_rate": 1.7074980372589106e-05, "loss": 0.0695, "step": 21427 }, { "epoch": 0.47217218375227926, "grad_norm": 0.8377994298934937, "learning_rate": 1.707392012430839e-05, "loss": 0.108, "step": 21428 }, { "epoch": 0.4721942190417954, "grad_norm": 0.70955491065979, "learning_rate": 1.7072859865464088e-05, "loss": 0.0792, "step": 21429 }, { "epoch": 0.4722162543313116, "grad_norm": 0.8863916993141174, "learning_rate": 1.7071799596061593e-05, "loss": 0.1075, "step": 21430 }, { "epoch": 0.47223828962082776, "grad_norm": 0.5982233285903931, "learning_rate": 1.7070739316106322e-05, "loss": 0.0705, "step": 21431 }, { "epoch": 0.4722603249103439, "grad_norm": 0.4994720220565796, "learning_rate": 1.7069679025603654e-05, "loss": 0.0819, "step": 21432 }, { "epoch": 0.4722823601998601, "grad_norm": 0.5426772832870483, "learning_rate": 1.706861872455901e-05, "loss": 0.1026, "step": 21433 }, { "epoch": 0.47230439548937625, "grad_norm": 0.9459083676338196, "learning_rate": 1.7067558412977775e-05, "loss": 0.063, "step": 21434 }, { "epoch": 0.4723264307788924, "grad_norm": 0.5708339810371399, "learning_rate": 1.706649809086536e-05, "loss": 0.0729, "step": 21435 }, { "epoch": 0.4723484660684086, "grad_norm": 0.6137192845344543, "learning_rate": 1.706543775822716e-05, "loss": 0.0714, "step": 21436 }, { "epoch": 0.47237050135792474, "grad_norm": 0.8361082673072815, "learning_rate": 1.7064377415068583e-05, "loss": 0.0831, "step": 21437 }, { "epoch": 0.47239253664744085, "grad_norm": 0.613942563533783, "learning_rate": 1.7063317061395018e-05, "loss": 0.0768, "step": 21438 }, { "epoch": 0.472414571936957, "grad_norm": 2.617372989654541, "learning_rate": 1.7062256697211878e-05, "loss": 0.0553, "step": 21439 }, { "epoch": 0.4724366072264732, "grad_norm": 0.4769207835197449, "learning_rate": 1.7061196322524553e-05, "loss": 0.0588, "step": 21440 }, { "epoch": 0.47245864251598935, "grad_norm": 1.1165610551834106, "learning_rate": 1.7060135937338455e-05, "loss": 0.0744, "step": 21441 }, { "epoch": 0.4724806778055055, "grad_norm": 0.5207147002220154, "learning_rate": 1.7059075541658975e-05, "loss": 0.0572, "step": 21442 }, { "epoch": 0.4725027130950217, "grad_norm": 0.5506758689880371, "learning_rate": 1.7058015135491525e-05, "loss": 0.0827, "step": 21443 }, { "epoch": 0.47252474838453784, "grad_norm": 0.668412446975708, "learning_rate": 1.7056954718841497e-05, "loss": 0.0828, "step": 21444 }, { "epoch": 0.472546783674054, "grad_norm": 0.5915048718452454, "learning_rate": 1.7055894291714295e-05, "loss": 0.0714, "step": 21445 }, { "epoch": 0.47256881896357017, "grad_norm": 0.7968391180038452, "learning_rate": 1.7054833854115323e-05, "loss": 0.0971, "step": 21446 }, { "epoch": 0.47259085425308633, "grad_norm": 0.5013216137886047, "learning_rate": 1.705377340604998e-05, "loss": 0.0751, "step": 21447 }, { "epoch": 0.4726128895426025, "grad_norm": 0.732107400894165, "learning_rate": 1.7052712947523665e-05, "loss": 0.0912, "step": 21448 }, { "epoch": 0.47263492483211866, "grad_norm": 1.1215848922729492, "learning_rate": 1.7051652478541784e-05, "loss": 0.0954, "step": 21449 }, { "epoch": 0.4726569601216348, "grad_norm": 0.5800384879112244, "learning_rate": 1.705059199910974e-05, "loss": 0.0762, "step": 21450 }, { "epoch": 0.47267899541115094, "grad_norm": 0.6886361837387085, "learning_rate": 1.7049531509232926e-05, "loss": 0.0645, "step": 21451 }, { "epoch": 0.4727010307006671, "grad_norm": 0.93559330701828, "learning_rate": 1.7048471008916755e-05, "loss": 0.0811, "step": 21452 }, { "epoch": 0.47272306599018327, "grad_norm": 0.5569040179252625, "learning_rate": 1.704741049816662e-05, "loss": 0.0873, "step": 21453 }, { "epoch": 0.47274510127969943, "grad_norm": 0.9628166556358337, "learning_rate": 1.704634997698792e-05, "loss": 0.0944, "step": 21454 }, { "epoch": 0.4727671365692156, "grad_norm": 0.4628183841705322, "learning_rate": 1.704528944538607e-05, "loss": 0.0718, "step": 21455 }, { "epoch": 0.47278917185873176, "grad_norm": 0.4864979386329651, "learning_rate": 1.704422890336646e-05, "loss": 0.0731, "step": 21456 }, { "epoch": 0.4728112071482479, "grad_norm": 0.7011976838111877, "learning_rate": 1.7043168350934494e-05, "loss": 0.1044, "step": 21457 }, { "epoch": 0.4728332424377641, "grad_norm": 0.608063280582428, "learning_rate": 1.704210778809558e-05, "loss": 0.0855, "step": 21458 }, { "epoch": 0.47285527772728025, "grad_norm": 0.34969690442085266, "learning_rate": 1.7041047214855115e-05, "loss": 0.1025, "step": 21459 }, { "epoch": 0.4728773130167964, "grad_norm": 0.6238871216773987, "learning_rate": 1.7039986631218505e-05, "loss": 0.0811, "step": 21460 }, { "epoch": 0.4728993483063126, "grad_norm": 0.4428524374961853, "learning_rate": 1.7038926037191145e-05, "loss": 0.0755, "step": 21461 }, { "epoch": 0.4729213835958287, "grad_norm": 0.8140469789505005, "learning_rate": 1.7037865432778443e-05, "loss": 0.073, "step": 21462 }, { "epoch": 0.47294341888534486, "grad_norm": 0.6948356628417969, "learning_rate": 1.7036804817985796e-05, "loss": 0.0768, "step": 21463 }, { "epoch": 0.472965454174861, "grad_norm": 0.4877583086490631, "learning_rate": 1.7035744192818608e-05, "loss": 0.0852, "step": 21464 }, { "epoch": 0.4729874894643772, "grad_norm": 0.7706212997436523, "learning_rate": 1.703468355728229e-05, "loss": 0.0639, "step": 21465 }, { "epoch": 0.47300952475389335, "grad_norm": 0.463249534368515, "learning_rate": 1.7033622911382236e-05, "loss": 0.049, "step": 21466 }, { "epoch": 0.4730315600434095, "grad_norm": 0.7492793798446655, "learning_rate": 1.703256225512385e-05, "loss": 0.0955, "step": 21467 }, { "epoch": 0.4730535953329257, "grad_norm": 0.7092356085777283, "learning_rate": 1.7031501588512536e-05, "loss": 0.1161, "step": 21468 }, { "epoch": 0.47307563062244185, "grad_norm": 0.7181247472763062, "learning_rate": 1.703044091155369e-05, "loss": 0.1004, "step": 21469 }, { "epoch": 0.473097665911958, "grad_norm": 1.0792067050933838, "learning_rate": 1.7029380224252724e-05, "loss": 0.1099, "step": 21470 }, { "epoch": 0.4731197012014742, "grad_norm": 0.5274490714073181, "learning_rate": 1.7028319526615033e-05, "loss": 0.0643, "step": 21471 }, { "epoch": 0.47314173649099034, "grad_norm": 0.38255321979522705, "learning_rate": 1.7027258818646024e-05, "loss": 0.067, "step": 21472 }, { "epoch": 0.4731637717805065, "grad_norm": 0.7436671853065491, "learning_rate": 1.70261981003511e-05, "loss": 0.0729, "step": 21473 }, { "epoch": 0.47318580707002267, "grad_norm": 0.5621935725212097, "learning_rate": 1.702513737173566e-05, "loss": 0.0485, "step": 21474 }, { "epoch": 0.4732078423595388, "grad_norm": 0.40304356813430786, "learning_rate": 1.7024076632805114e-05, "loss": 0.0472, "step": 21475 }, { "epoch": 0.47322987764905494, "grad_norm": 0.9199417233467102, "learning_rate": 1.7023015883564862e-05, "loss": 0.0941, "step": 21476 }, { "epoch": 0.4732519129385711, "grad_norm": 0.6816555261611938, "learning_rate": 1.70219551240203e-05, "loss": 0.1036, "step": 21477 }, { "epoch": 0.47327394822808727, "grad_norm": 1.0204845666885376, "learning_rate": 1.7020894354176834e-05, "loss": 0.077, "step": 21478 }, { "epoch": 0.47329598351760344, "grad_norm": 0.6865925788879395, "learning_rate": 1.7019833574039876e-05, "loss": 0.0823, "step": 21479 }, { "epoch": 0.4733180188071196, "grad_norm": 0.5456529855728149, "learning_rate": 1.701877278361482e-05, "loss": 0.0845, "step": 21480 }, { "epoch": 0.47334005409663577, "grad_norm": 0.5079411268234253, "learning_rate": 1.701771198290707e-05, "loss": 0.1299, "step": 21481 }, { "epoch": 0.47336208938615193, "grad_norm": 0.6147915720939636, "learning_rate": 1.701665117192203e-05, "loss": 0.0717, "step": 21482 }, { "epoch": 0.4733841246756681, "grad_norm": 0.7711842060089111, "learning_rate": 1.7015590350665104e-05, "loss": 0.125, "step": 21483 }, { "epoch": 0.47340615996518426, "grad_norm": 0.9585508704185486, "learning_rate": 1.70145295191417e-05, "loss": 0.0948, "step": 21484 }, { "epoch": 0.4734281952547004, "grad_norm": 0.8432064652442932, "learning_rate": 1.7013468677357216e-05, "loss": 0.0856, "step": 21485 }, { "epoch": 0.4734502305442166, "grad_norm": 0.5186905264854431, "learning_rate": 1.701240782531705e-05, "loss": 0.0488, "step": 21486 }, { "epoch": 0.4734722658337327, "grad_norm": 0.6236199140548706, "learning_rate": 1.7011346963026615e-05, "loss": 0.1131, "step": 21487 }, { "epoch": 0.47349430112324886, "grad_norm": 0.6890987157821655, "learning_rate": 1.7010286090491315e-05, "loss": 0.0786, "step": 21488 }, { "epoch": 0.473516336412765, "grad_norm": 0.37822389602661133, "learning_rate": 1.700922520771655e-05, "loss": 0.0552, "step": 21489 }, { "epoch": 0.4735383717022812, "grad_norm": 1.1421059370040894, "learning_rate": 1.7008164314707718e-05, "loss": 0.0876, "step": 21490 }, { "epoch": 0.47356040699179736, "grad_norm": 0.8781241178512573, "learning_rate": 1.7007103411470232e-05, "loss": 0.0774, "step": 21491 }, { "epoch": 0.4735824422813135, "grad_norm": 0.9523763060569763, "learning_rate": 1.7006042498009495e-05, "loss": 0.0646, "step": 21492 }, { "epoch": 0.4736044775708297, "grad_norm": 1.0101404190063477, "learning_rate": 1.7004981574330902e-05, "loss": 0.0722, "step": 21493 }, { "epoch": 0.47362651286034585, "grad_norm": 1.0121911764144897, "learning_rate": 1.7003920640439866e-05, "loss": 0.0706, "step": 21494 }, { "epoch": 0.473648548149862, "grad_norm": 0.5892500877380371, "learning_rate": 1.7002859696341785e-05, "loss": 0.0563, "step": 21495 }, { "epoch": 0.4736705834393782, "grad_norm": 0.6252084970474243, "learning_rate": 1.7001798742042067e-05, "loss": 0.0976, "step": 21496 }, { "epoch": 0.47369261872889434, "grad_norm": 0.6302339434623718, "learning_rate": 1.7000737777546118e-05, "loss": 0.0491, "step": 21497 }, { "epoch": 0.4737146540184105, "grad_norm": 0.34617364406585693, "learning_rate": 1.6999676802859332e-05, "loss": 0.0565, "step": 21498 }, { "epoch": 0.4737366893079266, "grad_norm": 0.8728752732276917, "learning_rate": 1.6998615817987125e-05, "loss": 0.0845, "step": 21499 }, { "epoch": 0.4737587245974428, "grad_norm": 1.1139600276947021, "learning_rate": 1.6997554822934896e-05, "loss": 0.1154, "step": 21500 }, { "epoch": 0.47378075988695895, "grad_norm": 0.8765240907669067, "learning_rate": 1.6996493817708046e-05, "loss": 0.0716, "step": 21501 }, { "epoch": 0.4738027951764751, "grad_norm": 1.0439423322677612, "learning_rate": 1.6995432802311984e-05, "loss": 0.1559, "step": 21502 }, { "epoch": 0.4738248304659913, "grad_norm": 0.8107991218566895, "learning_rate": 1.699437177675211e-05, "loss": 0.045, "step": 21503 }, { "epoch": 0.47384686575550744, "grad_norm": 0.4302346408367157, "learning_rate": 1.6993310741033834e-05, "loss": 0.0867, "step": 21504 }, { "epoch": 0.4738689010450236, "grad_norm": 0.5338859558105469, "learning_rate": 1.6992249695162552e-05, "loss": 0.0925, "step": 21505 }, { "epoch": 0.47389093633453977, "grad_norm": 0.6283729672431946, "learning_rate": 1.6991188639143684e-05, "loss": 0.089, "step": 21506 }, { "epoch": 0.47391297162405593, "grad_norm": 0.6213228106498718, "learning_rate": 1.699012757298262e-05, "loss": 0.0676, "step": 21507 }, { "epoch": 0.4739350069135721, "grad_norm": 0.7406807541847229, "learning_rate": 1.698906649668477e-05, "loss": 0.0765, "step": 21508 }, { "epoch": 0.47395704220308826, "grad_norm": 0.8287373781204224, "learning_rate": 1.6988005410255533e-05, "loss": 0.1097, "step": 21509 }, { "epoch": 0.47397907749260443, "grad_norm": 0.8806793689727783, "learning_rate": 1.698694431370032e-05, "loss": 0.0995, "step": 21510 }, { "epoch": 0.4740011127821206, "grad_norm": 0.660649836063385, "learning_rate": 1.698588320702454e-05, "loss": 0.0691, "step": 21511 }, { "epoch": 0.4740231480716367, "grad_norm": 0.8201959133148193, "learning_rate": 1.6984822090233584e-05, "loss": 0.1, "step": 21512 }, { "epoch": 0.47404518336115287, "grad_norm": 0.6291208863258362, "learning_rate": 1.6983760963332867e-05, "loss": 0.0669, "step": 21513 }, { "epoch": 0.47406721865066903, "grad_norm": 0.8864389657974243, "learning_rate": 1.6982699826327796e-05, "loss": 0.1081, "step": 21514 }, { "epoch": 0.4740892539401852, "grad_norm": 0.6576071977615356, "learning_rate": 1.698163867922377e-05, "loss": 0.0729, "step": 21515 }, { "epoch": 0.47411128922970136, "grad_norm": 0.7418794631958008, "learning_rate": 1.698057752202619e-05, "loss": 0.0622, "step": 21516 }, { "epoch": 0.4741333245192175, "grad_norm": 0.4828534722328186, "learning_rate": 1.6979516354740474e-05, "loss": 0.0706, "step": 21517 }, { "epoch": 0.4741553598087337, "grad_norm": 0.6893019676208496, "learning_rate": 1.6978455177372013e-05, "loss": 0.0857, "step": 21518 }, { "epoch": 0.47417739509824985, "grad_norm": 0.575833261013031, "learning_rate": 1.697739398992622e-05, "loss": 0.0541, "step": 21519 }, { "epoch": 0.474199430387766, "grad_norm": 0.6487346291542053, "learning_rate": 1.6976332792408504e-05, "loss": 0.07, "step": 21520 }, { "epoch": 0.4742214656772822, "grad_norm": 0.5777790546417236, "learning_rate": 1.6975271584824263e-05, "loss": 0.0544, "step": 21521 }, { "epoch": 0.47424350096679835, "grad_norm": 0.30700910091400146, "learning_rate": 1.6974210367178904e-05, "loss": 0.0762, "step": 21522 }, { "epoch": 0.4742655362563145, "grad_norm": 0.8179002404212952, "learning_rate": 1.697314913947783e-05, "loss": 0.0727, "step": 21523 }, { "epoch": 0.4742875715458306, "grad_norm": 0.5453743934631348, "learning_rate": 1.697208790172646e-05, "loss": 0.0597, "step": 21524 }, { "epoch": 0.4743096068353468, "grad_norm": 0.6689717769622803, "learning_rate": 1.697102665393018e-05, "loss": 0.0782, "step": 21525 }, { "epoch": 0.47433164212486295, "grad_norm": 0.7370136380195618, "learning_rate": 1.69699653960944e-05, "loss": 0.0815, "step": 21526 }, { "epoch": 0.4743536774143791, "grad_norm": 0.5391627550125122, "learning_rate": 1.696890412822454e-05, "loss": 0.0903, "step": 21527 }, { "epoch": 0.4743757127038953, "grad_norm": 0.7396321296691895, "learning_rate": 1.6967842850325986e-05, "loss": 0.0864, "step": 21528 }, { "epoch": 0.47439774799341144, "grad_norm": 0.846963107585907, "learning_rate": 1.696678156240416e-05, "loss": 0.0795, "step": 21529 }, { "epoch": 0.4744197832829276, "grad_norm": 1.0161322355270386, "learning_rate": 1.696572026446446e-05, "loss": 0.1021, "step": 21530 }, { "epoch": 0.4744418185724438, "grad_norm": 0.6234933137893677, "learning_rate": 1.696465895651229e-05, "loss": 0.1064, "step": 21531 }, { "epoch": 0.47446385386195994, "grad_norm": 0.6228307485580444, "learning_rate": 1.696359763855306e-05, "loss": 0.0842, "step": 21532 }, { "epoch": 0.4744858891514761, "grad_norm": 0.7214551568031311, "learning_rate": 1.6962536310592173e-05, "loss": 0.1058, "step": 21533 }, { "epoch": 0.47450792444099227, "grad_norm": 0.7330386638641357, "learning_rate": 1.696147497263504e-05, "loss": 0.0799, "step": 21534 }, { "epoch": 0.47452995973050843, "grad_norm": 0.577782154083252, "learning_rate": 1.696041362468706e-05, "loss": 0.0951, "step": 21535 }, { "epoch": 0.47455199502002454, "grad_norm": 0.7076664566993713, "learning_rate": 1.695935226675364e-05, "loss": 0.0888, "step": 21536 }, { "epoch": 0.4745740303095407, "grad_norm": 0.3305574357509613, "learning_rate": 1.6958290898840192e-05, "loss": 0.058, "step": 21537 }, { "epoch": 0.47459606559905687, "grad_norm": 0.9872485995292664, "learning_rate": 1.6957229520952117e-05, "loss": 0.0997, "step": 21538 }, { "epoch": 0.47461810088857304, "grad_norm": 0.47051846981048584, "learning_rate": 1.6956168133094824e-05, "loss": 0.0678, "step": 21539 }, { "epoch": 0.4746401361780892, "grad_norm": 0.8007197380065918, "learning_rate": 1.695510673527372e-05, "loss": 0.1149, "step": 21540 }, { "epoch": 0.47466217146760536, "grad_norm": 0.6986321210861206, "learning_rate": 1.69540453274942e-05, "loss": 0.0666, "step": 21541 }, { "epoch": 0.47468420675712153, "grad_norm": 0.8132390975952148, "learning_rate": 1.6952983909761685e-05, "loss": 0.0659, "step": 21542 }, { "epoch": 0.4747062420466377, "grad_norm": 0.8918643593788147, "learning_rate": 1.6951922482081575e-05, "loss": 0.0828, "step": 21543 }, { "epoch": 0.47472827733615386, "grad_norm": 0.7306735515594482, "learning_rate": 1.695086104445928e-05, "loss": 0.0871, "step": 21544 }, { "epoch": 0.47475031262567, "grad_norm": 0.7689909934997559, "learning_rate": 1.6949799596900197e-05, "loss": 0.079, "step": 21545 }, { "epoch": 0.4747723479151862, "grad_norm": 0.4323219954967499, "learning_rate": 1.6948738139409744e-05, "loss": 0.0992, "step": 21546 }, { "epoch": 0.47479438320470235, "grad_norm": 0.48092830181121826, "learning_rate": 1.6947676671993322e-05, "loss": 0.0664, "step": 21547 }, { "epoch": 0.4748164184942185, "grad_norm": 0.4577769637107849, "learning_rate": 1.694661519465634e-05, "loss": 0.0519, "step": 21548 }, { "epoch": 0.4748384537837346, "grad_norm": 0.6016275882720947, "learning_rate": 1.69455537074042e-05, "loss": 0.0785, "step": 21549 }, { "epoch": 0.4748604890732508, "grad_norm": 0.5052775740623474, "learning_rate": 1.6944492210242314e-05, "loss": 0.065, "step": 21550 }, { "epoch": 0.47488252436276696, "grad_norm": 0.6740983128547668, "learning_rate": 1.6943430703176083e-05, "loss": 0.0938, "step": 21551 }, { "epoch": 0.4749045596522831, "grad_norm": 0.5158264636993408, "learning_rate": 1.6942369186210918e-05, "loss": 0.0617, "step": 21552 }, { "epoch": 0.4749265949417993, "grad_norm": 0.7355164289474487, "learning_rate": 1.6941307659352227e-05, "loss": 0.083, "step": 21553 }, { "epoch": 0.47494863023131545, "grad_norm": 0.7279013991355896, "learning_rate": 1.6940246122605417e-05, "loss": 0.1006, "step": 21554 }, { "epoch": 0.4749706655208316, "grad_norm": 0.26598021388053894, "learning_rate": 1.693918457597589e-05, "loss": 0.0784, "step": 21555 }, { "epoch": 0.4749927008103478, "grad_norm": 0.7266972064971924, "learning_rate": 1.6938123019469055e-05, "loss": 0.1129, "step": 21556 }, { "epoch": 0.47501473609986394, "grad_norm": 0.5079630613327026, "learning_rate": 1.6937061453090325e-05, "loss": 0.0634, "step": 21557 }, { "epoch": 0.4750367713893801, "grad_norm": 0.7454229593276978, "learning_rate": 1.6935999876845094e-05, "loss": 0.0825, "step": 21558 }, { "epoch": 0.4750588066788963, "grad_norm": 0.7218270301818848, "learning_rate": 1.6934938290738787e-05, "loss": 0.0876, "step": 21559 }, { "epoch": 0.47508084196841244, "grad_norm": 0.7060157060623169, "learning_rate": 1.693387669477679e-05, "loss": 0.0609, "step": 21560 }, { "epoch": 0.47510287725792855, "grad_norm": 0.4262472689151764, "learning_rate": 1.6932815088964534e-05, "loss": 0.0712, "step": 21561 }, { "epoch": 0.4751249125474447, "grad_norm": 0.5598462224006653, "learning_rate": 1.6931753473307412e-05, "loss": 0.0695, "step": 21562 }, { "epoch": 0.4751469478369609, "grad_norm": 0.6326691508293152, "learning_rate": 1.693069184781083e-05, "loss": 0.08, "step": 21563 }, { "epoch": 0.47516898312647704, "grad_norm": 0.9312002658843994, "learning_rate": 1.69296302124802e-05, "loss": 0.0816, "step": 21564 }, { "epoch": 0.4751910184159932, "grad_norm": 0.7870341539382935, "learning_rate": 1.6928568567320928e-05, "loss": 0.085, "step": 21565 }, { "epoch": 0.47521305370550937, "grad_norm": 0.5156899094581604, "learning_rate": 1.692750691233842e-05, "loss": 0.059, "step": 21566 }, { "epoch": 0.47523508899502553, "grad_norm": 0.6970663070678711, "learning_rate": 1.6926445247538095e-05, "loss": 0.0674, "step": 21567 }, { "epoch": 0.4752571242845417, "grad_norm": 0.5865325927734375, "learning_rate": 1.692538357292534e-05, "loss": 0.0875, "step": 21568 }, { "epoch": 0.47527915957405786, "grad_norm": 0.5865431427955627, "learning_rate": 1.6924321888505582e-05, "loss": 0.1094, "step": 21569 }, { "epoch": 0.47530119486357403, "grad_norm": 1.0313799381256104, "learning_rate": 1.6923260194284216e-05, "loss": 0.0895, "step": 21570 }, { "epoch": 0.4753232301530902, "grad_norm": 0.7123463153839111, "learning_rate": 1.6922198490266658e-05, "loss": 0.0643, "step": 21571 }, { "epoch": 0.47534526544260636, "grad_norm": 0.45005857944488525, "learning_rate": 1.692113677645831e-05, "loss": 0.0619, "step": 21572 }, { "epoch": 0.4753673007321225, "grad_norm": 0.36229437589645386, "learning_rate": 1.6920075052864582e-05, "loss": 0.0821, "step": 21573 }, { "epoch": 0.47538933602163863, "grad_norm": 0.7872461080551147, "learning_rate": 1.691901331949088e-05, "loss": 0.0743, "step": 21574 }, { "epoch": 0.4754113713111548, "grad_norm": 0.5486190319061279, "learning_rate": 1.6917951576342622e-05, "loss": 0.0932, "step": 21575 }, { "epoch": 0.47543340660067096, "grad_norm": 0.9359984993934631, "learning_rate": 1.6916889823425198e-05, "loss": 0.1135, "step": 21576 }, { "epoch": 0.4754554418901871, "grad_norm": 0.6483825445175171, "learning_rate": 1.691582806074403e-05, "loss": 0.0694, "step": 21577 }, { "epoch": 0.4754774771797033, "grad_norm": 0.5643789768218994, "learning_rate": 1.6914766288304526e-05, "loss": 0.0844, "step": 21578 }, { "epoch": 0.47549951246921945, "grad_norm": 0.7279662489891052, "learning_rate": 1.691370450611209e-05, "loss": 0.0892, "step": 21579 }, { "epoch": 0.4755215477587356, "grad_norm": 1.1512434482574463, "learning_rate": 1.691264271417213e-05, "loss": 0.1055, "step": 21580 }, { "epoch": 0.4755435830482518, "grad_norm": 1.1306545734405518, "learning_rate": 1.6911580912490052e-05, "loss": 0.0826, "step": 21581 }, { "epoch": 0.47556561833776795, "grad_norm": 1.1062291860580444, "learning_rate": 1.691051910107127e-05, "loss": 0.1257, "step": 21582 }, { "epoch": 0.4755876536272841, "grad_norm": 0.7679071426391602, "learning_rate": 1.6909457279921184e-05, "loss": 0.112, "step": 21583 }, { "epoch": 0.4756096889168003, "grad_norm": 0.6101439595222473, "learning_rate": 1.6908395449045215e-05, "loss": 0.0748, "step": 21584 }, { "epoch": 0.47563172420631644, "grad_norm": 0.5017569661140442, "learning_rate": 1.6907333608448764e-05, "loss": 0.116, "step": 21585 }, { "epoch": 0.47565375949583255, "grad_norm": 0.8731178045272827, "learning_rate": 1.690627175813724e-05, "loss": 0.0951, "step": 21586 }, { "epoch": 0.4756757947853487, "grad_norm": 1.1379913091659546, "learning_rate": 1.690520989811605e-05, "loss": 0.0901, "step": 21587 }, { "epoch": 0.4756978300748649, "grad_norm": 0.6358789801597595, "learning_rate": 1.690414802839061e-05, "loss": 0.0683, "step": 21588 }, { "epoch": 0.47571986536438104, "grad_norm": 0.6490068435668945, "learning_rate": 1.690308614896631e-05, "loss": 0.0785, "step": 21589 }, { "epoch": 0.4757419006538972, "grad_norm": 0.6534478664398193, "learning_rate": 1.6902024259848584e-05, "loss": 0.1017, "step": 21590 }, { "epoch": 0.4757639359434134, "grad_norm": 0.6052495241165161, "learning_rate": 1.6900962361042822e-05, "loss": 0.0609, "step": 21591 }, { "epoch": 0.47578597123292954, "grad_norm": 0.7884618639945984, "learning_rate": 1.6899900452554444e-05, "loss": 0.063, "step": 21592 }, { "epoch": 0.4758080065224457, "grad_norm": 0.7862491607666016, "learning_rate": 1.689883853438885e-05, "loss": 0.0677, "step": 21593 }, { "epoch": 0.47583004181196187, "grad_norm": 0.6981626749038696, "learning_rate": 1.6897776606551454e-05, "loss": 0.0795, "step": 21594 }, { "epoch": 0.47585207710147803, "grad_norm": 0.6688984036445618, "learning_rate": 1.6896714669047664e-05, "loss": 0.0964, "step": 21595 }, { "epoch": 0.4758741123909942, "grad_norm": 0.5694214105606079, "learning_rate": 1.6895652721882894e-05, "loss": 0.082, "step": 21596 }, { "epoch": 0.47589614768051036, "grad_norm": 0.3395492136478424, "learning_rate": 1.6894590765062542e-05, "loss": 0.0633, "step": 21597 }, { "epoch": 0.47591818297002647, "grad_norm": 0.8383746147155762, "learning_rate": 1.6893528798592024e-05, "loss": 0.0928, "step": 21598 }, { "epoch": 0.47594021825954264, "grad_norm": 0.5708920955657959, "learning_rate": 1.689246682247675e-05, "loss": 0.1152, "step": 21599 }, { "epoch": 0.4759622535490588, "grad_norm": 0.6119858026504517, "learning_rate": 1.6891404836722127e-05, "loss": 0.1152, "step": 21600 }, { "epoch": 0.47598428883857496, "grad_norm": 0.6515353322029114, "learning_rate": 1.6890342841333566e-05, "loss": 0.0794, "step": 21601 }, { "epoch": 0.47600632412809113, "grad_norm": 0.4353846609592438, "learning_rate": 1.6889280836316476e-05, "loss": 0.0597, "step": 21602 }, { "epoch": 0.4760283594176073, "grad_norm": 0.7266624569892883, "learning_rate": 1.6888218821676265e-05, "loss": 0.0689, "step": 21603 }, { "epoch": 0.47605039470712346, "grad_norm": 0.813910186290741, "learning_rate": 1.688715679741834e-05, "loss": 0.069, "step": 21604 }, { "epoch": 0.4760724299966396, "grad_norm": 0.5669561624526978, "learning_rate": 1.6886094763548117e-05, "loss": 0.067, "step": 21605 }, { "epoch": 0.4760944652861558, "grad_norm": 0.2998211681842804, "learning_rate": 1.6885032720071e-05, "loss": 0.0547, "step": 21606 }, { "epoch": 0.47611650057567195, "grad_norm": 0.6478344798088074, "learning_rate": 1.68839706669924e-05, "loss": 0.1311, "step": 21607 }, { "epoch": 0.4761385358651881, "grad_norm": 0.594849169254303, "learning_rate": 1.688290860431773e-05, "loss": 0.0793, "step": 21608 }, { "epoch": 0.4761605711547043, "grad_norm": 0.5605638027191162, "learning_rate": 1.6881846532052393e-05, "loss": 0.0785, "step": 21609 }, { "epoch": 0.47618260644422045, "grad_norm": 0.6728834509849548, "learning_rate": 1.6880784450201805e-05, "loss": 0.0924, "step": 21610 }, { "epoch": 0.47620464173373656, "grad_norm": 0.8035741448402405, "learning_rate": 1.6879722358771376e-05, "loss": 0.0842, "step": 21611 }, { "epoch": 0.4762266770232527, "grad_norm": 0.5841125249862671, "learning_rate": 1.6878660257766507e-05, "loss": 0.0741, "step": 21612 }, { "epoch": 0.4762487123127689, "grad_norm": 0.5763177275657654, "learning_rate": 1.6877598147192615e-05, "loss": 0.0743, "step": 21613 }, { "epoch": 0.47627074760228505, "grad_norm": 0.5154297947883606, "learning_rate": 1.6876536027055112e-05, "loss": 0.1012, "step": 21614 }, { "epoch": 0.4762927828918012, "grad_norm": 0.7366204261779785, "learning_rate": 1.6875473897359404e-05, "loss": 0.0988, "step": 21615 }, { "epoch": 0.4763148181813174, "grad_norm": 0.659030020236969, "learning_rate": 1.68744117581109e-05, "loss": 0.0815, "step": 21616 }, { "epoch": 0.47633685347083354, "grad_norm": 0.4575873613357544, "learning_rate": 1.687334960931501e-05, "loss": 0.0573, "step": 21617 }, { "epoch": 0.4763588887603497, "grad_norm": 1.03445303440094, "learning_rate": 1.6872287450977152e-05, "loss": 0.0914, "step": 21618 }, { "epoch": 0.4763809240498659, "grad_norm": 0.6173811554908752, "learning_rate": 1.687122528310273e-05, "loss": 0.0951, "step": 21619 }, { "epoch": 0.47640295933938204, "grad_norm": 0.7524383664131165, "learning_rate": 1.687016310569715e-05, "loss": 0.1082, "step": 21620 }, { "epoch": 0.4764249946288982, "grad_norm": 0.8618290424346924, "learning_rate": 1.686910091876582e-05, "loss": 0.0788, "step": 21621 }, { "epoch": 0.47644702991841437, "grad_norm": 0.7500461935997009, "learning_rate": 1.686803872231417e-05, "loss": 0.0611, "step": 21622 }, { "epoch": 0.4764690652079305, "grad_norm": 0.608942449092865, "learning_rate": 1.686697651634759e-05, "loss": 0.0858, "step": 21623 }, { "epoch": 0.47649110049744664, "grad_norm": 0.6240226030349731, "learning_rate": 1.68659143008715e-05, "loss": 0.0762, "step": 21624 }, { "epoch": 0.4765131357869628, "grad_norm": 0.643377959728241, "learning_rate": 1.6864852075891307e-05, "loss": 0.0738, "step": 21625 }, { "epoch": 0.47653517107647897, "grad_norm": 0.7158366441726685, "learning_rate": 1.6863789841412422e-05, "loss": 0.0869, "step": 21626 }, { "epoch": 0.47655720636599513, "grad_norm": 0.6735316514968872, "learning_rate": 1.6862727597440253e-05, "loss": 0.0533, "step": 21627 }, { "epoch": 0.4765792416555113, "grad_norm": 0.5418354272842407, "learning_rate": 1.6861665343980215e-05, "loss": 0.0718, "step": 21628 }, { "epoch": 0.47660127694502746, "grad_norm": 0.5041751861572266, "learning_rate": 1.6860603081037716e-05, "loss": 0.0658, "step": 21629 }, { "epoch": 0.47662331223454363, "grad_norm": 1.0328469276428223, "learning_rate": 1.6859540808618173e-05, "loss": 0.1022, "step": 21630 }, { "epoch": 0.4766453475240598, "grad_norm": 0.8275058269500732, "learning_rate": 1.6858478526726985e-05, "loss": 0.1167, "step": 21631 }, { "epoch": 0.47666738281357596, "grad_norm": 0.2638387382030487, "learning_rate": 1.6857416235369578e-05, "loss": 0.066, "step": 21632 }, { "epoch": 0.4766894181030921, "grad_norm": 0.5420846939086914, "learning_rate": 1.6856353934551344e-05, "loss": 0.0979, "step": 21633 }, { "epoch": 0.4767114533926083, "grad_norm": 0.6321943402290344, "learning_rate": 1.6855291624277712e-05, "loss": 0.0693, "step": 21634 }, { "epoch": 0.4767334886821244, "grad_norm": 0.5524338483810425, "learning_rate": 1.6854229304554082e-05, "loss": 0.071, "step": 21635 }, { "epoch": 0.47675552397164056, "grad_norm": 0.3363710641860962, "learning_rate": 1.6853166975385866e-05, "loss": 0.0797, "step": 21636 }, { "epoch": 0.4767775592611567, "grad_norm": 0.5215063691139221, "learning_rate": 1.6852104636778477e-05, "loss": 0.0604, "step": 21637 }, { "epoch": 0.4767995945506729, "grad_norm": 0.5214987993240356, "learning_rate": 1.6851042288737333e-05, "loss": 0.0916, "step": 21638 }, { "epoch": 0.47682162984018905, "grad_norm": 0.8675636053085327, "learning_rate": 1.6849979931267828e-05, "loss": 0.1194, "step": 21639 }, { "epoch": 0.4768436651297052, "grad_norm": 0.49225756525993347, "learning_rate": 1.6848917564375387e-05, "loss": 0.0894, "step": 21640 }, { "epoch": 0.4768657004192214, "grad_norm": 0.6607183218002319, "learning_rate": 1.6847855188065416e-05, "loss": 0.0998, "step": 21641 }, { "epoch": 0.47688773570873755, "grad_norm": 0.7223244309425354, "learning_rate": 1.684679280234333e-05, "loss": 0.0987, "step": 21642 }, { "epoch": 0.4769097709982537, "grad_norm": 0.6988270878791809, "learning_rate": 1.684573040721454e-05, "loss": 0.0965, "step": 21643 }, { "epoch": 0.4769318062877699, "grad_norm": 0.7642249464988708, "learning_rate": 1.684466800268445e-05, "loss": 0.075, "step": 21644 }, { "epoch": 0.47695384157728604, "grad_norm": 0.48161616921424866, "learning_rate": 1.684360558875848e-05, "loss": 0.0615, "step": 21645 }, { "epoch": 0.4769758768668022, "grad_norm": 0.9706997275352478, "learning_rate": 1.684254316544204e-05, "loss": 0.1049, "step": 21646 }, { "epoch": 0.47699791215631837, "grad_norm": 0.8305289149284363, "learning_rate": 1.6841480732740535e-05, "loss": 0.0999, "step": 21647 }, { "epoch": 0.4770199474458345, "grad_norm": 0.8553767800331116, "learning_rate": 1.684041829065938e-05, "loss": 0.104, "step": 21648 }, { "epoch": 0.47704198273535064, "grad_norm": 0.7598599791526794, "learning_rate": 1.6839355839203995e-05, "loss": 0.0724, "step": 21649 }, { "epoch": 0.4770640180248668, "grad_norm": 0.8483920693397522, "learning_rate": 1.683829337837978e-05, "loss": 0.0872, "step": 21650 }, { "epoch": 0.477086053314383, "grad_norm": 0.6674401164054871, "learning_rate": 1.6837230908192152e-05, "loss": 0.0741, "step": 21651 }, { "epoch": 0.47710808860389914, "grad_norm": 0.4808008074760437, "learning_rate": 1.683616842864652e-05, "loss": 0.0456, "step": 21652 }, { "epoch": 0.4771301238934153, "grad_norm": 0.5535100698471069, "learning_rate": 1.6835105939748297e-05, "loss": 0.0817, "step": 21653 }, { "epoch": 0.47715215918293147, "grad_norm": 0.8638279438018799, "learning_rate": 1.68340434415029e-05, "loss": 0.0868, "step": 21654 }, { "epoch": 0.47717419447244763, "grad_norm": 1.1389838457107544, "learning_rate": 1.6832980933915734e-05, "loss": 0.0678, "step": 21655 }, { "epoch": 0.4771962297619638, "grad_norm": 0.8153031468391418, "learning_rate": 1.683191841699221e-05, "loss": 0.0664, "step": 21656 }, { "epoch": 0.47721826505147996, "grad_norm": 0.35505953431129456, "learning_rate": 1.6830855890737745e-05, "loss": 0.0649, "step": 21657 }, { "epoch": 0.4772403003409961, "grad_norm": 0.721173107624054, "learning_rate": 1.682979335515775e-05, "loss": 0.1018, "step": 21658 }, { "epoch": 0.4772623356305123, "grad_norm": 0.9277998208999634, "learning_rate": 1.682873081025764e-05, "loss": 0.087, "step": 21659 }, { "epoch": 0.4772843709200284, "grad_norm": 0.7965293526649475, "learning_rate": 1.6827668256042817e-05, "loss": 0.1177, "step": 21660 }, { "epoch": 0.47730640620954456, "grad_norm": 0.8421942591667175, "learning_rate": 1.6826605692518704e-05, "loss": 0.0608, "step": 21661 }, { "epoch": 0.47732844149906073, "grad_norm": 0.40258970856666565, "learning_rate": 1.6825543119690703e-05, "loss": 0.0575, "step": 21662 }, { "epoch": 0.4773504767885769, "grad_norm": 1.0641515254974365, "learning_rate": 1.6824480537564235e-05, "loss": 0.118, "step": 21663 }, { "epoch": 0.47737251207809306, "grad_norm": 0.9184114933013916, "learning_rate": 1.682341794614471e-05, "loss": 0.0744, "step": 21664 }, { "epoch": 0.4773945473676092, "grad_norm": 0.7435259222984314, "learning_rate": 1.6822355345437543e-05, "loss": 0.0742, "step": 21665 }, { "epoch": 0.4774165826571254, "grad_norm": 1.1884679794311523, "learning_rate": 1.682129273544814e-05, "loss": 0.1006, "step": 21666 }, { "epoch": 0.47743861794664155, "grad_norm": 0.5419491529464722, "learning_rate": 1.6820230116181913e-05, "loss": 0.0949, "step": 21667 }, { "epoch": 0.4774606532361577, "grad_norm": 0.5517879724502563, "learning_rate": 1.6819167487644284e-05, "loss": 0.0984, "step": 21668 }, { "epoch": 0.4774826885256739, "grad_norm": 0.9142580628395081, "learning_rate": 1.6818104849840653e-05, "loss": 0.0985, "step": 21669 }, { "epoch": 0.47750472381519005, "grad_norm": 1.017521858215332, "learning_rate": 1.6817042202776445e-05, "loss": 0.12, "step": 21670 }, { "epoch": 0.4775267591047062, "grad_norm": 0.6462418437004089, "learning_rate": 1.681597954645706e-05, "loss": 0.0732, "step": 21671 }, { "epoch": 0.4775487943942223, "grad_norm": 0.6442764401435852, "learning_rate": 1.6814916880887923e-05, "loss": 0.0967, "step": 21672 }, { "epoch": 0.4775708296837385, "grad_norm": 0.3349776268005371, "learning_rate": 1.681385420607444e-05, "loss": 0.0536, "step": 21673 }, { "epoch": 0.47759286497325465, "grad_norm": 0.6687940955162048, "learning_rate": 1.6812791522022026e-05, "loss": 0.0822, "step": 21674 }, { "epoch": 0.4776149002627708, "grad_norm": 0.6848248243331909, "learning_rate": 1.681172882873609e-05, "loss": 0.0707, "step": 21675 }, { "epoch": 0.477636935552287, "grad_norm": 0.7654820680618286, "learning_rate": 1.681066612622205e-05, "loss": 0.1074, "step": 21676 }, { "epoch": 0.47765897084180314, "grad_norm": 0.9413226842880249, "learning_rate": 1.6809603414485314e-05, "loss": 0.0851, "step": 21677 }, { "epoch": 0.4776810061313193, "grad_norm": 0.7227296233177185, "learning_rate": 1.6808540693531298e-05, "loss": 0.0895, "step": 21678 }, { "epoch": 0.4777030414208355, "grad_norm": 0.531792402267456, "learning_rate": 1.6807477963365413e-05, "loss": 0.0434, "step": 21679 }, { "epoch": 0.47772507671035164, "grad_norm": 0.43968844413757324, "learning_rate": 1.6806415223993075e-05, "loss": 0.0613, "step": 21680 }, { "epoch": 0.4777471119998678, "grad_norm": 0.48349297046661377, "learning_rate": 1.68053524754197e-05, "loss": 0.0921, "step": 21681 }, { "epoch": 0.47776914728938397, "grad_norm": 0.5907810926437378, "learning_rate": 1.680428971765069e-05, "loss": 0.09, "step": 21682 }, { "epoch": 0.47779118257890013, "grad_norm": 0.8188618421554565, "learning_rate": 1.6803226950691467e-05, "loss": 0.0692, "step": 21683 }, { "epoch": 0.4778132178684163, "grad_norm": 0.538908064365387, "learning_rate": 1.6802164174547444e-05, "loss": 0.1001, "step": 21684 }, { "epoch": 0.4778352531579324, "grad_norm": 0.627518892288208, "learning_rate": 1.6801101389224033e-05, "loss": 0.0586, "step": 21685 }, { "epoch": 0.47785728844744857, "grad_norm": 0.4899823069572449, "learning_rate": 1.6800038594726643e-05, "loss": 0.0603, "step": 21686 }, { "epoch": 0.47787932373696473, "grad_norm": 0.3230634927749634, "learning_rate": 1.6798975791060693e-05, "loss": 0.0384, "step": 21687 }, { "epoch": 0.4779013590264809, "grad_norm": 0.5917831659317017, "learning_rate": 1.679791297823159e-05, "loss": 0.0565, "step": 21688 }, { "epoch": 0.47792339431599706, "grad_norm": 0.6817374229431152, "learning_rate": 1.679685015624476e-05, "loss": 0.0702, "step": 21689 }, { "epoch": 0.47794542960551323, "grad_norm": 0.6839963793754578, "learning_rate": 1.6795787325105608e-05, "loss": 0.1029, "step": 21690 }, { "epoch": 0.4779674648950294, "grad_norm": 0.6240924000740051, "learning_rate": 1.6794724484819547e-05, "loss": 0.083, "step": 21691 }, { "epoch": 0.47798950018454556, "grad_norm": 0.5651339292526245, "learning_rate": 1.6793661635391985e-05, "loss": 0.0927, "step": 21692 }, { "epoch": 0.4780115354740617, "grad_norm": 0.641160249710083, "learning_rate": 1.6792598776828353e-05, "loss": 0.0962, "step": 21693 }, { "epoch": 0.4780335707635779, "grad_norm": 0.5724369287490845, "learning_rate": 1.6791535909134044e-05, "loss": 0.0655, "step": 21694 }, { "epoch": 0.47805560605309405, "grad_norm": 0.4294256269931793, "learning_rate": 1.6790473032314487e-05, "loss": 0.0706, "step": 21695 }, { "epoch": 0.4780776413426102, "grad_norm": 1.1358736753463745, "learning_rate": 1.6789410146375092e-05, "loss": 0.1007, "step": 21696 }, { "epoch": 0.4780996766321263, "grad_norm": 0.75187748670578, "learning_rate": 1.678834725132127e-05, "loss": 0.1036, "step": 21697 }, { "epoch": 0.4781217119216425, "grad_norm": 0.4746703505516052, "learning_rate": 1.678728434715844e-05, "loss": 0.1003, "step": 21698 }, { "epoch": 0.47814374721115865, "grad_norm": 0.32661622762680054, "learning_rate": 1.678622143389201e-05, "loss": 0.064, "step": 21699 }, { "epoch": 0.4781657825006748, "grad_norm": 0.4709968566894531, "learning_rate": 1.6785158511527393e-05, "loss": 0.0626, "step": 21700 }, { "epoch": 0.478187817790191, "grad_norm": 0.5075064301490784, "learning_rate": 1.6784095580070013e-05, "loss": 0.0766, "step": 21701 }, { "epoch": 0.47820985307970715, "grad_norm": 0.6490486264228821, "learning_rate": 1.678303263952527e-05, "loss": 0.0729, "step": 21702 }, { "epoch": 0.4782318883692233, "grad_norm": 0.5172439217567444, "learning_rate": 1.678196968989859e-05, "loss": 0.0692, "step": 21703 }, { "epoch": 0.4782539236587395, "grad_norm": 0.711062490940094, "learning_rate": 1.678090673119538e-05, "loss": 0.0621, "step": 21704 }, { "epoch": 0.47827595894825564, "grad_norm": 0.7653862833976746, "learning_rate": 1.677984376342106e-05, "loss": 0.0905, "step": 21705 }, { "epoch": 0.4782979942377718, "grad_norm": 0.6546107530593872, "learning_rate": 1.677878078658104e-05, "loss": 0.0768, "step": 21706 }, { "epoch": 0.47832002952728797, "grad_norm": 0.4135285019874573, "learning_rate": 1.677771780068074e-05, "loss": 0.0763, "step": 21707 }, { "epoch": 0.47834206481680414, "grad_norm": 0.5167863368988037, "learning_rate": 1.6776654805725562e-05, "loss": 0.0752, "step": 21708 }, { "epoch": 0.47836410010632024, "grad_norm": 0.7181898355484009, "learning_rate": 1.6775591801720934e-05, "loss": 0.0892, "step": 21709 }, { "epoch": 0.4783861353958364, "grad_norm": 0.45835375785827637, "learning_rate": 1.6774528788672262e-05, "loss": 0.0714, "step": 21710 }, { "epoch": 0.4784081706853526, "grad_norm": 0.8559809923171997, "learning_rate": 1.6773465766584962e-05, "loss": 0.076, "step": 21711 }, { "epoch": 0.47843020597486874, "grad_norm": 0.7187997698783875, "learning_rate": 1.6772402735464452e-05, "loss": 0.0648, "step": 21712 }, { "epoch": 0.4784522412643849, "grad_norm": 0.9977641105651855, "learning_rate": 1.677133969531615e-05, "loss": 0.1087, "step": 21713 }, { "epoch": 0.47847427655390107, "grad_norm": 0.5465192198753357, "learning_rate": 1.6770276646145455e-05, "loss": 0.0869, "step": 21714 }, { "epoch": 0.47849631184341723, "grad_norm": 0.5878679156303406, "learning_rate": 1.6769213587957794e-05, "loss": 0.0806, "step": 21715 }, { "epoch": 0.4785183471329334, "grad_norm": 0.6775383353233337, "learning_rate": 1.6768150520758587e-05, "loss": 0.0905, "step": 21716 }, { "epoch": 0.47854038242244956, "grad_norm": 0.6673884987831116, "learning_rate": 1.6767087444553233e-05, "loss": 0.0856, "step": 21717 }, { "epoch": 0.4785624177119657, "grad_norm": 0.5781629681587219, "learning_rate": 1.676602435934716e-05, "loss": 0.0743, "step": 21718 }, { "epoch": 0.4785844530014819, "grad_norm": 0.796560525894165, "learning_rate": 1.676496126514577e-05, "loss": 0.0648, "step": 21719 }, { "epoch": 0.47860648829099806, "grad_norm": 0.5292065143585205, "learning_rate": 1.6763898161954497e-05, "loss": 0.0808, "step": 21720 }, { "epoch": 0.4786285235805142, "grad_norm": 0.8250377178192139, "learning_rate": 1.676283504977874e-05, "loss": 0.0773, "step": 21721 }, { "epoch": 0.47865055887003033, "grad_norm": 0.7597025632858276, "learning_rate": 1.676177192862392e-05, "loss": 0.0412, "step": 21722 }, { "epoch": 0.4786725941595465, "grad_norm": 0.462670236825943, "learning_rate": 1.6760708798495448e-05, "loss": 0.0693, "step": 21723 }, { "epoch": 0.47869462944906266, "grad_norm": 0.6116178035736084, "learning_rate": 1.6759645659398742e-05, "loss": 0.0888, "step": 21724 }, { "epoch": 0.4787166647385788, "grad_norm": 0.6942344903945923, "learning_rate": 1.6758582511339218e-05, "loss": 0.078, "step": 21725 }, { "epoch": 0.478738700028095, "grad_norm": 0.9538866281509399, "learning_rate": 1.675751935432229e-05, "loss": 0.0948, "step": 21726 }, { "epoch": 0.47876073531761115, "grad_norm": 0.6557403802871704, "learning_rate": 1.6756456188353372e-05, "loss": 0.059, "step": 21727 }, { "epoch": 0.4787827706071273, "grad_norm": 0.6064855456352234, "learning_rate": 1.6755393013437883e-05, "loss": 0.0429, "step": 21728 }, { "epoch": 0.4788048058966435, "grad_norm": 0.7776321768760681, "learning_rate": 1.6754329829581242e-05, "loss": 0.0857, "step": 21729 }, { "epoch": 0.47882684118615965, "grad_norm": 0.8045376539230347, "learning_rate": 1.675326663678885e-05, "loss": 0.097, "step": 21730 }, { "epoch": 0.4788488764756758, "grad_norm": 0.6725395321846008, "learning_rate": 1.6752203435066134e-05, "loss": 0.1053, "step": 21731 }, { "epoch": 0.478870911765192, "grad_norm": 0.5897374749183655, "learning_rate": 1.6751140224418502e-05, "loss": 0.0845, "step": 21732 }, { "epoch": 0.47889294705470814, "grad_norm": 0.9959542155265808, "learning_rate": 1.6750077004851376e-05, "loss": 0.1131, "step": 21733 }, { "epoch": 0.47891498234422425, "grad_norm": 0.9008551836013794, "learning_rate": 1.674901377637017e-05, "loss": 0.103, "step": 21734 }, { "epoch": 0.4789370176337404, "grad_norm": 0.8120644688606262, "learning_rate": 1.67479505389803e-05, "loss": 0.1017, "step": 21735 }, { "epoch": 0.4789590529232566, "grad_norm": 0.5955781936645508, "learning_rate": 1.6746887292687178e-05, "loss": 0.0968, "step": 21736 }, { "epoch": 0.47898108821277274, "grad_norm": 0.670871376991272, "learning_rate": 1.674582403749623e-05, "loss": 0.0987, "step": 21737 }, { "epoch": 0.4790031235022889, "grad_norm": 0.5007874965667725, "learning_rate": 1.6744760773412855e-05, "loss": 0.0703, "step": 21738 }, { "epoch": 0.4790251587918051, "grad_norm": 0.7523461580276489, "learning_rate": 1.6743697500442483e-05, "loss": 0.0885, "step": 21739 }, { "epoch": 0.47904719408132124, "grad_norm": 0.5932711958885193, "learning_rate": 1.674263421859052e-05, "loss": 0.0749, "step": 21740 }, { "epoch": 0.4790692293708374, "grad_norm": 0.7006412744522095, "learning_rate": 1.674157092786239e-05, "loss": 0.1016, "step": 21741 }, { "epoch": 0.47909126466035357, "grad_norm": 0.5839275121688843, "learning_rate": 1.6740507628263503e-05, "loss": 0.1098, "step": 21742 }, { "epoch": 0.47911329994986973, "grad_norm": 0.8617119193077087, "learning_rate": 1.6739444319799283e-05, "loss": 0.1177, "step": 21743 }, { "epoch": 0.4791353352393859, "grad_norm": 0.6059184670448303, "learning_rate": 1.6738381002475132e-05, "loss": 0.1388, "step": 21744 }, { "epoch": 0.47915737052890206, "grad_norm": 0.6090903282165527, "learning_rate": 1.6737317676296482e-05, "loss": 0.0802, "step": 21745 }, { "epoch": 0.47917940581841817, "grad_norm": 0.6162145137786865, "learning_rate": 1.6736254341268736e-05, "loss": 0.0893, "step": 21746 }, { "epoch": 0.47920144110793433, "grad_norm": 1.117139458656311, "learning_rate": 1.673519099739732e-05, "loss": 0.0863, "step": 21747 }, { "epoch": 0.4792234763974505, "grad_norm": 0.4705543518066406, "learning_rate": 1.673412764468764e-05, "loss": 0.0722, "step": 21748 }, { "epoch": 0.47924551168696666, "grad_norm": 0.3022020757198334, "learning_rate": 1.6733064283145122e-05, "loss": 0.0822, "step": 21749 }, { "epoch": 0.47926754697648283, "grad_norm": 0.3590555787086487, "learning_rate": 1.6732000912775174e-05, "loss": 0.0541, "step": 21750 }, { "epoch": 0.479289582265999, "grad_norm": 0.6192548274993896, "learning_rate": 1.6730937533583225e-05, "loss": 0.0899, "step": 21751 }, { "epoch": 0.47931161755551516, "grad_norm": 0.500284731388092, "learning_rate": 1.6729874145574677e-05, "loss": 0.0493, "step": 21752 }, { "epoch": 0.4793336528450313, "grad_norm": 0.3565725088119507, "learning_rate": 1.6728810748754952e-05, "loss": 0.0622, "step": 21753 }, { "epoch": 0.4793556881345475, "grad_norm": 0.5681517124176025, "learning_rate": 1.672774734312947e-05, "loss": 0.0746, "step": 21754 }, { "epoch": 0.47937772342406365, "grad_norm": 0.4688090682029724, "learning_rate": 1.672668392870364e-05, "loss": 0.0779, "step": 21755 }, { "epoch": 0.4793997587135798, "grad_norm": 1.1212360858917236, "learning_rate": 1.6725620505482886e-05, "loss": 0.1049, "step": 21756 }, { "epoch": 0.479421794003096, "grad_norm": 0.6487134099006653, "learning_rate": 1.672455707347262e-05, "loss": 0.136, "step": 21757 }, { "epoch": 0.47944382929261214, "grad_norm": 0.8811782002449036, "learning_rate": 1.672349363267826e-05, "loss": 0.0696, "step": 21758 }, { "epoch": 0.47946586458212825, "grad_norm": 1.8798617124557495, "learning_rate": 1.6722430183105223e-05, "loss": 0.066, "step": 21759 }, { "epoch": 0.4794878998716444, "grad_norm": 0.6593592166900635, "learning_rate": 1.6721366724758926e-05, "loss": 0.0799, "step": 21760 }, { "epoch": 0.4795099351611606, "grad_norm": 0.43028727173805237, "learning_rate": 1.6720303257644788e-05, "loss": 0.0793, "step": 21761 }, { "epoch": 0.47953197045067675, "grad_norm": 1.081879734992981, "learning_rate": 1.6719239781768223e-05, "loss": 0.1044, "step": 21762 }, { "epoch": 0.4795540057401929, "grad_norm": 0.5258978009223938, "learning_rate": 1.671817629713464e-05, "loss": 0.0623, "step": 21763 }, { "epoch": 0.4795760410297091, "grad_norm": 0.8669946789741516, "learning_rate": 1.6717112803749468e-05, "loss": 0.0773, "step": 21764 }, { "epoch": 0.47959807631922524, "grad_norm": 0.9754372239112854, "learning_rate": 1.671604930161812e-05, "loss": 0.092, "step": 21765 }, { "epoch": 0.4796201116087414, "grad_norm": 0.58317631483078, "learning_rate": 1.6714985790746016e-05, "loss": 0.053, "step": 21766 }, { "epoch": 0.47964214689825757, "grad_norm": 0.662013053894043, "learning_rate": 1.6713922271138563e-05, "loss": 0.0781, "step": 21767 }, { "epoch": 0.47966418218777374, "grad_norm": 0.7797431349754333, "learning_rate": 1.671285874280119e-05, "loss": 0.0691, "step": 21768 }, { "epoch": 0.4796862174772899, "grad_norm": 0.6793907284736633, "learning_rate": 1.671179520573931e-05, "loss": 0.044, "step": 21769 }, { "epoch": 0.47970825276680606, "grad_norm": 0.4889509379863739, "learning_rate": 1.671073165995834e-05, "loss": 0.0989, "step": 21770 }, { "epoch": 0.4797302880563222, "grad_norm": 0.6040675640106201, "learning_rate": 1.670966810546369e-05, "loss": 0.0921, "step": 21771 }, { "epoch": 0.47975232334583834, "grad_norm": 0.6704245805740356, "learning_rate": 1.6708604542260788e-05, "loss": 0.1025, "step": 21772 }, { "epoch": 0.4797743586353545, "grad_norm": 0.6031422019004822, "learning_rate": 1.670754097035504e-05, "loss": 0.0713, "step": 21773 }, { "epoch": 0.47979639392487067, "grad_norm": 0.39442095160484314, "learning_rate": 1.6706477389751877e-05, "loss": 0.0715, "step": 21774 }, { "epoch": 0.47981842921438683, "grad_norm": 0.9452651739120483, "learning_rate": 1.6705413800456706e-05, "loss": 0.0734, "step": 21775 }, { "epoch": 0.479840464503903, "grad_norm": 0.4225628972053528, "learning_rate": 1.670435020247495e-05, "loss": 0.0795, "step": 21776 }, { "epoch": 0.47986249979341916, "grad_norm": 0.5103094577789307, "learning_rate": 1.670328659581203e-05, "loss": 0.1054, "step": 21777 }, { "epoch": 0.4798845350829353, "grad_norm": 0.6178615689277649, "learning_rate": 1.670222298047335e-05, "loss": 0.1049, "step": 21778 }, { "epoch": 0.4799065703724515, "grad_norm": 0.65900057554245, "learning_rate": 1.6701159356464336e-05, "loss": 0.0553, "step": 21779 }, { "epoch": 0.47992860566196766, "grad_norm": 0.5010220408439636, "learning_rate": 1.6700095723790408e-05, "loss": 0.0773, "step": 21780 }, { "epoch": 0.4799506409514838, "grad_norm": 1.0772455930709839, "learning_rate": 1.6699032082456982e-05, "loss": 0.0966, "step": 21781 }, { "epoch": 0.479972676241, "grad_norm": 0.8234732151031494, "learning_rate": 1.669796843246947e-05, "loss": 0.0632, "step": 21782 }, { "epoch": 0.4799947115305161, "grad_norm": 0.6087550520896912, "learning_rate": 1.66969047738333e-05, "loss": 0.0926, "step": 21783 }, { "epoch": 0.48001674682003226, "grad_norm": 1.1818121671676636, "learning_rate": 1.669584110655388e-05, "loss": 0.0788, "step": 21784 }, { "epoch": 0.4800387821095484, "grad_norm": 0.4816778898239136, "learning_rate": 1.6694777430636635e-05, "loss": 0.064, "step": 21785 }, { "epoch": 0.4800608173990646, "grad_norm": 0.5124028325080872, "learning_rate": 1.6693713746086974e-05, "loss": 0.052, "step": 21786 }, { "epoch": 0.48008285268858075, "grad_norm": 0.5418612360954285, "learning_rate": 1.6692650052910323e-05, "loss": 0.0815, "step": 21787 }, { "epoch": 0.4801048879780969, "grad_norm": 0.6267340183258057, "learning_rate": 1.6691586351112097e-05, "loss": 0.0645, "step": 21788 }, { "epoch": 0.4801269232676131, "grad_norm": 0.6069766283035278, "learning_rate": 1.669052264069772e-05, "loss": 0.086, "step": 21789 }, { "epoch": 0.48014895855712925, "grad_norm": 0.6079531311988831, "learning_rate": 1.6689458921672598e-05, "loss": 0.108, "step": 21790 }, { "epoch": 0.4801709938466454, "grad_norm": 0.6872689723968506, "learning_rate": 1.6688395194042157e-05, "loss": 0.0636, "step": 21791 }, { "epoch": 0.4801930291361616, "grad_norm": 0.8068956136703491, "learning_rate": 1.6687331457811815e-05, "loss": 0.0988, "step": 21792 }, { "epoch": 0.48021506442567774, "grad_norm": 0.33877596259117126, "learning_rate": 1.6686267712986998e-05, "loss": 0.0707, "step": 21793 }, { "epoch": 0.4802370997151939, "grad_norm": 0.5694901347160339, "learning_rate": 1.66852039595731e-05, "loss": 0.0564, "step": 21794 }, { "epoch": 0.48025913500471007, "grad_norm": 0.6888086199760437, "learning_rate": 1.6684140197575566e-05, "loss": 0.07, "step": 21795 }, { "epoch": 0.4802811702942262, "grad_norm": 1.116105318069458, "learning_rate": 1.6683076426999796e-05, "loss": 0.0725, "step": 21796 }, { "epoch": 0.48030320558374234, "grad_norm": 0.6438319683074951, "learning_rate": 1.668201264785122e-05, "loss": 0.0574, "step": 21797 }, { "epoch": 0.4803252408732585, "grad_norm": 0.5212684273719788, "learning_rate": 1.6680948860135245e-05, "loss": 0.0677, "step": 21798 }, { "epoch": 0.4803472761627747, "grad_norm": 0.4840582013130188, "learning_rate": 1.6679885063857297e-05, "loss": 0.0587, "step": 21799 }, { "epoch": 0.48036931145229084, "grad_norm": 0.9058149456977844, "learning_rate": 1.66788212590228e-05, "loss": 0.0791, "step": 21800 }, { "epoch": 0.480391346741807, "grad_norm": 1.1979018449783325, "learning_rate": 1.6677757445637163e-05, "loss": 0.0866, "step": 21801 }, { "epoch": 0.48041338203132317, "grad_norm": 0.9440469741821289, "learning_rate": 1.6676693623705812e-05, "loss": 0.0952, "step": 21802 }, { "epoch": 0.48043541732083933, "grad_norm": 0.5235162377357483, "learning_rate": 1.6675629793234154e-05, "loss": 0.0543, "step": 21803 }, { "epoch": 0.4804574526103555, "grad_norm": 0.9392902851104736, "learning_rate": 1.667456595422762e-05, "loss": 0.0838, "step": 21804 }, { "epoch": 0.48047948789987166, "grad_norm": 0.6336917281150818, "learning_rate": 1.6673502106691617e-05, "loss": 0.0769, "step": 21805 }, { "epoch": 0.4805015231893878, "grad_norm": 0.6138337254524231, "learning_rate": 1.6672438250631578e-05, "loss": 0.0881, "step": 21806 }, { "epoch": 0.480523558478904, "grad_norm": 0.5755981206893921, "learning_rate": 1.6671374386052908e-05, "loss": 0.1057, "step": 21807 }, { "epoch": 0.4805455937684201, "grad_norm": 0.6289457678794861, "learning_rate": 1.6670310512961038e-05, "loss": 0.1016, "step": 21808 }, { "epoch": 0.48056762905793626, "grad_norm": 0.6347538828849792, "learning_rate": 1.6669246631361376e-05, "loss": 0.0725, "step": 21809 }, { "epoch": 0.4805896643474524, "grad_norm": 0.6767627000808716, "learning_rate": 1.6668182741259347e-05, "loss": 0.095, "step": 21810 }, { "epoch": 0.4806116996369686, "grad_norm": 0.6662485003471375, "learning_rate": 1.6667118842660368e-05, "loss": 0.0983, "step": 21811 }, { "epoch": 0.48063373492648476, "grad_norm": 0.7548395395278931, "learning_rate": 1.6666054935569863e-05, "loss": 0.0917, "step": 21812 }, { "epoch": 0.4806557702160009, "grad_norm": 0.5329878926277161, "learning_rate": 1.6664991019993243e-05, "loss": 0.0539, "step": 21813 }, { "epoch": 0.4806778055055171, "grad_norm": 0.551024854183197, "learning_rate": 1.6663927095935933e-05, "loss": 0.0858, "step": 21814 }, { "epoch": 0.48069984079503325, "grad_norm": 0.8782761096954346, "learning_rate": 1.6662863163403343e-05, "loss": 0.082, "step": 21815 }, { "epoch": 0.4807218760845494, "grad_norm": 0.7486280202865601, "learning_rate": 1.666179922240091e-05, "loss": 0.0738, "step": 21816 }, { "epoch": 0.4807439113740656, "grad_norm": 0.4738186299800873, "learning_rate": 1.6660735272934036e-05, "loss": 0.0721, "step": 21817 }, { "epoch": 0.48076594666358174, "grad_norm": 0.9292901754379272, "learning_rate": 1.665967131500815e-05, "loss": 0.0877, "step": 21818 }, { "epoch": 0.4807879819530979, "grad_norm": 0.5322051644325256, "learning_rate": 1.6658607348628664e-05, "loss": 0.0507, "step": 21819 }, { "epoch": 0.4808100172426141, "grad_norm": 0.864497184753418, "learning_rate": 1.6657543373801005e-05, "loss": 0.09, "step": 21820 }, { "epoch": 0.4808320525321302, "grad_norm": 0.3969947397708893, "learning_rate": 1.6656479390530583e-05, "loss": 0.0548, "step": 21821 }, { "epoch": 0.48085408782164635, "grad_norm": 0.7619695067405701, "learning_rate": 1.6655415398822828e-05, "loss": 0.0944, "step": 21822 }, { "epoch": 0.4808761231111625, "grad_norm": 0.8736310601234436, "learning_rate": 1.6654351398683155e-05, "loss": 0.0781, "step": 21823 }, { "epoch": 0.4808981584006787, "grad_norm": 0.7230688333511353, "learning_rate": 1.665328739011698e-05, "loss": 0.0669, "step": 21824 }, { "epoch": 0.48092019369019484, "grad_norm": 0.588503360748291, "learning_rate": 1.6652223373129732e-05, "loss": 0.0649, "step": 21825 }, { "epoch": 0.480942228979711, "grad_norm": 0.6989320516586304, "learning_rate": 1.665115934772682e-05, "loss": 0.0769, "step": 21826 }, { "epoch": 0.48096426426922717, "grad_norm": 0.7632246613502502, "learning_rate": 1.6650095313913668e-05, "loss": 0.083, "step": 21827 }, { "epoch": 0.48098629955874334, "grad_norm": 0.6302728056907654, "learning_rate": 1.6649031271695695e-05, "loss": 0.0538, "step": 21828 }, { "epoch": 0.4810083348482595, "grad_norm": 0.594123363494873, "learning_rate": 1.664796722107832e-05, "loss": 0.0458, "step": 21829 }, { "epoch": 0.48103037013777566, "grad_norm": 0.40000373125076294, "learning_rate": 1.6646903162066967e-05, "loss": 0.0652, "step": 21830 }, { "epoch": 0.48105240542729183, "grad_norm": 0.6626507639884949, "learning_rate": 1.6645839094667054e-05, "loss": 0.0881, "step": 21831 }, { "epoch": 0.481074440716808, "grad_norm": 0.47252076864242554, "learning_rate": 1.6644775018884002e-05, "loss": 0.0756, "step": 21832 }, { "epoch": 0.4810964760063241, "grad_norm": 0.8259348273277283, "learning_rate": 1.6643710934723228e-05, "loss": 0.0908, "step": 21833 }, { "epoch": 0.48111851129584027, "grad_norm": 0.6408388614654541, "learning_rate": 1.6642646842190146e-05, "loss": 0.0387, "step": 21834 }, { "epoch": 0.48114054658535643, "grad_norm": 0.5864738821983337, "learning_rate": 1.664158274129019e-05, "loss": 0.0908, "step": 21835 }, { "epoch": 0.4811625818748726, "grad_norm": 0.7191396951675415, "learning_rate": 1.6640518632028765e-05, "loss": 0.0739, "step": 21836 }, { "epoch": 0.48118461716438876, "grad_norm": 0.7669519782066345, "learning_rate": 1.6639454514411308e-05, "loss": 0.0813, "step": 21837 }, { "epoch": 0.4812066524539049, "grad_norm": 0.9482479691505432, "learning_rate": 1.6638390388443224e-05, "loss": 0.0868, "step": 21838 }, { "epoch": 0.4812286877434211, "grad_norm": 0.47025352716445923, "learning_rate": 1.663732625412994e-05, "loss": 0.0461, "step": 21839 }, { "epoch": 0.48125072303293726, "grad_norm": 0.48689278960227966, "learning_rate": 1.663626211147688e-05, "loss": 0.0772, "step": 21840 }, { "epoch": 0.4812727583224534, "grad_norm": 1.0969210863113403, "learning_rate": 1.6635197960489458e-05, "loss": 0.0638, "step": 21841 }, { "epoch": 0.4812947936119696, "grad_norm": 0.7451611161231995, "learning_rate": 1.663413380117309e-05, "loss": 0.0883, "step": 21842 }, { "epoch": 0.48131682890148575, "grad_norm": 0.8851346373558044, "learning_rate": 1.6633069633533208e-05, "loss": 0.1179, "step": 21843 }, { "epoch": 0.4813388641910019, "grad_norm": 0.6103817820549011, "learning_rate": 1.663200545757522e-05, "loss": 0.0652, "step": 21844 }, { "epoch": 0.481360899480518, "grad_norm": 0.9043713808059692, "learning_rate": 1.6630941273304558e-05, "loss": 0.0913, "step": 21845 }, { "epoch": 0.4813829347700342, "grad_norm": 0.7557879090309143, "learning_rate": 1.662987708072664e-05, "loss": 0.1016, "step": 21846 }, { "epoch": 0.48140497005955035, "grad_norm": 0.749559760093689, "learning_rate": 1.662881287984688e-05, "loss": 0.0769, "step": 21847 }, { "epoch": 0.4814270053490665, "grad_norm": 0.9171212911605835, "learning_rate": 1.6627748670670704e-05, "loss": 0.0921, "step": 21848 }, { "epoch": 0.4814490406385827, "grad_norm": 1.024139642715454, "learning_rate": 1.6626684453203533e-05, "loss": 0.0759, "step": 21849 }, { "epoch": 0.48147107592809885, "grad_norm": 0.14410892128944397, "learning_rate": 1.6625620227450786e-05, "loss": 0.0487, "step": 21850 }, { "epoch": 0.481493111217615, "grad_norm": 0.7676162719726562, "learning_rate": 1.6624555993417876e-05, "loss": 0.1007, "step": 21851 }, { "epoch": 0.4815151465071312, "grad_norm": 0.742073118686676, "learning_rate": 1.662349175111024e-05, "loss": 0.0598, "step": 21852 }, { "epoch": 0.48153718179664734, "grad_norm": 0.9745394587516785, "learning_rate": 1.6622427500533286e-05, "loss": 0.0703, "step": 21853 }, { "epoch": 0.4815592170861635, "grad_norm": 0.7434545159339905, "learning_rate": 1.662136324169244e-05, "loss": 0.0647, "step": 21854 }, { "epoch": 0.48158125237567967, "grad_norm": 0.523565948009491, "learning_rate": 1.662029897459312e-05, "loss": 0.075, "step": 21855 }, { "epoch": 0.48160328766519583, "grad_norm": 0.5903003811836243, "learning_rate": 1.6619234699240746e-05, "loss": 0.0831, "step": 21856 }, { "epoch": 0.481625322954712, "grad_norm": 0.6942310929298401, "learning_rate": 1.661817041564075e-05, "loss": 0.0662, "step": 21857 }, { "epoch": 0.4816473582442281, "grad_norm": 0.8445126414299011, "learning_rate": 1.6617106123798538e-05, "loss": 0.0723, "step": 21858 }, { "epoch": 0.48166939353374427, "grad_norm": 0.6754735112190247, "learning_rate": 1.6616041823719538e-05, "loss": 0.0514, "step": 21859 }, { "epoch": 0.48169142882326044, "grad_norm": 1.0203180313110352, "learning_rate": 1.661497751540917e-05, "loss": 0.1138, "step": 21860 }, { "epoch": 0.4817134641127766, "grad_norm": 1.0305322408676147, "learning_rate": 1.6613913198872856e-05, "loss": 0.0634, "step": 21861 }, { "epoch": 0.48173549940229277, "grad_norm": 0.7067091464996338, "learning_rate": 1.6612848874116015e-05, "loss": 0.0842, "step": 21862 }, { "epoch": 0.48175753469180893, "grad_norm": 0.7941879630088806, "learning_rate": 1.6611784541144073e-05, "loss": 0.0715, "step": 21863 }, { "epoch": 0.4817795699813251, "grad_norm": 0.7654287219047546, "learning_rate": 1.6610720199962447e-05, "loss": 0.1072, "step": 21864 }, { "epoch": 0.48180160527084126, "grad_norm": 1.1024876832962036, "learning_rate": 1.6609655850576562e-05, "loss": 0.0845, "step": 21865 }, { "epoch": 0.4818236405603574, "grad_norm": 0.26540425419807434, "learning_rate": 1.6608591492991828e-05, "loss": 0.05, "step": 21866 }, { "epoch": 0.4818456758498736, "grad_norm": 0.5863766074180603, "learning_rate": 1.6607527127213683e-05, "loss": 0.0748, "step": 21867 }, { "epoch": 0.48186771113938975, "grad_norm": 0.6064209342002869, "learning_rate": 1.660646275324754e-05, "loss": 0.0748, "step": 21868 }, { "epoch": 0.4818897464289059, "grad_norm": 0.9223464727401733, "learning_rate": 1.6605398371098815e-05, "loss": 0.0914, "step": 21869 }, { "epoch": 0.481911781718422, "grad_norm": 0.4581642746925354, "learning_rate": 1.6604333980772937e-05, "loss": 0.0548, "step": 21870 }, { "epoch": 0.4819338170079382, "grad_norm": 0.7099100351333618, "learning_rate": 1.660326958227533e-05, "loss": 0.0817, "step": 21871 }, { "epoch": 0.48195585229745436, "grad_norm": 0.6954618096351624, "learning_rate": 1.660220517561141e-05, "loss": 0.062, "step": 21872 }, { "epoch": 0.4819778875869705, "grad_norm": 0.689385175704956, "learning_rate": 1.66011407607866e-05, "loss": 0.0885, "step": 21873 }, { "epoch": 0.4819999228764867, "grad_norm": 0.49697399139404297, "learning_rate": 1.6600076337806316e-05, "loss": 0.0712, "step": 21874 }, { "epoch": 0.48202195816600285, "grad_norm": 0.7993217706680298, "learning_rate": 1.659901190667599e-05, "loss": 0.1224, "step": 21875 }, { "epoch": 0.482043993455519, "grad_norm": 0.8848064541816711, "learning_rate": 1.6597947467401038e-05, "loss": 0.0918, "step": 21876 }, { "epoch": 0.4820660287450352, "grad_norm": 0.58690345287323, "learning_rate": 1.6596883019986885e-05, "loss": 0.0819, "step": 21877 }, { "epoch": 0.48208806403455134, "grad_norm": 0.7953940629959106, "learning_rate": 1.6595818564438945e-05, "loss": 0.1097, "step": 21878 }, { "epoch": 0.4821100993240675, "grad_norm": 1.031300663948059, "learning_rate": 1.659475410076265e-05, "loss": 0.0841, "step": 21879 }, { "epoch": 0.4821321346135837, "grad_norm": 0.621849775314331, "learning_rate": 1.6593689628963417e-05, "loss": 0.0798, "step": 21880 }, { "epoch": 0.48215416990309984, "grad_norm": 0.3805106580257416, "learning_rate": 1.6592625149046668e-05, "loss": 0.0806, "step": 21881 }, { "epoch": 0.48217620519261595, "grad_norm": 0.5514510869979858, "learning_rate": 1.6591560661017823e-05, "loss": 0.0639, "step": 21882 }, { "epoch": 0.4821982404821321, "grad_norm": 0.5288553237915039, "learning_rate": 1.6590496164882308e-05, "loss": 0.0836, "step": 21883 }, { "epoch": 0.4822202757716483, "grad_norm": 0.6247018575668335, "learning_rate": 1.6589431660645538e-05, "loss": 0.0781, "step": 21884 }, { "epoch": 0.48224231106116444, "grad_norm": 0.49593478441238403, "learning_rate": 1.6588367148312947e-05, "loss": 0.0974, "step": 21885 }, { "epoch": 0.4822643463506806, "grad_norm": 0.8704904913902283, "learning_rate": 1.6587302627889947e-05, "loss": 0.0821, "step": 21886 }, { "epoch": 0.48228638164019677, "grad_norm": 0.3883330821990967, "learning_rate": 1.6586238099381967e-05, "loss": 0.0596, "step": 21887 }, { "epoch": 0.48230841692971294, "grad_norm": 0.7907826900482178, "learning_rate": 1.658517356279442e-05, "loss": 0.0731, "step": 21888 }, { "epoch": 0.4823304522192291, "grad_norm": 0.7257102131843567, "learning_rate": 1.6584109018132736e-05, "loss": 0.1206, "step": 21889 }, { "epoch": 0.48235248750874526, "grad_norm": 0.6406998038291931, "learning_rate": 1.6583044465402338e-05, "loss": 0.0985, "step": 21890 }, { "epoch": 0.48237452279826143, "grad_norm": 0.4858406186103821, "learning_rate": 1.6581979904608644e-05, "loss": 0.0922, "step": 21891 }, { "epoch": 0.4823965580877776, "grad_norm": 0.5044015645980835, "learning_rate": 1.658091533575708e-05, "loss": 0.0757, "step": 21892 }, { "epoch": 0.48241859337729376, "grad_norm": 0.6039717793464661, "learning_rate": 1.6579850758853064e-05, "loss": 0.0817, "step": 21893 }, { "epoch": 0.4824406286668099, "grad_norm": 0.8074206709861755, "learning_rate": 1.6578786173902023e-05, "loss": 0.1036, "step": 21894 }, { "epoch": 0.48246266395632603, "grad_norm": 0.9972670674324036, "learning_rate": 1.6577721580909374e-05, "loss": 0.0539, "step": 21895 }, { "epoch": 0.4824846992458422, "grad_norm": 0.4001673460006714, "learning_rate": 1.6576656979880547e-05, "loss": 0.0941, "step": 21896 }, { "epoch": 0.48250673453535836, "grad_norm": 0.6745200157165527, "learning_rate": 1.6575592370820957e-05, "loss": 0.074, "step": 21897 }, { "epoch": 0.4825287698248745, "grad_norm": 0.6945816278457642, "learning_rate": 1.657452775373603e-05, "loss": 0.0777, "step": 21898 }, { "epoch": 0.4825508051143907, "grad_norm": 0.8443835377693176, "learning_rate": 1.657346312863119e-05, "loss": 0.0761, "step": 21899 }, { "epoch": 0.48257284040390686, "grad_norm": 0.8544051647186279, "learning_rate": 1.657239849551186e-05, "loss": 0.1021, "step": 21900 }, { "epoch": 0.482594875693423, "grad_norm": 0.6969783306121826, "learning_rate": 1.657133385438346e-05, "loss": 0.0902, "step": 21901 }, { "epoch": 0.4826169109829392, "grad_norm": 0.562284529209137, "learning_rate": 1.6570269205251416e-05, "loss": 0.0627, "step": 21902 }, { "epoch": 0.48263894627245535, "grad_norm": 1.2780996561050415, "learning_rate": 1.656920454812114e-05, "loss": 0.0667, "step": 21903 }, { "epoch": 0.4826609815619715, "grad_norm": 0.6125144958496094, "learning_rate": 1.6568139882998076e-05, "loss": 0.0593, "step": 21904 }, { "epoch": 0.4826830168514877, "grad_norm": 0.8016558289527893, "learning_rate": 1.656707520988763e-05, "loss": 0.0748, "step": 21905 }, { "epoch": 0.48270505214100384, "grad_norm": 0.8975573182106018, "learning_rate": 1.656601052879523e-05, "loss": 0.0886, "step": 21906 }, { "epoch": 0.48272708743051995, "grad_norm": 0.4862320125102997, "learning_rate": 1.6564945839726296e-05, "loss": 0.0903, "step": 21907 }, { "epoch": 0.4827491227200361, "grad_norm": 0.7853360176086426, "learning_rate": 1.6563881142686258e-05, "loss": 0.0718, "step": 21908 }, { "epoch": 0.4827711580095523, "grad_norm": 0.8667099475860596, "learning_rate": 1.656281643768053e-05, "loss": 0.0825, "step": 21909 }, { "epoch": 0.48279319329906845, "grad_norm": 0.593688428401947, "learning_rate": 1.6561751724714543e-05, "loss": 0.0698, "step": 21910 }, { "epoch": 0.4828152285885846, "grad_norm": 1.039484977722168, "learning_rate": 1.656068700379371e-05, "loss": 0.0731, "step": 21911 }, { "epoch": 0.4828372638781008, "grad_norm": 0.5273775458335876, "learning_rate": 1.6559622274923473e-05, "loss": 0.072, "step": 21912 }, { "epoch": 0.48285929916761694, "grad_norm": 0.6055644750595093, "learning_rate": 1.6558557538109237e-05, "loss": 0.0691, "step": 21913 }, { "epoch": 0.4828813344571331, "grad_norm": 0.8072613477706909, "learning_rate": 1.655749279335643e-05, "loss": 0.0765, "step": 21914 }, { "epoch": 0.48290336974664927, "grad_norm": 0.7267977595329285, "learning_rate": 1.655642804067048e-05, "loss": 0.1105, "step": 21915 }, { "epoch": 0.48292540503616543, "grad_norm": 0.47551748156547546, "learning_rate": 1.6555363280056807e-05, "loss": 0.0787, "step": 21916 }, { "epoch": 0.4829474403256816, "grad_norm": 0.9589248895645142, "learning_rate": 1.6554298511520835e-05, "loss": 0.086, "step": 21917 }, { "epoch": 0.48296947561519776, "grad_norm": 1.410102128982544, "learning_rate": 1.655323373506798e-05, "loss": 0.0582, "step": 21918 }, { "epoch": 0.48299151090471387, "grad_norm": 0.6482164859771729, "learning_rate": 1.6552168950703684e-05, "loss": 0.0784, "step": 21919 }, { "epoch": 0.48301354619423004, "grad_norm": 0.9795392751693726, "learning_rate": 1.6551104158433358e-05, "loss": 0.0857, "step": 21920 }, { "epoch": 0.4830355814837462, "grad_norm": 0.711173951625824, "learning_rate": 1.6550039358262423e-05, "loss": 0.064, "step": 21921 }, { "epoch": 0.48305761677326237, "grad_norm": 1.1362946033477783, "learning_rate": 1.6548974550196305e-05, "loss": 0.1253, "step": 21922 }, { "epoch": 0.48307965206277853, "grad_norm": 0.4398077428340912, "learning_rate": 1.654790973424043e-05, "loss": 0.0699, "step": 21923 }, { "epoch": 0.4831016873522947, "grad_norm": 0.7179225087165833, "learning_rate": 1.654684491040022e-05, "loss": 0.0916, "step": 21924 }, { "epoch": 0.48312372264181086, "grad_norm": 0.7362048625946045, "learning_rate": 1.65457800786811e-05, "loss": 0.0844, "step": 21925 }, { "epoch": 0.483145757931327, "grad_norm": 0.7593488097190857, "learning_rate": 1.6544715239088492e-05, "loss": 0.1153, "step": 21926 }, { "epoch": 0.4831677932208432, "grad_norm": 0.8930110335350037, "learning_rate": 1.6543650391627823e-05, "loss": 0.1041, "step": 21927 }, { "epoch": 0.48318982851035935, "grad_norm": 1.8582165241241455, "learning_rate": 1.654258553630452e-05, "loss": 0.0868, "step": 21928 }, { "epoch": 0.4832118637998755, "grad_norm": 0.4177072048187256, "learning_rate": 1.6541520673123997e-05, "loss": 0.0892, "step": 21929 }, { "epoch": 0.4832338990893917, "grad_norm": 0.6966196894645691, "learning_rate": 1.654045580209168e-05, "loss": 0.1053, "step": 21930 }, { "epoch": 0.48325593437890785, "grad_norm": 0.6574182510375977, "learning_rate": 1.6539390923213e-05, "loss": 0.0702, "step": 21931 }, { "epoch": 0.48327796966842396, "grad_norm": 0.5730612277984619, "learning_rate": 1.6538326036493366e-05, "loss": 0.0882, "step": 21932 }, { "epoch": 0.4833000049579401, "grad_norm": 0.43523964285850525, "learning_rate": 1.653726114193822e-05, "loss": 0.0613, "step": 21933 }, { "epoch": 0.4833220402474563, "grad_norm": 0.8743044137954712, "learning_rate": 1.653619623955298e-05, "loss": 0.0941, "step": 21934 }, { "epoch": 0.48334407553697245, "grad_norm": 0.39145976305007935, "learning_rate": 1.653513132934307e-05, "loss": 0.049, "step": 21935 }, { "epoch": 0.4833661108264886, "grad_norm": 0.5276086330413818, "learning_rate": 1.6534066411313912e-05, "loss": 0.0407, "step": 21936 }, { "epoch": 0.4833881461160048, "grad_norm": 0.9289599657058716, "learning_rate": 1.653300148547093e-05, "loss": 0.0882, "step": 21937 }, { "epoch": 0.48341018140552094, "grad_norm": 0.6525672078132629, "learning_rate": 1.6531936551819548e-05, "loss": 0.059, "step": 21938 }, { "epoch": 0.4834322166950371, "grad_norm": 0.5992238521575928, "learning_rate": 1.653087161036519e-05, "loss": 0.0426, "step": 21939 }, { "epoch": 0.4834542519845533, "grad_norm": 0.8522448539733887, "learning_rate": 1.6529806661113286e-05, "loss": 0.0794, "step": 21940 }, { "epoch": 0.48347628727406944, "grad_norm": 0.6473336815834045, "learning_rate": 1.6528741704069255e-05, "loss": 0.0837, "step": 21941 }, { "epoch": 0.4834983225635856, "grad_norm": 0.8391017913818359, "learning_rate": 1.6527676739238527e-05, "loss": 0.0769, "step": 21942 }, { "epoch": 0.48352035785310177, "grad_norm": 0.4063705801963806, "learning_rate": 1.6526611766626517e-05, "loss": 0.0546, "step": 21943 }, { "epoch": 0.4835423931426179, "grad_norm": 0.6408102512359619, "learning_rate": 1.652554678623866e-05, "loss": 0.0684, "step": 21944 }, { "epoch": 0.48356442843213404, "grad_norm": 0.7023476362228394, "learning_rate": 1.6524481798080368e-05, "loss": 0.086, "step": 21945 }, { "epoch": 0.4835864637216502, "grad_norm": 0.34179985523223877, "learning_rate": 1.6523416802157073e-05, "loss": 0.0753, "step": 21946 }, { "epoch": 0.48360849901116637, "grad_norm": 0.4554068446159363, "learning_rate": 1.65223517984742e-05, "loss": 0.0826, "step": 21947 }, { "epoch": 0.48363053430068254, "grad_norm": 0.7524392008781433, "learning_rate": 1.652128678703718e-05, "loss": 0.0961, "step": 21948 }, { "epoch": 0.4836525695901987, "grad_norm": 0.5665683746337891, "learning_rate": 1.6520221767851422e-05, "loss": 0.0636, "step": 21949 }, { "epoch": 0.48367460487971486, "grad_norm": 0.4427792727947235, "learning_rate": 1.6519156740922364e-05, "loss": 0.0873, "step": 21950 }, { "epoch": 0.48369664016923103, "grad_norm": 0.6328047513961792, "learning_rate": 1.6518091706255427e-05, "loss": 0.073, "step": 21951 }, { "epoch": 0.4837186754587472, "grad_norm": 0.7967235445976257, "learning_rate": 1.6517026663856034e-05, "loss": 0.0873, "step": 21952 }, { "epoch": 0.48374071074826336, "grad_norm": 0.46852949261665344, "learning_rate": 1.6515961613729607e-05, "loss": 0.0897, "step": 21953 }, { "epoch": 0.4837627460377795, "grad_norm": 0.7524840235710144, "learning_rate": 1.651489655588158e-05, "loss": 0.0928, "step": 21954 }, { "epoch": 0.4837847813272957, "grad_norm": 0.4570351839065552, "learning_rate": 1.6513831490317367e-05, "loss": 0.0623, "step": 21955 }, { "epoch": 0.4838068166168118, "grad_norm": 0.6225950717926025, "learning_rate": 1.6512766417042397e-05, "loss": 0.0633, "step": 21956 }, { "epoch": 0.48382885190632796, "grad_norm": 0.5417986512184143, "learning_rate": 1.6511701336062103e-05, "loss": 0.0809, "step": 21957 }, { "epoch": 0.4838508871958441, "grad_norm": 0.5701280832290649, "learning_rate": 1.6510636247381897e-05, "loss": 0.0666, "step": 21958 }, { "epoch": 0.4838729224853603, "grad_norm": 0.6368579268455505, "learning_rate": 1.6509571151007217e-05, "loss": 0.0624, "step": 21959 }, { "epoch": 0.48389495777487646, "grad_norm": 0.9723055362701416, "learning_rate": 1.650850604694348e-05, "loss": 0.1223, "step": 21960 }, { "epoch": 0.4839169930643926, "grad_norm": 0.7111265063285828, "learning_rate": 1.6507440935196114e-05, "loss": 0.0862, "step": 21961 }, { "epoch": 0.4839390283539088, "grad_norm": 0.7261636257171631, "learning_rate": 1.6506375815770537e-05, "loss": 0.0626, "step": 21962 }, { "epoch": 0.48396106364342495, "grad_norm": 0.8809906840324402, "learning_rate": 1.6505310688672182e-05, "loss": 0.0674, "step": 21963 }, { "epoch": 0.4839830989329411, "grad_norm": 0.362069308757782, "learning_rate": 1.6504245553906476e-05, "loss": 0.0619, "step": 21964 }, { "epoch": 0.4840051342224573, "grad_norm": 0.6562592387199402, "learning_rate": 1.650318041147884e-05, "loss": 0.0775, "step": 21965 }, { "epoch": 0.48402716951197344, "grad_norm": 0.6444836854934692, "learning_rate": 1.6502115261394696e-05, "loss": 0.0757, "step": 21966 }, { "epoch": 0.4840492048014896, "grad_norm": 0.6894195079803467, "learning_rate": 1.650105010365948e-05, "loss": 0.0945, "step": 21967 }, { "epoch": 0.48407124009100577, "grad_norm": 0.4712142050266266, "learning_rate": 1.6499984938278607e-05, "loss": 0.0664, "step": 21968 }, { "epoch": 0.4840932753805219, "grad_norm": 0.9828749299049377, "learning_rate": 1.6498919765257507e-05, "loss": 0.0725, "step": 21969 }, { "epoch": 0.48411531067003805, "grad_norm": 0.4550868272781372, "learning_rate": 1.6497854584601606e-05, "loss": 0.056, "step": 21970 }, { "epoch": 0.4841373459595542, "grad_norm": 0.8475471138954163, "learning_rate": 1.6496789396316326e-05, "loss": 0.0791, "step": 21971 }, { "epoch": 0.4841593812490704, "grad_norm": 0.794724702835083, "learning_rate": 1.6495724200407095e-05, "loss": 0.0815, "step": 21972 }, { "epoch": 0.48418141653858654, "grad_norm": 0.48643288016319275, "learning_rate": 1.6494658996879342e-05, "loss": 0.0559, "step": 21973 }, { "epoch": 0.4842034518281027, "grad_norm": 0.5411329865455627, "learning_rate": 1.6493593785738484e-05, "loss": 0.1024, "step": 21974 }, { "epoch": 0.48422548711761887, "grad_norm": 0.34912875294685364, "learning_rate": 1.6492528566989955e-05, "loss": 0.0759, "step": 21975 }, { "epoch": 0.48424752240713503, "grad_norm": 0.9156675338745117, "learning_rate": 1.6491463340639184e-05, "loss": 0.0615, "step": 21976 }, { "epoch": 0.4842695576966512, "grad_norm": 0.8187935948371887, "learning_rate": 1.649039810669158e-05, "loss": 0.1004, "step": 21977 }, { "epoch": 0.48429159298616736, "grad_norm": 0.44472619891166687, "learning_rate": 1.6489332865152584e-05, "loss": 0.093, "step": 21978 }, { "epoch": 0.4843136282756835, "grad_norm": 0.5779526829719543, "learning_rate": 1.6488267616027614e-05, "loss": 0.0688, "step": 21979 }, { "epoch": 0.4843356635651997, "grad_norm": 0.7082768082618713, "learning_rate": 1.6487202359322102e-05, "loss": 0.0947, "step": 21980 }, { "epoch": 0.4843576988547158, "grad_norm": 0.4584970772266388, "learning_rate": 1.648613709504147e-05, "loss": 0.0582, "step": 21981 }, { "epoch": 0.48437973414423197, "grad_norm": 0.8791859149932861, "learning_rate": 1.6485071823191145e-05, "loss": 0.1427, "step": 21982 }, { "epoch": 0.48440176943374813, "grad_norm": 0.5663718581199646, "learning_rate": 1.6484006543776554e-05, "loss": 0.0627, "step": 21983 }, { "epoch": 0.4844238047232643, "grad_norm": 0.7802765965461731, "learning_rate": 1.648294125680312e-05, "loss": 0.0907, "step": 21984 }, { "epoch": 0.48444584001278046, "grad_norm": 0.37815383076667786, "learning_rate": 1.6481875962276273e-05, "loss": 0.138, "step": 21985 }, { "epoch": 0.4844678753022966, "grad_norm": 0.5789385437965393, "learning_rate": 1.6480810660201434e-05, "loss": 0.0791, "step": 21986 }, { "epoch": 0.4844899105918128, "grad_norm": 0.46917763352394104, "learning_rate": 1.6479745350584034e-05, "loss": 0.0896, "step": 21987 }, { "epoch": 0.48451194588132895, "grad_norm": 0.6406266093254089, "learning_rate": 1.6478680033429498e-05, "loss": 0.061, "step": 21988 }, { "epoch": 0.4845339811708451, "grad_norm": 0.552419126033783, "learning_rate": 1.647761470874325e-05, "loss": 0.0734, "step": 21989 }, { "epoch": 0.4845560164603613, "grad_norm": 0.803436815738678, "learning_rate": 1.6476549376530717e-05, "loss": 0.0761, "step": 21990 }, { "epoch": 0.48457805174987745, "grad_norm": 0.3986543118953705, "learning_rate": 1.6475484036797332e-05, "loss": 0.0704, "step": 21991 }, { "epoch": 0.4846000870393936, "grad_norm": 0.9213026762008667, "learning_rate": 1.647441868954851e-05, "loss": 0.0863, "step": 21992 }, { "epoch": 0.4846221223289097, "grad_norm": 0.8279538154602051, "learning_rate": 1.6473353334789686e-05, "loss": 0.0715, "step": 21993 }, { "epoch": 0.4846441576184259, "grad_norm": 0.5770851969718933, "learning_rate": 1.6472287972526282e-05, "loss": 0.0724, "step": 21994 }, { "epoch": 0.48466619290794205, "grad_norm": 0.8596298098564148, "learning_rate": 1.6471222602763725e-05, "loss": 0.1019, "step": 21995 }, { "epoch": 0.4846882281974582, "grad_norm": 0.6223050951957703, "learning_rate": 1.6470157225507444e-05, "loss": 0.1058, "step": 21996 }, { "epoch": 0.4847102634869744, "grad_norm": 0.9186893105506897, "learning_rate": 1.646909184076286e-05, "loss": 0.1037, "step": 21997 }, { "epoch": 0.48473229877649054, "grad_norm": 0.6223904490470886, "learning_rate": 1.646802644853541e-05, "loss": 0.1163, "step": 21998 }, { "epoch": 0.4847543340660067, "grad_norm": 0.5287423133850098, "learning_rate": 1.646696104883051e-05, "loss": 0.057, "step": 21999 }, { "epoch": 0.4847763693555229, "grad_norm": 0.44554752111434937, "learning_rate": 1.646589564165359e-05, "loss": 0.0899, "step": 22000 }, { "epoch": 0.48479840464503904, "grad_norm": 0.5515428185462952, "learning_rate": 1.6464830227010082e-05, "loss": 0.0763, "step": 22001 }, { "epoch": 0.4848204399345552, "grad_norm": 0.522672176361084, "learning_rate": 1.6463764804905402e-05, "loss": 0.0657, "step": 22002 }, { "epoch": 0.48484247522407137, "grad_norm": 0.5589269995689392, "learning_rate": 1.6462699375344986e-05, "loss": 0.0903, "step": 22003 }, { "epoch": 0.48486451051358753, "grad_norm": 0.6828731298446655, "learning_rate": 1.6461633938334255e-05, "loss": 0.0652, "step": 22004 }, { "epoch": 0.4848865458031037, "grad_norm": 0.749243974685669, "learning_rate": 1.6460568493878645e-05, "loss": 0.0794, "step": 22005 }, { "epoch": 0.4849085810926198, "grad_norm": 0.42927315831184387, "learning_rate": 1.6459503041983573e-05, "loss": 0.0691, "step": 22006 }, { "epoch": 0.48493061638213597, "grad_norm": 0.7646604776382446, "learning_rate": 1.6458437582654472e-05, "loss": 0.0763, "step": 22007 }, { "epoch": 0.48495265167165214, "grad_norm": 0.6491378545761108, "learning_rate": 1.645737211589676e-05, "loss": 0.061, "step": 22008 }, { "epoch": 0.4849746869611683, "grad_norm": 0.5729854702949524, "learning_rate": 1.6456306641715874e-05, "loss": 0.1045, "step": 22009 }, { "epoch": 0.48499672225068446, "grad_norm": 0.44528135657310486, "learning_rate": 1.6455241160117237e-05, "loss": 0.084, "step": 22010 }, { "epoch": 0.48501875754020063, "grad_norm": 0.4850448668003082, "learning_rate": 1.645417567110628e-05, "loss": 0.0551, "step": 22011 }, { "epoch": 0.4850407928297168, "grad_norm": 0.6068087220191956, "learning_rate": 1.645311017468842e-05, "loss": 0.054, "step": 22012 }, { "epoch": 0.48506282811923296, "grad_norm": 0.4117682874202728, "learning_rate": 1.6452044670869093e-05, "loss": 0.0541, "step": 22013 }, { "epoch": 0.4850848634087491, "grad_norm": 0.6645609736442566, "learning_rate": 1.645097915965372e-05, "loss": 0.0776, "step": 22014 }, { "epoch": 0.4851068986982653, "grad_norm": 0.577573299407959, "learning_rate": 1.644991364104774e-05, "loss": 0.0444, "step": 22015 }, { "epoch": 0.48512893398778145, "grad_norm": 0.7018806338310242, "learning_rate": 1.644884811505657e-05, "loss": 0.0846, "step": 22016 }, { "epoch": 0.4851509692772976, "grad_norm": 0.5704760551452637, "learning_rate": 1.6447782581685642e-05, "loss": 0.092, "step": 22017 }, { "epoch": 0.4851730045668137, "grad_norm": 0.5416105389595032, "learning_rate": 1.6446717040940375e-05, "loss": 0.0916, "step": 22018 }, { "epoch": 0.4851950398563299, "grad_norm": 0.67235267162323, "learning_rate": 1.6445651492826205e-05, "loss": 0.0649, "step": 22019 }, { "epoch": 0.48521707514584606, "grad_norm": 0.8410853743553162, "learning_rate": 1.6444585937348554e-05, "loss": 0.0833, "step": 22020 }, { "epoch": 0.4852391104353622, "grad_norm": 0.671646237373352, "learning_rate": 1.6443520374512854e-05, "loss": 0.0948, "step": 22021 }, { "epoch": 0.4852611457248784, "grad_norm": 0.5869931578636169, "learning_rate": 1.644245480432453e-05, "loss": 0.0603, "step": 22022 }, { "epoch": 0.48528318101439455, "grad_norm": 1.4355511665344238, "learning_rate": 1.6441389226789014e-05, "loss": 0.0991, "step": 22023 }, { "epoch": 0.4853052163039107, "grad_norm": 0.5782626867294312, "learning_rate": 1.6440323641911727e-05, "loss": 0.0821, "step": 22024 }, { "epoch": 0.4853272515934269, "grad_norm": 0.4634783864021301, "learning_rate": 1.64392580496981e-05, "loss": 0.0419, "step": 22025 }, { "epoch": 0.48534928688294304, "grad_norm": 0.49904105067253113, "learning_rate": 1.6438192450153562e-05, "loss": 0.0721, "step": 22026 }, { "epoch": 0.4853713221724592, "grad_norm": 0.6115588545799255, "learning_rate": 1.6437126843283532e-05, "loss": 0.098, "step": 22027 }, { "epoch": 0.48539335746197537, "grad_norm": 0.6755245327949524, "learning_rate": 1.643606122909345e-05, "loss": 0.087, "step": 22028 }, { "epoch": 0.48541539275149154, "grad_norm": 0.7689165472984314, "learning_rate": 1.6434995607588735e-05, "loss": 0.0949, "step": 22029 }, { "epoch": 0.4854374280410077, "grad_norm": 0.8244896531105042, "learning_rate": 1.6433929978774822e-05, "loss": 0.1103, "step": 22030 }, { "epoch": 0.4854594633305238, "grad_norm": 1.3784738779067993, "learning_rate": 1.643286434265713e-05, "loss": 0.1226, "step": 22031 }, { "epoch": 0.48548149862004, "grad_norm": 0.5445176362991333, "learning_rate": 1.64317986992411e-05, "loss": 0.0666, "step": 22032 }, { "epoch": 0.48550353390955614, "grad_norm": 0.5166177153587341, "learning_rate": 1.6430733048532137e-05, "loss": 0.0792, "step": 22033 }, { "epoch": 0.4855255691990723, "grad_norm": 0.6149385571479797, "learning_rate": 1.6429667390535697e-05, "loss": 0.0857, "step": 22034 }, { "epoch": 0.48554760448858847, "grad_norm": 0.7063932418823242, "learning_rate": 1.6428601725257185e-05, "loss": 0.0708, "step": 22035 }, { "epoch": 0.48556963977810463, "grad_norm": 0.7603710889816284, "learning_rate": 1.6427536052702044e-05, "loss": 0.0963, "step": 22036 }, { "epoch": 0.4855916750676208, "grad_norm": 0.44967421889305115, "learning_rate": 1.642647037287569e-05, "loss": 0.0594, "step": 22037 }, { "epoch": 0.48561371035713696, "grad_norm": 1.0161409378051758, "learning_rate": 1.6425404685783564e-05, "loss": 0.1139, "step": 22038 }, { "epoch": 0.4856357456466531, "grad_norm": 0.5980334877967834, "learning_rate": 1.642433899143109e-05, "loss": 0.0677, "step": 22039 }, { "epoch": 0.4856577809361693, "grad_norm": 0.6131896376609802, "learning_rate": 1.642327328982369e-05, "loss": 0.0901, "step": 22040 }, { "epoch": 0.48567981622568546, "grad_norm": 0.8755414485931396, "learning_rate": 1.6422207580966792e-05, "loss": 0.1056, "step": 22041 }, { "epoch": 0.4857018515152016, "grad_norm": 0.7448568940162659, "learning_rate": 1.6421141864865833e-05, "loss": 0.066, "step": 22042 }, { "epoch": 0.48572388680471773, "grad_norm": 0.7443719506263733, "learning_rate": 1.6420076141526232e-05, "loss": 0.0837, "step": 22043 }, { "epoch": 0.4857459220942339, "grad_norm": 0.5685047507286072, "learning_rate": 1.6419010410953427e-05, "loss": 0.0602, "step": 22044 }, { "epoch": 0.48576795738375006, "grad_norm": 0.6463062763214111, "learning_rate": 1.6417944673152832e-05, "loss": 0.0926, "step": 22045 }, { "epoch": 0.4857899926732662, "grad_norm": 0.6948680281639099, "learning_rate": 1.6416878928129896e-05, "loss": 0.0808, "step": 22046 }, { "epoch": 0.4858120279627824, "grad_norm": 0.7151151299476624, "learning_rate": 1.6415813175890032e-05, "loss": 0.0684, "step": 22047 }, { "epoch": 0.48583406325229855, "grad_norm": 0.6834960579872131, "learning_rate": 1.641474741643867e-05, "loss": 0.0999, "step": 22048 }, { "epoch": 0.4858560985418147, "grad_norm": 0.7264369130134583, "learning_rate": 1.641368164978124e-05, "loss": 0.0801, "step": 22049 }, { "epoch": 0.4858781338313309, "grad_norm": 0.7975761890411377, "learning_rate": 1.641261587592317e-05, "loss": 0.0873, "step": 22050 }, { "epoch": 0.48590016912084705, "grad_norm": 0.8513365983963013, "learning_rate": 1.6411550094869896e-05, "loss": 0.0949, "step": 22051 }, { "epoch": 0.4859222044103632, "grad_norm": 0.6315194964408875, "learning_rate": 1.6410484306626833e-05, "loss": 0.1108, "step": 22052 }, { "epoch": 0.4859442396998794, "grad_norm": 0.8228599429130554, "learning_rate": 1.6409418511199425e-05, "loss": 0.1292, "step": 22053 }, { "epoch": 0.48596627498939554, "grad_norm": 0.5360751152038574, "learning_rate": 1.6408352708593092e-05, "loss": 0.0648, "step": 22054 }, { "epoch": 0.48598831027891165, "grad_norm": 0.6059288382530212, "learning_rate": 1.640728689881326e-05, "loss": 0.0512, "step": 22055 }, { "epoch": 0.4860103455684278, "grad_norm": 0.7541541457176208, "learning_rate": 1.640622108186536e-05, "loss": 0.0849, "step": 22056 }, { "epoch": 0.486032380857944, "grad_norm": 0.5478070378303528, "learning_rate": 1.6405155257754823e-05, "loss": 0.0766, "step": 22057 }, { "epoch": 0.48605441614746014, "grad_norm": 0.5870372653007507, "learning_rate": 1.6404089426487077e-05, "loss": 0.0594, "step": 22058 }, { "epoch": 0.4860764514369763, "grad_norm": 0.686117947101593, "learning_rate": 1.640302358806755e-05, "loss": 0.0537, "step": 22059 }, { "epoch": 0.4860984867264925, "grad_norm": 0.4453094005584717, "learning_rate": 1.640195774250167e-05, "loss": 0.1117, "step": 22060 }, { "epoch": 0.48612052201600864, "grad_norm": 0.5670282244682312, "learning_rate": 1.6400891889794873e-05, "loss": 0.0574, "step": 22061 }, { "epoch": 0.4861425573055248, "grad_norm": 0.7190932631492615, "learning_rate": 1.6399826029952582e-05, "loss": 0.0917, "step": 22062 }, { "epoch": 0.48616459259504097, "grad_norm": 0.6456161141395569, "learning_rate": 1.6398760162980225e-05, "loss": 0.0949, "step": 22063 }, { "epoch": 0.48618662788455713, "grad_norm": 0.7201600670814514, "learning_rate": 1.6397694288883227e-05, "loss": 0.0584, "step": 22064 }, { "epoch": 0.4862086631740733, "grad_norm": 0.9419541358947754, "learning_rate": 1.639662840766703e-05, "loss": 0.0939, "step": 22065 }, { "epoch": 0.48623069846358946, "grad_norm": 0.4875343143939972, "learning_rate": 1.639556251933705e-05, "loss": 0.0716, "step": 22066 }, { "epoch": 0.4862527337531056, "grad_norm": 0.7319496870040894, "learning_rate": 1.6394496623898722e-05, "loss": 0.0811, "step": 22067 }, { "epoch": 0.48627476904262174, "grad_norm": 0.6854441165924072, "learning_rate": 1.6393430721357476e-05, "loss": 0.0986, "step": 22068 }, { "epoch": 0.4862968043321379, "grad_norm": 0.4788471460342407, "learning_rate": 1.639236481171874e-05, "loss": 0.0722, "step": 22069 }, { "epoch": 0.48631883962165406, "grad_norm": 0.7591289281845093, "learning_rate": 1.6391298894987946e-05, "loss": 0.0929, "step": 22070 }, { "epoch": 0.48634087491117023, "grad_norm": 0.6613712310791016, "learning_rate": 1.6390232971170523e-05, "loss": 0.0474, "step": 22071 }, { "epoch": 0.4863629102006864, "grad_norm": 0.6652671694755554, "learning_rate": 1.6389167040271895e-05, "loss": 0.0753, "step": 22072 }, { "epoch": 0.48638494549020256, "grad_norm": 1.2156603336334229, "learning_rate": 1.6388101102297493e-05, "loss": 0.0741, "step": 22073 }, { "epoch": 0.4864069807797187, "grad_norm": 0.4567849338054657, "learning_rate": 1.6387035157252747e-05, "loss": 0.1032, "step": 22074 }, { "epoch": 0.4864290160692349, "grad_norm": 0.5643087029457092, "learning_rate": 1.6385969205143086e-05, "loss": 0.0951, "step": 22075 }, { "epoch": 0.48645105135875105, "grad_norm": 0.30433574318885803, "learning_rate": 1.6384903245973944e-05, "loss": 0.0781, "step": 22076 }, { "epoch": 0.4864730866482672, "grad_norm": 1.1133081912994385, "learning_rate": 1.6383837279750742e-05, "loss": 0.0747, "step": 22077 }, { "epoch": 0.4864951219377834, "grad_norm": 0.4460338056087494, "learning_rate": 1.6382771306478922e-05, "loss": 0.0507, "step": 22078 }, { "epoch": 0.48651715722729955, "grad_norm": 0.9577451944351196, "learning_rate": 1.6381705326163904e-05, "loss": 0.0723, "step": 22079 }, { "epoch": 0.48653919251681566, "grad_norm": 0.5277019739151001, "learning_rate": 1.638063933881112e-05, "loss": 0.0789, "step": 22080 }, { "epoch": 0.4865612278063318, "grad_norm": 0.5778076648712158, "learning_rate": 1.6379573344425996e-05, "loss": 0.0821, "step": 22081 }, { "epoch": 0.486583263095848, "grad_norm": 0.42639365792274475, "learning_rate": 1.637850734301397e-05, "loss": 0.0943, "step": 22082 }, { "epoch": 0.48660529838536415, "grad_norm": 0.5295961499214172, "learning_rate": 1.637744133458046e-05, "loss": 0.0474, "step": 22083 }, { "epoch": 0.4866273336748803, "grad_norm": 0.7612343430519104, "learning_rate": 1.637637531913091e-05, "loss": 0.0991, "step": 22084 }, { "epoch": 0.4866493689643965, "grad_norm": 0.4534939229488373, "learning_rate": 1.637530929667074e-05, "loss": 0.0879, "step": 22085 }, { "epoch": 0.48667140425391264, "grad_norm": 0.5345935225486755, "learning_rate": 1.637424326720538e-05, "loss": 0.0937, "step": 22086 }, { "epoch": 0.4866934395434288, "grad_norm": 0.8976588845252991, "learning_rate": 1.6373177230740264e-05, "loss": 0.0837, "step": 22087 }, { "epoch": 0.48671547483294497, "grad_norm": 0.8335650563240051, "learning_rate": 1.6372111187280823e-05, "loss": 0.0873, "step": 22088 }, { "epoch": 0.48673751012246114, "grad_norm": 0.624176025390625, "learning_rate": 1.6371045136832477e-05, "loss": 0.0707, "step": 22089 }, { "epoch": 0.4867595454119773, "grad_norm": 0.5999521017074585, "learning_rate": 1.6369979079400662e-05, "loss": 0.0676, "step": 22090 }, { "epoch": 0.48678158070149347, "grad_norm": 0.4197053611278534, "learning_rate": 1.6368913014990817e-05, "loss": 0.0947, "step": 22091 }, { "epoch": 0.4868036159910096, "grad_norm": 0.650895893573761, "learning_rate": 1.636784694360836e-05, "loss": 0.0567, "step": 22092 }, { "epoch": 0.48682565128052574, "grad_norm": 0.6855337619781494, "learning_rate": 1.6366780865258724e-05, "loss": 0.1044, "step": 22093 }, { "epoch": 0.4868476865700419, "grad_norm": 0.32962027192115784, "learning_rate": 1.6365714779947345e-05, "loss": 0.0637, "step": 22094 }, { "epoch": 0.48686972185955807, "grad_norm": 0.8513270616531372, "learning_rate": 1.6364648687679647e-05, "loss": 0.0904, "step": 22095 }, { "epoch": 0.48689175714907423, "grad_norm": 0.8840638995170593, "learning_rate": 1.6363582588461057e-05, "loss": 0.0617, "step": 22096 }, { "epoch": 0.4869137924385904, "grad_norm": 0.9828047752380371, "learning_rate": 1.636251648229701e-05, "loss": 0.0722, "step": 22097 }, { "epoch": 0.48693582772810656, "grad_norm": 0.5066869854927063, "learning_rate": 1.6361450369192936e-05, "loss": 0.0511, "step": 22098 }, { "epoch": 0.4869578630176227, "grad_norm": 0.7055156230926514, "learning_rate": 1.636038424915427e-05, "loss": 0.0798, "step": 22099 }, { "epoch": 0.4869798983071389, "grad_norm": 0.4633503258228302, "learning_rate": 1.635931812218643e-05, "loss": 0.0519, "step": 22100 }, { "epoch": 0.48700193359665506, "grad_norm": 0.8150862455368042, "learning_rate": 1.6358251988294863e-05, "loss": 0.0937, "step": 22101 }, { "epoch": 0.4870239688861712, "grad_norm": 0.8363857269287109, "learning_rate": 1.6357185847484987e-05, "loss": 0.0951, "step": 22102 }, { "epoch": 0.4870460041756874, "grad_norm": 0.4453776180744171, "learning_rate": 1.6356119699762232e-05, "loss": 0.0662, "step": 22103 }, { "epoch": 0.48706803946520355, "grad_norm": 0.5777360796928406, "learning_rate": 1.6355053545132034e-05, "loss": 0.069, "step": 22104 }, { "epoch": 0.48709007475471966, "grad_norm": 0.6351580023765564, "learning_rate": 1.6353987383599823e-05, "loss": 0.0625, "step": 22105 }, { "epoch": 0.4871121100442358, "grad_norm": 0.8059077858924866, "learning_rate": 1.635292121517103e-05, "loss": 0.0787, "step": 22106 }, { "epoch": 0.487134145333752, "grad_norm": 0.7467074990272522, "learning_rate": 1.635185503985108e-05, "loss": 0.0613, "step": 22107 }, { "epoch": 0.48715618062326815, "grad_norm": 0.6337677836418152, "learning_rate": 1.6350788857645406e-05, "loss": 0.0887, "step": 22108 }, { "epoch": 0.4871782159127843, "grad_norm": 0.4011143147945404, "learning_rate": 1.6349722668559443e-05, "loss": 0.0631, "step": 22109 }, { "epoch": 0.4872002512023005, "grad_norm": 1.1114552021026611, "learning_rate": 1.634865647259862e-05, "loss": 0.0763, "step": 22110 }, { "epoch": 0.48722228649181665, "grad_norm": 1.0029562711715698, "learning_rate": 1.634759026976836e-05, "loss": 0.114, "step": 22111 }, { "epoch": 0.4872443217813328, "grad_norm": 1.4903868436813354, "learning_rate": 1.634652406007411e-05, "loss": 0.0908, "step": 22112 }, { "epoch": 0.487266357070849, "grad_norm": 1.1190240383148193, "learning_rate": 1.634545784352128e-05, "loss": 0.1007, "step": 22113 }, { "epoch": 0.48728839236036514, "grad_norm": 0.8162298798561096, "learning_rate": 1.6344391620115322e-05, "loss": 0.0833, "step": 22114 }, { "epoch": 0.4873104276498813, "grad_norm": 0.7076719403266907, "learning_rate": 1.6343325389861647e-05, "loss": 0.0787, "step": 22115 }, { "epoch": 0.48733246293939747, "grad_norm": 0.6394164562225342, "learning_rate": 1.6342259152765705e-05, "loss": 0.0575, "step": 22116 }, { "epoch": 0.4873544982289136, "grad_norm": 0.6337517499923706, "learning_rate": 1.634119290883291e-05, "loss": 0.105, "step": 22117 }, { "epoch": 0.48737653351842974, "grad_norm": 0.6301293969154358, "learning_rate": 1.6340126658068707e-05, "loss": 0.0708, "step": 22118 }, { "epoch": 0.4873985688079459, "grad_norm": 0.75705885887146, "learning_rate": 1.6339060400478514e-05, "loss": 0.0695, "step": 22119 }, { "epoch": 0.4874206040974621, "grad_norm": 0.6700021028518677, "learning_rate": 1.6337994136067775e-05, "loss": 0.0712, "step": 22120 }, { "epoch": 0.48744263938697824, "grad_norm": 0.5965422987937927, "learning_rate": 1.6336927864841906e-05, "loss": 0.0768, "step": 22121 }, { "epoch": 0.4874646746764944, "grad_norm": 0.8449264764785767, "learning_rate": 1.633586158680635e-05, "loss": 0.0715, "step": 22122 }, { "epoch": 0.48748670996601057, "grad_norm": 0.5671343207359314, "learning_rate": 1.6334795301966532e-05, "loss": 0.089, "step": 22123 }, { "epoch": 0.48750874525552673, "grad_norm": 0.44693392515182495, "learning_rate": 1.633372901032789e-05, "loss": 0.0625, "step": 22124 }, { "epoch": 0.4875307805450429, "grad_norm": 0.6654612421989441, "learning_rate": 1.633266271189585e-05, "loss": 0.0821, "step": 22125 }, { "epoch": 0.48755281583455906, "grad_norm": 0.8212881684303284, "learning_rate": 1.6331596406675843e-05, "loss": 0.0954, "step": 22126 }, { "epoch": 0.4875748511240752, "grad_norm": 0.8488034009933472, "learning_rate": 1.6330530094673304e-05, "loss": 0.0657, "step": 22127 }, { "epoch": 0.4875968864135914, "grad_norm": 0.6259601712226868, "learning_rate": 1.6329463775893664e-05, "loss": 0.0786, "step": 22128 }, { "epoch": 0.4876189217031075, "grad_norm": 0.6061455011367798, "learning_rate": 1.6328397450342345e-05, "loss": 0.0802, "step": 22129 }, { "epoch": 0.48764095699262366, "grad_norm": 0.5640973448753357, "learning_rate": 1.632733111802479e-05, "loss": 0.0679, "step": 22130 }, { "epoch": 0.48766299228213983, "grad_norm": 0.45984697341918945, "learning_rate": 1.632626477894642e-05, "loss": 0.0818, "step": 22131 }, { "epoch": 0.487685027571656, "grad_norm": 0.7462860345840454, "learning_rate": 1.6325198433112678e-05, "loss": 0.0763, "step": 22132 }, { "epoch": 0.48770706286117216, "grad_norm": 0.8800005316734314, "learning_rate": 1.6324132080528987e-05, "loss": 0.0724, "step": 22133 }, { "epoch": 0.4877290981506883, "grad_norm": 0.5277256369590759, "learning_rate": 1.6323065721200786e-05, "loss": 0.0741, "step": 22134 }, { "epoch": 0.4877511334402045, "grad_norm": 0.8035373687744141, "learning_rate": 1.63219993551335e-05, "loss": 0.076, "step": 22135 }, { "epoch": 0.48777316872972065, "grad_norm": 0.5817811489105225, "learning_rate": 1.6320932982332558e-05, "loss": 0.0531, "step": 22136 }, { "epoch": 0.4877952040192368, "grad_norm": 0.5635964870452881, "learning_rate": 1.63198666028034e-05, "loss": 0.0489, "step": 22137 }, { "epoch": 0.487817239308753, "grad_norm": 0.8570829629898071, "learning_rate": 1.631880021655145e-05, "loss": 0.0874, "step": 22138 }, { "epoch": 0.48783927459826915, "grad_norm": 0.40567830204963684, "learning_rate": 1.6317733823582147e-05, "loss": 0.0666, "step": 22139 }, { "epoch": 0.4878613098877853, "grad_norm": 0.7078132033348083, "learning_rate": 1.6316667423900914e-05, "loss": 0.0499, "step": 22140 }, { "epoch": 0.4878833451773015, "grad_norm": 0.5679623484611511, "learning_rate": 1.6315601017513195e-05, "loss": 0.0766, "step": 22141 }, { "epoch": 0.4879053804668176, "grad_norm": 0.6038002371788025, "learning_rate": 1.6314534604424413e-05, "loss": 0.0832, "step": 22142 }, { "epoch": 0.48792741575633375, "grad_norm": 0.729140043258667, "learning_rate": 1.631346818464e-05, "loss": 0.0743, "step": 22143 }, { "epoch": 0.4879494510458499, "grad_norm": 0.6704882979393005, "learning_rate": 1.6312401758165387e-05, "loss": 0.089, "step": 22144 }, { "epoch": 0.4879714863353661, "grad_norm": 0.6212383508682251, "learning_rate": 1.6311335325006013e-05, "loss": 0.0718, "step": 22145 }, { "epoch": 0.48799352162488224, "grad_norm": 0.7726719975471497, "learning_rate": 1.6310268885167297e-05, "loss": 0.0979, "step": 22146 }, { "epoch": 0.4880155569143984, "grad_norm": 0.7038088440895081, "learning_rate": 1.6309202438654686e-05, "loss": 0.0986, "step": 22147 }, { "epoch": 0.48803759220391457, "grad_norm": 1.3809717893600464, "learning_rate": 1.63081359854736e-05, "loss": 0.0688, "step": 22148 }, { "epoch": 0.48805962749343074, "grad_norm": 0.6283295154571533, "learning_rate": 1.630706952562948e-05, "loss": 0.0825, "step": 22149 }, { "epoch": 0.4880816627829469, "grad_norm": 0.9769291877746582, "learning_rate": 1.6306003059127756e-05, "loss": 0.0861, "step": 22150 }, { "epoch": 0.48810369807246307, "grad_norm": 0.5909267663955688, "learning_rate": 1.6304936585973855e-05, "loss": 0.1049, "step": 22151 }, { "epoch": 0.48812573336197923, "grad_norm": 0.5807623863220215, "learning_rate": 1.6303870106173208e-05, "loss": 0.0729, "step": 22152 }, { "epoch": 0.4881477686514954, "grad_norm": 0.47875258326530457, "learning_rate": 1.6302803619731257e-05, "loss": 0.0865, "step": 22153 }, { "epoch": 0.4881698039410115, "grad_norm": 0.9403987526893616, "learning_rate": 1.6301737126653426e-05, "loss": 0.1107, "step": 22154 }, { "epoch": 0.48819183923052767, "grad_norm": 0.7771024107933044, "learning_rate": 1.630067062694515e-05, "loss": 0.1132, "step": 22155 }, { "epoch": 0.48821387452004383, "grad_norm": 0.8703327178955078, "learning_rate": 1.629960412061186e-05, "loss": 0.1037, "step": 22156 }, { "epoch": 0.48823590980956, "grad_norm": 0.6349040269851685, "learning_rate": 1.6298537607658995e-05, "loss": 0.0631, "step": 22157 }, { "epoch": 0.48825794509907616, "grad_norm": 0.6097288131713867, "learning_rate": 1.6297471088091977e-05, "loss": 0.0721, "step": 22158 }, { "epoch": 0.4882799803885923, "grad_norm": 0.4997059404850006, "learning_rate": 1.6296404561916243e-05, "loss": 0.0616, "step": 22159 }, { "epoch": 0.4883020156781085, "grad_norm": 0.6749483942985535, "learning_rate": 1.6295338029137226e-05, "loss": 0.1074, "step": 22160 }, { "epoch": 0.48832405096762466, "grad_norm": 0.4297988712787628, "learning_rate": 1.6294271489760354e-05, "loss": 0.0901, "step": 22161 }, { "epoch": 0.4883460862571408, "grad_norm": 0.5958896279335022, "learning_rate": 1.629320494379107e-05, "loss": 0.0721, "step": 22162 }, { "epoch": 0.488368121546657, "grad_norm": 0.5138994455337524, "learning_rate": 1.6292138391234794e-05, "loss": 0.0476, "step": 22163 }, { "epoch": 0.48839015683617315, "grad_norm": 0.7942516207695007, "learning_rate": 1.629107183209697e-05, "loss": 0.0951, "step": 22164 }, { "epoch": 0.4884121921256893, "grad_norm": 0.8165016174316406, "learning_rate": 1.629000526638302e-05, "loss": 0.0664, "step": 22165 }, { "epoch": 0.4884342274152054, "grad_norm": 0.9338268041610718, "learning_rate": 1.6288938694098387e-05, "loss": 0.1435, "step": 22166 }, { "epoch": 0.4884562627047216, "grad_norm": 0.805551290512085, "learning_rate": 1.628787211524849e-05, "loss": 0.1148, "step": 22167 }, { "epoch": 0.48847829799423775, "grad_norm": 1.0146433115005493, "learning_rate": 1.6286805529838778e-05, "loss": 0.0808, "step": 22168 }, { "epoch": 0.4885003332837539, "grad_norm": 0.42171093821525574, "learning_rate": 1.628573893787467e-05, "loss": 0.0391, "step": 22169 }, { "epoch": 0.4885223685732701, "grad_norm": 0.622809112071991, "learning_rate": 1.6284672339361602e-05, "loss": 0.0854, "step": 22170 }, { "epoch": 0.48854440386278625, "grad_norm": 0.3990379273891449, "learning_rate": 1.6283605734305013e-05, "loss": 0.1005, "step": 22171 }, { "epoch": 0.4885664391523024, "grad_norm": 0.6002675890922546, "learning_rate": 1.628253912271033e-05, "loss": 0.0975, "step": 22172 }, { "epoch": 0.4885884744418186, "grad_norm": 0.8052108883857727, "learning_rate": 1.628147250458299e-05, "loss": 0.0921, "step": 22173 }, { "epoch": 0.48861050973133474, "grad_norm": 0.5539090037345886, "learning_rate": 1.6280405879928425e-05, "loss": 0.0619, "step": 22174 }, { "epoch": 0.4886325450208509, "grad_norm": 0.5045452117919922, "learning_rate": 1.627933924875206e-05, "loss": 0.0868, "step": 22175 }, { "epoch": 0.48865458031036707, "grad_norm": 0.6064711213111877, "learning_rate": 1.627827261105934e-05, "loss": 0.087, "step": 22176 }, { "epoch": 0.48867661559988324, "grad_norm": 0.6574897170066833, "learning_rate": 1.6277205966855686e-05, "loss": 0.0746, "step": 22177 }, { "epoch": 0.4886986508893994, "grad_norm": 0.6822249889373779, "learning_rate": 1.6276139316146538e-05, "loss": 0.0628, "step": 22178 }, { "epoch": 0.4887206861789155, "grad_norm": 0.4006441533565521, "learning_rate": 1.6275072658937334e-05, "loss": 0.0584, "step": 22179 }, { "epoch": 0.4887427214684317, "grad_norm": 0.578502893447876, "learning_rate": 1.6274005995233495e-05, "loss": 0.0912, "step": 22180 }, { "epoch": 0.48876475675794784, "grad_norm": 0.7524775862693787, "learning_rate": 1.6272939325040462e-05, "loss": 0.0802, "step": 22181 }, { "epoch": 0.488786792047464, "grad_norm": 1.2329427003860474, "learning_rate": 1.627187264836367e-05, "loss": 0.1168, "step": 22182 }, { "epoch": 0.48880882733698017, "grad_norm": 0.726020097732544, "learning_rate": 1.627080596520855e-05, "loss": 0.0805, "step": 22183 }, { "epoch": 0.48883086262649633, "grad_norm": 0.6325669884681702, "learning_rate": 1.6269739275580527e-05, "loss": 0.0735, "step": 22184 }, { "epoch": 0.4888528979160125, "grad_norm": 0.258171409368515, "learning_rate": 1.6268672579485047e-05, "loss": 0.0505, "step": 22185 }, { "epoch": 0.48887493320552866, "grad_norm": 0.6055275201797485, "learning_rate": 1.626760587692753e-05, "loss": 0.0721, "step": 22186 }, { "epoch": 0.4888969684950448, "grad_norm": 0.4941389262676239, "learning_rate": 1.6266539167913424e-05, "loss": 0.0738, "step": 22187 }, { "epoch": 0.488919003784561, "grad_norm": 0.5780504941940308, "learning_rate": 1.6265472452448146e-05, "loss": 0.0922, "step": 22188 }, { "epoch": 0.48894103907407716, "grad_norm": 0.40592581033706665, "learning_rate": 1.6264405730537145e-05, "loss": 0.0539, "step": 22189 }, { "epoch": 0.4889630743635933, "grad_norm": 0.5457111597061157, "learning_rate": 1.626333900218585e-05, "loss": 0.0996, "step": 22190 }, { "epoch": 0.48898510965310943, "grad_norm": 0.7631713151931763, "learning_rate": 1.6262272267399687e-05, "loss": 0.0692, "step": 22191 }, { "epoch": 0.4890071449426256, "grad_norm": 0.8330932259559631, "learning_rate": 1.6261205526184097e-05, "loss": 0.0833, "step": 22192 }, { "epoch": 0.48902918023214176, "grad_norm": 0.7304320931434631, "learning_rate": 1.626013877854451e-05, "loss": 0.0599, "step": 22193 }, { "epoch": 0.4890512155216579, "grad_norm": 0.5562396049499512, "learning_rate": 1.6259072024486355e-05, "loss": 0.0833, "step": 22194 }, { "epoch": 0.4890732508111741, "grad_norm": 0.8428807258605957, "learning_rate": 1.625800526401508e-05, "loss": 0.0688, "step": 22195 }, { "epoch": 0.48909528610069025, "grad_norm": 0.8210310339927673, "learning_rate": 1.6256938497136102e-05, "loss": 0.0744, "step": 22196 }, { "epoch": 0.4891173213902064, "grad_norm": 0.7191945910453796, "learning_rate": 1.625587172385487e-05, "loss": 0.0704, "step": 22197 }, { "epoch": 0.4891393566797226, "grad_norm": 0.6680472493171692, "learning_rate": 1.6254804944176805e-05, "loss": 0.0785, "step": 22198 }, { "epoch": 0.48916139196923875, "grad_norm": 0.8119347095489502, "learning_rate": 1.6253738158107347e-05, "loss": 0.0686, "step": 22199 }, { "epoch": 0.4891834272587549, "grad_norm": 0.7166030406951904, "learning_rate": 1.6252671365651928e-05, "loss": 0.0751, "step": 22200 }, { "epoch": 0.4892054625482711, "grad_norm": 0.5217649340629578, "learning_rate": 1.625160456681598e-05, "loss": 0.0727, "step": 22201 }, { "epoch": 0.48922749783778724, "grad_norm": 0.5015899538993835, "learning_rate": 1.6250537761604942e-05, "loss": 0.0884, "step": 22202 }, { "epoch": 0.48924953312730335, "grad_norm": 0.49229228496551514, "learning_rate": 1.624947095002424e-05, "loss": 0.0656, "step": 22203 }, { "epoch": 0.4892715684168195, "grad_norm": 0.49835750460624695, "learning_rate": 1.6248404132079317e-05, "loss": 0.0649, "step": 22204 }, { "epoch": 0.4892936037063357, "grad_norm": 0.4857545495033264, "learning_rate": 1.6247337307775602e-05, "loss": 0.0809, "step": 22205 }, { "epoch": 0.48931563899585184, "grad_norm": 0.5590932369232178, "learning_rate": 1.624627047711853e-05, "loss": 0.0482, "step": 22206 }, { "epoch": 0.489337674285368, "grad_norm": 0.42295771837234497, "learning_rate": 1.6245203640113528e-05, "loss": 0.0888, "step": 22207 }, { "epoch": 0.48935970957488417, "grad_norm": 0.7322887778282166, "learning_rate": 1.6244136796766042e-05, "loss": 0.1047, "step": 22208 }, { "epoch": 0.48938174486440034, "grad_norm": 0.7819637656211853, "learning_rate": 1.6243069947081496e-05, "loss": 0.102, "step": 22209 }, { "epoch": 0.4894037801539165, "grad_norm": 0.5084860920906067, "learning_rate": 1.6242003091065335e-05, "loss": 0.0767, "step": 22210 }, { "epoch": 0.48942581544343267, "grad_norm": 0.7820884585380554, "learning_rate": 1.624093622872298e-05, "loss": 0.0649, "step": 22211 }, { "epoch": 0.48944785073294883, "grad_norm": 0.5138797163963318, "learning_rate": 1.623986936005987e-05, "loss": 0.0661, "step": 22212 }, { "epoch": 0.489469886022465, "grad_norm": 1.000259280204773, "learning_rate": 1.6238802485081447e-05, "loss": 0.0966, "step": 22213 }, { "epoch": 0.48949192131198116, "grad_norm": 0.8111160397529602, "learning_rate": 1.6237735603793136e-05, "loss": 0.087, "step": 22214 }, { "epoch": 0.4895139566014973, "grad_norm": 0.43621838092803955, "learning_rate": 1.6236668716200373e-05, "loss": 0.067, "step": 22215 }, { "epoch": 0.48953599189101343, "grad_norm": 0.6148493885993958, "learning_rate": 1.623560182230859e-05, "loss": 0.0824, "step": 22216 }, { "epoch": 0.4895580271805296, "grad_norm": 0.828964352607727, "learning_rate": 1.6234534922123226e-05, "loss": 0.0614, "step": 22217 }, { "epoch": 0.48958006247004576, "grad_norm": 0.6211281418800354, "learning_rate": 1.623346801564972e-05, "loss": 0.0545, "step": 22218 }, { "epoch": 0.4896020977595619, "grad_norm": 0.8883553743362427, "learning_rate": 1.623240110289349e-05, "loss": 0.0805, "step": 22219 }, { "epoch": 0.4896241330490781, "grad_norm": 0.5744149088859558, "learning_rate": 1.6231334183859988e-05, "loss": 0.0772, "step": 22220 }, { "epoch": 0.48964616833859426, "grad_norm": 0.6098245978355408, "learning_rate": 1.6230267258554638e-05, "loss": 0.0691, "step": 22221 }, { "epoch": 0.4896682036281104, "grad_norm": 0.8388776779174805, "learning_rate": 1.622920032698288e-05, "loss": 0.0922, "step": 22222 }, { "epoch": 0.4896902389176266, "grad_norm": 0.617505669593811, "learning_rate": 1.622813338915014e-05, "loss": 0.0549, "step": 22223 }, { "epoch": 0.48971227420714275, "grad_norm": 0.8778365254402161, "learning_rate": 1.622706644506186e-05, "loss": 0.0999, "step": 22224 }, { "epoch": 0.4897343094966589, "grad_norm": 0.35672086477279663, "learning_rate": 1.6225999494723475e-05, "loss": 0.0573, "step": 22225 }, { "epoch": 0.4897563447861751, "grad_norm": 0.7713425755500793, "learning_rate": 1.6224932538140413e-05, "loss": 0.1126, "step": 22226 }, { "epoch": 0.48977838007569124, "grad_norm": 0.4270569384098053, "learning_rate": 1.6223865575318117e-05, "loss": 0.0775, "step": 22227 }, { "epoch": 0.48980041536520735, "grad_norm": 1.075448989868164, "learning_rate": 1.6222798606262014e-05, "loss": 0.1492, "step": 22228 }, { "epoch": 0.4898224506547235, "grad_norm": 0.5507028698921204, "learning_rate": 1.6221731630977545e-05, "loss": 0.1303, "step": 22229 }, { "epoch": 0.4898444859442397, "grad_norm": 0.49962079524993896, "learning_rate": 1.622066464947014e-05, "loss": 0.0922, "step": 22230 }, { "epoch": 0.48986652123375585, "grad_norm": 0.7956942319869995, "learning_rate": 1.6219597661745237e-05, "loss": 0.0661, "step": 22231 }, { "epoch": 0.489888556523272, "grad_norm": 0.5305317640304565, "learning_rate": 1.6218530667808266e-05, "loss": 0.0618, "step": 22232 }, { "epoch": 0.4899105918127882, "grad_norm": 0.6554074883460999, "learning_rate": 1.6217463667664668e-05, "loss": 0.0773, "step": 22233 }, { "epoch": 0.48993262710230434, "grad_norm": 0.42242786288261414, "learning_rate": 1.6216396661319874e-05, "loss": 0.0586, "step": 22234 }, { "epoch": 0.4899546623918205, "grad_norm": 0.6597040891647339, "learning_rate": 1.621532964877932e-05, "loss": 0.0839, "step": 22235 }, { "epoch": 0.48997669768133667, "grad_norm": 0.7400882244110107, "learning_rate": 1.6214262630048437e-05, "loss": 0.0859, "step": 22236 }, { "epoch": 0.48999873297085283, "grad_norm": 0.6265228986740112, "learning_rate": 1.6213195605132666e-05, "loss": 0.0847, "step": 22237 }, { "epoch": 0.490020768260369, "grad_norm": 0.9506320953369141, "learning_rate": 1.621212857403744e-05, "loss": 0.08, "step": 22238 }, { "epoch": 0.49004280354988516, "grad_norm": 0.873727560043335, "learning_rate": 1.6211061536768192e-05, "loss": 0.1163, "step": 22239 }, { "epoch": 0.49006483883940133, "grad_norm": 0.500907838344574, "learning_rate": 1.6209994493330358e-05, "loss": 0.0313, "step": 22240 }, { "epoch": 0.49008687412891744, "grad_norm": 0.6579294204711914, "learning_rate": 1.6208927443729374e-05, "loss": 0.0984, "step": 22241 }, { "epoch": 0.4901089094184336, "grad_norm": 0.6003841757774353, "learning_rate": 1.620786038797067e-05, "loss": 0.0787, "step": 22242 }, { "epoch": 0.49013094470794977, "grad_norm": 0.5907886028289795, "learning_rate": 1.620679332605969e-05, "loss": 0.0907, "step": 22243 }, { "epoch": 0.49015297999746593, "grad_norm": 0.576736330986023, "learning_rate": 1.6205726258001862e-05, "loss": 0.0887, "step": 22244 }, { "epoch": 0.4901750152869821, "grad_norm": 0.6111642718315125, "learning_rate": 1.6204659183802628e-05, "loss": 0.0719, "step": 22245 }, { "epoch": 0.49019705057649826, "grad_norm": 0.3807866871356964, "learning_rate": 1.6203592103467416e-05, "loss": 0.0578, "step": 22246 }, { "epoch": 0.4902190858660144, "grad_norm": 0.9264542460441589, "learning_rate": 1.620252501700166e-05, "loss": 0.125, "step": 22247 }, { "epoch": 0.4902411211555306, "grad_norm": 0.7658888101577759, "learning_rate": 1.6201457924410806e-05, "loss": 0.0841, "step": 22248 }, { "epoch": 0.49026315644504675, "grad_norm": 0.9159522652626038, "learning_rate": 1.6200390825700278e-05, "loss": 0.1344, "step": 22249 }, { "epoch": 0.4902851917345629, "grad_norm": 0.6478734612464905, "learning_rate": 1.6199323720875517e-05, "loss": 0.0728, "step": 22250 }, { "epoch": 0.4903072270240791, "grad_norm": 0.6864326000213623, "learning_rate": 1.6198256609941957e-05, "loss": 0.0937, "step": 22251 }, { "epoch": 0.49032926231359525, "grad_norm": 0.5547270178794861, "learning_rate": 1.6197189492905034e-05, "loss": 0.0749, "step": 22252 }, { "epoch": 0.49035129760311136, "grad_norm": 0.6374387741088867, "learning_rate": 1.6196122369770186e-05, "loss": 0.0692, "step": 22253 }, { "epoch": 0.4903733328926275, "grad_norm": 0.4764144718647003, "learning_rate": 1.6195055240542843e-05, "loss": 0.048, "step": 22254 }, { "epoch": 0.4903953681821437, "grad_norm": 0.6326258778572083, "learning_rate": 1.619398810522844e-05, "loss": 0.066, "step": 22255 }, { "epoch": 0.49041740347165985, "grad_norm": 1.2668911218643188, "learning_rate": 1.6192920963832418e-05, "loss": 0.0831, "step": 22256 }, { "epoch": 0.490439438761176, "grad_norm": 0.8868355751037598, "learning_rate": 1.619185381636021e-05, "loss": 0.0806, "step": 22257 }, { "epoch": 0.4904614740506922, "grad_norm": 1.2381360530853271, "learning_rate": 1.619078666281725e-05, "loss": 0.0906, "step": 22258 }, { "epoch": 0.49048350934020835, "grad_norm": 0.6507713198661804, "learning_rate": 1.6189719503208976e-05, "loss": 0.0658, "step": 22259 }, { "epoch": 0.4905055446297245, "grad_norm": 0.6917517781257629, "learning_rate": 1.6188652337540822e-05, "loss": 0.0707, "step": 22260 }, { "epoch": 0.4905275799192407, "grad_norm": 0.4677955210208893, "learning_rate": 1.6187585165818225e-05, "loss": 0.0603, "step": 22261 }, { "epoch": 0.49054961520875684, "grad_norm": 1.3035945892333984, "learning_rate": 1.618651798804662e-05, "loss": 0.0813, "step": 22262 }, { "epoch": 0.490571650498273, "grad_norm": 0.40973201394081116, "learning_rate": 1.618545080423144e-05, "loss": 0.0847, "step": 22263 }, { "epoch": 0.49059368578778917, "grad_norm": 0.5894466638565063, "learning_rate": 1.6184383614378127e-05, "loss": 0.0616, "step": 22264 }, { "epoch": 0.4906157210773053, "grad_norm": 0.6237196922302246, "learning_rate": 1.618331641849211e-05, "loss": 0.0948, "step": 22265 }, { "epoch": 0.49063775636682144, "grad_norm": 1.2671419382095337, "learning_rate": 1.618224921657883e-05, "loss": 0.0724, "step": 22266 }, { "epoch": 0.4906597916563376, "grad_norm": 0.6581184267997742, "learning_rate": 1.618118200864372e-05, "loss": 0.0725, "step": 22267 }, { "epoch": 0.49068182694585377, "grad_norm": 0.8581275939941406, "learning_rate": 1.6180114794692216e-05, "loss": 0.0739, "step": 22268 }, { "epoch": 0.49070386223536994, "grad_norm": 0.9238520264625549, "learning_rate": 1.6179047574729758e-05, "loss": 0.0884, "step": 22269 }, { "epoch": 0.4907258975248861, "grad_norm": 1.2498276233673096, "learning_rate": 1.6177980348761773e-05, "loss": 0.094, "step": 22270 }, { "epoch": 0.49074793281440227, "grad_norm": 0.9312109351158142, "learning_rate": 1.6176913116793707e-05, "loss": 0.067, "step": 22271 }, { "epoch": 0.49076996810391843, "grad_norm": 0.8156344294548035, "learning_rate": 1.6175845878830986e-05, "loss": 0.0665, "step": 22272 }, { "epoch": 0.4907920033934346, "grad_norm": 0.7246930599212646, "learning_rate": 1.6174778634879057e-05, "loss": 0.0971, "step": 22273 }, { "epoch": 0.49081403868295076, "grad_norm": 0.8684722781181335, "learning_rate": 1.6173711384943346e-05, "loss": 0.1004, "step": 22274 }, { "epoch": 0.4908360739724669, "grad_norm": 0.630892813205719, "learning_rate": 1.61726441290293e-05, "loss": 0.065, "step": 22275 }, { "epoch": 0.4908581092619831, "grad_norm": 0.5759117007255554, "learning_rate": 1.6171576867142347e-05, "loss": 0.0713, "step": 22276 }, { "epoch": 0.49088014455149925, "grad_norm": 0.2938031554222107, "learning_rate": 1.617050959928792e-05, "loss": 0.0573, "step": 22277 }, { "epoch": 0.49090217984101536, "grad_norm": 0.5461788177490234, "learning_rate": 1.616944232547146e-05, "loss": 0.0857, "step": 22278 }, { "epoch": 0.4909242151305315, "grad_norm": 0.8699794411659241, "learning_rate": 1.6168375045698408e-05, "loss": 0.0937, "step": 22279 }, { "epoch": 0.4909462504200477, "grad_norm": 1.3067295551300049, "learning_rate": 1.616730775997419e-05, "loss": 0.0799, "step": 22280 }, { "epoch": 0.49096828570956386, "grad_norm": 0.6869284510612488, "learning_rate": 1.6166240468304253e-05, "loss": 0.0445, "step": 22281 }, { "epoch": 0.49099032099908, "grad_norm": 0.6558582186698914, "learning_rate": 1.6165173170694024e-05, "loss": 0.0818, "step": 22282 }, { "epoch": 0.4910123562885962, "grad_norm": 0.6524061560630798, "learning_rate": 1.6164105867148947e-05, "loss": 0.0531, "step": 22283 }, { "epoch": 0.49103439157811235, "grad_norm": 0.8468543291091919, "learning_rate": 1.616303855767445e-05, "loss": 0.0747, "step": 22284 }, { "epoch": 0.4910564268676285, "grad_norm": 0.6609441637992859, "learning_rate": 1.616197124227598e-05, "loss": 0.0783, "step": 22285 }, { "epoch": 0.4910784621571447, "grad_norm": 0.7217651009559631, "learning_rate": 1.616090392095896e-05, "loss": 0.0616, "step": 22286 }, { "epoch": 0.49110049744666084, "grad_norm": 0.6918922662734985, "learning_rate": 1.6159836593728838e-05, "loss": 0.1245, "step": 22287 }, { "epoch": 0.491122532736177, "grad_norm": 0.6725122928619385, "learning_rate": 1.6158769260591042e-05, "loss": 0.0802, "step": 22288 }, { "epoch": 0.4911445680256932, "grad_norm": 0.3965771198272705, "learning_rate": 1.615770192155102e-05, "loss": 0.0841, "step": 22289 }, { "epoch": 0.4911666033152093, "grad_norm": 0.9076917767524719, "learning_rate": 1.6156634576614196e-05, "loss": 0.0872, "step": 22290 }, { "epoch": 0.49118863860472545, "grad_norm": 0.5274964570999146, "learning_rate": 1.6155567225786012e-05, "loss": 0.1005, "step": 22291 }, { "epoch": 0.4912106738942416, "grad_norm": 0.43351125717163086, "learning_rate": 1.6154499869071908e-05, "loss": 0.052, "step": 22292 }, { "epoch": 0.4912327091837578, "grad_norm": 0.6096329092979431, "learning_rate": 1.6153432506477314e-05, "loss": 0.0492, "step": 22293 }, { "epoch": 0.49125474447327394, "grad_norm": 0.7981131076812744, "learning_rate": 1.6152365138007673e-05, "loss": 0.1088, "step": 22294 }, { "epoch": 0.4912767797627901, "grad_norm": 1.3197938203811646, "learning_rate": 1.6151297763668416e-05, "loss": 0.1075, "step": 22295 }, { "epoch": 0.49129881505230627, "grad_norm": 0.5241841673851013, "learning_rate": 1.6150230383464984e-05, "loss": 0.0565, "step": 22296 }, { "epoch": 0.49132085034182243, "grad_norm": 0.5327258706092834, "learning_rate": 1.6149162997402804e-05, "loss": 0.0715, "step": 22297 }, { "epoch": 0.4913428856313386, "grad_norm": 1.1250100135803223, "learning_rate": 1.614809560548733e-05, "loss": 0.0737, "step": 22298 }, { "epoch": 0.49136492092085476, "grad_norm": 0.8934668898582458, "learning_rate": 1.6147028207723985e-05, "loss": 0.0847, "step": 22299 }, { "epoch": 0.49138695621037093, "grad_norm": 0.9078202843666077, "learning_rate": 1.614596080411821e-05, "loss": 0.0713, "step": 22300 }, { "epoch": 0.4914089914998871, "grad_norm": 0.7829232215881348, "learning_rate": 1.6144893394675445e-05, "loss": 0.0945, "step": 22301 }, { "epoch": 0.4914310267894032, "grad_norm": 0.5562101602554321, "learning_rate": 1.6143825979401125e-05, "loss": 0.0769, "step": 22302 }, { "epoch": 0.49145306207891937, "grad_norm": 0.6525826454162598, "learning_rate": 1.6142758558300682e-05, "loss": 0.0891, "step": 22303 }, { "epoch": 0.49147509736843553, "grad_norm": 0.3770051896572113, "learning_rate": 1.6141691131379557e-05, "loss": 0.0445, "step": 22304 }, { "epoch": 0.4914971326579517, "grad_norm": 0.8189031481742859, "learning_rate": 1.6140623698643185e-05, "loss": 0.0831, "step": 22305 }, { "epoch": 0.49151916794746786, "grad_norm": 0.6999967098236084, "learning_rate": 1.613955626009701e-05, "loss": 0.0631, "step": 22306 }, { "epoch": 0.491541203236984, "grad_norm": 0.769507646560669, "learning_rate": 1.6138488815746463e-05, "loss": 0.0727, "step": 22307 }, { "epoch": 0.4915632385265002, "grad_norm": 0.7034776210784912, "learning_rate": 1.6137421365596984e-05, "loss": 0.085, "step": 22308 }, { "epoch": 0.49158527381601635, "grad_norm": 0.8473108410835266, "learning_rate": 1.6136353909654003e-05, "loss": 0.0698, "step": 22309 }, { "epoch": 0.4916073091055325, "grad_norm": 0.6697137951850891, "learning_rate": 1.6135286447922965e-05, "loss": 0.0879, "step": 22310 }, { "epoch": 0.4916293443950487, "grad_norm": 0.7223321199417114, "learning_rate": 1.61342189804093e-05, "loss": 0.0651, "step": 22311 }, { "epoch": 0.49165137968456485, "grad_norm": 0.49919095635414124, "learning_rate": 1.6133151507118452e-05, "loss": 0.0466, "step": 22312 }, { "epoch": 0.491673414974081, "grad_norm": 0.7222236394882202, "learning_rate": 1.613208402805586e-05, "loss": 0.0503, "step": 22313 }, { "epoch": 0.4916954502635972, "grad_norm": 0.6851812601089478, "learning_rate": 1.613101654322695e-05, "loss": 0.0836, "step": 22314 }, { "epoch": 0.4917174855531133, "grad_norm": 0.44118601083755493, "learning_rate": 1.6129949052637172e-05, "loss": 0.0608, "step": 22315 }, { "epoch": 0.49173952084262945, "grad_norm": 0.4389554262161255, "learning_rate": 1.6128881556291956e-05, "loss": 0.0812, "step": 22316 }, { "epoch": 0.4917615561321456, "grad_norm": 0.5783699750900269, "learning_rate": 1.6127814054196742e-05, "loss": 0.0964, "step": 22317 }, { "epoch": 0.4917835914216618, "grad_norm": 0.41653117537498474, "learning_rate": 1.6126746546356964e-05, "loss": 0.0714, "step": 22318 }, { "epoch": 0.49180562671117795, "grad_norm": 0.4756111800670624, "learning_rate": 1.6125679032778064e-05, "loss": 0.0685, "step": 22319 }, { "epoch": 0.4918276620006941, "grad_norm": 0.4553586542606354, "learning_rate": 1.6124611513465472e-05, "loss": 0.0586, "step": 22320 }, { "epoch": 0.4918496972902103, "grad_norm": 0.5035398006439209, "learning_rate": 1.6123543988424637e-05, "loss": 0.0656, "step": 22321 }, { "epoch": 0.49187173257972644, "grad_norm": 0.6815789341926575, "learning_rate": 1.6122476457660983e-05, "loss": 0.0747, "step": 22322 }, { "epoch": 0.4918937678692426, "grad_norm": 0.9537158012390137, "learning_rate": 1.6121408921179958e-05, "loss": 0.0929, "step": 22323 }, { "epoch": 0.49191580315875877, "grad_norm": 0.6959238648414612, "learning_rate": 1.6120341378987e-05, "loss": 0.0693, "step": 22324 }, { "epoch": 0.49193783844827493, "grad_norm": 0.7368230223655701, "learning_rate": 1.6119273831087538e-05, "loss": 0.0821, "step": 22325 }, { "epoch": 0.4919598737377911, "grad_norm": 0.9526912569999695, "learning_rate": 1.611820627748701e-05, "loss": 0.1004, "step": 22326 }, { "epoch": 0.4919819090273072, "grad_norm": 0.7435754537582397, "learning_rate": 1.6117138718190866e-05, "loss": 0.0777, "step": 22327 }, { "epoch": 0.49200394431682337, "grad_norm": 0.7296997904777527, "learning_rate": 1.6116071153204533e-05, "loss": 0.0693, "step": 22328 }, { "epoch": 0.49202597960633954, "grad_norm": 0.7329961657524109, "learning_rate": 1.611500358253345e-05, "loss": 0.081, "step": 22329 }, { "epoch": 0.4920480148958557, "grad_norm": 0.5895894765853882, "learning_rate": 1.6113936006183056e-05, "loss": 0.065, "step": 22330 }, { "epoch": 0.49207005018537187, "grad_norm": 0.7874571084976196, "learning_rate": 1.6112868424158787e-05, "loss": 0.0933, "step": 22331 }, { "epoch": 0.49209208547488803, "grad_norm": 0.9091330170631409, "learning_rate": 1.6111800836466086e-05, "loss": 0.0847, "step": 22332 }, { "epoch": 0.4921141207644042, "grad_norm": 0.650148868560791, "learning_rate": 1.6110733243110387e-05, "loss": 0.0731, "step": 22333 }, { "epoch": 0.49213615605392036, "grad_norm": 0.3629952073097229, "learning_rate": 1.6109665644097125e-05, "loss": 0.0528, "step": 22334 }, { "epoch": 0.4921581913434365, "grad_norm": 0.550975501537323, "learning_rate": 1.610859803943174e-05, "loss": 0.076, "step": 22335 }, { "epoch": 0.4921802266329527, "grad_norm": 0.4992934763431549, "learning_rate": 1.610753042911967e-05, "loss": 0.0562, "step": 22336 }, { "epoch": 0.49220226192246885, "grad_norm": 0.8331459164619446, "learning_rate": 1.610646281316636e-05, "loss": 0.0759, "step": 22337 }, { "epoch": 0.492224297211985, "grad_norm": 0.6196001172065735, "learning_rate": 1.610539519157724e-05, "loss": 0.1033, "step": 22338 }, { "epoch": 0.4922463325015011, "grad_norm": 0.6645519137382507, "learning_rate": 1.6104327564357742e-05, "loss": 0.0551, "step": 22339 }, { "epoch": 0.4922683677910173, "grad_norm": 0.6456269025802612, "learning_rate": 1.610325993151332e-05, "loss": 0.0571, "step": 22340 }, { "epoch": 0.49229040308053346, "grad_norm": 0.6151903867721558, "learning_rate": 1.61021922930494e-05, "loss": 0.0959, "step": 22341 }, { "epoch": 0.4923124383700496, "grad_norm": 0.4861869812011719, "learning_rate": 1.6101124648971426e-05, "loss": 0.0794, "step": 22342 }, { "epoch": 0.4923344736595658, "grad_norm": 0.6540578603744507, "learning_rate": 1.610005699928483e-05, "loss": 0.082, "step": 22343 }, { "epoch": 0.49235650894908195, "grad_norm": 0.5319477915763855, "learning_rate": 1.6098989343995058e-05, "loss": 0.0597, "step": 22344 }, { "epoch": 0.4923785442385981, "grad_norm": 0.5688735246658325, "learning_rate": 1.609792168310754e-05, "loss": 0.0861, "step": 22345 }, { "epoch": 0.4924005795281143, "grad_norm": 1.1945494413375854, "learning_rate": 1.609685401662772e-05, "loss": 0.0961, "step": 22346 }, { "epoch": 0.49242261481763044, "grad_norm": 0.5586889982223511, "learning_rate": 1.6095786344561034e-05, "loss": 0.0928, "step": 22347 }, { "epoch": 0.4924446501071466, "grad_norm": 0.6286115050315857, "learning_rate": 1.609471866691292e-05, "loss": 0.0652, "step": 22348 }, { "epoch": 0.4924666853966628, "grad_norm": 0.416032075881958, "learning_rate": 1.609365098368882e-05, "loss": 0.0525, "step": 22349 }, { "epoch": 0.49248872068617894, "grad_norm": 0.5057561993598938, "learning_rate": 1.6092583294894164e-05, "loss": 0.0852, "step": 22350 }, { "epoch": 0.4925107559756951, "grad_norm": 0.6097124814987183, "learning_rate": 1.6091515600534397e-05, "loss": 0.0842, "step": 22351 }, { "epoch": 0.4925327912652112, "grad_norm": 0.3039890229701996, "learning_rate": 1.6090447900614957e-05, "loss": 0.0648, "step": 22352 }, { "epoch": 0.4925548265547274, "grad_norm": 0.44212281703948975, "learning_rate": 1.608938019514128e-05, "loss": 0.0791, "step": 22353 }, { "epoch": 0.49257686184424354, "grad_norm": 0.3385927379131317, "learning_rate": 1.6088312484118807e-05, "loss": 0.0707, "step": 22354 }, { "epoch": 0.4925988971337597, "grad_norm": 0.7409830689430237, "learning_rate": 1.6087244767552974e-05, "loss": 0.0858, "step": 22355 }, { "epoch": 0.49262093242327587, "grad_norm": 0.686988353729248, "learning_rate": 1.6086177045449222e-05, "loss": 0.0657, "step": 22356 }, { "epoch": 0.49264296771279203, "grad_norm": 0.8369141817092896, "learning_rate": 1.6085109317812987e-05, "loss": 0.0872, "step": 22357 }, { "epoch": 0.4926650030023082, "grad_norm": 0.8271757960319519, "learning_rate": 1.6084041584649705e-05, "loss": 0.0651, "step": 22358 }, { "epoch": 0.49268703829182436, "grad_norm": 0.8652414083480835, "learning_rate": 1.608297384596482e-05, "loss": 0.0858, "step": 22359 }, { "epoch": 0.49270907358134053, "grad_norm": 0.9278575778007507, "learning_rate": 1.6081906101763767e-05, "loss": 0.0766, "step": 22360 }, { "epoch": 0.4927311088708567, "grad_norm": 0.6097273230552673, "learning_rate": 1.608083835205199e-05, "loss": 0.0714, "step": 22361 }, { "epoch": 0.49275314416037286, "grad_norm": 0.7995662093162537, "learning_rate": 1.607977059683492e-05, "loss": 0.1231, "step": 22362 }, { "epoch": 0.492775179449889, "grad_norm": 1.1386024951934814, "learning_rate": 1.6078702836118004e-05, "loss": 0.1118, "step": 22363 }, { "epoch": 0.49279721473940513, "grad_norm": 0.5444544553756714, "learning_rate": 1.6077635069906675e-05, "loss": 0.0743, "step": 22364 }, { "epoch": 0.4928192500289213, "grad_norm": 0.8113424181938171, "learning_rate": 1.607656729820637e-05, "loss": 0.0567, "step": 22365 }, { "epoch": 0.49284128531843746, "grad_norm": 0.5119706988334656, "learning_rate": 1.6075499521022532e-05, "loss": 0.0558, "step": 22366 }, { "epoch": 0.4928633206079536, "grad_norm": 0.7285698056221008, "learning_rate": 1.6074431738360597e-05, "loss": 0.0934, "step": 22367 }, { "epoch": 0.4928853558974698, "grad_norm": 0.7998790144920349, "learning_rate": 1.6073363950226002e-05, "loss": 0.059, "step": 22368 }, { "epoch": 0.49290739118698595, "grad_norm": 0.6651440262794495, "learning_rate": 1.6072296156624192e-05, "loss": 0.0735, "step": 22369 }, { "epoch": 0.4929294264765021, "grad_norm": 0.8166545629501343, "learning_rate": 1.6071228357560603e-05, "loss": 0.1112, "step": 22370 }, { "epoch": 0.4929514617660183, "grad_norm": 0.6997723579406738, "learning_rate": 1.6070160553040674e-05, "loss": 0.0705, "step": 22371 }, { "epoch": 0.49297349705553445, "grad_norm": 0.9468487501144409, "learning_rate": 1.6069092743069848e-05, "loss": 0.0991, "step": 22372 }, { "epoch": 0.4929955323450506, "grad_norm": 0.5482782125473022, "learning_rate": 1.6068024927653555e-05, "loss": 0.071, "step": 22373 }, { "epoch": 0.4930175676345668, "grad_norm": 0.6499703526496887, "learning_rate": 1.6066957106797234e-05, "loss": 0.1036, "step": 22374 }, { "epoch": 0.49303960292408294, "grad_norm": 0.6287031769752502, "learning_rate": 1.6065889280506332e-05, "loss": 0.0736, "step": 22375 }, { "epoch": 0.49306163821359905, "grad_norm": 0.3361639976501465, "learning_rate": 1.6064821448786282e-05, "loss": 0.0503, "step": 22376 }, { "epoch": 0.4930836735031152, "grad_norm": 0.9156261682510376, "learning_rate": 1.606375361164253e-05, "loss": 0.0867, "step": 22377 }, { "epoch": 0.4931057087926314, "grad_norm": 0.4859314262866974, "learning_rate": 1.6062685769080505e-05, "loss": 0.0615, "step": 22378 }, { "epoch": 0.49312774408214755, "grad_norm": 0.6914181113243103, "learning_rate": 1.6061617921105655e-05, "loss": 0.0733, "step": 22379 }, { "epoch": 0.4931497793716637, "grad_norm": 0.8992477059364319, "learning_rate": 1.6060550067723418e-05, "loss": 0.0862, "step": 22380 }, { "epoch": 0.4931718146611799, "grad_norm": 0.7288925051689148, "learning_rate": 1.6059482208939225e-05, "loss": 0.0804, "step": 22381 }, { "epoch": 0.49319384995069604, "grad_norm": 0.7825410962104797, "learning_rate": 1.6058414344758527e-05, "loss": 0.0716, "step": 22382 }, { "epoch": 0.4932158852402122, "grad_norm": 0.9567502737045288, "learning_rate": 1.6057346475186753e-05, "loss": 0.0803, "step": 22383 }, { "epoch": 0.49323792052972837, "grad_norm": 0.5667718648910522, "learning_rate": 1.6056278600229346e-05, "loss": 0.0522, "step": 22384 }, { "epoch": 0.49325995581924453, "grad_norm": 0.6201373338699341, "learning_rate": 1.6055210719891746e-05, "loss": 0.1072, "step": 22385 }, { "epoch": 0.4932819911087607, "grad_norm": 0.643141508102417, "learning_rate": 1.6054142834179396e-05, "loss": 0.0759, "step": 22386 }, { "epoch": 0.49330402639827686, "grad_norm": 0.5729743838310242, "learning_rate": 1.6053074943097728e-05, "loss": 0.0926, "step": 22387 }, { "epoch": 0.493326061687793, "grad_norm": 0.6010366082191467, "learning_rate": 1.6052007046652187e-05, "loss": 0.1058, "step": 22388 }, { "epoch": 0.49334809697730914, "grad_norm": 0.8194659352302551, "learning_rate": 1.6050939144848207e-05, "loss": 0.0921, "step": 22389 }, { "epoch": 0.4933701322668253, "grad_norm": 0.6778014302253723, "learning_rate": 1.6049871237691232e-05, "loss": 0.0609, "step": 22390 }, { "epoch": 0.49339216755634147, "grad_norm": 0.44741812348365784, "learning_rate": 1.6048803325186695e-05, "loss": 0.0536, "step": 22391 }, { "epoch": 0.49341420284585763, "grad_norm": 0.5467360019683838, "learning_rate": 1.604773540734005e-05, "loss": 0.0847, "step": 22392 }, { "epoch": 0.4934362381353738, "grad_norm": 0.4785390794277191, "learning_rate": 1.6046667484156717e-05, "loss": 0.0641, "step": 22393 }, { "epoch": 0.49345827342488996, "grad_norm": 0.8245639801025391, "learning_rate": 1.6045599555642153e-05, "loss": 0.1088, "step": 22394 }, { "epoch": 0.4934803087144061, "grad_norm": 0.5142103433609009, "learning_rate": 1.604453162180178e-05, "loss": 0.0834, "step": 22395 }, { "epoch": 0.4935023440039223, "grad_norm": 0.9123607873916626, "learning_rate": 1.6043463682641056e-05, "loss": 0.0995, "step": 22396 }, { "epoch": 0.49352437929343845, "grad_norm": 0.5030695199966431, "learning_rate": 1.6042395738165412e-05, "loss": 0.1177, "step": 22397 }, { "epoch": 0.4935464145829546, "grad_norm": 0.7795760631561279, "learning_rate": 1.6041327788380285e-05, "loss": 0.0733, "step": 22398 }, { "epoch": 0.4935684498724708, "grad_norm": 0.6246606111526489, "learning_rate": 1.6040259833291116e-05, "loss": 0.1159, "step": 22399 }, { "epoch": 0.49359048516198695, "grad_norm": 0.7387188076972961, "learning_rate": 1.603919187290335e-05, "loss": 0.0728, "step": 22400 }, { "epoch": 0.49361252045150306, "grad_norm": 0.6618116497993469, "learning_rate": 1.6038123907222415e-05, "loss": 0.0645, "step": 22401 }, { "epoch": 0.4936345557410192, "grad_norm": 0.8771855235099792, "learning_rate": 1.6037055936253763e-05, "loss": 0.0997, "step": 22402 }, { "epoch": 0.4936565910305354, "grad_norm": 0.6433945894241333, "learning_rate": 1.6035987960002827e-05, "loss": 0.0797, "step": 22403 }, { "epoch": 0.49367862632005155, "grad_norm": 1.086409568786621, "learning_rate": 1.6034919978475052e-05, "loss": 0.0887, "step": 22404 }, { "epoch": 0.4937006616095677, "grad_norm": 0.7626205086708069, "learning_rate": 1.6033851991675876e-05, "loss": 0.0819, "step": 22405 }, { "epoch": 0.4937226968990839, "grad_norm": 0.6811474561691284, "learning_rate": 1.603278399961073e-05, "loss": 0.0639, "step": 22406 }, { "epoch": 0.49374473218860004, "grad_norm": 0.6554971933364868, "learning_rate": 1.603171600228507e-05, "loss": 0.0859, "step": 22407 }, { "epoch": 0.4937667674781162, "grad_norm": 0.9393386244773865, "learning_rate": 1.603064799970432e-05, "loss": 0.0956, "step": 22408 }, { "epoch": 0.4937888027676324, "grad_norm": 0.9103900194168091, "learning_rate": 1.602957999187393e-05, "loss": 0.0741, "step": 22409 }, { "epoch": 0.49381083805714854, "grad_norm": 0.8801742792129517, "learning_rate": 1.6028511978799335e-05, "loss": 0.0958, "step": 22410 }, { "epoch": 0.4938328733466647, "grad_norm": 0.4463183879852295, "learning_rate": 1.6027443960485983e-05, "loss": 0.0787, "step": 22411 }, { "epoch": 0.49385490863618087, "grad_norm": 0.6767827868461609, "learning_rate": 1.6026375936939306e-05, "loss": 0.0521, "step": 22412 }, { "epoch": 0.493876943925697, "grad_norm": 0.7259926795959473, "learning_rate": 1.602530790816474e-05, "loss": 0.0881, "step": 22413 }, { "epoch": 0.49389897921521314, "grad_norm": 0.591339111328125, "learning_rate": 1.6024239874167737e-05, "loss": 0.0863, "step": 22414 }, { "epoch": 0.4939210145047293, "grad_norm": 0.6759514808654785, "learning_rate": 1.602317183495373e-05, "loss": 0.0874, "step": 22415 }, { "epoch": 0.49394304979424547, "grad_norm": 0.5824975371360779, "learning_rate": 1.6022103790528157e-05, "loss": 0.0717, "step": 22416 }, { "epoch": 0.49396508508376163, "grad_norm": 0.7173762321472168, "learning_rate": 1.6021035740896463e-05, "loss": 0.0994, "step": 22417 }, { "epoch": 0.4939871203732778, "grad_norm": 0.9422994256019592, "learning_rate": 1.601996768606409e-05, "loss": 0.0857, "step": 22418 }, { "epoch": 0.49400915566279396, "grad_norm": 0.6128246188163757, "learning_rate": 1.6018899626036472e-05, "loss": 0.072, "step": 22419 }, { "epoch": 0.49403119095231013, "grad_norm": 0.7906481027603149, "learning_rate": 1.601783156081905e-05, "loss": 0.1004, "step": 22420 }, { "epoch": 0.4940532262418263, "grad_norm": 0.4266297519207001, "learning_rate": 1.601676349041727e-05, "loss": 0.0543, "step": 22421 }, { "epoch": 0.49407526153134246, "grad_norm": 0.7772535085678101, "learning_rate": 1.601569541483656e-05, "loss": 0.0846, "step": 22422 }, { "epoch": 0.4940972968208586, "grad_norm": 0.3999541997909546, "learning_rate": 1.6014627334082376e-05, "loss": 0.0892, "step": 22423 }, { "epoch": 0.4941193321103748, "grad_norm": 0.7359406352043152, "learning_rate": 1.601355924816015e-05, "loss": 0.0664, "step": 22424 }, { "epoch": 0.49414136739989095, "grad_norm": 0.6395556926727295, "learning_rate": 1.601249115707532e-05, "loss": 0.0984, "step": 22425 }, { "epoch": 0.49416340268940706, "grad_norm": 0.5721629858016968, "learning_rate": 1.6011423060833334e-05, "loss": 0.0712, "step": 22426 }, { "epoch": 0.4941854379789232, "grad_norm": 0.5328962206840515, "learning_rate": 1.6010354959439626e-05, "loss": 0.083, "step": 22427 }, { "epoch": 0.4942074732684394, "grad_norm": 0.9213367700576782, "learning_rate": 1.600928685289964e-05, "loss": 0.0695, "step": 22428 }, { "epoch": 0.49422950855795555, "grad_norm": 0.5579873323440552, "learning_rate": 1.600821874121881e-05, "loss": 0.1027, "step": 22429 }, { "epoch": 0.4942515438474717, "grad_norm": 0.44951894879341125, "learning_rate": 1.6007150624402587e-05, "loss": 0.0653, "step": 22430 }, { "epoch": 0.4942735791369879, "grad_norm": 0.7546252608299255, "learning_rate": 1.6006082502456404e-05, "loss": 0.0949, "step": 22431 }, { "epoch": 0.49429561442650405, "grad_norm": 0.567699670791626, "learning_rate": 1.6005014375385702e-05, "loss": 0.0648, "step": 22432 }, { "epoch": 0.4943176497160202, "grad_norm": 0.6622136235237122, "learning_rate": 1.6003946243195922e-05, "loss": 0.1084, "step": 22433 }, { "epoch": 0.4943396850055364, "grad_norm": 1.0553514957427979, "learning_rate": 1.6002878105892507e-05, "loss": 0.1321, "step": 22434 }, { "epoch": 0.49436172029505254, "grad_norm": 0.8203596472740173, "learning_rate": 1.6001809963480897e-05, "loss": 0.0853, "step": 22435 }, { "epoch": 0.4943837555845687, "grad_norm": 0.5389773845672607, "learning_rate": 1.600074181596653e-05, "loss": 0.0524, "step": 22436 }, { "epoch": 0.49440579087408487, "grad_norm": 0.57300865650177, "learning_rate": 1.599967366335485e-05, "loss": 0.0317, "step": 22437 }, { "epoch": 0.494427826163601, "grad_norm": 0.8077917695045471, "learning_rate": 1.5998605505651297e-05, "loss": 0.0964, "step": 22438 }, { "epoch": 0.49444986145311715, "grad_norm": 0.710088849067688, "learning_rate": 1.5997537342861304e-05, "loss": 0.0702, "step": 22439 }, { "epoch": 0.4944718967426333, "grad_norm": 0.40856724977493286, "learning_rate": 1.5996469174990326e-05, "loss": 0.0946, "step": 22440 }, { "epoch": 0.4944939320321495, "grad_norm": 0.5701146721839905, "learning_rate": 1.599540100204379e-05, "loss": 0.0617, "step": 22441 }, { "epoch": 0.49451596732166564, "grad_norm": 0.7916575074195862, "learning_rate": 1.5994332824027147e-05, "loss": 0.0816, "step": 22442 }, { "epoch": 0.4945380026111818, "grad_norm": 0.9423279762268066, "learning_rate": 1.5993264640945836e-05, "loss": 0.0512, "step": 22443 }, { "epoch": 0.49456003790069797, "grad_norm": 0.7628740072250366, "learning_rate": 1.5992196452805292e-05, "loss": 0.0925, "step": 22444 }, { "epoch": 0.49458207319021413, "grad_norm": 0.7278252840042114, "learning_rate": 1.5991128259610957e-05, "loss": 0.0737, "step": 22445 }, { "epoch": 0.4946041084797303, "grad_norm": 0.439081609249115, "learning_rate": 1.5990060061368278e-05, "loss": 0.0752, "step": 22446 }, { "epoch": 0.49462614376924646, "grad_norm": 0.564940869808197, "learning_rate": 1.598899185808269e-05, "loss": 0.0974, "step": 22447 }, { "epoch": 0.4946481790587626, "grad_norm": 0.46146270632743835, "learning_rate": 1.5987923649759634e-05, "loss": 0.0819, "step": 22448 }, { "epoch": 0.4946702143482788, "grad_norm": 0.5207476019859314, "learning_rate": 1.5986855436404556e-05, "loss": 0.0694, "step": 22449 }, { "epoch": 0.49469224963779496, "grad_norm": 0.9097130298614502, "learning_rate": 1.5985787218022892e-05, "loss": 0.084, "step": 22450 }, { "epoch": 0.49471428492731107, "grad_norm": 1.050504446029663, "learning_rate": 1.5984718994620092e-05, "loss": 0.077, "step": 22451 }, { "epoch": 0.49473632021682723, "grad_norm": 0.48637473583221436, "learning_rate": 1.5983650766201587e-05, "loss": 0.0804, "step": 22452 }, { "epoch": 0.4947583555063434, "grad_norm": 0.5147369503974915, "learning_rate": 1.598258253277282e-05, "loss": 0.0516, "step": 22453 }, { "epoch": 0.49478039079585956, "grad_norm": 0.598965048789978, "learning_rate": 1.598151429433923e-05, "loss": 0.1071, "step": 22454 }, { "epoch": 0.4948024260853757, "grad_norm": 0.5196638107299805, "learning_rate": 1.5980446050906266e-05, "loss": 0.085, "step": 22455 }, { "epoch": 0.4948244613748919, "grad_norm": 0.4770892560482025, "learning_rate": 1.5979377802479365e-05, "loss": 0.0458, "step": 22456 }, { "epoch": 0.49484649666440805, "grad_norm": 0.7593492269515991, "learning_rate": 1.5978309549063965e-05, "loss": 0.0839, "step": 22457 }, { "epoch": 0.4948685319539242, "grad_norm": 0.8145363926887512, "learning_rate": 1.5977241290665508e-05, "loss": 0.1032, "step": 22458 }, { "epoch": 0.4948905672434404, "grad_norm": 0.5980483889579773, "learning_rate": 1.5976173027289444e-05, "loss": 0.0932, "step": 22459 }, { "epoch": 0.49491260253295655, "grad_norm": 0.7067267894744873, "learning_rate": 1.5975104758941202e-05, "loss": 0.0583, "step": 22460 }, { "epoch": 0.4949346378224727, "grad_norm": 0.6212840676307678, "learning_rate": 1.597403648562623e-05, "loss": 0.0736, "step": 22461 }, { "epoch": 0.4949566731119889, "grad_norm": 0.6628887057304382, "learning_rate": 1.5972968207349962e-05, "loss": 0.0684, "step": 22462 }, { "epoch": 0.494978708401505, "grad_norm": 0.7431968450546265, "learning_rate": 1.5971899924117854e-05, "loss": 0.0928, "step": 22463 }, { "epoch": 0.49500074369102115, "grad_norm": 0.5263752341270447, "learning_rate": 1.5970831635935333e-05, "loss": 0.079, "step": 22464 }, { "epoch": 0.4950227789805373, "grad_norm": 0.4460807144641876, "learning_rate": 1.596976334280785e-05, "loss": 0.0764, "step": 22465 }, { "epoch": 0.4950448142700535, "grad_norm": 0.8346723914146423, "learning_rate": 1.5968695044740837e-05, "loss": 0.0778, "step": 22466 }, { "epoch": 0.49506684955956964, "grad_norm": 0.7515541315078735, "learning_rate": 1.5967626741739746e-05, "loss": 0.0826, "step": 22467 }, { "epoch": 0.4950888848490858, "grad_norm": 0.5334516167640686, "learning_rate": 1.596655843381001e-05, "loss": 0.0875, "step": 22468 }, { "epoch": 0.495110920138602, "grad_norm": 0.5122300982475281, "learning_rate": 1.5965490120957075e-05, "loss": 0.0925, "step": 22469 }, { "epoch": 0.49513295542811814, "grad_norm": 0.9131929874420166, "learning_rate": 1.5964421803186378e-05, "loss": 0.0974, "step": 22470 }, { "epoch": 0.4951549907176343, "grad_norm": 0.6058739423751831, "learning_rate": 1.5963353480503368e-05, "loss": 0.0744, "step": 22471 }, { "epoch": 0.49517702600715047, "grad_norm": 0.5428001284599304, "learning_rate": 1.596228515291348e-05, "loss": 0.0748, "step": 22472 }, { "epoch": 0.49519906129666663, "grad_norm": 0.828734278678894, "learning_rate": 1.5961216820422156e-05, "loss": 0.0753, "step": 22473 }, { "epoch": 0.4952210965861828, "grad_norm": 0.6128023266792297, "learning_rate": 1.596014848303484e-05, "loss": 0.0519, "step": 22474 }, { "epoch": 0.4952431318756989, "grad_norm": 0.6659802794456482, "learning_rate": 1.5959080140756977e-05, "loss": 0.0558, "step": 22475 }, { "epoch": 0.49526516716521507, "grad_norm": 0.5312381386756897, "learning_rate": 1.5958011793594007e-05, "loss": 0.0916, "step": 22476 }, { "epoch": 0.49528720245473123, "grad_norm": 0.7533063292503357, "learning_rate": 1.595694344155136e-05, "loss": 0.0498, "step": 22477 }, { "epoch": 0.4953092377442474, "grad_norm": 0.744559645652771, "learning_rate": 1.595587508463449e-05, "loss": 0.083, "step": 22478 }, { "epoch": 0.49533127303376356, "grad_norm": 1.1045318841934204, "learning_rate": 1.5954806722848835e-05, "loss": 0.0609, "step": 22479 }, { "epoch": 0.49535330832327973, "grad_norm": 0.5959398150444031, "learning_rate": 1.595373835619984e-05, "loss": 0.0644, "step": 22480 }, { "epoch": 0.4953753436127959, "grad_norm": 0.795166015625, "learning_rate": 1.5952669984692944e-05, "loss": 0.0847, "step": 22481 }, { "epoch": 0.49539737890231206, "grad_norm": 0.6990796327590942, "learning_rate": 1.5951601608333585e-05, "loss": 0.0669, "step": 22482 }, { "epoch": 0.4954194141918282, "grad_norm": 0.5278724431991577, "learning_rate": 1.5950533227127215e-05, "loss": 0.0559, "step": 22483 }, { "epoch": 0.4954414494813444, "grad_norm": 0.7875949144363403, "learning_rate": 1.5949464841079266e-05, "loss": 0.1451, "step": 22484 }, { "epoch": 0.49546348477086055, "grad_norm": 0.379548579454422, "learning_rate": 1.5948396450195183e-05, "loss": 0.0519, "step": 22485 }, { "epoch": 0.4954855200603767, "grad_norm": 0.906466543674469, "learning_rate": 1.594732805448041e-05, "loss": 0.0967, "step": 22486 }, { "epoch": 0.4955075553498929, "grad_norm": 0.4505690932273865, "learning_rate": 1.5946259653940384e-05, "loss": 0.0739, "step": 22487 }, { "epoch": 0.495529590639409, "grad_norm": 0.6408618688583374, "learning_rate": 1.5945191248580555e-05, "loss": 0.0843, "step": 22488 }, { "epoch": 0.49555162592892515, "grad_norm": 0.8570835590362549, "learning_rate": 1.5944122838406353e-05, "loss": 0.0934, "step": 22489 }, { "epoch": 0.4955736612184413, "grad_norm": 0.6327635645866394, "learning_rate": 1.5943054423423233e-05, "loss": 0.072, "step": 22490 }, { "epoch": 0.4955956965079575, "grad_norm": 1.109899878501892, "learning_rate": 1.594198600363663e-05, "loss": 0.0728, "step": 22491 }, { "epoch": 0.49561773179747365, "grad_norm": 0.638490617275238, "learning_rate": 1.5940917579051985e-05, "loss": 0.0746, "step": 22492 }, { "epoch": 0.4956397670869898, "grad_norm": 0.4511356055736542, "learning_rate": 1.5939849149674747e-05, "loss": 0.0968, "step": 22493 }, { "epoch": 0.495661802376506, "grad_norm": 0.6715280413627625, "learning_rate": 1.5938780715510346e-05, "loss": 0.0795, "step": 22494 }, { "epoch": 0.49568383766602214, "grad_norm": 0.9090638756752014, "learning_rate": 1.5937712276564237e-05, "loss": 0.1153, "step": 22495 }, { "epoch": 0.4957058729555383, "grad_norm": 0.6244463324546814, "learning_rate": 1.5936643832841856e-05, "loss": 0.0922, "step": 22496 }, { "epoch": 0.49572790824505447, "grad_norm": 0.6689375638961792, "learning_rate": 1.5935575384348646e-05, "loss": 0.0722, "step": 22497 }, { "epoch": 0.49574994353457064, "grad_norm": 0.8928379416465759, "learning_rate": 1.593450693109004e-05, "loss": 0.0546, "step": 22498 }, { "epoch": 0.4957719788240868, "grad_norm": 0.48720040917396545, "learning_rate": 1.5933438473071506e-05, "loss": 0.068, "step": 22499 }, { "epoch": 0.4957940141136029, "grad_norm": 0.6670413613319397, "learning_rate": 1.593237001029846e-05, "loss": 0.0932, "step": 22500 }, { "epoch": 0.4958160494031191, "grad_norm": 0.894088625907898, "learning_rate": 1.593130154277635e-05, "loss": 0.1028, "step": 22501 }, { "epoch": 0.49583808469263524, "grad_norm": 0.9714820981025696, "learning_rate": 1.5930233070510626e-05, "loss": 0.0978, "step": 22502 }, { "epoch": 0.4958601199821514, "grad_norm": 0.6253893971443176, "learning_rate": 1.5929164593506724e-05, "loss": 0.0839, "step": 22503 }, { "epoch": 0.49588215527166757, "grad_norm": 0.575320839881897, "learning_rate": 1.592809611177009e-05, "loss": 0.0875, "step": 22504 }, { "epoch": 0.49590419056118373, "grad_norm": 0.7386876940727234, "learning_rate": 1.592702762530617e-05, "loss": 0.0906, "step": 22505 }, { "epoch": 0.4959262258506999, "grad_norm": 1.116025447845459, "learning_rate": 1.592595913412039e-05, "loss": 0.1144, "step": 22506 }, { "epoch": 0.49594826114021606, "grad_norm": 1.1184754371643066, "learning_rate": 1.592489063821821e-05, "loss": 0.0967, "step": 22507 }, { "epoch": 0.4959702964297322, "grad_norm": 0.7806857824325562, "learning_rate": 1.592382213760507e-05, "loss": 0.1011, "step": 22508 }, { "epoch": 0.4959923317192484, "grad_norm": 0.6129628419876099, "learning_rate": 1.5922753632286406e-05, "loss": 0.0897, "step": 22509 }, { "epoch": 0.49601436700876456, "grad_norm": 0.653958797454834, "learning_rate": 1.5921685122267657e-05, "loss": 0.1156, "step": 22510 }, { "epoch": 0.4960364022982807, "grad_norm": 0.7825790047645569, "learning_rate": 1.592061660755428e-05, "loss": 0.0706, "step": 22511 }, { "epoch": 0.49605843758779683, "grad_norm": 0.6283257007598877, "learning_rate": 1.5919548088151698e-05, "loss": 0.095, "step": 22512 }, { "epoch": 0.496080472877313, "grad_norm": 0.6401978135108948, "learning_rate": 1.591847956406537e-05, "loss": 0.0884, "step": 22513 }, { "epoch": 0.49610250816682916, "grad_norm": 0.4284697473049164, "learning_rate": 1.5917411035300736e-05, "loss": 0.0553, "step": 22514 }, { "epoch": 0.4961245434563453, "grad_norm": 0.47637346386909485, "learning_rate": 1.5916342501863234e-05, "loss": 0.0597, "step": 22515 }, { "epoch": 0.4961465787458615, "grad_norm": 0.7485055327415466, "learning_rate": 1.591527396375831e-05, "loss": 0.073, "step": 22516 }, { "epoch": 0.49616861403537765, "grad_norm": 0.8966975212097168, "learning_rate": 1.59142054209914e-05, "loss": 0.0911, "step": 22517 }, { "epoch": 0.4961906493248938, "grad_norm": 0.47613686323165894, "learning_rate": 1.5913136873567955e-05, "loss": 0.0558, "step": 22518 }, { "epoch": 0.49621268461441, "grad_norm": 0.435981810092926, "learning_rate": 1.5912068321493414e-05, "loss": 0.0885, "step": 22519 }, { "epoch": 0.49623471990392615, "grad_norm": 0.8272391557693481, "learning_rate": 1.591099976477322e-05, "loss": 0.0771, "step": 22520 }, { "epoch": 0.4962567551934423, "grad_norm": 0.7425220012664795, "learning_rate": 1.5909931203412814e-05, "loss": 0.098, "step": 22521 }, { "epoch": 0.4962787904829585, "grad_norm": 0.6819807291030884, "learning_rate": 1.590886263741764e-05, "loss": 0.0967, "step": 22522 }, { "epoch": 0.49630082577247464, "grad_norm": 0.6834820508956909, "learning_rate": 1.5907794066793146e-05, "loss": 0.0784, "step": 22523 }, { "epoch": 0.4963228610619908, "grad_norm": 0.9578232169151306, "learning_rate": 1.590672549154477e-05, "loss": 0.0696, "step": 22524 }, { "epoch": 0.4963448963515069, "grad_norm": 0.6847259402275085, "learning_rate": 1.590565691167795e-05, "loss": 0.117, "step": 22525 }, { "epoch": 0.4963669316410231, "grad_norm": 0.32728973031044006, "learning_rate": 1.5904588327198135e-05, "loss": 0.0769, "step": 22526 }, { "epoch": 0.49638896693053924, "grad_norm": 0.45919808745384216, "learning_rate": 1.5903519738110764e-05, "loss": 0.0759, "step": 22527 }, { "epoch": 0.4964110022200554, "grad_norm": 0.698712944984436, "learning_rate": 1.5902451144421288e-05, "loss": 0.0778, "step": 22528 }, { "epoch": 0.4964330375095716, "grad_norm": 0.6054946780204773, "learning_rate": 1.590138254613514e-05, "loss": 0.0869, "step": 22529 }, { "epoch": 0.49645507279908774, "grad_norm": 0.5128828883171082, "learning_rate": 1.590031394325777e-05, "loss": 0.0825, "step": 22530 }, { "epoch": 0.4964771080886039, "grad_norm": 0.5057942867279053, "learning_rate": 1.589924533579462e-05, "loss": 0.0504, "step": 22531 }, { "epoch": 0.49649914337812007, "grad_norm": 0.7966471910476685, "learning_rate": 1.5898176723751128e-05, "loss": 0.0869, "step": 22532 }, { "epoch": 0.49652117866763623, "grad_norm": 0.6915633678436279, "learning_rate": 1.589710810713274e-05, "loss": 0.0988, "step": 22533 }, { "epoch": 0.4965432139571524, "grad_norm": 0.444663405418396, "learning_rate": 1.58960394859449e-05, "loss": 0.0686, "step": 22534 }, { "epoch": 0.49656524924666856, "grad_norm": 0.7537934184074402, "learning_rate": 1.589497086019305e-05, "loss": 0.0794, "step": 22535 }, { "epoch": 0.4965872845361847, "grad_norm": 0.9821959137916565, "learning_rate": 1.5893902229882634e-05, "loss": 0.0752, "step": 22536 }, { "epoch": 0.49660931982570083, "grad_norm": 0.9306247234344482, "learning_rate": 1.5892833595019095e-05, "loss": 0.0907, "step": 22537 }, { "epoch": 0.496631355115217, "grad_norm": 0.6639814376831055, "learning_rate": 1.5891764955607878e-05, "loss": 0.1097, "step": 22538 }, { "epoch": 0.49665339040473316, "grad_norm": 0.552836000919342, "learning_rate": 1.5890696311654422e-05, "loss": 0.0685, "step": 22539 }, { "epoch": 0.49667542569424933, "grad_norm": 0.7585369944572449, "learning_rate": 1.588962766316417e-05, "loss": 0.0866, "step": 22540 }, { "epoch": 0.4966974609837655, "grad_norm": 0.4585699439048767, "learning_rate": 1.588855901014257e-05, "loss": 0.0776, "step": 22541 }, { "epoch": 0.49671949627328166, "grad_norm": 0.9757600426673889, "learning_rate": 1.5887490352595058e-05, "loss": 0.0889, "step": 22542 }, { "epoch": 0.4967415315627978, "grad_norm": 0.9149371981620789, "learning_rate": 1.5886421690527088e-05, "loss": 0.0848, "step": 22543 }, { "epoch": 0.496763566852314, "grad_norm": 0.5849915742874146, "learning_rate": 1.588535302394409e-05, "loss": 0.0676, "step": 22544 }, { "epoch": 0.49678560214183015, "grad_norm": 0.68227219581604, "learning_rate": 1.5884284352851523e-05, "loss": 0.0704, "step": 22545 }, { "epoch": 0.4968076374313463, "grad_norm": 0.704494059085846, "learning_rate": 1.5883215677254817e-05, "loss": 0.0934, "step": 22546 }, { "epoch": 0.4968296727208625, "grad_norm": 0.6498862504959106, "learning_rate": 1.588214699715942e-05, "loss": 0.0799, "step": 22547 }, { "epoch": 0.49685170801037865, "grad_norm": 0.6902983784675598, "learning_rate": 1.5881078312570776e-05, "loss": 0.0874, "step": 22548 }, { "epoch": 0.49687374329989475, "grad_norm": 0.7550615668296814, "learning_rate": 1.5880009623494324e-05, "loss": 0.077, "step": 22549 }, { "epoch": 0.4968957785894109, "grad_norm": 1.105411171913147, "learning_rate": 1.5878940929935515e-05, "loss": 0.0719, "step": 22550 }, { "epoch": 0.4969178138789271, "grad_norm": 0.6532039046287537, "learning_rate": 1.587787223189979e-05, "loss": 0.0696, "step": 22551 }, { "epoch": 0.49693984916844325, "grad_norm": 0.7065139412879944, "learning_rate": 1.5876803529392585e-05, "loss": 0.1046, "step": 22552 }, { "epoch": 0.4969618844579594, "grad_norm": 0.7307572364807129, "learning_rate": 1.5875734822419353e-05, "loss": 0.0726, "step": 22553 }, { "epoch": 0.4969839197474756, "grad_norm": 0.6410892605781555, "learning_rate": 1.587466611098553e-05, "loss": 0.0694, "step": 22554 }, { "epoch": 0.49700595503699174, "grad_norm": 0.4620853364467621, "learning_rate": 1.5873597395096572e-05, "loss": 0.067, "step": 22555 }, { "epoch": 0.4970279903265079, "grad_norm": 0.744839072227478, "learning_rate": 1.587252867475791e-05, "loss": 0.0649, "step": 22556 }, { "epoch": 0.49705002561602407, "grad_norm": 0.8739324808120728, "learning_rate": 1.5871459949974992e-05, "loss": 0.1141, "step": 22557 }, { "epoch": 0.49707206090554024, "grad_norm": 0.6181445121765137, "learning_rate": 1.5870391220753258e-05, "loss": 0.0805, "step": 22558 }, { "epoch": 0.4970940961950564, "grad_norm": 0.5183557868003845, "learning_rate": 1.5869322487098152e-05, "loss": 0.0668, "step": 22559 }, { "epoch": 0.49711613148457257, "grad_norm": 0.6104374527931213, "learning_rate": 1.5868253749015128e-05, "loss": 0.106, "step": 22560 }, { "epoch": 0.49713816677408873, "grad_norm": 0.31603002548217773, "learning_rate": 1.5867185006509616e-05, "loss": 0.0579, "step": 22561 }, { "epoch": 0.49716020206360484, "grad_norm": 0.3317141532897949, "learning_rate": 1.586611625958707e-05, "loss": 0.0512, "step": 22562 }, { "epoch": 0.497182237353121, "grad_norm": 0.5394383668899536, "learning_rate": 1.586504750825293e-05, "loss": 0.0602, "step": 22563 }, { "epoch": 0.49720427264263717, "grad_norm": 0.4255112409591675, "learning_rate": 1.5863978752512638e-05, "loss": 0.046, "step": 22564 }, { "epoch": 0.49722630793215333, "grad_norm": 0.7399749159812927, "learning_rate": 1.586290999237164e-05, "loss": 0.0616, "step": 22565 }, { "epoch": 0.4972483432216695, "grad_norm": 0.6138855218887329, "learning_rate": 1.5861841227835376e-05, "loss": 0.0814, "step": 22566 }, { "epoch": 0.49727037851118566, "grad_norm": 0.6116283535957336, "learning_rate": 1.5860772458909294e-05, "loss": 0.0995, "step": 22567 }, { "epoch": 0.4972924138007018, "grad_norm": 0.7235848903656006, "learning_rate": 1.5859703685598837e-05, "loss": 0.0741, "step": 22568 }, { "epoch": 0.497314449090218, "grad_norm": 0.46695536375045776, "learning_rate": 1.5858634907909443e-05, "loss": 0.0924, "step": 22569 }, { "epoch": 0.49733648437973416, "grad_norm": 1.0196537971496582, "learning_rate": 1.5857566125846567e-05, "loss": 0.0749, "step": 22570 }, { "epoch": 0.4973585196692503, "grad_norm": 0.7335901260375977, "learning_rate": 1.5856497339415648e-05, "loss": 0.0694, "step": 22571 }, { "epoch": 0.4973805549587665, "grad_norm": 0.7758324146270752, "learning_rate": 1.5855428548622126e-05, "loss": 0.092, "step": 22572 }, { "epoch": 0.49740259024828265, "grad_norm": 0.44922420382499695, "learning_rate": 1.5854359753471446e-05, "loss": 0.0591, "step": 22573 }, { "epoch": 0.49742462553779876, "grad_norm": 0.6362043023109436, "learning_rate": 1.5853290953969053e-05, "loss": 0.0987, "step": 22574 }, { "epoch": 0.4974466608273149, "grad_norm": 0.6539222598075867, "learning_rate": 1.5852222150120392e-05, "loss": 0.0654, "step": 22575 }, { "epoch": 0.4974686961168311, "grad_norm": 0.8463866114616394, "learning_rate": 1.585115334193091e-05, "loss": 0.1199, "step": 22576 }, { "epoch": 0.49749073140634725, "grad_norm": 0.5479122400283813, "learning_rate": 1.5850084529406043e-05, "loss": 0.0938, "step": 22577 }, { "epoch": 0.4975127666958634, "grad_norm": 1.0367283821105957, "learning_rate": 1.5849015712551245e-05, "loss": 0.0705, "step": 22578 }, { "epoch": 0.4975348019853796, "grad_norm": 0.6132524609565735, "learning_rate": 1.5847946891371954e-05, "loss": 0.1, "step": 22579 }, { "epoch": 0.49755683727489575, "grad_norm": 0.5035528540611267, "learning_rate": 1.5846878065873612e-05, "loss": 0.0799, "step": 22580 }, { "epoch": 0.4975788725644119, "grad_norm": 0.6604532599449158, "learning_rate": 1.5845809236061667e-05, "loss": 0.0743, "step": 22581 }, { "epoch": 0.4976009078539281, "grad_norm": 0.7286449670791626, "learning_rate": 1.584474040194156e-05, "loss": 0.0834, "step": 22582 }, { "epoch": 0.49762294314344424, "grad_norm": 0.5691867470741272, "learning_rate": 1.584367156351874e-05, "loss": 0.0995, "step": 22583 }, { "epoch": 0.4976449784329604, "grad_norm": 0.8489865064620972, "learning_rate": 1.5842602720798643e-05, "loss": 0.076, "step": 22584 }, { "epoch": 0.49766701372247657, "grad_norm": 0.8119446039199829, "learning_rate": 1.5841533873786728e-05, "loss": 0.0713, "step": 22585 }, { "epoch": 0.4976890490119927, "grad_norm": 0.5331999063491821, "learning_rate": 1.5840465022488423e-05, "loss": 0.0561, "step": 22586 }, { "epoch": 0.49771108430150884, "grad_norm": 0.9701763987541199, "learning_rate": 1.5839396166909182e-05, "loss": 0.0866, "step": 22587 }, { "epoch": 0.497733119591025, "grad_norm": 0.8698695302009583, "learning_rate": 1.5838327307054442e-05, "loss": 0.0868, "step": 22588 }, { "epoch": 0.4977551548805412, "grad_norm": 0.7450476288795471, "learning_rate": 1.5837258442929656e-05, "loss": 0.089, "step": 22589 }, { "epoch": 0.49777719017005734, "grad_norm": 0.5497557520866394, "learning_rate": 1.5836189574540262e-05, "loss": 0.0733, "step": 22590 }, { "epoch": 0.4977992254595735, "grad_norm": 0.9602679014205933, "learning_rate": 1.5835120701891705e-05, "loss": 0.097, "step": 22591 }, { "epoch": 0.49782126074908967, "grad_norm": 0.5048753023147583, "learning_rate": 1.5834051824989427e-05, "loss": 0.0677, "step": 22592 }, { "epoch": 0.49784329603860583, "grad_norm": 0.5534718036651611, "learning_rate": 1.5832982943838886e-05, "loss": 0.0537, "step": 22593 }, { "epoch": 0.497865331328122, "grad_norm": 0.9140766859054565, "learning_rate": 1.583191405844551e-05, "loss": 0.0774, "step": 22594 }, { "epoch": 0.49788736661763816, "grad_norm": 1.0239667892456055, "learning_rate": 1.5830845168814748e-05, "loss": 0.1008, "step": 22595 }, { "epoch": 0.4979094019071543, "grad_norm": 0.6986075639724731, "learning_rate": 1.5829776274952048e-05, "loss": 0.0679, "step": 22596 }, { "epoch": 0.4979314371966705, "grad_norm": 0.570419430732727, "learning_rate": 1.5828707376862853e-05, "loss": 0.0698, "step": 22597 }, { "epoch": 0.49795347248618665, "grad_norm": 0.4732744097709656, "learning_rate": 1.5827638474552605e-05, "loss": 0.0806, "step": 22598 }, { "epoch": 0.49797550777570276, "grad_norm": 0.903961718082428, "learning_rate": 1.5826569568026756e-05, "loss": 0.101, "step": 22599 }, { "epoch": 0.49799754306521893, "grad_norm": 0.7795261740684509, "learning_rate": 1.582550065729074e-05, "loss": 0.0837, "step": 22600 }, { "epoch": 0.4980195783547351, "grad_norm": 1.0146515369415283, "learning_rate": 1.5824431742350004e-05, "loss": 0.0801, "step": 22601 }, { "epoch": 0.49804161364425126, "grad_norm": 0.5475738644599915, "learning_rate": 1.5823362823209998e-05, "loss": 0.0834, "step": 22602 }, { "epoch": 0.4980636489337674, "grad_norm": 0.8774673938751221, "learning_rate": 1.5822293899876165e-05, "loss": 0.0857, "step": 22603 }, { "epoch": 0.4980856842232836, "grad_norm": 0.44113433361053467, "learning_rate": 1.5821224972353948e-05, "loss": 0.0757, "step": 22604 }, { "epoch": 0.49810771951279975, "grad_norm": 0.5460522770881653, "learning_rate": 1.582015604064879e-05, "loss": 0.0831, "step": 22605 }, { "epoch": 0.4981297548023159, "grad_norm": 0.9459829330444336, "learning_rate": 1.581908710476614e-05, "loss": 0.0558, "step": 22606 }, { "epoch": 0.4981517900918321, "grad_norm": 0.7425414323806763, "learning_rate": 1.5818018164711434e-05, "loss": 0.0649, "step": 22607 }, { "epoch": 0.49817382538134825, "grad_norm": 0.6051685810089111, "learning_rate": 1.581694922049013e-05, "loss": 0.072, "step": 22608 }, { "epoch": 0.4981958606708644, "grad_norm": 0.6683171391487122, "learning_rate": 1.581588027210766e-05, "loss": 0.0749, "step": 22609 }, { "epoch": 0.4982178959603806, "grad_norm": 0.4851173162460327, "learning_rate": 1.5814811319569485e-05, "loss": 0.0837, "step": 22610 }, { "epoch": 0.4982399312498967, "grad_norm": 0.7185255885124207, "learning_rate": 1.581374236288103e-05, "loss": 0.0726, "step": 22611 }, { "epoch": 0.49826196653941285, "grad_norm": 0.5103797912597656, "learning_rate": 1.5812673402047747e-05, "loss": 0.07, "step": 22612 }, { "epoch": 0.498284001828929, "grad_norm": 0.9868446588516235, "learning_rate": 1.5811604437075086e-05, "loss": 0.0867, "step": 22613 }, { "epoch": 0.4983060371184452, "grad_norm": 0.502289354801178, "learning_rate": 1.581053546796849e-05, "loss": 0.0745, "step": 22614 }, { "epoch": 0.49832807240796134, "grad_norm": 0.5220031142234802, "learning_rate": 1.5809466494733396e-05, "loss": 0.0726, "step": 22615 }, { "epoch": 0.4983501076974775, "grad_norm": 0.4415467381477356, "learning_rate": 1.580839751737526e-05, "loss": 0.0773, "step": 22616 }, { "epoch": 0.49837214298699367, "grad_norm": 0.4478374719619751, "learning_rate": 1.580732853589952e-05, "loss": 0.0931, "step": 22617 }, { "epoch": 0.49839417827650984, "grad_norm": 0.7425403594970703, "learning_rate": 1.5806259550311623e-05, "loss": 0.1076, "step": 22618 }, { "epoch": 0.498416213566026, "grad_norm": 0.7481151819229126, "learning_rate": 1.5805190560617018e-05, "loss": 0.0782, "step": 22619 }, { "epoch": 0.49843824885554217, "grad_norm": 0.6493238806724548, "learning_rate": 1.5804121566821144e-05, "loss": 0.0637, "step": 22620 }, { "epoch": 0.49846028414505833, "grad_norm": 0.6062812805175781, "learning_rate": 1.5803052568929445e-05, "loss": 0.0753, "step": 22621 }, { "epoch": 0.4984823194345745, "grad_norm": 0.5836787223815918, "learning_rate": 1.580198356694737e-05, "loss": 0.0733, "step": 22622 }, { "epoch": 0.4985043547240906, "grad_norm": 0.5726331472396851, "learning_rate": 1.5800914560880364e-05, "loss": 0.0548, "step": 22623 }, { "epoch": 0.49852639001360677, "grad_norm": 0.5547634363174438, "learning_rate": 1.579984555073387e-05, "loss": 0.0639, "step": 22624 }, { "epoch": 0.49854842530312293, "grad_norm": 0.5530779957771301, "learning_rate": 1.5798776536513328e-05, "loss": 0.078, "step": 22625 }, { "epoch": 0.4985704605926391, "grad_norm": 0.4555148482322693, "learning_rate": 1.57977075182242e-05, "loss": 0.0595, "step": 22626 }, { "epoch": 0.49859249588215526, "grad_norm": 0.6903082728385925, "learning_rate": 1.579663849587191e-05, "loss": 0.0899, "step": 22627 }, { "epoch": 0.4986145311716714, "grad_norm": 0.6948409080505371, "learning_rate": 1.5795569469461916e-05, "loss": 0.0599, "step": 22628 }, { "epoch": 0.4986365664611876, "grad_norm": 0.7347464561462402, "learning_rate": 1.579450043899966e-05, "loss": 0.0849, "step": 22629 }, { "epoch": 0.49865860175070376, "grad_norm": 0.6973076462745667, "learning_rate": 1.5793431404490587e-05, "loss": 0.097, "step": 22630 }, { "epoch": 0.4986806370402199, "grad_norm": 0.6904201507568359, "learning_rate": 1.5792362365940145e-05, "loss": 0.0627, "step": 22631 }, { "epoch": 0.4987026723297361, "grad_norm": 0.858462929725647, "learning_rate": 1.579129332335377e-05, "loss": 0.0417, "step": 22632 }, { "epoch": 0.49872470761925225, "grad_norm": 0.6162169575691223, "learning_rate": 1.579022427673692e-05, "loss": 0.0711, "step": 22633 }, { "epoch": 0.4987467429087684, "grad_norm": 0.6924610733985901, "learning_rate": 1.5789155226095033e-05, "loss": 0.0693, "step": 22634 }, { "epoch": 0.4987687781982846, "grad_norm": 0.6078591346740723, "learning_rate": 1.5788086171433557e-05, "loss": 0.0584, "step": 22635 }, { "epoch": 0.4987908134878007, "grad_norm": 0.5936557650566101, "learning_rate": 1.578701711275793e-05, "loss": 0.0894, "step": 22636 }, { "epoch": 0.49881284877731685, "grad_norm": 0.7936655282974243, "learning_rate": 1.578594805007361e-05, "loss": 0.0946, "step": 22637 }, { "epoch": 0.498834884066833, "grad_norm": 0.8095375895500183, "learning_rate": 1.5784878983386026e-05, "loss": 0.1301, "step": 22638 }, { "epoch": 0.4988569193563492, "grad_norm": 0.49386778473854065, "learning_rate": 1.578380991270064e-05, "loss": 0.0576, "step": 22639 }, { "epoch": 0.49887895464586535, "grad_norm": 0.8078412413597107, "learning_rate": 1.5782740838022885e-05, "loss": 0.1073, "step": 22640 }, { "epoch": 0.4989009899353815, "grad_norm": 0.5119258761405945, "learning_rate": 1.5781671759358216e-05, "loss": 0.0759, "step": 22641 }, { "epoch": 0.4989230252248977, "grad_norm": 0.516811728477478, "learning_rate": 1.5780602676712075e-05, "loss": 0.0464, "step": 22642 }, { "epoch": 0.49894506051441384, "grad_norm": 0.9277639985084534, "learning_rate": 1.57795335900899e-05, "loss": 0.0736, "step": 22643 }, { "epoch": 0.49896709580393, "grad_norm": 0.5892415046691895, "learning_rate": 1.5778464499497145e-05, "loss": 0.0691, "step": 22644 }, { "epoch": 0.49898913109344617, "grad_norm": 0.41719380021095276, "learning_rate": 1.577739540493925e-05, "loss": 0.0985, "step": 22645 }, { "epoch": 0.49901116638296233, "grad_norm": 0.6461089849472046, "learning_rate": 1.5776326306421664e-05, "loss": 0.1163, "step": 22646 }, { "epoch": 0.4990332016724785, "grad_norm": 0.7129507660865784, "learning_rate": 1.5775257203949833e-05, "loss": 0.0692, "step": 22647 }, { "epoch": 0.4990552369619946, "grad_norm": 0.47067466378211975, "learning_rate": 1.5774188097529202e-05, "loss": 0.073, "step": 22648 }, { "epoch": 0.4990772722515108, "grad_norm": 1.0425543785095215, "learning_rate": 1.577311898716522e-05, "loss": 0.1024, "step": 22649 }, { "epoch": 0.49909930754102694, "grad_norm": 0.735419511795044, "learning_rate": 1.5772049872863324e-05, "loss": 0.0977, "step": 22650 }, { "epoch": 0.4991213428305431, "grad_norm": 0.6119177341461182, "learning_rate": 1.577098075462896e-05, "loss": 0.0505, "step": 22651 }, { "epoch": 0.49914337812005927, "grad_norm": 0.7956892251968384, "learning_rate": 1.5769911632467585e-05, "loss": 0.0682, "step": 22652 }, { "epoch": 0.49916541340957543, "grad_norm": 0.6153674721717834, "learning_rate": 1.576884250638463e-05, "loss": 0.0778, "step": 22653 }, { "epoch": 0.4991874486990916, "grad_norm": 0.5010301470756531, "learning_rate": 1.5767773376385556e-05, "loss": 0.0786, "step": 22654 }, { "epoch": 0.49920948398860776, "grad_norm": 0.6960654258728027, "learning_rate": 1.5766704242475792e-05, "loss": 0.0744, "step": 22655 }, { "epoch": 0.4992315192781239, "grad_norm": 0.8174468278884888, "learning_rate": 1.57656351046608e-05, "loss": 0.0845, "step": 22656 }, { "epoch": 0.4992535545676401, "grad_norm": 0.773748517036438, "learning_rate": 1.5764565962946014e-05, "loss": 0.0841, "step": 22657 }, { "epoch": 0.49927558985715625, "grad_norm": 0.8719984292984009, "learning_rate": 1.576349681733688e-05, "loss": 0.0662, "step": 22658 }, { "epoch": 0.4992976251466724, "grad_norm": 0.7357733249664307, "learning_rate": 1.5762427667838852e-05, "loss": 0.0607, "step": 22659 }, { "epoch": 0.49931966043618853, "grad_norm": 0.5092818140983582, "learning_rate": 1.5761358514457373e-05, "loss": 0.0843, "step": 22660 }, { "epoch": 0.4993416957257047, "grad_norm": 0.6106587648391724, "learning_rate": 1.5760289357197882e-05, "loss": 0.0443, "step": 22661 }, { "epoch": 0.49936373101522086, "grad_norm": 0.6890721321105957, "learning_rate": 1.575922019606583e-05, "loss": 0.0669, "step": 22662 }, { "epoch": 0.499385766304737, "grad_norm": 0.9879788160324097, "learning_rate": 1.575815103106666e-05, "loss": 0.1065, "step": 22663 }, { "epoch": 0.4994078015942532, "grad_norm": 0.5236446857452393, "learning_rate": 1.575708186220583e-05, "loss": 0.0631, "step": 22664 }, { "epoch": 0.49942983688376935, "grad_norm": 0.8422370553016663, "learning_rate": 1.5756012689488765e-05, "loss": 0.1008, "step": 22665 }, { "epoch": 0.4994518721732855, "grad_norm": 0.3633711040019989, "learning_rate": 1.5754943512920935e-05, "loss": 0.0598, "step": 22666 }, { "epoch": 0.4994739074628017, "grad_norm": 0.8351005911827087, "learning_rate": 1.575387433250776e-05, "loss": 0.062, "step": 22667 }, { "epoch": 0.49949594275231785, "grad_norm": 0.48719120025634766, "learning_rate": 1.575280514825471e-05, "loss": 0.1074, "step": 22668 }, { "epoch": 0.499517978041834, "grad_norm": 1.204460620880127, "learning_rate": 1.575173596016721e-05, "loss": 0.127, "step": 22669 }, { "epoch": 0.4995400133313502, "grad_norm": 0.5809881091117859, "learning_rate": 1.5750666768250716e-05, "loss": 0.089, "step": 22670 }, { "epoch": 0.49956204862086634, "grad_norm": 0.5631718039512634, "learning_rate": 1.5749597572510678e-05, "loss": 0.0845, "step": 22671 }, { "epoch": 0.4995840839103825, "grad_norm": 1.0293210744857788, "learning_rate": 1.5748528372952537e-05, "loss": 0.0886, "step": 22672 }, { "epoch": 0.4996061191998986, "grad_norm": 0.3245700001716614, "learning_rate": 1.5747459169581738e-05, "loss": 0.0441, "step": 22673 }, { "epoch": 0.4996281544894148, "grad_norm": 0.6416283249855042, "learning_rate": 1.5746389962403733e-05, "loss": 0.0818, "step": 22674 }, { "epoch": 0.49965018977893094, "grad_norm": 0.9654818773269653, "learning_rate": 1.574532075142396e-05, "loss": 0.0715, "step": 22675 }, { "epoch": 0.4996722250684471, "grad_norm": 0.577239453792572, "learning_rate": 1.5744251536647868e-05, "loss": 0.083, "step": 22676 }, { "epoch": 0.49969426035796327, "grad_norm": 0.9459485411643982, "learning_rate": 1.5743182318080905e-05, "loss": 0.1255, "step": 22677 }, { "epoch": 0.49971629564747944, "grad_norm": 0.5455735921859741, "learning_rate": 1.5742113095728515e-05, "loss": 0.0752, "step": 22678 }, { "epoch": 0.4997383309369956, "grad_norm": 0.7791998982429504, "learning_rate": 1.5741043869596148e-05, "loss": 0.0664, "step": 22679 }, { "epoch": 0.49976036622651177, "grad_norm": 0.7753355503082275, "learning_rate": 1.573997463968924e-05, "loss": 0.0689, "step": 22680 }, { "epoch": 0.49978240151602793, "grad_norm": 0.39114466309547424, "learning_rate": 1.5738905406013252e-05, "loss": 0.0728, "step": 22681 }, { "epoch": 0.4998044368055441, "grad_norm": 0.6315441727638245, "learning_rate": 1.573783616857362e-05, "loss": 0.0664, "step": 22682 }, { "epoch": 0.49982647209506026, "grad_norm": 1.003084659576416, "learning_rate": 1.5736766927375796e-05, "loss": 0.0973, "step": 22683 }, { "epoch": 0.4998485073845764, "grad_norm": 0.49177443981170654, "learning_rate": 1.5735697682425217e-05, "loss": 0.0837, "step": 22684 }, { "epoch": 0.49987054267409253, "grad_norm": 0.6649188995361328, "learning_rate": 1.573462843372734e-05, "loss": 0.0732, "step": 22685 }, { "epoch": 0.4998925779636087, "grad_norm": 0.7286300659179688, "learning_rate": 1.57335591812876e-05, "loss": 0.0731, "step": 22686 }, { "epoch": 0.49991461325312486, "grad_norm": 0.5699769854545593, "learning_rate": 1.5732489925111457e-05, "loss": 0.0678, "step": 22687 }, { "epoch": 0.499936648542641, "grad_norm": 0.9247300624847412, "learning_rate": 1.5731420665204343e-05, "loss": 0.0723, "step": 22688 }, { "epoch": 0.4999586838321572, "grad_norm": 0.38837122917175293, "learning_rate": 1.5730351401571717e-05, "loss": 0.082, "step": 22689 }, { "epoch": 0.49998071912167336, "grad_norm": 0.9357825517654419, "learning_rate": 1.572928213421902e-05, "loss": 0.0657, "step": 22690 }, { "epoch": 0.5000027544111895, "grad_norm": 1.2776453495025635, "learning_rate": 1.57282128631517e-05, "loss": 0.0878, "step": 22691 }, { "epoch": 0.5000247897007056, "grad_norm": 0.8339084982872009, "learning_rate": 1.5727143588375192e-05, "loss": 0.065, "step": 22692 }, { "epoch": 0.5000468249902218, "grad_norm": 0.5033590197563171, "learning_rate": 1.5726074309894956e-05, "loss": 0.0655, "step": 22693 }, { "epoch": 0.500068860279738, "grad_norm": 0.748437762260437, "learning_rate": 1.5725005027716436e-05, "loss": 0.0844, "step": 22694 }, { "epoch": 0.5000908955692541, "grad_norm": 0.5383591055870056, "learning_rate": 1.5723935741845075e-05, "loss": 0.0435, "step": 22695 }, { "epoch": 0.5001129308587703, "grad_norm": 0.526664674282074, "learning_rate": 1.5722866452286322e-05, "loss": 0.0636, "step": 22696 }, { "epoch": 0.5001349661482865, "grad_norm": 0.5697662234306335, "learning_rate": 1.5721797159045626e-05, "loss": 0.09, "step": 22697 }, { "epoch": 0.5001570014378026, "grad_norm": 0.5157052278518677, "learning_rate": 1.5720727862128427e-05, "loss": 0.0714, "step": 22698 }, { "epoch": 0.5001790367273188, "grad_norm": 1.0005245208740234, "learning_rate": 1.5719658561540174e-05, "loss": 0.0763, "step": 22699 }, { "epoch": 0.500201072016835, "grad_norm": 0.7799795269966125, "learning_rate": 1.5718589257286314e-05, "loss": 0.1287, "step": 22700 }, { "epoch": 0.5002231073063511, "grad_norm": 0.58543860912323, "learning_rate": 1.5717519949372293e-05, "loss": 0.0895, "step": 22701 }, { "epoch": 0.5002451425958673, "grad_norm": 0.6238034963607788, "learning_rate": 1.571645063780356e-05, "loss": 0.1036, "step": 22702 }, { "epoch": 0.5002671778853834, "grad_norm": 0.8515539169311523, "learning_rate": 1.5715381322585556e-05, "loss": 0.1163, "step": 22703 }, { "epoch": 0.5002892131748996, "grad_norm": 0.4520154595375061, "learning_rate": 1.5714312003723736e-05, "loss": 0.0963, "step": 22704 }, { "epoch": 0.5003112484644158, "grad_norm": 0.39260122179985046, "learning_rate": 1.571324268122354e-05, "loss": 0.0824, "step": 22705 }, { "epoch": 0.5003332837539319, "grad_norm": 0.5301716327667236, "learning_rate": 1.571217335509042e-05, "loss": 0.1066, "step": 22706 }, { "epoch": 0.5003553190434481, "grad_norm": 0.6583189368247986, "learning_rate": 1.571110402532981e-05, "loss": 0.1037, "step": 22707 }, { "epoch": 0.5003773543329643, "grad_norm": 0.8142934441566467, "learning_rate": 1.5710034691947174e-05, "loss": 0.0823, "step": 22708 }, { "epoch": 0.5003993896224804, "grad_norm": 0.5645349025726318, "learning_rate": 1.5708965354947946e-05, "loss": 0.0862, "step": 22709 }, { "epoch": 0.5004214249119966, "grad_norm": 0.7005075812339783, "learning_rate": 1.570789601433758e-05, "loss": 0.1007, "step": 22710 }, { "epoch": 0.5004434602015128, "grad_norm": 0.9358555674552917, "learning_rate": 1.5706826670121515e-05, "loss": 0.0888, "step": 22711 }, { "epoch": 0.5004654954910289, "grad_norm": 0.5066126585006714, "learning_rate": 1.570575732230521e-05, "loss": 0.0685, "step": 22712 }, { "epoch": 0.5004875307805451, "grad_norm": 0.5504778623580933, "learning_rate": 1.5704687970894104e-05, "loss": 0.0691, "step": 22713 }, { "epoch": 0.5005095660700613, "grad_norm": 0.532314658164978, "learning_rate": 1.570361861589364e-05, "loss": 0.0719, "step": 22714 }, { "epoch": 0.5005316013595774, "grad_norm": 0.3700786232948303, "learning_rate": 1.570254925730927e-05, "loss": 0.0787, "step": 22715 }, { "epoch": 0.5005536366490935, "grad_norm": 0.6252123713493347, "learning_rate": 1.570147989514644e-05, "loss": 0.0711, "step": 22716 }, { "epoch": 0.5005756719386096, "grad_norm": 0.6709398627281189, "learning_rate": 1.57004105294106e-05, "loss": 0.099, "step": 22717 }, { "epoch": 0.5005977072281258, "grad_norm": 0.6240414381027222, "learning_rate": 1.5699341160107187e-05, "loss": 0.0899, "step": 22718 }, { "epoch": 0.500619742517642, "grad_norm": 0.35281088948249817, "learning_rate": 1.569827178724166e-05, "loss": 0.0776, "step": 22719 }, { "epoch": 0.5006417778071581, "grad_norm": 0.9046418070793152, "learning_rate": 1.5697202410819455e-05, "loss": 0.0795, "step": 22720 }, { "epoch": 0.5006638130966743, "grad_norm": 0.47461724281311035, "learning_rate": 1.5696133030846034e-05, "loss": 0.0597, "step": 22721 }, { "epoch": 0.5006858483861905, "grad_norm": 0.7168700695037842, "learning_rate": 1.5695063647326827e-05, "loss": 0.0938, "step": 22722 }, { "epoch": 0.5007078836757066, "grad_norm": 0.45597630739212036, "learning_rate": 1.569399426026729e-05, "loss": 0.0691, "step": 22723 }, { "epoch": 0.5007299189652228, "grad_norm": 0.5549359917640686, "learning_rate": 1.5692924869672863e-05, "loss": 0.0509, "step": 22724 }, { "epoch": 0.500751954254739, "grad_norm": 0.6072924733161926, "learning_rate": 1.5691855475549008e-05, "loss": 0.0768, "step": 22725 }, { "epoch": 0.5007739895442551, "grad_norm": 1.0174965858459473, "learning_rate": 1.5690786077901153e-05, "loss": 0.0898, "step": 22726 }, { "epoch": 0.5007960248337713, "grad_norm": 0.613601565361023, "learning_rate": 1.5689716676734758e-05, "loss": 0.0768, "step": 22727 }, { "epoch": 0.5008180601232874, "grad_norm": 0.7244788408279419, "learning_rate": 1.5688647272055266e-05, "loss": 0.0841, "step": 22728 }, { "epoch": 0.5008400954128036, "grad_norm": 0.5437216758728027, "learning_rate": 1.5687577863868126e-05, "loss": 0.0879, "step": 22729 }, { "epoch": 0.5008621307023198, "grad_norm": 0.477291464805603, "learning_rate": 1.5686508452178783e-05, "loss": 0.0589, "step": 22730 }, { "epoch": 0.5008841659918359, "grad_norm": 0.4117105305194855, "learning_rate": 1.5685439036992684e-05, "loss": 0.0893, "step": 22731 }, { "epoch": 0.5009062012813521, "grad_norm": 0.7026206851005554, "learning_rate": 1.5684369618315268e-05, "loss": 0.1054, "step": 22732 }, { "epoch": 0.5009282365708683, "grad_norm": 0.6639347672462463, "learning_rate": 1.5683300196152003e-05, "loss": 0.0715, "step": 22733 }, { "epoch": 0.5009502718603844, "grad_norm": 0.5158440470695496, "learning_rate": 1.5682230770508314e-05, "loss": 0.0605, "step": 22734 }, { "epoch": 0.5009723071499006, "grad_norm": 0.746627151966095, "learning_rate": 1.5681161341389665e-05, "loss": 0.0908, "step": 22735 }, { "epoch": 0.5009943424394168, "grad_norm": 0.6032345294952393, "learning_rate": 1.5680091908801488e-05, "loss": 0.1084, "step": 22736 }, { "epoch": 0.5010163777289329, "grad_norm": 0.7918092012405396, "learning_rate": 1.5679022472749248e-05, "loss": 0.0965, "step": 22737 }, { "epoch": 0.5010384130184491, "grad_norm": 0.6853142976760864, "learning_rate": 1.5677953033238375e-05, "loss": 0.0722, "step": 22738 }, { "epoch": 0.5010604483079653, "grad_norm": 0.4373256266117096, "learning_rate": 1.5676883590274326e-05, "loss": 0.0723, "step": 22739 }, { "epoch": 0.5010824835974814, "grad_norm": 0.633966326713562, "learning_rate": 1.567581414386255e-05, "loss": 0.0593, "step": 22740 }, { "epoch": 0.5011045188869975, "grad_norm": 0.6791214346885681, "learning_rate": 1.567474469400848e-05, "loss": 0.142, "step": 22741 }, { "epoch": 0.5011265541765136, "grad_norm": 0.6127809882164001, "learning_rate": 1.5673675240717582e-05, "loss": 0.1002, "step": 22742 }, { "epoch": 0.5011485894660298, "grad_norm": 0.7791149020195007, "learning_rate": 1.567260578399529e-05, "loss": 0.0803, "step": 22743 }, { "epoch": 0.501170624755546, "grad_norm": 0.9934385418891907, "learning_rate": 1.5671536323847065e-05, "loss": 0.0971, "step": 22744 }, { "epoch": 0.5011926600450621, "grad_norm": 0.8105043768882751, "learning_rate": 1.5670466860278336e-05, "loss": 0.0917, "step": 22745 }, { "epoch": 0.5012146953345783, "grad_norm": 0.5374793410301208, "learning_rate": 1.5669397393294563e-05, "loss": 0.0915, "step": 22746 }, { "epoch": 0.5012367306240945, "grad_norm": 0.6031549572944641, "learning_rate": 1.566832792290119e-05, "loss": 0.0687, "step": 22747 }, { "epoch": 0.5012587659136106, "grad_norm": 0.5947824716567993, "learning_rate": 1.5667258449103665e-05, "loss": 0.0752, "step": 22748 }, { "epoch": 0.5012808012031268, "grad_norm": 0.7414289116859436, "learning_rate": 1.5666188971907433e-05, "loss": 0.072, "step": 22749 }, { "epoch": 0.501302836492643, "grad_norm": 0.5164929032325745, "learning_rate": 1.5665119491317946e-05, "loss": 0.1183, "step": 22750 }, { "epoch": 0.5013248717821591, "grad_norm": 0.5499878525733948, "learning_rate": 1.5664050007340646e-05, "loss": 0.0746, "step": 22751 }, { "epoch": 0.5013469070716753, "grad_norm": 0.721983790397644, "learning_rate": 1.5662980519980988e-05, "loss": 0.0503, "step": 22752 }, { "epoch": 0.5013689423611914, "grad_norm": 0.5895379781723022, "learning_rate": 1.5661911029244412e-05, "loss": 0.0865, "step": 22753 }, { "epoch": 0.5013909776507076, "grad_norm": 0.8402085900306702, "learning_rate": 1.5660841535136368e-05, "loss": 0.1035, "step": 22754 }, { "epoch": 0.5014130129402238, "grad_norm": 0.6652764678001404, "learning_rate": 1.5659772037662302e-05, "loss": 0.0686, "step": 22755 }, { "epoch": 0.5014350482297399, "grad_norm": 0.5181528925895691, "learning_rate": 1.5658702536827664e-05, "loss": 0.0618, "step": 22756 }, { "epoch": 0.5014570835192561, "grad_norm": 0.773084282875061, "learning_rate": 1.56576330326379e-05, "loss": 0.0874, "step": 22757 }, { "epoch": 0.5014791188087723, "grad_norm": 0.4627651274204254, "learning_rate": 1.5656563525098456e-05, "loss": 0.0502, "step": 22758 }, { "epoch": 0.5015011540982884, "grad_norm": 0.6748678088188171, "learning_rate": 1.5655494014214788e-05, "loss": 0.0662, "step": 22759 }, { "epoch": 0.5015231893878046, "grad_norm": 0.5721747875213623, "learning_rate": 1.5654424499992337e-05, "loss": 0.0684, "step": 22760 }, { "epoch": 0.5015452246773208, "grad_norm": 0.6408124566078186, "learning_rate": 1.5653354982436547e-05, "loss": 0.0626, "step": 22761 }, { "epoch": 0.5015672599668369, "grad_norm": 0.3960929811000824, "learning_rate": 1.5652285461552873e-05, "loss": 0.0347, "step": 22762 }, { "epoch": 0.5015892952563531, "grad_norm": 0.7870716452598572, "learning_rate": 1.565121593734676e-05, "loss": 0.0592, "step": 22763 }, { "epoch": 0.5016113305458693, "grad_norm": 0.5274310111999512, "learning_rate": 1.5650146409823648e-05, "loss": 0.0543, "step": 22764 }, { "epoch": 0.5016333658353853, "grad_norm": 53.07933807373047, "learning_rate": 1.5649076878988997e-05, "loss": 0.0944, "step": 22765 }, { "epoch": 0.5016554011249015, "grad_norm": 0.8684996962547302, "learning_rate": 1.5648007344848247e-05, "loss": 0.122, "step": 22766 }, { "epoch": 0.5016774364144176, "grad_norm": 1.2990528345108032, "learning_rate": 1.5646937807406854e-05, "loss": 0.1281, "step": 22767 }, { "epoch": 0.5016994717039338, "grad_norm": 1.140921950340271, "learning_rate": 1.564586826667026e-05, "loss": 0.1048, "step": 22768 }, { "epoch": 0.50172150699345, "grad_norm": 0.7557927966117859, "learning_rate": 1.5644798722643907e-05, "loss": 0.0687, "step": 22769 }, { "epoch": 0.5017435422829661, "grad_norm": 0.46425220370292664, "learning_rate": 1.5643729175333245e-05, "loss": 0.0933, "step": 22770 }, { "epoch": 0.5017655775724823, "grad_norm": 1.562450885772705, "learning_rate": 1.5642659624743732e-05, "loss": 0.0601, "step": 22771 }, { "epoch": 0.5017876128619985, "grad_norm": 0.4990595877170563, "learning_rate": 1.5641590070880807e-05, "loss": 0.068, "step": 22772 }, { "epoch": 0.5018096481515146, "grad_norm": 0.5313605070114136, "learning_rate": 1.5640520513749923e-05, "loss": 0.067, "step": 22773 }, { "epoch": 0.5018316834410308, "grad_norm": 0.9225032925605774, "learning_rate": 1.5639450953356516e-05, "loss": 0.0808, "step": 22774 }, { "epoch": 0.501853718730547, "grad_norm": 0.6980651021003723, "learning_rate": 1.563838138970605e-05, "loss": 0.0672, "step": 22775 }, { "epoch": 0.5018757540200631, "grad_norm": 0.6344738602638245, "learning_rate": 1.563731182280396e-05, "loss": 0.0822, "step": 22776 }, { "epoch": 0.5018977893095793, "grad_norm": 0.5413287281990051, "learning_rate": 1.5636242252655704e-05, "loss": 0.0806, "step": 22777 }, { "epoch": 0.5019198245990955, "grad_norm": 0.5818551182746887, "learning_rate": 1.5635172679266728e-05, "loss": 0.0726, "step": 22778 }, { "epoch": 0.5019418598886116, "grad_norm": 0.43772757053375244, "learning_rate": 1.563410310264247e-05, "loss": 0.0452, "step": 22779 }, { "epoch": 0.5019638951781278, "grad_norm": 0.7176340222358704, "learning_rate": 1.5633033522788387e-05, "loss": 0.0714, "step": 22780 }, { "epoch": 0.501985930467644, "grad_norm": 0.6068233251571655, "learning_rate": 1.563196393970992e-05, "loss": 0.0847, "step": 22781 }, { "epoch": 0.5020079657571601, "grad_norm": 0.5838596820831299, "learning_rate": 1.563089435341253e-05, "loss": 0.0916, "step": 22782 }, { "epoch": 0.5020300010466763, "grad_norm": 0.8864414095878601, "learning_rate": 1.562982476390165e-05, "loss": 0.0974, "step": 22783 }, { "epoch": 0.5020520363361924, "grad_norm": 0.7456751465797424, "learning_rate": 1.562875517118274e-05, "loss": 0.065, "step": 22784 }, { "epoch": 0.5020740716257086, "grad_norm": 0.7707659006118774, "learning_rate": 1.562768557526124e-05, "loss": 0.1015, "step": 22785 }, { "epoch": 0.5020961069152248, "grad_norm": 0.6109864115715027, "learning_rate": 1.5626615976142603e-05, "loss": 0.0711, "step": 22786 }, { "epoch": 0.5021181422047409, "grad_norm": 0.46347513794898987, "learning_rate": 1.5625546373832273e-05, "loss": 0.0684, "step": 22787 }, { "epoch": 0.5021401774942571, "grad_norm": 0.5993274450302124, "learning_rate": 1.56244767683357e-05, "loss": 0.0851, "step": 22788 }, { "epoch": 0.5021622127837733, "grad_norm": 0.8365088105201721, "learning_rate": 1.562340715965833e-05, "loss": 0.0878, "step": 22789 }, { "epoch": 0.5021842480732893, "grad_norm": 0.5818867087364197, "learning_rate": 1.5622337547805615e-05, "loss": 0.097, "step": 22790 }, { "epoch": 0.5022062833628055, "grad_norm": 0.5116657018661499, "learning_rate": 1.5621267932783e-05, "loss": 0.0913, "step": 22791 }, { "epoch": 0.5022283186523216, "grad_norm": 0.7129654288291931, "learning_rate": 1.5620198314595936e-05, "loss": 0.0763, "step": 22792 }, { "epoch": 0.5022503539418378, "grad_norm": 0.8562208414077759, "learning_rate": 1.561912869324987e-05, "loss": 0.0837, "step": 22793 }, { "epoch": 0.502272389231354, "grad_norm": 0.6579369902610779, "learning_rate": 1.561805906875025e-05, "loss": 0.091, "step": 22794 }, { "epoch": 0.5022944245208701, "grad_norm": 0.7979406714439392, "learning_rate": 1.5616989441102524e-05, "loss": 0.1073, "step": 22795 }, { "epoch": 0.5023164598103863, "grad_norm": 0.5984780788421631, "learning_rate": 1.5615919810312137e-05, "loss": 0.0636, "step": 22796 }, { "epoch": 0.5023384950999025, "grad_norm": 0.5510466694831848, "learning_rate": 1.561485017638454e-05, "loss": 0.0763, "step": 22797 }, { "epoch": 0.5023605303894186, "grad_norm": 0.5866994857788086, "learning_rate": 1.5613780539325185e-05, "loss": 0.0805, "step": 22798 }, { "epoch": 0.5023825656789348, "grad_norm": 0.8780958652496338, "learning_rate": 1.5612710899139512e-05, "loss": 0.0859, "step": 22799 }, { "epoch": 0.502404600968451, "grad_norm": 0.4992845356464386, "learning_rate": 1.5611641255832976e-05, "loss": 0.0646, "step": 22800 }, { "epoch": 0.5024266362579671, "grad_norm": 0.4558177590370178, "learning_rate": 1.5610571609411023e-05, "loss": 0.0564, "step": 22801 }, { "epoch": 0.5024486715474833, "grad_norm": 0.4003760814666748, "learning_rate": 1.5609501959879104e-05, "loss": 0.0491, "step": 22802 }, { "epoch": 0.5024707068369995, "grad_norm": 0.5824988484382629, "learning_rate": 1.560843230724266e-05, "loss": 0.0557, "step": 22803 }, { "epoch": 0.5024927421265156, "grad_norm": 0.6567390561103821, "learning_rate": 1.5607362651507143e-05, "loss": 0.087, "step": 22804 }, { "epoch": 0.5025147774160318, "grad_norm": 0.6037647128105164, "learning_rate": 1.560629299267801e-05, "loss": 0.0764, "step": 22805 }, { "epoch": 0.502536812705548, "grad_norm": 0.6590256094932556, "learning_rate": 1.5605223330760695e-05, "loss": 0.0549, "step": 22806 }, { "epoch": 0.5025588479950641, "grad_norm": 0.8915261030197144, "learning_rate": 1.5604153665760656e-05, "loss": 0.0829, "step": 22807 }, { "epoch": 0.5025808832845803, "grad_norm": 0.3687972128391266, "learning_rate": 1.560308399768334e-05, "loss": 0.0491, "step": 22808 }, { "epoch": 0.5026029185740964, "grad_norm": 0.5525622367858887, "learning_rate": 1.560201432653419e-05, "loss": 0.0821, "step": 22809 }, { "epoch": 0.5026249538636126, "grad_norm": 0.7045332193374634, "learning_rate": 1.560094465231866e-05, "loss": 0.0894, "step": 22810 }, { "epoch": 0.5026469891531288, "grad_norm": 0.5848399996757507, "learning_rate": 1.5599874975042196e-05, "loss": 0.0728, "step": 22811 }, { "epoch": 0.5026690244426449, "grad_norm": 0.7300952076911926, "learning_rate": 1.5598805294710248e-05, "loss": 0.0561, "step": 22812 }, { "epoch": 0.5026910597321611, "grad_norm": 0.5266293883323669, "learning_rate": 1.5597735611328263e-05, "loss": 0.0613, "step": 22813 }, { "epoch": 0.5027130950216773, "grad_norm": 0.8381249904632568, "learning_rate": 1.5596665924901686e-05, "loss": 0.0697, "step": 22814 }, { "epoch": 0.5027351303111933, "grad_norm": 0.6150969862937927, "learning_rate": 1.5595596235435975e-05, "loss": 0.078, "step": 22815 }, { "epoch": 0.5027571656007095, "grad_norm": 1.1092565059661865, "learning_rate": 1.5594526542936575e-05, "loss": 0.1113, "step": 22816 }, { "epoch": 0.5027792008902257, "grad_norm": 1.073426604270935, "learning_rate": 1.5593456847408927e-05, "loss": 0.0806, "step": 22817 }, { "epoch": 0.5028012361797418, "grad_norm": 0.679638683795929, "learning_rate": 1.5592387148858486e-05, "loss": 0.1029, "step": 22818 }, { "epoch": 0.502823271469258, "grad_norm": 0.6014620065689087, "learning_rate": 1.5591317447290704e-05, "loss": 0.088, "step": 22819 }, { "epoch": 0.5028453067587741, "grad_norm": 0.7924999594688416, "learning_rate": 1.5590247742711018e-05, "loss": 0.0831, "step": 22820 }, { "epoch": 0.5028673420482903, "grad_norm": 0.5594256520271301, "learning_rate": 1.5589178035124887e-05, "loss": 0.1069, "step": 22821 }, { "epoch": 0.5028893773378065, "grad_norm": 0.5831232666969299, "learning_rate": 1.5588108324537756e-05, "loss": 0.0762, "step": 22822 }, { "epoch": 0.5029114126273226, "grad_norm": 0.4226284623146057, "learning_rate": 1.5587038610955075e-05, "loss": 0.0735, "step": 22823 }, { "epoch": 0.5029334479168388, "grad_norm": 0.7809003591537476, "learning_rate": 1.5585968894382294e-05, "loss": 0.0774, "step": 22824 }, { "epoch": 0.502955483206355, "grad_norm": 0.7142230272293091, "learning_rate": 1.5584899174824854e-05, "loss": 0.082, "step": 22825 }, { "epoch": 0.5029775184958711, "grad_norm": 0.6435109972953796, "learning_rate": 1.5583829452288212e-05, "loss": 0.0641, "step": 22826 }, { "epoch": 0.5029995537853873, "grad_norm": 0.9630844593048096, "learning_rate": 1.558275972677781e-05, "loss": 0.0676, "step": 22827 }, { "epoch": 0.5030215890749035, "grad_norm": 1.4062200784683228, "learning_rate": 1.5581689998299103e-05, "loss": 0.0837, "step": 22828 }, { "epoch": 0.5030436243644196, "grad_norm": 0.5117207765579224, "learning_rate": 1.5580620266857536e-05, "loss": 0.0767, "step": 22829 }, { "epoch": 0.5030656596539358, "grad_norm": 0.3854345977306366, "learning_rate": 1.557955053245856e-05, "loss": 0.0663, "step": 22830 }, { "epoch": 0.503087694943452, "grad_norm": 0.5389720797538757, "learning_rate": 1.557848079510762e-05, "loss": 0.098, "step": 22831 }, { "epoch": 0.5031097302329681, "grad_norm": 0.6918298602104187, "learning_rate": 1.557741105481017e-05, "loss": 0.0843, "step": 22832 }, { "epoch": 0.5031317655224843, "grad_norm": 0.4863146245479584, "learning_rate": 1.5576341311571658e-05, "loss": 0.0714, "step": 22833 }, { "epoch": 0.5031538008120005, "grad_norm": 0.42440885305404663, "learning_rate": 1.557527156539753e-05, "loss": 0.0578, "step": 22834 }, { "epoch": 0.5031758361015166, "grad_norm": 0.4849870204925537, "learning_rate": 1.5574201816293227e-05, "loss": 0.0837, "step": 22835 }, { "epoch": 0.5031978713910328, "grad_norm": 0.4755145013332367, "learning_rate": 1.5573132064264216e-05, "loss": 0.0626, "step": 22836 }, { "epoch": 0.503219906680549, "grad_norm": 0.515077531337738, "learning_rate": 1.557206230931593e-05, "loss": 0.0652, "step": 22837 }, { "epoch": 0.5032419419700651, "grad_norm": 0.5024060010910034, "learning_rate": 1.5570992551453826e-05, "loss": 0.0759, "step": 22838 }, { "epoch": 0.5032639772595813, "grad_norm": 0.5679887533187866, "learning_rate": 1.556992279068335e-05, "loss": 0.0857, "step": 22839 }, { "epoch": 0.5032860125490973, "grad_norm": 0.5117592215538025, "learning_rate": 1.5568853027009954e-05, "loss": 0.0795, "step": 22840 }, { "epoch": 0.5033080478386135, "grad_norm": 0.5131163597106934, "learning_rate": 1.556778326043908e-05, "loss": 0.0641, "step": 22841 }, { "epoch": 0.5033300831281297, "grad_norm": 0.41076239943504333, "learning_rate": 1.556671349097619e-05, "loss": 0.0723, "step": 22842 }, { "epoch": 0.5033521184176458, "grad_norm": 0.8115758895874023, "learning_rate": 1.5565643718626716e-05, "loss": 0.0844, "step": 22843 }, { "epoch": 0.503374153707162, "grad_norm": 0.7333071231842041, "learning_rate": 1.556457394339612e-05, "loss": 0.0771, "step": 22844 }, { "epoch": 0.5033961889966782, "grad_norm": 0.437062531709671, "learning_rate": 1.5563504165289842e-05, "loss": 0.1204, "step": 22845 }, { "epoch": 0.5034182242861943, "grad_norm": 0.6153429746627808, "learning_rate": 1.5562434384313337e-05, "loss": 0.0668, "step": 22846 }, { "epoch": 0.5034402595757105, "grad_norm": 0.6241270303726196, "learning_rate": 1.5561364600472052e-05, "loss": 0.1085, "step": 22847 }, { "epoch": 0.5034622948652266, "grad_norm": 0.8863058090209961, "learning_rate": 1.5560294813771438e-05, "loss": 0.0724, "step": 22848 }, { "epoch": 0.5034843301547428, "grad_norm": 0.6760368943214417, "learning_rate": 1.555922502421694e-05, "loss": 0.0552, "step": 22849 }, { "epoch": 0.503506365444259, "grad_norm": 0.35174572467803955, "learning_rate": 1.5558155231814008e-05, "loss": 0.0807, "step": 22850 }, { "epoch": 0.5035284007337751, "grad_norm": 0.5411885380744934, "learning_rate": 1.5557085436568092e-05, "loss": 0.0738, "step": 22851 }, { "epoch": 0.5035504360232913, "grad_norm": 1.0552622079849243, "learning_rate": 1.5556015638484643e-05, "loss": 0.1238, "step": 22852 }, { "epoch": 0.5035724713128075, "grad_norm": 0.5596551895141602, "learning_rate": 1.5554945837569108e-05, "loss": 0.0806, "step": 22853 }, { "epoch": 0.5035945066023236, "grad_norm": 0.5075563192367554, "learning_rate": 1.5553876033826936e-05, "loss": 0.084, "step": 22854 }, { "epoch": 0.5036165418918398, "grad_norm": 0.6715010404586792, "learning_rate": 1.5552806227263576e-05, "loss": 0.0844, "step": 22855 }, { "epoch": 0.503638577181356, "grad_norm": 0.6733447909355164, "learning_rate": 1.5551736417884478e-05, "loss": 0.0769, "step": 22856 }, { "epoch": 0.5036606124708721, "grad_norm": 0.5947930812835693, "learning_rate": 1.5550666605695094e-05, "loss": 0.1038, "step": 22857 }, { "epoch": 0.5036826477603883, "grad_norm": 0.8126372694969177, "learning_rate": 1.5549596790700865e-05, "loss": 0.0774, "step": 22858 }, { "epoch": 0.5037046830499045, "grad_norm": 0.45305997133255005, "learning_rate": 1.5548526972907244e-05, "loss": 0.0699, "step": 22859 }, { "epoch": 0.5037267183394206, "grad_norm": 0.5752963423728943, "learning_rate": 1.5547457152319684e-05, "loss": 0.0689, "step": 22860 }, { "epoch": 0.5037487536289368, "grad_norm": 0.7705838680267334, "learning_rate": 1.5546387328943627e-05, "loss": 0.06, "step": 22861 }, { "epoch": 0.503770788918453, "grad_norm": 0.5220360159873962, "learning_rate": 1.5545317502784532e-05, "loss": 0.0512, "step": 22862 }, { "epoch": 0.5037928242079691, "grad_norm": 0.6984876990318298, "learning_rate": 1.554424767384784e-05, "loss": 0.0835, "step": 22863 }, { "epoch": 0.5038148594974852, "grad_norm": 0.7567824721336365, "learning_rate": 1.5543177842139002e-05, "loss": 0.1001, "step": 22864 }, { "epoch": 0.5038368947870013, "grad_norm": 0.4391126334667206, "learning_rate": 1.5542108007663473e-05, "loss": 0.1061, "step": 22865 }, { "epoch": 0.5038589300765175, "grad_norm": 0.5484703779220581, "learning_rate": 1.554103817042669e-05, "loss": 0.0631, "step": 22866 }, { "epoch": 0.5038809653660337, "grad_norm": 0.37643858790397644, "learning_rate": 1.5539968330434115e-05, "loss": 0.0521, "step": 22867 }, { "epoch": 0.5039030006555498, "grad_norm": 0.5206847190856934, "learning_rate": 1.5538898487691185e-05, "loss": 0.0775, "step": 22868 }, { "epoch": 0.503925035945066, "grad_norm": 0.5799950361251831, "learning_rate": 1.553782864220336e-05, "loss": 0.0658, "step": 22869 }, { "epoch": 0.5039470712345822, "grad_norm": 0.5799241662025452, "learning_rate": 1.5536758793976086e-05, "loss": 0.0684, "step": 22870 }, { "epoch": 0.5039691065240983, "grad_norm": 0.6413479447364807, "learning_rate": 1.5535688943014812e-05, "loss": 0.0945, "step": 22871 }, { "epoch": 0.5039911418136145, "grad_norm": 0.6734643578529358, "learning_rate": 1.5534619089324985e-05, "loss": 0.0946, "step": 22872 }, { "epoch": 0.5040131771031306, "grad_norm": 0.6546637415885925, "learning_rate": 1.5533549232912058e-05, "loss": 0.0588, "step": 22873 }, { "epoch": 0.5040352123926468, "grad_norm": 0.5927422046661377, "learning_rate": 1.5532479373781477e-05, "loss": 0.0811, "step": 22874 }, { "epoch": 0.504057247682163, "grad_norm": 0.4983251690864563, "learning_rate": 1.5531409511938693e-05, "loss": 0.067, "step": 22875 }, { "epoch": 0.5040792829716791, "grad_norm": 0.6786003112792969, "learning_rate": 1.5530339647389158e-05, "loss": 0.0646, "step": 22876 }, { "epoch": 0.5041013182611953, "grad_norm": 0.5738491415977478, "learning_rate": 1.5529269780138316e-05, "loss": 0.1005, "step": 22877 }, { "epoch": 0.5041233535507115, "grad_norm": 0.701287567615509, "learning_rate": 1.5528199910191622e-05, "loss": 0.0611, "step": 22878 }, { "epoch": 0.5041453888402276, "grad_norm": 0.8007387518882751, "learning_rate": 1.5527130037554518e-05, "loss": 0.0984, "step": 22879 }, { "epoch": 0.5041674241297438, "grad_norm": 0.5328847765922546, "learning_rate": 1.5526060162232465e-05, "loss": 0.0665, "step": 22880 }, { "epoch": 0.50418945941926, "grad_norm": 0.6644367575645447, "learning_rate": 1.55249902842309e-05, "loss": 0.1036, "step": 22881 }, { "epoch": 0.5042114947087761, "grad_norm": 0.8856773972511292, "learning_rate": 1.5523920403555282e-05, "loss": 0.0795, "step": 22882 }, { "epoch": 0.5042335299982923, "grad_norm": 0.6962190866470337, "learning_rate": 1.5522850520211048e-05, "loss": 0.0861, "step": 22883 }, { "epoch": 0.5042555652878085, "grad_norm": 0.6059262156486511, "learning_rate": 1.5521780634203664e-05, "loss": 0.0605, "step": 22884 }, { "epoch": 0.5042776005773246, "grad_norm": 0.7791168689727783, "learning_rate": 1.552071074553857e-05, "loss": 0.0735, "step": 22885 }, { "epoch": 0.5042996358668408, "grad_norm": 0.4471011459827423, "learning_rate": 1.5519640854221217e-05, "loss": 0.0522, "step": 22886 }, { "epoch": 0.504321671156357, "grad_norm": 0.5408470034599304, "learning_rate": 1.551857096025705e-05, "loss": 0.0787, "step": 22887 }, { "epoch": 0.5043437064458731, "grad_norm": 0.8918523788452148, "learning_rate": 1.551750106365153e-05, "loss": 0.0646, "step": 22888 }, { "epoch": 0.5043657417353892, "grad_norm": 0.62630295753479, "learning_rate": 1.5516431164410095e-05, "loss": 0.1042, "step": 22889 }, { "epoch": 0.5043877770249053, "grad_norm": 0.5464113354682922, "learning_rate": 1.5515361262538205e-05, "loss": 0.0773, "step": 22890 }, { "epoch": 0.5044098123144215, "grad_norm": 0.7149949669837952, "learning_rate": 1.5514291358041298e-05, "loss": 0.0809, "step": 22891 }, { "epoch": 0.5044318476039377, "grad_norm": 0.32656243443489075, "learning_rate": 1.5513221450924823e-05, "loss": 0.0497, "step": 22892 }, { "epoch": 0.5044538828934538, "grad_norm": 0.7040824890136719, "learning_rate": 1.5512151541194246e-05, "loss": 0.0755, "step": 22893 }, { "epoch": 0.50447591818297, "grad_norm": 0.6171397566795349, "learning_rate": 1.5511081628855003e-05, "loss": 0.0827, "step": 22894 }, { "epoch": 0.5044979534724862, "grad_norm": 0.4518760144710541, "learning_rate": 1.551001171391255e-05, "loss": 0.0516, "step": 22895 }, { "epoch": 0.5045199887620023, "grad_norm": 0.7050080895423889, "learning_rate": 1.5508941796372332e-05, "loss": 0.0669, "step": 22896 }, { "epoch": 0.5045420240515185, "grad_norm": 0.6702313423156738, "learning_rate": 1.5507871876239804e-05, "loss": 0.0887, "step": 22897 }, { "epoch": 0.5045640593410347, "grad_norm": 0.5491058230400085, "learning_rate": 1.5506801953520406e-05, "loss": 0.0624, "step": 22898 }, { "epoch": 0.5045860946305508, "grad_norm": 1.0040173530578613, "learning_rate": 1.5505732028219598e-05, "loss": 0.1134, "step": 22899 }, { "epoch": 0.504608129920067, "grad_norm": 0.7695379257202148, "learning_rate": 1.5504662100342823e-05, "loss": 0.0825, "step": 22900 }, { "epoch": 0.5046301652095831, "grad_norm": 0.46424856781959534, "learning_rate": 1.5503592169895534e-05, "loss": 0.0584, "step": 22901 }, { "epoch": 0.5046522004990993, "grad_norm": 0.42436549067497253, "learning_rate": 1.550252223688318e-05, "loss": 0.0778, "step": 22902 }, { "epoch": 0.5046742357886155, "grad_norm": 0.523478090763092, "learning_rate": 1.5501452301311215e-05, "loss": 0.1091, "step": 22903 }, { "epoch": 0.5046962710781316, "grad_norm": 0.4400472342967987, "learning_rate": 1.550038236318508e-05, "loss": 0.0936, "step": 22904 }, { "epoch": 0.5047183063676478, "grad_norm": 0.49468523263931274, "learning_rate": 1.549931242251023e-05, "loss": 0.0547, "step": 22905 }, { "epoch": 0.504740341657164, "grad_norm": 0.4451681673526764, "learning_rate": 1.5498242479292114e-05, "loss": 0.085, "step": 22906 }, { "epoch": 0.5047623769466801, "grad_norm": 0.4420836865901947, "learning_rate": 1.5497172533536185e-05, "loss": 0.0689, "step": 22907 }, { "epoch": 0.5047844122361963, "grad_norm": 0.9253472685813904, "learning_rate": 1.5496102585247882e-05, "loss": 0.0895, "step": 22908 }, { "epoch": 0.5048064475257125, "grad_norm": 0.4917895793914795, "learning_rate": 1.549503263443267e-05, "loss": 0.0581, "step": 22909 }, { "epoch": 0.5048284828152286, "grad_norm": 0.7821015119552612, "learning_rate": 1.5493962681095985e-05, "loss": 0.0543, "step": 22910 }, { "epoch": 0.5048505181047448, "grad_norm": 0.5604903697967529, "learning_rate": 1.5492892725243287e-05, "loss": 0.0809, "step": 22911 }, { "epoch": 0.504872553394261, "grad_norm": 0.4708687365055084, "learning_rate": 1.5491822766880025e-05, "loss": 0.0669, "step": 22912 }, { "epoch": 0.5048945886837771, "grad_norm": 0.5218456387519836, "learning_rate": 1.5490752806011646e-05, "loss": 0.0662, "step": 22913 }, { "epoch": 0.5049166239732932, "grad_norm": 0.4716533422470093, "learning_rate": 1.548968284264359e-05, "loss": 0.0977, "step": 22914 }, { "epoch": 0.5049386592628093, "grad_norm": 0.571463942527771, "learning_rate": 1.5488612876781326e-05, "loss": 0.0915, "step": 22915 }, { "epoch": 0.5049606945523255, "grad_norm": 1.068320870399475, "learning_rate": 1.548754290843029e-05, "loss": 0.1056, "step": 22916 }, { "epoch": 0.5049827298418417, "grad_norm": 0.38216665387153625, "learning_rate": 1.5486472937595936e-05, "loss": 0.0979, "step": 22917 }, { "epoch": 0.5050047651313578, "grad_norm": 0.4809802174568176, "learning_rate": 1.5485402964283718e-05, "loss": 0.0864, "step": 22918 }, { "epoch": 0.505026800420874, "grad_norm": 0.711575448513031, "learning_rate": 1.5484332988499085e-05, "loss": 0.0746, "step": 22919 }, { "epoch": 0.5050488357103902, "grad_norm": 0.4951505959033966, "learning_rate": 1.5483263010247483e-05, "loss": 0.0508, "step": 22920 }, { "epoch": 0.5050708709999063, "grad_norm": 0.6901126503944397, "learning_rate": 1.5482193029534358e-05, "loss": 0.0841, "step": 22921 }, { "epoch": 0.5050929062894225, "grad_norm": 0.6181434988975525, "learning_rate": 1.5481123046365167e-05, "loss": 0.0505, "step": 22922 }, { "epoch": 0.5051149415789387, "grad_norm": 0.7159351110458374, "learning_rate": 1.548005306074536e-05, "loss": 0.0735, "step": 22923 }, { "epoch": 0.5051369768684548, "grad_norm": 0.26891764998435974, "learning_rate": 1.5478983072680386e-05, "loss": 0.0759, "step": 22924 }, { "epoch": 0.505159012157971, "grad_norm": 1.3336899280548096, "learning_rate": 1.547791308217569e-05, "loss": 0.077, "step": 22925 }, { "epoch": 0.5051810474474872, "grad_norm": 0.46498748660087585, "learning_rate": 1.5476843089236732e-05, "loss": 0.0482, "step": 22926 }, { "epoch": 0.5052030827370033, "grad_norm": 0.6770893931388855, "learning_rate": 1.5475773093868955e-05, "loss": 0.1026, "step": 22927 }, { "epoch": 0.5052251180265195, "grad_norm": 0.49543362855911255, "learning_rate": 1.547470309607781e-05, "loss": 0.0407, "step": 22928 }, { "epoch": 0.5052471533160356, "grad_norm": 0.44859519600868225, "learning_rate": 1.5473633095868745e-05, "loss": 0.0513, "step": 22929 }, { "epoch": 0.5052691886055518, "grad_norm": 0.887895941734314, "learning_rate": 1.5472563093247218e-05, "loss": 0.1077, "step": 22930 }, { "epoch": 0.505291223895068, "grad_norm": 0.5463072061538696, "learning_rate": 1.5471493088218668e-05, "loss": 0.0525, "step": 22931 }, { "epoch": 0.5053132591845841, "grad_norm": 0.8122719526290894, "learning_rate": 1.5470423080788555e-05, "loss": 0.1021, "step": 22932 }, { "epoch": 0.5053352944741003, "grad_norm": 0.6396376490592957, "learning_rate": 1.5469353070962322e-05, "loss": 0.0875, "step": 22933 }, { "epoch": 0.5053573297636165, "grad_norm": 0.3911190330982208, "learning_rate": 1.5468283058745424e-05, "loss": 0.0344, "step": 22934 }, { "epoch": 0.5053793650531326, "grad_norm": 0.910847008228302, "learning_rate": 1.546721304414331e-05, "loss": 0.081, "step": 22935 }, { "epoch": 0.5054014003426488, "grad_norm": 0.8345983028411865, "learning_rate": 1.546614302716143e-05, "loss": 0.0946, "step": 22936 }, { "epoch": 0.505423435632165, "grad_norm": 0.6363545060157776, "learning_rate": 1.546507300780523e-05, "loss": 0.0793, "step": 22937 }, { "epoch": 0.505445470921681, "grad_norm": 0.47511783242225647, "learning_rate": 1.5464002986080163e-05, "loss": 0.0935, "step": 22938 }, { "epoch": 0.5054675062111972, "grad_norm": 0.507538914680481, "learning_rate": 1.5462932961991684e-05, "loss": 0.0658, "step": 22939 }, { "epoch": 0.5054895415007133, "grad_norm": 0.6555160284042358, "learning_rate": 1.5461862935545235e-05, "loss": 0.0671, "step": 22940 }, { "epoch": 0.5055115767902295, "grad_norm": 0.60369473695755, "learning_rate": 1.5460792906746277e-05, "loss": 0.0774, "step": 22941 }, { "epoch": 0.5055336120797457, "grad_norm": 0.634652853012085, "learning_rate": 1.5459722875600245e-05, "loss": 0.0735, "step": 22942 }, { "epoch": 0.5055556473692618, "grad_norm": 0.8787281513214111, "learning_rate": 1.5458652842112608e-05, "loss": 0.0837, "step": 22943 }, { "epoch": 0.505577682658778, "grad_norm": 0.49672985076904297, "learning_rate": 1.54575828062888e-05, "loss": 0.0599, "step": 22944 }, { "epoch": 0.5055997179482942, "grad_norm": 0.45549607276916504, "learning_rate": 1.545651276813428e-05, "loss": 0.0773, "step": 22945 }, { "epoch": 0.5056217532378103, "grad_norm": 0.745635449886322, "learning_rate": 1.545544272765449e-05, "loss": 0.0713, "step": 22946 }, { "epoch": 0.5056437885273265, "grad_norm": 0.5352278351783752, "learning_rate": 1.5454372684854895e-05, "loss": 0.0319, "step": 22947 }, { "epoch": 0.5056658238168427, "grad_norm": 0.7059797048568726, "learning_rate": 1.5453302639740924e-05, "loss": 0.0804, "step": 22948 }, { "epoch": 0.5056878591063588, "grad_norm": 0.589609682559967, "learning_rate": 1.545223259231805e-05, "loss": 0.0791, "step": 22949 }, { "epoch": 0.505709894395875, "grad_norm": 0.6142086982727051, "learning_rate": 1.5451162542591706e-05, "loss": 0.0545, "step": 22950 }, { "epoch": 0.5057319296853912, "grad_norm": 0.7731893658638, "learning_rate": 1.5450092490567356e-05, "loss": 0.0992, "step": 22951 }, { "epoch": 0.5057539649749073, "grad_norm": 0.6226949095726013, "learning_rate": 1.5449022436250443e-05, "loss": 0.0655, "step": 22952 }, { "epoch": 0.5057760002644235, "grad_norm": 0.7625014185905457, "learning_rate": 1.544795237964642e-05, "loss": 0.0697, "step": 22953 }, { "epoch": 0.5057980355539397, "grad_norm": 0.6653750538825989, "learning_rate": 1.5446882320760725e-05, "loss": 0.0949, "step": 22954 }, { "epoch": 0.5058200708434558, "grad_norm": 0.8894027471542358, "learning_rate": 1.5445812259598827e-05, "loss": 0.0609, "step": 22955 }, { "epoch": 0.505842106132972, "grad_norm": 0.8495218753814697, "learning_rate": 1.544474219616616e-05, "loss": 0.1271, "step": 22956 }, { "epoch": 0.5058641414224881, "grad_norm": 0.5616289973258972, "learning_rate": 1.544367213046819e-05, "loss": 0.088, "step": 22957 }, { "epoch": 0.5058861767120043, "grad_norm": 1.3266332149505615, "learning_rate": 1.544260206251036e-05, "loss": 0.0719, "step": 22958 }, { "epoch": 0.5059082120015205, "grad_norm": 0.47209665179252625, "learning_rate": 1.5441531992298114e-05, "loss": 0.056, "step": 22959 }, { "epoch": 0.5059302472910366, "grad_norm": 0.3900158107280731, "learning_rate": 1.5440461919836915e-05, "loss": 0.0573, "step": 22960 }, { "epoch": 0.5059522825805528, "grad_norm": 0.2469031661748886, "learning_rate": 1.5439391845132207e-05, "loss": 0.0338, "step": 22961 }, { "epoch": 0.505974317870069, "grad_norm": 0.7918123602867126, "learning_rate": 1.543832176818944e-05, "loss": 0.0844, "step": 22962 }, { "epoch": 0.505996353159585, "grad_norm": 0.8782889246940613, "learning_rate": 1.5437251689014063e-05, "loss": 0.0608, "step": 22963 }, { "epoch": 0.5060183884491012, "grad_norm": 0.7657281160354614, "learning_rate": 1.5436181607611527e-05, "loss": 0.1088, "step": 22964 }, { "epoch": 0.5060404237386174, "grad_norm": 0.6545800566673279, "learning_rate": 1.5435111523987287e-05, "loss": 0.0611, "step": 22965 }, { "epoch": 0.5060624590281335, "grad_norm": 0.73447585105896, "learning_rate": 1.5434041438146793e-05, "loss": 0.0656, "step": 22966 }, { "epoch": 0.5060844943176497, "grad_norm": 0.5888711214065552, "learning_rate": 1.5432971350095492e-05, "loss": 0.0593, "step": 22967 }, { "epoch": 0.5061065296071658, "grad_norm": 0.42216217517852783, "learning_rate": 1.5431901259838835e-05, "loss": 0.0523, "step": 22968 }, { "epoch": 0.506128564896682, "grad_norm": 0.8185524940490723, "learning_rate": 1.5430831167382272e-05, "loss": 0.1047, "step": 22969 }, { "epoch": 0.5061506001861982, "grad_norm": 0.7675761580467224, "learning_rate": 1.5429761072731256e-05, "loss": 0.0528, "step": 22970 }, { "epoch": 0.5061726354757143, "grad_norm": 0.7827600240707397, "learning_rate": 1.5428690975891236e-05, "loss": 0.0942, "step": 22971 }, { "epoch": 0.5061946707652305, "grad_norm": 0.4468197226524353, "learning_rate": 1.5427620876867663e-05, "loss": 0.0481, "step": 22972 }, { "epoch": 0.5062167060547467, "grad_norm": 0.8337081670761108, "learning_rate": 1.5426550775665985e-05, "loss": 0.1131, "step": 22973 }, { "epoch": 0.5062387413442628, "grad_norm": 0.5207734704017639, "learning_rate": 1.5425480672291658e-05, "loss": 0.0658, "step": 22974 }, { "epoch": 0.506260776633779, "grad_norm": 0.8708640336990356, "learning_rate": 1.5424410566750133e-05, "loss": 0.0758, "step": 22975 }, { "epoch": 0.5062828119232952, "grad_norm": 0.7549993991851807, "learning_rate": 1.5423340459046854e-05, "loss": 0.0669, "step": 22976 }, { "epoch": 0.5063048472128113, "grad_norm": 0.7737268805503845, "learning_rate": 1.5422270349187274e-05, "loss": 0.0591, "step": 22977 }, { "epoch": 0.5063268825023275, "grad_norm": 0.5624218583106995, "learning_rate": 1.5421200237176846e-05, "loss": 0.0835, "step": 22978 }, { "epoch": 0.5063489177918437, "grad_norm": 1.1557921171188354, "learning_rate": 1.542013012302102e-05, "loss": 0.0982, "step": 22979 }, { "epoch": 0.5063709530813598, "grad_norm": 0.7550857067108154, "learning_rate": 1.5419060006725245e-05, "loss": 0.0926, "step": 22980 }, { "epoch": 0.506392988370876, "grad_norm": 0.6787119507789612, "learning_rate": 1.5417989888294972e-05, "loss": 0.0738, "step": 22981 }, { "epoch": 0.5064150236603921, "grad_norm": 0.5342717170715332, "learning_rate": 1.5416919767735654e-05, "loss": 0.0604, "step": 22982 }, { "epoch": 0.5064370589499083, "grad_norm": 0.7730913162231445, "learning_rate": 1.5415849645052743e-05, "loss": 0.1034, "step": 22983 }, { "epoch": 0.5064590942394245, "grad_norm": 0.8337910771369934, "learning_rate": 1.541477952025168e-05, "loss": 0.1274, "step": 22984 }, { "epoch": 0.5064811295289406, "grad_norm": 0.47875019907951355, "learning_rate": 1.5413709393337922e-05, "loss": 0.0921, "step": 22985 }, { "epoch": 0.5065031648184568, "grad_norm": 0.36303216218948364, "learning_rate": 1.5412639264316925e-05, "loss": 0.0879, "step": 22986 }, { "epoch": 0.506525200107973, "grad_norm": 0.7580116391181946, "learning_rate": 1.5411569133194134e-05, "loss": 0.1009, "step": 22987 }, { "epoch": 0.506547235397489, "grad_norm": 0.5566300749778748, "learning_rate": 1.5410498999975e-05, "loss": 0.0783, "step": 22988 }, { "epoch": 0.5065692706870052, "grad_norm": 0.4886854290962219, "learning_rate": 1.5409428864664977e-05, "loss": 0.0709, "step": 22989 }, { "epoch": 0.5065913059765214, "grad_norm": 0.5302459597587585, "learning_rate": 1.5408358727269507e-05, "loss": 0.0654, "step": 22990 }, { "epoch": 0.5066133412660375, "grad_norm": 0.6502591967582703, "learning_rate": 1.5407288587794056e-05, "loss": 0.0753, "step": 22991 }, { "epoch": 0.5066353765555537, "grad_norm": 0.4732096791267395, "learning_rate": 1.540621844624406e-05, "loss": 0.0759, "step": 22992 }, { "epoch": 0.5066574118450698, "grad_norm": 0.9705875515937805, "learning_rate": 1.5405148302624978e-05, "loss": 0.0653, "step": 22993 }, { "epoch": 0.506679447134586, "grad_norm": 1.126006841659546, "learning_rate": 1.5404078156942253e-05, "loss": 0.0969, "step": 22994 }, { "epoch": 0.5067014824241022, "grad_norm": 0.5567160248756409, "learning_rate": 1.540300800920135e-05, "loss": 0.0594, "step": 22995 }, { "epoch": 0.5067235177136183, "grad_norm": 0.5501742959022522, "learning_rate": 1.5401937859407702e-05, "loss": 0.0562, "step": 22996 }, { "epoch": 0.5067455530031345, "grad_norm": 0.5313112139701843, "learning_rate": 1.5400867707566774e-05, "loss": 0.0977, "step": 22997 }, { "epoch": 0.5067675882926507, "grad_norm": 0.6426730155944824, "learning_rate": 1.5399797553684007e-05, "loss": 0.0772, "step": 22998 }, { "epoch": 0.5067896235821668, "grad_norm": 0.9611454010009766, "learning_rate": 1.5398727397764865e-05, "loss": 0.0921, "step": 22999 }, { "epoch": 0.506811658871683, "grad_norm": 0.3949361741542816, "learning_rate": 1.5397657239814786e-05, "loss": 0.0757, "step": 23000 }, { "epoch": 0.5068336941611992, "grad_norm": 0.47879788279533386, "learning_rate": 1.5396587079839227e-05, "loss": 0.0668, "step": 23001 }, { "epoch": 0.5068557294507153, "grad_norm": 0.6322994232177734, "learning_rate": 1.5395516917843633e-05, "loss": 0.1105, "step": 23002 }, { "epoch": 0.5068777647402315, "grad_norm": 0.7301620244979858, "learning_rate": 1.5394446753833464e-05, "loss": 0.069, "step": 23003 }, { "epoch": 0.5068998000297477, "grad_norm": 0.4365326464176178, "learning_rate": 1.5393376587814162e-05, "loss": 0.0538, "step": 23004 }, { "epoch": 0.5069218353192638, "grad_norm": 0.8339946866035461, "learning_rate": 1.5392306419791183e-05, "loss": 0.0602, "step": 23005 }, { "epoch": 0.50694387060878, "grad_norm": 0.2778126001358032, "learning_rate": 1.539123624976998e-05, "loss": 0.0482, "step": 23006 }, { "epoch": 0.5069659058982962, "grad_norm": 0.9056039452552795, "learning_rate": 1.5390166077756002e-05, "loss": 0.0712, "step": 23007 }, { "epoch": 0.5069879411878123, "grad_norm": 0.6131107211112976, "learning_rate": 1.53890959037547e-05, "loss": 0.0749, "step": 23008 }, { "epoch": 0.5070099764773285, "grad_norm": 0.6797325611114502, "learning_rate": 1.5388025727771515e-05, "loss": 0.101, "step": 23009 }, { "epoch": 0.5070320117668446, "grad_norm": 0.9317691922187805, "learning_rate": 1.5386955549811916e-05, "loss": 0.1106, "step": 23010 }, { "epoch": 0.5070540470563608, "grad_norm": 0.6607794761657715, "learning_rate": 1.5385885369881338e-05, "loss": 0.0749, "step": 23011 }, { "epoch": 0.507076082345877, "grad_norm": 0.6406458020210266, "learning_rate": 1.5384815187985244e-05, "loss": 0.0692, "step": 23012 }, { "epoch": 0.507098117635393, "grad_norm": 0.37218600511550903, "learning_rate": 1.5383745004129075e-05, "loss": 0.0697, "step": 23013 }, { "epoch": 0.5071201529249092, "grad_norm": 0.7329109907150269, "learning_rate": 1.538267481831829e-05, "loss": 0.0663, "step": 23014 }, { "epoch": 0.5071421882144254, "grad_norm": 0.8764710426330566, "learning_rate": 1.538160463055834e-05, "loss": 0.0812, "step": 23015 }, { "epoch": 0.5071642235039415, "grad_norm": 0.38980934023857117, "learning_rate": 1.538053444085467e-05, "loss": 0.0402, "step": 23016 }, { "epoch": 0.5071862587934577, "grad_norm": 0.7488194704055786, "learning_rate": 1.537946424921273e-05, "loss": 0.0973, "step": 23017 }, { "epoch": 0.5072082940829739, "grad_norm": 0.699390709400177, "learning_rate": 1.537839405563798e-05, "loss": 0.0729, "step": 23018 }, { "epoch": 0.50723032937249, "grad_norm": 0.8814457654953003, "learning_rate": 1.5377323860135865e-05, "loss": 0.0841, "step": 23019 }, { "epoch": 0.5072523646620062, "grad_norm": 0.6946125626564026, "learning_rate": 1.5376253662711837e-05, "loss": 0.1021, "step": 23020 }, { "epoch": 0.5072743999515223, "grad_norm": 0.5367323160171509, "learning_rate": 1.5375183463371347e-05, "loss": 0.0569, "step": 23021 }, { "epoch": 0.5072964352410385, "grad_norm": 0.6555061340332031, "learning_rate": 1.537411326211985e-05, "loss": 0.0615, "step": 23022 }, { "epoch": 0.5073184705305547, "grad_norm": 0.4967585802078247, "learning_rate": 1.537304305896279e-05, "loss": 0.0684, "step": 23023 }, { "epoch": 0.5073405058200708, "grad_norm": 0.7913835048675537, "learning_rate": 1.5371972853905625e-05, "loss": 0.0727, "step": 23024 }, { "epoch": 0.507362541109587, "grad_norm": 0.5232970118522644, "learning_rate": 1.5370902646953798e-05, "loss": 0.0716, "step": 23025 }, { "epoch": 0.5073845763991032, "grad_norm": 0.9243103265762329, "learning_rate": 1.5369832438112763e-05, "loss": 0.0994, "step": 23026 }, { "epoch": 0.5074066116886193, "grad_norm": 0.5959473252296448, "learning_rate": 1.536876222738798e-05, "loss": 0.0632, "step": 23027 }, { "epoch": 0.5074286469781355, "grad_norm": 1.4019653797149658, "learning_rate": 1.536769201478489e-05, "loss": 0.0851, "step": 23028 }, { "epoch": 0.5074506822676517, "grad_norm": 0.8977420330047607, "learning_rate": 1.5366621800308947e-05, "loss": 0.0893, "step": 23029 }, { "epoch": 0.5074727175571678, "grad_norm": 0.44043174386024475, "learning_rate": 1.5365551583965605e-05, "loss": 0.1063, "step": 23030 }, { "epoch": 0.507494752846684, "grad_norm": 0.4176560342311859, "learning_rate": 1.5364481365760312e-05, "loss": 0.0637, "step": 23031 }, { "epoch": 0.5075167881362002, "grad_norm": 0.5689228773117065, "learning_rate": 1.5363411145698516e-05, "loss": 0.0588, "step": 23032 }, { "epoch": 0.5075388234257163, "grad_norm": 0.5209582448005676, "learning_rate": 1.5362340923785675e-05, "loss": 0.0933, "step": 23033 }, { "epoch": 0.5075608587152325, "grad_norm": 0.6165401935577393, "learning_rate": 1.5361270700027238e-05, "loss": 0.0699, "step": 23034 }, { "epoch": 0.5075828940047487, "grad_norm": 0.9297153949737549, "learning_rate": 1.5360200474428655e-05, "loss": 0.095, "step": 23035 }, { "epoch": 0.5076049292942648, "grad_norm": 0.7317276000976562, "learning_rate": 1.5359130246995375e-05, "loss": 0.094, "step": 23036 }, { "epoch": 0.5076269645837809, "grad_norm": 0.6610873937606812, "learning_rate": 1.5358060017732857e-05, "loss": 0.0806, "step": 23037 }, { "epoch": 0.507648999873297, "grad_norm": 0.7115340232849121, "learning_rate": 1.5356989786646548e-05, "loss": 0.0688, "step": 23038 }, { "epoch": 0.5076710351628132, "grad_norm": 0.6620056629180908, "learning_rate": 1.5355919553741896e-05, "loss": 0.0668, "step": 23039 }, { "epoch": 0.5076930704523294, "grad_norm": 0.8435268402099609, "learning_rate": 1.5354849319024353e-05, "loss": 0.0781, "step": 23040 }, { "epoch": 0.5077151057418455, "grad_norm": 0.7285828590393066, "learning_rate": 1.5353779082499374e-05, "loss": 0.0861, "step": 23041 }, { "epoch": 0.5077371410313617, "grad_norm": 0.44597962498664856, "learning_rate": 1.5352708844172407e-05, "loss": 0.0571, "step": 23042 }, { "epoch": 0.5077591763208779, "grad_norm": 0.6558988094329834, "learning_rate": 1.535163860404891e-05, "loss": 0.0744, "step": 23043 }, { "epoch": 0.507781211610394, "grad_norm": 0.7430146336555481, "learning_rate": 1.535056836213432e-05, "loss": 0.0549, "step": 23044 }, { "epoch": 0.5078032468999102, "grad_norm": 0.41584205627441406, "learning_rate": 1.5349498118434106e-05, "loss": 0.0729, "step": 23045 }, { "epoch": 0.5078252821894264, "grad_norm": 0.7597320675849915, "learning_rate": 1.5348427872953705e-05, "loss": 0.0558, "step": 23046 }, { "epoch": 0.5078473174789425, "grad_norm": 0.5580065250396729, "learning_rate": 1.534735762569858e-05, "loss": 0.0828, "step": 23047 }, { "epoch": 0.5078693527684587, "grad_norm": 1.0068823099136353, "learning_rate": 1.534628737667417e-05, "loss": 0.1243, "step": 23048 }, { "epoch": 0.5078913880579748, "grad_norm": 0.5567494630813599, "learning_rate": 1.534521712588593e-05, "loss": 0.113, "step": 23049 }, { "epoch": 0.507913423347491, "grad_norm": 0.3411621153354645, "learning_rate": 1.5344146873339324e-05, "loss": 0.0462, "step": 23050 }, { "epoch": 0.5079354586370072, "grad_norm": 0.41101762652397156, "learning_rate": 1.534307661903979e-05, "loss": 0.0763, "step": 23051 }, { "epoch": 0.5079574939265233, "grad_norm": 0.8209657073020935, "learning_rate": 1.5342006362992777e-05, "loss": 0.0887, "step": 23052 }, { "epoch": 0.5079795292160395, "grad_norm": 0.6566458940505981, "learning_rate": 1.5340936105203748e-05, "loss": 0.0808, "step": 23053 }, { "epoch": 0.5080015645055557, "grad_norm": 0.3486173152923584, "learning_rate": 1.5339865845678148e-05, "loss": 0.0582, "step": 23054 }, { "epoch": 0.5080235997950718, "grad_norm": 0.7496557831764221, "learning_rate": 1.533879558442143e-05, "loss": 0.061, "step": 23055 }, { "epoch": 0.508045635084588, "grad_norm": 0.4541926383972168, "learning_rate": 1.5337725321439045e-05, "loss": 0.0695, "step": 23056 }, { "epoch": 0.5080676703741042, "grad_norm": 0.6198638081550598, "learning_rate": 1.5336655056736437e-05, "loss": 0.0709, "step": 23057 }, { "epoch": 0.5080897056636203, "grad_norm": 0.6361922025680542, "learning_rate": 1.533558479031907e-05, "loss": 0.0514, "step": 23058 }, { "epoch": 0.5081117409531365, "grad_norm": 0.6975902318954468, "learning_rate": 1.5334514522192385e-05, "loss": 0.0801, "step": 23059 }, { "epoch": 0.5081337762426527, "grad_norm": 0.8581796288490295, "learning_rate": 1.5333444252361844e-05, "loss": 0.1006, "step": 23060 }, { "epoch": 0.5081558115321688, "grad_norm": 0.7056273221969604, "learning_rate": 1.533237398083289e-05, "loss": 0.0602, "step": 23061 }, { "epoch": 0.5081778468216849, "grad_norm": 0.4443649649620056, "learning_rate": 1.5331303707610977e-05, "loss": 0.0915, "step": 23062 }, { "epoch": 0.508199882111201, "grad_norm": 0.6841567754745483, "learning_rate": 1.5330233432701558e-05, "loss": 0.0871, "step": 23063 }, { "epoch": 0.5082219174007172, "grad_norm": 0.6499466896057129, "learning_rate": 1.5329163156110084e-05, "loss": 0.0685, "step": 23064 }, { "epoch": 0.5082439526902334, "grad_norm": 0.514769971370697, "learning_rate": 1.5328092877842e-05, "loss": 0.0729, "step": 23065 }, { "epoch": 0.5082659879797495, "grad_norm": 1.3128015995025635, "learning_rate": 1.532702259790277e-05, "loss": 0.0859, "step": 23066 }, { "epoch": 0.5082880232692657, "grad_norm": 0.8284004330635071, "learning_rate": 1.532595231629783e-05, "loss": 0.1091, "step": 23067 }, { "epoch": 0.5083100585587819, "grad_norm": 0.6320717930793762, "learning_rate": 1.532488203303265e-05, "loss": 0.1377, "step": 23068 }, { "epoch": 0.508332093848298, "grad_norm": 1.0328699350357056, "learning_rate": 1.5323811748112663e-05, "loss": 0.0934, "step": 23069 }, { "epoch": 0.5083541291378142, "grad_norm": 0.4840121865272522, "learning_rate": 1.5322741461543334e-05, "loss": 0.0656, "step": 23070 }, { "epoch": 0.5083761644273304, "grad_norm": 0.8518113493919373, "learning_rate": 1.5321671173330107e-05, "loss": 0.1424, "step": 23071 }, { "epoch": 0.5083981997168465, "grad_norm": 0.5087548494338989, "learning_rate": 1.532060088347844e-05, "loss": 0.0747, "step": 23072 }, { "epoch": 0.5084202350063627, "grad_norm": 0.8707850575447083, "learning_rate": 1.531953059199378e-05, "loss": 0.081, "step": 23073 }, { "epoch": 0.5084422702958789, "grad_norm": 0.6432281136512756, "learning_rate": 1.5318460298881572e-05, "loss": 0.0597, "step": 23074 }, { "epoch": 0.508464305585395, "grad_norm": 0.4456581473350525, "learning_rate": 1.5317390004147283e-05, "loss": 0.0881, "step": 23075 }, { "epoch": 0.5084863408749112, "grad_norm": 0.5284616947174072, "learning_rate": 1.5316319707796353e-05, "loss": 0.1105, "step": 23076 }, { "epoch": 0.5085083761644273, "grad_norm": 0.950589656829834, "learning_rate": 1.531524940983424e-05, "loss": 0.1111, "step": 23077 }, { "epoch": 0.5085304114539435, "grad_norm": 0.67911696434021, "learning_rate": 1.5314179110266392e-05, "loss": 0.0726, "step": 23078 }, { "epoch": 0.5085524467434597, "grad_norm": 0.5845063328742981, "learning_rate": 1.531310880909826e-05, "loss": 0.0787, "step": 23079 }, { "epoch": 0.5085744820329758, "grad_norm": 0.7915725111961365, "learning_rate": 1.5312038506335295e-05, "loss": 0.0517, "step": 23080 }, { "epoch": 0.508596517322492, "grad_norm": 0.5876719951629639, "learning_rate": 1.5310968201982957e-05, "loss": 0.0491, "step": 23081 }, { "epoch": 0.5086185526120082, "grad_norm": 0.6377397775650024, "learning_rate": 1.5309897896046684e-05, "loss": 0.0752, "step": 23082 }, { "epoch": 0.5086405879015243, "grad_norm": 1.2267441749572754, "learning_rate": 1.530882758853194e-05, "loss": 0.0781, "step": 23083 }, { "epoch": 0.5086626231910405, "grad_norm": 0.5962533950805664, "learning_rate": 1.5307757279444167e-05, "loss": 0.0634, "step": 23084 }, { "epoch": 0.5086846584805567, "grad_norm": 0.456263929605484, "learning_rate": 1.5306686968788823e-05, "loss": 0.0377, "step": 23085 }, { "epoch": 0.5087066937700728, "grad_norm": 0.7522743344306946, "learning_rate": 1.5305616656571363e-05, "loss": 0.0876, "step": 23086 }, { "epoch": 0.5087287290595889, "grad_norm": 0.7584968209266663, "learning_rate": 1.530454634279723e-05, "loss": 0.0981, "step": 23087 }, { "epoch": 0.508750764349105, "grad_norm": 0.62801194190979, "learning_rate": 1.5303476027471875e-05, "loss": 0.0727, "step": 23088 }, { "epoch": 0.5087727996386212, "grad_norm": 0.5976563096046448, "learning_rate": 1.530240571060076e-05, "loss": 0.0893, "step": 23089 }, { "epoch": 0.5087948349281374, "grad_norm": 0.7537693977355957, "learning_rate": 1.530133539218932e-05, "loss": 0.0859, "step": 23090 }, { "epoch": 0.5088168702176535, "grad_norm": 0.5859147906303406, "learning_rate": 1.530026507224303e-05, "loss": 0.0953, "step": 23091 }, { "epoch": 0.5088389055071697, "grad_norm": 0.3360046446323395, "learning_rate": 1.5299194750767322e-05, "loss": 0.0738, "step": 23092 }, { "epoch": 0.5088609407966859, "grad_norm": 0.5232274532318115, "learning_rate": 1.5298124427767658e-05, "loss": 0.0833, "step": 23093 }, { "epoch": 0.508882976086202, "grad_norm": 1.2065595388412476, "learning_rate": 1.5297054103249485e-05, "loss": 0.1131, "step": 23094 }, { "epoch": 0.5089050113757182, "grad_norm": 0.5552087426185608, "learning_rate": 1.5295983777218253e-05, "loss": 0.0613, "step": 23095 }, { "epoch": 0.5089270466652344, "grad_norm": 0.5299206972122192, "learning_rate": 1.529491344967942e-05, "loss": 0.0707, "step": 23096 }, { "epoch": 0.5089490819547505, "grad_norm": 0.4307856261730194, "learning_rate": 1.5293843120638434e-05, "loss": 0.0743, "step": 23097 }, { "epoch": 0.5089711172442667, "grad_norm": 1.014568567276001, "learning_rate": 1.529277279010075e-05, "loss": 0.0704, "step": 23098 }, { "epoch": 0.5089931525337829, "grad_norm": 2.0308620929718018, "learning_rate": 1.5291702458071813e-05, "loss": 0.0919, "step": 23099 }, { "epoch": 0.509015187823299, "grad_norm": 0.438332736492157, "learning_rate": 1.529063212455708e-05, "loss": 0.0836, "step": 23100 }, { "epoch": 0.5090372231128152, "grad_norm": 0.7331836223602295, "learning_rate": 1.5289561789562002e-05, "loss": 0.0995, "step": 23101 }, { "epoch": 0.5090592584023313, "grad_norm": 0.4640343189239502, "learning_rate": 1.528849145309203e-05, "loss": 0.0489, "step": 23102 }, { "epoch": 0.5090812936918475, "grad_norm": 0.9395710825920105, "learning_rate": 1.528742111515262e-05, "loss": 0.0913, "step": 23103 }, { "epoch": 0.5091033289813637, "grad_norm": 0.5725128650665283, "learning_rate": 1.528635077574922e-05, "loss": 0.0667, "step": 23104 }, { "epoch": 0.5091253642708798, "grad_norm": 0.5015145540237427, "learning_rate": 1.5285280434887277e-05, "loss": 0.0879, "step": 23105 }, { "epoch": 0.509147399560396, "grad_norm": 0.5799930095672607, "learning_rate": 1.528421009257225e-05, "loss": 0.0564, "step": 23106 }, { "epoch": 0.5091694348499122, "grad_norm": 0.6716088652610779, "learning_rate": 1.528313974880959e-05, "loss": 0.1067, "step": 23107 }, { "epoch": 0.5091914701394283, "grad_norm": 0.4255351424217224, "learning_rate": 1.5282069403604747e-05, "loss": 0.0726, "step": 23108 }, { "epoch": 0.5092135054289445, "grad_norm": 0.7331172823905945, "learning_rate": 1.528099905696317e-05, "loss": 0.0969, "step": 23109 }, { "epoch": 0.5092355407184607, "grad_norm": 0.7745229005813599, "learning_rate": 1.527992870889032e-05, "loss": 0.0826, "step": 23110 }, { "epoch": 0.5092575760079767, "grad_norm": 0.8525299429893494, "learning_rate": 1.527885835939164e-05, "loss": 0.1172, "step": 23111 }, { "epoch": 0.5092796112974929, "grad_norm": 0.33119815587997437, "learning_rate": 1.5277788008472588e-05, "loss": 0.0883, "step": 23112 }, { "epoch": 0.509301646587009, "grad_norm": 0.9457696676254272, "learning_rate": 1.5276717656138605e-05, "loss": 0.0996, "step": 23113 }, { "epoch": 0.5093236818765252, "grad_norm": 0.3396362364292145, "learning_rate": 1.527564730239516e-05, "loss": 0.0634, "step": 23114 }, { "epoch": 0.5093457171660414, "grad_norm": 0.7206916809082031, "learning_rate": 1.5274576947247684e-05, "loss": 0.0772, "step": 23115 }, { "epoch": 0.5093677524555575, "grad_norm": 0.39306896924972534, "learning_rate": 1.5273506590701643e-05, "loss": 0.0838, "step": 23116 }, { "epoch": 0.5093897877450737, "grad_norm": 0.4169502556324005, "learning_rate": 1.5272436232762494e-05, "loss": 0.0702, "step": 23117 }, { "epoch": 0.5094118230345899, "grad_norm": 0.6448120474815369, "learning_rate": 1.5271365873435678e-05, "loss": 0.0654, "step": 23118 }, { "epoch": 0.509433858324106, "grad_norm": 0.6666316986083984, "learning_rate": 1.5270295512726653e-05, "loss": 0.071, "step": 23119 }, { "epoch": 0.5094558936136222, "grad_norm": 0.5048478841781616, "learning_rate": 1.5269225150640864e-05, "loss": 0.0501, "step": 23120 }, { "epoch": 0.5094779289031384, "grad_norm": 0.8650287985801697, "learning_rate": 1.5268154787183766e-05, "loss": 0.0701, "step": 23121 }, { "epoch": 0.5094999641926545, "grad_norm": 0.7991403341293335, "learning_rate": 1.526708442236081e-05, "loss": 0.1062, "step": 23122 }, { "epoch": 0.5095219994821707, "grad_norm": 0.62488853931427, "learning_rate": 1.5266014056177452e-05, "loss": 0.0727, "step": 23123 }, { "epoch": 0.5095440347716869, "grad_norm": 1.0037463903427124, "learning_rate": 1.5264943688639143e-05, "loss": 0.0841, "step": 23124 }, { "epoch": 0.509566070061203, "grad_norm": 0.9086629152297974, "learning_rate": 1.5263873319751333e-05, "loss": 0.0911, "step": 23125 }, { "epoch": 0.5095881053507192, "grad_norm": 0.47062772512435913, "learning_rate": 1.526280294951948e-05, "loss": 0.0601, "step": 23126 }, { "epoch": 0.5096101406402354, "grad_norm": 0.7985476851463318, "learning_rate": 1.5261732577949026e-05, "loss": 0.1146, "step": 23127 }, { "epoch": 0.5096321759297515, "grad_norm": 0.3356001675128937, "learning_rate": 1.5260662205045423e-05, "loss": 0.0734, "step": 23128 }, { "epoch": 0.5096542112192677, "grad_norm": 0.554113507270813, "learning_rate": 1.5259591830814134e-05, "loss": 0.0817, "step": 23129 }, { "epoch": 0.5096762465087838, "grad_norm": 0.8111319541931152, "learning_rate": 1.5258521455260604e-05, "loss": 0.0542, "step": 23130 }, { "epoch": 0.5096982817983, "grad_norm": 1.1592196226119995, "learning_rate": 1.5257451078390282e-05, "loss": 0.0774, "step": 23131 }, { "epoch": 0.5097203170878162, "grad_norm": 0.4380519390106201, "learning_rate": 1.5256380700208626e-05, "loss": 0.0662, "step": 23132 }, { "epoch": 0.5097423523773323, "grad_norm": 0.9512485861778259, "learning_rate": 1.5255310320721086e-05, "loss": 0.0905, "step": 23133 }, { "epoch": 0.5097643876668485, "grad_norm": 0.585034191608429, "learning_rate": 1.5254239939933114e-05, "loss": 0.0577, "step": 23134 }, { "epoch": 0.5097864229563647, "grad_norm": 0.6188586950302124, "learning_rate": 1.5253169557850163e-05, "loss": 0.0869, "step": 23135 }, { "epoch": 0.5098084582458807, "grad_norm": 0.6189014315605164, "learning_rate": 1.5252099174477679e-05, "loss": 0.1113, "step": 23136 }, { "epoch": 0.5098304935353969, "grad_norm": 0.9953896999359131, "learning_rate": 1.5251028789821123e-05, "loss": 0.0834, "step": 23137 }, { "epoch": 0.509852528824913, "grad_norm": 0.6264012455940247, "learning_rate": 1.524995840388594e-05, "loss": 0.0752, "step": 23138 }, { "epoch": 0.5098745641144292, "grad_norm": 0.7587311863899231, "learning_rate": 1.5248888016677582e-05, "loss": 0.0547, "step": 23139 }, { "epoch": 0.5098965994039454, "grad_norm": 0.6233490705490112, "learning_rate": 1.524781762820151e-05, "loss": 0.0741, "step": 23140 }, { "epoch": 0.5099186346934615, "grad_norm": 0.5447966456413269, "learning_rate": 1.524674723846317e-05, "loss": 0.0902, "step": 23141 }, { "epoch": 0.5099406699829777, "grad_norm": 0.6419314742088318, "learning_rate": 1.524567684746801e-05, "loss": 0.0886, "step": 23142 }, { "epoch": 0.5099627052724939, "grad_norm": 0.728931188583374, "learning_rate": 1.5244606455221485e-05, "loss": 0.1176, "step": 23143 }, { "epoch": 0.50998474056201, "grad_norm": 0.6571764945983887, "learning_rate": 1.5243536061729053e-05, "loss": 0.0676, "step": 23144 }, { "epoch": 0.5100067758515262, "grad_norm": 0.9610382318496704, "learning_rate": 1.5242465666996156e-05, "loss": 0.0975, "step": 23145 }, { "epoch": 0.5100288111410424, "grad_norm": 0.4233153164386749, "learning_rate": 1.5241395271028255e-05, "loss": 0.071, "step": 23146 }, { "epoch": 0.5100508464305585, "grad_norm": 0.6338602304458618, "learning_rate": 1.5240324873830797e-05, "loss": 0.0822, "step": 23147 }, { "epoch": 0.5100728817200747, "grad_norm": 0.7434011697769165, "learning_rate": 1.5239254475409238e-05, "loss": 0.0786, "step": 23148 }, { "epoch": 0.5100949170095909, "grad_norm": 0.5144989490509033, "learning_rate": 1.5238184075769029e-05, "loss": 0.0632, "step": 23149 }, { "epoch": 0.510116952299107, "grad_norm": 0.5728550553321838, "learning_rate": 1.5237113674915618e-05, "loss": 0.0839, "step": 23150 }, { "epoch": 0.5101389875886232, "grad_norm": 0.7711213827133179, "learning_rate": 1.5236043272854458e-05, "loss": 0.0878, "step": 23151 }, { "epoch": 0.5101610228781394, "grad_norm": 0.46925896406173706, "learning_rate": 1.5234972869591006e-05, "loss": 0.0982, "step": 23152 }, { "epoch": 0.5101830581676555, "grad_norm": 0.4619925022125244, "learning_rate": 1.5233902465130706e-05, "loss": 0.0812, "step": 23153 }, { "epoch": 0.5102050934571717, "grad_norm": 0.9868060946464539, "learning_rate": 1.5232832059479021e-05, "loss": 0.1163, "step": 23154 }, { "epoch": 0.5102271287466879, "grad_norm": 0.49424314498901367, "learning_rate": 1.5231761652641392e-05, "loss": 0.0579, "step": 23155 }, { "epoch": 0.510249164036204, "grad_norm": 0.5305029153823853, "learning_rate": 1.5230691244623283e-05, "loss": 0.0641, "step": 23156 }, { "epoch": 0.5102711993257202, "grad_norm": 0.4904043972492218, "learning_rate": 1.5229620835430133e-05, "loss": 0.0629, "step": 23157 }, { "epoch": 0.5102932346152363, "grad_norm": 0.607814610004425, "learning_rate": 1.5228550425067406e-05, "loss": 0.0659, "step": 23158 }, { "epoch": 0.5103152699047525, "grad_norm": 0.6488043665885925, "learning_rate": 1.5227480013540552e-05, "loss": 0.0822, "step": 23159 }, { "epoch": 0.5103373051942687, "grad_norm": 0.7109388709068298, "learning_rate": 1.5226409600855012e-05, "loss": 0.109, "step": 23160 }, { "epoch": 0.5103593404837847, "grad_norm": 0.9065991044044495, "learning_rate": 1.5225339187016253e-05, "loss": 0.0769, "step": 23161 }, { "epoch": 0.5103813757733009, "grad_norm": 0.39144808053970337, "learning_rate": 1.5224268772029718e-05, "loss": 0.0657, "step": 23162 }, { "epoch": 0.5104034110628171, "grad_norm": 0.5484939217567444, "learning_rate": 1.5223198355900864e-05, "loss": 0.055, "step": 23163 }, { "epoch": 0.5104254463523332, "grad_norm": 0.6175647377967834, "learning_rate": 1.5222127938635136e-05, "loss": 0.0633, "step": 23164 }, { "epoch": 0.5104474816418494, "grad_norm": 0.5351701378822327, "learning_rate": 1.5221057520237998e-05, "loss": 0.0874, "step": 23165 }, { "epoch": 0.5104695169313656, "grad_norm": 0.6449423432350159, "learning_rate": 1.5219987100714893e-05, "loss": 0.0869, "step": 23166 }, { "epoch": 0.5104915522208817, "grad_norm": 0.6926020979881287, "learning_rate": 1.5218916680071276e-05, "loss": 0.0766, "step": 23167 }, { "epoch": 0.5105135875103979, "grad_norm": 0.4568384885787964, "learning_rate": 1.5217846258312595e-05, "loss": 0.0734, "step": 23168 }, { "epoch": 0.510535622799914, "grad_norm": 0.6463109254837036, "learning_rate": 1.5216775835444311e-05, "loss": 0.0754, "step": 23169 }, { "epoch": 0.5105576580894302, "grad_norm": 0.6263461112976074, "learning_rate": 1.5215705411471868e-05, "loss": 0.111, "step": 23170 }, { "epoch": 0.5105796933789464, "grad_norm": 0.7527421116828918, "learning_rate": 1.5214634986400724e-05, "loss": 0.0824, "step": 23171 }, { "epoch": 0.5106017286684625, "grad_norm": 0.7798570990562439, "learning_rate": 1.5213564560236327e-05, "loss": 0.0868, "step": 23172 }, { "epoch": 0.5106237639579787, "grad_norm": 0.5780429840087891, "learning_rate": 1.5212494132984134e-05, "loss": 0.0637, "step": 23173 }, { "epoch": 0.5106457992474949, "grad_norm": 0.5508707165718079, "learning_rate": 1.5211423704649593e-05, "loss": 0.0517, "step": 23174 }, { "epoch": 0.510667834537011, "grad_norm": 0.6012007594108582, "learning_rate": 1.521035327523816e-05, "loss": 0.1105, "step": 23175 }, { "epoch": 0.5106898698265272, "grad_norm": 0.7676886320114136, "learning_rate": 1.5209282844755282e-05, "loss": 0.0983, "step": 23176 }, { "epoch": 0.5107119051160434, "grad_norm": 0.6087155342102051, "learning_rate": 1.5208212413206415e-05, "loss": 0.0743, "step": 23177 }, { "epoch": 0.5107339404055595, "grad_norm": 0.6420783996582031, "learning_rate": 1.5207141980597006e-05, "loss": 0.08, "step": 23178 }, { "epoch": 0.5107559756950757, "grad_norm": 0.7470993399620056, "learning_rate": 1.5206071546932519e-05, "loss": 0.0951, "step": 23179 }, { "epoch": 0.5107780109845919, "grad_norm": 0.9048964977264404, "learning_rate": 1.5205001112218395e-05, "loss": 0.0522, "step": 23180 }, { "epoch": 0.510800046274108, "grad_norm": 0.7575063109397888, "learning_rate": 1.5203930676460092e-05, "loss": 0.124, "step": 23181 }, { "epoch": 0.5108220815636242, "grad_norm": 0.6549547910690308, "learning_rate": 1.5202860239663062e-05, "loss": 0.1094, "step": 23182 }, { "epoch": 0.5108441168531404, "grad_norm": 0.37343862652778625, "learning_rate": 1.5201789801832752e-05, "loss": 0.0707, "step": 23183 }, { "epoch": 0.5108661521426565, "grad_norm": 0.6480770707130432, "learning_rate": 1.5200719362974623e-05, "loss": 0.075, "step": 23184 }, { "epoch": 0.5108881874321726, "grad_norm": 0.711678147315979, "learning_rate": 1.5199648923094118e-05, "loss": 0.0723, "step": 23185 }, { "epoch": 0.5109102227216887, "grad_norm": 0.7386056184768677, "learning_rate": 1.5198578482196697e-05, "loss": 0.0589, "step": 23186 }, { "epoch": 0.5109322580112049, "grad_norm": 0.23453563451766968, "learning_rate": 1.5197508040287808e-05, "loss": 0.0567, "step": 23187 }, { "epoch": 0.5109542933007211, "grad_norm": 0.946458637714386, "learning_rate": 1.5196437597372907e-05, "loss": 0.0926, "step": 23188 }, { "epoch": 0.5109763285902372, "grad_norm": 0.6169202923774719, "learning_rate": 1.5195367153457442e-05, "loss": 0.0913, "step": 23189 }, { "epoch": 0.5109983638797534, "grad_norm": 0.35387179255485535, "learning_rate": 1.5194296708546867e-05, "loss": 0.1035, "step": 23190 }, { "epoch": 0.5110203991692696, "grad_norm": 0.6923203468322754, "learning_rate": 1.5193226262646635e-05, "loss": 0.0921, "step": 23191 }, { "epoch": 0.5110424344587857, "grad_norm": 0.5821531414985657, "learning_rate": 1.5192155815762199e-05, "loss": 0.0657, "step": 23192 }, { "epoch": 0.5110644697483019, "grad_norm": 0.9045333862304688, "learning_rate": 1.519108536789901e-05, "loss": 0.0816, "step": 23193 }, { "epoch": 0.511086505037818, "grad_norm": 1.1687666177749634, "learning_rate": 1.5190014919062519e-05, "loss": 0.1029, "step": 23194 }, { "epoch": 0.5111085403273342, "grad_norm": 0.656569242477417, "learning_rate": 1.5188944469258179e-05, "loss": 0.0641, "step": 23195 }, { "epoch": 0.5111305756168504, "grad_norm": 1.0708400011062622, "learning_rate": 1.5187874018491447e-05, "loss": 0.071, "step": 23196 }, { "epoch": 0.5111526109063665, "grad_norm": 0.6871470212936401, "learning_rate": 1.5186803566767773e-05, "loss": 0.0746, "step": 23197 }, { "epoch": 0.5111746461958827, "grad_norm": 0.7261080145835876, "learning_rate": 1.5185733114092608e-05, "loss": 0.0616, "step": 23198 }, { "epoch": 0.5111966814853989, "grad_norm": 0.9603574275970459, "learning_rate": 1.51846626604714e-05, "loss": 0.1038, "step": 23199 }, { "epoch": 0.511218716774915, "grad_norm": 0.7339153289794922, "learning_rate": 1.5183592205909611e-05, "loss": 0.0884, "step": 23200 }, { "epoch": 0.5112407520644312, "grad_norm": 0.6856909990310669, "learning_rate": 1.5182521750412685e-05, "loss": 0.0975, "step": 23201 }, { "epoch": 0.5112627873539474, "grad_norm": 0.40536972880363464, "learning_rate": 1.5181451293986079e-05, "loss": 0.0922, "step": 23202 }, { "epoch": 0.5112848226434635, "grad_norm": 0.5125966668128967, "learning_rate": 1.5180380836635242e-05, "loss": 0.047, "step": 23203 }, { "epoch": 0.5113068579329797, "grad_norm": 0.7517149448394775, "learning_rate": 1.5179310378365633e-05, "loss": 0.0921, "step": 23204 }, { "epoch": 0.5113288932224959, "grad_norm": 0.6378623247146606, "learning_rate": 1.5178239919182699e-05, "loss": 0.0857, "step": 23205 }, { "epoch": 0.511350928512012, "grad_norm": 0.5013365745544434, "learning_rate": 1.5177169459091889e-05, "loss": 0.0678, "step": 23206 }, { "epoch": 0.5113729638015282, "grad_norm": 0.4657124876976013, "learning_rate": 1.5176098998098666e-05, "loss": 0.0981, "step": 23207 }, { "epoch": 0.5113949990910444, "grad_norm": 0.5868332386016846, "learning_rate": 1.5175028536208474e-05, "loss": 0.0988, "step": 23208 }, { "epoch": 0.5114170343805605, "grad_norm": 0.756561279296875, "learning_rate": 1.5173958073426768e-05, "loss": 0.1004, "step": 23209 }, { "epoch": 0.5114390696700766, "grad_norm": 0.6129051446914673, "learning_rate": 1.5172887609758998e-05, "loss": 0.1145, "step": 23210 }, { "epoch": 0.5114611049595927, "grad_norm": 0.5179138779640198, "learning_rate": 1.5171817145210622e-05, "loss": 0.0812, "step": 23211 }, { "epoch": 0.5114831402491089, "grad_norm": 0.7412659525871277, "learning_rate": 1.5170746679787086e-05, "loss": 0.0869, "step": 23212 }, { "epoch": 0.5115051755386251, "grad_norm": 0.539834201335907, "learning_rate": 1.5169676213493849e-05, "loss": 0.0921, "step": 23213 }, { "epoch": 0.5115272108281412, "grad_norm": 0.2897133231163025, "learning_rate": 1.5168605746336359e-05, "loss": 0.0687, "step": 23214 }, { "epoch": 0.5115492461176574, "grad_norm": 0.4872075021266937, "learning_rate": 1.516753527832007e-05, "loss": 0.0755, "step": 23215 }, { "epoch": 0.5115712814071736, "grad_norm": 0.4709528684616089, "learning_rate": 1.516646480945043e-05, "loss": 0.1164, "step": 23216 }, { "epoch": 0.5115933166966897, "grad_norm": 0.5347977876663208, "learning_rate": 1.51653943397329e-05, "loss": 0.054, "step": 23217 }, { "epoch": 0.5116153519862059, "grad_norm": 0.5434224605560303, "learning_rate": 1.5164323869172926e-05, "loss": 0.0645, "step": 23218 }, { "epoch": 0.511637387275722, "grad_norm": 0.7145439982414246, "learning_rate": 1.5163253397775965e-05, "loss": 0.0918, "step": 23219 }, { "epoch": 0.5116594225652382, "grad_norm": 0.6220094561576843, "learning_rate": 1.5162182925547461e-05, "loss": 0.0567, "step": 23220 }, { "epoch": 0.5116814578547544, "grad_norm": 0.7672867178916931, "learning_rate": 1.5161112452492877e-05, "loss": 0.0697, "step": 23221 }, { "epoch": 0.5117034931442705, "grad_norm": 0.4969959557056427, "learning_rate": 1.5160041978617662e-05, "loss": 0.0791, "step": 23222 }, { "epoch": 0.5117255284337867, "grad_norm": 0.5737061500549316, "learning_rate": 1.5158971503927269e-05, "loss": 0.0692, "step": 23223 }, { "epoch": 0.5117475637233029, "grad_norm": 0.7756608128547668, "learning_rate": 1.5157901028427143e-05, "loss": 0.0507, "step": 23224 }, { "epoch": 0.511769599012819, "grad_norm": 0.6046514511108398, "learning_rate": 1.5156830552122744e-05, "loss": 0.0533, "step": 23225 }, { "epoch": 0.5117916343023352, "grad_norm": 0.5101085901260376, "learning_rate": 1.515576007501952e-05, "loss": 0.0937, "step": 23226 }, { "epoch": 0.5118136695918514, "grad_norm": 0.7406867146492004, "learning_rate": 1.5154689597122928e-05, "loss": 0.0791, "step": 23227 }, { "epoch": 0.5118357048813675, "grad_norm": 0.8983639478683472, "learning_rate": 1.5153619118438424e-05, "loss": 0.0853, "step": 23228 }, { "epoch": 0.5118577401708837, "grad_norm": 0.8044599294662476, "learning_rate": 1.5152548638971454e-05, "loss": 0.0661, "step": 23229 }, { "epoch": 0.5118797754603999, "grad_norm": 0.7099875211715698, "learning_rate": 1.515147815872747e-05, "loss": 0.0903, "step": 23230 }, { "epoch": 0.511901810749916, "grad_norm": 0.8301781415939331, "learning_rate": 1.5150407677711926e-05, "loss": 0.0725, "step": 23231 }, { "epoch": 0.5119238460394322, "grad_norm": 0.8505340218544006, "learning_rate": 1.5149337195930278e-05, "loss": 0.0901, "step": 23232 }, { "epoch": 0.5119458813289484, "grad_norm": 0.8953942060470581, "learning_rate": 1.514826671338797e-05, "loss": 0.0768, "step": 23233 }, { "epoch": 0.5119679166184645, "grad_norm": 0.6512780785560608, "learning_rate": 1.5147196230090464e-05, "loss": 0.0699, "step": 23234 }, { "epoch": 0.5119899519079806, "grad_norm": 0.6528610587120056, "learning_rate": 1.5146125746043209e-05, "loss": 0.0818, "step": 23235 }, { "epoch": 0.5120119871974967, "grad_norm": 0.6653344631195068, "learning_rate": 1.5145055261251656e-05, "loss": 0.0682, "step": 23236 }, { "epoch": 0.5120340224870129, "grad_norm": 0.7115921378135681, "learning_rate": 1.5143984775721261e-05, "loss": 0.0601, "step": 23237 }, { "epoch": 0.5120560577765291, "grad_norm": 0.5753883719444275, "learning_rate": 1.5142914289457474e-05, "loss": 0.0618, "step": 23238 }, { "epoch": 0.5120780930660452, "grad_norm": 0.8837563991546631, "learning_rate": 1.5141843802465745e-05, "loss": 0.0765, "step": 23239 }, { "epoch": 0.5121001283555614, "grad_norm": 0.5094034075737, "learning_rate": 1.5140773314751533e-05, "loss": 0.0487, "step": 23240 }, { "epoch": 0.5121221636450776, "grad_norm": 0.5531195402145386, "learning_rate": 1.513970282632028e-05, "loss": 0.0924, "step": 23241 }, { "epoch": 0.5121441989345937, "grad_norm": 0.784827709197998, "learning_rate": 1.5138632337177455e-05, "loss": 0.0744, "step": 23242 }, { "epoch": 0.5121662342241099, "grad_norm": 0.5116878747940063, "learning_rate": 1.5137561847328495e-05, "loss": 0.0645, "step": 23243 }, { "epoch": 0.5121882695136261, "grad_norm": 0.6485500931739807, "learning_rate": 1.513649135677886e-05, "loss": 0.0709, "step": 23244 }, { "epoch": 0.5122103048031422, "grad_norm": 0.8757891654968262, "learning_rate": 1.5135420865534002e-05, "loss": 0.084, "step": 23245 }, { "epoch": 0.5122323400926584, "grad_norm": 0.48742228746414185, "learning_rate": 1.5134350373599376e-05, "loss": 0.0675, "step": 23246 }, { "epoch": 0.5122543753821746, "grad_norm": 0.5051912069320679, "learning_rate": 1.5133279880980424e-05, "loss": 0.0675, "step": 23247 }, { "epoch": 0.5122764106716907, "grad_norm": 0.4233904480934143, "learning_rate": 1.5132209387682611e-05, "loss": 0.0572, "step": 23248 }, { "epoch": 0.5122984459612069, "grad_norm": 0.8213474154472351, "learning_rate": 1.5131138893711385e-05, "loss": 0.064, "step": 23249 }, { "epoch": 0.512320481250723, "grad_norm": 0.6160303354263306, "learning_rate": 1.5130068399072196e-05, "loss": 0.0804, "step": 23250 }, { "epoch": 0.5123425165402392, "grad_norm": 1.068737506866455, "learning_rate": 1.51289979037705e-05, "loss": 0.0954, "step": 23251 }, { "epoch": 0.5123645518297554, "grad_norm": 0.7889905571937561, "learning_rate": 1.512792740781175e-05, "loss": 0.0913, "step": 23252 }, { "epoch": 0.5123865871192715, "grad_norm": 0.4510003626346588, "learning_rate": 1.5126856911201397e-05, "loss": 0.0617, "step": 23253 }, { "epoch": 0.5124086224087877, "grad_norm": 0.745071530342102, "learning_rate": 1.5125786413944891e-05, "loss": 0.0676, "step": 23254 }, { "epoch": 0.5124306576983039, "grad_norm": 0.6888232231140137, "learning_rate": 1.5124715916047688e-05, "loss": 0.0678, "step": 23255 }, { "epoch": 0.51245269298782, "grad_norm": 0.5630518198013306, "learning_rate": 1.512364541751524e-05, "loss": 0.0654, "step": 23256 }, { "epoch": 0.5124747282773362, "grad_norm": 0.6659764647483826, "learning_rate": 1.5122574918353002e-05, "loss": 0.0971, "step": 23257 }, { "epoch": 0.5124967635668524, "grad_norm": 0.9457240104675293, "learning_rate": 1.512150441856642e-05, "loss": 0.0639, "step": 23258 }, { "epoch": 0.5125187988563685, "grad_norm": 0.7138251662254333, "learning_rate": 1.5120433918160957e-05, "loss": 0.0772, "step": 23259 }, { "epoch": 0.5125408341458846, "grad_norm": 0.6305885910987854, "learning_rate": 1.5119363417142054e-05, "loss": 0.0781, "step": 23260 }, { "epoch": 0.5125628694354007, "grad_norm": 0.5865901708602905, "learning_rate": 1.5118292915515173e-05, "loss": 0.0813, "step": 23261 }, { "epoch": 0.5125849047249169, "grad_norm": 0.30075499415397644, "learning_rate": 1.5117222413285759e-05, "loss": 0.0454, "step": 23262 }, { "epoch": 0.5126069400144331, "grad_norm": 0.7222412824630737, "learning_rate": 1.5116151910459273e-05, "loss": 0.0992, "step": 23263 }, { "epoch": 0.5126289753039492, "grad_norm": 0.6969188451766968, "learning_rate": 1.5115081407041156e-05, "loss": 0.072, "step": 23264 }, { "epoch": 0.5126510105934654, "grad_norm": 0.7545232772827148, "learning_rate": 1.5114010903036874e-05, "loss": 0.0527, "step": 23265 }, { "epoch": 0.5126730458829816, "grad_norm": 0.39067113399505615, "learning_rate": 1.5112940398451867e-05, "loss": 0.0738, "step": 23266 }, { "epoch": 0.5126950811724977, "grad_norm": 0.38587427139282227, "learning_rate": 1.51118698932916e-05, "loss": 0.0447, "step": 23267 }, { "epoch": 0.5127171164620139, "grad_norm": 0.5733981728553772, "learning_rate": 1.5110799387561517e-05, "loss": 0.0637, "step": 23268 }, { "epoch": 0.5127391517515301, "grad_norm": 0.44363465905189514, "learning_rate": 1.5109728881267075e-05, "loss": 0.0935, "step": 23269 }, { "epoch": 0.5127611870410462, "grad_norm": 0.6539469361305237, "learning_rate": 1.5108658374413725e-05, "loss": 0.083, "step": 23270 }, { "epoch": 0.5127832223305624, "grad_norm": 0.6519931554794312, "learning_rate": 1.5107587867006923e-05, "loss": 0.0899, "step": 23271 }, { "epoch": 0.5128052576200786, "grad_norm": 0.7070756554603577, "learning_rate": 1.5106517359052111e-05, "loss": 0.0861, "step": 23272 }, { "epoch": 0.5128272929095947, "grad_norm": 0.8437256813049316, "learning_rate": 1.5105446850554751e-05, "loss": 0.0872, "step": 23273 }, { "epoch": 0.5128493281991109, "grad_norm": 0.5548228621482849, "learning_rate": 1.5104376341520293e-05, "loss": 0.0571, "step": 23274 }, { "epoch": 0.512871363488627, "grad_norm": 0.6138449311256409, "learning_rate": 1.510330583195419e-05, "loss": 0.0662, "step": 23275 }, { "epoch": 0.5128933987781432, "grad_norm": 0.6281082034111023, "learning_rate": 1.51022353218619e-05, "loss": 0.0991, "step": 23276 }, { "epoch": 0.5129154340676594, "grad_norm": 0.7024047374725342, "learning_rate": 1.5101164811248868e-05, "loss": 0.101, "step": 23277 }, { "epoch": 0.5129374693571755, "grad_norm": 0.5007731914520264, "learning_rate": 1.5100094300120551e-05, "loss": 0.0694, "step": 23278 }, { "epoch": 0.5129595046466917, "grad_norm": 0.6523112654685974, "learning_rate": 1.5099023788482396e-05, "loss": 0.0647, "step": 23279 }, { "epoch": 0.5129815399362079, "grad_norm": 0.603629469871521, "learning_rate": 1.5097953276339862e-05, "loss": 0.0778, "step": 23280 }, { "epoch": 0.513003575225724, "grad_norm": 0.748335599899292, "learning_rate": 1.5096882763698397e-05, "loss": 0.0824, "step": 23281 }, { "epoch": 0.5130256105152402, "grad_norm": 0.7804545760154724, "learning_rate": 1.5095812250563462e-05, "loss": 0.0937, "step": 23282 }, { "epoch": 0.5130476458047564, "grad_norm": 0.5205997228622437, "learning_rate": 1.5094741736940499e-05, "loss": 0.0803, "step": 23283 }, { "epoch": 0.5130696810942724, "grad_norm": 0.5597139596939087, "learning_rate": 1.5093671222834968e-05, "loss": 0.0744, "step": 23284 }, { "epoch": 0.5130917163837886, "grad_norm": 0.8173606991767883, "learning_rate": 1.5092600708252318e-05, "loss": 0.0778, "step": 23285 }, { "epoch": 0.5131137516733048, "grad_norm": 0.6828446388244629, "learning_rate": 1.5091530193198002e-05, "loss": 0.1146, "step": 23286 }, { "epoch": 0.5131357869628209, "grad_norm": 0.7146026492118835, "learning_rate": 1.5090459677677473e-05, "loss": 0.0926, "step": 23287 }, { "epoch": 0.5131578222523371, "grad_norm": 0.6489685773849487, "learning_rate": 1.5089389161696189e-05, "loss": 0.0644, "step": 23288 }, { "epoch": 0.5131798575418532, "grad_norm": 0.7374799847602844, "learning_rate": 1.5088318645259593e-05, "loss": 0.086, "step": 23289 }, { "epoch": 0.5132018928313694, "grad_norm": 0.6275569200515747, "learning_rate": 1.5087248128373145e-05, "loss": 0.0703, "step": 23290 }, { "epoch": 0.5132239281208856, "grad_norm": 0.6144689917564392, "learning_rate": 1.5086177611042294e-05, "loss": 0.0582, "step": 23291 }, { "epoch": 0.5132459634104017, "grad_norm": 0.6026073098182678, "learning_rate": 1.5085107093272498e-05, "loss": 0.0578, "step": 23292 }, { "epoch": 0.5132679986999179, "grad_norm": 0.644340455532074, "learning_rate": 1.5084036575069204e-05, "loss": 0.1014, "step": 23293 }, { "epoch": 0.5132900339894341, "grad_norm": 0.28226354718208313, "learning_rate": 1.5082966056437864e-05, "loss": 0.0563, "step": 23294 }, { "epoch": 0.5133120692789502, "grad_norm": 0.8299998044967651, "learning_rate": 1.5081895537383938e-05, "loss": 0.0763, "step": 23295 }, { "epoch": 0.5133341045684664, "grad_norm": 0.6453337073326111, "learning_rate": 1.5080825017912868e-05, "loss": 0.0619, "step": 23296 }, { "epoch": 0.5133561398579826, "grad_norm": 0.5157550573348999, "learning_rate": 1.5079754498030118e-05, "loss": 0.0987, "step": 23297 }, { "epoch": 0.5133781751474987, "grad_norm": 0.5656881332397461, "learning_rate": 1.507868397774113e-05, "loss": 0.08, "step": 23298 }, { "epoch": 0.5134002104370149, "grad_norm": 1.08943772315979, "learning_rate": 1.5077613457051369e-05, "loss": 0.0942, "step": 23299 }, { "epoch": 0.5134222457265311, "grad_norm": 0.48134851455688477, "learning_rate": 1.5076542935966277e-05, "loss": 0.0803, "step": 23300 }, { "epoch": 0.5134442810160472, "grad_norm": 0.7187719941139221, "learning_rate": 1.5075472414491316e-05, "loss": 0.0861, "step": 23301 }, { "epoch": 0.5134663163055634, "grad_norm": 0.9351230263710022, "learning_rate": 1.5074401892631926e-05, "loss": 0.1066, "step": 23302 }, { "epoch": 0.5134883515950796, "grad_norm": 0.7060516476631165, "learning_rate": 1.5073331370393574e-05, "loss": 0.1013, "step": 23303 }, { "epoch": 0.5135103868845957, "grad_norm": 0.3348674476146698, "learning_rate": 1.5072260847781701e-05, "loss": 0.0794, "step": 23304 }, { "epoch": 0.5135324221741119, "grad_norm": 0.9161953926086426, "learning_rate": 1.5071190324801768e-05, "loss": 0.1083, "step": 23305 }, { "epoch": 0.513554457463628, "grad_norm": 1.0471419095993042, "learning_rate": 1.5070119801459219e-05, "loss": 0.1347, "step": 23306 }, { "epoch": 0.5135764927531442, "grad_norm": 0.6262153387069702, "learning_rate": 1.5069049277759518e-05, "loss": 0.0536, "step": 23307 }, { "epoch": 0.5135985280426604, "grad_norm": 0.5297232866287231, "learning_rate": 1.5067978753708114e-05, "loss": 0.0634, "step": 23308 }, { "epoch": 0.5136205633321764, "grad_norm": 0.5584396123886108, "learning_rate": 1.5066908229310455e-05, "loss": 0.072, "step": 23309 }, { "epoch": 0.5136425986216926, "grad_norm": 0.6416253447532654, "learning_rate": 1.5065837704571992e-05, "loss": 0.0834, "step": 23310 }, { "epoch": 0.5136646339112088, "grad_norm": 0.3091144263744354, "learning_rate": 1.5064767179498189e-05, "loss": 0.0532, "step": 23311 }, { "epoch": 0.5136866692007249, "grad_norm": 0.8251736164093018, "learning_rate": 1.5063696654094485e-05, "loss": 0.1075, "step": 23312 }, { "epoch": 0.5137087044902411, "grad_norm": 0.6716774702072144, "learning_rate": 1.5062626128366345e-05, "loss": 0.0847, "step": 23313 }, { "epoch": 0.5137307397797573, "grad_norm": 0.5825867652893066, "learning_rate": 1.5061555602319215e-05, "loss": 0.0696, "step": 23314 }, { "epoch": 0.5137527750692734, "grad_norm": 0.5125057101249695, "learning_rate": 1.5060485075958552e-05, "loss": 0.0801, "step": 23315 }, { "epoch": 0.5137748103587896, "grad_norm": 0.5026567578315735, "learning_rate": 1.5059414549289802e-05, "loss": 0.0588, "step": 23316 }, { "epoch": 0.5137968456483057, "grad_norm": 0.4683312475681305, "learning_rate": 1.5058344022318424e-05, "loss": 0.0717, "step": 23317 }, { "epoch": 0.5138188809378219, "grad_norm": 0.6751630306243896, "learning_rate": 1.5057273495049868e-05, "loss": 0.0919, "step": 23318 }, { "epoch": 0.5138409162273381, "grad_norm": 0.8131273984909058, "learning_rate": 1.5056202967489586e-05, "loss": 0.0699, "step": 23319 }, { "epoch": 0.5138629515168542, "grad_norm": 0.7869321703910828, "learning_rate": 1.5055132439643034e-05, "loss": 0.091, "step": 23320 }, { "epoch": 0.5138849868063704, "grad_norm": 0.5037937760353088, "learning_rate": 1.5054061911515662e-05, "loss": 0.0884, "step": 23321 }, { "epoch": 0.5139070220958866, "grad_norm": 0.7902286052703857, "learning_rate": 1.5052991383112924e-05, "loss": 0.0964, "step": 23322 }, { "epoch": 0.5139290573854027, "grad_norm": 0.622503936290741, "learning_rate": 1.5051920854440273e-05, "loss": 0.057, "step": 23323 }, { "epoch": 0.5139510926749189, "grad_norm": 0.5592162609100342, "learning_rate": 1.505085032550316e-05, "loss": 0.0731, "step": 23324 }, { "epoch": 0.5139731279644351, "grad_norm": 0.7897180914878845, "learning_rate": 1.5049779796307041e-05, "loss": 0.0719, "step": 23325 }, { "epoch": 0.5139951632539512, "grad_norm": 0.9027296900749207, "learning_rate": 1.5048709266857368e-05, "loss": 0.1003, "step": 23326 }, { "epoch": 0.5140171985434674, "grad_norm": 0.490614116191864, "learning_rate": 1.504763873715959e-05, "loss": 0.067, "step": 23327 }, { "epoch": 0.5140392338329836, "grad_norm": 0.5771010518074036, "learning_rate": 1.5046568207219162e-05, "loss": 0.0612, "step": 23328 }, { "epoch": 0.5140612691224997, "grad_norm": 0.6835867762565613, "learning_rate": 1.5045497677041536e-05, "loss": 0.0784, "step": 23329 }, { "epoch": 0.5140833044120159, "grad_norm": 0.5156660079956055, "learning_rate": 1.504442714663217e-05, "loss": 0.0696, "step": 23330 }, { "epoch": 0.514105339701532, "grad_norm": 0.8660701513290405, "learning_rate": 1.5043356615996509e-05, "loss": 0.1032, "step": 23331 }, { "epoch": 0.5141273749910482, "grad_norm": 0.622747004032135, "learning_rate": 1.504228608514001e-05, "loss": 0.075, "step": 23332 }, { "epoch": 0.5141494102805644, "grad_norm": 0.67693692445755, "learning_rate": 1.504121555406813e-05, "loss": 0.0704, "step": 23333 }, { "epoch": 0.5141714455700804, "grad_norm": 0.583005428314209, "learning_rate": 1.5040145022786313e-05, "loss": 0.0764, "step": 23334 }, { "epoch": 0.5141934808595966, "grad_norm": 0.7254242300987244, "learning_rate": 1.5039074491300014e-05, "loss": 0.068, "step": 23335 }, { "epoch": 0.5142155161491128, "grad_norm": 0.4251459836959839, "learning_rate": 1.5038003959614692e-05, "loss": 0.0547, "step": 23336 }, { "epoch": 0.5142375514386289, "grad_norm": 0.6030527353286743, "learning_rate": 1.5036933427735792e-05, "loss": 0.0845, "step": 23337 }, { "epoch": 0.5142595867281451, "grad_norm": 0.5082382559776306, "learning_rate": 1.5035862895668772e-05, "loss": 0.0713, "step": 23338 }, { "epoch": 0.5142816220176613, "grad_norm": 0.6724446415901184, "learning_rate": 1.5034792363419081e-05, "loss": 0.0648, "step": 23339 }, { "epoch": 0.5143036573071774, "grad_norm": 0.40982431173324585, "learning_rate": 1.5033721830992178e-05, "loss": 0.0745, "step": 23340 }, { "epoch": 0.5143256925966936, "grad_norm": 0.6153289079666138, "learning_rate": 1.503265129839351e-05, "loss": 0.0755, "step": 23341 }, { "epoch": 0.5143477278862097, "grad_norm": 0.7962473630905151, "learning_rate": 1.5031580765628528e-05, "loss": 0.0757, "step": 23342 }, { "epoch": 0.5143697631757259, "grad_norm": 0.6741437911987305, "learning_rate": 1.5030510232702692e-05, "loss": 0.0899, "step": 23343 }, { "epoch": 0.5143917984652421, "grad_norm": 1.0398675203323364, "learning_rate": 1.5029439699621447e-05, "loss": 0.1061, "step": 23344 }, { "epoch": 0.5144138337547582, "grad_norm": 0.7365657091140747, "learning_rate": 1.5028369166390254e-05, "loss": 0.0628, "step": 23345 }, { "epoch": 0.5144358690442744, "grad_norm": 1.026435375213623, "learning_rate": 1.5027298633014558e-05, "loss": 0.0962, "step": 23346 }, { "epoch": 0.5144579043337906, "grad_norm": 0.7117837071418762, "learning_rate": 1.5026228099499819e-05, "loss": 0.0782, "step": 23347 }, { "epoch": 0.5144799396233067, "grad_norm": 0.5780321359634399, "learning_rate": 1.5025157565851487e-05, "loss": 0.0607, "step": 23348 }, { "epoch": 0.5145019749128229, "grad_norm": 0.712397038936615, "learning_rate": 1.5024087032075015e-05, "loss": 0.0737, "step": 23349 }, { "epoch": 0.5145240102023391, "grad_norm": 0.5791054368019104, "learning_rate": 1.502301649817585e-05, "loss": 0.0769, "step": 23350 }, { "epoch": 0.5145460454918552, "grad_norm": 1.041545033454895, "learning_rate": 1.5021945964159453e-05, "loss": 0.0858, "step": 23351 }, { "epoch": 0.5145680807813714, "grad_norm": 0.7900774478912354, "learning_rate": 1.5020875430031267e-05, "loss": 0.0847, "step": 23352 }, { "epoch": 0.5145901160708876, "grad_norm": 1.0018322467803955, "learning_rate": 1.5019804895796758e-05, "loss": 0.1048, "step": 23353 }, { "epoch": 0.5146121513604037, "grad_norm": 0.6769972443580627, "learning_rate": 1.501873436146137e-05, "loss": 0.0569, "step": 23354 }, { "epoch": 0.5146341866499199, "grad_norm": 0.7049272656440735, "learning_rate": 1.5017663827030556e-05, "loss": 0.0926, "step": 23355 }, { "epoch": 0.514656221939436, "grad_norm": 0.6435530781745911, "learning_rate": 1.5016593292509776e-05, "loss": 0.0574, "step": 23356 }, { "epoch": 0.5146782572289522, "grad_norm": 0.4388877749443054, "learning_rate": 1.5015522757904477e-05, "loss": 0.0789, "step": 23357 }, { "epoch": 0.5147002925184683, "grad_norm": 0.6019571423530579, "learning_rate": 1.5014452223220106e-05, "loss": 0.0737, "step": 23358 }, { "epoch": 0.5147223278079844, "grad_norm": 0.5654922723770142, "learning_rate": 1.5013381688462126e-05, "loss": 0.0722, "step": 23359 }, { "epoch": 0.5147443630975006, "grad_norm": 0.6350138783454895, "learning_rate": 1.5012311153635983e-05, "loss": 0.0633, "step": 23360 }, { "epoch": 0.5147663983870168, "grad_norm": 1.138871669769287, "learning_rate": 1.5011240618747133e-05, "loss": 0.0775, "step": 23361 }, { "epoch": 0.5147884336765329, "grad_norm": 0.5904474258422852, "learning_rate": 1.5010170083801032e-05, "loss": 0.0697, "step": 23362 }, { "epoch": 0.5148104689660491, "grad_norm": 0.6468558311462402, "learning_rate": 1.500909954880313e-05, "loss": 0.0889, "step": 23363 }, { "epoch": 0.5148325042555653, "grad_norm": 0.667289674282074, "learning_rate": 1.5008029013758878e-05, "loss": 0.0811, "step": 23364 }, { "epoch": 0.5148545395450814, "grad_norm": 0.5514311194419861, "learning_rate": 1.5006958478673725e-05, "loss": 0.0745, "step": 23365 }, { "epoch": 0.5148765748345976, "grad_norm": 0.4081525206565857, "learning_rate": 1.5005887943553135e-05, "loss": 0.0731, "step": 23366 }, { "epoch": 0.5148986101241138, "grad_norm": 0.7149788737297058, "learning_rate": 1.5004817408402552e-05, "loss": 0.0688, "step": 23367 }, { "epoch": 0.5149206454136299, "grad_norm": 0.9061553478240967, "learning_rate": 1.500374687322743e-05, "loss": 0.0627, "step": 23368 }, { "epoch": 0.5149426807031461, "grad_norm": 0.6505672931671143, "learning_rate": 1.5002676338033225e-05, "loss": 0.0838, "step": 23369 }, { "epoch": 0.5149647159926622, "grad_norm": 0.45341557264328003, "learning_rate": 1.5001605802825387e-05, "loss": 0.0835, "step": 23370 }, { "epoch": 0.5149867512821784, "grad_norm": 0.5372177362442017, "learning_rate": 1.500053526760937e-05, "loss": 0.08, "step": 23371 }, { "epoch": 0.5150087865716946, "grad_norm": 0.7244195342063904, "learning_rate": 1.4999464732390632e-05, "loss": 0.0922, "step": 23372 }, { "epoch": 0.5150308218612107, "grad_norm": 0.4202951490879059, "learning_rate": 1.4998394197174617e-05, "loss": 0.0681, "step": 23373 }, { "epoch": 0.5150528571507269, "grad_norm": 0.6528840661048889, "learning_rate": 1.4997323661966774e-05, "loss": 0.087, "step": 23374 }, { "epoch": 0.5150748924402431, "grad_norm": 0.5715413093566895, "learning_rate": 1.499625312677257e-05, "loss": 0.1043, "step": 23375 }, { "epoch": 0.5150969277297592, "grad_norm": 0.6517866253852844, "learning_rate": 1.499518259159745e-05, "loss": 0.0581, "step": 23376 }, { "epoch": 0.5151189630192754, "grad_norm": 0.5527974367141724, "learning_rate": 1.4994112056446869e-05, "loss": 0.081, "step": 23377 }, { "epoch": 0.5151409983087916, "grad_norm": 0.46695563197135925, "learning_rate": 1.4993041521326272e-05, "loss": 0.0673, "step": 23378 }, { "epoch": 0.5151630335983077, "grad_norm": 0.5312846899032593, "learning_rate": 1.4991970986241124e-05, "loss": 0.0784, "step": 23379 }, { "epoch": 0.5151850688878239, "grad_norm": 0.625402569770813, "learning_rate": 1.4990900451196872e-05, "loss": 0.0741, "step": 23380 }, { "epoch": 0.5152071041773401, "grad_norm": 0.6845158934593201, "learning_rate": 1.498982991619897e-05, "loss": 0.0953, "step": 23381 }, { "epoch": 0.5152291394668562, "grad_norm": 0.5887941718101501, "learning_rate": 1.4988759381252864e-05, "loss": 0.0597, "step": 23382 }, { "epoch": 0.5152511747563723, "grad_norm": 0.8413463830947876, "learning_rate": 1.4987688846364018e-05, "loss": 0.0756, "step": 23383 }, { "epoch": 0.5152732100458884, "grad_norm": 0.5551754236221313, "learning_rate": 1.4986618311537878e-05, "loss": 0.0983, "step": 23384 }, { "epoch": 0.5152952453354046, "grad_norm": 0.8281273245811462, "learning_rate": 1.4985547776779893e-05, "loss": 0.066, "step": 23385 }, { "epoch": 0.5153172806249208, "grad_norm": 0.8392823338508606, "learning_rate": 1.4984477242095526e-05, "loss": 0.0635, "step": 23386 }, { "epoch": 0.5153393159144369, "grad_norm": 0.6954783797264099, "learning_rate": 1.4983406707490227e-05, "loss": 0.059, "step": 23387 }, { "epoch": 0.5153613512039531, "grad_norm": 0.998786211013794, "learning_rate": 1.4982336172969444e-05, "loss": 0.1176, "step": 23388 }, { "epoch": 0.5153833864934693, "grad_norm": 0.5051615834236145, "learning_rate": 1.498126563853863e-05, "loss": 0.0916, "step": 23389 }, { "epoch": 0.5154054217829854, "grad_norm": 0.49687474966049194, "learning_rate": 1.4980195104203244e-05, "loss": 0.0892, "step": 23390 }, { "epoch": 0.5154274570725016, "grad_norm": 0.6059668064117432, "learning_rate": 1.4979124569968734e-05, "loss": 0.0998, "step": 23391 }, { "epoch": 0.5154494923620178, "grad_norm": 0.7018516063690186, "learning_rate": 1.4978054035840555e-05, "loss": 0.0699, "step": 23392 }, { "epoch": 0.5154715276515339, "grad_norm": 0.7097777724266052, "learning_rate": 1.497698350182415e-05, "loss": 0.09, "step": 23393 }, { "epoch": 0.5154935629410501, "grad_norm": 0.5793947577476501, "learning_rate": 1.4975912967924987e-05, "loss": 0.0741, "step": 23394 }, { "epoch": 0.5155155982305663, "grad_norm": 0.8881354331970215, "learning_rate": 1.4974842434148515e-05, "loss": 0.0771, "step": 23395 }, { "epoch": 0.5155376335200824, "grad_norm": 0.5450683236122131, "learning_rate": 1.4973771900500182e-05, "loss": 0.0811, "step": 23396 }, { "epoch": 0.5155596688095986, "grad_norm": 0.8620381951332092, "learning_rate": 1.497270136698544e-05, "loss": 0.0732, "step": 23397 }, { "epoch": 0.5155817040991147, "grad_norm": 0.8056970834732056, "learning_rate": 1.4971630833609748e-05, "loss": 0.0629, "step": 23398 }, { "epoch": 0.5156037393886309, "grad_norm": 0.6885209679603577, "learning_rate": 1.4970560300378553e-05, "loss": 0.0604, "step": 23399 }, { "epoch": 0.5156257746781471, "grad_norm": 0.5646056532859802, "learning_rate": 1.4969489767297314e-05, "loss": 0.1203, "step": 23400 }, { "epoch": 0.5156478099676632, "grad_norm": 0.5393333435058594, "learning_rate": 1.4968419234371471e-05, "loss": 0.085, "step": 23401 }, { "epoch": 0.5156698452571794, "grad_norm": 0.5590370297431946, "learning_rate": 1.4967348701606494e-05, "loss": 0.0964, "step": 23402 }, { "epoch": 0.5156918805466956, "grad_norm": 0.8302238583564758, "learning_rate": 1.4966278169007826e-05, "loss": 0.1122, "step": 23403 }, { "epoch": 0.5157139158362117, "grad_norm": 0.44500696659088135, "learning_rate": 1.4965207636580921e-05, "loss": 0.0924, "step": 23404 }, { "epoch": 0.5157359511257279, "grad_norm": 0.6071099042892456, "learning_rate": 1.496413710433123e-05, "loss": 0.114, "step": 23405 }, { "epoch": 0.5157579864152441, "grad_norm": 1.0606886148452759, "learning_rate": 1.496306657226421e-05, "loss": 0.1013, "step": 23406 }, { "epoch": 0.5157800217047602, "grad_norm": 0.6360095143318176, "learning_rate": 1.4961996040385314e-05, "loss": 0.0596, "step": 23407 }, { "epoch": 0.5158020569942763, "grad_norm": 0.529163658618927, "learning_rate": 1.4960925508699985e-05, "loss": 0.0544, "step": 23408 }, { "epoch": 0.5158240922837924, "grad_norm": 0.6502758264541626, "learning_rate": 1.4959854977213688e-05, "loss": 0.0755, "step": 23409 }, { "epoch": 0.5158461275733086, "grad_norm": 0.5496346950531006, "learning_rate": 1.4958784445931874e-05, "loss": 0.0572, "step": 23410 }, { "epoch": 0.5158681628628248, "grad_norm": 0.49389126896858215, "learning_rate": 1.4957713914859991e-05, "loss": 0.0578, "step": 23411 }, { "epoch": 0.5158901981523409, "grad_norm": 0.8988507986068726, "learning_rate": 1.495664338400349e-05, "loss": 0.0754, "step": 23412 }, { "epoch": 0.5159122334418571, "grad_norm": 0.5414434671401978, "learning_rate": 1.4955572853367833e-05, "loss": 0.0546, "step": 23413 }, { "epoch": 0.5159342687313733, "grad_norm": 0.7635588049888611, "learning_rate": 1.4954502322958466e-05, "loss": 0.096, "step": 23414 }, { "epoch": 0.5159563040208894, "grad_norm": 1.1459922790527344, "learning_rate": 1.4953431792780842e-05, "loss": 0.0474, "step": 23415 }, { "epoch": 0.5159783393104056, "grad_norm": 0.6119840741157532, "learning_rate": 1.4952361262840411e-05, "loss": 0.069, "step": 23416 }, { "epoch": 0.5160003745999218, "grad_norm": 0.8713931441307068, "learning_rate": 1.4951290733142635e-05, "loss": 0.0673, "step": 23417 }, { "epoch": 0.5160224098894379, "grad_norm": 0.5291497707366943, "learning_rate": 1.4950220203692961e-05, "loss": 0.0671, "step": 23418 }, { "epoch": 0.5160444451789541, "grad_norm": 0.7714124321937561, "learning_rate": 1.4949149674496842e-05, "loss": 0.0717, "step": 23419 }, { "epoch": 0.5160664804684703, "grad_norm": 0.5841349363327026, "learning_rate": 1.4948079145559726e-05, "loss": 0.0554, "step": 23420 }, { "epoch": 0.5160885157579864, "grad_norm": 0.654916524887085, "learning_rate": 1.4947008616887078e-05, "loss": 0.0744, "step": 23421 }, { "epoch": 0.5161105510475026, "grad_norm": 0.5447404980659485, "learning_rate": 1.4945938088484342e-05, "loss": 0.0925, "step": 23422 }, { "epoch": 0.5161325863370188, "grad_norm": 0.41735973954200745, "learning_rate": 1.494486756035697e-05, "loss": 0.0591, "step": 23423 }, { "epoch": 0.5161546216265349, "grad_norm": 0.7667180895805359, "learning_rate": 1.4943797032510415e-05, "loss": 0.0968, "step": 23424 }, { "epoch": 0.5161766569160511, "grad_norm": 0.6933169960975647, "learning_rate": 1.4942726504950134e-05, "loss": 0.0878, "step": 23425 }, { "epoch": 0.5161986922055672, "grad_norm": 0.8306069374084473, "learning_rate": 1.4941655977681579e-05, "loss": 0.1035, "step": 23426 }, { "epoch": 0.5162207274950834, "grad_norm": 0.4535316824913025, "learning_rate": 1.4940585450710203e-05, "loss": 0.0576, "step": 23427 }, { "epoch": 0.5162427627845996, "grad_norm": 0.5003872513771057, "learning_rate": 1.4939514924041452e-05, "loss": 0.0547, "step": 23428 }, { "epoch": 0.5162647980741157, "grad_norm": 0.8551313877105713, "learning_rate": 1.4938444397680787e-05, "loss": 0.0743, "step": 23429 }, { "epoch": 0.5162868333636319, "grad_norm": 0.6893265843391418, "learning_rate": 1.4937373871633659e-05, "loss": 0.0887, "step": 23430 }, { "epoch": 0.5163088686531481, "grad_norm": 0.8725407719612122, "learning_rate": 1.4936303345905514e-05, "loss": 0.0788, "step": 23431 }, { "epoch": 0.5163309039426642, "grad_norm": 0.7794297337532043, "learning_rate": 1.4935232820501815e-05, "loss": 0.0706, "step": 23432 }, { "epoch": 0.5163529392321803, "grad_norm": 0.7472670078277588, "learning_rate": 1.493416229542801e-05, "loss": 0.0917, "step": 23433 }, { "epoch": 0.5163749745216965, "grad_norm": 1.167507529258728, "learning_rate": 1.4933091770689552e-05, "loss": 0.0891, "step": 23434 }, { "epoch": 0.5163970098112126, "grad_norm": 0.4380243122577667, "learning_rate": 1.493202124629189e-05, "loss": 0.0546, "step": 23435 }, { "epoch": 0.5164190451007288, "grad_norm": 0.5919099450111389, "learning_rate": 1.4930950722240484e-05, "loss": 0.059, "step": 23436 }, { "epoch": 0.5164410803902449, "grad_norm": 0.4270760416984558, "learning_rate": 1.4929880198540782e-05, "loss": 0.0434, "step": 23437 }, { "epoch": 0.5164631156797611, "grad_norm": 0.7123448252677917, "learning_rate": 1.492880967519824e-05, "loss": 0.0961, "step": 23438 }, { "epoch": 0.5164851509692773, "grad_norm": 0.8763170838356018, "learning_rate": 1.4927739152218298e-05, "loss": 0.0885, "step": 23439 }, { "epoch": 0.5165071862587934, "grad_norm": 0.8092538714408875, "learning_rate": 1.4926668629606428e-05, "loss": 0.0973, "step": 23440 }, { "epoch": 0.5165292215483096, "grad_norm": 0.8058256506919861, "learning_rate": 1.4925598107368075e-05, "loss": 0.0644, "step": 23441 }, { "epoch": 0.5165512568378258, "grad_norm": 0.804147481918335, "learning_rate": 1.492452758550869e-05, "loss": 0.0768, "step": 23442 }, { "epoch": 0.5165732921273419, "grad_norm": 0.32375413179397583, "learning_rate": 1.4923457064033723e-05, "loss": 0.0506, "step": 23443 }, { "epoch": 0.5165953274168581, "grad_norm": 0.6870251893997192, "learning_rate": 1.4922386542948634e-05, "loss": 0.0496, "step": 23444 }, { "epoch": 0.5166173627063743, "grad_norm": 1.1152695417404175, "learning_rate": 1.4921316022258872e-05, "loss": 0.0841, "step": 23445 }, { "epoch": 0.5166393979958904, "grad_norm": 0.7368463277816772, "learning_rate": 1.492024550196989e-05, "loss": 0.1095, "step": 23446 }, { "epoch": 0.5166614332854066, "grad_norm": 0.9737436771392822, "learning_rate": 1.4919174982087133e-05, "loss": 0.0538, "step": 23447 }, { "epoch": 0.5166834685749228, "grad_norm": 0.3504738509654999, "learning_rate": 1.4918104462616066e-05, "loss": 0.0513, "step": 23448 }, { "epoch": 0.5167055038644389, "grad_norm": 0.7037091851234436, "learning_rate": 1.4917033943562138e-05, "loss": 0.0766, "step": 23449 }, { "epoch": 0.5167275391539551, "grad_norm": 0.5658246278762817, "learning_rate": 1.49159634249308e-05, "loss": 0.0706, "step": 23450 }, { "epoch": 0.5167495744434712, "grad_norm": 0.7353602647781372, "learning_rate": 1.4914892906727506e-05, "loss": 0.0965, "step": 23451 }, { "epoch": 0.5167716097329874, "grad_norm": 1.168016791343689, "learning_rate": 1.491382238895771e-05, "loss": 0.0779, "step": 23452 }, { "epoch": 0.5167936450225036, "grad_norm": 0.5392890572547913, "learning_rate": 1.491275187162686e-05, "loss": 0.0356, "step": 23453 }, { "epoch": 0.5168156803120197, "grad_norm": 0.4465305805206299, "learning_rate": 1.4911681354740407e-05, "loss": 0.0695, "step": 23454 }, { "epoch": 0.5168377156015359, "grad_norm": 0.7886744141578674, "learning_rate": 1.4910610838303814e-05, "loss": 0.0999, "step": 23455 }, { "epoch": 0.5168597508910521, "grad_norm": 0.5450021028518677, "learning_rate": 1.4909540322322528e-05, "loss": 0.0579, "step": 23456 }, { "epoch": 0.5168817861805681, "grad_norm": 0.5373152494430542, "learning_rate": 1.4908469806802003e-05, "loss": 0.0809, "step": 23457 }, { "epoch": 0.5169038214700843, "grad_norm": 0.5060412883758545, "learning_rate": 1.4907399291747684e-05, "loss": 0.0641, "step": 23458 }, { "epoch": 0.5169258567596005, "grad_norm": 0.8493390083312988, "learning_rate": 1.4906328777165036e-05, "loss": 0.0969, "step": 23459 }, { "epoch": 0.5169478920491166, "grad_norm": 0.49961256980895996, "learning_rate": 1.4905258263059505e-05, "loss": 0.0878, "step": 23460 }, { "epoch": 0.5169699273386328, "grad_norm": 0.8456290364265442, "learning_rate": 1.4904187749436545e-05, "loss": 0.0894, "step": 23461 }, { "epoch": 0.516991962628149, "grad_norm": 0.8162969946861267, "learning_rate": 1.49031172363016e-05, "loss": 0.0867, "step": 23462 }, { "epoch": 0.5170139979176651, "grad_norm": 0.5137395262718201, "learning_rate": 1.490204672366014e-05, "loss": 0.0648, "step": 23463 }, { "epoch": 0.5170360332071813, "grad_norm": 0.9285287857055664, "learning_rate": 1.4900976211517608e-05, "loss": 0.0635, "step": 23464 }, { "epoch": 0.5170580684966974, "grad_norm": 0.4352411925792694, "learning_rate": 1.4899905699879456e-05, "loss": 0.0718, "step": 23465 }, { "epoch": 0.5170801037862136, "grad_norm": 0.7693251371383667, "learning_rate": 1.4898835188751134e-05, "loss": 0.0799, "step": 23466 }, { "epoch": 0.5171021390757298, "grad_norm": 0.47176894545555115, "learning_rate": 1.4897764678138103e-05, "loss": 0.0664, "step": 23467 }, { "epoch": 0.5171241743652459, "grad_norm": 0.6941890716552734, "learning_rate": 1.4896694168045812e-05, "loss": 0.1022, "step": 23468 }, { "epoch": 0.5171462096547621, "grad_norm": 0.5719466209411621, "learning_rate": 1.4895623658479711e-05, "loss": 0.0683, "step": 23469 }, { "epoch": 0.5171682449442783, "grad_norm": 0.869109570980072, "learning_rate": 1.489455314944525e-05, "loss": 0.0851, "step": 23470 }, { "epoch": 0.5171902802337944, "grad_norm": 1.0149729251861572, "learning_rate": 1.4893482640947893e-05, "loss": 0.068, "step": 23471 }, { "epoch": 0.5172123155233106, "grad_norm": 0.7768535614013672, "learning_rate": 1.4892412132993085e-05, "loss": 0.1199, "step": 23472 }, { "epoch": 0.5172343508128268, "grad_norm": 0.7715979218482971, "learning_rate": 1.4891341625586274e-05, "loss": 0.0609, "step": 23473 }, { "epoch": 0.5172563861023429, "grad_norm": 0.7704749703407288, "learning_rate": 1.4890271118732926e-05, "loss": 0.0658, "step": 23474 }, { "epoch": 0.5172784213918591, "grad_norm": 0.39163047075271606, "learning_rate": 1.4889200612438485e-05, "loss": 0.0893, "step": 23475 }, { "epoch": 0.5173004566813753, "grad_norm": 0.9543651342391968, "learning_rate": 1.4888130106708405e-05, "loss": 0.0766, "step": 23476 }, { "epoch": 0.5173224919708914, "grad_norm": 0.5345056056976318, "learning_rate": 1.488705960154813e-05, "loss": 0.0929, "step": 23477 }, { "epoch": 0.5173445272604076, "grad_norm": 0.46455368399620056, "learning_rate": 1.488598909696313e-05, "loss": 0.0662, "step": 23478 }, { "epoch": 0.5173665625499237, "grad_norm": 0.3891243040561676, "learning_rate": 1.4884918592958846e-05, "loss": 0.0738, "step": 23479 }, { "epoch": 0.5173885978394399, "grad_norm": 0.6547167301177979, "learning_rate": 1.4883848089540735e-05, "loss": 0.0521, "step": 23480 }, { "epoch": 0.5174106331289561, "grad_norm": 0.9052369594573975, "learning_rate": 1.488277758671424e-05, "loss": 0.0922, "step": 23481 }, { "epoch": 0.5174326684184721, "grad_norm": 0.9005935192108154, "learning_rate": 1.4881707084484829e-05, "loss": 0.073, "step": 23482 }, { "epoch": 0.5174547037079883, "grad_norm": 0.5790213346481323, "learning_rate": 1.4880636582857946e-05, "loss": 0.0509, "step": 23483 }, { "epoch": 0.5174767389975045, "grad_norm": 0.6670674085617065, "learning_rate": 1.487956608183905e-05, "loss": 0.0716, "step": 23484 }, { "epoch": 0.5174987742870206, "grad_norm": 0.75861656665802, "learning_rate": 1.4878495581433579e-05, "loss": 0.1185, "step": 23485 }, { "epoch": 0.5175208095765368, "grad_norm": 1.0406650304794312, "learning_rate": 1.4877425081647001e-05, "loss": 0.0905, "step": 23486 }, { "epoch": 0.517542844866053, "grad_norm": 0.5900388956069946, "learning_rate": 1.4876354582484762e-05, "loss": 0.0818, "step": 23487 }, { "epoch": 0.5175648801555691, "grad_norm": 0.6715051531791687, "learning_rate": 1.4875284083952316e-05, "loss": 0.0973, "step": 23488 }, { "epoch": 0.5175869154450853, "grad_norm": 0.8952205181121826, "learning_rate": 1.487421358605511e-05, "loss": 0.1148, "step": 23489 }, { "epoch": 0.5176089507346014, "grad_norm": 0.5608043074607849, "learning_rate": 1.4873143088798605e-05, "loss": 0.1173, "step": 23490 }, { "epoch": 0.5176309860241176, "grad_norm": 0.47464799880981445, "learning_rate": 1.4872072592188253e-05, "loss": 0.0916, "step": 23491 }, { "epoch": 0.5176530213136338, "grad_norm": 0.740679144859314, "learning_rate": 1.4871002096229503e-05, "loss": 0.0879, "step": 23492 }, { "epoch": 0.5176750566031499, "grad_norm": 0.5941629409790039, "learning_rate": 1.4869931600927805e-05, "loss": 0.0887, "step": 23493 }, { "epoch": 0.5176970918926661, "grad_norm": 0.9974339008331299, "learning_rate": 1.4868861106288619e-05, "loss": 0.1115, "step": 23494 }, { "epoch": 0.5177191271821823, "grad_norm": 0.43624845147132874, "learning_rate": 1.4867790612317395e-05, "loss": 0.0509, "step": 23495 }, { "epoch": 0.5177411624716984, "grad_norm": 0.5450630187988281, "learning_rate": 1.4866720119019573e-05, "loss": 0.082, "step": 23496 }, { "epoch": 0.5177631977612146, "grad_norm": 0.6308197975158691, "learning_rate": 1.4865649626400629e-05, "loss": 0.0598, "step": 23497 }, { "epoch": 0.5177852330507308, "grad_norm": 0.8327744603157043, "learning_rate": 1.4864579134466e-05, "loss": 0.0769, "step": 23498 }, { "epoch": 0.5178072683402469, "grad_norm": 0.32779863476753235, "learning_rate": 1.4863508643221143e-05, "loss": 0.0613, "step": 23499 }, { "epoch": 0.5178293036297631, "grad_norm": 0.7222368717193604, "learning_rate": 1.4862438152671505e-05, "loss": 0.07, "step": 23500 }, { "epoch": 0.5178513389192793, "grad_norm": 0.9729716181755066, "learning_rate": 1.4861367662822549e-05, "loss": 0.0704, "step": 23501 }, { "epoch": 0.5178733742087954, "grad_norm": 0.4320250451564789, "learning_rate": 1.4860297173679722e-05, "loss": 0.0819, "step": 23502 }, { "epoch": 0.5178954094983116, "grad_norm": 0.789360523223877, "learning_rate": 1.4859226685248475e-05, "loss": 0.0628, "step": 23503 }, { "epoch": 0.5179174447878278, "grad_norm": 0.8434747457504272, "learning_rate": 1.4858156197534254e-05, "loss": 0.0694, "step": 23504 }, { "epoch": 0.5179394800773439, "grad_norm": 0.47838088870048523, "learning_rate": 1.4857085710542528e-05, "loss": 0.0536, "step": 23505 }, { "epoch": 0.5179615153668601, "grad_norm": 0.8125970363616943, "learning_rate": 1.485601522427874e-05, "loss": 0.0641, "step": 23506 }, { "epoch": 0.5179835506563761, "grad_norm": 0.6439363956451416, "learning_rate": 1.4854944738748344e-05, "loss": 0.073, "step": 23507 }, { "epoch": 0.5180055859458923, "grad_norm": 0.4072512686252594, "learning_rate": 1.4853874253956792e-05, "loss": 0.0769, "step": 23508 }, { "epoch": 0.5180276212354085, "grad_norm": 0.5231990218162537, "learning_rate": 1.4852803769909536e-05, "loss": 0.0628, "step": 23509 }, { "epoch": 0.5180496565249246, "grad_norm": 0.7855514883995056, "learning_rate": 1.4851733286612032e-05, "loss": 0.0758, "step": 23510 }, { "epoch": 0.5180716918144408, "grad_norm": 0.9576924443244934, "learning_rate": 1.485066280406973e-05, "loss": 0.0728, "step": 23511 }, { "epoch": 0.518093727103957, "grad_norm": 0.6513553857803345, "learning_rate": 1.4849592322288073e-05, "loss": 0.0876, "step": 23512 }, { "epoch": 0.5181157623934731, "grad_norm": 0.6267356872558594, "learning_rate": 1.4848521841272531e-05, "loss": 0.0446, "step": 23513 }, { "epoch": 0.5181377976829893, "grad_norm": 0.5116540193557739, "learning_rate": 1.4847451361028548e-05, "loss": 0.0507, "step": 23514 }, { "epoch": 0.5181598329725055, "grad_norm": 0.5612503290176392, "learning_rate": 1.4846380881561579e-05, "loss": 0.0744, "step": 23515 }, { "epoch": 0.5181818682620216, "grad_norm": 0.8912001252174377, "learning_rate": 1.484531040287707e-05, "loss": 0.0754, "step": 23516 }, { "epoch": 0.5182039035515378, "grad_norm": 0.6728427410125732, "learning_rate": 1.4844239924980481e-05, "loss": 0.0723, "step": 23517 }, { "epoch": 0.518225938841054, "grad_norm": 0.4840354323387146, "learning_rate": 1.4843169447877262e-05, "loss": 0.1074, "step": 23518 }, { "epoch": 0.5182479741305701, "grad_norm": 0.6206148266792297, "learning_rate": 1.4842098971572858e-05, "loss": 0.0655, "step": 23519 }, { "epoch": 0.5182700094200863, "grad_norm": 0.6896069645881653, "learning_rate": 1.4841028496072735e-05, "loss": 0.0628, "step": 23520 }, { "epoch": 0.5182920447096024, "grad_norm": 0.5291637778282166, "learning_rate": 1.4839958021382339e-05, "loss": 0.0687, "step": 23521 }, { "epoch": 0.5183140799991186, "grad_norm": 0.5189874768257141, "learning_rate": 1.4838887547507124e-05, "loss": 0.1013, "step": 23522 }, { "epoch": 0.5183361152886348, "grad_norm": 0.7013773322105408, "learning_rate": 1.4837817074452538e-05, "loss": 0.0672, "step": 23523 }, { "epoch": 0.5183581505781509, "grad_norm": 0.8662773966789246, "learning_rate": 1.483674660222404e-05, "loss": 0.0866, "step": 23524 }, { "epoch": 0.5183801858676671, "grad_norm": 0.9790453910827637, "learning_rate": 1.4835676130827077e-05, "loss": 0.107, "step": 23525 }, { "epoch": 0.5184022211571833, "grad_norm": 0.6309071183204651, "learning_rate": 1.4834605660267104e-05, "loss": 0.088, "step": 23526 }, { "epoch": 0.5184242564466994, "grad_norm": 0.6181610822677612, "learning_rate": 1.4833535190549569e-05, "loss": 0.0585, "step": 23527 }, { "epoch": 0.5184462917362156, "grad_norm": 0.6713955402374268, "learning_rate": 1.4832464721679932e-05, "loss": 0.0764, "step": 23528 }, { "epoch": 0.5184683270257318, "grad_norm": 0.647374153137207, "learning_rate": 1.4831394253663644e-05, "loss": 0.0803, "step": 23529 }, { "epoch": 0.5184903623152479, "grad_norm": 1.2571288347244263, "learning_rate": 1.4830323786506154e-05, "loss": 0.1074, "step": 23530 }, { "epoch": 0.518512397604764, "grad_norm": 0.5319185256958008, "learning_rate": 1.4829253320212913e-05, "loss": 0.0778, "step": 23531 }, { "epoch": 0.5185344328942801, "grad_norm": 0.3804630935192108, "learning_rate": 1.4828182854789381e-05, "loss": 0.0772, "step": 23532 }, { "epoch": 0.5185564681837963, "grad_norm": 0.522705078125, "learning_rate": 1.4827112390241005e-05, "loss": 0.0785, "step": 23533 }, { "epoch": 0.5185785034733125, "grad_norm": 0.7768685817718506, "learning_rate": 1.4826041926573238e-05, "loss": 0.0969, "step": 23534 }, { "epoch": 0.5186005387628286, "grad_norm": 0.45651426911354065, "learning_rate": 1.4824971463791527e-05, "loss": 0.0615, "step": 23535 }, { "epoch": 0.5186225740523448, "grad_norm": 0.7969645857810974, "learning_rate": 1.4823901001901336e-05, "loss": 0.0815, "step": 23536 }, { "epoch": 0.518644609341861, "grad_norm": 0.859416663646698, "learning_rate": 1.4822830540908112e-05, "loss": 0.1254, "step": 23537 }, { "epoch": 0.5186666446313771, "grad_norm": 0.47574368119239807, "learning_rate": 1.4821760080817306e-05, "loss": 0.087, "step": 23538 }, { "epoch": 0.5186886799208933, "grad_norm": 0.7145892977714539, "learning_rate": 1.482068962163437e-05, "loss": 0.1157, "step": 23539 }, { "epoch": 0.5187107152104095, "grad_norm": 0.9145069122314453, "learning_rate": 1.481961916336476e-05, "loss": 0.0871, "step": 23540 }, { "epoch": 0.5187327504999256, "grad_norm": 0.5457668900489807, "learning_rate": 1.4818548706013927e-05, "loss": 0.0978, "step": 23541 }, { "epoch": 0.5187547857894418, "grad_norm": 1.0452375411987305, "learning_rate": 1.4817478249587316e-05, "loss": 0.0972, "step": 23542 }, { "epoch": 0.518776821078958, "grad_norm": 0.6274834275245667, "learning_rate": 1.4816407794090391e-05, "loss": 0.0689, "step": 23543 }, { "epoch": 0.5187988563684741, "grad_norm": 0.4423941969871521, "learning_rate": 1.4815337339528601e-05, "loss": 0.0464, "step": 23544 }, { "epoch": 0.5188208916579903, "grad_norm": 0.7463938593864441, "learning_rate": 1.4814266885907397e-05, "loss": 0.0939, "step": 23545 }, { "epoch": 0.5188429269475064, "grad_norm": 0.7606734037399292, "learning_rate": 1.481319643323223e-05, "loss": 0.0711, "step": 23546 }, { "epoch": 0.5188649622370226, "grad_norm": 0.7600039839744568, "learning_rate": 1.4812125981508555e-05, "loss": 0.0758, "step": 23547 }, { "epoch": 0.5188869975265388, "grad_norm": 1.076208233833313, "learning_rate": 1.4811055530741822e-05, "loss": 0.0726, "step": 23548 }, { "epoch": 0.5189090328160549, "grad_norm": 0.8821616768836975, "learning_rate": 1.4809985080937486e-05, "loss": 0.0661, "step": 23549 }, { "epoch": 0.5189310681055711, "grad_norm": 0.7477660179138184, "learning_rate": 1.4808914632100991e-05, "loss": 0.0953, "step": 23550 }, { "epoch": 0.5189531033950873, "grad_norm": 0.670659065246582, "learning_rate": 1.4807844184237804e-05, "loss": 0.0815, "step": 23551 }, { "epoch": 0.5189751386846034, "grad_norm": 0.6544035077095032, "learning_rate": 1.4806773737353368e-05, "loss": 0.0805, "step": 23552 }, { "epoch": 0.5189971739741196, "grad_norm": 0.552862286567688, "learning_rate": 1.4805703291453139e-05, "loss": 0.0743, "step": 23553 }, { "epoch": 0.5190192092636358, "grad_norm": 0.5689886212348938, "learning_rate": 1.4804632846542562e-05, "loss": 0.085, "step": 23554 }, { "epoch": 0.5190412445531519, "grad_norm": 0.5420511960983276, "learning_rate": 1.4803562402627097e-05, "loss": 0.0761, "step": 23555 }, { "epoch": 0.519063279842668, "grad_norm": 0.6370207071304321, "learning_rate": 1.4802491959712197e-05, "loss": 0.0688, "step": 23556 }, { "epoch": 0.5190853151321841, "grad_norm": 0.45573148131370544, "learning_rate": 1.4801421517803309e-05, "loss": 0.064, "step": 23557 }, { "epoch": 0.5191073504217003, "grad_norm": 0.5258446335792542, "learning_rate": 1.4800351076905883e-05, "loss": 0.0611, "step": 23558 }, { "epoch": 0.5191293857112165, "grad_norm": 1.0343067646026611, "learning_rate": 1.4799280637025381e-05, "loss": 0.0798, "step": 23559 }, { "epoch": 0.5191514210007326, "grad_norm": 0.5216862559318542, "learning_rate": 1.4798210198167252e-05, "loss": 0.074, "step": 23560 }, { "epoch": 0.5191734562902488, "grad_norm": 0.5166853070259094, "learning_rate": 1.4797139760336944e-05, "loss": 0.059, "step": 23561 }, { "epoch": 0.519195491579765, "grad_norm": 0.580376148223877, "learning_rate": 1.4796069323539912e-05, "loss": 0.0966, "step": 23562 }, { "epoch": 0.5192175268692811, "grad_norm": 0.448794960975647, "learning_rate": 1.479499888778161e-05, "loss": 0.0972, "step": 23563 }, { "epoch": 0.5192395621587973, "grad_norm": 0.4259757697582245, "learning_rate": 1.4793928453067487e-05, "loss": 0.0674, "step": 23564 }, { "epoch": 0.5192615974483135, "grad_norm": 0.350935697555542, "learning_rate": 1.4792858019402993e-05, "loss": 0.0486, "step": 23565 }, { "epoch": 0.5192836327378296, "grad_norm": 0.5091186165809631, "learning_rate": 1.4791787586793588e-05, "loss": 0.1158, "step": 23566 }, { "epoch": 0.5193056680273458, "grad_norm": 0.6029223799705505, "learning_rate": 1.4790717155244722e-05, "loss": 0.0654, "step": 23567 }, { "epoch": 0.519327703316862, "grad_norm": 0.7235823273658752, "learning_rate": 1.4789646724761846e-05, "loss": 0.0808, "step": 23568 }, { "epoch": 0.5193497386063781, "grad_norm": 0.6267390847206116, "learning_rate": 1.478857629535041e-05, "loss": 0.0827, "step": 23569 }, { "epoch": 0.5193717738958943, "grad_norm": 0.49309754371643066, "learning_rate": 1.4787505867015868e-05, "loss": 0.044, "step": 23570 }, { "epoch": 0.5193938091854104, "grad_norm": 0.6407565474510193, "learning_rate": 1.4786435439763676e-05, "loss": 0.0658, "step": 23571 }, { "epoch": 0.5194158444749266, "grad_norm": 0.6954757571220398, "learning_rate": 1.4785365013599282e-05, "loss": 0.0696, "step": 23572 }, { "epoch": 0.5194378797644428, "grad_norm": 0.5364985466003418, "learning_rate": 1.4784294588528131e-05, "loss": 0.0516, "step": 23573 }, { "epoch": 0.5194599150539589, "grad_norm": 0.7236348986625671, "learning_rate": 1.4783224164555691e-05, "loss": 0.0977, "step": 23574 }, { "epoch": 0.5194819503434751, "grad_norm": 0.6145312786102295, "learning_rate": 1.4782153741687407e-05, "loss": 0.0611, "step": 23575 }, { "epoch": 0.5195039856329913, "grad_norm": 0.5343713164329529, "learning_rate": 1.478108331992873e-05, "loss": 0.0704, "step": 23576 }, { "epoch": 0.5195260209225074, "grad_norm": 0.9478992819786072, "learning_rate": 1.4780012899285109e-05, "loss": 0.0816, "step": 23577 }, { "epoch": 0.5195480562120236, "grad_norm": 0.5812360048294067, "learning_rate": 1.4778942479762006e-05, "loss": 0.108, "step": 23578 }, { "epoch": 0.5195700915015398, "grad_norm": 0.9473631978034973, "learning_rate": 1.4777872061364865e-05, "loss": 0.0818, "step": 23579 }, { "epoch": 0.5195921267910559, "grad_norm": 0.5050522685050964, "learning_rate": 1.4776801644099142e-05, "loss": 0.0658, "step": 23580 }, { "epoch": 0.519614162080572, "grad_norm": 0.6633456349372864, "learning_rate": 1.4775731227970281e-05, "loss": 0.0454, "step": 23581 }, { "epoch": 0.5196361973700881, "grad_norm": 0.8595299124717712, "learning_rate": 1.4774660812983747e-05, "loss": 0.059, "step": 23582 }, { "epoch": 0.5196582326596043, "grad_norm": 0.5462784171104431, "learning_rate": 1.4773590399144989e-05, "loss": 0.1034, "step": 23583 }, { "epoch": 0.5196802679491205, "grad_norm": 1.025648832321167, "learning_rate": 1.4772519986459447e-05, "loss": 0.0746, "step": 23584 }, { "epoch": 0.5197023032386366, "grad_norm": 0.4167158007621765, "learning_rate": 1.4771449574932594e-05, "loss": 0.0777, "step": 23585 }, { "epoch": 0.5197243385281528, "grad_norm": 0.8286634683609009, "learning_rate": 1.4770379164569868e-05, "loss": 0.0844, "step": 23586 }, { "epoch": 0.519746373817669, "grad_norm": 0.5040532946586609, "learning_rate": 1.4769308755376723e-05, "loss": 0.0605, "step": 23587 }, { "epoch": 0.5197684091071851, "grad_norm": 0.7155494093894958, "learning_rate": 1.4768238347358606e-05, "loss": 0.063, "step": 23588 }, { "epoch": 0.5197904443967013, "grad_norm": 0.6274830102920532, "learning_rate": 1.4767167940520983e-05, "loss": 0.0863, "step": 23589 }, { "epoch": 0.5198124796862175, "grad_norm": 0.7320688366889954, "learning_rate": 1.4766097534869296e-05, "loss": 0.0689, "step": 23590 }, { "epoch": 0.5198345149757336, "grad_norm": 0.9821781516075134, "learning_rate": 1.4765027130409002e-05, "loss": 0.0941, "step": 23591 }, { "epoch": 0.5198565502652498, "grad_norm": 0.5560005307197571, "learning_rate": 1.4763956727145543e-05, "loss": 0.1002, "step": 23592 }, { "epoch": 0.519878585554766, "grad_norm": 0.580321729183197, "learning_rate": 1.4762886325084386e-05, "loss": 0.0867, "step": 23593 }, { "epoch": 0.5199006208442821, "grad_norm": 0.6578675508499146, "learning_rate": 1.4761815924230975e-05, "loss": 0.0887, "step": 23594 }, { "epoch": 0.5199226561337983, "grad_norm": 0.9960235357284546, "learning_rate": 1.4760745524590768e-05, "loss": 0.1059, "step": 23595 }, { "epoch": 0.5199446914233145, "grad_norm": 0.6374540328979492, "learning_rate": 1.4759675126169202e-05, "loss": 0.0604, "step": 23596 }, { "epoch": 0.5199667267128306, "grad_norm": 0.5376290678977966, "learning_rate": 1.4758604728971747e-05, "loss": 0.0705, "step": 23597 }, { "epoch": 0.5199887620023468, "grad_norm": 0.8225374221801758, "learning_rate": 1.4757534333003846e-05, "loss": 0.0929, "step": 23598 }, { "epoch": 0.520010797291863, "grad_norm": 0.7747481465339661, "learning_rate": 1.4756463938270953e-05, "loss": 0.0981, "step": 23599 }, { "epoch": 0.5200328325813791, "grad_norm": 0.9268501996994019, "learning_rate": 1.4755393544778512e-05, "loss": 0.1149, "step": 23600 }, { "epoch": 0.5200548678708953, "grad_norm": 0.5466170907020569, "learning_rate": 1.4754323152531992e-05, "loss": 0.0968, "step": 23601 }, { "epoch": 0.5200769031604114, "grad_norm": 0.6548262238502502, "learning_rate": 1.4753252761536835e-05, "loss": 0.0786, "step": 23602 }, { "epoch": 0.5200989384499276, "grad_norm": 0.31859731674194336, "learning_rate": 1.4752182371798493e-05, "loss": 0.0748, "step": 23603 }, { "epoch": 0.5201209737394438, "grad_norm": 0.5998836755752563, "learning_rate": 1.4751111983322417e-05, "loss": 0.0644, "step": 23604 }, { "epoch": 0.5201430090289598, "grad_norm": 0.44934606552124023, "learning_rate": 1.4750041596114065e-05, "loss": 0.0915, "step": 23605 }, { "epoch": 0.520165044318476, "grad_norm": 0.5633076429367065, "learning_rate": 1.4748971210178884e-05, "loss": 0.0964, "step": 23606 }, { "epoch": 0.5201870796079922, "grad_norm": 0.5824442505836487, "learning_rate": 1.474790082552232e-05, "loss": 0.071, "step": 23607 }, { "epoch": 0.5202091148975083, "grad_norm": 0.5586819052696228, "learning_rate": 1.474683044214984e-05, "loss": 0.0664, "step": 23608 }, { "epoch": 0.5202311501870245, "grad_norm": 0.571471095085144, "learning_rate": 1.4745760060066888e-05, "loss": 0.0692, "step": 23609 }, { "epoch": 0.5202531854765406, "grad_norm": 0.6176053285598755, "learning_rate": 1.4744689679278917e-05, "loss": 0.0609, "step": 23610 }, { "epoch": 0.5202752207660568, "grad_norm": 0.7630354762077332, "learning_rate": 1.4743619299791375e-05, "loss": 0.0526, "step": 23611 }, { "epoch": 0.520297256055573, "grad_norm": 0.2952514886856079, "learning_rate": 1.4742548921609719e-05, "loss": 0.0599, "step": 23612 }, { "epoch": 0.5203192913450891, "grad_norm": 0.44822150468826294, "learning_rate": 1.47414785447394e-05, "loss": 0.0745, "step": 23613 }, { "epoch": 0.5203413266346053, "grad_norm": 0.6854050159454346, "learning_rate": 1.4740408169185872e-05, "loss": 0.0793, "step": 23614 }, { "epoch": 0.5203633619241215, "grad_norm": 0.6640791296958923, "learning_rate": 1.4739337794954574e-05, "loss": 0.0521, "step": 23615 }, { "epoch": 0.5203853972136376, "grad_norm": 0.5543003678321838, "learning_rate": 1.4738267422050977e-05, "loss": 0.0607, "step": 23616 }, { "epoch": 0.5204074325031538, "grad_norm": 0.8886722922325134, "learning_rate": 1.4737197050480523e-05, "loss": 0.088, "step": 23617 }, { "epoch": 0.52042946779267, "grad_norm": 1.0067033767700195, "learning_rate": 1.4736126680248667e-05, "loss": 0.1017, "step": 23618 }, { "epoch": 0.5204515030821861, "grad_norm": 0.5802428126335144, "learning_rate": 1.4735056311360855e-05, "loss": 0.0868, "step": 23619 }, { "epoch": 0.5204735383717023, "grad_norm": 0.6722846031188965, "learning_rate": 1.4733985943822547e-05, "loss": 0.0553, "step": 23620 }, { "epoch": 0.5204955736612185, "grad_norm": 0.624823272228241, "learning_rate": 1.4732915577639192e-05, "loss": 0.0925, "step": 23621 }, { "epoch": 0.5205176089507346, "grad_norm": 0.6913990378379822, "learning_rate": 1.4731845212816242e-05, "loss": 0.0852, "step": 23622 }, { "epoch": 0.5205396442402508, "grad_norm": 0.5262311100959778, "learning_rate": 1.4730774849359139e-05, "loss": 0.0793, "step": 23623 }, { "epoch": 0.520561679529767, "grad_norm": 0.4844120740890503, "learning_rate": 1.4729704487273352e-05, "loss": 0.0859, "step": 23624 }, { "epoch": 0.5205837148192831, "grad_norm": 0.4832264482975006, "learning_rate": 1.4728634126564323e-05, "loss": 0.076, "step": 23625 }, { "epoch": 0.5206057501087993, "grad_norm": 0.6859831809997559, "learning_rate": 1.4727563767237509e-05, "loss": 0.0969, "step": 23626 }, { "epoch": 0.5206277853983154, "grad_norm": 0.5965831279754639, "learning_rate": 1.4726493409298353e-05, "loss": 0.0679, "step": 23627 }, { "epoch": 0.5206498206878316, "grad_norm": 0.8425556421279907, "learning_rate": 1.4725423052752317e-05, "loss": 0.0969, "step": 23628 }, { "epoch": 0.5206718559773478, "grad_norm": 0.6505900025367737, "learning_rate": 1.4724352697604849e-05, "loss": 0.0793, "step": 23629 }, { "epoch": 0.5206938912668638, "grad_norm": 0.8193554282188416, "learning_rate": 1.4723282343861393e-05, "loss": 0.0954, "step": 23630 }, { "epoch": 0.52071592655638, "grad_norm": 0.6450494527816772, "learning_rate": 1.4722211991527416e-05, "loss": 0.0785, "step": 23631 }, { "epoch": 0.5207379618458962, "grad_norm": 0.6382456421852112, "learning_rate": 1.4721141640608363e-05, "loss": 0.0517, "step": 23632 }, { "epoch": 0.5207599971354123, "grad_norm": 0.9699258208274841, "learning_rate": 1.4720071291109683e-05, "loss": 0.0888, "step": 23633 }, { "epoch": 0.5207820324249285, "grad_norm": 0.8542101383209229, "learning_rate": 1.4719000943036828e-05, "loss": 0.0652, "step": 23634 }, { "epoch": 0.5208040677144447, "grad_norm": 0.5655797123908997, "learning_rate": 1.4717930596395255e-05, "loss": 0.0682, "step": 23635 }, { "epoch": 0.5208261030039608, "grad_norm": 0.8305032849311829, "learning_rate": 1.4716860251190414e-05, "loss": 0.0779, "step": 23636 }, { "epoch": 0.520848138293477, "grad_norm": 0.34857138991355896, "learning_rate": 1.4715789907427754e-05, "loss": 0.043, "step": 23637 }, { "epoch": 0.5208701735829931, "grad_norm": 0.612291157245636, "learning_rate": 1.4714719565112722e-05, "loss": 0.0806, "step": 23638 }, { "epoch": 0.5208922088725093, "grad_norm": 0.8983736038208008, "learning_rate": 1.4713649224250783e-05, "loss": 0.0987, "step": 23639 }, { "epoch": 0.5209142441620255, "grad_norm": 0.5632844567298889, "learning_rate": 1.4712578884847382e-05, "loss": 0.0776, "step": 23640 }, { "epoch": 0.5209362794515416, "grad_norm": 0.3187597393989563, "learning_rate": 1.4711508546907971e-05, "loss": 0.0518, "step": 23641 }, { "epoch": 0.5209583147410578, "grad_norm": 0.6054888367652893, "learning_rate": 1.4710438210437997e-05, "loss": 0.0634, "step": 23642 }, { "epoch": 0.520980350030574, "grad_norm": 0.3694397807121277, "learning_rate": 1.4709367875442922e-05, "loss": 0.0626, "step": 23643 }, { "epoch": 0.5210023853200901, "grad_norm": 0.7097086310386658, "learning_rate": 1.4708297541928191e-05, "loss": 0.1186, "step": 23644 }, { "epoch": 0.5210244206096063, "grad_norm": 0.8421666622161865, "learning_rate": 1.4707227209899258e-05, "loss": 0.0904, "step": 23645 }, { "epoch": 0.5210464558991225, "grad_norm": 0.72809237241745, "learning_rate": 1.4706156879361565e-05, "loss": 0.0792, "step": 23646 }, { "epoch": 0.5210684911886386, "grad_norm": 0.6789387464523315, "learning_rate": 1.4705086550320581e-05, "loss": 0.0462, "step": 23647 }, { "epoch": 0.5210905264781548, "grad_norm": 0.7751420140266418, "learning_rate": 1.470401622278175e-05, "loss": 0.0773, "step": 23648 }, { "epoch": 0.521112561767671, "grad_norm": 0.6818137168884277, "learning_rate": 1.4702945896750523e-05, "loss": 0.0954, "step": 23649 }, { "epoch": 0.5211345970571871, "grad_norm": 0.7362663745880127, "learning_rate": 1.4701875572232348e-05, "loss": 0.0556, "step": 23650 }, { "epoch": 0.5211566323467033, "grad_norm": 0.5800633430480957, "learning_rate": 1.4700805249232682e-05, "loss": 0.0676, "step": 23651 }, { "epoch": 0.5211786676362195, "grad_norm": 0.6692997813224792, "learning_rate": 1.4699734927756977e-05, "loss": 0.0972, "step": 23652 }, { "epoch": 0.5212007029257356, "grad_norm": 0.5400360822677612, "learning_rate": 1.4698664607810675e-05, "loss": 0.0713, "step": 23653 }, { "epoch": 0.5212227382152518, "grad_norm": 0.6955057978630066, "learning_rate": 1.4697594289399245e-05, "loss": 0.0611, "step": 23654 }, { "epoch": 0.5212447735047678, "grad_norm": 0.8702366948127747, "learning_rate": 1.4696523972528128e-05, "loss": 0.0836, "step": 23655 }, { "epoch": 0.521266808794284, "grad_norm": 0.7585894465446472, "learning_rate": 1.4695453657202777e-05, "loss": 0.1017, "step": 23656 }, { "epoch": 0.5212888440838002, "grad_norm": 0.759231448173523, "learning_rate": 1.469438334342864e-05, "loss": 0.0622, "step": 23657 }, { "epoch": 0.5213108793733163, "grad_norm": 0.8439688086509705, "learning_rate": 1.4693313031211178e-05, "loss": 0.0899, "step": 23658 }, { "epoch": 0.5213329146628325, "grad_norm": 0.7505139112472534, "learning_rate": 1.4692242720555835e-05, "loss": 0.0689, "step": 23659 }, { "epoch": 0.5213549499523487, "grad_norm": 1.3929712772369385, "learning_rate": 1.4691172411468066e-05, "loss": 0.1127, "step": 23660 }, { "epoch": 0.5213769852418648, "grad_norm": 0.7249590158462524, "learning_rate": 1.4690102103953315e-05, "loss": 0.0808, "step": 23661 }, { "epoch": 0.521399020531381, "grad_norm": 0.43105483055114746, "learning_rate": 1.4689031798017046e-05, "loss": 0.0583, "step": 23662 }, { "epoch": 0.5214210558208972, "grad_norm": 0.5223560333251953, "learning_rate": 1.4687961493664705e-05, "loss": 0.079, "step": 23663 }, { "epoch": 0.5214430911104133, "grad_norm": 0.7246713042259216, "learning_rate": 1.4686891190901743e-05, "loss": 0.1047, "step": 23664 }, { "epoch": 0.5214651263999295, "grad_norm": 0.7461389303207397, "learning_rate": 1.468582088973361e-05, "loss": 0.0819, "step": 23665 }, { "epoch": 0.5214871616894456, "grad_norm": 0.6219590306282043, "learning_rate": 1.4684750590165763e-05, "loss": 0.0891, "step": 23666 }, { "epoch": 0.5215091969789618, "grad_norm": 0.9757251143455505, "learning_rate": 1.468368029220365e-05, "loss": 0.0888, "step": 23667 }, { "epoch": 0.521531232268478, "grad_norm": 0.6611592769622803, "learning_rate": 1.4682609995852721e-05, "loss": 0.0511, "step": 23668 }, { "epoch": 0.5215532675579941, "grad_norm": 0.7521966099739075, "learning_rate": 1.4681539701118425e-05, "loss": 0.0775, "step": 23669 }, { "epoch": 0.5215753028475103, "grad_norm": 0.5530872344970703, "learning_rate": 1.4680469408006224e-05, "loss": 0.0538, "step": 23670 }, { "epoch": 0.5215973381370265, "grad_norm": 0.9836027026176453, "learning_rate": 1.4679399116521565e-05, "loss": 0.0794, "step": 23671 }, { "epoch": 0.5216193734265426, "grad_norm": 0.6088750958442688, "learning_rate": 1.4678328826669895e-05, "loss": 0.057, "step": 23672 }, { "epoch": 0.5216414087160588, "grad_norm": 0.6330373883247375, "learning_rate": 1.4677258538456667e-05, "loss": 0.085, "step": 23673 }, { "epoch": 0.521663444005575, "grad_norm": 1.3258209228515625, "learning_rate": 1.467618825188734e-05, "loss": 0.1029, "step": 23674 }, { "epoch": 0.5216854792950911, "grad_norm": 0.7822802662849426, "learning_rate": 1.4675117966967357e-05, "loss": 0.0723, "step": 23675 }, { "epoch": 0.5217075145846073, "grad_norm": 0.6763815879821777, "learning_rate": 1.4674047683702166e-05, "loss": 0.102, "step": 23676 }, { "epoch": 0.5217295498741235, "grad_norm": 0.40441080927848816, "learning_rate": 1.4672977402097233e-05, "loss": 0.0776, "step": 23677 }, { "epoch": 0.5217515851636396, "grad_norm": 0.8776611685752869, "learning_rate": 1.4671907122158e-05, "loss": 0.0984, "step": 23678 }, { "epoch": 0.5217736204531558, "grad_norm": 0.45004525780677795, "learning_rate": 1.4670836843889923e-05, "loss": 0.0708, "step": 23679 }, { "epoch": 0.5217956557426718, "grad_norm": 0.5108767151832581, "learning_rate": 1.4669766567298443e-05, "loss": 0.05, "step": 23680 }, { "epoch": 0.521817691032188, "grad_norm": 0.5280763506889343, "learning_rate": 1.4668696292389026e-05, "loss": 0.0775, "step": 23681 }, { "epoch": 0.5218397263217042, "grad_norm": 1.2435542345046997, "learning_rate": 1.4667626019167113e-05, "loss": 0.1173, "step": 23682 }, { "epoch": 0.5218617616112203, "grad_norm": 0.6246316432952881, "learning_rate": 1.466655574763816e-05, "loss": 0.1118, "step": 23683 }, { "epoch": 0.5218837969007365, "grad_norm": 0.815199613571167, "learning_rate": 1.4665485477807612e-05, "loss": 0.0823, "step": 23684 }, { "epoch": 0.5219058321902527, "grad_norm": 0.7262936234474182, "learning_rate": 1.4664415209680932e-05, "loss": 0.0953, "step": 23685 }, { "epoch": 0.5219278674797688, "grad_norm": 0.48726826906204224, "learning_rate": 1.4663344943263565e-05, "loss": 0.0889, "step": 23686 }, { "epoch": 0.521949902769285, "grad_norm": 0.5839471220970154, "learning_rate": 1.4662274678560963e-05, "loss": 0.0884, "step": 23687 }, { "epoch": 0.5219719380588012, "grad_norm": 0.5692282915115356, "learning_rate": 1.4661204415578574e-05, "loss": 0.0588, "step": 23688 }, { "epoch": 0.5219939733483173, "grad_norm": 0.7232224941253662, "learning_rate": 1.4660134154321854e-05, "loss": 0.0628, "step": 23689 }, { "epoch": 0.5220160086378335, "grad_norm": 0.5075183510780334, "learning_rate": 1.4659063894796255e-05, "loss": 0.083, "step": 23690 }, { "epoch": 0.5220380439273496, "grad_norm": 0.5027068853378296, "learning_rate": 1.4657993637007225e-05, "loss": 0.0813, "step": 23691 }, { "epoch": 0.5220600792168658, "grad_norm": 0.8259183168411255, "learning_rate": 1.4656923380960212e-05, "loss": 0.0587, "step": 23692 }, { "epoch": 0.522082114506382, "grad_norm": 0.8130736351013184, "learning_rate": 1.4655853126660678e-05, "loss": 0.0651, "step": 23693 }, { "epoch": 0.5221041497958981, "grad_norm": 0.6205142736434937, "learning_rate": 1.465478287411407e-05, "loss": 0.0825, "step": 23694 }, { "epoch": 0.5221261850854143, "grad_norm": 0.5187328457832336, "learning_rate": 1.4653712623325835e-05, "loss": 0.0797, "step": 23695 }, { "epoch": 0.5221482203749305, "grad_norm": 0.9593389630317688, "learning_rate": 1.4652642374301422e-05, "loss": 0.0842, "step": 23696 }, { "epoch": 0.5221702556644466, "grad_norm": 0.6214829683303833, "learning_rate": 1.4651572127046298e-05, "loss": 0.084, "step": 23697 }, { "epoch": 0.5221922909539628, "grad_norm": 0.7386770248413086, "learning_rate": 1.46505018815659e-05, "loss": 0.0684, "step": 23698 }, { "epoch": 0.522214326243479, "grad_norm": 0.8400108218193054, "learning_rate": 1.4649431637865678e-05, "loss": 0.0927, "step": 23699 }, { "epoch": 0.5222363615329951, "grad_norm": 0.8544064164161682, "learning_rate": 1.4648361395951094e-05, "loss": 0.0753, "step": 23700 }, { "epoch": 0.5222583968225113, "grad_norm": 0.36131951212882996, "learning_rate": 1.4647291155827594e-05, "loss": 0.0698, "step": 23701 }, { "epoch": 0.5222804321120275, "grad_norm": 0.3520404100418091, "learning_rate": 1.4646220917500629e-05, "loss": 0.0734, "step": 23702 }, { "epoch": 0.5223024674015436, "grad_norm": 0.46620261669158936, "learning_rate": 1.4645150680975645e-05, "loss": 0.0727, "step": 23703 }, { "epoch": 0.5223245026910597, "grad_norm": 0.49967190623283386, "learning_rate": 1.4644080446258105e-05, "loss": 0.0716, "step": 23704 }, { "epoch": 0.5223465379805758, "grad_norm": 0.753839373588562, "learning_rate": 1.4643010213353455e-05, "loss": 0.0629, "step": 23705 }, { "epoch": 0.522368573270092, "grad_norm": 0.5580222010612488, "learning_rate": 1.4641939982267144e-05, "loss": 0.0862, "step": 23706 }, { "epoch": 0.5223906085596082, "grad_norm": 0.709148645401001, "learning_rate": 1.4640869753004622e-05, "loss": 0.0789, "step": 23707 }, { "epoch": 0.5224126438491243, "grad_norm": 0.5988780856132507, "learning_rate": 1.4639799525571346e-05, "loss": 0.0834, "step": 23708 }, { "epoch": 0.5224346791386405, "grad_norm": 0.9047741293907166, "learning_rate": 1.4638729299972765e-05, "loss": 0.0964, "step": 23709 }, { "epoch": 0.5224567144281567, "grad_norm": 0.8407015800476074, "learning_rate": 1.4637659076214329e-05, "loss": 0.0971, "step": 23710 }, { "epoch": 0.5224787497176728, "grad_norm": 0.6961863040924072, "learning_rate": 1.4636588854301481e-05, "loss": 0.1189, "step": 23711 }, { "epoch": 0.522500785007189, "grad_norm": 0.934226393699646, "learning_rate": 1.463551863423969e-05, "loss": 0.0734, "step": 23712 }, { "epoch": 0.5225228202967052, "grad_norm": 0.7625618577003479, "learning_rate": 1.4634448416034397e-05, "loss": 0.0549, "step": 23713 }, { "epoch": 0.5225448555862213, "grad_norm": 0.9720985293388367, "learning_rate": 1.4633378199691056e-05, "loss": 0.0653, "step": 23714 }, { "epoch": 0.5225668908757375, "grad_norm": 0.6266868710517883, "learning_rate": 1.4632307985215112e-05, "loss": 0.0729, "step": 23715 }, { "epoch": 0.5225889261652537, "grad_norm": 0.5422595143318176, "learning_rate": 1.4631237772612022e-05, "loss": 0.0706, "step": 23716 }, { "epoch": 0.5226109614547698, "grad_norm": 0.8311228156089783, "learning_rate": 1.4630167561887238e-05, "loss": 0.0632, "step": 23717 }, { "epoch": 0.522632996744286, "grad_norm": 0.8421968817710876, "learning_rate": 1.4629097353046203e-05, "loss": 0.0672, "step": 23718 }, { "epoch": 0.5226550320338021, "grad_norm": 0.5508202314376831, "learning_rate": 1.462802714609438e-05, "loss": 0.0895, "step": 23719 }, { "epoch": 0.5226770673233183, "grad_norm": 0.4063095152378082, "learning_rate": 1.4626956941037211e-05, "loss": 0.0657, "step": 23720 }, { "epoch": 0.5226991026128345, "grad_norm": 0.8679909110069275, "learning_rate": 1.4625886737880153e-05, "loss": 0.0868, "step": 23721 }, { "epoch": 0.5227211379023506, "grad_norm": 0.9537873268127441, "learning_rate": 1.4624816536628652e-05, "loss": 0.1042, "step": 23722 }, { "epoch": 0.5227431731918668, "grad_norm": 0.8864849209785461, "learning_rate": 1.4623746337288165e-05, "loss": 0.0974, "step": 23723 }, { "epoch": 0.522765208481383, "grad_norm": 0.9594675898551941, "learning_rate": 1.4622676139864138e-05, "loss": 0.1024, "step": 23724 }, { "epoch": 0.5227872437708991, "grad_norm": 0.583772599697113, "learning_rate": 1.4621605944362023e-05, "loss": 0.0816, "step": 23725 }, { "epoch": 0.5228092790604153, "grad_norm": 0.570320725440979, "learning_rate": 1.4620535750787267e-05, "loss": 0.0719, "step": 23726 }, { "epoch": 0.5228313143499315, "grad_norm": 0.8793016672134399, "learning_rate": 1.4619465559145333e-05, "loss": 0.1006, "step": 23727 }, { "epoch": 0.5228533496394476, "grad_norm": 0.4931916296482086, "learning_rate": 1.4618395369441663e-05, "loss": 0.0819, "step": 23728 }, { "epoch": 0.5228753849289637, "grad_norm": 0.4651116132736206, "learning_rate": 1.4617325181681711e-05, "loss": 0.0705, "step": 23729 }, { "epoch": 0.5228974202184798, "grad_norm": 0.5392591953277588, "learning_rate": 1.4616254995870924e-05, "loss": 0.0464, "step": 23730 }, { "epoch": 0.522919455507996, "grad_norm": 0.9285737872123718, "learning_rate": 1.461518481201476e-05, "loss": 0.1024, "step": 23731 }, { "epoch": 0.5229414907975122, "grad_norm": 0.3860335648059845, "learning_rate": 1.4614114630118665e-05, "loss": 0.0611, "step": 23732 }, { "epoch": 0.5229635260870283, "grad_norm": 0.5568629503250122, "learning_rate": 1.4613044450188092e-05, "loss": 0.0584, "step": 23733 }, { "epoch": 0.5229855613765445, "grad_norm": 0.6460963487625122, "learning_rate": 1.4611974272228484e-05, "loss": 0.0795, "step": 23734 }, { "epoch": 0.5230075966660607, "grad_norm": 0.609107494354248, "learning_rate": 1.4610904096245305e-05, "loss": 0.068, "step": 23735 }, { "epoch": 0.5230296319555768, "grad_norm": 0.7930256128311157, "learning_rate": 1.4609833922244e-05, "loss": 0.0867, "step": 23736 }, { "epoch": 0.523051667245093, "grad_norm": 0.7355238795280457, "learning_rate": 1.4608763750230021e-05, "loss": 0.107, "step": 23737 }, { "epoch": 0.5230737025346092, "grad_norm": 0.6980423331260681, "learning_rate": 1.4607693580208816e-05, "loss": 0.0739, "step": 23738 }, { "epoch": 0.5230957378241253, "grad_norm": 0.635330855846405, "learning_rate": 1.4606623412185839e-05, "loss": 0.0871, "step": 23739 }, { "epoch": 0.5231177731136415, "grad_norm": 0.3774547576904297, "learning_rate": 1.460555324616654e-05, "loss": 0.0972, "step": 23740 }, { "epoch": 0.5231398084031577, "grad_norm": 0.7973373532295227, "learning_rate": 1.4604483082156364e-05, "loss": 0.0915, "step": 23741 }, { "epoch": 0.5231618436926738, "grad_norm": 0.6414675116539001, "learning_rate": 1.4603412920160775e-05, "loss": 0.0783, "step": 23742 }, { "epoch": 0.52318387898219, "grad_norm": 0.7455163598060608, "learning_rate": 1.4602342760185215e-05, "loss": 0.0871, "step": 23743 }, { "epoch": 0.5232059142717062, "grad_norm": 0.6557657718658447, "learning_rate": 1.4601272602235139e-05, "loss": 0.075, "step": 23744 }, { "epoch": 0.5232279495612223, "grad_norm": 0.6898702383041382, "learning_rate": 1.460020244631599e-05, "loss": 0.0838, "step": 23745 }, { "epoch": 0.5232499848507385, "grad_norm": 0.37642934918403625, "learning_rate": 1.4599132292433229e-05, "loss": 0.0869, "step": 23746 }, { "epoch": 0.5232720201402546, "grad_norm": 0.5098279714584351, "learning_rate": 1.4598062140592302e-05, "loss": 0.0893, "step": 23747 }, { "epoch": 0.5232940554297708, "grad_norm": 0.6983438730239868, "learning_rate": 1.459699199079866e-05, "loss": 0.0802, "step": 23748 }, { "epoch": 0.523316090719287, "grad_norm": 0.5366332530975342, "learning_rate": 1.4595921843057745e-05, "loss": 0.0605, "step": 23749 }, { "epoch": 0.5233381260088031, "grad_norm": 1.224758505821228, "learning_rate": 1.4594851697375027e-05, "loss": 0.1255, "step": 23750 }, { "epoch": 0.5233601612983193, "grad_norm": 0.6738669276237488, "learning_rate": 1.4593781553755943e-05, "loss": 0.1019, "step": 23751 }, { "epoch": 0.5233821965878355, "grad_norm": 0.502920389175415, "learning_rate": 1.459271141220595e-05, "loss": 0.0546, "step": 23752 }, { "epoch": 0.5234042318773516, "grad_norm": 0.5503279566764832, "learning_rate": 1.4591641272730492e-05, "loss": 0.0706, "step": 23753 }, { "epoch": 0.5234262671668677, "grad_norm": 1.052677869796753, "learning_rate": 1.4590571135335027e-05, "loss": 0.1087, "step": 23754 }, { "epoch": 0.5234483024563839, "grad_norm": 0.6806453466415405, "learning_rate": 1.4589501000025004e-05, "loss": 0.1049, "step": 23755 }, { "epoch": 0.5234703377459, "grad_norm": 1.1335585117340088, "learning_rate": 1.4588430866805872e-05, "loss": 0.0862, "step": 23756 }, { "epoch": 0.5234923730354162, "grad_norm": 0.9304910898208618, "learning_rate": 1.4587360735683074e-05, "loss": 0.0766, "step": 23757 }, { "epoch": 0.5235144083249323, "grad_norm": 0.5187784433364868, "learning_rate": 1.4586290606662077e-05, "loss": 0.0824, "step": 23758 }, { "epoch": 0.5235364436144485, "grad_norm": 0.41472217440605164, "learning_rate": 1.4585220479748325e-05, "loss": 0.0589, "step": 23759 }, { "epoch": 0.5235584789039647, "grad_norm": 0.30882319808006287, "learning_rate": 1.4584150354947266e-05, "loss": 0.0764, "step": 23760 }, { "epoch": 0.5235805141934808, "grad_norm": 0.7890546917915344, "learning_rate": 1.4583080232264348e-05, "loss": 0.0752, "step": 23761 }, { "epoch": 0.523602549482997, "grad_norm": 0.68942791223526, "learning_rate": 1.458201011170503e-05, "loss": 0.0945, "step": 23762 }, { "epoch": 0.5236245847725132, "grad_norm": 0.6549037098884583, "learning_rate": 1.4580939993274759e-05, "loss": 0.0778, "step": 23763 }, { "epoch": 0.5236466200620293, "grad_norm": 0.7183879017829895, "learning_rate": 1.457986987697898e-05, "loss": 0.0828, "step": 23764 }, { "epoch": 0.5236686553515455, "grad_norm": 0.5256056189537048, "learning_rate": 1.4578799762823153e-05, "loss": 0.0851, "step": 23765 }, { "epoch": 0.5236906906410617, "grad_norm": 0.413723349571228, "learning_rate": 1.4577729650812729e-05, "loss": 0.0722, "step": 23766 }, { "epoch": 0.5237127259305778, "grad_norm": 0.5001441240310669, "learning_rate": 1.457665954095315e-05, "loss": 0.077, "step": 23767 }, { "epoch": 0.523734761220094, "grad_norm": 0.5957474112510681, "learning_rate": 1.457558943324987e-05, "loss": 0.1257, "step": 23768 }, { "epoch": 0.5237567965096102, "grad_norm": 0.9285033345222473, "learning_rate": 1.4574519327708343e-05, "loss": 0.0916, "step": 23769 }, { "epoch": 0.5237788317991263, "grad_norm": 0.75111323595047, "learning_rate": 1.4573449224334016e-05, "loss": 0.0939, "step": 23770 }, { "epoch": 0.5238008670886425, "grad_norm": 0.852949857711792, "learning_rate": 1.4572379123132341e-05, "loss": 0.0852, "step": 23771 }, { "epoch": 0.5238229023781587, "grad_norm": 0.6120345592498779, "learning_rate": 1.4571309024108765e-05, "loss": 0.0724, "step": 23772 }, { "epoch": 0.5238449376676748, "grad_norm": 0.6550245881080627, "learning_rate": 1.4570238927268745e-05, "loss": 0.0883, "step": 23773 }, { "epoch": 0.523866972957191, "grad_norm": 1.1150970458984375, "learning_rate": 1.456916883261773e-05, "loss": 0.1415, "step": 23774 }, { "epoch": 0.5238890082467071, "grad_norm": 0.6977888941764832, "learning_rate": 1.456809874016117e-05, "loss": 0.0731, "step": 23775 }, { "epoch": 0.5239110435362233, "grad_norm": 0.4827212989330292, "learning_rate": 1.456702864990451e-05, "loss": 0.0933, "step": 23776 }, { "epoch": 0.5239330788257395, "grad_norm": 0.9319630265235901, "learning_rate": 1.4565958561853211e-05, "loss": 0.1176, "step": 23777 }, { "epoch": 0.5239551141152555, "grad_norm": 0.5976973176002502, "learning_rate": 1.4564888476012715e-05, "loss": 0.0665, "step": 23778 }, { "epoch": 0.5239771494047717, "grad_norm": 0.7411410212516785, "learning_rate": 1.4563818392388476e-05, "loss": 0.0931, "step": 23779 }, { "epoch": 0.5239991846942879, "grad_norm": 0.5553072690963745, "learning_rate": 1.4562748310985938e-05, "loss": 0.0385, "step": 23780 }, { "epoch": 0.524021219983804, "grad_norm": 0.7518373131752014, "learning_rate": 1.4561678231810564e-05, "loss": 0.0832, "step": 23781 }, { "epoch": 0.5240432552733202, "grad_norm": 0.4095604419708252, "learning_rate": 1.4560608154867797e-05, "loss": 0.0846, "step": 23782 }, { "epoch": 0.5240652905628364, "grad_norm": 1.0437370538711548, "learning_rate": 1.4559538080163089e-05, "loss": 0.1178, "step": 23783 }, { "epoch": 0.5240873258523525, "grad_norm": 0.42473143339157104, "learning_rate": 1.4558468007701886e-05, "loss": 0.0329, "step": 23784 }, { "epoch": 0.5241093611418687, "grad_norm": 0.7615542411804199, "learning_rate": 1.4557397937489646e-05, "loss": 0.0674, "step": 23785 }, { "epoch": 0.5241313964313848, "grad_norm": 0.37803635001182556, "learning_rate": 1.4556327869531815e-05, "loss": 0.0487, "step": 23786 }, { "epoch": 0.524153431720901, "grad_norm": 0.5847317576408386, "learning_rate": 1.4555257803833837e-05, "loss": 0.0654, "step": 23787 }, { "epoch": 0.5241754670104172, "grad_norm": 0.5665585994720459, "learning_rate": 1.4554187740401179e-05, "loss": 0.0749, "step": 23788 }, { "epoch": 0.5241975022999333, "grad_norm": 0.5801910161972046, "learning_rate": 1.4553117679239277e-05, "loss": 0.0576, "step": 23789 }, { "epoch": 0.5242195375894495, "grad_norm": 0.7803645730018616, "learning_rate": 1.455204762035359e-05, "loss": 0.0848, "step": 23790 }, { "epoch": 0.5242415728789657, "grad_norm": 0.960826575756073, "learning_rate": 1.455097756374956e-05, "loss": 0.0884, "step": 23791 }, { "epoch": 0.5242636081684818, "grad_norm": 0.4347909986972809, "learning_rate": 1.4549907509432647e-05, "loss": 0.0727, "step": 23792 }, { "epoch": 0.524285643457998, "grad_norm": 0.5316637754440308, "learning_rate": 1.4548837457408295e-05, "loss": 0.0725, "step": 23793 }, { "epoch": 0.5243076787475142, "grad_norm": 0.5308687686920166, "learning_rate": 1.4547767407681956e-05, "loss": 0.0761, "step": 23794 }, { "epoch": 0.5243297140370303, "grad_norm": 0.8412036895751953, "learning_rate": 1.4546697360259073e-05, "loss": 0.0629, "step": 23795 }, { "epoch": 0.5243517493265465, "grad_norm": 0.7941833734512329, "learning_rate": 1.454562731514511e-05, "loss": 0.1016, "step": 23796 }, { "epoch": 0.5243737846160627, "grad_norm": 0.8278917670249939, "learning_rate": 1.4544557272345511e-05, "loss": 0.0659, "step": 23797 }, { "epoch": 0.5243958199055788, "grad_norm": 0.7999269962310791, "learning_rate": 1.4543487231865727e-05, "loss": 0.1026, "step": 23798 }, { "epoch": 0.524417855195095, "grad_norm": 0.4602844715118408, "learning_rate": 1.4542417193711203e-05, "loss": 0.0699, "step": 23799 }, { "epoch": 0.5244398904846111, "grad_norm": 0.459361732006073, "learning_rate": 1.4541347157887396e-05, "loss": 0.0648, "step": 23800 }, { "epoch": 0.5244619257741273, "grad_norm": 0.39008477330207825, "learning_rate": 1.4540277124399755e-05, "loss": 0.0788, "step": 23801 }, { "epoch": 0.5244839610636435, "grad_norm": 0.6838597059249878, "learning_rate": 1.453920709325373e-05, "loss": 0.0996, "step": 23802 }, { "epoch": 0.5245059963531595, "grad_norm": 0.4610568583011627, "learning_rate": 1.4538137064454764e-05, "loss": 0.1024, "step": 23803 }, { "epoch": 0.5245280316426757, "grad_norm": 0.878348171710968, "learning_rate": 1.4537067038008318e-05, "loss": 0.0679, "step": 23804 }, { "epoch": 0.5245500669321919, "grad_norm": 0.7532339096069336, "learning_rate": 1.453599701391984e-05, "loss": 0.0686, "step": 23805 }, { "epoch": 0.524572102221708, "grad_norm": 0.4245532155036926, "learning_rate": 1.4534926992194777e-05, "loss": 0.0685, "step": 23806 }, { "epoch": 0.5245941375112242, "grad_norm": 0.8105254173278809, "learning_rate": 1.4533856972838573e-05, "loss": 0.0662, "step": 23807 }, { "epoch": 0.5246161728007404, "grad_norm": 0.5366338491439819, "learning_rate": 1.4532786955856692e-05, "loss": 0.0785, "step": 23808 }, { "epoch": 0.5246382080902565, "grad_norm": 0.6975083947181702, "learning_rate": 1.4531716941254582e-05, "loss": 0.1021, "step": 23809 }, { "epoch": 0.5246602433797727, "grad_norm": 0.5036147236824036, "learning_rate": 1.4530646929037678e-05, "loss": 0.0558, "step": 23810 }, { "epoch": 0.5246822786692888, "grad_norm": 0.5936070084571838, "learning_rate": 1.4529576919211447e-05, "loss": 0.0796, "step": 23811 }, { "epoch": 0.524704313958805, "grad_norm": 0.7308490872383118, "learning_rate": 1.4528506911781335e-05, "loss": 0.0692, "step": 23812 }, { "epoch": 0.5247263492483212, "grad_norm": 0.6101308465003967, "learning_rate": 1.452743690675279e-05, "loss": 0.0772, "step": 23813 }, { "epoch": 0.5247483845378373, "grad_norm": 0.5579413771629333, "learning_rate": 1.4526366904131253e-05, "loss": 0.0694, "step": 23814 }, { "epoch": 0.5247704198273535, "grad_norm": 0.8714691400527954, "learning_rate": 1.4525296903922193e-05, "loss": 0.0807, "step": 23815 }, { "epoch": 0.5247924551168697, "grad_norm": 0.6752066612243652, "learning_rate": 1.4524226906131048e-05, "loss": 0.0974, "step": 23816 }, { "epoch": 0.5248144904063858, "grad_norm": 0.6750283241271973, "learning_rate": 1.452315691076327e-05, "loss": 0.0981, "step": 23817 }, { "epoch": 0.524836525695902, "grad_norm": 0.5800110697746277, "learning_rate": 1.452208691782431e-05, "loss": 0.0444, "step": 23818 }, { "epoch": 0.5248585609854182, "grad_norm": 0.954807460308075, "learning_rate": 1.4521016927319618e-05, "loss": 0.0437, "step": 23819 }, { "epoch": 0.5248805962749343, "grad_norm": 0.862966775894165, "learning_rate": 1.4519946939254644e-05, "loss": 0.0624, "step": 23820 }, { "epoch": 0.5249026315644505, "grad_norm": 0.8105518221855164, "learning_rate": 1.4518876953634837e-05, "loss": 0.085, "step": 23821 }, { "epoch": 0.5249246668539667, "grad_norm": 0.759880006313324, "learning_rate": 1.4517806970465643e-05, "loss": 0.0689, "step": 23822 }, { "epoch": 0.5249467021434828, "grad_norm": 0.737359344959259, "learning_rate": 1.4516736989752523e-05, "loss": 0.059, "step": 23823 }, { "epoch": 0.524968737432999, "grad_norm": 0.8247553110122681, "learning_rate": 1.4515667011500919e-05, "loss": 0.0825, "step": 23824 }, { "epoch": 0.5249907727225152, "grad_norm": 0.9850982427597046, "learning_rate": 1.4514597035716283e-05, "loss": 0.0732, "step": 23825 }, { "epoch": 0.5250128080120313, "grad_norm": 0.6281585097312927, "learning_rate": 1.4513527062404061e-05, "loss": 0.0891, "step": 23826 }, { "epoch": 0.5250348433015475, "grad_norm": 0.8612545132637024, "learning_rate": 1.4512457091569713e-05, "loss": 0.0742, "step": 23827 }, { "epoch": 0.5250568785910635, "grad_norm": 0.3501603901386261, "learning_rate": 1.451138712321868e-05, "loss": 0.0677, "step": 23828 }, { "epoch": 0.5250789138805797, "grad_norm": 0.824717104434967, "learning_rate": 1.4510317157356406e-05, "loss": 0.0721, "step": 23829 }, { "epoch": 0.5251009491700959, "grad_norm": 0.5314421653747559, "learning_rate": 1.4509247193988358e-05, "loss": 0.0707, "step": 23830 }, { "epoch": 0.525122984459612, "grad_norm": 0.4998528063297272, "learning_rate": 1.4508177233119977e-05, "loss": 0.0701, "step": 23831 }, { "epoch": 0.5251450197491282, "grad_norm": 0.697625994682312, "learning_rate": 1.4507107274756713e-05, "loss": 0.0897, "step": 23832 }, { "epoch": 0.5251670550386444, "grad_norm": 0.49855881929397583, "learning_rate": 1.4506037318904014e-05, "loss": 0.0576, "step": 23833 }, { "epoch": 0.5251890903281605, "grad_norm": 0.5610367059707642, "learning_rate": 1.4504967365567333e-05, "loss": 0.0691, "step": 23834 }, { "epoch": 0.5252111256176767, "grad_norm": 0.6750076413154602, "learning_rate": 1.450389741475212e-05, "loss": 0.0906, "step": 23835 }, { "epoch": 0.5252331609071929, "grad_norm": 0.25266245007514954, "learning_rate": 1.4502827466463824e-05, "loss": 0.0828, "step": 23836 }, { "epoch": 0.525255196196709, "grad_norm": 0.5606952905654907, "learning_rate": 1.4501757520707886e-05, "loss": 0.0771, "step": 23837 }, { "epoch": 0.5252772314862252, "grad_norm": 0.7107253670692444, "learning_rate": 1.4500687577489771e-05, "loss": 0.1101, "step": 23838 }, { "epoch": 0.5252992667757413, "grad_norm": 1.1126033067703247, "learning_rate": 1.4499617636814924e-05, "loss": 0.0823, "step": 23839 }, { "epoch": 0.5253213020652575, "grad_norm": 0.6271606683731079, "learning_rate": 1.4498547698688791e-05, "loss": 0.0659, "step": 23840 }, { "epoch": 0.5253433373547737, "grad_norm": 0.8842442035675049, "learning_rate": 1.4497477763116819e-05, "loss": 0.0856, "step": 23841 }, { "epoch": 0.5253653726442898, "grad_norm": 0.6053587794303894, "learning_rate": 1.4496407830104467e-05, "loss": 0.0489, "step": 23842 }, { "epoch": 0.525387407933806, "grad_norm": 0.3859078288078308, "learning_rate": 1.4495337899657181e-05, "loss": 0.0756, "step": 23843 }, { "epoch": 0.5254094432233222, "grad_norm": 0.38964420557022095, "learning_rate": 1.4494267971780408e-05, "loss": 0.0661, "step": 23844 }, { "epoch": 0.5254314785128383, "grad_norm": 0.8114281892776489, "learning_rate": 1.4493198046479593e-05, "loss": 0.1017, "step": 23845 }, { "epoch": 0.5254535138023545, "grad_norm": 0.6681833267211914, "learning_rate": 1.4492128123760199e-05, "loss": 0.0537, "step": 23846 }, { "epoch": 0.5254755490918707, "grad_norm": 0.3972938358783722, "learning_rate": 1.4491058203627669e-05, "loss": 0.0526, "step": 23847 }, { "epoch": 0.5254975843813868, "grad_norm": 0.7606290578842163, "learning_rate": 1.4489988286087452e-05, "loss": 0.082, "step": 23848 }, { "epoch": 0.525519619670903, "grad_norm": 0.5906533002853394, "learning_rate": 1.4488918371144994e-05, "loss": 0.087, "step": 23849 }, { "epoch": 0.5255416549604192, "grad_norm": 0.6057451367378235, "learning_rate": 1.4487848458805755e-05, "loss": 0.0837, "step": 23850 }, { "epoch": 0.5255636902499353, "grad_norm": 0.6436817646026611, "learning_rate": 1.4486778549075176e-05, "loss": 0.0889, "step": 23851 }, { "epoch": 0.5255857255394514, "grad_norm": 0.42713409662246704, "learning_rate": 1.4485708641958704e-05, "loss": 0.0658, "step": 23852 }, { "epoch": 0.5256077608289675, "grad_norm": 0.45693346858024597, "learning_rate": 1.44846387374618e-05, "loss": 0.0778, "step": 23853 }, { "epoch": 0.5256297961184837, "grad_norm": 0.526805579662323, "learning_rate": 1.4483568835589905e-05, "loss": 0.0578, "step": 23854 }, { "epoch": 0.5256518314079999, "grad_norm": 0.6212767958641052, "learning_rate": 1.4482498936348474e-05, "loss": 0.076, "step": 23855 }, { "epoch": 0.525673866697516, "grad_norm": 0.8304803967475891, "learning_rate": 1.4481429039742948e-05, "loss": 0.1032, "step": 23856 }, { "epoch": 0.5256959019870322, "grad_norm": 0.4179587960243225, "learning_rate": 1.4480359145778786e-05, "loss": 0.0615, "step": 23857 }, { "epoch": 0.5257179372765484, "grad_norm": 0.6493831276893616, "learning_rate": 1.4479289254461434e-05, "loss": 0.0712, "step": 23858 }, { "epoch": 0.5257399725660645, "grad_norm": 0.5693266987800598, "learning_rate": 1.447821936579634e-05, "loss": 0.0719, "step": 23859 }, { "epoch": 0.5257620078555807, "grad_norm": 0.8959646821022034, "learning_rate": 1.4477149479788948e-05, "loss": 0.1119, "step": 23860 }, { "epoch": 0.5257840431450969, "grad_norm": 0.715610682964325, "learning_rate": 1.4476079596444722e-05, "loss": 0.0649, "step": 23861 }, { "epoch": 0.525806078434613, "grad_norm": 0.4254596531391144, "learning_rate": 1.4475009715769103e-05, "loss": 0.0632, "step": 23862 }, { "epoch": 0.5258281137241292, "grad_norm": 0.764602541923523, "learning_rate": 1.447393983776754e-05, "loss": 0.0822, "step": 23863 }, { "epoch": 0.5258501490136454, "grad_norm": 0.5086377263069153, "learning_rate": 1.4472869962445481e-05, "loss": 0.1137, "step": 23864 }, { "epoch": 0.5258721843031615, "grad_norm": 0.7462278008460999, "learning_rate": 1.4471800089808382e-05, "loss": 0.0854, "step": 23865 }, { "epoch": 0.5258942195926777, "grad_norm": 0.385215699672699, "learning_rate": 1.4470730219861686e-05, "loss": 0.0874, "step": 23866 }, { "epoch": 0.5259162548821938, "grad_norm": 0.6621024012565613, "learning_rate": 1.4469660352610848e-05, "loss": 0.0688, "step": 23867 }, { "epoch": 0.52593829017171, "grad_norm": 0.7326233983039856, "learning_rate": 1.4468590488061306e-05, "loss": 0.0585, "step": 23868 }, { "epoch": 0.5259603254612262, "grad_norm": 0.34182605147361755, "learning_rate": 1.4467520626218524e-05, "loss": 0.067, "step": 23869 }, { "epoch": 0.5259823607507423, "grad_norm": 0.8037694692611694, "learning_rate": 1.4466450767087946e-05, "loss": 0.0811, "step": 23870 }, { "epoch": 0.5260043960402585, "grad_norm": 0.5586363673210144, "learning_rate": 1.446538091067502e-05, "loss": 0.0468, "step": 23871 }, { "epoch": 0.5260264313297747, "grad_norm": 0.7609515190124512, "learning_rate": 1.446431105698519e-05, "loss": 0.0698, "step": 23872 }, { "epoch": 0.5260484666192908, "grad_norm": 0.5184838175773621, "learning_rate": 1.4463241206023916e-05, "loss": 0.0584, "step": 23873 }, { "epoch": 0.526070501908807, "grad_norm": 0.5842844843864441, "learning_rate": 1.4462171357796643e-05, "loss": 0.0844, "step": 23874 }, { "epoch": 0.5260925371983232, "grad_norm": 0.6219222545623779, "learning_rate": 1.4461101512308814e-05, "loss": 0.0701, "step": 23875 }, { "epoch": 0.5261145724878393, "grad_norm": 1.163906455039978, "learning_rate": 1.446003166956589e-05, "loss": 0.081, "step": 23876 }, { "epoch": 0.5261366077773554, "grad_norm": 0.7611828446388245, "learning_rate": 1.4458961829573313e-05, "loss": 0.0788, "step": 23877 }, { "epoch": 0.5261586430668715, "grad_norm": 0.5110177397727966, "learning_rate": 1.4457891992336535e-05, "loss": 0.0601, "step": 23878 }, { "epoch": 0.5261806783563877, "grad_norm": 0.9070239663124084, "learning_rate": 1.4456822157860999e-05, "loss": 0.1118, "step": 23879 }, { "epoch": 0.5262027136459039, "grad_norm": 0.7528945207595825, "learning_rate": 1.4455752326152162e-05, "loss": 0.0866, "step": 23880 }, { "epoch": 0.52622474893542, "grad_norm": 0.7701524496078491, "learning_rate": 1.4454682497215472e-05, "loss": 0.089, "step": 23881 }, { "epoch": 0.5262467842249362, "grad_norm": 0.9113484621047974, "learning_rate": 1.4453612671056375e-05, "loss": 0.0754, "step": 23882 }, { "epoch": 0.5262688195144524, "grad_norm": 0.6645132899284363, "learning_rate": 1.4452542847680317e-05, "loss": 0.0727, "step": 23883 }, { "epoch": 0.5262908548039685, "grad_norm": 0.8775434494018555, "learning_rate": 1.4451473027092757e-05, "loss": 0.0658, "step": 23884 }, { "epoch": 0.5263128900934847, "grad_norm": 0.8086909055709839, "learning_rate": 1.4450403209299139e-05, "loss": 0.0875, "step": 23885 }, { "epoch": 0.5263349253830009, "grad_norm": 0.5074462890625, "learning_rate": 1.4449333394304912e-05, "loss": 0.054, "step": 23886 }, { "epoch": 0.526356960672517, "grad_norm": 0.6767222881317139, "learning_rate": 1.4448263582115523e-05, "loss": 0.0575, "step": 23887 }, { "epoch": 0.5263789959620332, "grad_norm": 0.7260271310806274, "learning_rate": 1.4447193772736427e-05, "loss": 0.0802, "step": 23888 }, { "epoch": 0.5264010312515494, "grad_norm": 0.8085185289382935, "learning_rate": 1.4446123966173068e-05, "loss": 0.0871, "step": 23889 }, { "epoch": 0.5264230665410655, "grad_norm": 0.4767796993255615, "learning_rate": 1.4445054162430896e-05, "loss": 0.0592, "step": 23890 }, { "epoch": 0.5264451018305817, "grad_norm": 0.758622944355011, "learning_rate": 1.4443984361515356e-05, "loss": 0.0802, "step": 23891 }, { "epoch": 0.5264671371200979, "grad_norm": 0.711956262588501, "learning_rate": 1.4442914563431909e-05, "loss": 0.0881, "step": 23892 }, { "epoch": 0.526489172409614, "grad_norm": 0.4402812123298645, "learning_rate": 1.4441844768185996e-05, "loss": 0.0625, "step": 23893 }, { "epoch": 0.5265112076991302, "grad_norm": 0.514543354511261, "learning_rate": 1.4440774975783066e-05, "loss": 0.0634, "step": 23894 }, { "epoch": 0.5265332429886463, "grad_norm": 0.7187220454216003, "learning_rate": 1.4439705186228566e-05, "loss": 0.0828, "step": 23895 }, { "epoch": 0.5265552782781625, "grad_norm": 0.6960951685905457, "learning_rate": 1.4438635399527952e-05, "loss": 0.065, "step": 23896 }, { "epoch": 0.5265773135676787, "grad_norm": 0.6423302292823792, "learning_rate": 1.4437565615686668e-05, "loss": 0.0842, "step": 23897 }, { "epoch": 0.5265993488571948, "grad_norm": 0.573380708694458, "learning_rate": 1.4436495834710157e-05, "loss": 0.0833, "step": 23898 }, { "epoch": 0.526621384146711, "grad_norm": 0.6613307595252991, "learning_rate": 1.4435426056603883e-05, "loss": 0.0562, "step": 23899 }, { "epoch": 0.5266434194362272, "grad_norm": 0.5062395334243774, "learning_rate": 1.4434356281373286e-05, "loss": 0.0445, "step": 23900 }, { "epoch": 0.5266654547257433, "grad_norm": 0.5844756364822388, "learning_rate": 1.4433286509023817e-05, "loss": 0.0747, "step": 23901 }, { "epoch": 0.5266874900152594, "grad_norm": 0.6174210906028748, "learning_rate": 1.4432216739560918e-05, "loss": 0.0985, "step": 23902 }, { "epoch": 0.5267095253047755, "grad_norm": 0.8443540334701538, "learning_rate": 1.4431146972990049e-05, "loss": 0.1282, "step": 23903 }, { "epoch": 0.5267315605942917, "grad_norm": 0.30975115299224854, "learning_rate": 1.4430077209316654e-05, "loss": 0.0575, "step": 23904 }, { "epoch": 0.5267535958838079, "grad_norm": 0.2736152410507202, "learning_rate": 1.4429007448546178e-05, "loss": 0.051, "step": 23905 }, { "epoch": 0.526775631173324, "grad_norm": 0.3923630714416504, "learning_rate": 1.442793769068407e-05, "loss": 0.0637, "step": 23906 }, { "epoch": 0.5267976664628402, "grad_norm": 0.4528963267803192, "learning_rate": 1.4426867935735788e-05, "loss": 0.0366, "step": 23907 }, { "epoch": 0.5268197017523564, "grad_norm": 0.494952529668808, "learning_rate": 1.4425798183706773e-05, "loss": 0.0765, "step": 23908 }, { "epoch": 0.5268417370418725, "grad_norm": 1.580161452293396, "learning_rate": 1.4424728434602477e-05, "loss": 0.0431, "step": 23909 }, { "epoch": 0.5268637723313887, "grad_norm": 0.4334549009799957, "learning_rate": 1.4423658688428344e-05, "loss": 0.0463, "step": 23910 }, { "epoch": 0.5268858076209049, "grad_norm": 0.725546658039093, "learning_rate": 1.4422588945189831e-05, "loss": 0.0512, "step": 23911 }, { "epoch": 0.526907842910421, "grad_norm": 0.7807321548461914, "learning_rate": 1.4421519204892382e-05, "loss": 0.0631, "step": 23912 }, { "epoch": 0.5269298781999372, "grad_norm": 0.7677041888237, "learning_rate": 1.4420449467541445e-05, "loss": 0.1077, "step": 23913 }, { "epoch": 0.5269519134894534, "grad_norm": 0.5959102511405945, "learning_rate": 1.4419379733142463e-05, "loss": 0.063, "step": 23914 }, { "epoch": 0.5269739487789695, "grad_norm": 0.5597962141036987, "learning_rate": 1.4418310001700898e-05, "loss": 0.0751, "step": 23915 }, { "epoch": 0.5269959840684857, "grad_norm": 0.5542335510253906, "learning_rate": 1.4417240273222193e-05, "loss": 0.0617, "step": 23916 }, { "epoch": 0.5270180193580019, "grad_norm": 0.6285355687141418, "learning_rate": 1.4416170547711794e-05, "loss": 0.0675, "step": 23917 }, { "epoch": 0.527040054647518, "grad_norm": 0.6125801801681519, "learning_rate": 1.4415100825175147e-05, "loss": 0.0661, "step": 23918 }, { "epoch": 0.5270620899370342, "grad_norm": 0.36403021216392517, "learning_rate": 1.441403110561771e-05, "loss": 0.0545, "step": 23919 }, { "epoch": 0.5270841252265503, "grad_norm": 0.49846506118774414, "learning_rate": 1.441296138904493e-05, "loss": 0.0642, "step": 23920 }, { "epoch": 0.5271061605160665, "grad_norm": 0.5263556838035583, "learning_rate": 1.4411891675462243e-05, "loss": 0.0641, "step": 23921 }, { "epoch": 0.5271281958055827, "grad_norm": 0.572912335395813, "learning_rate": 1.4410821964875114e-05, "loss": 0.0432, "step": 23922 }, { "epoch": 0.5271502310950988, "grad_norm": 0.38116979598999023, "learning_rate": 1.4409752257288986e-05, "loss": 0.0785, "step": 23923 }, { "epoch": 0.527172266384615, "grad_norm": 0.6262803673744202, "learning_rate": 1.4408682552709305e-05, "loss": 0.0783, "step": 23924 }, { "epoch": 0.5271943016741312, "grad_norm": 0.4679287374019623, "learning_rate": 1.4407612851141515e-05, "loss": 0.0474, "step": 23925 }, { "epoch": 0.5272163369636473, "grad_norm": 0.6724183559417725, "learning_rate": 1.4406543152591074e-05, "loss": 0.0914, "step": 23926 }, { "epoch": 0.5272383722531634, "grad_norm": 0.5773499608039856, "learning_rate": 1.4405473457063431e-05, "loss": 0.0849, "step": 23927 }, { "epoch": 0.5272604075426796, "grad_norm": 0.615675687789917, "learning_rate": 1.4404403764564028e-05, "loss": 0.0618, "step": 23928 }, { "epoch": 0.5272824428321957, "grad_norm": 0.9233871102333069, "learning_rate": 1.4403334075098313e-05, "loss": 0.0862, "step": 23929 }, { "epoch": 0.5273044781217119, "grad_norm": 0.7900962829589844, "learning_rate": 1.4402264388671741e-05, "loss": 0.1161, "step": 23930 }, { "epoch": 0.527326513411228, "grad_norm": 0.5712472200393677, "learning_rate": 1.4401194705289758e-05, "loss": 0.103, "step": 23931 }, { "epoch": 0.5273485487007442, "grad_norm": 0.5082858800888062, "learning_rate": 1.440012502495781e-05, "loss": 0.0774, "step": 23932 }, { "epoch": 0.5273705839902604, "grad_norm": 1.188707947731018, "learning_rate": 1.4399055347681342e-05, "loss": 0.0963, "step": 23933 }, { "epoch": 0.5273926192797765, "grad_norm": 0.7185783982276917, "learning_rate": 1.439798567346581e-05, "loss": 0.0813, "step": 23934 }, { "epoch": 0.5274146545692927, "grad_norm": 0.5062659978866577, "learning_rate": 1.4396916002316665e-05, "loss": 0.0415, "step": 23935 }, { "epoch": 0.5274366898588089, "grad_norm": 0.6722710132598877, "learning_rate": 1.4395846334239348e-05, "loss": 0.0783, "step": 23936 }, { "epoch": 0.527458725148325, "grad_norm": 0.5554061532020569, "learning_rate": 1.4394776669239306e-05, "loss": 0.0751, "step": 23937 }, { "epoch": 0.5274807604378412, "grad_norm": 0.673872709274292, "learning_rate": 1.4393707007321995e-05, "loss": 0.0764, "step": 23938 }, { "epoch": 0.5275027957273574, "grad_norm": 0.6676788926124573, "learning_rate": 1.4392637348492858e-05, "loss": 0.0868, "step": 23939 }, { "epoch": 0.5275248310168735, "grad_norm": 0.7633715867996216, "learning_rate": 1.4391567692757345e-05, "loss": 0.0841, "step": 23940 }, { "epoch": 0.5275468663063897, "grad_norm": 0.5221190452575684, "learning_rate": 1.4390498040120898e-05, "loss": 0.0675, "step": 23941 }, { "epoch": 0.5275689015959059, "grad_norm": 0.8654349446296692, "learning_rate": 1.4389428390588979e-05, "loss": 0.0615, "step": 23942 }, { "epoch": 0.527590936885422, "grad_norm": 0.5214987993240356, "learning_rate": 1.4388358744167028e-05, "loss": 0.0613, "step": 23943 }, { "epoch": 0.5276129721749382, "grad_norm": 0.5861511826515198, "learning_rate": 1.4387289100860489e-05, "loss": 0.0762, "step": 23944 }, { "epoch": 0.5276350074644544, "grad_norm": 0.8458799123764038, "learning_rate": 1.438621946067482e-05, "loss": 0.0904, "step": 23945 }, { "epoch": 0.5276570427539705, "grad_norm": 0.6206072568893433, "learning_rate": 1.4385149823615462e-05, "loss": 0.0638, "step": 23946 }, { "epoch": 0.5276790780434867, "grad_norm": 0.4958368241786957, "learning_rate": 1.4384080189687869e-05, "loss": 0.0538, "step": 23947 }, { "epoch": 0.5277011133330028, "grad_norm": 0.44993576407432556, "learning_rate": 1.4383010558897479e-05, "loss": 0.0329, "step": 23948 }, { "epoch": 0.527723148622519, "grad_norm": 1.0896323919296265, "learning_rate": 1.438194093124975e-05, "loss": 0.0952, "step": 23949 }, { "epoch": 0.5277451839120352, "grad_norm": 0.45347079634666443, "learning_rate": 1.438087130675013e-05, "loss": 0.115, "step": 23950 }, { "epoch": 0.5277672192015512, "grad_norm": 0.7893814444541931, "learning_rate": 1.4379801685404065e-05, "loss": 0.0952, "step": 23951 }, { "epoch": 0.5277892544910674, "grad_norm": 0.4069610834121704, "learning_rate": 1.4378732067216998e-05, "loss": 0.0701, "step": 23952 }, { "epoch": 0.5278112897805836, "grad_norm": 0.5383341312408447, "learning_rate": 1.4377662452194386e-05, "loss": 0.0554, "step": 23953 }, { "epoch": 0.5278333250700997, "grad_norm": 0.5306621789932251, "learning_rate": 1.4376592840341672e-05, "loss": 0.0994, "step": 23954 }, { "epoch": 0.5278553603596159, "grad_norm": 0.7582114934921265, "learning_rate": 1.4375523231664307e-05, "loss": 0.0823, "step": 23955 }, { "epoch": 0.527877395649132, "grad_norm": 0.9321688413619995, "learning_rate": 1.4374453626167728e-05, "loss": 0.1094, "step": 23956 }, { "epoch": 0.5278994309386482, "grad_norm": 0.8799129128456116, "learning_rate": 1.43733840238574e-05, "loss": 0.093, "step": 23957 }, { "epoch": 0.5279214662281644, "grad_norm": 0.3962092101573944, "learning_rate": 1.4372314424738762e-05, "loss": 0.056, "step": 23958 }, { "epoch": 0.5279435015176805, "grad_norm": 0.5803810954093933, "learning_rate": 1.4371244828817264e-05, "loss": 0.067, "step": 23959 }, { "epoch": 0.5279655368071967, "grad_norm": 0.6711086630821228, "learning_rate": 1.437017523609835e-05, "loss": 0.0662, "step": 23960 }, { "epoch": 0.5279875720967129, "grad_norm": 0.6918203234672546, "learning_rate": 1.4369105646587476e-05, "loss": 0.0772, "step": 23961 }, { "epoch": 0.528009607386229, "grad_norm": 0.44895437359809875, "learning_rate": 1.4368036060290083e-05, "loss": 0.0805, "step": 23962 }, { "epoch": 0.5280316426757452, "grad_norm": 0.3566529452800751, "learning_rate": 1.4366966477211615e-05, "loss": 0.073, "step": 23963 }, { "epoch": 0.5280536779652614, "grad_norm": 0.6362183094024658, "learning_rate": 1.4365896897357533e-05, "loss": 0.0596, "step": 23964 }, { "epoch": 0.5280757132547775, "grad_norm": 0.4576544165611267, "learning_rate": 1.4364827320733278e-05, "loss": 0.0597, "step": 23965 }, { "epoch": 0.5280977485442937, "grad_norm": 0.512097954750061, "learning_rate": 1.4363757747344298e-05, "loss": 0.0724, "step": 23966 }, { "epoch": 0.5281197838338099, "grad_norm": 0.4687756299972534, "learning_rate": 1.436268817719604e-05, "loss": 0.059, "step": 23967 }, { "epoch": 0.528141819123326, "grad_norm": 0.6866800785064697, "learning_rate": 1.4361618610293953e-05, "loss": 0.0736, "step": 23968 }, { "epoch": 0.5281638544128422, "grad_norm": 0.5792249441146851, "learning_rate": 1.4360549046643485e-05, "loss": 0.055, "step": 23969 }, { "epoch": 0.5281858897023584, "grad_norm": 0.9765554666519165, "learning_rate": 1.4359479486250086e-05, "loss": 0.0821, "step": 23970 }, { "epoch": 0.5282079249918745, "grad_norm": 0.6399824023246765, "learning_rate": 1.4358409929119192e-05, "loss": 0.0542, "step": 23971 }, { "epoch": 0.5282299602813907, "grad_norm": 0.3620666563510895, "learning_rate": 1.4357340375256268e-05, "loss": 0.1036, "step": 23972 }, { "epoch": 0.5282519955709069, "grad_norm": 0.32058677077293396, "learning_rate": 1.4356270824666756e-05, "loss": 0.0793, "step": 23973 }, { "epoch": 0.528274030860423, "grad_norm": 0.6778358817100525, "learning_rate": 1.43552012773561e-05, "loss": 0.084, "step": 23974 }, { "epoch": 0.5282960661499392, "grad_norm": 0.8608207106590271, "learning_rate": 1.4354131733329747e-05, "loss": 0.1122, "step": 23975 }, { "epoch": 0.5283181014394552, "grad_norm": 0.5372586250305176, "learning_rate": 1.435306219259315e-05, "loss": 0.0803, "step": 23976 }, { "epoch": 0.5283401367289714, "grad_norm": 0.6575307250022888, "learning_rate": 1.4351992655151754e-05, "loss": 0.068, "step": 23977 }, { "epoch": 0.5283621720184876, "grad_norm": 0.5152378678321838, "learning_rate": 1.4350923121011008e-05, "loss": 0.0782, "step": 23978 }, { "epoch": 0.5283842073080037, "grad_norm": 0.5646185278892517, "learning_rate": 1.434985359017635e-05, "loss": 0.0803, "step": 23979 }, { "epoch": 0.5284062425975199, "grad_norm": 0.8380458950996399, "learning_rate": 1.4348784062653245e-05, "loss": 0.0906, "step": 23980 }, { "epoch": 0.5284282778870361, "grad_norm": 0.6936518549919128, "learning_rate": 1.4347714538447132e-05, "loss": 0.0818, "step": 23981 }, { "epoch": 0.5284503131765522, "grad_norm": 0.7719066739082336, "learning_rate": 1.4346645017563457e-05, "loss": 0.0832, "step": 23982 }, { "epoch": 0.5284723484660684, "grad_norm": 0.9069734215736389, "learning_rate": 1.4345575500007669e-05, "loss": 0.0796, "step": 23983 }, { "epoch": 0.5284943837555846, "grad_norm": 0.7109729051589966, "learning_rate": 1.4344505985785214e-05, "loss": 0.1167, "step": 23984 }, { "epoch": 0.5285164190451007, "grad_norm": 1.3507479429244995, "learning_rate": 1.4343436474901547e-05, "loss": 0.0914, "step": 23985 }, { "epoch": 0.5285384543346169, "grad_norm": 0.7563494443893433, "learning_rate": 1.4342366967362101e-05, "loss": 0.1232, "step": 23986 }, { "epoch": 0.528560489624133, "grad_norm": 0.5499937534332275, "learning_rate": 1.4341297463172338e-05, "loss": 0.1097, "step": 23987 }, { "epoch": 0.5285825249136492, "grad_norm": 0.553489625453949, "learning_rate": 1.4340227962337704e-05, "loss": 0.0859, "step": 23988 }, { "epoch": 0.5286045602031654, "grad_norm": 0.6649671792984009, "learning_rate": 1.433915846486364e-05, "loss": 0.0694, "step": 23989 }, { "epoch": 0.5286265954926815, "grad_norm": 0.677325427532196, "learning_rate": 1.4338088970755594e-05, "loss": 0.088, "step": 23990 }, { "epoch": 0.5286486307821977, "grad_norm": 0.6231722235679626, "learning_rate": 1.4337019480019018e-05, "loss": 0.0787, "step": 23991 }, { "epoch": 0.5286706660717139, "grad_norm": 0.8029872179031372, "learning_rate": 1.4335949992659356e-05, "loss": 0.0937, "step": 23992 }, { "epoch": 0.52869270136123, "grad_norm": 0.3686777949333191, "learning_rate": 1.433488050868206e-05, "loss": 0.0534, "step": 23993 }, { "epoch": 0.5287147366507462, "grad_norm": 0.43177542090415955, "learning_rate": 1.4333811028092566e-05, "loss": 0.0812, "step": 23994 }, { "epoch": 0.5287367719402624, "grad_norm": 0.4823814630508423, "learning_rate": 1.4332741550896337e-05, "loss": 0.0541, "step": 23995 }, { "epoch": 0.5287588072297785, "grad_norm": 0.8039636015892029, "learning_rate": 1.4331672077098813e-05, "loss": 0.1092, "step": 23996 }, { "epoch": 0.5287808425192947, "grad_norm": 0.9310533404350281, "learning_rate": 1.4330602606705441e-05, "loss": 0.0874, "step": 23997 }, { "epoch": 0.5288028778088109, "grad_norm": 0.571425199508667, "learning_rate": 1.4329533139721665e-05, "loss": 0.0488, "step": 23998 }, { "epoch": 0.528824913098327, "grad_norm": 0.43109509348869324, "learning_rate": 1.4328463676152941e-05, "loss": 0.0742, "step": 23999 }, { "epoch": 0.5288469483878432, "grad_norm": 0.47442305088043213, "learning_rate": 1.432739421600471e-05, "loss": 0.07, "step": 24000 }, { "epoch": 0.5288689836773592, "grad_norm": 0.5230516195297241, "learning_rate": 1.4326324759282422e-05, "loss": 0.062, "step": 24001 }, { "epoch": 0.5288910189668754, "grad_norm": 0.6325511932373047, "learning_rate": 1.4325255305991518e-05, "loss": 0.0865, "step": 24002 }, { "epoch": 0.5289130542563916, "grad_norm": 0.7869532704353333, "learning_rate": 1.4324185856137455e-05, "loss": 0.0734, "step": 24003 }, { "epoch": 0.5289350895459077, "grad_norm": 0.5372092723846436, "learning_rate": 1.4323116409725675e-05, "loss": 0.0651, "step": 24004 }, { "epoch": 0.5289571248354239, "grad_norm": 0.7126482725143433, "learning_rate": 1.4322046966761629e-05, "loss": 0.1109, "step": 24005 }, { "epoch": 0.5289791601249401, "grad_norm": 0.9315067529678345, "learning_rate": 1.4320977527250757e-05, "loss": 0.084, "step": 24006 }, { "epoch": 0.5290011954144562, "grad_norm": 0.701728105545044, "learning_rate": 1.4319908091198513e-05, "loss": 0.0504, "step": 24007 }, { "epoch": 0.5290232307039724, "grad_norm": 0.6769034266471863, "learning_rate": 1.4318838658610343e-05, "loss": 0.0816, "step": 24008 }, { "epoch": 0.5290452659934886, "grad_norm": 0.825584888458252, "learning_rate": 1.4317769229491685e-05, "loss": 0.0853, "step": 24009 }, { "epoch": 0.5290673012830047, "grad_norm": 0.852439284324646, "learning_rate": 1.4316699803848001e-05, "loss": 0.0873, "step": 24010 }, { "epoch": 0.5290893365725209, "grad_norm": 0.5348724126815796, "learning_rate": 1.4315630381684733e-05, "loss": 0.0621, "step": 24011 }, { "epoch": 0.529111371862037, "grad_norm": 0.8079448938369751, "learning_rate": 1.4314560963007325e-05, "loss": 0.0956, "step": 24012 }, { "epoch": 0.5291334071515532, "grad_norm": 0.6673167943954468, "learning_rate": 1.4313491547821223e-05, "loss": 0.0896, "step": 24013 }, { "epoch": 0.5291554424410694, "grad_norm": 0.7763359546661377, "learning_rate": 1.4312422136131878e-05, "loss": 0.0614, "step": 24014 }, { "epoch": 0.5291774777305855, "grad_norm": 1.3021544218063354, "learning_rate": 1.4311352727944737e-05, "loss": 0.0883, "step": 24015 }, { "epoch": 0.5291995130201017, "grad_norm": 0.5896070599555969, "learning_rate": 1.4310283323265246e-05, "loss": 0.0908, "step": 24016 }, { "epoch": 0.5292215483096179, "grad_norm": 0.6634376049041748, "learning_rate": 1.4309213922098846e-05, "loss": 0.0528, "step": 24017 }, { "epoch": 0.529243583599134, "grad_norm": 0.6201992034912109, "learning_rate": 1.4308144524450996e-05, "loss": 0.0651, "step": 24018 }, { "epoch": 0.5292656188886502, "grad_norm": 0.39444518089294434, "learning_rate": 1.4307075130327136e-05, "loss": 0.0866, "step": 24019 }, { "epoch": 0.5292876541781664, "grad_norm": 0.4594488739967346, "learning_rate": 1.4306005739732716e-05, "loss": 0.0633, "step": 24020 }, { "epoch": 0.5293096894676825, "grad_norm": 0.461880624294281, "learning_rate": 1.4304936352673173e-05, "loss": 0.0379, "step": 24021 }, { "epoch": 0.5293317247571987, "grad_norm": 0.7477598786354065, "learning_rate": 1.430386696915397e-05, "loss": 0.0824, "step": 24022 }, { "epoch": 0.5293537600467149, "grad_norm": 0.560461163520813, "learning_rate": 1.4302797589180544e-05, "loss": 0.0571, "step": 24023 }, { "epoch": 0.529375795336231, "grad_norm": 0.47259050607681274, "learning_rate": 1.4301728212758346e-05, "loss": 0.075, "step": 24024 }, { "epoch": 0.5293978306257471, "grad_norm": 0.4958370327949524, "learning_rate": 1.4300658839892812e-05, "loss": 0.0755, "step": 24025 }, { "epoch": 0.5294198659152632, "grad_norm": 0.5702357292175293, "learning_rate": 1.4299589470589404e-05, "loss": 0.0628, "step": 24026 }, { "epoch": 0.5294419012047794, "grad_norm": 0.4771520793437958, "learning_rate": 1.4298520104853563e-05, "loss": 0.0584, "step": 24027 }, { "epoch": 0.5294639364942956, "grad_norm": 0.6416261196136475, "learning_rate": 1.4297450742690734e-05, "loss": 0.0493, "step": 24028 }, { "epoch": 0.5294859717838117, "grad_norm": 0.7606553435325623, "learning_rate": 1.429638138410636e-05, "loss": 0.0897, "step": 24029 }, { "epoch": 0.5295080070733279, "grad_norm": 0.4683094620704651, "learning_rate": 1.42953120291059e-05, "loss": 0.0789, "step": 24030 }, { "epoch": 0.5295300423628441, "grad_norm": 0.8784950375556946, "learning_rate": 1.4294242677694792e-05, "loss": 0.1031, "step": 24031 }, { "epoch": 0.5295520776523602, "grad_norm": 0.5733307003974915, "learning_rate": 1.4293173329878484e-05, "loss": 0.1121, "step": 24032 }, { "epoch": 0.5295741129418764, "grad_norm": 0.4352293610572815, "learning_rate": 1.4292103985662423e-05, "loss": 0.0833, "step": 24033 }, { "epoch": 0.5295961482313926, "grad_norm": 0.4988047480583191, "learning_rate": 1.4291034645052058e-05, "loss": 0.0491, "step": 24034 }, { "epoch": 0.5296181835209087, "grad_norm": 0.5363315343856812, "learning_rate": 1.4289965308052833e-05, "loss": 0.0695, "step": 24035 }, { "epoch": 0.5296402188104249, "grad_norm": 0.6450173258781433, "learning_rate": 1.4288895974670188e-05, "loss": 0.0737, "step": 24036 }, { "epoch": 0.529662254099941, "grad_norm": 0.44327425956726074, "learning_rate": 1.4287826644909587e-05, "loss": 0.0633, "step": 24037 }, { "epoch": 0.5296842893894572, "grad_norm": 0.7097384929656982, "learning_rate": 1.4286757318776465e-05, "loss": 0.0799, "step": 24038 }, { "epoch": 0.5297063246789734, "grad_norm": 0.44059908390045166, "learning_rate": 1.4285687996276269e-05, "loss": 0.0516, "step": 24039 }, { "epoch": 0.5297283599684895, "grad_norm": 0.9496634006500244, "learning_rate": 1.4284618677414445e-05, "loss": 0.0528, "step": 24040 }, { "epoch": 0.5297503952580057, "grad_norm": 0.951092541217804, "learning_rate": 1.4283549362196445e-05, "loss": 0.0735, "step": 24041 }, { "epoch": 0.5297724305475219, "grad_norm": 0.3495687246322632, "learning_rate": 1.4282480050627713e-05, "loss": 0.0531, "step": 24042 }, { "epoch": 0.529794465837038, "grad_norm": 0.7116972804069519, "learning_rate": 1.4281410742713692e-05, "loss": 0.0547, "step": 24043 }, { "epoch": 0.5298165011265542, "grad_norm": 0.7107669711112976, "learning_rate": 1.4280341438459827e-05, "loss": 0.082, "step": 24044 }, { "epoch": 0.5298385364160704, "grad_norm": 1.0283584594726562, "learning_rate": 1.4279272137871576e-05, "loss": 0.0972, "step": 24045 }, { "epoch": 0.5298605717055865, "grad_norm": 0.5842780470848083, "learning_rate": 1.4278202840954378e-05, "loss": 0.1106, "step": 24046 }, { "epoch": 0.5298826069951027, "grad_norm": 0.7994417548179626, "learning_rate": 1.427713354771368e-05, "loss": 0.0963, "step": 24047 }, { "epoch": 0.5299046422846189, "grad_norm": 0.9163094162940979, "learning_rate": 1.4276064258154924e-05, "loss": 0.0609, "step": 24048 }, { "epoch": 0.529926677574135, "grad_norm": 0.6579210758209229, "learning_rate": 1.4274994972283565e-05, "loss": 0.0737, "step": 24049 }, { "epoch": 0.5299487128636511, "grad_norm": 0.44483864307403564, "learning_rate": 1.4273925690105046e-05, "loss": 0.0692, "step": 24050 }, { "epoch": 0.5299707481531672, "grad_norm": 0.6006047129631042, "learning_rate": 1.4272856411624812e-05, "loss": 0.0983, "step": 24051 }, { "epoch": 0.5299927834426834, "grad_norm": 0.47992461919784546, "learning_rate": 1.4271787136848305e-05, "loss": 0.0631, "step": 24052 }, { "epoch": 0.5300148187321996, "grad_norm": 0.7182474732398987, "learning_rate": 1.4270717865780982e-05, "loss": 0.0476, "step": 24053 }, { "epoch": 0.5300368540217157, "grad_norm": 0.5332654714584351, "learning_rate": 1.4269648598428285e-05, "loss": 0.0536, "step": 24054 }, { "epoch": 0.5300588893112319, "grad_norm": 0.6416850090026855, "learning_rate": 1.4268579334795654e-05, "loss": 0.0929, "step": 24055 }, { "epoch": 0.5300809246007481, "grad_norm": 0.8315443396568298, "learning_rate": 1.4267510074888547e-05, "loss": 0.0755, "step": 24056 }, { "epoch": 0.5301029598902642, "grad_norm": 0.6347345113754272, "learning_rate": 1.4266440818712401e-05, "loss": 0.0852, "step": 24057 }, { "epoch": 0.5301249951797804, "grad_norm": 0.9346219301223755, "learning_rate": 1.4265371566272667e-05, "loss": 0.0772, "step": 24058 }, { "epoch": 0.5301470304692966, "grad_norm": 0.851200520992279, "learning_rate": 1.4264302317574782e-05, "loss": 0.0649, "step": 24059 }, { "epoch": 0.5301690657588127, "grad_norm": 0.3659990131855011, "learning_rate": 1.4263233072624208e-05, "loss": 0.054, "step": 24060 }, { "epoch": 0.5301911010483289, "grad_norm": 0.7842218279838562, "learning_rate": 1.4262163831426381e-05, "loss": 0.0498, "step": 24061 }, { "epoch": 0.5302131363378451, "grad_norm": 0.5558714866638184, "learning_rate": 1.426109459398675e-05, "loss": 0.0673, "step": 24062 }, { "epoch": 0.5302351716273612, "grad_norm": 0.4641015827655792, "learning_rate": 1.4260025360310759e-05, "loss": 0.0638, "step": 24063 }, { "epoch": 0.5302572069168774, "grad_norm": 0.6005699634552002, "learning_rate": 1.4258956130403856e-05, "loss": 0.0597, "step": 24064 }, { "epoch": 0.5302792422063936, "grad_norm": 0.8440539836883545, "learning_rate": 1.4257886904271488e-05, "loss": 0.0875, "step": 24065 }, { "epoch": 0.5303012774959097, "grad_norm": 0.4171743392944336, "learning_rate": 1.4256817681919099e-05, "loss": 0.0989, "step": 24066 }, { "epoch": 0.5303233127854259, "grad_norm": 0.7584100961685181, "learning_rate": 1.4255748463352131e-05, "loss": 0.0743, "step": 24067 }, { "epoch": 0.530345348074942, "grad_norm": 0.41078248620033264, "learning_rate": 1.4254679248576044e-05, "loss": 0.0736, "step": 24068 }, { "epoch": 0.5303673833644582, "grad_norm": 0.6778038740158081, "learning_rate": 1.4253610037596271e-05, "loss": 0.0923, "step": 24069 }, { "epoch": 0.5303894186539744, "grad_norm": 0.653331995010376, "learning_rate": 1.4252540830418264e-05, "loss": 0.0696, "step": 24070 }, { "epoch": 0.5304114539434905, "grad_norm": 0.4741288125514984, "learning_rate": 1.4251471627047464e-05, "loss": 0.0614, "step": 24071 }, { "epoch": 0.5304334892330067, "grad_norm": 0.6689926981925964, "learning_rate": 1.4250402427489323e-05, "loss": 0.0995, "step": 24072 }, { "epoch": 0.5304555245225229, "grad_norm": 0.5118910670280457, "learning_rate": 1.4249333231749286e-05, "loss": 0.0661, "step": 24073 }, { "epoch": 0.530477559812039, "grad_norm": 0.8332403898239136, "learning_rate": 1.4248264039832797e-05, "loss": 0.0958, "step": 24074 }, { "epoch": 0.5304995951015551, "grad_norm": 0.5129744410514832, "learning_rate": 1.4247194851745293e-05, "loss": 0.0471, "step": 24075 }, { "epoch": 0.5305216303910713, "grad_norm": 1.20685613155365, "learning_rate": 1.424612566749224e-05, "loss": 0.1082, "step": 24076 }, { "epoch": 0.5305436656805874, "grad_norm": 0.7253837585449219, "learning_rate": 1.4245056487079071e-05, "loss": 0.0678, "step": 24077 }, { "epoch": 0.5305657009701036, "grad_norm": 0.9768562912940979, "learning_rate": 1.424398731051123e-05, "loss": 0.0845, "step": 24078 }, { "epoch": 0.5305877362596197, "grad_norm": 0.5160112380981445, "learning_rate": 1.4242918137794173e-05, "loss": 0.0757, "step": 24079 }, { "epoch": 0.5306097715491359, "grad_norm": 0.4647740423679352, "learning_rate": 1.4241848968933337e-05, "loss": 0.049, "step": 24080 }, { "epoch": 0.5306318068386521, "grad_norm": 0.7289013862609863, "learning_rate": 1.4240779803934173e-05, "loss": 0.0924, "step": 24081 }, { "epoch": 0.5306538421281682, "grad_norm": 0.43648749589920044, "learning_rate": 1.4239710642802117e-05, "loss": 0.097, "step": 24082 }, { "epoch": 0.5306758774176844, "grad_norm": 0.44212305545806885, "learning_rate": 1.423864148554263e-05, "loss": 0.0576, "step": 24083 }, { "epoch": 0.5306979127072006, "grad_norm": 0.9037903547286987, "learning_rate": 1.4237572332161149e-05, "loss": 0.1145, "step": 24084 }, { "epoch": 0.5307199479967167, "grad_norm": 0.5828554034233093, "learning_rate": 1.423650318266312e-05, "loss": 0.0884, "step": 24085 }, { "epoch": 0.5307419832862329, "grad_norm": 0.29879531264305115, "learning_rate": 1.4235434037053989e-05, "loss": 0.0584, "step": 24086 }, { "epoch": 0.5307640185757491, "grad_norm": 0.42804402112960815, "learning_rate": 1.4234364895339204e-05, "loss": 0.0747, "step": 24087 }, { "epoch": 0.5307860538652652, "grad_norm": 0.5430418252944946, "learning_rate": 1.4233295757524209e-05, "loss": 0.0894, "step": 24088 }, { "epoch": 0.5308080891547814, "grad_norm": 0.5801957249641418, "learning_rate": 1.4232226623614452e-05, "loss": 0.0812, "step": 24089 }, { "epoch": 0.5308301244442976, "grad_norm": 0.5709904432296753, "learning_rate": 1.4231157493615368e-05, "loss": 0.0733, "step": 24090 }, { "epoch": 0.5308521597338137, "grad_norm": 0.8073480725288391, "learning_rate": 1.4230088367532419e-05, "loss": 0.0643, "step": 24091 }, { "epoch": 0.5308741950233299, "grad_norm": 0.7558793425559998, "learning_rate": 1.4229019245371041e-05, "loss": 0.0877, "step": 24092 }, { "epoch": 0.530896230312846, "grad_norm": 0.6298035383224487, "learning_rate": 1.4227950127136682e-05, "loss": 0.0762, "step": 24093 }, { "epoch": 0.5309182656023622, "grad_norm": 0.5752454996109009, "learning_rate": 1.4226881012834784e-05, "loss": 0.0805, "step": 24094 }, { "epoch": 0.5309403008918784, "grad_norm": 0.6022166013717651, "learning_rate": 1.42258119024708e-05, "loss": 0.0831, "step": 24095 }, { "epoch": 0.5309623361813945, "grad_norm": 0.5368829965591431, "learning_rate": 1.4224742796050169e-05, "loss": 0.0738, "step": 24096 }, { "epoch": 0.5309843714709107, "grad_norm": 0.5214838981628418, "learning_rate": 1.4223673693578335e-05, "loss": 0.0766, "step": 24097 }, { "epoch": 0.5310064067604269, "grad_norm": 0.5705913305282593, "learning_rate": 1.422260459506075e-05, "loss": 0.0533, "step": 24098 }, { "epoch": 0.531028442049943, "grad_norm": 0.3889229893684387, "learning_rate": 1.422153550050286e-05, "loss": 0.073, "step": 24099 }, { "epoch": 0.5310504773394591, "grad_norm": 0.9724746942520142, "learning_rate": 1.4220466409910105e-05, "loss": 0.0748, "step": 24100 }, { "epoch": 0.5310725126289753, "grad_norm": 0.6122474670410156, "learning_rate": 1.421939732328793e-05, "loss": 0.0599, "step": 24101 }, { "epoch": 0.5310945479184914, "grad_norm": 0.9253201484680176, "learning_rate": 1.4218328240641787e-05, "loss": 0.067, "step": 24102 }, { "epoch": 0.5311165832080076, "grad_norm": 0.6163077354431152, "learning_rate": 1.4217259161977118e-05, "loss": 0.0738, "step": 24103 }, { "epoch": 0.5311386184975238, "grad_norm": 0.7812986969947815, "learning_rate": 1.4216190087299367e-05, "loss": 0.1, "step": 24104 }, { "epoch": 0.5311606537870399, "grad_norm": 0.5757312178611755, "learning_rate": 1.4215121016613971e-05, "loss": 0.0969, "step": 24105 }, { "epoch": 0.5311826890765561, "grad_norm": 0.9362503886222839, "learning_rate": 1.4214051949926394e-05, "loss": 0.106, "step": 24106 }, { "epoch": 0.5312047243660722, "grad_norm": 0.3601226210594177, "learning_rate": 1.421298288724207e-05, "loss": 0.0732, "step": 24107 }, { "epoch": 0.5312267596555884, "grad_norm": 0.4454745948314667, "learning_rate": 1.4211913828566449e-05, "loss": 0.0651, "step": 24108 }, { "epoch": 0.5312487949451046, "grad_norm": 0.5910677313804626, "learning_rate": 1.4210844773904968e-05, "loss": 0.082, "step": 24109 }, { "epoch": 0.5312708302346207, "grad_norm": 0.3424248695373535, "learning_rate": 1.4209775723263083e-05, "loss": 0.0762, "step": 24110 }, { "epoch": 0.5312928655241369, "grad_norm": 0.7454431056976318, "learning_rate": 1.4208706676646232e-05, "loss": 0.1227, "step": 24111 }, { "epoch": 0.5313149008136531, "grad_norm": 0.5364949703216553, "learning_rate": 1.4207637634059864e-05, "loss": 0.0703, "step": 24112 }, { "epoch": 0.5313369361031692, "grad_norm": 0.5050483345985413, "learning_rate": 1.4206568595509413e-05, "loss": 0.0542, "step": 24113 }, { "epoch": 0.5313589713926854, "grad_norm": 0.6767748594284058, "learning_rate": 1.4205499561000343e-05, "loss": 0.0651, "step": 24114 }, { "epoch": 0.5313810066822016, "grad_norm": 0.5692682862281799, "learning_rate": 1.4204430530538088e-05, "loss": 0.0653, "step": 24115 }, { "epoch": 0.5314030419717177, "grad_norm": 0.7221617102622986, "learning_rate": 1.4203361504128096e-05, "loss": 0.0751, "step": 24116 }, { "epoch": 0.5314250772612339, "grad_norm": 0.850663959980011, "learning_rate": 1.4202292481775807e-05, "loss": 0.0651, "step": 24117 }, { "epoch": 0.5314471125507501, "grad_norm": 0.8104090690612793, "learning_rate": 1.4201223463486673e-05, "loss": 0.0823, "step": 24118 }, { "epoch": 0.5314691478402662, "grad_norm": 0.3908251225948334, "learning_rate": 1.4200154449266139e-05, "loss": 0.0849, "step": 24119 }, { "epoch": 0.5314911831297824, "grad_norm": 0.6802241802215576, "learning_rate": 1.4199085439119639e-05, "loss": 0.0791, "step": 24120 }, { "epoch": 0.5315132184192985, "grad_norm": 0.827406108379364, "learning_rate": 1.4198016433052632e-05, "loss": 0.0763, "step": 24121 }, { "epoch": 0.5315352537088147, "grad_norm": 0.6702730059623718, "learning_rate": 1.4196947431070557e-05, "loss": 0.0827, "step": 24122 }, { "epoch": 0.5315572889983309, "grad_norm": 0.48766759037971497, "learning_rate": 1.419587843317886e-05, "loss": 0.0589, "step": 24123 }, { "epoch": 0.5315793242878469, "grad_norm": 0.624220609664917, "learning_rate": 1.4194809439382983e-05, "loss": 0.0767, "step": 24124 }, { "epoch": 0.5316013595773631, "grad_norm": 0.7127884030342102, "learning_rate": 1.4193740449688376e-05, "loss": 0.06, "step": 24125 }, { "epoch": 0.5316233948668793, "grad_norm": 0.7283519506454468, "learning_rate": 1.4192671464100483e-05, "loss": 0.0574, "step": 24126 }, { "epoch": 0.5316454301563954, "grad_norm": 0.6095547676086426, "learning_rate": 1.4191602482624744e-05, "loss": 0.0687, "step": 24127 }, { "epoch": 0.5316674654459116, "grad_norm": 0.5111637115478516, "learning_rate": 1.4190533505266603e-05, "loss": 0.031, "step": 24128 }, { "epoch": 0.5316895007354278, "grad_norm": 0.8859856724739075, "learning_rate": 1.4189464532031515e-05, "loss": 0.0658, "step": 24129 }, { "epoch": 0.5317115360249439, "grad_norm": 1.0697211027145386, "learning_rate": 1.4188395562924916e-05, "loss": 0.0975, "step": 24130 }, { "epoch": 0.5317335713144601, "grad_norm": 0.5196109414100647, "learning_rate": 1.4187326597952257e-05, "loss": 0.0734, "step": 24131 }, { "epoch": 0.5317556066039762, "grad_norm": 0.39559200406074524, "learning_rate": 1.4186257637118973e-05, "loss": 0.1098, "step": 24132 }, { "epoch": 0.5317776418934924, "grad_norm": 0.8924368023872375, "learning_rate": 1.418518868043052e-05, "loss": 0.0745, "step": 24133 }, { "epoch": 0.5317996771830086, "grad_norm": 0.5271127223968506, "learning_rate": 1.4184119727892341e-05, "loss": 0.0478, "step": 24134 }, { "epoch": 0.5318217124725247, "grad_norm": 0.6754576563835144, "learning_rate": 1.4183050779509876e-05, "loss": 0.0905, "step": 24135 }, { "epoch": 0.5318437477620409, "grad_norm": 0.7893736362457275, "learning_rate": 1.4181981835288563e-05, "loss": 0.0892, "step": 24136 }, { "epoch": 0.5318657830515571, "grad_norm": 0.4457751512527466, "learning_rate": 1.4180912895233865e-05, "loss": 0.0668, "step": 24137 }, { "epoch": 0.5318878183410732, "grad_norm": 0.7516569495201111, "learning_rate": 1.4179843959351214e-05, "loss": 0.0646, "step": 24138 }, { "epoch": 0.5319098536305894, "grad_norm": 0.5989553332328796, "learning_rate": 1.4178775027646058e-05, "loss": 0.065, "step": 24139 }, { "epoch": 0.5319318889201056, "grad_norm": 0.7810519933700562, "learning_rate": 1.4177706100123836e-05, "loss": 0.0822, "step": 24140 }, { "epoch": 0.5319539242096217, "grad_norm": 0.5384502410888672, "learning_rate": 1.4176637176790001e-05, "loss": 0.0574, "step": 24141 }, { "epoch": 0.5319759594991379, "grad_norm": 0.6939339637756348, "learning_rate": 1.4175568257649997e-05, "loss": 0.0927, "step": 24142 }, { "epoch": 0.5319979947886541, "grad_norm": 1.057806134223938, "learning_rate": 1.4174499342709263e-05, "loss": 0.1065, "step": 24143 }, { "epoch": 0.5320200300781702, "grad_norm": 0.6432257294654846, "learning_rate": 1.4173430431973248e-05, "loss": 0.0902, "step": 24144 }, { "epoch": 0.5320420653676864, "grad_norm": 0.3910943567752838, "learning_rate": 1.4172361525447397e-05, "loss": 0.0565, "step": 24145 }, { "epoch": 0.5320641006572026, "grad_norm": 0.28700169920921326, "learning_rate": 1.4171292623137151e-05, "loss": 0.0758, "step": 24146 }, { "epoch": 0.5320861359467187, "grad_norm": 0.7711946964263916, "learning_rate": 1.4170223725047951e-05, "loss": 0.09, "step": 24147 }, { "epoch": 0.5321081712362349, "grad_norm": 0.8624743223190308, "learning_rate": 1.4169154831185251e-05, "loss": 0.0612, "step": 24148 }, { "epoch": 0.5321302065257509, "grad_norm": 0.7450226545333862, "learning_rate": 1.4168085941554494e-05, "loss": 0.0808, "step": 24149 }, { "epoch": 0.5321522418152671, "grad_norm": 0.515261173248291, "learning_rate": 1.4167017056161118e-05, "loss": 0.0718, "step": 24150 }, { "epoch": 0.5321742771047833, "grad_norm": 0.7379947304725647, "learning_rate": 1.4165948175010569e-05, "loss": 0.0889, "step": 24151 }, { "epoch": 0.5321963123942994, "grad_norm": 0.5461111664772034, "learning_rate": 1.4164879298108297e-05, "loss": 0.0452, "step": 24152 }, { "epoch": 0.5322183476838156, "grad_norm": 0.567291796207428, "learning_rate": 1.4163810425459744e-05, "loss": 0.062, "step": 24153 }, { "epoch": 0.5322403829733318, "grad_norm": 0.7132524847984314, "learning_rate": 1.416274155707035e-05, "loss": 0.1165, "step": 24154 }, { "epoch": 0.5322624182628479, "grad_norm": 0.4706585109233856, "learning_rate": 1.4161672692945557e-05, "loss": 0.0606, "step": 24155 }, { "epoch": 0.5322844535523641, "grad_norm": 0.8262262940406799, "learning_rate": 1.416060383309082e-05, "loss": 0.0822, "step": 24156 }, { "epoch": 0.5323064888418803, "grad_norm": 0.6965577602386475, "learning_rate": 1.4159534977511578e-05, "loss": 0.1034, "step": 24157 }, { "epoch": 0.5323285241313964, "grad_norm": 0.609256386756897, "learning_rate": 1.4158466126213278e-05, "loss": 0.0574, "step": 24158 }, { "epoch": 0.5323505594209126, "grad_norm": 0.9963542819023132, "learning_rate": 1.4157397279201354e-05, "loss": 0.0974, "step": 24159 }, { "epoch": 0.5323725947104287, "grad_norm": 0.5856556296348572, "learning_rate": 1.4156328436481263e-05, "loss": 0.0532, "step": 24160 }, { "epoch": 0.5323946299999449, "grad_norm": 0.5029864311218262, "learning_rate": 1.4155259598058444e-05, "loss": 0.0975, "step": 24161 }, { "epoch": 0.5324166652894611, "grad_norm": 0.2586846947669983, "learning_rate": 1.415419076393834e-05, "loss": 0.0569, "step": 24162 }, { "epoch": 0.5324387005789772, "grad_norm": 0.6444533467292786, "learning_rate": 1.415312193412639e-05, "loss": 0.059, "step": 24163 }, { "epoch": 0.5324607358684934, "grad_norm": 0.5330219268798828, "learning_rate": 1.415205310862805e-05, "loss": 0.0748, "step": 24164 }, { "epoch": 0.5324827711580096, "grad_norm": 0.5852739810943604, "learning_rate": 1.4150984287448757e-05, "loss": 0.0969, "step": 24165 }, { "epoch": 0.5325048064475257, "grad_norm": 0.47446656227111816, "learning_rate": 1.4149915470593954e-05, "loss": 0.0614, "step": 24166 }, { "epoch": 0.5325268417370419, "grad_norm": 0.8194131851196289, "learning_rate": 1.414884665806909e-05, "loss": 0.0889, "step": 24167 }, { "epoch": 0.5325488770265581, "grad_norm": 0.3937348425388336, "learning_rate": 1.4147777849879609e-05, "loss": 0.0461, "step": 24168 }, { "epoch": 0.5325709123160742, "grad_norm": 0.789923369884491, "learning_rate": 1.4146709046030951e-05, "loss": 0.0654, "step": 24169 }, { "epoch": 0.5325929476055904, "grad_norm": 1.2154479026794434, "learning_rate": 1.4145640246528555e-05, "loss": 0.0952, "step": 24170 }, { "epoch": 0.5326149828951066, "grad_norm": 0.5821582674980164, "learning_rate": 1.4144571451377877e-05, "loss": 0.0616, "step": 24171 }, { "epoch": 0.5326370181846227, "grad_norm": 0.33184006810188293, "learning_rate": 1.4143502660584356e-05, "loss": 0.0549, "step": 24172 }, { "epoch": 0.5326590534741389, "grad_norm": 0.4313371777534485, "learning_rate": 1.4142433874153435e-05, "loss": 0.0586, "step": 24173 }, { "epoch": 0.5326810887636549, "grad_norm": 0.545741617679596, "learning_rate": 1.4141365092090556e-05, "loss": 0.0632, "step": 24174 }, { "epoch": 0.5327031240531711, "grad_norm": 0.35142311453819275, "learning_rate": 1.4140296314401167e-05, "loss": 0.0626, "step": 24175 }, { "epoch": 0.5327251593426873, "grad_norm": 0.6410566568374634, "learning_rate": 1.413922754109071e-05, "loss": 0.0818, "step": 24176 }, { "epoch": 0.5327471946322034, "grad_norm": 0.8745995163917542, "learning_rate": 1.413815877216463e-05, "loss": 0.0829, "step": 24177 }, { "epoch": 0.5327692299217196, "grad_norm": 0.6175238490104675, "learning_rate": 1.4137090007628362e-05, "loss": 0.0599, "step": 24178 }, { "epoch": 0.5327912652112358, "grad_norm": 0.5980939865112305, "learning_rate": 1.4136021247487363e-05, "loss": 0.0739, "step": 24179 }, { "epoch": 0.5328133005007519, "grad_norm": 0.5610913038253784, "learning_rate": 1.4134952491747071e-05, "loss": 0.0555, "step": 24180 }, { "epoch": 0.5328353357902681, "grad_norm": 0.5082880854606628, "learning_rate": 1.4133883740412931e-05, "loss": 0.0779, "step": 24181 }, { "epoch": 0.5328573710797843, "grad_norm": 0.41382354497909546, "learning_rate": 1.4132814993490381e-05, "loss": 0.0577, "step": 24182 }, { "epoch": 0.5328794063693004, "grad_norm": 0.2775253355503082, "learning_rate": 1.4131746250984874e-05, "loss": 0.0787, "step": 24183 }, { "epoch": 0.5329014416588166, "grad_norm": 0.49457886815071106, "learning_rate": 1.4130677512901849e-05, "loss": 0.0611, "step": 24184 }, { "epoch": 0.5329234769483328, "grad_norm": 0.8118119239807129, "learning_rate": 1.412960877924675e-05, "loss": 0.076, "step": 24185 }, { "epoch": 0.5329455122378489, "grad_norm": 0.7610254287719727, "learning_rate": 1.4128540050025013e-05, "loss": 0.0853, "step": 24186 }, { "epoch": 0.5329675475273651, "grad_norm": 0.44569119811058044, "learning_rate": 1.4127471325242094e-05, "loss": 0.0728, "step": 24187 }, { "epoch": 0.5329895828168812, "grad_norm": 0.9051384925842285, "learning_rate": 1.4126402604903432e-05, "loss": 0.1364, "step": 24188 }, { "epoch": 0.5330116181063974, "grad_norm": 0.3382290005683899, "learning_rate": 1.4125333889014469e-05, "loss": 0.0464, "step": 24189 }, { "epoch": 0.5330336533959136, "grad_norm": 0.669165849685669, "learning_rate": 1.4124265177580651e-05, "loss": 0.0912, "step": 24190 }, { "epoch": 0.5330556886854297, "grad_norm": 0.8420785069465637, "learning_rate": 1.4123196470607419e-05, "loss": 0.0627, "step": 24191 }, { "epoch": 0.5330777239749459, "grad_norm": 0.21068336069583893, "learning_rate": 1.4122127768100219e-05, "loss": 0.0771, "step": 24192 }, { "epoch": 0.5330997592644621, "grad_norm": 0.6314049959182739, "learning_rate": 1.4121059070064486e-05, "loss": 0.0386, "step": 24193 }, { "epoch": 0.5331217945539782, "grad_norm": 0.6721465587615967, "learning_rate": 1.4119990376505677e-05, "loss": 0.0736, "step": 24194 }, { "epoch": 0.5331438298434944, "grad_norm": 0.6735050082206726, "learning_rate": 1.411892168742923e-05, "loss": 0.0675, "step": 24195 }, { "epoch": 0.5331658651330106, "grad_norm": 0.5052980780601501, "learning_rate": 1.4117853002840587e-05, "loss": 0.0814, "step": 24196 }, { "epoch": 0.5331879004225267, "grad_norm": 0.5982456207275391, "learning_rate": 1.4116784322745187e-05, "loss": 0.0614, "step": 24197 }, { "epoch": 0.5332099357120428, "grad_norm": 0.5175297260284424, "learning_rate": 1.4115715647148481e-05, "loss": 0.0733, "step": 24198 }, { "epoch": 0.533231971001559, "grad_norm": 0.7414355278015137, "learning_rate": 1.4114646976055912e-05, "loss": 0.0674, "step": 24199 }, { "epoch": 0.5332540062910751, "grad_norm": 0.8070424199104309, "learning_rate": 1.411357830947292e-05, "loss": 0.0487, "step": 24200 }, { "epoch": 0.5332760415805913, "grad_norm": 0.6065378785133362, "learning_rate": 1.411250964740494e-05, "loss": 0.0847, "step": 24201 }, { "epoch": 0.5332980768701074, "grad_norm": 0.6372898817062378, "learning_rate": 1.4111440989857433e-05, "loss": 0.0768, "step": 24202 }, { "epoch": 0.5333201121596236, "grad_norm": 0.6083773374557495, "learning_rate": 1.4110372336835833e-05, "loss": 0.0474, "step": 24203 }, { "epoch": 0.5333421474491398, "grad_norm": 0.6160567998886108, "learning_rate": 1.4109303688345584e-05, "loss": 0.0732, "step": 24204 }, { "epoch": 0.5333641827386559, "grad_norm": 0.6156817078590393, "learning_rate": 1.4108235044392125e-05, "loss": 0.0631, "step": 24205 }, { "epoch": 0.5333862180281721, "grad_norm": 0.8763466477394104, "learning_rate": 1.4107166404980907e-05, "loss": 0.0862, "step": 24206 }, { "epoch": 0.5334082533176883, "grad_norm": 0.4944503903388977, "learning_rate": 1.4106097770117369e-05, "loss": 0.0864, "step": 24207 }, { "epoch": 0.5334302886072044, "grad_norm": 0.6896975040435791, "learning_rate": 1.4105029139806954e-05, "loss": 0.0812, "step": 24208 }, { "epoch": 0.5334523238967206, "grad_norm": 0.6584462523460388, "learning_rate": 1.41039605140551e-05, "loss": 0.0569, "step": 24209 }, { "epoch": 0.5334743591862368, "grad_norm": 0.4563281834125519, "learning_rate": 1.4102891892867263e-05, "loss": 0.0718, "step": 24210 }, { "epoch": 0.5334963944757529, "grad_norm": 0.48211371898651123, "learning_rate": 1.4101823276248876e-05, "loss": 0.0805, "step": 24211 }, { "epoch": 0.5335184297652691, "grad_norm": 0.8301001191139221, "learning_rate": 1.4100754664205383e-05, "loss": 0.1089, "step": 24212 }, { "epoch": 0.5335404650547853, "grad_norm": 0.5380733609199524, "learning_rate": 1.4099686056742232e-05, "loss": 0.0919, "step": 24213 }, { "epoch": 0.5335625003443014, "grad_norm": 0.7598515748977661, "learning_rate": 1.4098617453864864e-05, "loss": 0.067, "step": 24214 }, { "epoch": 0.5335845356338176, "grad_norm": 0.7697640657424927, "learning_rate": 1.4097548855578717e-05, "loss": 0.0935, "step": 24215 }, { "epoch": 0.5336065709233337, "grad_norm": 0.7144738435745239, "learning_rate": 1.4096480261889235e-05, "loss": 0.0663, "step": 24216 }, { "epoch": 0.5336286062128499, "grad_norm": 0.40045449137687683, "learning_rate": 1.4095411672801867e-05, "loss": 0.0692, "step": 24217 }, { "epoch": 0.5336506415023661, "grad_norm": 0.8659520745277405, "learning_rate": 1.4094343088322054e-05, "loss": 0.0962, "step": 24218 }, { "epoch": 0.5336726767918822, "grad_norm": 0.6299130916595459, "learning_rate": 1.4093274508455237e-05, "loss": 0.0662, "step": 24219 }, { "epoch": 0.5336947120813984, "grad_norm": 0.7071003913879395, "learning_rate": 1.4092205933206856e-05, "loss": 0.0612, "step": 24220 }, { "epoch": 0.5337167473709146, "grad_norm": 0.491698682308197, "learning_rate": 1.4091137362582361e-05, "loss": 0.0883, "step": 24221 }, { "epoch": 0.5337387826604307, "grad_norm": 0.6423003673553467, "learning_rate": 1.409006879658719e-05, "loss": 0.0625, "step": 24222 }, { "epoch": 0.5337608179499468, "grad_norm": 0.8338626027107239, "learning_rate": 1.4089000235226786e-05, "loss": 0.078, "step": 24223 }, { "epoch": 0.533782853239463, "grad_norm": 0.44281309843063354, "learning_rate": 1.4087931678506587e-05, "loss": 0.0845, "step": 24224 }, { "epoch": 0.5338048885289791, "grad_norm": 0.7740726470947266, "learning_rate": 1.4086863126432047e-05, "loss": 0.0984, "step": 24225 }, { "epoch": 0.5338269238184953, "grad_norm": 0.5282160639762878, "learning_rate": 1.40857945790086e-05, "loss": 0.0578, "step": 24226 }, { "epoch": 0.5338489591080114, "grad_norm": 0.5750927329063416, "learning_rate": 1.4084726036241696e-05, "loss": 0.0556, "step": 24227 }, { "epoch": 0.5338709943975276, "grad_norm": 0.713193416595459, "learning_rate": 1.4083657498136767e-05, "loss": 0.0701, "step": 24228 }, { "epoch": 0.5338930296870438, "grad_norm": 0.4902523159980774, "learning_rate": 1.4082588964699266e-05, "loss": 0.0402, "step": 24229 }, { "epoch": 0.5339150649765599, "grad_norm": 0.7113789916038513, "learning_rate": 1.4081520435934631e-05, "loss": 0.0745, "step": 24230 }, { "epoch": 0.5339371002660761, "grad_norm": 0.4578756093978882, "learning_rate": 1.4080451911848299e-05, "loss": 0.0569, "step": 24231 }, { "epoch": 0.5339591355555923, "grad_norm": 0.7910763025283813, "learning_rate": 1.4079383392445724e-05, "loss": 0.0702, "step": 24232 }, { "epoch": 0.5339811708451084, "grad_norm": 0.6695659756660461, "learning_rate": 1.4078314877732346e-05, "loss": 0.0939, "step": 24233 }, { "epoch": 0.5340032061346246, "grad_norm": 0.7199349999427795, "learning_rate": 1.4077246367713601e-05, "loss": 0.0672, "step": 24234 }, { "epoch": 0.5340252414241408, "grad_norm": 0.669055163860321, "learning_rate": 1.4076177862394933e-05, "loss": 0.0783, "step": 24235 }, { "epoch": 0.5340472767136569, "grad_norm": 0.4661238491535187, "learning_rate": 1.4075109361781792e-05, "loss": 0.0705, "step": 24236 }, { "epoch": 0.5340693120031731, "grad_norm": 0.34962934255599976, "learning_rate": 1.407404086587961e-05, "loss": 0.0736, "step": 24237 }, { "epoch": 0.5340913472926893, "grad_norm": 0.4696561098098755, "learning_rate": 1.4072972374693837e-05, "loss": 0.0988, "step": 24238 }, { "epoch": 0.5341133825822054, "grad_norm": 0.6976951956748962, "learning_rate": 1.4071903888229909e-05, "loss": 0.0822, "step": 24239 }, { "epoch": 0.5341354178717216, "grad_norm": 0.6183152794837952, "learning_rate": 1.4070835406493275e-05, "loss": 0.0749, "step": 24240 }, { "epoch": 0.5341574531612377, "grad_norm": 0.5963233709335327, "learning_rate": 1.4069766929489376e-05, "loss": 0.0703, "step": 24241 }, { "epoch": 0.5341794884507539, "grad_norm": 0.35896384716033936, "learning_rate": 1.4068698457223653e-05, "loss": 0.0521, "step": 24242 }, { "epoch": 0.5342015237402701, "grad_norm": 0.8082230687141418, "learning_rate": 1.406762998970154e-05, "loss": 0.0661, "step": 24243 }, { "epoch": 0.5342235590297862, "grad_norm": 0.6787409782409668, "learning_rate": 1.4066561526928497e-05, "loss": 0.0887, "step": 24244 }, { "epoch": 0.5342455943193024, "grad_norm": 1.0624663829803467, "learning_rate": 1.4065493068909957e-05, "loss": 0.0853, "step": 24245 }, { "epoch": 0.5342676296088186, "grad_norm": 0.7647998929023743, "learning_rate": 1.4064424615651362e-05, "loss": 0.0808, "step": 24246 }, { "epoch": 0.5342896648983347, "grad_norm": 0.5405821204185486, "learning_rate": 1.4063356167158145e-05, "loss": 0.0618, "step": 24247 }, { "epoch": 0.5343117001878508, "grad_norm": 0.602792501449585, "learning_rate": 1.4062287723435763e-05, "loss": 0.0841, "step": 24248 }, { "epoch": 0.534333735477367, "grad_norm": 0.6571505069732666, "learning_rate": 1.4061219284489655e-05, "loss": 0.072, "step": 24249 }, { "epoch": 0.5343557707668831, "grad_norm": 0.8076237440109253, "learning_rate": 1.406015085032526e-05, "loss": 0.081, "step": 24250 }, { "epoch": 0.5343778060563993, "grad_norm": 0.6273037791252136, "learning_rate": 1.4059082420948014e-05, "loss": 0.066, "step": 24251 }, { "epoch": 0.5343998413459154, "grad_norm": 0.7845762968063354, "learning_rate": 1.405801399636337e-05, "loss": 0.0771, "step": 24252 }, { "epoch": 0.5344218766354316, "grad_norm": 0.2281210720539093, "learning_rate": 1.405694557657677e-05, "loss": 0.0555, "step": 24253 }, { "epoch": 0.5344439119249478, "grad_norm": 0.7178633809089661, "learning_rate": 1.4055877161593646e-05, "loss": 0.0944, "step": 24254 }, { "epoch": 0.5344659472144639, "grad_norm": 0.5521398782730103, "learning_rate": 1.405480875141945e-05, "loss": 0.0708, "step": 24255 }, { "epoch": 0.5344879825039801, "grad_norm": 0.3956106901168823, "learning_rate": 1.4053740346059619e-05, "loss": 0.0648, "step": 24256 }, { "epoch": 0.5345100177934963, "grad_norm": 0.4027083218097687, "learning_rate": 1.4052671945519595e-05, "loss": 0.0659, "step": 24257 }, { "epoch": 0.5345320530830124, "grad_norm": 0.45857375860214233, "learning_rate": 1.4051603549804816e-05, "loss": 0.059, "step": 24258 }, { "epoch": 0.5345540883725286, "grad_norm": 0.7969568371772766, "learning_rate": 1.4050535158920734e-05, "loss": 0.0767, "step": 24259 }, { "epoch": 0.5345761236620448, "grad_norm": 0.599035918712616, "learning_rate": 1.4049466772872787e-05, "loss": 0.0486, "step": 24260 }, { "epoch": 0.5345981589515609, "grad_norm": 0.6897026300430298, "learning_rate": 1.4048398391666416e-05, "loss": 0.0857, "step": 24261 }, { "epoch": 0.5346201942410771, "grad_norm": 0.9535536170005798, "learning_rate": 1.4047330015307057e-05, "loss": 0.0735, "step": 24262 }, { "epoch": 0.5346422295305933, "grad_norm": 0.46922922134399414, "learning_rate": 1.4046261643800161e-05, "loss": 0.0974, "step": 24263 }, { "epoch": 0.5346642648201094, "grad_norm": 0.5372456312179565, "learning_rate": 1.4045193277151166e-05, "loss": 0.0607, "step": 24264 }, { "epoch": 0.5346863001096256, "grad_norm": 0.42498695850372314, "learning_rate": 1.4044124915365513e-05, "loss": 0.0566, "step": 24265 }, { "epoch": 0.5347083353991418, "grad_norm": 0.8379828333854675, "learning_rate": 1.404305655844864e-05, "loss": 0.0834, "step": 24266 }, { "epoch": 0.5347303706886579, "grad_norm": 0.7990678548812866, "learning_rate": 1.4041988206405997e-05, "loss": 0.0978, "step": 24267 }, { "epoch": 0.5347524059781741, "grad_norm": 0.3954184949398041, "learning_rate": 1.4040919859243024e-05, "loss": 0.0714, "step": 24268 }, { "epoch": 0.5347744412676902, "grad_norm": 0.6465191841125488, "learning_rate": 1.4039851516965158e-05, "loss": 0.0651, "step": 24269 }, { "epoch": 0.5347964765572064, "grad_norm": 0.6457647085189819, "learning_rate": 1.4038783179577843e-05, "loss": 0.087, "step": 24270 }, { "epoch": 0.5348185118467226, "grad_norm": 0.5946184396743774, "learning_rate": 1.4037714847086521e-05, "loss": 0.0645, "step": 24271 }, { "epoch": 0.5348405471362386, "grad_norm": 0.30615320801734924, "learning_rate": 1.4036646519496636e-05, "loss": 0.0821, "step": 24272 }, { "epoch": 0.5348625824257548, "grad_norm": 0.8070293068885803, "learning_rate": 1.4035578196813624e-05, "loss": 0.0915, "step": 24273 }, { "epoch": 0.534884617715271, "grad_norm": 0.5613046288490295, "learning_rate": 1.4034509879042926e-05, "loss": 0.0664, "step": 24274 }, { "epoch": 0.5349066530047871, "grad_norm": 0.32950732111930847, "learning_rate": 1.403344156618999e-05, "loss": 0.036, "step": 24275 }, { "epoch": 0.5349286882943033, "grad_norm": 0.7109591960906982, "learning_rate": 1.4032373258260257e-05, "loss": 0.1509, "step": 24276 }, { "epoch": 0.5349507235838195, "grad_norm": 0.8867334127426147, "learning_rate": 1.4031304955259162e-05, "loss": 0.0705, "step": 24277 }, { "epoch": 0.5349727588733356, "grad_norm": 0.5527431964874268, "learning_rate": 1.4030236657192154e-05, "loss": 0.0812, "step": 24278 }, { "epoch": 0.5349947941628518, "grad_norm": 0.5918125510215759, "learning_rate": 1.402916836406467e-05, "loss": 0.0861, "step": 24279 }, { "epoch": 0.535016829452368, "grad_norm": 0.5461627840995789, "learning_rate": 1.4028100075882152e-05, "loss": 0.0738, "step": 24280 }, { "epoch": 0.5350388647418841, "grad_norm": 0.6427819132804871, "learning_rate": 1.4027031792650033e-05, "loss": 0.064, "step": 24281 }, { "epoch": 0.5350609000314003, "grad_norm": 0.5819337368011475, "learning_rate": 1.4025963514373772e-05, "loss": 0.1004, "step": 24282 }, { "epoch": 0.5350829353209164, "grad_norm": 0.922315776348114, "learning_rate": 1.40248952410588e-05, "loss": 0.1155, "step": 24283 }, { "epoch": 0.5351049706104326, "grad_norm": 0.8461258411407471, "learning_rate": 1.4023826972710562e-05, "loss": 0.1213, "step": 24284 }, { "epoch": 0.5351270058999488, "grad_norm": 0.5443723797798157, "learning_rate": 1.4022758709334491e-05, "loss": 0.0688, "step": 24285 }, { "epoch": 0.5351490411894649, "grad_norm": 0.4944875240325928, "learning_rate": 1.4021690450936039e-05, "loss": 0.0572, "step": 24286 }, { "epoch": 0.5351710764789811, "grad_norm": 0.3897840976715088, "learning_rate": 1.402062219752064e-05, "loss": 0.0789, "step": 24287 }, { "epoch": 0.5351931117684973, "grad_norm": 0.48885753750801086, "learning_rate": 1.4019553949093738e-05, "loss": 0.0845, "step": 24288 }, { "epoch": 0.5352151470580134, "grad_norm": 0.8310675024986267, "learning_rate": 1.4018485705660769e-05, "loss": 0.0893, "step": 24289 }, { "epoch": 0.5352371823475296, "grad_norm": 0.6059616208076477, "learning_rate": 1.401741746722718e-05, "loss": 0.0713, "step": 24290 }, { "epoch": 0.5352592176370458, "grad_norm": 0.6344659328460693, "learning_rate": 1.4016349233798416e-05, "loss": 0.0705, "step": 24291 }, { "epoch": 0.5352812529265619, "grad_norm": 0.44557568430900574, "learning_rate": 1.401528100537991e-05, "loss": 0.0688, "step": 24292 }, { "epoch": 0.5353032882160781, "grad_norm": 0.7330262064933777, "learning_rate": 1.4014212781977103e-05, "loss": 0.1022, "step": 24293 }, { "epoch": 0.5353253235055943, "grad_norm": 0.5674310326576233, "learning_rate": 1.4013144563595444e-05, "loss": 0.0475, "step": 24294 }, { "epoch": 0.5353473587951104, "grad_norm": 0.733736515045166, "learning_rate": 1.4012076350240367e-05, "loss": 0.1014, "step": 24295 }, { "epoch": 0.5353693940846266, "grad_norm": 0.831522524356842, "learning_rate": 1.4011008141917316e-05, "loss": 0.0771, "step": 24296 }, { "epoch": 0.5353914293741426, "grad_norm": 0.43219560384750366, "learning_rate": 1.4009939938631723e-05, "loss": 0.063, "step": 24297 }, { "epoch": 0.5354134646636588, "grad_norm": 0.4287152886390686, "learning_rate": 1.4008871740389044e-05, "loss": 0.0476, "step": 24298 }, { "epoch": 0.535435499953175, "grad_norm": 0.7007820010185242, "learning_rate": 1.4007803547194715e-05, "loss": 0.0911, "step": 24299 }, { "epoch": 0.5354575352426911, "grad_norm": 0.5006918907165527, "learning_rate": 1.400673535905417e-05, "loss": 0.059, "step": 24300 }, { "epoch": 0.5354795705322073, "grad_norm": 0.7919371128082275, "learning_rate": 1.4005667175972855e-05, "loss": 0.0776, "step": 24301 }, { "epoch": 0.5355016058217235, "grad_norm": 0.7953392267227173, "learning_rate": 1.4004598997956213e-05, "loss": 0.076, "step": 24302 }, { "epoch": 0.5355236411112396, "grad_norm": 1.3702598810195923, "learning_rate": 1.4003530825009682e-05, "loss": 0.0539, "step": 24303 }, { "epoch": 0.5355456764007558, "grad_norm": 0.40834784507751465, "learning_rate": 1.4002462657138694e-05, "loss": 0.0478, "step": 24304 }, { "epoch": 0.535567711690272, "grad_norm": 0.8183213472366333, "learning_rate": 1.4001394494348707e-05, "loss": 0.0601, "step": 24305 }, { "epoch": 0.5355897469797881, "grad_norm": 0.6344149112701416, "learning_rate": 1.4000326336645154e-05, "loss": 0.0745, "step": 24306 }, { "epoch": 0.5356117822693043, "grad_norm": 0.6523129940032959, "learning_rate": 1.3999258184033473e-05, "loss": 0.0667, "step": 24307 }, { "epoch": 0.5356338175588204, "grad_norm": 0.6443838477134705, "learning_rate": 1.3998190036519106e-05, "loss": 0.0645, "step": 24308 }, { "epoch": 0.5356558528483366, "grad_norm": 0.6774309277534485, "learning_rate": 1.3997121894107495e-05, "loss": 0.0745, "step": 24309 }, { "epoch": 0.5356778881378528, "grad_norm": 0.8503209948539734, "learning_rate": 1.3996053756804082e-05, "loss": 0.0832, "step": 24310 }, { "epoch": 0.5356999234273689, "grad_norm": 0.8280054330825806, "learning_rate": 1.3994985624614304e-05, "loss": 0.1187, "step": 24311 }, { "epoch": 0.5357219587168851, "grad_norm": 0.8422187566757202, "learning_rate": 1.3993917497543597e-05, "loss": 0.0846, "step": 24312 }, { "epoch": 0.5357439940064013, "grad_norm": 0.4819692075252533, "learning_rate": 1.3992849375597416e-05, "loss": 0.0686, "step": 24313 }, { "epoch": 0.5357660292959174, "grad_norm": 0.4855031371116638, "learning_rate": 1.3991781258781191e-05, "loss": 0.0465, "step": 24314 }, { "epoch": 0.5357880645854336, "grad_norm": 1.12306547164917, "learning_rate": 1.3990713147100367e-05, "loss": 0.1066, "step": 24315 }, { "epoch": 0.5358100998749498, "grad_norm": 0.8579094409942627, "learning_rate": 1.3989645040560376e-05, "loss": 0.0999, "step": 24316 }, { "epoch": 0.5358321351644659, "grad_norm": 0.7853105068206787, "learning_rate": 1.398857693916667e-05, "loss": 0.0876, "step": 24317 }, { "epoch": 0.5358541704539821, "grad_norm": 0.6532067060470581, "learning_rate": 1.3987508842924683e-05, "loss": 0.0625, "step": 24318 }, { "epoch": 0.5358762057434983, "grad_norm": 0.6155188679695129, "learning_rate": 1.3986440751839857e-05, "loss": 0.0797, "step": 24319 }, { "epoch": 0.5358982410330144, "grad_norm": 0.7344777584075928, "learning_rate": 1.3985372665917623e-05, "loss": 0.0577, "step": 24320 }, { "epoch": 0.5359202763225306, "grad_norm": 0.7118974328041077, "learning_rate": 1.398430458516344e-05, "loss": 0.0851, "step": 24321 }, { "epoch": 0.5359423116120466, "grad_norm": 0.8793216943740845, "learning_rate": 1.3983236509582738e-05, "loss": 0.0712, "step": 24322 }, { "epoch": 0.5359643469015628, "grad_norm": 0.7623695135116577, "learning_rate": 1.3982168439180953e-05, "loss": 0.1009, "step": 24323 }, { "epoch": 0.535986382191079, "grad_norm": 0.44258877635002136, "learning_rate": 1.3981100373963534e-05, "loss": 0.0586, "step": 24324 }, { "epoch": 0.5360084174805951, "grad_norm": 0.5799273252487183, "learning_rate": 1.3980032313935917e-05, "loss": 0.0712, "step": 24325 }, { "epoch": 0.5360304527701113, "grad_norm": 0.6524266004562378, "learning_rate": 1.3978964259103543e-05, "loss": 0.0758, "step": 24326 }, { "epoch": 0.5360524880596275, "grad_norm": 0.5566747784614563, "learning_rate": 1.3977896209471842e-05, "loss": 0.0661, "step": 24327 }, { "epoch": 0.5360745233491436, "grad_norm": 0.9252143502235413, "learning_rate": 1.3976828165046274e-05, "loss": 0.043, "step": 24328 }, { "epoch": 0.5360965586386598, "grad_norm": 0.6493873596191406, "learning_rate": 1.3975760125832265e-05, "loss": 0.0825, "step": 24329 }, { "epoch": 0.536118593928176, "grad_norm": 0.551688015460968, "learning_rate": 1.3974692091835261e-05, "loss": 0.0633, "step": 24330 }, { "epoch": 0.5361406292176921, "grad_norm": 0.4534767270088196, "learning_rate": 1.3973624063060698e-05, "loss": 0.0737, "step": 24331 }, { "epoch": 0.5361626645072083, "grad_norm": 0.9740526080131531, "learning_rate": 1.3972556039514021e-05, "loss": 0.0725, "step": 24332 }, { "epoch": 0.5361846997967245, "grad_norm": 0.6548305749893188, "learning_rate": 1.3971488021200666e-05, "loss": 0.1094, "step": 24333 }, { "epoch": 0.5362067350862406, "grad_norm": 0.7393476963043213, "learning_rate": 1.3970420008126074e-05, "loss": 0.0577, "step": 24334 }, { "epoch": 0.5362287703757568, "grad_norm": 0.8085168600082397, "learning_rate": 1.3969352000295678e-05, "loss": 0.0567, "step": 24335 }, { "epoch": 0.536250805665273, "grad_norm": 0.5489203333854675, "learning_rate": 1.3968283997714933e-05, "loss": 0.0552, "step": 24336 }, { "epoch": 0.5362728409547891, "grad_norm": 0.6300405859947205, "learning_rate": 1.396721600038927e-05, "loss": 0.0824, "step": 24337 }, { "epoch": 0.5362948762443053, "grad_norm": 0.521785318851471, "learning_rate": 1.396614800832413e-05, "loss": 0.0683, "step": 24338 }, { "epoch": 0.5363169115338214, "grad_norm": 0.9939685463905334, "learning_rate": 1.3965080021524948e-05, "loss": 0.1089, "step": 24339 }, { "epoch": 0.5363389468233376, "grad_norm": 0.5292518734931946, "learning_rate": 1.3964012039997174e-05, "loss": 0.0661, "step": 24340 }, { "epoch": 0.5363609821128538, "grad_norm": 0.6520742177963257, "learning_rate": 1.3962944063746241e-05, "loss": 0.0746, "step": 24341 }, { "epoch": 0.5363830174023699, "grad_norm": 0.9914208054542542, "learning_rate": 1.396187609277759e-05, "loss": 0.0902, "step": 24342 }, { "epoch": 0.5364050526918861, "grad_norm": 0.6021437644958496, "learning_rate": 1.3960808127096654e-05, "loss": 0.0685, "step": 24343 }, { "epoch": 0.5364270879814023, "grad_norm": 0.5805134177207947, "learning_rate": 1.3959740166708887e-05, "loss": 0.0414, "step": 24344 }, { "epoch": 0.5364491232709184, "grad_norm": 0.532608151435852, "learning_rate": 1.3958672211619719e-05, "loss": 0.0603, "step": 24345 }, { "epoch": 0.5364711585604346, "grad_norm": 0.6251916885375977, "learning_rate": 1.3957604261834588e-05, "loss": 0.0541, "step": 24346 }, { "epoch": 0.5364931938499506, "grad_norm": 1.0787837505340576, "learning_rate": 1.3956536317358945e-05, "loss": 0.1141, "step": 24347 }, { "epoch": 0.5365152291394668, "grad_norm": 0.6048372387886047, "learning_rate": 1.395546837819822e-05, "loss": 0.0489, "step": 24348 }, { "epoch": 0.536537264428983, "grad_norm": 0.3433799147605896, "learning_rate": 1.3954400444357856e-05, "loss": 0.0488, "step": 24349 }, { "epoch": 0.5365592997184991, "grad_norm": 1.0278881788253784, "learning_rate": 1.3953332515843282e-05, "loss": 0.1105, "step": 24350 }, { "epoch": 0.5365813350080153, "grad_norm": 0.5503925681114197, "learning_rate": 1.3952264592659954e-05, "loss": 0.0532, "step": 24351 }, { "epoch": 0.5366033702975315, "grad_norm": 0.5154939293861389, "learning_rate": 1.3951196674813304e-05, "loss": 0.0751, "step": 24352 }, { "epoch": 0.5366254055870476, "grad_norm": 0.6157435178756714, "learning_rate": 1.3950128762308774e-05, "loss": 0.0565, "step": 24353 }, { "epoch": 0.5366474408765638, "grad_norm": 0.65500408411026, "learning_rate": 1.3949060855151793e-05, "loss": 0.0794, "step": 24354 }, { "epoch": 0.53666947616608, "grad_norm": 1.2284117937088013, "learning_rate": 1.3947992953347814e-05, "loss": 0.0997, "step": 24355 }, { "epoch": 0.5366915114555961, "grad_norm": 0.6789564490318298, "learning_rate": 1.3946925056902274e-05, "loss": 0.0573, "step": 24356 }, { "epoch": 0.5367135467451123, "grad_norm": 0.9617324471473694, "learning_rate": 1.3945857165820612e-05, "loss": 0.1206, "step": 24357 }, { "epoch": 0.5367355820346285, "grad_norm": 0.6621602773666382, "learning_rate": 1.3944789280108253e-05, "loss": 0.0665, "step": 24358 }, { "epoch": 0.5367576173241446, "grad_norm": 0.500234842300415, "learning_rate": 1.3943721399770655e-05, "loss": 0.0595, "step": 24359 }, { "epoch": 0.5367796526136608, "grad_norm": 0.705668032169342, "learning_rate": 1.3942653524813251e-05, "loss": 0.0895, "step": 24360 }, { "epoch": 0.536801687903177, "grad_norm": 0.7328488826751709, "learning_rate": 1.394158565524148e-05, "loss": 0.0873, "step": 24361 }, { "epoch": 0.5368237231926931, "grad_norm": 0.7066688537597656, "learning_rate": 1.3940517791060774e-05, "loss": 0.0701, "step": 24362 }, { "epoch": 0.5368457584822093, "grad_norm": 0.7081019282341003, "learning_rate": 1.3939449932276583e-05, "loss": 0.0942, "step": 24363 }, { "epoch": 0.5368677937717254, "grad_norm": 0.3261268436908722, "learning_rate": 1.3938382078894345e-05, "loss": 0.059, "step": 24364 }, { "epoch": 0.5368898290612416, "grad_norm": 0.9375355243682861, "learning_rate": 1.3937314230919492e-05, "loss": 0.0792, "step": 24365 }, { "epoch": 0.5369118643507578, "grad_norm": 0.7354575395584106, "learning_rate": 1.3936246388357472e-05, "loss": 0.0891, "step": 24366 }, { "epoch": 0.5369338996402739, "grad_norm": 0.587752640247345, "learning_rate": 1.3935178551213719e-05, "loss": 0.0589, "step": 24367 }, { "epoch": 0.5369559349297901, "grad_norm": 0.8135894536972046, "learning_rate": 1.3934110719493673e-05, "loss": 0.091, "step": 24368 }, { "epoch": 0.5369779702193063, "grad_norm": 0.617111086845398, "learning_rate": 1.3933042893202765e-05, "loss": 0.0658, "step": 24369 }, { "epoch": 0.5370000055088224, "grad_norm": 0.8304750919342041, "learning_rate": 1.393197507234645e-05, "loss": 0.054, "step": 24370 }, { "epoch": 0.5370220407983385, "grad_norm": 0.6155238151550293, "learning_rate": 1.3930907256930158e-05, "loss": 0.048, "step": 24371 }, { "epoch": 0.5370440760878546, "grad_norm": 0.5137189626693726, "learning_rate": 1.3929839446959327e-05, "loss": 0.0733, "step": 24372 }, { "epoch": 0.5370661113773708, "grad_norm": 0.4131665825843811, "learning_rate": 1.3928771642439396e-05, "loss": 0.0568, "step": 24373 }, { "epoch": 0.537088146666887, "grad_norm": 1.037956953048706, "learning_rate": 1.3927703843375807e-05, "loss": 0.0977, "step": 24374 }, { "epoch": 0.5371101819564031, "grad_norm": 0.36349666118621826, "learning_rate": 1.3926636049774e-05, "loss": 0.061, "step": 24375 }, { "epoch": 0.5371322172459193, "grad_norm": 0.4642624855041504, "learning_rate": 1.392556826163941e-05, "loss": 0.0804, "step": 24376 }, { "epoch": 0.5371542525354355, "grad_norm": 0.7934428453445435, "learning_rate": 1.392450047897747e-05, "loss": 0.076, "step": 24377 }, { "epoch": 0.5371762878249516, "grad_norm": 0.7499760389328003, "learning_rate": 1.3923432701793631e-05, "loss": 0.0924, "step": 24378 }, { "epoch": 0.5371983231144678, "grad_norm": 0.7073940634727478, "learning_rate": 1.3922364930093329e-05, "loss": 0.081, "step": 24379 }, { "epoch": 0.537220358403984, "grad_norm": 0.5511934161186218, "learning_rate": 1.3921297163881998e-05, "loss": 0.1012, "step": 24380 }, { "epoch": 0.5372423936935001, "grad_norm": 0.4350053369998932, "learning_rate": 1.3920229403165078e-05, "loss": 0.0791, "step": 24381 }, { "epoch": 0.5372644289830163, "grad_norm": 0.861813485622406, "learning_rate": 1.3919161647948011e-05, "loss": 0.0593, "step": 24382 }, { "epoch": 0.5372864642725325, "grad_norm": 0.7560686469078064, "learning_rate": 1.3918093898236234e-05, "loss": 0.0798, "step": 24383 }, { "epoch": 0.5373084995620486, "grad_norm": 0.6253484487533569, "learning_rate": 1.3917026154035185e-05, "loss": 0.0828, "step": 24384 }, { "epoch": 0.5373305348515648, "grad_norm": 0.30205079913139343, "learning_rate": 1.3915958415350294e-05, "loss": 0.0504, "step": 24385 }, { "epoch": 0.537352570141081, "grad_norm": 0.4306131899356842, "learning_rate": 1.3914890682187016e-05, "loss": 0.0732, "step": 24386 }, { "epoch": 0.5373746054305971, "grad_norm": 0.8851040005683899, "learning_rate": 1.3913822954550782e-05, "loss": 0.071, "step": 24387 }, { "epoch": 0.5373966407201133, "grad_norm": 0.42585325241088867, "learning_rate": 1.3912755232447027e-05, "loss": 0.0498, "step": 24388 }, { "epoch": 0.5374186760096294, "grad_norm": 0.7775554060935974, "learning_rate": 1.3911687515881195e-05, "loss": 0.0781, "step": 24389 }, { "epoch": 0.5374407112991456, "grad_norm": 0.572396457195282, "learning_rate": 1.3910619804858722e-05, "loss": 0.1016, "step": 24390 }, { "epoch": 0.5374627465886618, "grad_norm": 0.6045390367507935, "learning_rate": 1.3909552099385047e-05, "loss": 0.0438, "step": 24391 }, { "epoch": 0.5374847818781779, "grad_norm": 0.32858169078826904, "learning_rate": 1.3908484399465602e-05, "loss": 0.0409, "step": 24392 }, { "epoch": 0.5375068171676941, "grad_norm": 0.5801371335983276, "learning_rate": 1.3907416705105837e-05, "loss": 0.055, "step": 24393 }, { "epoch": 0.5375288524572103, "grad_norm": 0.5528146624565125, "learning_rate": 1.3906349016311185e-05, "loss": 0.0727, "step": 24394 }, { "epoch": 0.5375508877467264, "grad_norm": 0.6711031794548035, "learning_rate": 1.3905281333087082e-05, "loss": 0.0658, "step": 24395 }, { "epoch": 0.5375729230362425, "grad_norm": 0.9395796656608582, "learning_rate": 1.3904213655438967e-05, "loss": 0.0991, "step": 24396 }, { "epoch": 0.5375949583257587, "grad_norm": 0.6182236671447754, "learning_rate": 1.3903145983372284e-05, "loss": 0.0739, "step": 24397 }, { "epoch": 0.5376169936152748, "grad_norm": 0.5917195081710815, "learning_rate": 1.3902078316892464e-05, "loss": 0.0792, "step": 24398 }, { "epoch": 0.537639028904791, "grad_norm": 0.7938175797462463, "learning_rate": 1.3901010656004948e-05, "loss": 0.0644, "step": 24399 }, { "epoch": 0.5376610641943071, "grad_norm": 0.40763843059539795, "learning_rate": 1.389994300071517e-05, "loss": 0.0594, "step": 24400 }, { "epoch": 0.5376830994838233, "grad_norm": 0.6877562403678894, "learning_rate": 1.3898875351028576e-05, "loss": 0.0612, "step": 24401 }, { "epoch": 0.5377051347733395, "grad_norm": 0.4825073182582855, "learning_rate": 1.3897807706950601e-05, "loss": 0.0733, "step": 24402 }, { "epoch": 0.5377271700628556, "grad_norm": 0.42676863074302673, "learning_rate": 1.3896740068486684e-05, "loss": 0.0529, "step": 24403 }, { "epoch": 0.5377492053523718, "grad_norm": 0.8468932509422302, "learning_rate": 1.3895672435642257e-05, "loss": 0.0673, "step": 24404 }, { "epoch": 0.537771240641888, "grad_norm": 0.8957934975624084, "learning_rate": 1.3894604808422766e-05, "loss": 0.0834, "step": 24405 }, { "epoch": 0.5377932759314041, "grad_norm": 0.6146579384803772, "learning_rate": 1.3893537186833645e-05, "loss": 0.0844, "step": 24406 }, { "epoch": 0.5378153112209203, "grad_norm": 0.5620076060295105, "learning_rate": 1.3892469570880331e-05, "loss": 0.0709, "step": 24407 }, { "epoch": 0.5378373465104365, "grad_norm": 0.48256468772888184, "learning_rate": 1.389140196056826e-05, "loss": 0.0648, "step": 24408 }, { "epoch": 0.5378593817999526, "grad_norm": 0.5157220363616943, "learning_rate": 1.3890334355902878e-05, "loss": 0.0635, "step": 24409 }, { "epoch": 0.5378814170894688, "grad_norm": 0.8011152148246765, "learning_rate": 1.3889266756889619e-05, "loss": 0.0822, "step": 24410 }, { "epoch": 0.537903452378985, "grad_norm": 0.3668389916419983, "learning_rate": 1.3888199163533916e-05, "loss": 0.0719, "step": 24411 }, { "epoch": 0.5379254876685011, "grad_norm": 0.587049126625061, "learning_rate": 1.3887131575841213e-05, "loss": 0.0738, "step": 24412 }, { "epoch": 0.5379475229580173, "grad_norm": 0.8060751557350159, "learning_rate": 1.3886063993816948e-05, "loss": 0.0756, "step": 24413 }, { "epoch": 0.5379695582475335, "grad_norm": 0.7692887187004089, "learning_rate": 1.3884996417466555e-05, "loss": 0.0683, "step": 24414 }, { "epoch": 0.5379915935370496, "grad_norm": 0.918226420879364, "learning_rate": 1.3883928846795469e-05, "loss": 0.0532, "step": 24415 }, { "epoch": 0.5380136288265658, "grad_norm": 0.5432367920875549, "learning_rate": 1.3882861281809136e-05, "loss": 0.0951, "step": 24416 }, { "epoch": 0.538035664116082, "grad_norm": 0.6136394739151001, "learning_rate": 1.3881793722512989e-05, "loss": 0.0808, "step": 24417 }, { "epoch": 0.5380576994055981, "grad_norm": 0.5832324624061584, "learning_rate": 1.3880726168912466e-05, "loss": 0.0767, "step": 24418 }, { "epoch": 0.5380797346951143, "grad_norm": 0.7109405398368835, "learning_rate": 1.3879658621013005e-05, "loss": 0.071, "step": 24419 }, { "epoch": 0.5381017699846304, "grad_norm": 0.5817522406578064, "learning_rate": 1.3878591078820043e-05, "loss": 0.0886, "step": 24420 }, { "epoch": 0.5381238052741465, "grad_norm": 0.7234192490577698, "learning_rate": 1.3877523542339021e-05, "loss": 0.0616, "step": 24421 }, { "epoch": 0.5381458405636627, "grad_norm": 0.6718862056732178, "learning_rate": 1.387645601157537e-05, "loss": 0.0718, "step": 24422 }, { "epoch": 0.5381678758531788, "grad_norm": 0.8491989970207214, "learning_rate": 1.3875388486534527e-05, "loss": 0.0656, "step": 24423 }, { "epoch": 0.538189911142695, "grad_norm": 1.0220773220062256, "learning_rate": 1.387432096722194e-05, "loss": 0.086, "step": 24424 }, { "epoch": 0.5382119464322112, "grad_norm": 0.4747089147567749, "learning_rate": 1.387325345364304e-05, "loss": 0.0438, "step": 24425 }, { "epoch": 0.5382339817217273, "grad_norm": 0.541407585144043, "learning_rate": 1.3872185945803263e-05, "loss": 0.0652, "step": 24426 }, { "epoch": 0.5382560170112435, "grad_norm": 0.5543246865272522, "learning_rate": 1.3871118443708046e-05, "loss": 0.0618, "step": 24427 }, { "epoch": 0.5382780523007596, "grad_norm": 0.4593106806278229, "learning_rate": 1.387005094736283e-05, "loss": 0.0824, "step": 24428 }, { "epoch": 0.5383000875902758, "grad_norm": 0.8955851793289185, "learning_rate": 1.3868983456773051e-05, "loss": 0.077, "step": 24429 }, { "epoch": 0.538322122879792, "grad_norm": 0.5728259682655334, "learning_rate": 1.3867915971944146e-05, "loss": 0.0723, "step": 24430 }, { "epoch": 0.5383441581693081, "grad_norm": 0.5233218669891357, "learning_rate": 1.3866848492881545e-05, "loss": 0.0751, "step": 24431 }, { "epoch": 0.5383661934588243, "grad_norm": 0.327923059463501, "learning_rate": 1.38657810195907e-05, "loss": 0.071, "step": 24432 }, { "epoch": 0.5383882287483405, "grad_norm": 0.49240633845329285, "learning_rate": 1.386471355207704e-05, "loss": 0.0656, "step": 24433 }, { "epoch": 0.5384102640378566, "grad_norm": 0.6053370237350464, "learning_rate": 1.3863646090346e-05, "loss": 0.0938, "step": 24434 }, { "epoch": 0.5384322993273728, "grad_norm": 0.3629307746887207, "learning_rate": 1.3862578634403022e-05, "loss": 0.0822, "step": 24435 }, { "epoch": 0.538454334616889, "grad_norm": 0.7600717544555664, "learning_rate": 1.3861511184253542e-05, "loss": 0.0501, "step": 24436 }, { "epoch": 0.5384763699064051, "grad_norm": 0.6021134257316589, "learning_rate": 1.3860443739902993e-05, "loss": 0.0891, "step": 24437 }, { "epoch": 0.5384984051959213, "grad_norm": 0.4860113561153412, "learning_rate": 1.385937630135681e-05, "loss": 0.0706, "step": 24438 }, { "epoch": 0.5385204404854375, "grad_norm": 0.7119532227516174, "learning_rate": 1.3858308868620444e-05, "loss": 0.0824, "step": 24439 }, { "epoch": 0.5385424757749536, "grad_norm": 0.7964289784431458, "learning_rate": 1.3857241441699322e-05, "loss": 0.0769, "step": 24440 }, { "epoch": 0.5385645110644698, "grad_norm": 0.4759289026260376, "learning_rate": 1.3856174020598881e-05, "loss": 0.0592, "step": 24441 }, { "epoch": 0.538586546353986, "grad_norm": 0.9178438782691956, "learning_rate": 1.3855106605324556e-05, "loss": 0.0903, "step": 24442 }, { "epoch": 0.5386085816435021, "grad_norm": 0.5169252157211304, "learning_rate": 1.3854039195881792e-05, "loss": 0.0567, "step": 24443 }, { "epoch": 0.5386306169330183, "grad_norm": 0.4564627707004547, "learning_rate": 1.3852971792276018e-05, "loss": 0.0798, "step": 24444 }, { "epoch": 0.5386526522225343, "grad_norm": 0.5202652812004089, "learning_rate": 1.3851904394512676e-05, "loss": 0.0581, "step": 24445 }, { "epoch": 0.5386746875120505, "grad_norm": 0.7376152873039246, "learning_rate": 1.3850837002597194e-05, "loss": 0.0722, "step": 24446 }, { "epoch": 0.5386967228015667, "grad_norm": 0.5977004766464233, "learning_rate": 1.3849769616535022e-05, "loss": 0.1077, "step": 24447 }, { "epoch": 0.5387187580910828, "grad_norm": 0.6849361062049866, "learning_rate": 1.3848702236331588e-05, "loss": 0.0679, "step": 24448 }, { "epoch": 0.538740793380599, "grad_norm": 0.6626212000846863, "learning_rate": 1.3847634861992332e-05, "loss": 0.0759, "step": 24449 }, { "epoch": 0.5387628286701152, "grad_norm": 0.6875748634338379, "learning_rate": 1.3846567493522688e-05, "loss": 0.0925, "step": 24450 }, { "epoch": 0.5387848639596313, "grad_norm": 0.5462405681610107, "learning_rate": 1.3845500130928097e-05, "loss": 0.0563, "step": 24451 }, { "epoch": 0.5388068992491475, "grad_norm": 0.8404689431190491, "learning_rate": 1.384443277421399e-05, "loss": 0.0999, "step": 24452 }, { "epoch": 0.5388289345386637, "grad_norm": 0.6470611691474915, "learning_rate": 1.384336542338581e-05, "loss": 0.0655, "step": 24453 }, { "epoch": 0.5388509698281798, "grad_norm": 1.4476089477539062, "learning_rate": 1.3842298078448982e-05, "loss": 0.0605, "step": 24454 }, { "epoch": 0.538873005117696, "grad_norm": 0.670019268989563, "learning_rate": 1.3841230739408957e-05, "loss": 0.0532, "step": 24455 }, { "epoch": 0.5388950404072121, "grad_norm": 0.6095783710479736, "learning_rate": 1.3840163406271168e-05, "loss": 0.0628, "step": 24456 }, { "epoch": 0.5389170756967283, "grad_norm": 0.7373868227005005, "learning_rate": 1.3839096079041039e-05, "loss": 0.0731, "step": 24457 }, { "epoch": 0.5389391109862445, "grad_norm": 0.49392515420913696, "learning_rate": 1.3838028757724025e-05, "loss": 0.0771, "step": 24458 }, { "epoch": 0.5389611462757606, "grad_norm": 0.8127003908157349, "learning_rate": 1.3836961442325555e-05, "loss": 0.0833, "step": 24459 }, { "epoch": 0.5389831815652768, "grad_norm": 0.6006488800048828, "learning_rate": 1.3835894132851061e-05, "loss": 0.0774, "step": 24460 }, { "epoch": 0.539005216854793, "grad_norm": 0.667278528213501, "learning_rate": 1.3834826829305975e-05, "loss": 0.0724, "step": 24461 }, { "epoch": 0.5390272521443091, "grad_norm": 0.5245424509048462, "learning_rate": 1.383375953169575e-05, "loss": 0.0757, "step": 24462 }, { "epoch": 0.5390492874338253, "grad_norm": 0.464453786611557, "learning_rate": 1.383269224002581e-05, "loss": 0.0744, "step": 24463 }, { "epoch": 0.5390713227233415, "grad_norm": 0.49826136231422424, "learning_rate": 1.3831624954301598e-05, "loss": 0.0756, "step": 24464 }, { "epoch": 0.5390933580128576, "grad_norm": 0.7880929112434387, "learning_rate": 1.3830557674528536e-05, "loss": 0.0783, "step": 24465 }, { "epoch": 0.5391153933023738, "grad_norm": 0.4401174783706665, "learning_rate": 1.3829490400712081e-05, "loss": 0.0594, "step": 24466 }, { "epoch": 0.53913742859189, "grad_norm": 0.6269577741622925, "learning_rate": 1.3828423132857659e-05, "loss": 0.0648, "step": 24467 }, { "epoch": 0.5391594638814061, "grad_norm": 0.9296384453773499, "learning_rate": 1.3827355870970702e-05, "loss": 0.053, "step": 24468 }, { "epoch": 0.5391814991709223, "grad_norm": 0.7625714540481567, "learning_rate": 1.3826288615056651e-05, "loss": 0.076, "step": 24469 }, { "epoch": 0.5392035344604383, "grad_norm": 0.6610508561134338, "learning_rate": 1.3825221365120945e-05, "loss": 0.0647, "step": 24470 }, { "epoch": 0.5392255697499545, "grad_norm": 0.5004535913467407, "learning_rate": 1.3824154121169015e-05, "loss": 0.0662, "step": 24471 }, { "epoch": 0.5392476050394707, "grad_norm": 0.6244816780090332, "learning_rate": 1.3823086883206299e-05, "loss": 0.0696, "step": 24472 }, { "epoch": 0.5392696403289868, "grad_norm": 0.6784924864768982, "learning_rate": 1.3822019651238225e-05, "loss": 0.085, "step": 24473 }, { "epoch": 0.539291675618503, "grad_norm": 0.5200055837631226, "learning_rate": 1.3820952425270244e-05, "loss": 0.0548, "step": 24474 }, { "epoch": 0.5393137109080192, "grad_norm": 0.818412721157074, "learning_rate": 1.3819885205307785e-05, "loss": 0.0785, "step": 24475 }, { "epoch": 0.5393357461975353, "grad_norm": 0.8677898049354553, "learning_rate": 1.3818817991356281e-05, "loss": 0.1012, "step": 24476 }, { "epoch": 0.5393577814870515, "grad_norm": 0.7854176759719849, "learning_rate": 1.3817750783421173e-05, "loss": 0.0653, "step": 24477 }, { "epoch": 0.5393798167765677, "grad_norm": 0.6234732270240784, "learning_rate": 1.3816683581507893e-05, "loss": 0.0651, "step": 24478 }, { "epoch": 0.5394018520660838, "grad_norm": 0.792050302028656, "learning_rate": 1.3815616385621877e-05, "loss": 0.0868, "step": 24479 }, { "epoch": 0.5394238873556, "grad_norm": 0.6148056983947754, "learning_rate": 1.3814549195768559e-05, "loss": 0.0705, "step": 24480 }, { "epoch": 0.5394459226451161, "grad_norm": 0.5382503271102905, "learning_rate": 1.3813482011953383e-05, "loss": 0.07, "step": 24481 }, { "epoch": 0.5394679579346323, "grad_norm": 0.8731228709220886, "learning_rate": 1.3812414834181777e-05, "loss": 0.0712, "step": 24482 }, { "epoch": 0.5394899932241485, "grad_norm": 0.8259410262107849, "learning_rate": 1.381134766245918e-05, "loss": 0.0743, "step": 24483 }, { "epoch": 0.5395120285136646, "grad_norm": 0.8863316774368286, "learning_rate": 1.3810280496791026e-05, "loss": 0.0813, "step": 24484 }, { "epoch": 0.5395340638031808, "grad_norm": 0.8799805641174316, "learning_rate": 1.3809213337182753e-05, "loss": 0.1045, "step": 24485 }, { "epoch": 0.539556099092697, "grad_norm": 0.5466641187667847, "learning_rate": 1.3808146183639796e-05, "loss": 0.0691, "step": 24486 }, { "epoch": 0.5395781343822131, "grad_norm": 0.5170338749885559, "learning_rate": 1.3807079036167588e-05, "loss": 0.0694, "step": 24487 }, { "epoch": 0.5396001696717293, "grad_norm": 0.3466044068336487, "learning_rate": 1.380601189477156e-05, "loss": 0.0815, "step": 24488 }, { "epoch": 0.5396222049612455, "grad_norm": 0.4860549569129944, "learning_rate": 1.380494475945716e-05, "loss": 0.0757, "step": 24489 }, { "epoch": 0.5396442402507616, "grad_norm": 0.6768980026245117, "learning_rate": 1.3803877630229817e-05, "loss": 0.0775, "step": 24490 }, { "epoch": 0.5396662755402778, "grad_norm": 0.6650841236114502, "learning_rate": 1.3802810507094968e-05, "loss": 0.0727, "step": 24491 }, { "epoch": 0.539688310829794, "grad_norm": 0.5005526542663574, "learning_rate": 1.3801743390058042e-05, "loss": 0.058, "step": 24492 }, { "epoch": 0.5397103461193101, "grad_norm": 0.5239549875259399, "learning_rate": 1.3800676279124486e-05, "loss": 0.0625, "step": 24493 }, { "epoch": 0.5397323814088263, "grad_norm": 0.7711155414581299, "learning_rate": 1.3799609174299725e-05, "loss": 0.0721, "step": 24494 }, { "epoch": 0.5397544166983423, "grad_norm": 1.4212350845336914, "learning_rate": 1.37985420755892e-05, "loss": 0.1005, "step": 24495 }, { "epoch": 0.5397764519878585, "grad_norm": 1.182016134262085, "learning_rate": 1.3797474982998337e-05, "loss": 0.1049, "step": 24496 }, { "epoch": 0.5397984872773747, "grad_norm": 1.144085168838501, "learning_rate": 1.3796407896532587e-05, "loss": 0.0854, "step": 24497 }, { "epoch": 0.5398205225668908, "grad_norm": 0.4550173580646515, "learning_rate": 1.3795340816197376e-05, "loss": 0.0699, "step": 24498 }, { "epoch": 0.539842557856407, "grad_norm": 0.49072834849357605, "learning_rate": 1.3794273741998137e-05, "loss": 0.0761, "step": 24499 }, { "epoch": 0.5398645931459232, "grad_norm": 0.5549163222312927, "learning_rate": 1.3793206673940311e-05, "loss": 0.0651, "step": 24500 }, { "epoch": 0.5398866284354393, "grad_norm": 0.575986385345459, "learning_rate": 1.379213961202933e-05, "loss": 0.076, "step": 24501 }, { "epoch": 0.5399086637249555, "grad_norm": 0.6075440049171448, "learning_rate": 1.3791072556270632e-05, "loss": 0.0674, "step": 24502 }, { "epoch": 0.5399306990144717, "grad_norm": 0.599091649055481, "learning_rate": 1.3790005506669643e-05, "loss": 0.0784, "step": 24503 }, { "epoch": 0.5399527343039878, "grad_norm": 0.5452534556388855, "learning_rate": 1.3788938463231809e-05, "loss": 0.0917, "step": 24504 }, { "epoch": 0.539974769593504, "grad_norm": 0.614593505859375, "learning_rate": 1.3787871425962564e-05, "loss": 0.0806, "step": 24505 }, { "epoch": 0.5399968048830202, "grad_norm": 0.6294558048248291, "learning_rate": 1.3786804394867338e-05, "loss": 0.0745, "step": 24506 }, { "epoch": 0.5400188401725363, "grad_norm": 0.6432304978370667, "learning_rate": 1.3785737369951564e-05, "loss": 0.0786, "step": 24507 }, { "epoch": 0.5400408754620525, "grad_norm": 0.5236521363258362, "learning_rate": 1.3784670351220683e-05, "loss": 0.0759, "step": 24508 }, { "epoch": 0.5400629107515686, "grad_norm": 0.6466280221939087, "learning_rate": 1.3783603338680132e-05, "loss": 0.0707, "step": 24509 }, { "epoch": 0.5400849460410848, "grad_norm": 1.0267322063446045, "learning_rate": 1.3782536332335336e-05, "loss": 0.0527, "step": 24510 }, { "epoch": 0.540106981330601, "grad_norm": 0.6445229053497314, "learning_rate": 1.3781469332191731e-05, "loss": 0.0766, "step": 24511 }, { "epoch": 0.5401290166201171, "grad_norm": 0.7452754974365234, "learning_rate": 1.3780402338254764e-05, "loss": 0.0607, "step": 24512 }, { "epoch": 0.5401510519096333, "grad_norm": 0.8330348134040833, "learning_rate": 1.3779335350529862e-05, "loss": 0.0831, "step": 24513 }, { "epoch": 0.5401730871991495, "grad_norm": 0.7275879979133606, "learning_rate": 1.3778268369022459e-05, "loss": 0.0795, "step": 24514 }, { "epoch": 0.5401951224886656, "grad_norm": 0.661677896976471, "learning_rate": 1.3777201393737987e-05, "loss": 0.0783, "step": 24515 }, { "epoch": 0.5402171577781818, "grad_norm": 0.75322026014328, "learning_rate": 1.3776134424681886e-05, "loss": 0.0865, "step": 24516 }, { "epoch": 0.540239193067698, "grad_norm": 0.6154307723045349, "learning_rate": 1.377506746185959e-05, "loss": 0.1198, "step": 24517 }, { "epoch": 0.5402612283572141, "grad_norm": 0.4791072905063629, "learning_rate": 1.3774000505276531e-05, "loss": 0.0512, "step": 24518 }, { "epoch": 0.5402832636467302, "grad_norm": 0.5557443499565125, "learning_rate": 1.377293355493814e-05, "loss": 0.0548, "step": 24519 }, { "epoch": 0.5403052989362463, "grad_norm": 0.7327262759208679, "learning_rate": 1.3771866610849861e-05, "loss": 0.0795, "step": 24520 }, { "epoch": 0.5403273342257625, "grad_norm": 0.253401517868042, "learning_rate": 1.3770799673017126e-05, "loss": 0.0597, "step": 24521 }, { "epoch": 0.5403493695152787, "grad_norm": 0.5294133424758911, "learning_rate": 1.3769732741445363e-05, "loss": 0.0842, "step": 24522 }, { "epoch": 0.5403714048047948, "grad_norm": 0.47905388474464417, "learning_rate": 1.3768665816140014e-05, "loss": 0.0624, "step": 24523 }, { "epoch": 0.540393440094311, "grad_norm": 0.62371826171875, "learning_rate": 1.376759889710651e-05, "loss": 0.0686, "step": 24524 }, { "epoch": 0.5404154753838272, "grad_norm": 0.43336474895477295, "learning_rate": 1.3766531984350287e-05, "loss": 0.0678, "step": 24525 }, { "epoch": 0.5404375106733433, "grad_norm": 0.6004616022109985, "learning_rate": 1.3765465077876771e-05, "loss": 0.0604, "step": 24526 }, { "epoch": 0.5404595459628595, "grad_norm": 0.846502423286438, "learning_rate": 1.376439817769141e-05, "loss": 0.1064, "step": 24527 }, { "epoch": 0.5404815812523757, "grad_norm": 0.5594213008880615, "learning_rate": 1.3763331283799631e-05, "loss": 0.0706, "step": 24528 }, { "epoch": 0.5405036165418918, "grad_norm": 1.0263097286224365, "learning_rate": 1.376226439620687e-05, "loss": 0.0643, "step": 24529 }, { "epoch": 0.540525651831408, "grad_norm": 0.6852681636810303, "learning_rate": 1.3761197514918555e-05, "loss": 0.0879, "step": 24530 }, { "epoch": 0.5405476871209242, "grad_norm": 0.5579724311828613, "learning_rate": 1.3760130639940132e-05, "loss": 0.0531, "step": 24531 }, { "epoch": 0.5405697224104403, "grad_norm": 0.7152391076087952, "learning_rate": 1.3759063771277026e-05, "loss": 0.0703, "step": 24532 }, { "epoch": 0.5405917576999565, "grad_norm": 0.5693143606185913, "learning_rate": 1.3757996908934674e-05, "loss": 0.0662, "step": 24533 }, { "epoch": 0.5406137929894727, "grad_norm": 1.1134051084518433, "learning_rate": 1.3756930052918503e-05, "loss": 0.1207, "step": 24534 }, { "epoch": 0.5406358282789888, "grad_norm": 0.6290291547775269, "learning_rate": 1.375586320323396e-05, "loss": 0.1084, "step": 24535 }, { "epoch": 0.540657863568505, "grad_norm": 0.32029271125793457, "learning_rate": 1.3754796359886474e-05, "loss": 0.0446, "step": 24536 }, { "epoch": 0.5406798988580211, "grad_norm": 0.5721719861030579, "learning_rate": 1.3753729522881476e-05, "loss": 0.0887, "step": 24537 }, { "epoch": 0.5407019341475373, "grad_norm": 0.4238708019256592, "learning_rate": 1.37526626922244e-05, "loss": 0.0566, "step": 24538 }, { "epoch": 0.5407239694370535, "grad_norm": 0.48254868388175964, "learning_rate": 1.3751595867920687e-05, "loss": 0.0462, "step": 24539 }, { "epoch": 0.5407460047265696, "grad_norm": 0.7885640263557434, "learning_rate": 1.3750529049975763e-05, "loss": 0.0966, "step": 24540 }, { "epoch": 0.5407680400160858, "grad_norm": 0.26686960458755493, "learning_rate": 1.3749462238395065e-05, "loss": 0.084, "step": 24541 }, { "epoch": 0.540790075305602, "grad_norm": 0.858008086681366, "learning_rate": 1.374839543318402e-05, "loss": 0.1105, "step": 24542 }, { "epoch": 0.5408121105951181, "grad_norm": 0.5539808869361877, "learning_rate": 1.3747328634348075e-05, "loss": 0.0555, "step": 24543 }, { "epoch": 0.5408341458846342, "grad_norm": 0.7224644422531128, "learning_rate": 1.3746261841892657e-05, "loss": 0.0634, "step": 24544 }, { "epoch": 0.5408561811741504, "grad_norm": 0.9348924160003662, "learning_rate": 1.3745195055823196e-05, "loss": 0.0688, "step": 24545 }, { "epoch": 0.5408782164636665, "grad_norm": 0.3911435306072235, "learning_rate": 1.3744128276145134e-05, "loss": 0.0578, "step": 24546 }, { "epoch": 0.5409002517531827, "grad_norm": 0.3518119156360626, "learning_rate": 1.3743061502863898e-05, "loss": 0.0957, "step": 24547 }, { "epoch": 0.5409222870426988, "grad_norm": 0.6719237565994263, "learning_rate": 1.3741994735984926e-05, "loss": 0.114, "step": 24548 }, { "epoch": 0.540944322332215, "grad_norm": 0.5893019437789917, "learning_rate": 1.3740927975513642e-05, "loss": 0.0715, "step": 24549 }, { "epoch": 0.5409663576217312, "grad_norm": 0.33975470066070557, "learning_rate": 1.3739861221455494e-05, "loss": 0.0462, "step": 24550 }, { "epoch": 0.5409883929112473, "grad_norm": 0.48573023080825806, "learning_rate": 1.3738794473815907e-05, "loss": 0.0711, "step": 24551 }, { "epoch": 0.5410104282007635, "grad_norm": 0.6229434013366699, "learning_rate": 1.3737727732600317e-05, "loss": 0.0743, "step": 24552 }, { "epoch": 0.5410324634902797, "grad_norm": 0.4099985659122467, "learning_rate": 1.3736660997814154e-05, "loss": 0.0574, "step": 24553 }, { "epoch": 0.5410544987797958, "grad_norm": 0.5840693116188049, "learning_rate": 1.3735594269462856e-05, "loss": 0.083, "step": 24554 }, { "epoch": 0.541076534069312, "grad_norm": 0.4157538414001465, "learning_rate": 1.3734527547551854e-05, "loss": 0.0442, "step": 24555 }, { "epoch": 0.5410985693588282, "grad_norm": 0.4727090001106262, "learning_rate": 1.3733460832086584e-05, "loss": 0.0755, "step": 24556 }, { "epoch": 0.5411206046483443, "grad_norm": 0.5129256248474121, "learning_rate": 1.373239412307247e-05, "loss": 0.0662, "step": 24557 }, { "epoch": 0.5411426399378605, "grad_norm": 0.6565025448799133, "learning_rate": 1.3731327420514957e-05, "loss": 0.0676, "step": 24558 }, { "epoch": 0.5411646752273767, "grad_norm": 0.6806927919387817, "learning_rate": 1.3730260724419477e-05, "loss": 0.0806, "step": 24559 }, { "epoch": 0.5411867105168928, "grad_norm": 0.43982994556427, "learning_rate": 1.3729194034791456e-05, "loss": 0.0591, "step": 24560 }, { "epoch": 0.541208745806409, "grad_norm": 0.8108766674995422, "learning_rate": 1.372812735163633e-05, "loss": 0.0762, "step": 24561 }, { "epoch": 0.5412307810959252, "grad_norm": 0.6868154406547546, "learning_rate": 1.3727060674959537e-05, "loss": 0.0836, "step": 24562 }, { "epoch": 0.5412528163854413, "grad_norm": 0.8419924974441528, "learning_rate": 1.3725994004766507e-05, "loss": 0.0912, "step": 24563 }, { "epoch": 0.5412748516749575, "grad_norm": 0.45112621784210205, "learning_rate": 1.3724927341062672e-05, "loss": 0.0669, "step": 24564 }, { "epoch": 0.5412968869644736, "grad_norm": 0.7037308812141418, "learning_rate": 1.3723860683853461e-05, "loss": 0.0711, "step": 24565 }, { "epoch": 0.5413189222539898, "grad_norm": 0.41551923751831055, "learning_rate": 1.3722794033144315e-05, "loss": 0.0616, "step": 24566 }, { "epoch": 0.541340957543506, "grad_norm": 0.8909372687339783, "learning_rate": 1.3721727388940667e-05, "loss": 0.0676, "step": 24567 }, { "epoch": 0.5413629928330221, "grad_norm": 0.3062599003314972, "learning_rate": 1.3720660751247937e-05, "loss": 0.0681, "step": 24568 }, { "epoch": 0.5413850281225382, "grad_norm": 0.711773157119751, "learning_rate": 1.3719594120071577e-05, "loss": 0.0567, "step": 24569 }, { "epoch": 0.5414070634120544, "grad_norm": 0.5508072972297668, "learning_rate": 1.3718527495417014e-05, "loss": 0.0633, "step": 24570 }, { "epoch": 0.5414290987015705, "grad_norm": 0.395282506942749, "learning_rate": 1.3717460877289673e-05, "loss": 0.0732, "step": 24571 }, { "epoch": 0.5414511339910867, "grad_norm": 0.6343677043914795, "learning_rate": 1.3716394265694987e-05, "loss": 0.0556, "step": 24572 }, { "epoch": 0.5414731692806029, "grad_norm": 0.6306450963020325, "learning_rate": 1.3715327660638398e-05, "loss": 0.0778, "step": 24573 }, { "epoch": 0.541495204570119, "grad_norm": 0.7494737505912781, "learning_rate": 1.3714261062125337e-05, "loss": 0.0514, "step": 24574 }, { "epoch": 0.5415172398596352, "grad_norm": 0.9495155215263367, "learning_rate": 1.371319447016123e-05, "loss": 0.1001, "step": 24575 }, { "epoch": 0.5415392751491513, "grad_norm": 0.9086179137229919, "learning_rate": 1.371212788475151e-05, "loss": 0.0645, "step": 24576 }, { "epoch": 0.5415613104386675, "grad_norm": 0.4202643930912018, "learning_rate": 1.3711061305901617e-05, "loss": 0.0871, "step": 24577 }, { "epoch": 0.5415833457281837, "grad_norm": 0.24557152390480042, "learning_rate": 1.3709994733616982e-05, "loss": 0.0572, "step": 24578 }, { "epoch": 0.5416053810176998, "grad_norm": 0.3708217144012451, "learning_rate": 1.3708928167903035e-05, "loss": 0.0488, "step": 24579 }, { "epoch": 0.541627416307216, "grad_norm": 0.5892358422279358, "learning_rate": 1.3707861608765206e-05, "loss": 0.0659, "step": 24580 }, { "epoch": 0.5416494515967322, "grad_norm": 0.9440626502037048, "learning_rate": 1.3706795056208934e-05, "loss": 0.1057, "step": 24581 }, { "epoch": 0.5416714868862483, "grad_norm": 0.6090911030769348, "learning_rate": 1.3705728510239647e-05, "loss": 0.0647, "step": 24582 }, { "epoch": 0.5416935221757645, "grad_norm": 0.8841250538825989, "learning_rate": 1.370466197086278e-05, "loss": 0.0754, "step": 24583 }, { "epoch": 0.5417155574652807, "grad_norm": 0.6095200777053833, "learning_rate": 1.3703595438083758e-05, "loss": 0.0701, "step": 24584 }, { "epoch": 0.5417375927547968, "grad_norm": 0.7197979688644409, "learning_rate": 1.3702528911908025e-05, "loss": 0.0633, "step": 24585 }, { "epoch": 0.541759628044313, "grad_norm": 0.5289662480354309, "learning_rate": 1.370146239234101e-05, "loss": 0.0799, "step": 24586 }, { "epoch": 0.5417816633338292, "grad_norm": 0.541434109210968, "learning_rate": 1.3700395879388142e-05, "loss": 0.0991, "step": 24587 }, { "epoch": 0.5418036986233453, "grad_norm": 0.5384852290153503, "learning_rate": 1.369932937305485e-05, "loss": 0.0457, "step": 24588 }, { "epoch": 0.5418257339128615, "grad_norm": 0.6692975759506226, "learning_rate": 1.3698262873346578e-05, "loss": 0.0893, "step": 24589 }, { "epoch": 0.5418477692023776, "grad_norm": 0.7934095859527588, "learning_rate": 1.3697196380268749e-05, "loss": 0.1045, "step": 24590 }, { "epoch": 0.5418698044918938, "grad_norm": 0.6364783048629761, "learning_rate": 1.369612989382679e-05, "loss": 0.0707, "step": 24591 }, { "epoch": 0.54189183978141, "grad_norm": 0.7117460370063782, "learning_rate": 1.3695063414026148e-05, "loss": 0.0971, "step": 24592 }, { "epoch": 0.5419138750709261, "grad_norm": 0.46498313546180725, "learning_rate": 1.3693996940872248e-05, "loss": 0.0706, "step": 24593 }, { "epoch": 0.5419359103604422, "grad_norm": 0.4953424036502838, "learning_rate": 1.3692930474370522e-05, "loss": 0.0486, "step": 24594 }, { "epoch": 0.5419579456499584, "grad_norm": 0.48692336678504944, "learning_rate": 1.36918640145264e-05, "loss": 0.078, "step": 24595 }, { "epoch": 0.5419799809394745, "grad_norm": 0.5593085885047913, "learning_rate": 1.3690797561345317e-05, "loss": 0.0807, "step": 24596 }, { "epoch": 0.5420020162289907, "grad_norm": 0.37136051058769226, "learning_rate": 1.3689731114832704e-05, "loss": 0.0862, "step": 24597 }, { "epoch": 0.5420240515185069, "grad_norm": 0.5658836960792542, "learning_rate": 1.3688664674993994e-05, "loss": 0.0882, "step": 24598 }, { "epoch": 0.542046086808023, "grad_norm": 0.5574008226394653, "learning_rate": 1.3687598241834612e-05, "loss": 0.0946, "step": 24599 }, { "epoch": 0.5420681220975392, "grad_norm": 0.5428546071052551, "learning_rate": 1.3686531815360002e-05, "loss": 0.0865, "step": 24600 }, { "epoch": 0.5420901573870553, "grad_norm": 0.8395499587059021, "learning_rate": 1.368546539557559e-05, "loss": 0.0986, "step": 24601 }, { "epoch": 0.5421121926765715, "grad_norm": 0.942223072052002, "learning_rate": 1.3684398982486807e-05, "loss": 0.0707, "step": 24602 }, { "epoch": 0.5421342279660877, "grad_norm": 0.5445312261581421, "learning_rate": 1.3683332576099082e-05, "loss": 0.0628, "step": 24603 }, { "epoch": 0.5421562632556038, "grad_norm": 0.639168381690979, "learning_rate": 1.3682266176417854e-05, "loss": 0.1054, "step": 24604 }, { "epoch": 0.54217829854512, "grad_norm": 0.769190788269043, "learning_rate": 1.3681199783448552e-05, "loss": 0.0903, "step": 24605 }, { "epoch": 0.5422003338346362, "grad_norm": 0.5382562875747681, "learning_rate": 1.3680133397196607e-05, "loss": 0.0525, "step": 24606 }, { "epoch": 0.5422223691241523, "grad_norm": 0.5232256054878235, "learning_rate": 1.3679067017667441e-05, "loss": 0.0746, "step": 24607 }, { "epoch": 0.5422444044136685, "grad_norm": 0.5712631344795227, "learning_rate": 1.3678000644866503e-05, "loss": 0.0673, "step": 24608 }, { "epoch": 0.5422664397031847, "grad_norm": 0.5707948207855225, "learning_rate": 1.3676934278799218e-05, "loss": 0.051, "step": 24609 }, { "epoch": 0.5422884749927008, "grad_norm": 0.5683419108390808, "learning_rate": 1.367586791947101e-05, "loss": 0.0627, "step": 24610 }, { "epoch": 0.542310510282217, "grad_norm": 0.5050530433654785, "learning_rate": 1.3674801566887323e-05, "loss": 0.0619, "step": 24611 }, { "epoch": 0.5423325455717332, "grad_norm": 0.5941553115844727, "learning_rate": 1.3673735221053581e-05, "loss": 0.0553, "step": 24612 }, { "epoch": 0.5423545808612493, "grad_norm": 0.9123733639717102, "learning_rate": 1.3672668881975216e-05, "loss": 0.0868, "step": 24613 }, { "epoch": 0.5423766161507655, "grad_norm": 0.6179419755935669, "learning_rate": 1.3671602549657655e-05, "loss": 0.0614, "step": 24614 }, { "epoch": 0.5423986514402817, "grad_norm": 0.6775954961776733, "learning_rate": 1.367053622410634e-05, "loss": 0.1102, "step": 24615 }, { "epoch": 0.5424206867297978, "grad_norm": 0.7427788376808167, "learning_rate": 1.3669469905326697e-05, "loss": 0.048, "step": 24616 }, { "epoch": 0.542442722019314, "grad_norm": 0.5958977937698364, "learning_rate": 1.3668403593324158e-05, "loss": 0.0833, "step": 24617 }, { "epoch": 0.54246475730883, "grad_norm": 0.5310479402542114, "learning_rate": 1.366733728810415e-05, "loss": 0.0782, "step": 24618 }, { "epoch": 0.5424867925983462, "grad_norm": 1.2889243364334106, "learning_rate": 1.3666270989672111e-05, "loss": 0.0563, "step": 24619 }, { "epoch": 0.5425088278878624, "grad_norm": 1.1730979681015015, "learning_rate": 1.3665204698033469e-05, "loss": 0.0923, "step": 24620 }, { "epoch": 0.5425308631773785, "grad_norm": 0.5071101188659668, "learning_rate": 1.3664138413193655e-05, "loss": 0.0664, "step": 24621 }, { "epoch": 0.5425528984668947, "grad_norm": 0.5656729340553284, "learning_rate": 1.3663072135158094e-05, "loss": 0.0578, "step": 24622 }, { "epoch": 0.5425749337564109, "grad_norm": 0.5878570079803467, "learning_rate": 1.3662005863932231e-05, "loss": 0.0489, "step": 24623 }, { "epoch": 0.542596969045927, "grad_norm": 0.7530471682548523, "learning_rate": 1.3660939599521489e-05, "loss": 0.0655, "step": 24624 }, { "epoch": 0.5426190043354432, "grad_norm": 0.6816887259483337, "learning_rate": 1.36598733419313e-05, "loss": 0.0832, "step": 24625 }, { "epoch": 0.5426410396249594, "grad_norm": 0.49106597900390625, "learning_rate": 1.365880709116709e-05, "loss": 0.0712, "step": 24626 }, { "epoch": 0.5426630749144755, "grad_norm": 0.690397322177887, "learning_rate": 1.36577408472343e-05, "loss": 0.0925, "step": 24627 }, { "epoch": 0.5426851102039917, "grad_norm": 0.5188047289848328, "learning_rate": 1.3656674610138354e-05, "loss": 0.0825, "step": 24628 }, { "epoch": 0.5427071454935078, "grad_norm": 0.9891031384468079, "learning_rate": 1.3655608379884685e-05, "loss": 0.0838, "step": 24629 }, { "epoch": 0.542729180783024, "grad_norm": 0.5014339685440063, "learning_rate": 1.3654542156478716e-05, "loss": 0.0656, "step": 24630 }, { "epoch": 0.5427512160725402, "grad_norm": 0.8642140030860901, "learning_rate": 1.3653475939925894e-05, "loss": 0.0676, "step": 24631 }, { "epoch": 0.5427732513620563, "grad_norm": 0.6114509701728821, "learning_rate": 1.365240973023164e-05, "loss": 0.0905, "step": 24632 }, { "epoch": 0.5427952866515725, "grad_norm": 0.6131248474121094, "learning_rate": 1.3651343527401382e-05, "loss": 0.0669, "step": 24633 }, { "epoch": 0.5428173219410887, "grad_norm": 1.300649881362915, "learning_rate": 1.3650277331440558e-05, "loss": 0.1098, "step": 24634 }, { "epoch": 0.5428393572306048, "grad_norm": 0.611181914806366, "learning_rate": 1.3649211142354596e-05, "loss": 0.066, "step": 24635 }, { "epoch": 0.542861392520121, "grad_norm": 0.47985246777534485, "learning_rate": 1.3648144960148926e-05, "loss": 0.0769, "step": 24636 }, { "epoch": 0.5428834278096372, "grad_norm": 0.5884562730789185, "learning_rate": 1.3647078784828972e-05, "loss": 0.0681, "step": 24637 }, { "epoch": 0.5429054630991533, "grad_norm": 0.8070378303527832, "learning_rate": 1.3646012616400178e-05, "loss": 0.0801, "step": 24638 }, { "epoch": 0.5429274983886695, "grad_norm": 0.5068776607513428, "learning_rate": 1.3644946454867968e-05, "loss": 0.0679, "step": 24639 }, { "epoch": 0.5429495336781857, "grad_norm": 0.45564720034599304, "learning_rate": 1.364388030023777e-05, "loss": 0.0524, "step": 24640 }, { "epoch": 0.5429715689677018, "grad_norm": 0.6483713388442993, "learning_rate": 1.3642814152515017e-05, "loss": 0.0905, "step": 24641 }, { "epoch": 0.542993604257218, "grad_norm": 0.466994971036911, "learning_rate": 1.3641748011705141e-05, "loss": 0.0551, "step": 24642 }, { "epoch": 0.543015639546734, "grad_norm": 0.5172483921051025, "learning_rate": 1.364068187781357e-05, "loss": 0.0531, "step": 24643 }, { "epoch": 0.5430376748362502, "grad_norm": 0.4695052206516266, "learning_rate": 1.3639615750845737e-05, "loss": 0.0817, "step": 24644 }, { "epoch": 0.5430597101257664, "grad_norm": 0.644817054271698, "learning_rate": 1.3638549630807063e-05, "loss": 0.0935, "step": 24645 }, { "epoch": 0.5430817454152825, "grad_norm": 0.6910818815231323, "learning_rate": 1.3637483517702991e-05, "loss": 0.0817, "step": 24646 }, { "epoch": 0.5431037807047987, "grad_norm": 2.678696632385254, "learning_rate": 1.3636417411538947e-05, "loss": 0.0921, "step": 24647 }, { "epoch": 0.5431258159943149, "grad_norm": 0.4797976315021515, "learning_rate": 1.3635351312320361e-05, "loss": 0.0686, "step": 24648 }, { "epoch": 0.543147851283831, "grad_norm": 0.32939326763153076, "learning_rate": 1.3634285220052658e-05, "loss": 0.0617, "step": 24649 }, { "epoch": 0.5431698865733472, "grad_norm": 0.784613847732544, "learning_rate": 1.3633219134741277e-05, "loss": 0.1, "step": 24650 }, { "epoch": 0.5431919218628634, "grad_norm": 0.5440207123756409, "learning_rate": 1.3632153056391645e-05, "loss": 0.0711, "step": 24651 }, { "epoch": 0.5432139571523795, "grad_norm": 0.5602807402610779, "learning_rate": 1.3631086985009189e-05, "loss": 0.0732, "step": 24652 }, { "epoch": 0.5432359924418957, "grad_norm": 0.6876671314239502, "learning_rate": 1.3630020920599334e-05, "loss": 0.0832, "step": 24653 }, { "epoch": 0.5432580277314119, "grad_norm": 0.9243708252906799, "learning_rate": 1.3628954863167526e-05, "loss": 0.0897, "step": 24654 }, { "epoch": 0.543280063020928, "grad_norm": 0.2952728271484375, "learning_rate": 1.3627888812719184e-05, "loss": 0.0556, "step": 24655 }, { "epoch": 0.5433020983104442, "grad_norm": 0.786523163318634, "learning_rate": 1.3626822769259737e-05, "loss": 0.1323, "step": 24656 }, { "epoch": 0.5433241335999603, "grad_norm": 0.5577515363693237, "learning_rate": 1.3625756732794624e-05, "loss": 0.0756, "step": 24657 }, { "epoch": 0.5433461688894765, "grad_norm": 0.8323096036911011, "learning_rate": 1.3624690703329267e-05, "loss": 0.1158, "step": 24658 }, { "epoch": 0.5433682041789927, "grad_norm": 0.4598351716995239, "learning_rate": 1.3623624680869097e-05, "loss": 0.0552, "step": 24659 }, { "epoch": 0.5433902394685088, "grad_norm": 0.567155122756958, "learning_rate": 1.3622558665419538e-05, "loss": 0.0608, "step": 24660 }, { "epoch": 0.543412274758025, "grad_norm": 0.6903582811355591, "learning_rate": 1.3621492656986033e-05, "loss": 0.0741, "step": 24661 }, { "epoch": 0.5434343100475412, "grad_norm": 0.5727840065956116, "learning_rate": 1.3620426655574006e-05, "loss": 0.0693, "step": 24662 }, { "epoch": 0.5434563453370573, "grad_norm": 0.6673732399940491, "learning_rate": 1.3619360661188887e-05, "loss": 0.0902, "step": 24663 }, { "epoch": 0.5434783806265735, "grad_norm": 0.7185238599777222, "learning_rate": 1.3618294673836099e-05, "loss": 0.0883, "step": 24664 }, { "epoch": 0.5435004159160897, "grad_norm": 0.646873414516449, "learning_rate": 1.361722869352108e-05, "loss": 0.0741, "step": 24665 }, { "epoch": 0.5435224512056058, "grad_norm": 0.798891007900238, "learning_rate": 1.361616272024926e-05, "loss": 0.0604, "step": 24666 }, { "epoch": 0.543544486495122, "grad_norm": 0.4880817234516144, "learning_rate": 1.3615096754026062e-05, "loss": 0.0754, "step": 24667 }, { "epoch": 0.543566521784638, "grad_norm": 0.8180834054946899, "learning_rate": 1.3614030794856915e-05, "loss": 0.102, "step": 24668 }, { "epoch": 0.5435885570741542, "grad_norm": 0.4096689820289612, "learning_rate": 1.3612964842747255e-05, "loss": 0.0636, "step": 24669 }, { "epoch": 0.5436105923636704, "grad_norm": 0.6435591578483582, "learning_rate": 1.3611898897702513e-05, "loss": 0.0677, "step": 24670 }, { "epoch": 0.5436326276531865, "grad_norm": 1.3995925188064575, "learning_rate": 1.3610832959728112e-05, "loss": 0.139, "step": 24671 }, { "epoch": 0.5436546629427027, "grad_norm": 1.1681782007217407, "learning_rate": 1.3609767028829482e-05, "loss": 0.0956, "step": 24672 }, { "epoch": 0.5436766982322189, "grad_norm": 0.49936163425445557, "learning_rate": 1.3608701105012055e-05, "loss": 0.0589, "step": 24673 }, { "epoch": 0.543698733521735, "grad_norm": 0.6169090867042542, "learning_rate": 1.360763518828126e-05, "loss": 0.0851, "step": 24674 }, { "epoch": 0.5437207688112512, "grad_norm": 3.7744431495666504, "learning_rate": 1.3606569278642526e-05, "loss": 0.0923, "step": 24675 }, { "epoch": 0.5437428041007674, "grad_norm": 1.2505460977554321, "learning_rate": 1.3605503376101275e-05, "loss": 0.0666, "step": 24676 }, { "epoch": 0.5437648393902835, "grad_norm": 0.7396154403686523, "learning_rate": 1.3604437480662952e-05, "loss": 0.1062, "step": 24677 }, { "epoch": 0.5437868746797997, "grad_norm": 0.7143181562423706, "learning_rate": 1.3603371592332974e-05, "loss": 0.1048, "step": 24678 }, { "epoch": 0.5438089099693159, "grad_norm": 0.5428934097290039, "learning_rate": 1.3602305711116769e-05, "loss": 0.0696, "step": 24679 }, { "epoch": 0.543830945258832, "grad_norm": 0.3154911398887634, "learning_rate": 1.3601239837019777e-05, "loss": 0.0628, "step": 24680 }, { "epoch": 0.5438529805483482, "grad_norm": 0.735458493232727, "learning_rate": 1.360017397004742e-05, "loss": 0.0743, "step": 24681 }, { "epoch": 0.5438750158378644, "grad_norm": 0.7290059328079224, "learning_rate": 1.3599108110205131e-05, "loss": 0.0792, "step": 24682 }, { "epoch": 0.5438970511273805, "grad_norm": 0.5694199800491333, "learning_rate": 1.3598042257498325e-05, "loss": 0.0473, "step": 24683 }, { "epoch": 0.5439190864168967, "grad_norm": 1.0325075387954712, "learning_rate": 1.3596976411932449e-05, "loss": 0.1057, "step": 24684 }, { "epoch": 0.5439411217064128, "grad_norm": 0.8179279565811157, "learning_rate": 1.3595910573512924e-05, "loss": 0.0749, "step": 24685 }, { "epoch": 0.543963156995929, "grad_norm": 0.8634856939315796, "learning_rate": 1.3594844742245181e-05, "loss": 0.1244, "step": 24686 }, { "epoch": 0.5439851922854452, "grad_norm": 0.5254594087600708, "learning_rate": 1.359377891813464e-05, "loss": 0.0624, "step": 24687 }, { "epoch": 0.5440072275749613, "grad_norm": 0.6617634892463684, "learning_rate": 1.3592713101186742e-05, "loss": 0.0786, "step": 24688 }, { "epoch": 0.5440292628644775, "grad_norm": 0.48862379789352417, "learning_rate": 1.3591647291406912e-05, "loss": 0.0527, "step": 24689 }, { "epoch": 0.5440512981539937, "grad_norm": 0.5761507749557495, "learning_rate": 1.3590581488800577e-05, "loss": 0.0727, "step": 24690 }, { "epoch": 0.5440733334435098, "grad_norm": 0.8168244361877441, "learning_rate": 1.3589515693373163e-05, "loss": 0.1016, "step": 24691 }, { "epoch": 0.5440953687330259, "grad_norm": 0.4553641080856323, "learning_rate": 1.3588449905130106e-05, "loss": 0.0632, "step": 24692 }, { "epoch": 0.544117404022542, "grad_norm": 0.6263876557350159, "learning_rate": 1.3587384124076831e-05, "loss": 0.0515, "step": 24693 }, { "epoch": 0.5441394393120582, "grad_norm": 0.815920352935791, "learning_rate": 1.3586318350218764e-05, "loss": 0.0851, "step": 24694 }, { "epoch": 0.5441614746015744, "grad_norm": 0.5272948741912842, "learning_rate": 1.3585252583561332e-05, "loss": 0.0503, "step": 24695 }, { "epoch": 0.5441835098910905, "grad_norm": 0.5792530179023743, "learning_rate": 1.3584186824109972e-05, "loss": 0.0696, "step": 24696 }, { "epoch": 0.5442055451806067, "grad_norm": 0.5313214659690857, "learning_rate": 1.3583121071870108e-05, "loss": 0.0943, "step": 24697 }, { "epoch": 0.5442275804701229, "grad_norm": 0.986046552658081, "learning_rate": 1.3582055326847167e-05, "loss": 0.1026, "step": 24698 }, { "epoch": 0.544249615759639, "grad_norm": 0.7741104960441589, "learning_rate": 1.3580989589046577e-05, "loss": 0.0828, "step": 24699 }, { "epoch": 0.5442716510491552, "grad_norm": 0.6784930229187012, "learning_rate": 1.3579923858473772e-05, "loss": 0.1043, "step": 24700 }, { "epoch": 0.5442936863386714, "grad_norm": 0.810806393623352, "learning_rate": 1.3578858135134174e-05, "loss": 0.0562, "step": 24701 }, { "epoch": 0.5443157216281875, "grad_norm": 0.5507264733314514, "learning_rate": 1.3577792419033208e-05, "loss": 0.0631, "step": 24702 }, { "epoch": 0.5443377569177037, "grad_norm": 0.7885962128639221, "learning_rate": 1.3576726710176314e-05, "loss": 0.0453, "step": 24703 }, { "epoch": 0.5443597922072199, "grad_norm": 0.4151689410209656, "learning_rate": 1.3575661008568915e-05, "loss": 0.07, "step": 24704 }, { "epoch": 0.544381827496736, "grad_norm": 0.7520384192466736, "learning_rate": 1.3574595314216437e-05, "loss": 0.0933, "step": 24705 }, { "epoch": 0.5444038627862522, "grad_norm": 0.32100334763526917, "learning_rate": 1.3573529627124308e-05, "loss": 0.0526, "step": 24706 }, { "epoch": 0.5444258980757684, "grad_norm": 1.248849868774414, "learning_rate": 1.3572463947297958e-05, "loss": 0.0738, "step": 24707 }, { "epoch": 0.5444479333652845, "grad_norm": 0.5612077713012695, "learning_rate": 1.3571398274742817e-05, "loss": 0.0552, "step": 24708 }, { "epoch": 0.5444699686548007, "grad_norm": 0.8786037564277649, "learning_rate": 1.357033260946431e-05, "loss": 0.0728, "step": 24709 }, { "epoch": 0.5444920039443168, "grad_norm": 0.6251882314682007, "learning_rate": 1.3569266951467859e-05, "loss": 0.0674, "step": 24710 }, { "epoch": 0.544514039233833, "grad_norm": 0.8575459718704224, "learning_rate": 1.3568201300758907e-05, "loss": 0.0981, "step": 24711 }, { "epoch": 0.5445360745233492, "grad_norm": 0.6452678442001343, "learning_rate": 1.3567135657342871e-05, "loss": 0.0691, "step": 24712 }, { "epoch": 0.5445581098128653, "grad_norm": 0.6369922757148743, "learning_rate": 1.3566070021225184e-05, "loss": 0.0786, "step": 24713 }, { "epoch": 0.5445801451023815, "grad_norm": 0.48782798647880554, "learning_rate": 1.3565004392411265e-05, "loss": 0.0722, "step": 24714 }, { "epoch": 0.5446021803918977, "grad_norm": 1.017427682876587, "learning_rate": 1.3563938770906554e-05, "loss": 0.0752, "step": 24715 }, { "epoch": 0.5446242156814138, "grad_norm": 0.5653920769691467, "learning_rate": 1.356287315671647e-05, "loss": 0.0637, "step": 24716 }, { "epoch": 0.5446462509709299, "grad_norm": 0.5978363156318665, "learning_rate": 1.3561807549846446e-05, "loss": 0.0634, "step": 24717 }, { "epoch": 0.544668286260446, "grad_norm": 0.7916591763496399, "learning_rate": 1.3560741950301899e-05, "loss": 0.1083, "step": 24718 }, { "epoch": 0.5446903215499622, "grad_norm": 0.8099198341369629, "learning_rate": 1.3559676358088273e-05, "loss": 0.1175, "step": 24719 }, { "epoch": 0.5447123568394784, "grad_norm": 0.7233846187591553, "learning_rate": 1.3558610773210988e-05, "loss": 0.0815, "step": 24720 }, { "epoch": 0.5447343921289945, "grad_norm": 0.7478154301643372, "learning_rate": 1.3557545195675471e-05, "loss": 0.1041, "step": 24721 }, { "epoch": 0.5447564274185107, "grad_norm": 0.6940174102783203, "learning_rate": 1.3556479625487145e-05, "loss": 0.0783, "step": 24722 }, { "epoch": 0.5447784627080269, "grad_norm": 0.40458688139915466, "learning_rate": 1.3555414062651448e-05, "loss": 0.0739, "step": 24723 }, { "epoch": 0.544800497997543, "grad_norm": 0.6494733691215515, "learning_rate": 1.3554348507173801e-05, "loss": 0.0848, "step": 24724 }, { "epoch": 0.5448225332870592, "grad_norm": 0.5653416514396667, "learning_rate": 1.3553282959059624e-05, "loss": 0.046, "step": 24725 }, { "epoch": 0.5448445685765754, "grad_norm": 0.47306308150291443, "learning_rate": 1.3552217418314362e-05, "loss": 0.0635, "step": 24726 }, { "epoch": 0.5448666038660915, "grad_norm": 0.44117432832717896, "learning_rate": 1.3551151884943432e-05, "loss": 0.0987, "step": 24727 }, { "epoch": 0.5448886391556077, "grad_norm": 0.378818154335022, "learning_rate": 1.3550086358952262e-05, "loss": 0.0599, "step": 24728 }, { "epoch": 0.5449106744451239, "grad_norm": 0.48027098178863525, "learning_rate": 1.3549020840346276e-05, "loss": 0.0623, "step": 24729 }, { "epoch": 0.54493270973464, "grad_norm": 0.6309548020362854, "learning_rate": 1.354795532913091e-05, "loss": 0.0823, "step": 24730 }, { "epoch": 0.5449547450241562, "grad_norm": 0.6010926961898804, "learning_rate": 1.3546889825311584e-05, "loss": 0.0568, "step": 24731 }, { "epoch": 0.5449767803136724, "grad_norm": 0.5547261834144592, "learning_rate": 1.354582432889373e-05, "loss": 0.0606, "step": 24732 }, { "epoch": 0.5449988156031885, "grad_norm": 0.9485984444618225, "learning_rate": 1.3544758839882764e-05, "loss": 0.0611, "step": 24733 }, { "epoch": 0.5450208508927047, "grad_norm": 0.7450090646743774, "learning_rate": 1.3543693358284126e-05, "loss": 0.0934, "step": 24734 }, { "epoch": 0.5450428861822209, "grad_norm": 0.5495926737785339, "learning_rate": 1.3542627884103241e-05, "loss": 0.0626, "step": 24735 }, { "epoch": 0.545064921471737, "grad_norm": 0.5922898054122925, "learning_rate": 1.3541562417345536e-05, "loss": 0.0831, "step": 24736 }, { "epoch": 0.5450869567612532, "grad_norm": 0.5041552782058716, "learning_rate": 1.354049695801643e-05, "loss": 0.0762, "step": 24737 }, { "epoch": 0.5451089920507693, "grad_norm": 0.3390001356601715, "learning_rate": 1.3539431506121357e-05, "loss": 0.0712, "step": 24738 }, { "epoch": 0.5451310273402855, "grad_norm": 0.6634856462478638, "learning_rate": 1.3538366061665744e-05, "loss": 0.0755, "step": 24739 }, { "epoch": 0.5451530626298017, "grad_norm": 0.5946608185768127, "learning_rate": 1.3537300624655018e-05, "loss": 0.0683, "step": 24740 }, { "epoch": 0.5451750979193178, "grad_norm": 0.5763545632362366, "learning_rate": 1.3536235195094597e-05, "loss": 0.0837, "step": 24741 }, { "epoch": 0.5451971332088339, "grad_norm": 0.4810505211353302, "learning_rate": 1.3535169772989922e-05, "loss": 0.0805, "step": 24742 }, { "epoch": 0.5452191684983501, "grad_norm": 0.4943864643573761, "learning_rate": 1.3534104358346412e-05, "loss": 0.049, "step": 24743 }, { "epoch": 0.5452412037878662, "grad_norm": 0.9357994198799133, "learning_rate": 1.3533038951169492e-05, "loss": 0.0794, "step": 24744 }, { "epoch": 0.5452632390773824, "grad_norm": 0.6129265427589417, "learning_rate": 1.3531973551464593e-05, "loss": 0.0817, "step": 24745 }, { "epoch": 0.5452852743668986, "grad_norm": 0.6603384017944336, "learning_rate": 1.353090815923714e-05, "loss": 0.069, "step": 24746 }, { "epoch": 0.5453073096564147, "grad_norm": 0.551252007484436, "learning_rate": 1.3529842774492563e-05, "loss": 0.0771, "step": 24747 }, { "epoch": 0.5453293449459309, "grad_norm": 1.1206765174865723, "learning_rate": 1.3528777397236274e-05, "loss": 0.082, "step": 24748 }, { "epoch": 0.545351380235447, "grad_norm": 0.6025794744491577, "learning_rate": 1.3527712027473719e-05, "loss": 0.0856, "step": 24749 }, { "epoch": 0.5453734155249632, "grad_norm": 0.5905938744544983, "learning_rate": 1.3526646665210318e-05, "loss": 0.1045, "step": 24750 }, { "epoch": 0.5453954508144794, "grad_norm": 0.5120773911476135, "learning_rate": 1.3525581310451493e-05, "loss": 0.0451, "step": 24751 }, { "epoch": 0.5454174861039955, "grad_norm": 0.6067337393760681, "learning_rate": 1.352451596320267e-05, "loss": 0.0577, "step": 24752 }, { "epoch": 0.5454395213935117, "grad_norm": 0.6720982789993286, "learning_rate": 1.3523450623469284e-05, "loss": 0.0765, "step": 24753 }, { "epoch": 0.5454615566830279, "grad_norm": 0.7194512486457825, "learning_rate": 1.3522385291256755e-05, "loss": 0.0945, "step": 24754 }, { "epoch": 0.545483591972544, "grad_norm": 0.38280412554740906, "learning_rate": 1.3521319966570508e-05, "loss": 0.0688, "step": 24755 }, { "epoch": 0.5455056272620602, "grad_norm": 0.6576851010322571, "learning_rate": 1.3520254649415965e-05, "loss": 0.1126, "step": 24756 }, { "epoch": 0.5455276625515764, "grad_norm": 0.5634628534317017, "learning_rate": 1.3519189339798567e-05, "loss": 0.0632, "step": 24757 }, { "epoch": 0.5455496978410925, "grad_norm": 0.4733012020587921, "learning_rate": 1.3518124037723731e-05, "loss": 0.0726, "step": 24758 }, { "epoch": 0.5455717331306087, "grad_norm": 0.788215696811676, "learning_rate": 1.3517058743196884e-05, "loss": 0.0659, "step": 24759 }, { "epoch": 0.5455937684201249, "grad_norm": 0.5065005421638489, "learning_rate": 1.351599345622345e-05, "loss": 0.0468, "step": 24760 }, { "epoch": 0.545615803709641, "grad_norm": 0.6130496263504028, "learning_rate": 1.3514928176808858e-05, "loss": 0.0979, "step": 24761 }, { "epoch": 0.5456378389991572, "grad_norm": 0.576594352722168, "learning_rate": 1.3513862904958534e-05, "loss": 0.0699, "step": 24762 }, { "epoch": 0.5456598742886734, "grad_norm": 0.7489176392555237, "learning_rate": 1.3512797640677904e-05, "loss": 0.0716, "step": 24763 }, { "epoch": 0.5456819095781895, "grad_norm": 0.42491284012794495, "learning_rate": 1.3511732383972385e-05, "loss": 0.0448, "step": 24764 }, { "epoch": 0.5457039448677057, "grad_norm": 0.6557728052139282, "learning_rate": 1.351066713484742e-05, "loss": 0.1115, "step": 24765 }, { "epoch": 0.5457259801572218, "grad_norm": 0.5460827946662903, "learning_rate": 1.3509601893308424e-05, "loss": 0.066, "step": 24766 }, { "epoch": 0.5457480154467379, "grad_norm": 0.4346785545349121, "learning_rate": 1.3508536659360822e-05, "loss": 0.0665, "step": 24767 }, { "epoch": 0.5457700507362541, "grad_norm": 0.7947964072227478, "learning_rate": 1.3507471433010044e-05, "loss": 0.1091, "step": 24768 }, { "epoch": 0.5457920860257702, "grad_norm": 0.7287533283233643, "learning_rate": 1.3506406214261519e-05, "loss": 0.0765, "step": 24769 }, { "epoch": 0.5458141213152864, "grad_norm": 0.6421986818313599, "learning_rate": 1.3505341003120664e-05, "loss": 0.0799, "step": 24770 }, { "epoch": 0.5458361566048026, "grad_norm": 0.9728165864944458, "learning_rate": 1.3504275799592904e-05, "loss": 0.1026, "step": 24771 }, { "epoch": 0.5458581918943187, "grad_norm": 0.7472957968711853, "learning_rate": 1.3503210603683675e-05, "loss": 0.0917, "step": 24772 }, { "epoch": 0.5458802271838349, "grad_norm": 0.6503249406814575, "learning_rate": 1.3502145415398399e-05, "loss": 0.0918, "step": 24773 }, { "epoch": 0.545902262473351, "grad_norm": 0.26913711428642273, "learning_rate": 1.3501080234742495e-05, "loss": 0.0799, "step": 24774 }, { "epoch": 0.5459242977628672, "grad_norm": 0.4980553090572357, "learning_rate": 1.3500015061721394e-05, "loss": 0.0585, "step": 24775 }, { "epoch": 0.5459463330523834, "grad_norm": 0.6709175109863281, "learning_rate": 1.3498949896340524e-05, "loss": 0.0964, "step": 24776 }, { "epoch": 0.5459683683418995, "grad_norm": 0.5390610098838806, "learning_rate": 1.3497884738605306e-05, "loss": 0.0943, "step": 24777 }, { "epoch": 0.5459904036314157, "grad_norm": 0.6890648603439331, "learning_rate": 1.3496819588521166e-05, "loss": 0.0676, "step": 24778 }, { "epoch": 0.5460124389209319, "grad_norm": 0.8657287359237671, "learning_rate": 1.3495754446093523e-05, "loss": 0.0889, "step": 24779 }, { "epoch": 0.546034474210448, "grad_norm": 0.5062177777290344, "learning_rate": 1.3494689311327817e-05, "loss": 0.0756, "step": 24780 }, { "epoch": 0.5460565094999642, "grad_norm": 0.8798251748085022, "learning_rate": 1.3493624184229466e-05, "loss": 0.1147, "step": 24781 }, { "epoch": 0.5460785447894804, "grad_norm": 0.5955325961112976, "learning_rate": 1.3492559064803894e-05, "loss": 0.0517, "step": 24782 }, { "epoch": 0.5461005800789965, "grad_norm": 0.7303141951560974, "learning_rate": 1.3491493953056524e-05, "loss": 0.0757, "step": 24783 }, { "epoch": 0.5461226153685127, "grad_norm": 0.6031830310821533, "learning_rate": 1.3490428848992787e-05, "loss": 0.0833, "step": 24784 }, { "epoch": 0.5461446506580289, "grad_norm": 0.6539721488952637, "learning_rate": 1.3489363752618105e-05, "loss": 0.0979, "step": 24785 }, { "epoch": 0.546166685947545, "grad_norm": 0.6844163537025452, "learning_rate": 1.3488298663937904e-05, "loss": 0.0789, "step": 24786 }, { "epoch": 0.5461887212370612, "grad_norm": 0.7839064002037048, "learning_rate": 1.3487233582957604e-05, "loss": 0.097, "step": 24787 }, { "epoch": 0.5462107565265774, "grad_norm": 1.0169581174850464, "learning_rate": 1.3486168509682639e-05, "loss": 0.0894, "step": 24788 }, { "epoch": 0.5462327918160935, "grad_norm": 0.659435510635376, "learning_rate": 1.3485103444118428e-05, "loss": 0.0661, "step": 24789 }, { "epoch": 0.5462548271056097, "grad_norm": 0.5575535297393799, "learning_rate": 1.3484038386270393e-05, "loss": 0.0779, "step": 24790 }, { "epoch": 0.5462768623951257, "grad_norm": 0.3421579599380493, "learning_rate": 1.3482973336143969e-05, "loss": 0.0493, "step": 24791 }, { "epoch": 0.5462988976846419, "grad_norm": 0.7531158328056335, "learning_rate": 1.3481908293744576e-05, "loss": 0.0761, "step": 24792 }, { "epoch": 0.5463209329741581, "grad_norm": 0.7051538825035095, "learning_rate": 1.348084325907764e-05, "loss": 0.0507, "step": 24793 }, { "epoch": 0.5463429682636742, "grad_norm": 0.43546319007873535, "learning_rate": 1.3479778232148579e-05, "loss": 0.067, "step": 24794 }, { "epoch": 0.5463650035531904, "grad_norm": 0.47355297207832336, "learning_rate": 1.3478713212962825e-05, "loss": 0.07, "step": 24795 }, { "epoch": 0.5463870388427066, "grad_norm": 0.5407288074493408, "learning_rate": 1.3477648201525802e-05, "loss": 0.081, "step": 24796 }, { "epoch": 0.5464090741322227, "grad_norm": 0.558255136013031, "learning_rate": 1.3476583197842931e-05, "loss": 0.0587, "step": 24797 }, { "epoch": 0.5464311094217389, "grad_norm": 0.6748467087745667, "learning_rate": 1.3475518201919633e-05, "loss": 0.0577, "step": 24798 }, { "epoch": 0.5464531447112551, "grad_norm": 0.9663577079772949, "learning_rate": 1.3474453213761345e-05, "loss": 0.0922, "step": 24799 }, { "epoch": 0.5464751800007712, "grad_norm": 0.326479971408844, "learning_rate": 1.3473388233373485e-05, "loss": 0.077, "step": 24800 }, { "epoch": 0.5464972152902874, "grad_norm": 0.6395024061203003, "learning_rate": 1.3472323260761477e-05, "loss": 0.0591, "step": 24801 }, { "epoch": 0.5465192505798036, "grad_norm": 0.798431396484375, "learning_rate": 1.3471258295930743e-05, "loss": 0.0784, "step": 24802 }, { "epoch": 0.5465412858693197, "grad_norm": 0.8143096566200256, "learning_rate": 1.3470193338886714e-05, "loss": 0.0982, "step": 24803 }, { "epoch": 0.5465633211588359, "grad_norm": 0.6451351046562195, "learning_rate": 1.3469128389634811e-05, "loss": 0.1039, "step": 24804 }, { "epoch": 0.546585356448352, "grad_norm": 0.6765217781066895, "learning_rate": 1.3468063448180456e-05, "loss": 0.0534, "step": 24805 }, { "epoch": 0.5466073917378682, "grad_norm": 0.6007091403007507, "learning_rate": 1.3466998514529069e-05, "loss": 0.0999, "step": 24806 }, { "epoch": 0.5466294270273844, "grad_norm": 0.4813441336154938, "learning_rate": 1.346593358868609e-05, "loss": 0.0654, "step": 24807 }, { "epoch": 0.5466514623169005, "grad_norm": 0.8446833491325378, "learning_rate": 1.3464868670656933e-05, "loss": 0.0974, "step": 24808 }, { "epoch": 0.5466734976064167, "grad_norm": 0.5623347163200378, "learning_rate": 1.346380376044702e-05, "loss": 0.0697, "step": 24809 }, { "epoch": 0.5466955328959329, "grad_norm": 0.5592586994171143, "learning_rate": 1.3462738858061778e-05, "loss": 0.0665, "step": 24810 }, { "epoch": 0.546717568185449, "grad_norm": 0.6547972559928894, "learning_rate": 1.3461673963506633e-05, "loss": 0.0989, "step": 24811 }, { "epoch": 0.5467396034749652, "grad_norm": 0.792656421661377, "learning_rate": 1.3460609076787009e-05, "loss": 0.0891, "step": 24812 }, { "epoch": 0.5467616387644814, "grad_norm": 0.5347294807434082, "learning_rate": 1.345954419790832e-05, "loss": 0.0805, "step": 24813 }, { "epoch": 0.5467836740539975, "grad_norm": 0.7665225863456726, "learning_rate": 1.3458479326876005e-05, "loss": 0.1174, "step": 24814 }, { "epoch": 0.5468057093435137, "grad_norm": 0.4394109845161438, "learning_rate": 1.3457414463695484e-05, "loss": 0.0591, "step": 24815 }, { "epoch": 0.5468277446330297, "grad_norm": 0.6233540177345276, "learning_rate": 1.3456349608372176e-05, "loss": 0.0758, "step": 24816 }, { "epoch": 0.5468497799225459, "grad_norm": 0.6898427605628967, "learning_rate": 1.3455284760911505e-05, "loss": 0.0611, "step": 24817 }, { "epoch": 0.5468718152120621, "grad_norm": 0.5045852661132812, "learning_rate": 1.34542199213189e-05, "loss": 0.069, "step": 24818 }, { "epoch": 0.5468938505015782, "grad_norm": 0.5249772071838379, "learning_rate": 1.3453155089599783e-05, "loss": 0.0741, "step": 24819 }, { "epoch": 0.5469158857910944, "grad_norm": 0.4739348888397217, "learning_rate": 1.3452090265759574e-05, "loss": 0.0644, "step": 24820 }, { "epoch": 0.5469379210806106, "grad_norm": 0.9291877746582031, "learning_rate": 1.3451025449803696e-05, "loss": 0.0756, "step": 24821 }, { "epoch": 0.5469599563701267, "grad_norm": 0.6078941822052002, "learning_rate": 1.3449960641737581e-05, "loss": 0.0757, "step": 24822 }, { "epoch": 0.5469819916596429, "grad_norm": 0.6149871349334717, "learning_rate": 1.3448895841566646e-05, "loss": 0.0694, "step": 24823 }, { "epoch": 0.5470040269491591, "grad_norm": 0.6883177757263184, "learning_rate": 1.3447831049296318e-05, "loss": 0.0784, "step": 24824 }, { "epoch": 0.5470260622386752, "grad_norm": 0.4062272310256958, "learning_rate": 1.3446766264932014e-05, "loss": 0.0776, "step": 24825 }, { "epoch": 0.5470480975281914, "grad_norm": 0.7651819586753845, "learning_rate": 1.3445701488479168e-05, "loss": 0.1021, "step": 24826 }, { "epoch": 0.5470701328177076, "grad_norm": 0.9485141038894653, "learning_rate": 1.3444636719943197e-05, "loss": 0.0941, "step": 24827 }, { "epoch": 0.5470921681072237, "grad_norm": 0.4987960755825043, "learning_rate": 1.3443571959329525e-05, "loss": 0.0487, "step": 24828 }, { "epoch": 0.5471142033967399, "grad_norm": 1.0976412296295166, "learning_rate": 1.3442507206643569e-05, "loss": 0.1261, "step": 24829 }, { "epoch": 0.547136238686256, "grad_norm": 0.37330010533332825, "learning_rate": 1.3441442461890766e-05, "loss": 0.0481, "step": 24830 }, { "epoch": 0.5471582739757722, "grad_norm": 1.0022984743118286, "learning_rate": 1.3440377725076532e-05, "loss": 0.0883, "step": 24831 }, { "epoch": 0.5471803092652884, "grad_norm": 1.2354085445404053, "learning_rate": 1.343931299620629e-05, "loss": 0.0868, "step": 24832 }, { "epoch": 0.5472023445548045, "grad_norm": 0.5230719447135925, "learning_rate": 1.343824827528546e-05, "loss": 0.0456, "step": 24833 }, { "epoch": 0.5472243798443207, "grad_norm": 0.6120073199272156, "learning_rate": 1.3437183562319474e-05, "loss": 0.0584, "step": 24834 }, { "epoch": 0.5472464151338369, "grad_norm": 0.716549813747406, "learning_rate": 1.343611885731375e-05, "loss": 0.0805, "step": 24835 }, { "epoch": 0.547268450423353, "grad_norm": 0.6175405383110046, "learning_rate": 1.3435054160273704e-05, "loss": 0.0897, "step": 24836 }, { "epoch": 0.5472904857128692, "grad_norm": 0.678442120552063, "learning_rate": 1.3433989471204773e-05, "loss": 0.0707, "step": 24837 }, { "epoch": 0.5473125210023854, "grad_norm": 0.3393581509590149, "learning_rate": 1.3432924790112373e-05, "loss": 0.0864, "step": 24838 }, { "epoch": 0.5473345562919015, "grad_norm": 0.8710468411445618, "learning_rate": 1.3431860117001926e-05, "loss": 0.0749, "step": 24839 }, { "epoch": 0.5473565915814177, "grad_norm": 0.37627485394477844, "learning_rate": 1.3430795451878855e-05, "loss": 0.0604, "step": 24840 }, { "epoch": 0.5473786268709337, "grad_norm": 0.512991726398468, "learning_rate": 1.342973079474859e-05, "loss": 0.0432, "step": 24841 }, { "epoch": 0.5474006621604499, "grad_norm": 0.6860986351966858, "learning_rate": 1.3428666145616544e-05, "loss": 0.0774, "step": 24842 }, { "epoch": 0.5474226974499661, "grad_norm": 0.6665167212486267, "learning_rate": 1.3427601504488145e-05, "loss": 0.0651, "step": 24843 }, { "epoch": 0.5474447327394822, "grad_norm": 0.44231945276260376, "learning_rate": 1.3426536871368808e-05, "loss": 0.0645, "step": 24844 }, { "epoch": 0.5474667680289984, "grad_norm": 0.8484863042831421, "learning_rate": 1.3425472246263971e-05, "loss": 0.0815, "step": 24845 }, { "epoch": 0.5474888033185146, "grad_norm": 1.342897653579712, "learning_rate": 1.3424407629179047e-05, "loss": 0.0757, "step": 24846 }, { "epoch": 0.5475108386080307, "grad_norm": 0.5932536721229553, "learning_rate": 1.342334302011946e-05, "loss": 0.1029, "step": 24847 }, { "epoch": 0.5475328738975469, "grad_norm": 0.5669799447059631, "learning_rate": 1.3422278419090628e-05, "loss": 0.0774, "step": 24848 }, { "epoch": 0.5475549091870631, "grad_norm": 0.6473957300186157, "learning_rate": 1.3421213826097983e-05, "loss": 0.0878, "step": 24849 }, { "epoch": 0.5475769444765792, "grad_norm": 0.5908703804016113, "learning_rate": 1.342014924114694e-05, "loss": 0.0429, "step": 24850 }, { "epoch": 0.5475989797660954, "grad_norm": 0.6480099558830261, "learning_rate": 1.3419084664242927e-05, "loss": 0.0635, "step": 24851 }, { "epoch": 0.5476210150556116, "grad_norm": 0.6543007493019104, "learning_rate": 1.3418020095391355e-05, "loss": 0.0534, "step": 24852 }, { "epoch": 0.5476430503451277, "grad_norm": 1.299362063407898, "learning_rate": 1.3416955534597663e-05, "loss": 0.0804, "step": 24853 }, { "epoch": 0.5476650856346439, "grad_norm": 0.24319450557231903, "learning_rate": 1.3415890981867264e-05, "loss": 0.0424, "step": 24854 }, { "epoch": 0.54768712092416, "grad_norm": 0.8894132971763611, "learning_rate": 1.3414826437205583e-05, "loss": 0.0781, "step": 24855 }, { "epoch": 0.5477091562136762, "grad_norm": 0.48294252157211304, "learning_rate": 1.3413761900618036e-05, "loss": 0.104, "step": 24856 }, { "epoch": 0.5477311915031924, "grad_norm": 0.9289728403091431, "learning_rate": 1.3412697372110056e-05, "loss": 0.0823, "step": 24857 }, { "epoch": 0.5477532267927085, "grad_norm": 0.6146622896194458, "learning_rate": 1.3411632851687058e-05, "loss": 0.066, "step": 24858 }, { "epoch": 0.5477752620822247, "grad_norm": 0.5773467421531677, "learning_rate": 1.341056833935446e-05, "loss": 0.1013, "step": 24859 }, { "epoch": 0.5477972973717409, "grad_norm": 0.8514646291732788, "learning_rate": 1.3409503835117693e-05, "loss": 0.0714, "step": 24860 }, { "epoch": 0.547819332661257, "grad_norm": 0.4449460804462433, "learning_rate": 1.3408439338982181e-05, "loss": 0.0673, "step": 24861 }, { "epoch": 0.5478413679507732, "grad_norm": 0.9232749938964844, "learning_rate": 1.3407374850953336e-05, "loss": 0.0777, "step": 24862 }, { "epoch": 0.5478634032402894, "grad_norm": 0.7435095310211182, "learning_rate": 1.3406310371036584e-05, "loss": 0.0736, "step": 24863 }, { "epoch": 0.5478854385298055, "grad_norm": 0.4600948095321655, "learning_rate": 1.3405245899237351e-05, "loss": 0.0769, "step": 24864 }, { "epoch": 0.5479074738193216, "grad_norm": 0.5526705384254456, "learning_rate": 1.3404181435561056e-05, "loss": 0.0771, "step": 24865 }, { "epoch": 0.5479295091088378, "grad_norm": 0.7148650288581848, "learning_rate": 1.3403116980013123e-05, "loss": 0.0948, "step": 24866 }, { "epoch": 0.5479515443983539, "grad_norm": 0.5746528506278992, "learning_rate": 1.3402052532598961e-05, "loss": 0.0859, "step": 24867 }, { "epoch": 0.5479735796878701, "grad_norm": 0.8461405038833618, "learning_rate": 1.3400988093324011e-05, "loss": 0.0459, "step": 24868 }, { "epoch": 0.5479956149773862, "grad_norm": 0.579569399356842, "learning_rate": 1.3399923662193685e-05, "loss": 0.0763, "step": 24869 }, { "epoch": 0.5480176502669024, "grad_norm": 0.6542734503746033, "learning_rate": 1.3398859239213406e-05, "loss": 0.0942, "step": 24870 }, { "epoch": 0.5480396855564186, "grad_norm": 0.8072529435157776, "learning_rate": 1.3397794824388593e-05, "loss": 0.0656, "step": 24871 }, { "epoch": 0.5480617208459347, "grad_norm": 0.5454979538917542, "learning_rate": 1.3396730417724674e-05, "loss": 0.0863, "step": 24872 }, { "epoch": 0.5480837561354509, "grad_norm": 0.80629962682724, "learning_rate": 1.3395666019227065e-05, "loss": 0.0633, "step": 24873 }, { "epoch": 0.5481057914249671, "grad_norm": 0.7470710873603821, "learning_rate": 1.339460162890119e-05, "loss": 0.0706, "step": 24874 }, { "epoch": 0.5481278267144832, "grad_norm": 0.9048532247543335, "learning_rate": 1.3393537246752464e-05, "loss": 0.0746, "step": 24875 }, { "epoch": 0.5481498620039994, "grad_norm": 0.7782913446426392, "learning_rate": 1.339247287278632e-05, "loss": 0.0803, "step": 24876 }, { "epoch": 0.5481718972935156, "grad_norm": 0.43095946311950684, "learning_rate": 1.3391408507008173e-05, "loss": 0.0751, "step": 24877 }, { "epoch": 0.5481939325830317, "grad_norm": 0.5689815878868103, "learning_rate": 1.3390344149423444e-05, "loss": 0.07, "step": 24878 }, { "epoch": 0.5482159678725479, "grad_norm": 1.0007209777832031, "learning_rate": 1.3389279800037555e-05, "loss": 0.0748, "step": 24879 }, { "epoch": 0.5482380031620641, "grad_norm": 0.6799044609069824, "learning_rate": 1.3388215458855931e-05, "loss": 0.0783, "step": 24880 }, { "epoch": 0.5482600384515802, "grad_norm": 0.2929142117500305, "learning_rate": 1.3387151125883989e-05, "loss": 0.0921, "step": 24881 }, { "epoch": 0.5482820737410964, "grad_norm": 0.6798421144485474, "learning_rate": 1.3386086801127145e-05, "loss": 0.066, "step": 24882 }, { "epoch": 0.5483041090306126, "grad_norm": 0.7310482859611511, "learning_rate": 1.338502248459083e-05, "loss": 0.0861, "step": 24883 }, { "epoch": 0.5483261443201287, "grad_norm": 0.8276525735855103, "learning_rate": 1.3383958176280466e-05, "loss": 0.0967, "step": 24884 }, { "epoch": 0.5483481796096449, "grad_norm": 4.366606712341309, "learning_rate": 1.3382893876201468e-05, "loss": 0.0848, "step": 24885 }, { "epoch": 0.548370214899161, "grad_norm": 0.48837369680404663, "learning_rate": 1.3381829584359256e-05, "loss": 0.0724, "step": 24886 }, { "epoch": 0.5483922501886772, "grad_norm": 0.8448359370231628, "learning_rate": 1.3380765300759253e-05, "loss": 0.0776, "step": 24887 }, { "epoch": 0.5484142854781934, "grad_norm": 0.672981858253479, "learning_rate": 1.3379701025406885e-05, "loss": 0.0646, "step": 24888 }, { "epoch": 0.5484363207677095, "grad_norm": 0.7169055342674255, "learning_rate": 1.3378636758307567e-05, "loss": 0.0552, "step": 24889 }, { "epoch": 0.5484583560572256, "grad_norm": 0.7630279064178467, "learning_rate": 1.3377572499466715e-05, "loss": 0.0626, "step": 24890 }, { "epoch": 0.5484803913467418, "grad_norm": 0.7655586004257202, "learning_rate": 1.3376508248889762e-05, "loss": 0.0913, "step": 24891 }, { "epoch": 0.5485024266362579, "grad_norm": 0.7473529577255249, "learning_rate": 1.3375444006582123e-05, "loss": 0.0628, "step": 24892 }, { "epoch": 0.5485244619257741, "grad_norm": 0.6433458924293518, "learning_rate": 1.3374379772549221e-05, "loss": 0.0649, "step": 24893 }, { "epoch": 0.5485464972152903, "grad_norm": 0.7086083292961121, "learning_rate": 1.3373315546796467e-05, "loss": 0.0746, "step": 24894 }, { "epoch": 0.5485685325048064, "grad_norm": 0.477281391620636, "learning_rate": 1.3372251329329298e-05, "loss": 0.0534, "step": 24895 }, { "epoch": 0.5485905677943226, "grad_norm": 0.7829447984695435, "learning_rate": 1.3371187120153124e-05, "loss": 0.0802, "step": 24896 }, { "epoch": 0.5486126030838387, "grad_norm": 0.4906426966190338, "learning_rate": 1.3370122919273366e-05, "loss": 0.0823, "step": 24897 }, { "epoch": 0.5486346383733549, "grad_norm": 0.6630672812461853, "learning_rate": 1.336905872669544e-05, "loss": 0.068, "step": 24898 }, { "epoch": 0.5486566736628711, "grad_norm": 0.6400968432426453, "learning_rate": 1.336799454242478e-05, "loss": 0.0884, "step": 24899 }, { "epoch": 0.5486787089523872, "grad_norm": 0.3108188509941101, "learning_rate": 1.3366930366466798e-05, "loss": 0.0665, "step": 24900 }, { "epoch": 0.5487007442419034, "grad_norm": 0.5241563320159912, "learning_rate": 1.3365866198826909e-05, "loss": 0.084, "step": 24901 }, { "epoch": 0.5487227795314196, "grad_norm": 0.76723712682724, "learning_rate": 1.3364802039510546e-05, "loss": 0.0594, "step": 24902 }, { "epoch": 0.5487448148209357, "grad_norm": 0.47228890657424927, "learning_rate": 1.3363737888523125e-05, "loss": 0.0984, "step": 24903 }, { "epoch": 0.5487668501104519, "grad_norm": 0.5061798691749573, "learning_rate": 1.3362673745870062e-05, "loss": 0.0498, "step": 24904 }, { "epoch": 0.5487888853999681, "grad_norm": 0.6732307076454163, "learning_rate": 1.3361609611556775e-05, "loss": 0.0797, "step": 24905 }, { "epoch": 0.5488109206894842, "grad_norm": 0.5271602272987366, "learning_rate": 1.3360545485588695e-05, "loss": 0.1272, "step": 24906 }, { "epoch": 0.5488329559790004, "grad_norm": 0.6907035112380981, "learning_rate": 1.3359481367971236e-05, "loss": 0.0621, "step": 24907 }, { "epoch": 0.5488549912685166, "grad_norm": 0.792349100112915, "learning_rate": 1.3358417258709817e-05, "loss": 0.0635, "step": 24908 }, { "epoch": 0.5488770265580327, "grad_norm": 0.5827834010124207, "learning_rate": 1.3357353157809853e-05, "loss": 0.0448, "step": 24909 }, { "epoch": 0.5488990618475489, "grad_norm": 0.681756854057312, "learning_rate": 1.3356289065276776e-05, "loss": 0.0755, "step": 24910 }, { "epoch": 0.548921097137065, "grad_norm": 0.4703383445739746, "learning_rate": 1.3355224981116002e-05, "loss": 0.0605, "step": 24911 }, { "epoch": 0.5489431324265812, "grad_norm": 0.7040212154388428, "learning_rate": 1.3354160905332948e-05, "loss": 0.1036, "step": 24912 }, { "epoch": 0.5489651677160974, "grad_norm": 0.7142643928527832, "learning_rate": 1.3353096837933032e-05, "loss": 0.0563, "step": 24913 }, { "epoch": 0.5489872030056135, "grad_norm": 0.587863564491272, "learning_rate": 1.3352032778921679e-05, "loss": 0.0773, "step": 24914 }, { "epoch": 0.5490092382951296, "grad_norm": 0.6223209500312805, "learning_rate": 1.3350968728304307e-05, "loss": 0.0577, "step": 24915 }, { "epoch": 0.5490312735846458, "grad_norm": 0.8123850226402283, "learning_rate": 1.3349904686086337e-05, "loss": 0.075, "step": 24916 }, { "epoch": 0.5490533088741619, "grad_norm": 0.5006393790245056, "learning_rate": 1.334884065227318e-05, "loss": 0.0613, "step": 24917 }, { "epoch": 0.5490753441636781, "grad_norm": 0.642633855342865, "learning_rate": 1.3347776626870269e-05, "loss": 0.0778, "step": 24918 }, { "epoch": 0.5490973794531943, "grad_norm": 0.5991992354393005, "learning_rate": 1.334671260988302e-05, "loss": 0.065, "step": 24919 }, { "epoch": 0.5491194147427104, "grad_norm": 0.6960445046424866, "learning_rate": 1.3345648601316847e-05, "loss": 0.1001, "step": 24920 }, { "epoch": 0.5491414500322266, "grad_norm": 0.8615541458129883, "learning_rate": 1.3344584601177171e-05, "loss": 0.0824, "step": 24921 }, { "epoch": 0.5491634853217428, "grad_norm": 0.6219335794448853, "learning_rate": 1.3343520609469418e-05, "loss": 0.0667, "step": 24922 }, { "epoch": 0.5491855206112589, "grad_norm": 0.7756834030151367, "learning_rate": 1.3342456626199e-05, "loss": 0.1322, "step": 24923 }, { "epoch": 0.5492075559007751, "grad_norm": 0.7180185914039612, "learning_rate": 1.3341392651371335e-05, "loss": 0.0837, "step": 24924 }, { "epoch": 0.5492295911902912, "grad_norm": 0.5168980956077576, "learning_rate": 1.3340328684991851e-05, "loss": 0.0736, "step": 24925 }, { "epoch": 0.5492516264798074, "grad_norm": 0.828825831413269, "learning_rate": 1.3339264727065964e-05, "loss": 0.1051, "step": 24926 }, { "epoch": 0.5492736617693236, "grad_norm": 0.8914805054664612, "learning_rate": 1.3338200777599096e-05, "loss": 0.0756, "step": 24927 }, { "epoch": 0.5492956970588397, "grad_norm": 0.5529322028160095, "learning_rate": 1.3337136836596654e-05, "loss": 0.0734, "step": 24928 }, { "epoch": 0.5493177323483559, "grad_norm": 0.6578320264816284, "learning_rate": 1.3336072904064071e-05, "loss": 0.0668, "step": 24929 }, { "epoch": 0.5493397676378721, "grad_norm": 0.8396787047386169, "learning_rate": 1.3335008980006761e-05, "loss": 0.0445, "step": 24930 }, { "epoch": 0.5493618029273882, "grad_norm": 0.8209436535835266, "learning_rate": 1.3333945064430145e-05, "loss": 0.1177, "step": 24931 }, { "epoch": 0.5493838382169044, "grad_norm": 1.115821361541748, "learning_rate": 1.333288115733963e-05, "loss": 0.0533, "step": 24932 }, { "epoch": 0.5494058735064206, "grad_norm": 1.0484369993209839, "learning_rate": 1.3331817258740652e-05, "loss": 0.0949, "step": 24933 }, { "epoch": 0.5494279087959367, "grad_norm": 0.4586401581764221, "learning_rate": 1.3330753368638626e-05, "loss": 0.0689, "step": 24934 }, { "epoch": 0.5494499440854529, "grad_norm": 0.9785701036453247, "learning_rate": 1.3329689487038968e-05, "loss": 0.1012, "step": 24935 }, { "epoch": 0.549471979374969, "grad_norm": 0.5139539837837219, "learning_rate": 1.3328625613947091e-05, "loss": 0.0633, "step": 24936 }, { "epoch": 0.5494940146644852, "grad_norm": 0.48650509119033813, "learning_rate": 1.3327561749368426e-05, "loss": 0.0718, "step": 24937 }, { "epoch": 0.5495160499540014, "grad_norm": 0.6278865337371826, "learning_rate": 1.3326497893308385e-05, "loss": 0.0935, "step": 24938 }, { "epoch": 0.5495380852435174, "grad_norm": 0.5867575407028198, "learning_rate": 1.3325434045772387e-05, "loss": 0.0922, "step": 24939 }, { "epoch": 0.5495601205330336, "grad_norm": 0.6375458836555481, "learning_rate": 1.3324370206765847e-05, "loss": 0.0539, "step": 24940 }, { "epoch": 0.5495821558225498, "grad_norm": 0.7877240180969238, "learning_rate": 1.3323306376294193e-05, "loss": 0.0705, "step": 24941 }, { "epoch": 0.5496041911120659, "grad_norm": 0.49096226692199707, "learning_rate": 1.3322242554362838e-05, "loss": 0.0681, "step": 24942 }, { "epoch": 0.5496262264015821, "grad_norm": 0.4596295654773712, "learning_rate": 1.3321178740977202e-05, "loss": 0.0731, "step": 24943 }, { "epoch": 0.5496482616910983, "grad_norm": 0.6375166773796082, "learning_rate": 1.33201149361427e-05, "loss": 0.0782, "step": 24944 }, { "epoch": 0.5496702969806144, "grad_norm": 0.8087726831436157, "learning_rate": 1.3319051139864757e-05, "loss": 0.0881, "step": 24945 }, { "epoch": 0.5496923322701306, "grad_norm": 0.904382050037384, "learning_rate": 1.3317987352148787e-05, "loss": 0.1025, "step": 24946 }, { "epoch": 0.5497143675596468, "grad_norm": 0.48953530192375183, "learning_rate": 1.3316923573000203e-05, "loss": 0.0924, "step": 24947 }, { "epoch": 0.5497364028491629, "grad_norm": 0.38067442178726196, "learning_rate": 1.3315859802424437e-05, "loss": 0.0744, "step": 24948 }, { "epoch": 0.5497584381386791, "grad_norm": 0.4271077513694763, "learning_rate": 1.33147960404269e-05, "loss": 0.054, "step": 24949 }, { "epoch": 0.5497804734281952, "grad_norm": 0.6720245480537415, "learning_rate": 1.3313732287013011e-05, "loss": 0.079, "step": 24950 }, { "epoch": 0.5498025087177114, "grad_norm": 0.7230595350265503, "learning_rate": 1.3312668542188183e-05, "loss": 0.0666, "step": 24951 }, { "epoch": 0.5498245440072276, "grad_norm": 0.6098372340202332, "learning_rate": 1.3311604805957842e-05, "loss": 0.08, "step": 24952 }, { "epoch": 0.5498465792967437, "grad_norm": 0.46810075640678406, "learning_rate": 1.3310541078327405e-05, "loss": 0.0773, "step": 24953 }, { "epoch": 0.5498686145862599, "grad_norm": 0.743399441242218, "learning_rate": 1.3309477359302287e-05, "loss": 0.0759, "step": 24954 }, { "epoch": 0.5498906498757761, "grad_norm": 0.637317419052124, "learning_rate": 1.33084136488879e-05, "loss": 0.1069, "step": 24955 }, { "epoch": 0.5499126851652922, "grad_norm": 0.5800350308418274, "learning_rate": 1.3307349947089678e-05, "loss": 0.0764, "step": 24956 }, { "epoch": 0.5499347204548084, "grad_norm": 1.3166961669921875, "learning_rate": 1.3306286253913029e-05, "loss": 0.0685, "step": 24957 }, { "epoch": 0.5499567557443246, "grad_norm": 0.6003263592720032, "learning_rate": 1.3305222569363373e-05, "loss": 0.0394, "step": 24958 }, { "epoch": 0.5499787910338407, "grad_norm": 0.8027743697166443, "learning_rate": 1.3304158893446123e-05, "loss": 0.0812, "step": 24959 }, { "epoch": 0.5500008263233569, "grad_norm": 0.6023247241973877, "learning_rate": 1.3303095226166705e-05, "loss": 0.0932, "step": 24960 }, { "epoch": 0.5500228616128731, "grad_norm": 0.5076144933700562, "learning_rate": 1.3302031567530534e-05, "loss": 0.0717, "step": 24961 }, { "epoch": 0.5500448969023892, "grad_norm": 0.6847483515739441, "learning_rate": 1.3300967917543024e-05, "loss": 0.0914, "step": 24962 }, { "epoch": 0.5500669321919054, "grad_norm": 0.6090415716171265, "learning_rate": 1.3299904276209593e-05, "loss": 0.0664, "step": 24963 }, { "epoch": 0.5500889674814214, "grad_norm": 0.7602853775024414, "learning_rate": 1.3298840643535665e-05, "loss": 0.0663, "step": 24964 }, { "epoch": 0.5501110027709376, "grad_norm": 0.5213527083396912, "learning_rate": 1.3297777019526654e-05, "loss": 0.0433, "step": 24965 }, { "epoch": 0.5501330380604538, "grad_norm": 0.8255929350852966, "learning_rate": 1.3296713404187978e-05, "loss": 0.0599, "step": 24966 }, { "epoch": 0.5501550733499699, "grad_norm": 0.6510791778564453, "learning_rate": 1.3295649797525052e-05, "loss": 0.0951, "step": 24967 }, { "epoch": 0.5501771086394861, "grad_norm": 0.49088940024375916, "learning_rate": 1.3294586199543295e-05, "loss": 0.0648, "step": 24968 }, { "epoch": 0.5501991439290023, "grad_norm": 0.5426775813102722, "learning_rate": 1.3293522610248127e-05, "loss": 0.0781, "step": 24969 }, { "epoch": 0.5502211792185184, "grad_norm": 0.5873098373413086, "learning_rate": 1.3292459029644959e-05, "loss": 0.0595, "step": 24970 }, { "epoch": 0.5502432145080346, "grad_norm": 0.4112757444381714, "learning_rate": 1.3291395457739218e-05, "loss": 0.0703, "step": 24971 }, { "epoch": 0.5502652497975508, "grad_norm": 0.7693429589271545, "learning_rate": 1.3290331894536314e-05, "loss": 0.0869, "step": 24972 }, { "epoch": 0.5502872850870669, "grad_norm": 0.5283950567245483, "learning_rate": 1.3289268340041667e-05, "loss": 0.0822, "step": 24973 }, { "epoch": 0.5503093203765831, "grad_norm": 0.6247841119766235, "learning_rate": 1.3288204794260693e-05, "loss": 0.0464, "step": 24974 }, { "epoch": 0.5503313556660993, "grad_norm": 0.6800105571746826, "learning_rate": 1.3287141257198812e-05, "loss": 0.0726, "step": 24975 }, { "epoch": 0.5503533909556154, "grad_norm": 0.6377047300338745, "learning_rate": 1.3286077728861438e-05, "loss": 0.0493, "step": 24976 }, { "epoch": 0.5503754262451316, "grad_norm": 0.807949423789978, "learning_rate": 1.3285014209253991e-05, "loss": 0.068, "step": 24977 }, { "epoch": 0.5503974615346477, "grad_norm": 0.5428268909454346, "learning_rate": 1.3283950698381879e-05, "loss": 0.0817, "step": 24978 }, { "epoch": 0.5504194968241639, "grad_norm": 0.419806569814682, "learning_rate": 1.3282887196250531e-05, "loss": 0.0353, "step": 24979 }, { "epoch": 0.5504415321136801, "grad_norm": 0.4545651376247406, "learning_rate": 1.3281823702865363e-05, "loss": 0.0692, "step": 24980 }, { "epoch": 0.5504635674031962, "grad_norm": 0.49800795316696167, "learning_rate": 1.3280760218231785e-05, "loss": 0.0813, "step": 24981 }, { "epoch": 0.5504856026927124, "grad_norm": 0.7726523280143738, "learning_rate": 1.3279696742355216e-05, "loss": 0.0761, "step": 24982 }, { "epoch": 0.5505076379822286, "grad_norm": 0.25913238525390625, "learning_rate": 1.3278633275241075e-05, "loss": 0.0553, "step": 24983 }, { "epoch": 0.5505296732717447, "grad_norm": 0.42712709307670593, "learning_rate": 1.327756981689478e-05, "loss": 0.0624, "step": 24984 }, { "epoch": 0.5505517085612609, "grad_norm": 1.0239561796188354, "learning_rate": 1.3276506367321745e-05, "loss": 0.0895, "step": 24985 }, { "epoch": 0.5505737438507771, "grad_norm": 0.4391905963420868, "learning_rate": 1.3275442926527379e-05, "loss": 0.0659, "step": 24986 }, { "epoch": 0.5505957791402932, "grad_norm": 0.4601622521877289, "learning_rate": 1.3274379494517117e-05, "loss": 0.0352, "step": 24987 }, { "epoch": 0.5506178144298094, "grad_norm": 0.4590438902378082, "learning_rate": 1.3273316071296363e-05, "loss": 0.071, "step": 24988 }, { "epoch": 0.5506398497193254, "grad_norm": 0.6050764918327332, "learning_rate": 1.3272252656870536e-05, "loss": 0.0653, "step": 24989 }, { "epoch": 0.5506618850088416, "grad_norm": 0.6640529036521912, "learning_rate": 1.327118925124505e-05, "loss": 0.0672, "step": 24990 }, { "epoch": 0.5506839202983578, "grad_norm": 0.6382718086242676, "learning_rate": 1.3270125854425328e-05, "loss": 0.0492, "step": 24991 }, { "epoch": 0.5507059555878739, "grad_norm": 0.23824146389961243, "learning_rate": 1.3269062466416782e-05, "loss": 0.0718, "step": 24992 }, { "epoch": 0.5507279908773901, "grad_norm": 1.0765262842178345, "learning_rate": 1.3267999087224822e-05, "loss": 0.1082, "step": 24993 }, { "epoch": 0.5507500261669063, "grad_norm": 0.6847104430198669, "learning_rate": 1.3266935716854879e-05, "loss": 0.0687, "step": 24994 }, { "epoch": 0.5507720614564224, "grad_norm": 1.7766484022140503, "learning_rate": 1.3265872355312362e-05, "loss": 0.0916, "step": 24995 }, { "epoch": 0.5507940967459386, "grad_norm": 0.5575975775718689, "learning_rate": 1.3264809002602687e-05, "loss": 0.0673, "step": 24996 }, { "epoch": 0.5508161320354548, "grad_norm": 0.8579702377319336, "learning_rate": 1.3263745658731265e-05, "loss": 0.0832, "step": 24997 }, { "epoch": 0.5508381673249709, "grad_norm": 0.9064362049102783, "learning_rate": 1.3262682323703524e-05, "loss": 0.0731, "step": 24998 }, { "epoch": 0.5508602026144871, "grad_norm": 0.5602543354034424, "learning_rate": 1.326161899752487e-05, "loss": 0.0677, "step": 24999 }, { "epoch": 0.5508822379040033, "grad_norm": 0.397066593170166, "learning_rate": 1.3260555680200725e-05, "loss": 0.0837, "step": 25000 }, { "epoch": 0.5509042731935194, "grad_norm": 0.4501698315143585, "learning_rate": 1.3259492371736496e-05, "loss": 0.0977, "step": 25001 }, { "epoch": 0.5509263084830356, "grad_norm": 0.7129002213478088, "learning_rate": 1.3258429072137613e-05, "loss": 0.0738, "step": 25002 }, { "epoch": 0.5509483437725518, "grad_norm": 0.9573169350624084, "learning_rate": 1.3257365781409482e-05, "loss": 0.087, "step": 25003 }, { "epoch": 0.5509703790620679, "grad_norm": 0.24842461943626404, "learning_rate": 1.3256302499557522e-05, "loss": 0.0656, "step": 25004 }, { "epoch": 0.5509924143515841, "grad_norm": 0.48044952750205994, "learning_rate": 1.3255239226587142e-05, "loss": 0.0752, "step": 25005 }, { "epoch": 0.5510144496411002, "grad_norm": 0.6396931409835815, "learning_rate": 1.3254175962503773e-05, "loss": 0.0716, "step": 25006 }, { "epoch": 0.5510364849306164, "grad_norm": 0.3863593637943268, "learning_rate": 1.3253112707312824e-05, "loss": 0.0704, "step": 25007 }, { "epoch": 0.5510585202201326, "grad_norm": 0.5741385221481323, "learning_rate": 1.3252049461019705e-05, "loss": 0.0978, "step": 25008 }, { "epoch": 0.5510805555096487, "grad_norm": 0.6182830333709717, "learning_rate": 1.3250986223629828e-05, "loss": 0.061, "step": 25009 }, { "epoch": 0.5511025907991649, "grad_norm": 0.5126571655273438, "learning_rate": 1.3249922995148625e-05, "loss": 0.0716, "step": 25010 }, { "epoch": 0.5511246260886811, "grad_norm": 0.9590998888015747, "learning_rate": 1.3248859775581502e-05, "loss": 0.0731, "step": 25011 }, { "epoch": 0.5511466613781972, "grad_norm": 0.8606657385826111, "learning_rate": 1.3247796564933867e-05, "loss": 0.0642, "step": 25012 }, { "epoch": 0.5511686966677134, "grad_norm": 0.4248389005661011, "learning_rate": 1.3246733363211152e-05, "loss": 0.0603, "step": 25013 }, { "epoch": 0.5511907319572295, "grad_norm": 0.7789793610572815, "learning_rate": 1.3245670170418764e-05, "loss": 0.0823, "step": 25014 }, { "epoch": 0.5512127672467456, "grad_norm": 0.6152138710021973, "learning_rate": 1.3244606986562116e-05, "loss": 0.0563, "step": 25015 }, { "epoch": 0.5512348025362618, "grad_norm": 0.7648125886917114, "learning_rate": 1.3243543811646627e-05, "loss": 0.0803, "step": 25016 }, { "epoch": 0.551256837825778, "grad_norm": 0.648682713508606, "learning_rate": 1.3242480645677711e-05, "loss": 0.0783, "step": 25017 }, { "epoch": 0.5512788731152941, "grad_norm": 0.6775879263877869, "learning_rate": 1.3241417488660785e-05, "loss": 0.0956, "step": 25018 }, { "epoch": 0.5513009084048103, "grad_norm": 0.49419260025024414, "learning_rate": 1.3240354340601262e-05, "loss": 0.059, "step": 25019 }, { "epoch": 0.5513229436943264, "grad_norm": 0.536417543888092, "learning_rate": 1.3239291201504553e-05, "loss": 0.0799, "step": 25020 }, { "epoch": 0.5513449789838426, "grad_norm": 0.8362354040145874, "learning_rate": 1.3238228071376083e-05, "loss": 0.0912, "step": 25021 }, { "epoch": 0.5513670142733588, "grad_norm": 0.39422836899757385, "learning_rate": 1.3237164950221263e-05, "loss": 0.0572, "step": 25022 }, { "epoch": 0.5513890495628749, "grad_norm": 0.9676899909973145, "learning_rate": 1.3236101838045505e-05, "loss": 0.0679, "step": 25023 }, { "epoch": 0.5514110848523911, "grad_norm": 0.5699076056480408, "learning_rate": 1.3235038734854226e-05, "loss": 0.0632, "step": 25024 }, { "epoch": 0.5514331201419073, "grad_norm": 0.35618701577186584, "learning_rate": 1.3233975640652843e-05, "loss": 0.0616, "step": 25025 }, { "epoch": 0.5514551554314234, "grad_norm": 0.7305698990821838, "learning_rate": 1.323291255544677e-05, "loss": 0.0781, "step": 25026 }, { "epoch": 0.5514771907209396, "grad_norm": 0.5512388348579407, "learning_rate": 1.323184947924142e-05, "loss": 0.0582, "step": 25027 }, { "epoch": 0.5514992260104558, "grad_norm": 0.5838499069213867, "learning_rate": 1.3230786412042203e-05, "loss": 0.0618, "step": 25028 }, { "epoch": 0.5515212612999719, "grad_norm": 0.42614349722862244, "learning_rate": 1.3229723353854545e-05, "loss": 0.0742, "step": 25029 }, { "epoch": 0.5515432965894881, "grad_norm": 0.671466052532196, "learning_rate": 1.3228660304683856e-05, "loss": 0.0689, "step": 25030 }, { "epoch": 0.5515653318790043, "grad_norm": 0.5523115396499634, "learning_rate": 1.322759726453555e-05, "loss": 0.0537, "step": 25031 }, { "epoch": 0.5515873671685204, "grad_norm": 0.5027294754981995, "learning_rate": 1.3226534233415039e-05, "loss": 0.0708, "step": 25032 }, { "epoch": 0.5516094024580366, "grad_norm": 0.6841239333152771, "learning_rate": 1.3225471211327742e-05, "loss": 0.0664, "step": 25033 }, { "epoch": 0.5516314377475527, "grad_norm": 0.5165575742721558, "learning_rate": 1.3224408198279073e-05, "loss": 0.0825, "step": 25034 }, { "epoch": 0.5516534730370689, "grad_norm": 0.6288177967071533, "learning_rate": 1.3223345194274437e-05, "loss": 0.079, "step": 25035 }, { "epoch": 0.5516755083265851, "grad_norm": 0.7027472853660583, "learning_rate": 1.3222282199319264e-05, "loss": 0.0908, "step": 25036 }, { "epoch": 0.5516975436161012, "grad_norm": 0.36258256435394287, "learning_rate": 1.322121921341896e-05, "loss": 0.0416, "step": 25037 }, { "epoch": 0.5517195789056173, "grad_norm": 0.5487545132637024, "learning_rate": 1.3220156236578942e-05, "loss": 0.07, "step": 25038 }, { "epoch": 0.5517416141951335, "grad_norm": 1.00253427028656, "learning_rate": 1.321909326880462e-05, "loss": 0.0808, "step": 25039 }, { "epoch": 0.5517636494846496, "grad_norm": 0.26344162225723267, "learning_rate": 1.3218030310101411e-05, "loss": 0.0475, "step": 25040 }, { "epoch": 0.5517856847741658, "grad_norm": 0.32731562852859497, "learning_rate": 1.3216967360474732e-05, "loss": 0.0439, "step": 25041 }, { "epoch": 0.551807720063682, "grad_norm": 0.7188755869865417, "learning_rate": 1.3215904419929995e-05, "loss": 0.0757, "step": 25042 }, { "epoch": 0.5518297553531981, "grad_norm": 0.48121461272239685, "learning_rate": 1.3214841488472606e-05, "loss": 0.08, "step": 25043 }, { "epoch": 0.5518517906427143, "grad_norm": 0.48892742395401, "learning_rate": 1.3213778566107992e-05, "loss": 0.0415, "step": 25044 }, { "epoch": 0.5518738259322304, "grad_norm": 0.9444195032119751, "learning_rate": 1.3212715652841563e-05, "loss": 0.0768, "step": 25045 }, { "epoch": 0.5518958612217466, "grad_norm": 0.46144983172416687, "learning_rate": 1.3211652748678732e-05, "loss": 0.0667, "step": 25046 }, { "epoch": 0.5519178965112628, "grad_norm": 0.6722738742828369, "learning_rate": 1.3210589853624908e-05, "loss": 0.0845, "step": 25047 }, { "epoch": 0.5519399318007789, "grad_norm": 0.7387889623641968, "learning_rate": 1.3209526967685512e-05, "loss": 0.0842, "step": 25048 }, { "epoch": 0.5519619670902951, "grad_norm": 0.8536366820335388, "learning_rate": 1.3208464090865956e-05, "loss": 0.0857, "step": 25049 }, { "epoch": 0.5519840023798113, "grad_norm": 0.5007948279380798, "learning_rate": 1.3207401223171654e-05, "loss": 0.0709, "step": 25050 }, { "epoch": 0.5520060376693274, "grad_norm": 0.5795924663543701, "learning_rate": 1.3206338364608012e-05, "loss": 0.0517, "step": 25051 }, { "epoch": 0.5520280729588436, "grad_norm": 0.504014253616333, "learning_rate": 1.3205275515180457e-05, "loss": 0.0685, "step": 25052 }, { "epoch": 0.5520501082483598, "grad_norm": 0.43190038204193115, "learning_rate": 1.3204212674894395e-05, "loss": 0.053, "step": 25053 }, { "epoch": 0.5520721435378759, "grad_norm": 0.8656960129737854, "learning_rate": 1.3203149843755243e-05, "loss": 0.0762, "step": 25054 }, { "epoch": 0.5520941788273921, "grad_norm": 0.6911311745643616, "learning_rate": 1.3202087021768408e-05, "loss": 0.0734, "step": 25055 }, { "epoch": 0.5521162141169083, "grad_norm": 0.6469516754150391, "learning_rate": 1.320102420893931e-05, "loss": 0.0613, "step": 25056 }, { "epoch": 0.5521382494064244, "grad_norm": 0.4540466368198395, "learning_rate": 1.3199961405273363e-05, "loss": 0.05, "step": 25057 }, { "epoch": 0.5521602846959406, "grad_norm": 0.4735250473022461, "learning_rate": 1.319889861077597e-05, "loss": 0.0512, "step": 25058 }, { "epoch": 0.5521823199854567, "grad_norm": 0.5050233006477356, "learning_rate": 1.3197835825452557e-05, "loss": 0.0769, "step": 25059 }, { "epoch": 0.5522043552749729, "grad_norm": 0.7026810050010681, "learning_rate": 1.3196773049308534e-05, "loss": 0.1015, "step": 25060 }, { "epoch": 0.5522263905644891, "grad_norm": 0.4955466687679291, "learning_rate": 1.3195710282349313e-05, "loss": 0.0853, "step": 25061 }, { "epoch": 0.5522484258540052, "grad_norm": 0.6100175380706787, "learning_rate": 1.3194647524580304e-05, "loss": 0.0889, "step": 25062 }, { "epoch": 0.5522704611435213, "grad_norm": 0.6712892651557922, "learning_rate": 1.3193584776006927e-05, "loss": 0.0709, "step": 25063 }, { "epoch": 0.5522924964330375, "grad_norm": 0.5610163807868958, "learning_rate": 1.319252203663459e-05, "loss": 0.0543, "step": 25064 }, { "epoch": 0.5523145317225536, "grad_norm": 0.4480536878108978, "learning_rate": 1.3191459306468708e-05, "loss": 0.0707, "step": 25065 }, { "epoch": 0.5523365670120698, "grad_norm": 0.7269745469093323, "learning_rate": 1.3190396585514688e-05, "loss": 0.0749, "step": 25066 }, { "epoch": 0.552358602301586, "grad_norm": 0.44417405128479004, "learning_rate": 1.3189333873777954e-05, "loss": 0.0653, "step": 25067 }, { "epoch": 0.5523806375911021, "grad_norm": 1.1273939609527588, "learning_rate": 1.3188271171263914e-05, "loss": 0.1068, "step": 25068 }, { "epoch": 0.5524026728806183, "grad_norm": 0.6107332110404968, "learning_rate": 1.318720847797798e-05, "loss": 0.0706, "step": 25069 }, { "epoch": 0.5524247081701344, "grad_norm": 0.5359141230583191, "learning_rate": 1.3186145793925562e-05, "loss": 0.0604, "step": 25070 }, { "epoch": 0.5524467434596506, "grad_norm": 0.4720577597618103, "learning_rate": 1.318508311911208e-05, "loss": 0.0841, "step": 25071 }, { "epoch": 0.5524687787491668, "grad_norm": 0.7426758408546448, "learning_rate": 1.3184020453542944e-05, "loss": 0.0687, "step": 25072 }, { "epoch": 0.5524908140386829, "grad_norm": 0.30433687567710876, "learning_rate": 1.3182957797223564e-05, "loss": 0.0886, "step": 25073 }, { "epoch": 0.5525128493281991, "grad_norm": 0.7566916346549988, "learning_rate": 1.3181895150159348e-05, "loss": 0.0708, "step": 25074 }, { "epoch": 0.5525348846177153, "grad_norm": 0.5543760061264038, "learning_rate": 1.318083251235572e-05, "loss": 0.0708, "step": 25075 }, { "epoch": 0.5525569199072314, "grad_norm": 0.8099015951156616, "learning_rate": 1.317976988381809e-05, "loss": 0.056, "step": 25076 }, { "epoch": 0.5525789551967476, "grad_norm": 0.4042776823043823, "learning_rate": 1.3178707264551868e-05, "loss": 0.0787, "step": 25077 }, { "epoch": 0.5526009904862638, "grad_norm": 0.6965463161468506, "learning_rate": 1.3177644654562461e-05, "loss": 0.095, "step": 25078 }, { "epoch": 0.5526230257757799, "grad_norm": 0.6433984041213989, "learning_rate": 1.3176582053855291e-05, "loss": 0.0748, "step": 25079 }, { "epoch": 0.5526450610652961, "grad_norm": 0.402927428483963, "learning_rate": 1.3175519462435767e-05, "loss": 0.0591, "step": 25080 }, { "epoch": 0.5526670963548123, "grad_norm": 0.4978258013725281, "learning_rate": 1.3174456880309296e-05, "loss": 0.0523, "step": 25081 }, { "epoch": 0.5526891316443284, "grad_norm": 0.6052389740943909, "learning_rate": 1.31733943074813e-05, "loss": 0.1004, "step": 25082 }, { "epoch": 0.5527111669338446, "grad_norm": 0.8141664266586304, "learning_rate": 1.3172331743957185e-05, "loss": 0.0792, "step": 25083 }, { "epoch": 0.5527332022233608, "grad_norm": 0.6272234916687012, "learning_rate": 1.3171269189742365e-05, "loss": 0.0628, "step": 25084 }, { "epoch": 0.5527552375128769, "grad_norm": 0.6144679188728333, "learning_rate": 1.317020664484225e-05, "loss": 0.054, "step": 25085 }, { "epoch": 0.5527772728023931, "grad_norm": 0.5991050004959106, "learning_rate": 1.3169144109262255e-05, "loss": 0.0682, "step": 25086 }, { "epoch": 0.5527993080919092, "grad_norm": 0.50503009557724, "learning_rate": 1.3168081583007794e-05, "loss": 0.0946, "step": 25087 }, { "epoch": 0.5528213433814253, "grad_norm": 0.6671251654624939, "learning_rate": 1.3167019066084274e-05, "loss": 0.0672, "step": 25088 }, { "epoch": 0.5528433786709415, "grad_norm": 0.6719494462013245, "learning_rate": 1.3165956558497102e-05, "loss": 0.0739, "step": 25089 }, { "epoch": 0.5528654139604576, "grad_norm": 0.5235205888748169, "learning_rate": 1.3164894060251702e-05, "loss": 0.0722, "step": 25090 }, { "epoch": 0.5528874492499738, "grad_norm": 0.9298655986785889, "learning_rate": 1.3163831571353483e-05, "loss": 0.1052, "step": 25091 }, { "epoch": 0.55290948453949, "grad_norm": 0.4461482763290405, "learning_rate": 1.3162769091807852e-05, "loss": 0.0713, "step": 25092 }, { "epoch": 0.5529315198290061, "grad_norm": 0.6860535144805908, "learning_rate": 1.3161706621620224e-05, "loss": 0.0783, "step": 25093 }, { "epoch": 0.5529535551185223, "grad_norm": 0.3902377188205719, "learning_rate": 1.316064416079601e-05, "loss": 0.0718, "step": 25094 }, { "epoch": 0.5529755904080385, "grad_norm": 0.5980656147003174, "learning_rate": 1.315958170934062e-05, "loss": 0.0731, "step": 25095 }, { "epoch": 0.5529976256975546, "grad_norm": 0.3783371150493622, "learning_rate": 1.315851926725947e-05, "loss": 0.0497, "step": 25096 }, { "epoch": 0.5530196609870708, "grad_norm": 0.635415256023407, "learning_rate": 1.3157456834557963e-05, "loss": 0.0741, "step": 25097 }, { "epoch": 0.553041696276587, "grad_norm": 0.7909044027328491, "learning_rate": 1.315639441124152e-05, "loss": 0.0721, "step": 25098 }, { "epoch": 0.5530637315661031, "grad_norm": 0.5797526836395264, "learning_rate": 1.3155331997315551e-05, "loss": 0.093, "step": 25099 }, { "epoch": 0.5530857668556193, "grad_norm": 0.735619068145752, "learning_rate": 1.3154269592785467e-05, "loss": 0.0706, "step": 25100 }, { "epoch": 0.5531078021451354, "grad_norm": 0.6898642778396606, "learning_rate": 1.315320719765667e-05, "loss": 0.0495, "step": 25101 }, { "epoch": 0.5531298374346516, "grad_norm": 0.7600478529930115, "learning_rate": 1.3152144811934586e-05, "loss": 0.0687, "step": 25102 }, { "epoch": 0.5531518727241678, "grad_norm": 0.5074670910835266, "learning_rate": 1.3151082435624617e-05, "loss": 0.1056, "step": 25103 }, { "epoch": 0.5531739080136839, "grad_norm": 0.5050769448280334, "learning_rate": 1.3150020068732173e-05, "loss": 0.0837, "step": 25104 }, { "epoch": 0.5531959433032001, "grad_norm": 0.6588029265403748, "learning_rate": 1.3148957711262673e-05, "loss": 0.0663, "step": 25105 }, { "epoch": 0.5532179785927163, "grad_norm": 0.6981498599052429, "learning_rate": 1.3147895363221524e-05, "loss": 0.0629, "step": 25106 }, { "epoch": 0.5532400138822324, "grad_norm": 0.6075233221054077, "learning_rate": 1.3146833024614137e-05, "loss": 0.0993, "step": 25107 }, { "epoch": 0.5532620491717486, "grad_norm": 0.4438678026199341, "learning_rate": 1.3145770695445922e-05, "loss": 0.0823, "step": 25108 }, { "epoch": 0.5532840844612648, "grad_norm": 0.7994573712348938, "learning_rate": 1.3144708375722292e-05, "loss": 0.0641, "step": 25109 }, { "epoch": 0.5533061197507809, "grad_norm": 0.4937741160392761, "learning_rate": 1.3143646065448658e-05, "loss": 0.0864, "step": 25110 }, { "epoch": 0.5533281550402971, "grad_norm": 0.46122947335243225, "learning_rate": 1.314258376463043e-05, "loss": 0.0598, "step": 25111 }, { "epoch": 0.5533501903298131, "grad_norm": 1.2233121395111084, "learning_rate": 1.3141521473273014e-05, "loss": 0.0599, "step": 25112 }, { "epoch": 0.5533722256193293, "grad_norm": 0.6889098286628723, "learning_rate": 1.314045919138183e-05, "loss": 0.0805, "step": 25113 }, { "epoch": 0.5533942609088455, "grad_norm": 0.6614972949028015, "learning_rate": 1.3139396918962284e-05, "loss": 0.0982, "step": 25114 }, { "epoch": 0.5534162961983616, "grad_norm": 0.8255869150161743, "learning_rate": 1.313833465601979e-05, "loss": 0.0641, "step": 25115 }, { "epoch": 0.5534383314878778, "grad_norm": 0.6739578247070312, "learning_rate": 1.3137272402559746e-05, "loss": 0.12, "step": 25116 }, { "epoch": 0.553460366777394, "grad_norm": 0.6606425642967224, "learning_rate": 1.3136210158587582e-05, "loss": 0.0986, "step": 25117 }, { "epoch": 0.5534824020669101, "grad_norm": 0.6536394953727722, "learning_rate": 1.3135147924108698e-05, "loss": 0.065, "step": 25118 }, { "epoch": 0.5535044373564263, "grad_norm": 0.5808535218238831, "learning_rate": 1.313408569912851e-05, "loss": 0.0752, "step": 25119 }, { "epoch": 0.5535264726459425, "grad_norm": 0.6836459636688232, "learning_rate": 1.3133023483652412e-05, "loss": 0.1028, "step": 25120 }, { "epoch": 0.5535485079354586, "grad_norm": 0.8817705512046814, "learning_rate": 1.3131961277685834e-05, "loss": 0.0824, "step": 25121 }, { "epoch": 0.5535705432249748, "grad_norm": 0.743970513343811, "learning_rate": 1.3130899081234179e-05, "loss": 0.0539, "step": 25122 }, { "epoch": 0.553592578514491, "grad_norm": 0.5333095192909241, "learning_rate": 1.3129836894302852e-05, "loss": 0.0831, "step": 25123 }, { "epoch": 0.5536146138040071, "grad_norm": 0.47061723470687866, "learning_rate": 1.3128774716897274e-05, "loss": 0.0414, "step": 25124 }, { "epoch": 0.5536366490935233, "grad_norm": 0.693223774433136, "learning_rate": 1.3127712549022849e-05, "loss": 0.0898, "step": 25125 }, { "epoch": 0.5536586843830394, "grad_norm": 0.47831693291664124, "learning_rate": 1.312665039068499e-05, "loss": 0.0664, "step": 25126 }, { "epoch": 0.5536807196725556, "grad_norm": 0.8065673112869263, "learning_rate": 1.31255882418891e-05, "loss": 0.0645, "step": 25127 }, { "epoch": 0.5537027549620718, "grad_norm": 0.7045735716819763, "learning_rate": 1.3124526102640597e-05, "loss": 0.0681, "step": 25128 }, { "epoch": 0.5537247902515879, "grad_norm": 0.5951393842697144, "learning_rate": 1.312346397294489e-05, "loss": 0.0912, "step": 25129 }, { "epoch": 0.5537468255411041, "grad_norm": 0.5520156025886536, "learning_rate": 1.3122401852807387e-05, "loss": 0.0727, "step": 25130 }, { "epoch": 0.5537688608306203, "grad_norm": 0.6377302408218384, "learning_rate": 1.3121339742233492e-05, "loss": 0.0762, "step": 25131 }, { "epoch": 0.5537908961201364, "grad_norm": 0.6591942310333252, "learning_rate": 1.3120277641228626e-05, "loss": 0.086, "step": 25132 }, { "epoch": 0.5538129314096526, "grad_norm": 1.064940094947815, "learning_rate": 1.3119215549798196e-05, "loss": 0.0634, "step": 25133 }, { "epoch": 0.5538349666991688, "grad_norm": 0.4978211522102356, "learning_rate": 1.311815346794761e-05, "loss": 0.0496, "step": 25134 }, { "epoch": 0.5538570019886849, "grad_norm": 0.364981472492218, "learning_rate": 1.3117091395682272e-05, "loss": 0.0852, "step": 25135 }, { "epoch": 0.5538790372782011, "grad_norm": 0.7416483759880066, "learning_rate": 1.31160293330076e-05, "loss": 0.0793, "step": 25136 }, { "epoch": 0.5539010725677171, "grad_norm": 0.8065766096115112, "learning_rate": 1.3114967279929004e-05, "loss": 0.0858, "step": 25137 }, { "epoch": 0.5539231078572333, "grad_norm": 0.3780450224876404, "learning_rate": 1.311390523645189e-05, "loss": 0.0506, "step": 25138 }, { "epoch": 0.5539451431467495, "grad_norm": 0.8645209074020386, "learning_rate": 1.311284320258166e-05, "loss": 0.0899, "step": 25139 }, { "epoch": 0.5539671784362656, "grad_norm": 0.6733118891716003, "learning_rate": 1.3111781178323737e-05, "loss": 0.0611, "step": 25140 }, { "epoch": 0.5539892137257818, "grad_norm": 0.7087020874023438, "learning_rate": 1.3110719163683527e-05, "loss": 0.0845, "step": 25141 }, { "epoch": 0.554011249015298, "grad_norm": 0.8386966586112976, "learning_rate": 1.3109657158666435e-05, "loss": 0.0753, "step": 25142 }, { "epoch": 0.5540332843048141, "grad_norm": 0.7664763927459717, "learning_rate": 1.3108595163277874e-05, "loss": 0.0671, "step": 25143 }, { "epoch": 0.5540553195943303, "grad_norm": 0.7666050791740417, "learning_rate": 1.310753317752325e-05, "loss": 0.0748, "step": 25144 }, { "epoch": 0.5540773548838465, "grad_norm": 0.3425390124320984, "learning_rate": 1.310647120140798e-05, "loss": 0.0472, "step": 25145 }, { "epoch": 0.5540993901733626, "grad_norm": 0.34021517634391785, "learning_rate": 1.3105409234937457e-05, "loss": 0.0492, "step": 25146 }, { "epoch": 0.5541214254628788, "grad_norm": 0.8551701903343201, "learning_rate": 1.310434727811711e-05, "loss": 0.1243, "step": 25147 }, { "epoch": 0.554143460752395, "grad_norm": 0.5340185761451721, "learning_rate": 1.3103285330952335e-05, "loss": 0.0723, "step": 25148 }, { "epoch": 0.5541654960419111, "grad_norm": 0.7079594731330872, "learning_rate": 1.3102223393448548e-05, "loss": 0.0734, "step": 25149 }, { "epoch": 0.5541875313314273, "grad_norm": 0.6770722270011902, "learning_rate": 1.310116146561115e-05, "loss": 0.0556, "step": 25150 }, { "epoch": 0.5542095666209435, "grad_norm": 0.4392089545726776, "learning_rate": 1.310009954744556e-05, "loss": 0.0434, "step": 25151 }, { "epoch": 0.5542316019104596, "grad_norm": 0.708343505859375, "learning_rate": 1.309903763895718e-05, "loss": 0.0635, "step": 25152 }, { "epoch": 0.5542536371999758, "grad_norm": 0.9289592504501343, "learning_rate": 1.3097975740151422e-05, "loss": 0.0814, "step": 25153 }, { "epoch": 0.5542756724894919, "grad_norm": 0.5402634143829346, "learning_rate": 1.3096913851033686e-05, "loss": 0.0756, "step": 25154 }, { "epoch": 0.5542977077790081, "grad_norm": 0.6086277961730957, "learning_rate": 1.3095851971609395e-05, "loss": 0.0776, "step": 25155 }, { "epoch": 0.5543197430685243, "grad_norm": 0.23334211111068726, "learning_rate": 1.3094790101883952e-05, "loss": 0.0491, "step": 25156 }, { "epoch": 0.5543417783580404, "grad_norm": 1.0292373895645142, "learning_rate": 1.3093728241862763e-05, "loss": 0.0945, "step": 25157 }, { "epoch": 0.5543638136475566, "grad_norm": 0.5485037565231323, "learning_rate": 1.3092666391551237e-05, "loss": 0.0586, "step": 25158 }, { "epoch": 0.5543858489370728, "grad_norm": 0.6446743011474609, "learning_rate": 1.3091604550954784e-05, "loss": 0.0823, "step": 25159 }, { "epoch": 0.5544078842265889, "grad_norm": 0.6594183444976807, "learning_rate": 1.3090542720078815e-05, "loss": 0.0718, "step": 25160 }, { "epoch": 0.5544299195161051, "grad_norm": 0.4149931073188782, "learning_rate": 1.3089480898928736e-05, "loss": 0.0469, "step": 25161 }, { "epoch": 0.5544519548056211, "grad_norm": 0.8128769993782043, "learning_rate": 1.3088419087509949e-05, "loss": 0.061, "step": 25162 }, { "epoch": 0.5544739900951373, "grad_norm": 0.7936100959777832, "learning_rate": 1.3087357285827873e-05, "loss": 0.0662, "step": 25163 }, { "epoch": 0.5544960253846535, "grad_norm": 0.7766440510749817, "learning_rate": 1.3086295493887913e-05, "loss": 0.0649, "step": 25164 }, { "epoch": 0.5545180606741696, "grad_norm": 1.1900174617767334, "learning_rate": 1.3085233711695476e-05, "loss": 0.0756, "step": 25165 }, { "epoch": 0.5545400959636858, "grad_norm": 0.3062582015991211, "learning_rate": 1.3084171939255968e-05, "loss": 0.0607, "step": 25166 }, { "epoch": 0.554562131253202, "grad_norm": 0.6879498362541199, "learning_rate": 1.3083110176574803e-05, "loss": 0.0898, "step": 25167 }, { "epoch": 0.5545841665427181, "grad_norm": 0.6606168746948242, "learning_rate": 1.3082048423657385e-05, "loss": 0.0696, "step": 25168 }, { "epoch": 0.5546062018322343, "grad_norm": 0.4741494357585907, "learning_rate": 1.3080986680509117e-05, "loss": 0.0611, "step": 25169 }, { "epoch": 0.5546282371217505, "grad_norm": 0.5558257699012756, "learning_rate": 1.3079924947135418e-05, "loss": 0.0543, "step": 25170 }, { "epoch": 0.5546502724112666, "grad_norm": 0.8665533661842346, "learning_rate": 1.3078863223541693e-05, "loss": 0.0765, "step": 25171 }, { "epoch": 0.5546723077007828, "grad_norm": 0.4135166108608246, "learning_rate": 1.3077801509733348e-05, "loss": 0.0796, "step": 25172 }, { "epoch": 0.554694342990299, "grad_norm": 0.5613123774528503, "learning_rate": 1.3076739805715786e-05, "loss": 0.0663, "step": 25173 }, { "epoch": 0.5547163782798151, "grad_norm": 0.6536484956741333, "learning_rate": 1.3075678111494422e-05, "loss": 0.079, "step": 25174 }, { "epoch": 0.5547384135693313, "grad_norm": 0.8457440733909607, "learning_rate": 1.3074616427074661e-05, "loss": 0.0708, "step": 25175 }, { "epoch": 0.5547604488588475, "grad_norm": 0.6084219217300415, "learning_rate": 1.3073554752461914e-05, "loss": 0.0753, "step": 25176 }, { "epoch": 0.5547824841483636, "grad_norm": 0.6890961527824402, "learning_rate": 1.3072493087661577e-05, "loss": 0.0852, "step": 25177 }, { "epoch": 0.5548045194378798, "grad_norm": 0.428072065114975, "learning_rate": 1.3071431432679073e-05, "loss": 0.0745, "step": 25178 }, { "epoch": 0.554826554727396, "grad_norm": 0.7212421894073486, "learning_rate": 1.3070369787519804e-05, "loss": 0.0781, "step": 25179 }, { "epoch": 0.5548485900169121, "grad_norm": 0.6972479820251465, "learning_rate": 1.3069308152189175e-05, "loss": 0.0699, "step": 25180 }, { "epoch": 0.5548706253064283, "grad_norm": 0.40153589844703674, "learning_rate": 1.3068246526692594e-05, "loss": 0.0464, "step": 25181 }, { "epoch": 0.5548926605959444, "grad_norm": 1.0779037475585938, "learning_rate": 1.306718491103547e-05, "loss": 0.0881, "step": 25182 }, { "epoch": 0.5549146958854606, "grad_norm": 0.8552408814430237, "learning_rate": 1.306612330522321e-05, "loss": 0.0714, "step": 25183 }, { "epoch": 0.5549367311749768, "grad_norm": 1.0857864618301392, "learning_rate": 1.3065061709261222e-05, "loss": 0.1433, "step": 25184 }, { "epoch": 0.5549587664644929, "grad_norm": 0.41068968176841736, "learning_rate": 1.3064000123154905e-05, "loss": 0.1028, "step": 25185 }, { "epoch": 0.5549808017540091, "grad_norm": 0.5936794281005859, "learning_rate": 1.3062938546909681e-05, "loss": 0.0798, "step": 25186 }, { "epoch": 0.5550028370435252, "grad_norm": 0.5734399557113647, "learning_rate": 1.306187698053095e-05, "loss": 0.0849, "step": 25187 }, { "epoch": 0.5550248723330413, "grad_norm": 0.3695528507232666, "learning_rate": 1.3060815424024115e-05, "loss": 0.0557, "step": 25188 }, { "epoch": 0.5550469076225575, "grad_norm": 0.4789755642414093, "learning_rate": 1.3059753877394587e-05, "loss": 0.0607, "step": 25189 }, { "epoch": 0.5550689429120736, "grad_norm": 0.4631120264530182, "learning_rate": 1.3058692340647777e-05, "loss": 0.0455, "step": 25190 }, { "epoch": 0.5550909782015898, "grad_norm": 0.6745152473449707, "learning_rate": 1.3057630813789085e-05, "loss": 0.0622, "step": 25191 }, { "epoch": 0.555113013491106, "grad_norm": 0.6891072988510132, "learning_rate": 1.3056569296823917e-05, "loss": 0.0558, "step": 25192 }, { "epoch": 0.5551350487806221, "grad_norm": 1.0760318040847778, "learning_rate": 1.305550778975769e-05, "loss": 0.0935, "step": 25193 }, { "epoch": 0.5551570840701383, "grad_norm": 0.3356717824935913, "learning_rate": 1.3054446292595804e-05, "loss": 0.0881, "step": 25194 }, { "epoch": 0.5551791193596545, "grad_norm": 0.4573068618774414, "learning_rate": 1.3053384805343667e-05, "loss": 0.076, "step": 25195 }, { "epoch": 0.5552011546491706, "grad_norm": 0.938736081123352, "learning_rate": 1.305232332800668e-05, "loss": 0.1344, "step": 25196 }, { "epoch": 0.5552231899386868, "grad_norm": 0.7477266192436218, "learning_rate": 1.3051261860590258e-05, "loss": 0.0586, "step": 25197 }, { "epoch": 0.555245225228203, "grad_norm": 0.9730266332626343, "learning_rate": 1.3050200403099807e-05, "loss": 0.1003, "step": 25198 }, { "epoch": 0.5552672605177191, "grad_norm": 0.5860245227813721, "learning_rate": 1.3049138955540728e-05, "loss": 0.0831, "step": 25199 }, { "epoch": 0.5552892958072353, "grad_norm": 0.7893185615539551, "learning_rate": 1.3048077517918424e-05, "loss": 0.0629, "step": 25200 }, { "epoch": 0.5553113310967515, "grad_norm": 0.8483283519744873, "learning_rate": 1.3047016090238317e-05, "loss": 0.1121, "step": 25201 }, { "epoch": 0.5553333663862676, "grad_norm": 0.7502670884132385, "learning_rate": 1.3045954672505802e-05, "loss": 0.0841, "step": 25202 }, { "epoch": 0.5553554016757838, "grad_norm": 0.45933660864830017, "learning_rate": 1.304489326472629e-05, "loss": 0.0836, "step": 25203 }, { "epoch": 0.5553774369653, "grad_norm": 0.5615059733390808, "learning_rate": 1.3043831866905179e-05, "loss": 0.0587, "step": 25204 }, { "epoch": 0.5553994722548161, "grad_norm": 0.47132983803749084, "learning_rate": 1.3042770479047887e-05, "loss": 0.0681, "step": 25205 }, { "epoch": 0.5554215075443323, "grad_norm": 0.7408165335655212, "learning_rate": 1.3041709101159814e-05, "loss": 0.074, "step": 25206 }, { "epoch": 0.5554435428338484, "grad_norm": 0.6026065945625305, "learning_rate": 1.3040647733246365e-05, "loss": 0.0778, "step": 25207 }, { "epoch": 0.5554655781233646, "grad_norm": 0.7180289626121521, "learning_rate": 1.3039586375312941e-05, "loss": 0.1061, "step": 25208 }, { "epoch": 0.5554876134128808, "grad_norm": 0.5027901530265808, "learning_rate": 1.3038525027364964e-05, "loss": 0.0508, "step": 25209 }, { "epoch": 0.5555096487023969, "grad_norm": 0.7466033697128296, "learning_rate": 1.303746368940783e-05, "loss": 0.0903, "step": 25210 }, { "epoch": 0.555531683991913, "grad_norm": 0.629909336566925, "learning_rate": 1.3036402361446944e-05, "loss": 0.0476, "step": 25211 }, { "epoch": 0.5555537192814292, "grad_norm": 0.951000452041626, "learning_rate": 1.3035341043487713e-05, "loss": 0.1121, "step": 25212 }, { "epoch": 0.5555757545709453, "grad_norm": 0.4305940568447113, "learning_rate": 1.3034279735535546e-05, "loss": 0.0735, "step": 25213 }, { "epoch": 0.5555977898604615, "grad_norm": 0.4868997633457184, "learning_rate": 1.3033218437595845e-05, "loss": 0.0859, "step": 25214 }, { "epoch": 0.5556198251499777, "grad_norm": 0.671051025390625, "learning_rate": 1.3032157149674011e-05, "loss": 0.064, "step": 25215 }, { "epoch": 0.5556418604394938, "grad_norm": 0.2927933633327484, "learning_rate": 1.3031095871775463e-05, "loss": 0.0608, "step": 25216 }, { "epoch": 0.55566389572901, "grad_norm": 2.210923433303833, "learning_rate": 1.30300346039056e-05, "loss": 0.0529, "step": 25217 }, { "epoch": 0.5556859310185261, "grad_norm": 0.6391040086746216, "learning_rate": 1.3028973346069828e-05, "loss": 0.0804, "step": 25218 }, { "epoch": 0.5557079663080423, "grad_norm": 0.4247671961784363, "learning_rate": 1.3027912098273544e-05, "loss": 0.0586, "step": 25219 }, { "epoch": 0.5557300015975585, "grad_norm": 0.48099470138549805, "learning_rate": 1.3026850860522171e-05, "loss": 0.0871, "step": 25220 }, { "epoch": 0.5557520368870746, "grad_norm": 0.7293133735656738, "learning_rate": 1.3025789632821101e-05, "loss": 0.0915, "step": 25221 }, { "epoch": 0.5557740721765908, "grad_norm": 0.44044652581214905, "learning_rate": 1.3024728415175743e-05, "loss": 0.0615, "step": 25222 }, { "epoch": 0.555796107466107, "grad_norm": 0.5169993042945862, "learning_rate": 1.3023667207591495e-05, "loss": 0.0571, "step": 25223 }, { "epoch": 0.5558181427556231, "grad_norm": 0.8296316862106323, "learning_rate": 1.3022606010073779e-05, "loss": 0.1127, "step": 25224 }, { "epoch": 0.5558401780451393, "grad_norm": 0.4640015661716461, "learning_rate": 1.302154482262799e-05, "loss": 0.0808, "step": 25225 }, { "epoch": 0.5558622133346555, "grad_norm": 0.5957985520362854, "learning_rate": 1.3020483645259533e-05, "loss": 0.0709, "step": 25226 }, { "epoch": 0.5558842486241716, "grad_norm": 0.21112394332885742, "learning_rate": 1.3019422477973809e-05, "loss": 0.0619, "step": 25227 }, { "epoch": 0.5559062839136878, "grad_norm": 0.6135818362236023, "learning_rate": 1.3018361320776235e-05, "loss": 0.0594, "step": 25228 }, { "epoch": 0.555928319203204, "grad_norm": 0.4941554367542267, "learning_rate": 1.3017300173672208e-05, "loss": 0.1002, "step": 25229 }, { "epoch": 0.5559503544927201, "grad_norm": 1.0104663372039795, "learning_rate": 1.3016239036667137e-05, "loss": 0.0707, "step": 25230 }, { "epoch": 0.5559723897822363, "grad_norm": 0.6063658595085144, "learning_rate": 1.3015177909766417e-05, "loss": 0.0686, "step": 25231 }, { "epoch": 0.5559944250717525, "grad_norm": 0.48636674880981445, "learning_rate": 1.3014116792975466e-05, "loss": 0.062, "step": 25232 }, { "epoch": 0.5560164603612686, "grad_norm": 0.7707263231277466, "learning_rate": 1.3013055686299683e-05, "loss": 0.0756, "step": 25233 }, { "epoch": 0.5560384956507848, "grad_norm": 0.7710700631141663, "learning_rate": 1.3011994589744472e-05, "loss": 0.0656, "step": 25234 }, { "epoch": 0.556060530940301, "grad_norm": 0.5549325346946716, "learning_rate": 1.3010933503315232e-05, "loss": 0.0672, "step": 25235 }, { "epoch": 0.556082566229817, "grad_norm": 0.4815938472747803, "learning_rate": 1.3009872427017382e-05, "loss": 0.0688, "step": 25236 }, { "epoch": 0.5561046015193332, "grad_norm": 0.5568186044692993, "learning_rate": 1.3008811360856319e-05, "loss": 0.0905, "step": 25237 }, { "epoch": 0.5561266368088493, "grad_norm": 0.6899706721305847, "learning_rate": 1.3007750304837443e-05, "loss": 0.0974, "step": 25238 }, { "epoch": 0.5561486720983655, "grad_norm": 0.7587937116622925, "learning_rate": 1.3006689258966168e-05, "loss": 0.0861, "step": 25239 }, { "epoch": 0.5561707073878817, "grad_norm": 0.3520660698413849, "learning_rate": 1.3005628223247893e-05, "loss": 0.0555, "step": 25240 }, { "epoch": 0.5561927426773978, "grad_norm": 0.6097980737686157, "learning_rate": 1.3004567197688022e-05, "loss": 0.0631, "step": 25241 }, { "epoch": 0.556214777966914, "grad_norm": 0.630982518196106, "learning_rate": 1.3003506182291956e-05, "loss": 0.0944, "step": 25242 }, { "epoch": 0.5562368132564302, "grad_norm": 0.5851479768753052, "learning_rate": 1.3002445177065106e-05, "loss": 0.0558, "step": 25243 }, { "epoch": 0.5562588485459463, "grad_norm": 0.44418370723724365, "learning_rate": 1.3001384182012876e-05, "loss": 0.0314, "step": 25244 }, { "epoch": 0.5562808838354625, "grad_norm": 0.5435184240341187, "learning_rate": 1.300032319714067e-05, "loss": 0.0423, "step": 25245 }, { "epoch": 0.5563029191249786, "grad_norm": 0.5254384875297546, "learning_rate": 1.2999262222453885e-05, "loss": 0.0451, "step": 25246 }, { "epoch": 0.5563249544144948, "grad_norm": 0.6366215944290161, "learning_rate": 1.2998201257957934e-05, "loss": 0.0705, "step": 25247 }, { "epoch": 0.556346989704011, "grad_norm": 0.6573607325553894, "learning_rate": 1.2997140303658217e-05, "loss": 0.0674, "step": 25248 }, { "epoch": 0.5563690249935271, "grad_norm": 0.603506326675415, "learning_rate": 1.299607935956014e-05, "loss": 0.0639, "step": 25249 }, { "epoch": 0.5563910602830433, "grad_norm": 0.6188821792602539, "learning_rate": 1.2995018425669099e-05, "loss": 0.0851, "step": 25250 }, { "epoch": 0.5564130955725595, "grad_norm": 0.6132418513298035, "learning_rate": 1.2993957501990509e-05, "loss": 0.0708, "step": 25251 }, { "epoch": 0.5564351308620756, "grad_norm": 0.8853250741958618, "learning_rate": 1.2992896588529769e-05, "loss": 0.0442, "step": 25252 }, { "epoch": 0.5564571661515918, "grad_norm": 0.7368199825286865, "learning_rate": 1.2991835685292284e-05, "loss": 0.0652, "step": 25253 }, { "epoch": 0.556479201441108, "grad_norm": 0.5170119404792786, "learning_rate": 1.2990774792283453e-05, "loss": 0.0767, "step": 25254 }, { "epoch": 0.5565012367306241, "grad_norm": 0.512137234210968, "learning_rate": 1.2989713909508689e-05, "loss": 0.0508, "step": 25255 }, { "epoch": 0.5565232720201403, "grad_norm": 0.8679001927375793, "learning_rate": 1.2988653036973386e-05, "loss": 0.0957, "step": 25256 }, { "epoch": 0.5565453073096565, "grad_norm": 0.7000023722648621, "learning_rate": 1.2987592174682946e-05, "loss": 0.0875, "step": 25257 }, { "epoch": 0.5565673425991726, "grad_norm": 0.6560959219932556, "learning_rate": 1.2986531322642786e-05, "loss": 0.0763, "step": 25258 }, { "epoch": 0.5565893778886888, "grad_norm": 0.4151372015476227, "learning_rate": 1.2985470480858303e-05, "loss": 0.0537, "step": 25259 }, { "epoch": 0.556611413178205, "grad_norm": 0.473448783159256, "learning_rate": 1.2984409649334899e-05, "loss": 0.0547, "step": 25260 }, { "epoch": 0.556633448467721, "grad_norm": 0.5603361129760742, "learning_rate": 1.2983348828077972e-05, "loss": 0.0536, "step": 25261 }, { "epoch": 0.5566554837572372, "grad_norm": 0.4999927282333374, "learning_rate": 1.2982288017092935e-05, "loss": 0.0709, "step": 25262 }, { "epoch": 0.5566775190467533, "grad_norm": 0.8129159212112427, "learning_rate": 1.2981227216385186e-05, "loss": 0.0829, "step": 25263 }, { "epoch": 0.5566995543362695, "grad_norm": 0.7582510113716125, "learning_rate": 1.2980166425960132e-05, "loss": 0.074, "step": 25264 }, { "epoch": 0.5567215896257857, "grad_norm": 0.7223076224327087, "learning_rate": 1.2979105645823164e-05, "loss": 0.0614, "step": 25265 }, { "epoch": 0.5567436249153018, "grad_norm": 0.6775492429733276, "learning_rate": 1.2978044875979702e-05, "loss": 0.0919, "step": 25266 }, { "epoch": 0.556765660204818, "grad_norm": 0.7553768754005432, "learning_rate": 1.2976984116435144e-05, "loss": 0.1041, "step": 25267 }, { "epoch": 0.5567876954943342, "grad_norm": 0.5692847371101379, "learning_rate": 1.2975923367194888e-05, "loss": 0.0919, "step": 25268 }, { "epoch": 0.5568097307838503, "grad_norm": 0.7407435178756714, "learning_rate": 1.2974862628264337e-05, "loss": 0.0999, "step": 25269 }, { "epoch": 0.5568317660733665, "grad_norm": 0.667204737663269, "learning_rate": 1.2973801899648901e-05, "loss": 0.0785, "step": 25270 }, { "epoch": 0.5568538013628826, "grad_norm": 0.47731104493141174, "learning_rate": 1.2972741181353978e-05, "loss": 0.0634, "step": 25271 }, { "epoch": 0.5568758366523988, "grad_norm": 0.7430585622787476, "learning_rate": 1.2971680473384971e-05, "loss": 0.0899, "step": 25272 }, { "epoch": 0.556897871941915, "grad_norm": 0.6915107369422913, "learning_rate": 1.2970619775747276e-05, "loss": 0.0921, "step": 25273 }, { "epoch": 0.5569199072314311, "grad_norm": 0.7127285599708557, "learning_rate": 1.296955908844631e-05, "loss": 0.069, "step": 25274 }, { "epoch": 0.5569419425209473, "grad_norm": 0.8386399745941162, "learning_rate": 1.296849841148747e-05, "loss": 0.1007, "step": 25275 }, { "epoch": 0.5569639778104635, "grad_norm": 0.9535778760910034, "learning_rate": 1.2967437744876153e-05, "loss": 0.0784, "step": 25276 }, { "epoch": 0.5569860130999796, "grad_norm": 0.3716444671154022, "learning_rate": 1.2966377088617766e-05, "loss": 0.0439, "step": 25277 }, { "epoch": 0.5570080483894958, "grad_norm": 0.8176338076591492, "learning_rate": 1.296531644271771e-05, "loss": 0.0628, "step": 25278 }, { "epoch": 0.557030083679012, "grad_norm": 0.6859873533248901, "learning_rate": 1.2964255807181393e-05, "loss": 0.0918, "step": 25279 }, { "epoch": 0.5570521189685281, "grad_norm": 0.5206556916236877, "learning_rate": 1.2963195182014203e-05, "loss": 0.0468, "step": 25280 }, { "epoch": 0.5570741542580443, "grad_norm": 0.5181360840797424, "learning_rate": 1.2962134567221561e-05, "loss": 0.0674, "step": 25281 }, { "epoch": 0.5570961895475605, "grad_norm": 0.569921612739563, "learning_rate": 1.296107396280886e-05, "loss": 0.0707, "step": 25282 }, { "epoch": 0.5571182248370766, "grad_norm": 0.7884076237678528, "learning_rate": 1.2960013368781501e-05, "loss": 0.1054, "step": 25283 }, { "epoch": 0.5571402601265928, "grad_norm": 0.7784155607223511, "learning_rate": 1.2958952785144886e-05, "loss": 0.0873, "step": 25284 }, { "epoch": 0.5571622954161088, "grad_norm": 0.25877755880355835, "learning_rate": 1.2957892211904423e-05, "loss": 0.0542, "step": 25285 }, { "epoch": 0.557184330705625, "grad_norm": 0.8068006038665771, "learning_rate": 1.2956831649065509e-05, "loss": 0.0934, "step": 25286 }, { "epoch": 0.5572063659951412, "grad_norm": 0.6374332904815674, "learning_rate": 1.2955771096633547e-05, "loss": 0.0801, "step": 25287 }, { "epoch": 0.5572284012846573, "grad_norm": 0.9291046857833862, "learning_rate": 1.2954710554613932e-05, "loss": 0.0827, "step": 25288 }, { "epoch": 0.5572504365741735, "grad_norm": 0.8039708733558655, "learning_rate": 1.2953650023012079e-05, "loss": 0.0969, "step": 25289 }, { "epoch": 0.5572724718636897, "grad_norm": 0.4584670066833496, "learning_rate": 1.2952589501833386e-05, "loss": 0.0463, "step": 25290 }, { "epoch": 0.5572945071532058, "grad_norm": 0.3801995515823364, "learning_rate": 1.2951528991083251e-05, "loss": 0.0987, "step": 25291 }, { "epoch": 0.557316542442722, "grad_norm": 0.6605775356292725, "learning_rate": 1.2950468490767073e-05, "loss": 0.0718, "step": 25292 }, { "epoch": 0.5573385777322382, "grad_norm": 0.627755880355835, "learning_rate": 1.2949408000890264e-05, "loss": 0.1018, "step": 25293 }, { "epoch": 0.5573606130217543, "grad_norm": 0.4803782105445862, "learning_rate": 1.2948347521458219e-05, "loss": 0.0713, "step": 25294 }, { "epoch": 0.5573826483112705, "grad_norm": 1.186930537223816, "learning_rate": 1.2947287052476339e-05, "loss": 0.131, "step": 25295 }, { "epoch": 0.5574046836007867, "grad_norm": 0.5511131882667542, "learning_rate": 1.294622659395002e-05, "loss": 0.0555, "step": 25296 }, { "epoch": 0.5574267188903028, "grad_norm": 0.46175646781921387, "learning_rate": 1.2945166145884679e-05, "loss": 0.0694, "step": 25297 }, { "epoch": 0.557448754179819, "grad_norm": 0.6485688090324402, "learning_rate": 1.2944105708285707e-05, "loss": 0.0628, "step": 25298 }, { "epoch": 0.5574707894693351, "grad_norm": 0.3878749907016754, "learning_rate": 1.2943045281158507e-05, "loss": 0.0919, "step": 25299 }, { "epoch": 0.5574928247588513, "grad_norm": 0.7938959002494812, "learning_rate": 1.2941984864508477e-05, "loss": 0.1017, "step": 25300 }, { "epoch": 0.5575148600483675, "grad_norm": 0.6911990642547607, "learning_rate": 1.2940924458341026e-05, "loss": 0.0737, "step": 25301 }, { "epoch": 0.5575368953378836, "grad_norm": 0.7499107718467712, "learning_rate": 1.2939864062661552e-05, "loss": 0.0578, "step": 25302 }, { "epoch": 0.5575589306273998, "grad_norm": 0.7604348659515381, "learning_rate": 1.2938803677475446e-05, "loss": 0.0971, "step": 25303 }, { "epoch": 0.557580965916916, "grad_norm": 0.4464343786239624, "learning_rate": 1.2937743302788126e-05, "loss": 0.0709, "step": 25304 }, { "epoch": 0.5576030012064321, "grad_norm": 0.8639369606971741, "learning_rate": 1.2936682938604985e-05, "loss": 0.1176, "step": 25305 }, { "epoch": 0.5576250364959483, "grad_norm": 1.0449470281600952, "learning_rate": 1.2935622584931423e-05, "loss": 0.077, "step": 25306 }, { "epoch": 0.5576470717854645, "grad_norm": 1.0329844951629639, "learning_rate": 1.293456224177284e-05, "loss": 0.0581, "step": 25307 }, { "epoch": 0.5576691070749806, "grad_norm": 0.7359972596168518, "learning_rate": 1.2933501909134643e-05, "loss": 0.0568, "step": 25308 }, { "epoch": 0.5576911423644968, "grad_norm": 0.6024153828620911, "learning_rate": 1.2932441587022229e-05, "loss": 0.065, "step": 25309 }, { "epoch": 0.5577131776540128, "grad_norm": 0.7657211422920227, "learning_rate": 1.2931381275440997e-05, "loss": 0.0749, "step": 25310 }, { "epoch": 0.557735212943529, "grad_norm": 0.4501591920852661, "learning_rate": 1.2930320974396343e-05, "loss": 0.0557, "step": 25311 }, { "epoch": 0.5577572482330452, "grad_norm": 0.5546259880065918, "learning_rate": 1.2929260683893684e-05, "loss": 0.081, "step": 25312 }, { "epoch": 0.5577792835225613, "grad_norm": 0.3914041817188263, "learning_rate": 1.2928200403938408e-05, "loss": 0.0505, "step": 25313 }, { "epoch": 0.5578013188120775, "grad_norm": 0.7781869173049927, "learning_rate": 1.2927140134535918e-05, "loss": 0.0612, "step": 25314 }, { "epoch": 0.5578233541015937, "grad_norm": 0.4275805950164795, "learning_rate": 1.2926079875691611e-05, "loss": 0.0831, "step": 25315 }, { "epoch": 0.5578453893911098, "grad_norm": 0.3066352605819702, "learning_rate": 1.2925019627410897e-05, "loss": 0.0341, "step": 25316 }, { "epoch": 0.557867424680626, "grad_norm": 0.5615230202674866, "learning_rate": 1.2923959389699171e-05, "loss": 0.1109, "step": 25317 }, { "epoch": 0.5578894599701422, "grad_norm": 0.6056694984436035, "learning_rate": 1.292289916256183e-05, "loss": 0.074, "step": 25318 }, { "epoch": 0.5579114952596583, "grad_norm": 0.47923046350479126, "learning_rate": 1.2921838946004273e-05, "loss": 0.0574, "step": 25319 }, { "epoch": 0.5579335305491745, "grad_norm": 0.5599235892295837, "learning_rate": 1.292077874003191e-05, "loss": 0.0843, "step": 25320 }, { "epoch": 0.5579555658386907, "grad_norm": 0.46585360169410706, "learning_rate": 1.2919718544650137e-05, "loss": 0.0692, "step": 25321 }, { "epoch": 0.5579776011282068, "grad_norm": 0.4405149221420288, "learning_rate": 1.2918658359864354e-05, "loss": 0.0764, "step": 25322 }, { "epoch": 0.557999636417723, "grad_norm": 0.4778924584388733, "learning_rate": 1.2917598185679953e-05, "loss": 0.0772, "step": 25323 }, { "epoch": 0.5580216717072392, "grad_norm": 0.5508067607879639, "learning_rate": 1.2916538022102349e-05, "loss": 0.0542, "step": 25324 }, { "epoch": 0.5580437069967553, "grad_norm": 1.0166648626327515, "learning_rate": 1.2915477869136932e-05, "loss": 0.1082, "step": 25325 }, { "epoch": 0.5580657422862715, "grad_norm": 0.475119024515152, "learning_rate": 1.2914417726789096e-05, "loss": 0.0734, "step": 25326 }, { "epoch": 0.5580877775757876, "grad_norm": 0.2665714621543884, "learning_rate": 1.2913357595064257e-05, "loss": 0.0498, "step": 25327 }, { "epoch": 0.5581098128653038, "grad_norm": 0.6764726042747498, "learning_rate": 1.2912297473967806e-05, "loss": 0.0664, "step": 25328 }, { "epoch": 0.55813184815482, "grad_norm": 0.6276232004165649, "learning_rate": 1.2911237363505144e-05, "loss": 0.0442, "step": 25329 }, { "epoch": 0.5581538834443361, "grad_norm": 0.8239219784736633, "learning_rate": 1.2910177263681664e-05, "loss": 0.0753, "step": 25330 }, { "epoch": 0.5581759187338523, "grad_norm": 0.8680703639984131, "learning_rate": 1.2909117174502778e-05, "loss": 0.0923, "step": 25331 }, { "epoch": 0.5581979540233685, "grad_norm": 0.7771061658859253, "learning_rate": 1.2908057095973883e-05, "loss": 0.0866, "step": 25332 }, { "epoch": 0.5582199893128846, "grad_norm": 0.724823534488678, "learning_rate": 1.2906997028100371e-05, "loss": 0.0606, "step": 25333 }, { "epoch": 0.5582420246024008, "grad_norm": 1.1655436754226685, "learning_rate": 1.2905936970887638e-05, "loss": 0.103, "step": 25334 }, { "epoch": 0.5582640598919169, "grad_norm": 0.6745744347572327, "learning_rate": 1.29048769243411e-05, "loss": 0.0837, "step": 25335 }, { "epoch": 0.558286095181433, "grad_norm": 0.4860284626483917, "learning_rate": 1.2903816888466145e-05, "loss": 0.0483, "step": 25336 }, { "epoch": 0.5583081304709492, "grad_norm": 0.8144727945327759, "learning_rate": 1.2902756863268178e-05, "loss": 0.0913, "step": 25337 }, { "epoch": 0.5583301657604653, "grad_norm": 0.6792830228805542, "learning_rate": 1.2901696848752585e-05, "loss": 0.084, "step": 25338 }, { "epoch": 0.5583522010499815, "grad_norm": 0.6252135634422302, "learning_rate": 1.2900636844924784e-05, "loss": 0.0572, "step": 25339 }, { "epoch": 0.5583742363394977, "grad_norm": 0.6588314771652222, "learning_rate": 1.2899576851790164e-05, "loss": 0.0959, "step": 25340 }, { "epoch": 0.5583962716290138, "grad_norm": 0.5996837019920349, "learning_rate": 1.2898516869354126e-05, "loss": 0.0676, "step": 25341 }, { "epoch": 0.55841830691853, "grad_norm": 0.5691195726394653, "learning_rate": 1.2897456897622064e-05, "loss": 0.0851, "step": 25342 }, { "epoch": 0.5584403422080462, "grad_norm": 0.4472092390060425, "learning_rate": 1.2896396936599385e-05, "loss": 0.0456, "step": 25343 }, { "epoch": 0.5584623774975623, "grad_norm": 1.2407153844833374, "learning_rate": 1.2895336986291486e-05, "loss": 0.0761, "step": 25344 }, { "epoch": 0.5584844127870785, "grad_norm": 0.5729302167892456, "learning_rate": 1.2894277046703762e-05, "loss": 0.0984, "step": 25345 }, { "epoch": 0.5585064480765947, "grad_norm": 1.18821120262146, "learning_rate": 1.289321711784161e-05, "loss": 0.0734, "step": 25346 }, { "epoch": 0.5585284833661108, "grad_norm": 0.8994804620742798, "learning_rate": 1.2892157199710439e-05, "loss": 0.1016, "step": 25347 }, { "epoch": 0.558550518655627, "grad_norm": 0.5708094239234924, "learning_rate": 1.289109729231564e-05, "loss": 0.0531, "step": 25348 }, { "epoch": 0.5585725539451432, "grad_norm": 1.0188772678375244, "learning_rate": 1.2890037395662613e-05, "loss": 0.0813, "step": 25349 }, { "epoch": 0.5585945892346593, "grad_norm": 0.8802253007888794, "learning_rate": 1.2888977509756759e-05, "loss": 0.0783, "step": 25350 }, { "epoch": 0.5586166245241755, "grad_norm": 0.6596077084541321, "learning_rate": 1.2887917634603473e-05, "loss": 0.0472, "step": 25351 }, { "epoch": 0.5586386598136917, "grad_norm": 0.31930193305015564, "learning_rate": 1.2886857770208156e-05, "loss": 0.0653, "step": 25352 }, { "epoch": 0.5586606951032078, "grad_norm": 0.45636656880378723, "learning_rate": 1.2885797916576198e-05, "loss": 0.0439, "step": 25353 }, { "epoch": 0.558682730392724, "grad_norm": 0.7227697968482971, "learning_rate": 1.2884738073713015e-05, "loss": 0.0706, "step": 25354 }, { "epoch": 0.5587047656822401, "grad_norm": 0.4085932970046997, "learning_rate": 1.2883678241623992e-05, "loss": 0.064, "step": 25355 }, { "epoch": 0.5587268009717563, "grad_norm": 0.640029788017273, "learning_rate": 1.2882618420314532e-05, "loss": 0.064, "step": 25356 }, { "epoch": 0.5587488362612725, "grad_norm": 0.48656725883483887, "learning_rate": 1.2881558609790029e-05, "loss": 0.0646, "step": 25357 }, { "epoch": 0.5587708715507886, "grad_norm": 0.8081212043762207, "learning_rate": 1.2880498810055886e-05, "loss": 0.0867, "step": 25358 }, { "epoch": 0.5587929068403047, "grad_norm": 0.7057340145111084, "learning_rate": 1.28794390211175e-05, "loss": 0.0854, "step": 25359 }, { "epoch": 0.5588149421298209, "grad_norm": 0.8811397552490234, "learning_rate": 1.2878379242980267e-05, "loss": 0.0772, "step": 25360 }, { "epoch": 0.558836977419337, "grad_norm": 0.4287300407886505, "learning_rate": 1.287731947564958e-05, "loss": 0.0684, "step": 25361 }, { "epoch": 0.5588590127088532, "grad_norm": 0.5659166574478149, "learning_rate": 1.2876259719130849e-05, "loss": 0.0782, "step": 25362 }, { "epoch": 0.5588810479983694, "grad_norm": 0.5546437501907349, "learning_rate": 1.287519997342947e-05, "loss": 0.0721, "step": 25363 }, { "epoch": 0.5589030832878855, "grad_norm": 0.655771017074585, "learning_rate": 1.2874140238550833e-05, "loss": 0.0651, "step": 25364 }, { "epoch": 0.5589251185774017, "grad_norm": 0.6959389448165894, "learning_rate": 1.2873080514500337e-05, "loss": 0.0907, "step": 25365 }, { "epoch": 0.5589471538669178, "grad_norm": 0.7423612475395203, "learning_rate": 1.2872020801283388e-05, "loss": 0.0694, "step": 25366 }, { "epoch": 0.558969189156434, "grad_norm": 0.3873816430568695, "learning_rate": 1.2870961098905375e-05, "loss": 0.0569, "step": 25367 }, { "epoch": 0.5589912244459502, "grad_norm": 0.6799825429916382, "learning_rate": 1.28699014073717e-05, "loss": 0.0808, "step": 25368 }, { "epoch": 0.5590132597354663, "grad_norm": 0.6600309014320374, "learning_rate": 1.2868841726687755e-05, "loss": 0.0673, "step": 25369 }, { "epoch": 0.5590352950249825, "grad_norm": 1.062683343887329, "learning_rate": 1.2867782056858945e-05, "loss": 0.0832, "step": 25370 }, { "epoch": 0.5590573303144987, "grad_norm": 0.7912835478782654, "learning_rate": 1.2866722397890666e-05, "loss": 0.074, "step": 25371 }, { "epoch": 0.5590793656040148, "grad_norm": 0.7463339567184448, "learning_rate": 1.2865662749788312e-05, "loss": 0.0581, "step": 25372 }, { "epoch": 0.559101400893531, "grad_norm": 0.6862999200820923, "learning_rate": 1.2864603112557284e-05, "loss": 0.0693, "step": 25373 }, { "epoch": 0.5591234361830472, "grad_norm": 0.6290021538734436, "learning_rate": 1.2863543486202976e-05, "loss": 0.0655, "step": 25374 }, { "epoch": 0.5591454714725633, "grad_norm": 0.696368396282196, "learning_rate": 1.2862483870730789e-05, "loss": 0.0862, "step": 25375 }, { "epoch": 0.5591675067620795, "grad_norm": 0.8487623333930969, "learning_rate": 1.286142426614611e-05, "loss": 0.1017, "step": 25376 }, { "epoch": 0.5591895420515957, "grad_norm": 0.3418671786785126, "learning_rate": 1.286036467245435e-05, "loss": 0.0804, "step": 25377 }, { "epoch": 0.5592115773411118, "grad_norm": 0.49700042605400085, "learning_rate": 1.2859305089660902e-05, "loss": 0.0736, "step": 25378 }, { "epoch": 0.559233612630628, "grad_norm": 0.47991982102394104, "learning_rate": 1.285824551777116e-05, "loss": 0.0487, "step": 25379 }, { "epoch": 0.5592556479201441, "grad_norm": 0.7462952733039856, "learning_rate": 1.285718595679052e-05, "loss": 0.0874, "step": 25380 }, { "epoch": 0.5592776832096603, "grad_norm": 0.7486597895622253, "learning_rate": 1.285612640672438e-05, "loss": 0.0854, "step": 25381 }, { "epoch": 0.5592997184991765, "grad_norm": 0.5976587533950806, "learning_rate": 1.2855066867578143e-05, "loss": 0.0838, "step": 25382 }, { "epoch": 0.5593217537886926, "grad_norm": 0.7388336062431335, "learning_rate": 1.28540073393572e-05, "loss": 0.1158, "step": 25383 }, { "epoch": 0.5593437890782087, "grad_norm": 0.2682683765888214, "learning_rate": 1.285294782206694e-05, "loss": 0.047, "step": 25384 }, { "epoch": 0.5593658243677249, "grad_norm": 0.5324749946594238, "learning_rate": 1.2851888315712775e-05, "loss": 0.0654, "step": 25385 }, { "epoch": 0.559387859657241, "grad_norm": 0.9597316980361938, "learning_rate": 1.2850828820300093e-05, "loss": 0.1098, "step": 25386 }, { "epoch": 0.5594098949467572, "grad_norm": 0.8514904975891113, "learning_rate": 1.2849769335834297e-05, "loss": 0.0922, "step": 25387 }, { "epoch": 0.5594319302362734, "grad_norm": 0.63399338722229, "learning_rate": 1.2848709862320773e-05, "loss": 0.0991, "step": 25388 }, { "epoch": 0.5594539655257895, "grad_norm": 0.4555242955684662, "learning_rate": 1.2847650399764925e-05, "loss": 0.0527, "step": 25389 }, { "epoch": 0.5594760008153057, "grad_norm": 0.4001404047012329, "learning_rate": 1.2846590948172148e-05, "loss": 0.0997, "step": 25390 }, { "epoch": 0.5594980361048218, "grad_norm": 0.35378149151802063, "learning_rate": 1.2845531507547831e-05, "loss": 0.085, "step": 25391 }, { "epoch": 0.559520071394338, "grad_norm": 0.6307134032249451, "learning_rate": 1.2844472077897387e-05, "loss": 0.0937, "step": 25392 }, { "epoch": 0.5595421066838542, "grad_norm": 0.5975882411003113, "learning_rate": 1.2843412659226201e-05, "loss": 0.0831, "step": 25393 }, { "epoch": 0.5595641419733703, "grad_norm": 0.728447437286377, "learning_rate": 1.2842353251539672e-05, "loss": 0.0685, "step": 25394 }, { "epoch": 0.5595861772628865, "grad_norm": 0.8224610090255737, "learning_rate": 1.284129385484319e-05, "loss": 0.0872, "step": 25395 }, { "epoch": 0.5596082125524027, "grad_norm": 0.7420092225074768, "learning_rate": 1.2840234469142158e-05, "loss": 0.05, "step": 25396 }, { "epoch": 0.5596302478419188, "grad_norm": 0.48143264651298523, "learning_rate": 1.2839175094441972e-05, "loss": 0.085, "step": 25397 }, { "epoch": 0.559652283131435, "grad_norm": 0.9730167984962463, "learning_rate": 1.2838115730748027e-05, "loss": 0.1093, "step": 25398 }, { "epoch": 0.5596743184209512, "grad_norm": 0.3297046720981598, "learning_rate": 1.283705637806571e-05, "loss": 0.0656, "step": 25399 }, { "epoch": 0.5596963537104673, "grad_norm": 0.611918032169342, "learning_rate": 1.283599703640043e-05, "loss": 0.0345, "step": 25400 }, { "epoch": 0.5597183889999835, "grad_norm": 0.5237235426902771, "learning_rate": 1.283493770575758e-05, "loss": 0.0605, "step": 25401 }, { "epoch": 0.5597404242894997, "grad_norm": 0.36427316069602966, "learning_rate": 1.2833878386142552e-05, "loss": 0.0641, "step": 25402 }, { "epoch": 0.5597624595790158, "grad_norm": 0.8405230045318604, "learning_rate": 1.2832819077560739e-05, "loss": 0.0987, "step": 25403 }, { "epoch": 0.559784494868532, "grad_norm": 0.5532090067863464, "learning_rate": 1.2831759780017545e-05, "loss": 0.0855, "step": 25404 }, { "epoch": 0.5598065301580482, "grad_norm": 0.8472732305526733, "learning_rate": 1.283070049351836e-05, "loss": 0.0837, "step": 25405 }, { "epoch": 0.5598285654475643, "grad_norm": 1.7865045070648193, "learning_rate": 1.2829641218068582e-05, "loss": 0.0662, "step": 25406 }, { "epoch": 0.5598506007370805, "grad_norm": 0.7110946178436279, "learning_rate": 1.2828581953673597e-05, "loss": 0.0532, "step": 25407 }, { "epoch": 0.5598726360265966, "grad_norm": 0.564326286315918, "learning_rate": 1.2827522700338817e-05, "loss": 0.0668, "step": 25408 }, { "epoch": 0.5598946713161127, "grad_norm": 0.7857182025909424, "learning_rate": 1.2826463458069628e-05, "loss": 0.0885, "step": 25409 }, { "epoch": 0.5599167066056289, "grad_norm": 0.7447174191474915, "learning_rate": 1.2825404226871426e-05, "loss": 0.0926, "step": 25410 }, { "epoch": 0.559938741895145, "grad_norm": 0.7948185205459595, "learning_rate": 1.2824345006749602e-05, "loss": 0.0986, "step": 25411 }, { "epoch": 0.5599607771846612, "grad_norm": 0.8583720326423645, "learning_rate": 1.282328579770956e-05, "loss": 0.0629, "step": 25412 }, { "epoch": 0.5599828124741774, "grad_norm": 0.5468264818191528, "learning_rate": 1.2822226599756691e-05, "loss": 0.0917, "step": 25413 }, { "epoch": 0.5600048477636935, "grad_norm": 0.7512183785438538, "learning_rate": 1.2821167412896383e-05, "loss": 0.0782, "step": 25414 }, { "epoch": 0.5600268830532097, "grad_norm": 0.6397020816802979, "learning_rate": 1.2820108237134044e-05, "loss": 0.0922, "step": 25415 }, { "epoch": 0.5600489183427259, "grad_norm": 0.9105654358863831, "learning_rate": 1.2819049072475062e-05, "loss": 0.093, "step": 25416 }, { "epoch": 0.560070953632242, "grad_norm": 0.7665863037109375, "learning_rate": 1.2817989918924835e-05, "loss": 0.097, "step": 25417 }, { "epoch": 0.5600929889217582, "grad_norm": 0.7109552025794983, "learning_rate": 1.281693077648875e-05, "loss": 0.07, "step": 25418 }, { "epoch": 0.5601150242112743, "grad_norm": 0.5925206542015076, "learning_rate": 1.2815871645172212e-05, "loss": 0.0643, "step": 25419 }, { "epoch": 0.5601370595007905, "grad_norm": 0.4488794207572937, "learning_rate": 1.281481252498061e-05, "loss": 0.0517, "step": 25420 }, { "epoch": 0.5601590947903067, "grad_norm": 0.8352174758911133, "learning_rate": 1.281375341591934e-05, "loss": 0.0851, "step": 25421 }, { "epoch": 0.5601811300798228, "grad_norm": 0.5574578642845154, "learning_rate": 1.2812694317993787e-05, "loss": 0.0783, "step": 25422 }, { "epoch": 0.560203165369339, "grad_norm": 0.8381698727607727, "learning_rate": 1.2811635231209364e-05, "loss": 0.108, "step": 25423 }, { "epoch": 0.5602252006588552, "grad_norm": 0.5286946296691895, "learning_rate": 1.2810576155571457e-05, "loss": 0.0583, "step": 25424 }, { "epoch": 0.5602472359483713, "grad_norm": 0.7505930066108704, "learning_rate": 1.2809517091085459e-05, "loss": 0.0946, "step": 25425 }, { "epoch": 0.5602692712378875, "grad_norm": 0.5810065269470215, "learning_rate": 1.280845803775676e-05, "loss": 0.0786, "step": 25426 }, { "epoch": 0.5602913065274037, "grad_norm": 0.4373711943626404, "learning_rate": 1.2807398995590764e-05, "loss": 0.0567, "step": 25427 }, { "epoch": 0.5603133418169198, "grad_norm": 0.6151080131530762, "learning_rate": 1.2806339964592861e-05, "loss": 0.0947, "step": 25428 }, { "epoch": 0.560335377106436, "grad_norm": 0.6520392298698425, "learning_rate": 1.2805280944768445e-05, "loss": 0.1143, "step": 25429 }, { "epoch": 0.5603574123959522, "grad_norm": 0.4601856768131256, "learning_rate": 1.2804221936122901e-05, "loss": 0.0739, "step": 25430 }, { "epoch": 0.5603794476854683, "grad_norm": 0.5849829316139221, "learning_rate": 1.2803162938661642e-05, "loss": 0.0585, "step": 25431 }, { "epoch": 0.5604014829749845, "grad_norm": 0.5424953103065491, "learning_rate": 1.2802103952390052e-05, "loss": 0.0787, "step": 25432 }, { "epoch": 0.5604235182645007, "grad_norm": 0.7852782607078552, "learning_rate": 1.2801044977313523e-05, "loss": 0.0639, "step": 25433 }, { "epoch": 0.5604455535540167, "grad_norm": 0.4286869466304779, "learning_rate": 1.279998601343745e-05, "loss": 0.071, "step": 25434 }, { "epoch": 0.5604675888435329, "grad_norm": 0.6939895749092102, "learning_rate": 1.2798927060767229e-05, "loss": 0.1058, "step": 25435 }, { "epoch": 0.560489624133049, "grad_norm": 0.8115241527557373, "learning_rate": 1.2797868119308254e-05, "loss": 0.0624, "step": 25436 }, { "epoch": 0.5605116594225652, "grad_norm": 0.4319693446159363, "learning_rate": 1.2796809189065911e-05, "loss": 0.0745, "step": 25437 }, { "epoch": 0.5605336947120814, "grad_norm": 0.4652092158794403, "learning_rate": 1.2795750270045608e-05, "loss": 0.0965, "step": 25438 }, { "epoch": 0.5605557300015975, "grad_norm": 1.101475477218628, "learning_rate": 1.2794691362252729e-05, "loss": 0.0923, "step": 25439 }, { "epoch": 0.5605777652911137, "grad_norm": 0.705195963382721, "learning_rate": 1.279363246569267e-05, "loss": 0.0669, "step": 25440 }, { "epoch": 0.5605998005806299, "grad_norm": 0.6326592564582825, "learning_rate": 1.2792573580370819e-05, "loss": 0.0637, "step": 25441 }, { "epoch": 0.560621835870146, "grad_norm": 0.9743574857711792, "learning_rate": 1.2791514706292578e-05, "loss": 0.1102, "step": 25442 }, { "epoch": 0.5606438711596622, "grad_norm": 0.46216732263565063, "learning_rate": 1.2790455843463341e-05, "loss": 0.0572, "step": 25443 }, { "epoch": 0.5606659064491784, "grad_norm": 0.6955832839012146, "learning_rate": 1.2789396991888497e-05, "loss": 0.0867, "step": 25444 }, { "epoch": 0.5606879417386945, "grad_norm": 0.4111938178539276, "learning_rate": 1.278833815157343e-05, "loss": 0.0542, "step": 25445 }, { "epoch": 0.5607099770282107, "grad_norm": 0.7347907423973083, "learning_rate": 1.278727932252355e-05, "loss": 0.0621, "step": 25446 }, { "epoch": 0.5607320123177268, "grad_norm": 0.4187089800834656, "learning_rate": 1.2786220504744242e-05, "loss": 0.0645, "step": 25447 }, { "epoch": 0.560754047607243, "grad_norm": 0.5032228231430054, "learning_rate": 1.2785161698240901e-05, "loss": 0.0832, "step": 25448 }, { "epoch": 0.5607760828967592, "grad_norm": 0.5265456438064575, "learning_rate": 1.2784102903018914e-05, "loss": 0.0724, "step": 25449 }, { "epoch": 0.5607981181862753, "grad_norm": 0.9266153573989868, "learning_rate": 1.2783044119083684e-05, "loss": 0.0698, "step": 25450 }, { "epoch": 0.5608201534757915, "grad_norm": 0.7748314738273621, "learning_rate": 1.2781985346440598e-05, "loss": 0.0871, "step": 25451 }, { "epoch": 0.5608421887653077, "grad_norm": 0.4717954695224762, "learning_rate": 1.2780926585095053e-05, "loss": 0.0598, "step": 25452 }, { "epoch": 0.5608642240548238, "grad_norm": 0.800468385219574, "learning_rate": 1.2779867835052436e-05, "loss": 0.0552, "step": 25453 }, { "epoch": 0.56088625934434, "grad_norm": 0.5663397908210754, "learning_rate": 1.2778809096318143e-05, "loss": 0.1013, "step": 25454 }, { "epoch": 0.5609082946338562, "grad_norm": 0.7462826371192932, "learning_rate": 1.2777750368897568e-05, "loss": 0.0574, "step": 25455 }, { "epoch": 0.5609303299233723, "grad_norm": 0.5072124004364014, "learning_rate": 1.2776691652796101e-05, "loss": 0.056, "step": 25456 }, { "epoch": 0.5609523652128885, "grad_norm": 0.3715980052947998, "learning_rate": 1.2775632948019128e-05, "loss": 0.0629, "step": 25457 }, { "epoch": 0.5609744005024045, "grad_norm": 0.581688642501831, "learning_rate": 1.2774574254572058e-05, "loss": 0.0617, "step": 25458 }, { "epoch": 0.5609964357919207, "grad_norm": 0.6992067694664001, "learning_rate": 1.2773515572460275e-05, "loss": 0.0614, "step": 25459 }, { "epoch": 0.5610184710814369, "grad_norm": 0.4753732681274414, "learning_rate": 1.2772456901689168e-05, "loss": 0.05, "step": 25460 }, { "epoch": 0.561040506370953, "grad_norm": 0.6974964141845703, "learning_rate": 1.2771398242264134e-05, "loss": 0.076, "step": 25461 }, { "epoch": 0.5610625416604692, "grad_norm": 0.5127430558204651, "learning_rate": 1.2770339594190564e-05, "loss": 0.0819, "step": 25462 }, { "epoch": 0.5610845769499854, "grad_norm": 0.35045745968818665, "learning_rate": 1.276928095747385e-05, "loss": 0.0554, "step": 25463 }, { "epoch": 0.5611066122395015, "grad_norm": 0.28025686740875244, "learning_rate": 1.276822233211938e-05, "loss": 0.0648, "step": 25464 }, { "epoch": 0.5611286475290177, "grad_norm": 0.8039742708206177, "learning_rate": 1.2767163718132553e-05, "loss": 0.09, "step": 25465 }, { "epoch": 0.5611506828185339, "grad_norm": 0.5287773013114929, "learning_rate": 1.2766105115518763e-05, "loss": 0.0665, "step": 25466 }, { "epoch": 0.56117271810805, "grad_norm": 0.9129374623298645, "learning_rate": 1.2765046524283396e-05, "loss": 0.0895, "step": 25467 }, { "epoch": 0.5611947533975662, "grad_norm": 0.40560442209243774, "learning_rate": 1.2763987944431839e-05, "loss": 0.0498, "step": 25468 }, { "epoch": 0.5612167886870824, "grad_norm": 0.3333210349082947, "learning_rate": 1.2762929375969498e-05, "loss": 0.0527, "step": 25469 }, { "epoch": 0.5612388239765985, "grad_norm": 0.8918737769126892, "learning_rate": 1.2761870818901755e-05, "loss": 0.1072, "step": 25470 }, { "epoch": 0.5612608592661147, "grad_norm": 0.6744638085365295, "learning_rate": 1.2760812273234005e-05, "loss": 0.0664, "step": 25471 }, { "epoch": 0.5612828945556309, "grad_norm": 0.7078176140785217, "learning_rate": 1.2759753738971632e-05, "loss": 0.0746, "step": 25472 }, { "epoch": 0.561304929845147, "grad_norm": 0.7473446130752563, "learning_rate": 1.2758695216120041e-05, "loss": 0.0661, "step": 25473 }, { "epoch": 0.5613269651346632, "grad_norm": 0.5358117818832397, "learning_rate": 1.2757636704684618e-05, "loss": 0.0651, "step": 25474 }, { "epoch": 0.5613490004241793, "grad_norm": 0.6775948405265808, "learning_rate": 1.2756578204670754e-05, "loss": 0.0854, "step": 25475 }, { "epoch": 0.5613710357136955, "grad_norm": 0.45979079604148865, "learning_rate": 1.2755519716083834e-05, "loss": 0.0651, "step": 25476 }, { "epoch": 0.5613930710032117, "grad_norm": 1.1796177625656128, "learning_rate": 1.2754461238929261e-05, "loss": 0.097, "step": 25477 }, { "epoch": 0.5614151062927278, "grad_norm": 0.8350903987884521, "learning_rate": 1.2753402773212423e-05, "loss": 0.0835, "step": 25478 }, { "epoch": 0.561437141582244, "grad_norm": 0.7344267964363098, "learning_rate": 1.2752344318938706e-05, "loss": 0.0696, "step": 25479 }, { "epoch": 0.5614591768717602, "grad_norm": 0.6534149646759033, "learning_rate": 1.27512858761135e-05, "loss": 0.0785, "step": 25480 }, { "epoch": 0.5614812121612763, "grad_norm": 0.7958632111549377, "learning_rate": 1.2750227444742207e-05, "loss": 0.107, "step": 25481 }, { "epoch": 0.5615032474507925, "grad_norm": 0.5807988047599792, "learning_rate": 1.274916902483021e-05, "loss": 0.0524, "step": 25482 }, { "epoch": 0.5615252827403086, "grad_norm": 0.3956823945045471, "learning_rate": 1.2748110616382902e-05, "loss": 0.0714, "step": 25483 }, { "epoch": 0.5615473180298247, "grad_norm": 0.9116837978363037, "learning_rate": 1.2747052219405676e-05, "loss": 0.0591, "step": 25484 }, { "epoch": 0.5615693533193409, "grad_norm": 0.7529205083847046, "learning_rate": 1.2745993833903922e-05, "loss": 0.0816, "step": 25485 }, { "epoch": 0.561591388608857, "grad_norm": 0.8127534985542297, "learning_rate": 1.274493545988303e-05, "loss": 0.0715, "step": 25486 }, { "epoch": 0.5616134238983732, "grad_norm": 0.8264039754867554, "learning_rate": 1.2743877097348382e-05, "loss": 0.0679, "step": 25487 }, { "epoch": 0.5616354591878894, "grad_norm": 0.7121152281761169, "learning_rate": 1.2742818746305389e-05, "loss": 0.08, "step": 25488 }, { "epoch": 0.5616574944774055, "grad_norm": 0.5639092922210693, "learning_rate": 1.2741760406759427e-05, "loss": 0.0586, "step": 25489 }, { "epoch": 0.5616795297669217, "grad_norm": 1.1030750274658203, "learning_rate": 1.274070207871589e-05, "loss": 0.1245, "step": 25490 }, { "epoch": 0.5617015650564379, "grad_norm": 0.2906392216682434, "learning_rate": 1.2739643762180168e-05, "loss": 0.0299, "step": 25491 }, { "epoch": 0.561723600345954, "grad_norm": 0.19429507851600647, "learning_rate": 1.2738585457157654e-05, "loss": 0.0354, "step": 25492 }, { "epoch": 0.5617456356354702, "grad_norm": 1.0979571342468262, "learning_rate": 1.2737527163653738e-05, "loss": 0.0734, "step": 25493 }, { "epoch": 0.5617676709249864, "grad_norm": 0.6224619746208191, "learning_rate": 1.2736468881673808e-05, "loss": 0.0646, "step": 25494 }, { "epoch": 0.5617897062145025, "grad_norm": 0.5808826088905334, "learning_rate": 1.273541061122325e-05, "loss": 0.0788, "step": 25495 }, { "epoch": 0.5618117415040187, "grad_norm": 0.4296247363090515, "learning_rate": 1.2734352352307466e-05, "loss": 0.0898, "step": 25496 }, { "epoch": 0.5618337767935349, "grad_norm": 0.7030819058418274, "learning_rate": 1.2733294104931842e-05, "loss": 0.0805, "step": 25497 }, { "epoch": 0.561855812083051, "grad_norm": 0.5997333526611328, "learning_rate": 1.2732235869101766e-05, "loss": 0.0866, "step": 25498 }, { "epoch": 0.5618778473725672, "grad_norm": 0.6394799947738647, "learning_rate": 1.2731177644822625e-05, "loss": 0.0708, "step": 25499 }, { "epoch": 0.5618998826620833, "grad_norm": 0.6332237124443054, "learning_rate": 1.2730119432099815e-05, "loss": 0.0997, "step": 25500 }, { "epoch": 0.5619219179515995, "grad_norm": 0.8584142923355103, "learning_rate": 1.2729061230938725e-05, "loss": 0.0755, "step": 25501 }, { "epoch": 0.5619439532411157, "grad_norm": 0.5768314599990845, "learning_rate": 1.2728003041344744e-05, "loss": 0.0698, "step": 25502 }, { "epoch": 0.5619659885306318, "grad_norm": 0.7961972951889038, "learning_rate": 1.2726944863323256e-05, "loss": 0.0865, "step": 25503 }, { "epoch": 0.561988023820148, "grad_norm": 0.686327338218689, "learning_rate": 1.2725886696879664e-05, "loss": 0.0806, "step": 25504 }, { "epoch": 0.5620100591096642, "grad_norm": 1.003955364227295, "learning_rate": 1.2724828542019348e-05, "loss": 0.0617, "step": 25505 }, { "epoch": 0.5620320943991803, "grad_norm": 0.786334753036499, "learning_rate": 1.2723770398747698e-05, "loss": 0.0666, "step": 25506 }, { "epoch": 0.5620541296886965, "grad_norm": 1.1219784021377563, "learning_rate": 1.2722712267070109e-05, "loss": 0.1335, "step": 25507 }, { "epoch": 0.5620761649782126, "grad_norm": 0.5325019359588623, "learning_rate": 1.272165414699197e-05, "loss": 0.0654, "step": 25508 }, { "epoch": 0.5620982002677287, "grad_norm": 0.5583374500274658, "learning_rate": 1.2720596038518666e-05, "loss": 0.0813, "step": 25509 }, { "epoch": 0.5621202355572449, "grad_norm": 0.8379487991333008, "learning_rate": 1.2719537941655583e-05, "loss": 0.0769, "step": 25510 }, { "epoch": 0.562142270846761, "grad_norm": 0.6904321908950806, "learning_rate": 1.2718479856408121e-05, "loss": 0.097, "step": 25511 }, { "epoch": 0.5621643061362772, "grad_norm": 0.6777400374412537, "learning_rate": 1.2717421782781665e-05, "loss": 0.0755, "step": 25512 }, { "epoch": 0.5621863414257934, "grad_norm": 0.7006330490112305, "learning_rate": 1.2716363720781605e-05, "loss": 0.0603, "step": 25513 }, { "epoch": 0.5622083767153095, "grad_norm": 0.46941402554512024, "learning_rate": 1.2715305670413325e-05, "loss": 0.0712, "step": 25514 }, { "epoch": 0.5622304120048257, "grad_norm": 0.5666841268539429, "learning_rate": 1.2714247631682221e-05, "loss": 0.073, "step": 25515 }, { "epoch": 0.5622524472943419, "grad_norm": 0.6125743389129639, "learning_rate": 1.2713189604593682e-05, "loss": 0.0589, "step": 25516 }, { "epoch": 0.562274482583858, "grad_norm": 0.5922223925590515, "learning_rate": 1.2712131589153094e-05, "loss": 0.061, "step": 25517 }, { "epoch": 0.5622965178733742, "grad_norm": 0.47044122219085693, "learning_rate": 1.2711073585365839e-05, "loss": 0.043, "step": 25518 }, { "epoch": 0.5623185531628904, "grad_norm": 0.5226110816001892, "learning_rate": 1.271001559323732e-05, "loss": 0.0583, "step": 25519 }, { "epoch": 0.5623405884524065, "grad_norm": 0.6398178339004517, "learning_rate": 1.270895761277292e-05, "loss": 0.0648, "step": 25520 }, { "epoch": 0.5623626237419227, "grad_norm": 0.7278332710266113, "learning_rate": 1.2707899643978028e-05, "loss": 0.0696, "step": 25521 }, { "epoch": 0.5623846590314389, "grad_norm": 0.6996554136276245, "learning_rate": 1.2706841686858026e-05, "loss": 0.0897, "step": 25522 }, { "epoch": 0.562406694320955, "grad_norm": 0.4514227509498596, "learning_rate": 1.2705783741418318e-05, "loss": 0.0595, "step": 25523 }, { "epoch": 0.5624287296104712, "grad_norm": 0.7688880562782288, "learning_rate": 1.2704725807664278e-05, "loss": 0.0665, "step": 25524 }, { "epoch": 0.5624507648999874, "grad_norm": 0.7575300335884094, "learning_rate": 1.2703667885601298e-05, "loss": 0.0606, "step": 25525 }, { "epoch": 0.5624728001895035, "grad_norm": 0.5335964560508728, "learning_rate": 1.2702609975234773e-05, "loss": 0.0596, "step": 25526 }, { "epoch": 0.5624948354790197, "grad_norm": 0.5346253514289856, "learning_rate": 1.2701552076570089e-05, "loss": 0.0803, "step": 25527 }, { "epoch": 0.5625168707685358, "grad_norm": 0.7427178621292114, "learning_rate": 1.2700494189612632e-05, "loss": 0.1103, "step": 25528 }, { "epoch": 0.562538906058052, "grad_norm": 0.9615944623947144, "learning_rate": 1.2699436314367785e-05, "loss": 0.0574, "step": 25529 }, { "epoch": 0.5625609413475682, "grad_norm": 0.3597258925437927, "learning_rate": 1.269837845084095e-05, "loss": 0.0951, "step": 25530 }, { "epoch": 0.5625829766370843, "grad_norm": 0.7291692495346069, "learning_rate": 1.2697320599037505e-05, "loss": 0.0808, "step": 25531 }, { "epoch": 0.5626050119266004, "grad_norm": 0.7590207457542419, "learning_rate": 1.2696262758962842e-05, "loss": 0.0757, "step": 25532 }, { "epoch": 0.5626270472161166, "grad_norm": 0.5667076110839844, "learning_rate": 1.2695204930622342e-05, "loss": 0.077, "step": 25533 }, { "epoch": 0.5626490825056327, "grad_norm": 0.7603086233139038, "learning_rate": 1.2694147114021407e-05, "loss": 0.1088, "step": 25534 }, { "epoch": 0.5626711177951489, "grad_norm": 0.9923566579818726, "learning_rate": 1.2693089309165415e-05, "loss": 0.0918, "step": 25535 }, { "epoch": 0.562693153084665, "grad_norm": 0.45538681745529175, "learning_rate": 1.2692031516059756e-05, "loss": 0.0448, "step": 25536 }, { "epoch": 0.5627151883741812, "grad_norm": 0.6715475916862488, "learning_rate": 1.2690973734709816e-05, "loss": 0.0724, "step": 25537 }, { "epoch": 0.5627372236636974, "grad_norm": 0.5655730962753296, "learning_rate": 1.2689915965120988e-05, "loss": 0.0552, "step": 25538 }, { "epoch": 0.5627592589532135, "grad_norm": 0.670478105545044, "learning_rate": 1.2688858207298657e-05, "loss": 0.0658, "step": 25539 }, { "epoch": 0.5627812942427297, "grad_norm": 0.6036394834518433, "learning_rate": 1.2687800461248211e-05, "loss": 0.078, "step": 25540 }, { "epoch": 0.5628033295322459, "grad_norm": 0.8406614065170288, "learning_rate": 1.2686742726975029e-05, "loss": 0.0614, "step": 25541 }, { "epoch": 0.562825364821762, "grad_norm": 0.7001796960830688, "learning_rate": 1.2685685004484514e-05, "loss": 0.0829, "step": 25542 }, { "epoch": 0.5628474001112782, "grad_norm": 0.6464998722076416, "learning_rate": 1.2684627293782047e-05, "loss": 0.0524, "step": 25543 }, { "epoch": 0.5628694354007944, "grad_norm": 0.31821972131729126, "learning_rate": 1.2683569594873013e-05, "loss": 0.0461, "step": 25544 }, { "epoch": 0.5628914706903105, "grad_norm": 0.5935406684875488, "learning_rate": 1.26825119077628e-05, "loss": 0.0533, "step": 25545 }, { "epoch": 0.5629135059798267, "grad_norm": 0.777851939201355, "learning_rate": 1.26814542324568e-05, "loss": 0.0614, "step": 25546 }, { "epoch": 0.5629355412693429, "grad_norm": 0.8395606875419617, "learning_rate": 1.2680396568960396e-05, "loss": 0.061, "step": 25547 }, { "epoch": 0.562957576558859, "grad_norm": 0.44654425978660583, "learning_rate": 1.267933891727897e-05, "loss": 0.0709, "step": 25548 }, { "epoch": 0.5629796118483752, "grad_norm": 0.2661425769329071, "learning_rate": 1.267828127741792e-05, "loss": 0.0688, "step": 25549 }, { "epoch": 0.5630016471378914, "grad_norm": 0.4294235408306122, "learning_rate": 1.267722364938263e-05, "loss": 0.058, "step": 25550 }, { "epoch": 0.5630236824274075, "grad_norm": 0.702172040939331, "learning_rate": 1.2676166033178485e-05, "loss": 0.0928, "step": 25551 }, { "epoch": 0.5630457177169237, "grad_norm": 1.0413322448730469, "learning_rate": 1.2675108428810867e-05, "loss": 0.101, "step": 25552 }, { "epoch": 0.5630677530064399, "grad_norm": 1.1312037706375122, "learning_rate": 1.2674050836285174e-05, "loss": 0.0824, "step": 25553 }, { "epoch": 0.563089788295956, "grad_norm": 0.45071905851364136, "learning_rate": 1.267299325560679e-05, "loss": 0.0337, "step": 25554 }, { "epoch": 0.5631118235854722, "grad_norm": 0.9086214303970337, "learning_rate": 1.26719356867811e-05, "loss": 0.1011, "step": 25555 }, { "epoch": 0.5631338588749883, "grad_norm": 0.6655793786048889, "learning_rate": 1.267087812981348e-05, "loss": 0.0711, "step": 25556 }, { "epoch": 0.5631558941645044, "grad_norm": 0.8416358232498169, "learning_rate": 1.2669820584709334e-05, "loss": 0.0594, "step": 25557 }, { "epoch": 0.5631779294540206, "grad_norm": 0.6067999601364136, "learning_rate": 1.266876305147404e-05, "loss": 0.0948, "step": 25558 }, { "epoch": 0.5631999647435367, "grad_norm": 1.0898847579956055, "learning_rate": 1.2667705530112986e-05, "loss": 0.0857, "step": 25559 }, { "epoch": 0.5632220000330529, "grad_norm": 0.603119432926178, "learning_rate": 1.2666648020631552e-05, "loss": 0.0835, "step": 25560 }, { "epoch": 0.5632440353225691, "grad_norm": 0.8316744565963745, "learning_rate": 1.2665590523035139e-05, "loss": 0.0685, "step": 25561 }, { "epoch": 0.5632660706120852, "grad_norm": 0.3442592918872833, "learning_rate": 1.2664533037329126e-05, "loss": 0.0945, "step": 25562 }, { "epoch": 0.5632881059016014, "grad_norm": 0.5093992352485657, "learning_rate": 1.2663475563518895e-05, "loss": 0.0736, "step": 25563 }, { "epoch": 0.5633101411911176, "grad_norm": 0.40133586525917053, "learning_rate": 1.2662418101609835e-05, "loss": 0.0495, "step": 25564 }, { "epoch": 0.5633321764806337, "grad_norm": 0.753551721572876, "learning_rate": 1.2661360651607336e-05, "loss": 0.0686, "step": 25565 }, { "epoch": 0.5633542117701499, "grad_norm": 0.5949391722679138, "learning_rate": 1.2660303213516782e-05, "loss": 0.0564, "step": 25566 }, { "epoch": 0.563376247059666, "grad_norm": 0.5883041620254517, "learning_rate": 1.2659245787343556e-05, "loss": 0.0897, "step": 25567 }, { "epoch": 0.5633982823491822, "grad_norm": 0.418729692697525, "learning_rate": 1.2658188373093042e-05, "loss": 0.0609, "step": 25568 }, { "epoch": 0.5634203176386984, "grad_norm": 0.8727316856384277, "learning_rate": 1.2657130970770637e-05, "loss": 0.0974, "step": 25569 }, { "epoch": 0.5634423529282145, "grad_norm": 0.522821843624115, "learning_rate": 1.2656073580381719e-05, "loss": 0.0616, "step": 25570 }, { "epoch": 0.5634643882177307, "grad_norm": 0.8304264545440674, "learning_rate": 1.2655016201931674e-05, "loss": 0.0963, "step": 25571 }, { "epoch": 0.5634864235072469, "grad_norm": 0.2917437255382538, "learning_rate": 1.2653958835425892e-05, "loss": 0.0663, "step": 25572 }, { "epoch": 0.563508458796763, "grad_norm": 0.5703535676002502, "learning_rate": 1.2652901480869754e-05, "loss": 0.0426, "step": 25573 }, { "epoch": 0.5635304940862792, "grad_norm": 0.6326654553413391, "learning_rate": 1.2651844138268647e-05, "loss": 0.0699, "step": 25574 }, { "epoch": 0.5635525293757954, "grad_norm": 0.6455742716789246, "learning_rate": 1.2650786807627952e-05, "loss": 0.0737, "step": 25575 }, { "epoch": 0.5635745646653115, "grad_norm": 0.44083157181739807, "learning_rate": 1.2649729488953065e-05, "loss": 0.0704, "step": 25576 }, { "epoch": 0.5635965999548277, "grad_norm": 0.6216471791267395, "learning_rate": 1.2648672182249367e-05, "loss": 0.076, "step": 25577 }, { "epoch": 0.5636186352443439, "grad_norm": 0.44143617153167725, "learning_rate": 1.2647614887522243e-05, "loss": 0.0576, "step": 25578 }, { "epoch": 0.56364067053386, "grad_norm": 0.732607901096344, "learning_rate": 1.2646557604777072e-05, "loss": 0.0944, "step": 25579 }, { "epoch": 0.5636627058233762, "grad_norm": 0.4295370578765869, "learning_rate": 1.264550033401925e-05, "loss": 0.0513, "step": 25580 }, { "epoch": 0.5636847411128924, "grad_norm": 0.6698889136314392, "learning_rate": 1.2644443075254157e-05, "loss": 0.0724, "step": 25581 }, { "epoch": 0.5637067764024084, "grad_norm": 0.7280069589614868, "learning_rate": 1.264338582848718e-05, "loss": 0.0981, "step": 25582 }, { "epoch": 0.5637288116919246, "grad_norm": 0.7630650401115417, "learning_rate": 1.2642328593723695e-05, "loss": 0.074, "step": 25583 }, { "epoch": 0.5637508469814407, "grad_norm": 0.5496076345443726, "learning_rate": 1.2641271370969102e-05, "loss": 0.0708, "step": 25584 }, { "epoch": 0.5637728822709569, "grad_norm": 0.6288250088691711, "learning_rate": 1.2640214160228779e-05, "loss": 0.0792, "step": 25585 }, { "epoch": 0.5637949175604731, "grad_norm": 0.5919107794761658, "learning_rate": 1.2639156961508109e-05, "loss": 0.0668, "step": 25586 }, { "epoch": 0.5638169528499892, "grad_norm": 0.7487852573394775, "learning_rate": 1.2638099774812476e-05, "loss": 0.0646, "step": 25587 }, { "epoch": 0.5638389881395054, "grad_norm": 0.30611565709114075, "learning_rate": 1.2637042600147271e-05, "loss": 0.0611, "step": 25588 }, { "epoch": 0.5638610234290216, "grad_norm": 0.46876561641693115, "learning_rate": 1.2635985437517874e-05, "loss": 0.0856, "step": 25589 }, { "epoch": 0.5638830587185377, "grad_norm": 0.6460350751876831, "learning_rate": 1.2634928286929671e-05, "loss": 0.0665, "step": 25590 }, { "epoch": 0.5639050940080539, "grad_norm": 0.6065577268600464, "learning_rate": 1.2633871148388041e-05, "loss": 0.0932, "step": 25591 }, { "epoch": 0.56392712929757, "grad_norm": 0.6433233618736267, "learning_rate": 1.263281402189838e-05, "loss": 0.0997, "step": 25592 }, { "epoch": 0.5639491645870862, "grad_norm": 0.8472545146942139, "learning_rate": 1.2631756907466068e-05, "loss": 0.1062, "step": 25593 }, { "epoch": 0.5639711998766024, "grad_norm": 0.437208354473114, "learning_rate": 1.2630699805096483e-05, "loss": 0.0481, "step": 25594 }, { "epoch": 0.5639932351661185, "grad_norm": 0.899448037147522, "learning_rate": 1.2629642714795019e-05, "loss": 0.1012, "step": 25595 }, { "epoch": 0.5640152704556347, "grad_norm": 0.38168850541114807, "learning_rate": 1.2628585636567054e-05, "loss": 0.0934, "step": 25596 }, { "epoch": 0.5640373057451509, "grad_norm": 0.5495467185974121, "learning_rate": 1.2627528570417976e-05, "loss": 0.0853, "step": 25597 }, { "epoch": 0.564059341034667, "grad_norm": 0.9057664275169373, "learning_rate": 1.2626471516353159e-05, "loss": 0.0796, "step": 25598 }, { "epoch": 0.5640813763241832, "grad_norm": 0.651181697845459, "learning_rate": 1.2625414474378005e-05, "loss": 0.0656, "step": 25599 }, { "epoch": 0.5641034116136994, "grad_norm": 0.6648352146148682, "learning_rate": 1.2624357444497884e-05, "loss": 0.0922, "step": 25600 }, { "epoch": 0.5641254469032155, "grad_norm": 0.962149977684021, "learning_rate": 1.2623300426718187e-05, "loss": 0.0571, "step": 25601 }, { "epoch": 0.5641474821927317, "grad_norm": 0.46430301666259766, "learning_rate": 1.2622243421044293e-05, "loss": 0.0967, "step": 25602 }, { "epoch": 0.5641695174822479, "grad_norm": 0.9394530057907104, "learning_rate": 1.2621186427481588e-05, "loss": 0.1239, "step": 25603 }, { "epoch": 0.564191552771764, "grad_norm": 0.5519958138465881, "learning_rate": 1.262012944603546e-05, "loss": 0.0802, "step": 25604 }, { "epoch": 0.5642135880612802, "grad_norm": 0.5821313858032227, "learning_rate": 1.2619072476711287e-05, "loss": 0.0609, "step": 25605 }, { "epoch": 0.5642356233507962, "grad_norm": 0.7201380729675293, "learning_rate": 1.2618015519514447e-05, "loss": 0.0792, "step": 25606 }, { "epoch": 0.5642576586403124, "grad_norm": 0.6436300277709961, "learning_rate": 1.2616958574450342e-05, "loss": 0.0519, "step": 25607 }, { "epoch": 0.5642796939298286, "grad_norm": 0.7965801954269409, "learning_rate": 1.261590164152434e-05, "loss": 0.0975, "step": 25608 }, { "epoch": 0.5643017292193447, "grad_norm": 0.4462054371833801, "learning_rate": 1.2614844720741831e-05, "loss": 0.099, "step": 25609 }, { "epoch": 0.5643237645088609, "grad_norm": 0.6530981659889221, "learning_rate": 1.2613787812108194e-05, "loss": 0.055, "step": 25610 }, { "epoch": 0.5643457997983771, "grad_norm": 0.5015873312950134, "learning_rate": 1.261273091562882e-05, "loss": 0.0487, "step": 25611 }, { "epoch": 0.5643678350878932, "grad_norm": 0.636996865272522, "learning_rate": 1.2611674031309086e-05, "loss": 0.0908, "step": 25612 }, { "epoch": 0.5643898703774094, "grad_norm": 0.9142486453056335, "learning_rate": 1.2610617159154376e-05, "loss": 0.0696, "step": 25613 }, { "epoch": 0.5644119056669256, "grad_norm": 0.7884396910667419, "learning_rate": 1.260956029917007e-05, "loss": 0.0672, "step": 25614 }, { "epoch": 0.5644339409564417, "grad_norm": 0.6176270842552185, "learning_rate": 1.260850345136156e-05, "loss": 0.0884, "step": 25615 }, { "epoch": 0.5644559762459579, "grad_norm": 0.8598705530166626, "learning_rate": 1.2607446615734226e-05, "loss": 0.0925, "step": 25616 }, { "epoch": 0.5644780115354741, "grad_norm": 1.0110448598861694, "learning_rate": 1.2606389792293443e-05, "loss": 0.1001, "step": 25617 }, { "epoch": 0.5645000468249902, "grad_norm": 0.4098454415798187, "learning_rate": 1.2605332981044607e-05, "loss": 0.0496, "step": 25618 }, { "epoch": 0.5645220821145064, "grad_norm": 0.9679020047187805, "learning_rate": 1.2604276181993092e-05, "loss": 0.0964, "step": 25619 }, { "epoch": 0.5645441174040225, "grad_norm": 0.7252162098884583, "learning_rate": 1.2603219395144285e-05, "loss": 0.0572, "step": 25620 }, { "epoch": 0.5645661526935387, "grad_norm": 0.6265355348587036, "learning_rate": 1.2602162620503557e-05, "loss": 0.0683, "step": 25621 }, { "epoch": 0.5645881879830549, "grad_norm": 0.7090652585029602, "learning_rate": 1.2601105858076311e-05, "loss": 0.0666, "step": 25622 }, { "epoch": 0.564610223272571, "grad_norm": 0.44399601221084595, "learning_rate": 1.2600049107867917e-05, "loss": 0.0731, "step": 25623 }, { "epoch": 0.5646322585620872, "grad_norm": 0.49132946133613586, "learning_rate": 1.2598992369883761e-05, "loss": 0.0814, "step": 25624 }, { "epoch": 0.5646542938516034, "grad_norm": 0.5184337496757507, "learning_rate": 1.2597935644129221e-05, "loss": 0.061, "step": 25625 }, { "epoch": 0.5646763291411195, "grad_norm": 0.9874512553215027, "learning_rate": 1.2596878930609686e-05, "loss": 0.0639, "step": 25626 }, { "epoch": 0.5646983644306357, "grad_norm": 0.5708844661712646, "learning_rate": 1.2595822229330534e-05, "loss": 0.0503, "step": 25627 }, { "epoch": 0.5647203997201519, "grad_norm": 0.8427778482437134, "learning_rate": 1.259476554029715e-05, "loss": 0.0766, "step": 25628 }, { "epoch": 0.564742435009668, "grad_norm": 0.7406195998191833, "learning_rate": 1.2593708863514907e-05, "loss": 0.0736, "step": 25629 }, { "epoch": 0.5647644702991842, "grad_norm": 0.7338910102844238, "learning_rate": 1.2592652198989204e-05, "loss": 0.0658, "step": 25630 }, { "epoch": 0.5647865055887002, "grad_norm": 0.44329583644866943, "learning_rate": 1.2591595546725413e-05, "loss": 0.0733, "step": 25631 }, { "epoch": 0.5648085408782164, "grad_norm": 0.4869813024997711, "learning_rate": 1.2590538906728915e-05, "loss": 0.0645, "step": 25632 }, { "epoch": 0.5648305761677326, "grad_norm": 0.6757150292396545, "learning_rate": 1.2589482279005095e-05, "loss": 0.0933, "step": 25633 }, { "epoch": 0.5648526114572487, "grad_norm": 0.4829995930194855, "learning_rate": 1.2588425663559336e-05, "loss": 0.0594, "step": 25634 }, { "epoch": 0.5648746467467649, "grad_norm": 0.6888736486434937, "learning_rate": 1.2587369060397018e-05, "loss": 0.0915, "step": 25635 }, { "epoch": 0.5648966820362811, "grad_norm": 0.3842369019985199, "learning_rate": 1.2586312469523522e-05, "loss": 0.0631, "step": 25636 }, { "epoch": 0.5649187173257972, "grad_norm": 0.710904061794281, "learning_rate": 1.2585255890944228e-05, "loss": 0.0592, "step": 25637 }, { "epoch": 0.5649407526153134, "grad_norm": 0.4341734051704407, "learning_rate": 1.2584199324664523e-05, "loss": 0.0625, "step": 25638 }, { "epoch": 0.5649627879048296, "grad_norm": 0.6573588252067566, "learning_rate": 1.258314277068979e-05, "loss": 0.0852, "step": 25639 }, { "epoch": 0.5649848231943457, "grad_norm": 0.5831369757652283, "learning_rate": 1.2582086229025401e-05, "loss": 0.0712, "step": 25640 }, { "epoch": 0.5650068584838619, "grad_norm": 1.0128103494644165, "learning_rate": 1.2581029699676748e-05, "loss": 0.0956, "step": 25641 }, { "epoch": 0.5650288937733781, "grad_norm": 0.25356629490852356, "learning_rate": 1.2579973182649207e-05, "loss": 0.0652, "step": 25642 }, { "epoch": 0.5650509290628942, "grad_norm": 0.6129980683326721, "learning_rate": 1.257891667794816e-05, "loss": 0.0786, "step": 25643 }, { "epoch": 0.5650729643524104, "grad_norm": 0.6778916716575623, "learning_rate": 1.2577860185578984e-05, "loss": 0.0852, "step": 25644 }, { "epoch": 0.5650949996419266, "grad_norm": 0.8265918493270874, "learning_rate": 1.257680370554707e-05, "loss": 0.0523, "step": 25645 }, { "epoch": 0.5651170349314427, "grad_norm": 0.6545138955116272, "learning_rate": 1.2575747237857794e-05, "loss": 0.0746, "step": 25646 }, { "epoch": 0.5651390702209589, "grad_norm": 0.4682580530643463, "learning_rate": 1.257469078251654e-05, "loss": 0.0515, "step": 25647 }, { "epoch": 0.565161105510475, "grad_norm": 0.9753608703613281, "learning_rate": 1.2573634339528679e-05, "loss": 0.1254, "step": 25648 }, { "epoch": 0.5651831407999912, "grad_norm": 0.4175458550453186, "learning_rate": 1.2572577908899605e-05, "loss": 0.0387, "step": 25649 }, { "epoch": 0.5652051760895074, "grad_norm": 0.5732365846633911, "learning_rate": 1.2571521490634692e-05, "loss": 0.065, "step": 25650 }, { "epoch": 0.5652272113790235, "grad_norm": 0.46677589416503906, "learning_rate": 1.2570465084739324e-05, "loss": 0.0548, "step": 25651 }, { "epoch": 0.5652492466685397, "grad_norm": 0.577475368976593, "learning_rate": 1.2569408691218872e-05, "loss": 0.067, "step": 25652 }, { "epoch": 0.5652712819580559, "grad_norm": 0.8380777835845947, "learning_rate": 1.2568352310078733e-05, "loss": 0.0764, "step": 25653 }, { "epoch": 0.565293317247572, "grad_norm": 0.43004313111305237, "learning_rate": 1.256729594132428e-05, "loss": 0.0601, "step": 25654 }, { "epoch": 0.5653153525370882, "grad_norm": 0.39896297454833984, "learning_rate": 1.2566239584960892e-05, "loss": 0.0965, "step": 25655 }, { "epoch": 0.5653373878266043, "grad_norm": 0.5977529287338257, "learning_rate": 1.2565183240993943e-05, "loss": 0.0926, "step": 25656 }, { "epoch": 0.5653594231161204, "grad_norm": 0.5346905589103699, "learning_rate": 1.2564126909428835e-05, "loss": 0.0524, "step": 25657 }, { "epoch": 0.5653814584056366, "grad_norm": 0.6770470142364502, "learning_rate": 1.256307059027093e-05, "loss": 0.0935, "step": 25658 }, { "epoch": 0.5654034936951527, "grad_norm": 0.4438348710536957, "learning_rate": 1.2562014283525605e-05, "loss": 0.0661, "step": 25659 }, { "epoch": 0.5654255289846689, "grad_norm": 0.7278760075569153, "learning_rate": 1.2560957989198257e-05, "loss": 0.0562, "step": 25660 }, { "epoch": 0.5654475642741851, "grad_norm": 0.5454471707344055, "learning_rate": 1.2559901707294257e-05, "loss": 0.0686, "step": 25661 }, { "epoch": 0.5654695995637012, "grad_norm": 0.8870812058448792, "learning_rate": 1.2558845437818985e-05, "loss": 0.0763, "step": 25662 }, { "epoch": 0.5654916348532174, "grad_norm": 0.6105453968048096, "learning_rate": 1.2557789180777817e-05, "loss": 0.0577, "step": 25663 }, { "epoch": 0.5655136701427336, "grad_norm": 0.9153743982315063, "learning_rate": 1.2556732936176145e-05, "loss": 0.0835, "step": 25664 }, { "epoch": 0.5655357054322497, "grad_norm": 0.6061738729476929, "learning_rate": 1.2555676704019344e-05, "loss": 0.0731, "step": 25665 }, { "epoch": 0.5655577407217659, "grad_norm": 0.6832625269889832, "learning_rate": 1.255462048431279e-05, "loss": 0.0575, "step": 25666 }, { "epoch": 0.5655797760112821, "grad_norm": 0.9252937436103821, "learning_rate": 1.255356427706186e-05, "loss": 0.0779, "step": 25667 }, { "epoch": 0.5656018113007982, "grad_norm": 0.49632176756858826, "learning_rate": 1.2552508082271947e-05, "loss": 0.0683, "step": 25668 }, { "epoch": 0.5656238465903144, "grad_norm": 0.5617344379425049, "learning_rate": 1.255145189994842e-05, "loss": 0.0714, "step": 25669 }, { "epoch": 0.5656458818798306, "grad_norm": 0.5267910361289978, "learning_rate": 1.2550395730096661e-05, "loss": 0.0376, "step": 25670 }, { "epoch": 0.5656679171693467, "grad_norm": 0.6323957443237305, "learning_rate": 1.2549339572722044e-05, "loss": 0.0647, "step": 25671 }, { "epoch": 0.5656899524588629, "grad_norm": 0.5273299813270569, "learning_rate": 1.2548283427829963e-05, "loss": 0.0635, "step": 25672 }, { "epoch": 0.565711987748379, "grad_norm": 0.8322951793670654, "learning_rate": 1.2547227295425788e-05, "loss": 0.089, "step": 25673 }, { "epoch": 0.5657340230378952, "grad_norm": 0.48221656680107117, "learning_rate": 1.25461711755149e-05, "loss": 0.0611, "step": 25674 }, { "epoch": 0.5657560583274114, "grad_norm": 0.7504016160964966, "learning_rate": 1.2545115068102674e-05, "loss": 0.1064, "step": 25675 }, { "epoch": 0.5657780936169275, "grad_norm": 0.7121599316596985, "learning_rate": 1.2544058973194497e-05, "loss": 0.0634, "step": 25676 }, { "epoch": 0.5658001289064437, "grad_norm": 0.6559627056121826, "learning_rate": 1.2543002890795745e-05, "loss": 0.0502, "step": 25677 }, { "epoch": 0.5658221641959599, "grad_norm": 0.49849021434783936, "learning_rate": 1.2541946820911796e-05, "loss": 0.0693, "step": 25678 }, { "epoch": 0.565844199485476, "grad_norm": 0.8256306648254395, "learning_rate": 1.2540890763548025e-05, "loss": 0.0806, "step": 25679 }, { "epoch": 0.5658662347749922, "grad_norm": 0.77562016248703, "learning_rate": 1.253983471870982e-05, "loss": 0.0566, "step": 25680 }, { "epoch": 0.5658882700645083, "grad_norm": 0.5405616760253906, "learning_rate": 1.253877868640256e-05, "loss": 0.0587, "step": 25681 }, { "epoch": 0.5659103053540244, "grad_norm": 1.08725905418396, "learning_rate": 1.2537722666631613e-05, "loss": 0.0862, "step": 25682 }, { "epoch": 0.5659323406435406, "grad_norm": 0.6021896004676819, "learning_rate": 1.2536666659402371e-05, "loss": 0.0748, "step": 25683 }, { "epoch": 0.5659543759330568, "grad_norm": 0.7768195867538452, "learning_rate": 1.2535610664720203e-05, "loss": 0.0946, "step": 25684 }, { "epoch": 0.5659764112225729, "grad_norm": 0.451471209526062, "learning_rate": 1.2534554682590494e-05, "loss": 0.0578, "step": 25685 }, { "epoch": 0.5659984465120891, "grad_norm": 1.0627336502075195, "learning_rate": 1.2533498713018612e-05, "loss": 0.1086, "step": 25686 }, { "epoch": 0.5660204818016052, "grad_norm": 0.7344831228256226, "learning_rate": 1.2532442756009952e-05, "loss": 0.1034, "step": 25687 }, { "epoch": 0.5660425170911214, "grad_norm": 0.7764461636543274, "learning_rate": 1.2531386811569883e-05, "loss": 0.0895, "step": 25688 }, { "epoch": 0.5660645523806376, "grad_norm": 0.565115749835968, "learning_rate": 1.2530330879703785e-05, "loss": 0.0664, "step": 25689 }, { "epoch": 0.5660865876701537, "grad_norm": 0.4018576741218567, "learning_rate": 1.2529274960417034e-05, "loss": 0.0447, "step": 25690 }, { "epoch": 0.5661086229596699, "grad_norm": 0.6169477105140686, "learning_rate": 1.2528219053715013e-05, "loss": 0.0723, "step": 25691 }, { "epoch": 0.5661306582491861, "grad_norm": 0.700901210308075, "learning_rate": 1.2527163159603098e-05, "loss": 0.0515, "step": 25692 }, { "epoch": 0.5661526935387022, "grad_norm": 0.4459337890148163, "learning_rate": 1.2526107278086667e-05, "loss": 0.0707, "step": 25693 }, { "epoch": 0.5661747288282184, "grad_norm": 0.7482050061225891, "learning_rate": 1.252505140917109e-05, "loss": 0.1061, "step": 25694 }, { "epoch": 0.5661967641177346, "grad_norm": 0.4783961772918701, "learning_rate": 1.2523995552861763e-05, "loss": 0.0839, "step": 25695 }, { "epoch": 0.5662187994072507, "grad_norm": 0.9977014660835266, "learning_rate": 1.2522939709164052e-05, "loss": 0.072, "step": 25696 }, { "epoch": 0.5662408346967669, "grad_norm": 0.6166110038757324, "learning_rate": 1.2521883878083338e-05, "loss": 0.0698, "step": 25697 }, { "epoch": 0.5662628699862831, "grad_norm": 0.720798909664154, "learning_rate": 1.2520828059624996e-05, "loss": 0.0794, "step": 25698 }, { "epoch": 0.5662849052757992, "grad_norm": 0.5427330136299133, "learning_rate": 1.2519772253794407e-05, "loss": 0.0796, "step": 25699 }, { "epoch": 0.5663069405653154, "grad_norm": 0.7871620655059814, "learning_rate": 1.251871646059695e-05, "loss": 0.0837, "step": 25700 }, { "epoch": 0.5663289758548316, "grad_norm": 1.0980652570724487, "learning_rate": 1.2517660680038e-05, "loss": 0.075, "step": 25701 }, { "epoch": 0.5663510111443477, "grad_norm": 0.759757399559021, "learning_rate": 1.251660491212293e-05, "loss": 0.0848, "step": 25702 }, { "epoch": 0.5663730464338639, "grad_norm": 0.7224224805831909, "learning_rate": 1.2515549156857127e-05, "loss": 0.0534, "step": 25703 }, { "epoch": 0.56639508172338, "grad_norm": 0.8429970741271973, "learning_rate": 1.2514493414245965e-05, "loss": 0.0649, "step": 25704 }, { "epoch": 0.5664171170128961, "grad_norm": 0.5463057160377502, "learning_rate": 1.2513437684294819e-05, "loss": 0.0354, "step": 25705 }, { "epoch": 0.5664391523024123, "grad_norm": 0.7767556309700012, "learning_rate": 1.2512381967009071e-05, "loss": 0.0873, "step": 25706 }, { "epoch": 0.5664611875919284, "grad_norm": 0.7301696538925171, "learning_rate": 1.2511326262394096e-05, "loss": 0.1181, "step": 25707 }, { "epoch": 0.5664832228814446, "grad_norm": 0.6946013569831848, "learning_rate": 1.2510270570455271e-05, "loss": 0.0716, "step": 25708 }, { "epoch": 0.5665052581709608, "grad_norm": 0.4723023474216461, "learning_rate": 1.2509214891197964e-05, "loss": 0.0799, "step": 25709 }, { "epoch": 0.5665272934604769, "grad_norm": 0.6441892981529236, "learning_rate": 1.250815922462757e-05, "loss": 0.1055, "step": 25710 }, { "epoch": 0.5665493287499931, "grad_norm": 0.6134218573570251, "learning_rate": 1.2507103570749457e-05, "loss": 0.0752, "step": 25711 }, { "epoch": 0.5665713640395093, "grad_norm": 0.42840948700904846, "learning_rate": 1.2506047929569002e-05, "loss": 0.0566, "step": 25712 }, { "epoch": 0.5665933993290254, "grad_norm": 0.6637427806854248, "learning_rate": 1.2504992301091579e-05, "loss": 0.0834, "step": 25713 }, { "epoch": 0.5666154346185416, "grad_norm": 0.7300500273704529, "learning_rate": 1.2503936685322573e-05, "loss": 0.0704, "step": 25714 }, { "epoch": 0.5666374699080577, "grad_norm": 0.6869509816169739, "learning_rate": 1.2502881082267355e-05, "loss": 0.0492, "step": 25715 }, { "epoch": 0.5666595051975739, "grad_norm": 0.6365880370140076, "learning_rate": 1.2501825491931302e-05, "loss": 0.0717, "step": 25716 }, { "epoch": 0.5666815404870901, "grad_norm": 0.8226339817047119, "learning_rate": 1.2500769914319783e-05, "loss": 0.103, "step": 25717 }, { "epoch": 0.5667035757766062, "grad_norm": 0.30516287684440613, "learning_rate": 1.2499714349438194e-05, "loss": 0.0636, "step": 25718 }, { "epoch": 0.5667256110661224, "grad_norm": 0.845482587814331, "learning_rate": 1.2498658797291898e-05, "loss": 0.0727, "step": 25719 }, { "epoch": 0.5667476463556386, "grad_norm": 0.8296025395393372, "learning_rate": 1.2497603257886274e-05, "loss": 0.0727, "step": 25720 }, { "epoch": 0.5667696816451547, "grad_norm": 0.428724467754364, "learning_rate": 1.2496547731226696e-05, "loss": 0.0741, "step": 25721 }, { "epoch": 0.5667917169346709, "grad_norm": 0.5322721004486084, "learning_rate": 1.2495492217318545e-05, "loss": 0.0595, "step": 25722 }, { "epoch": 0.5668137522241871, "grad_norm": 0.7319961190223694, "learning_rate": 1.2494436716167196e-05, "loss": 0.0613, "step": 25723 }, { "epoch": 0.5668357875137032, "grad_norm": 0.3697547912597656, "learning_rate": 1.2493381227778025e-05, "loss": 0.0537, "step": 25724 }, { "epoch": 0.5668578228032194, "grad_norm": 0.39997220039367676, "learning_rate": 1.24923257521564e-05, "loss": 0.0787, "step": 25725 }, { "epoch": 0.5668798580927356, "grad_norm": 0.5170269012451172, "learning_rate": 1.2491270289307711e-05, "loss": 0.0916, "step": 25726 }, { "epoch": 0.5669018933822517, "grad_norm": 0.5364327430725098, "learning_rate": 1.2490214839237328e-05, "loss": 0.0502, "step": 25727 }, { "epoch": 0.5669239286717679, "grad_norm": 0.5347568392753601, "learning_rate": 1.2489159401950624e-05, "loss": 0.0748, "step": 25728 }, { "epoch": 0.566945963961284, "grad_norm": 0.4394490420818329, "learning_rate": 1.2488103977452982e-05, "loss": 0.0379, "step": 25729 }, { "epoch": 0.5669679992508001, "grad_norm": 0.6760155558586121, "learning_rate": 1.248704856574977e-05, "loss": 0.0658, "step": 25730 }, { "epoch": 0.5669900345403163, "grad_norm": 0.6220822334289551, "learning_rate": 1.248599316684637e-05, "loss": 0.0722, "step": 25731 }, { "epoch": 0.5670120698298324, "grad_norm": 1.0185080766677856, "learning_rate": 1.2484937780748146e-05, "loss": 0.0712, "step": 25732 }, { "epoch": 0.5670341051193486, "grad_norm": 0.7846991419792175, "learning_rate": 1.2483882407460491e-05, "loss": 0.1283, "step": 25733 }, { "epoch": 0.5670561404088648, "grad_norm": 0.3680340647697449, "learning_rate": 1.2482827046988772e-05, "loss": 0.0455, "step": 25734 }, { "epoch": 0.5670781756983809, "grad_norm": 0.6594178080558777, "learning_rate": 1.2481771699338366e-05, "loss": 0.0775, "step": 25735 }, { "epoch": 0.5671002109878971, "grad_norm": 1.0926135778427124, "learning_rate": 1.2480716364514641e-05, "loss": 0.1174, "step": 25736 }, { "epoch": 0.5671222462774133, "grad_norm": 0.7430744171142578, "learning_rate": 1.2479661042522984e-05, "loss": 0.0753, "step": 25737 }, { "epoch": 0.5671442815669294, "grad_norm": 0.3271806538105011, "learning_rate": 1.2478605733368762e-05, "loss": 0.0441, "step": 25738 }, { "epoch": 0.5671663168564456, "grad_norm": 0.662751317024231, "learning_rate": 1.2477550437057356e-05, "loss": 0.0651, "step": 25739 }, { "epoch": 0.5671883521459617, "grad_norm": 0.31140491366386414, "learning_rate": 1.247649515359413e-05, "loss": 0.0873, "step": 25740 }, { "epoch": 0.5672103874354779, "grad_norm": 0.42639651894569397, "learning_rate": 1.2475439882984474e-05, "loss": 0.0536, "step": 25741 }, { "epoch": 0.5672324227249941, "grad_norm": 0.602046549320221, "learning_rate": 1.2474384625233758e-05, "loss": 0.0853, "step": 25742 }, { "epoch": 0.5672544580145102, "grad_norm": 0.7389532923698425, "learning_rate": 1.2473329380347354e-05, "loss": 0.099, "step": 25743 }, { "epoch": 0.5672764933040264, "grad_norm": 0.30679023265838623, "learning_rate": 1.2472274148330636e-05, "loss": 0.0637, "step": 25744 }, { "epoch": 0.5672985285935426, "grad_norm": 0.4324534833431244, "learning_rate": 1.2471218929188983e-05, "loss": 0.0311, "step": 25745 }, { "epoch": 0.5673205638830587, "grad_norm": 0.5725373029708862, "learning_rate": 1.247016372292777e-05, "loss": 0.0762, "step": 25746 }, { "epoch": 0.5673425991725749, "grad_norm": 0.6289963722229004, "learning_rate": 1.246910852955237e-05, "loss": 0.0762, "step": 25747 }, { "epoch": 0.5673646344620911, "grad_norm": 1.1745740175247192, "learning_rate": 1.2468053349068149e-05, "loss": 0.0761, "step": 25748 }, { "epoch": 0.5673866697516072, "grad_norm": 0.412820041179657, "learning_rate": 1.2466998181480498e-05, "loss": 0.0748, "step": 25749 }, { "epoch": 0.5674087050411234, "grad_norm": 0.6003027558326721, "learning_rate": 1.2465943026794782e-05, "loss": 0.0722, "step": 25750 }, { "epoch": 0.5674307403306396, "grad_norm": 0.5592830181121826, "learning_rate": 1.2464887885016376e-05, "loss": 0.062, "step": 25751 }, { "epoch": 0.5674527756201557, "grad_norm": 1.0984628200531006, "learning_rate": 1.2463832756150657e-05, "loss": 0.0799, "step": 25752 }, { "epoch": 0.5674748109096719, "grad_norm": 0.6517522931098938, "learning_rate": 1.2462777640202998e-05, "loss": 0.0783, "step": 25753 }, { "epoch": 0.567496846199188, "grad_norm": 0.9136655926704407, "learning_rate": 1.2461722537178773e-05, "loss": 0.0886, "step": 25754 }, { "epoch": 0.5675188814887041, "grad_norm": 0.3360084891319275, "learning_rate": 1.246066744708335e-05, "loss": 0.0886, "step": 25755 }, { "epoch": 0.5675409167782203, "grad_norm": 0.5746903419494629, "learning_rate": 1.2459612369922115e-05, "loss": 0.0877, "step": 25756 }, { "epoch": 0.5675629520677364, "grad_norm": 0.737556517124176, "learning_rate": 1.2458557305700439e-05, "loss": 0.0637, "step": 25757 }, { "epoch": 0.5675849873572526, "grad_norm": 0.7324375510215759, "learning_rate": 1.2457502254423692e-05, "loss": 0.0932, "step": 25758 }, { "epoch": 0.5676070226467688, "grad_norm": 0.5614328980445862, "learning_rate": 1.2456447216097245e-05, "loss": 0.0736, "step": 25759 }, { "epoch": 0.5676290579362849, "grad_norm": 0.7291386723518372, "learning_rate": 1.245539219072648e-05, "loss": 0.0873, "step": 25760 }, { "epoch": 0.5676510932258011, "grad_norm": 0.6193265914916992, "learning_rate": 1.2454337178316768e-05, "loss": 0.057, "step": 25761 }, { "epoch": 0.5676731285153173, "grad_norm": 0.7707918882369995, "learning_rate": 1.2453282178873483e-05, "loss": 0.0775, "step": 25762 }, { "epoch": 0.5676951638048334, "grad_norm": 0.7239161133766174, "learning_rate": 1.2452227192401987e-05, "loss": 0.0603, "step": 25763 }, { "epoch": 0.5677171990943496, "grad_norm": 0.2997758388519287, "learning_rate": 1.2451172218907675e-05, "loss": 0.0661, "step": 25764 }, { "epoch": 0.5677392343838658, "grad_norm": 0.7052375078201294, "learning_rate": 1.2450117258395906e-05, "loss": 0.0598, "step": 25765 }, { "epoch": 0.5677612696733819, "grad_norm": 0.6344870328903198, "learning_rate": 1.2449062310872061e-05, "loss": 0.0739, "step": 25766 }, { "epoch": 0.5677833049628981, "grad_norm": 0.5690672397613525, "learning_rate": 1.24480073763415e-05, "loss": 0.0602, "step": 25767 }, { "epoch": 0.5678053402524142, "grad_norm": 0.6655257940292358, "learning_rate": 1.2446952454809616e-05, "loss": 0.0775, "step": 25768 }, { "epoch": 0.5678273755419304, "grad_norm": 0.657732367515564, "learning_rate": 1.2445897546281769e-05, "loss": 0.0754, "step": 25769 }, { "epoch": 0.5678494108314466, "grad_norm": 0.6351526379585266, "learning_rate": 1.244484265076333e-05, "loss": 0.0428, "step": 25770 }, { "epoch": 0.5678714461209627, "grad_norm": 0.8425223231315613, "learning_rate": 1.2443787768259683e-05, "loss": 0.068, "step": 25771 }, { "epoch": 0.5678934814104789, "grad_norm": 0.6689863204956055, "learning_rate": 1.2442732898776197e-05, "loss": 0.0653, "step": 25772 }, { "epoch": 0.5679155166999951, "grad_norm": 0.6279346346855164, "learning_rate": 1.2441678042318242e-05, "loss": 0.0995, "step": 25773 }, { "epoch": 0.5679375519895112, "grad_norm": 0.3830527067184448, "learning_rate": 1.2440623198891188e-05, "loss": 0.0321, "step": 25774 }, { "epoch": 0.5679595872790274, "grad_norm": 0.7433716654777527, "learning_rate": 1.243956836850042e-05, "loss": 0.0842, "step": 25775 }, { "epoch": 0.5679816225685436, "grad_norm": 0.4993344843387604, "learning_rate": 1.2438513551151298e-05, "loss": 0.0662, "step": 25776 }, { "epoch": 0.5680036578580597, "grad_norm": 0.5010764002799988, "learning_rate": 1.2437458746849205e-05, "loss": 0.0758, "step": 25777 }, { "epoch": 0.5680256931475759, "grad_norm": 0.7157150506973267, "learning_rate": 1.2436403955599503e-05, "loss": 0.077, "step": 25778 }, { "epoch": 0.568047728437092, "grad_norm": 0.6898374557495117, "learning_rate": 1.2435349177407575e-05, "loss": 0.0723, "step": 25779 }, { "epoch": 0.5680697637266081, "grad_norm": 0.615638792514801, "learning_rate": 1.243429441227879e-05, "loss": 0.047, "step": 25780 }, { "epoch": 0.5680917990161243, "grad_norm": 0.7407346367835999, "learning_rate": 1.2433239660218518e-05, "loss": 0.0807, "step": 25781 }, { "epoch": 0.5681138343056404, "grad_norm": 0.8151634335517883, "learning_rate": 1.2432184921232124e-05, "loss": 0.0679, "step": 25782 }, { "epoch": 0.5681358695951566, "grad_norm": 0.8308481574058533, "learning_rate": 1.2431130195324997e-05, "loss": 0.0807, "step": 25783 }, { "epoch": 0.5681579048846728, "grad_norm": 0.7258288264274597, "learning_rate": 1.2430075482502504e-05, "loss": 0.0526, "step": 25784 }, { "epoch": 0.5681799401741889, "grad_norm": 0.8835471272468567, "learning_rate": 1.2429020782770012e-05, "loss": 0.1059, "step": 25785 }, { "epoch": 0.5682019754637051, "grad_norm": 0.5647911429405212, "learning_rate": 1.2427966096132894e-05, "loss": 0.0533, "step": 25786 }, { "epoch": 0.5682240107532213, "grad_norm": 1.051623821258545, "learning_rate": 1.2426911422596528e-05, "loss": 0.0765, "step": 25787 }, { "epoch": 0.5682460460427374, "grad_norm": 0.8111100196838379, "learning_rate": 1.242585676216628e-05, "loss": 0.0922, "step": 25788 }, { "epoch": 0.5682680813322536, "grad_norm": 0.4770357012748718, "learning_rate": 1.2424802114847524e-05, "loss": 0.0836, "step": 25789 }, { "epoch": 0.5682901166217698, "grad_norm": 0.6190561056137085, "learning_rate": 1.2423747480645623e-05, "loss": 0.1106, "step": 25790 }, { "epoch": 0.5683121519112859, "grad_norm": 0.5993960499763489, "learning_rate": 1.2422692859565969e-05, "loss": 0.0738, "step": 25791 }, { "epoch": 0.5683341872008021, "grad_norm": 0.7871111631393433, "learning_rate": 1.2421638251613919e-05, "loss": 0.087, "step": 25792 }, { "epoch": 0.5683562224903183, "grad_norm": 0.40088194608688354, "learning_rate": 1.2420583656794846e-05, "loss": 0.0727, "step": 25793 }, { "epoch": 0.5683782577798344, "grad_norm": 0.5408536195755005, "learning_rate": 1.2419529075114127e-05, "loss": 0.0739, "step": 25794 }, { "epoch": 0.5684002930693506, "grad_norm": 0.7140477895736694, "learning_rate": 1.2418474506577131e-05, "loss": 0.0782, "step": 25795 }, { "epoch": 0.5684223283588667, "grad_norm": 0.7941488027572632, "learning_rate": 1.2417419951189227e-05, "loss": 0.0723, "step": 25796 }, { "epoch": 0.5684443636483829, "grad_norm": 0.438749760389328, "learning_rate": 1.2416365408955782e-05, "loss": 0.0507, "step": 25797 }, { "epoch": 0.5684663989378991, "grad_norm": 0.38419681787490845, "learning_rate": 1.2415310879882181e-05, "loss": 0.0731, "step": 25798 }, { "epoch": 0.5684884342274152, "grad_norm": 0.7586499452590942, "learning_rate": 1.2414256363973786e-05, "loss": 0.0774, "step": 25799 }, { "epoch": 0.5685104695169314, "grad_norm": 0.8767040967941284, "learning_rate": 1.241320186123597e-05, "loss": 0.0864, "step": 25800 }, { "epoch": 0.5685325048064476, "grad_norm": 0.603535532951355, "learning_rate": 1.2412147371674103e-05, "loss": 0.0803, "step": 25801 }, { "epoch": 0.5685545400959637, "grad_norm": 0.44631218910217285, "learning_rate": 1.2411092895293558e-05, "loss": 0.0665, "step": 25802 }, { "epoch": 0.5685765753854799, "grad_norm": 0.543836772441864, "learning_rate": 1.2410038432099706e-05, "loss": 0.078, "step": 25803 }, { "epoch": 0.568598610674996, "grad_norm": 0.7238060235977173, "learning_rate": 1.2408983982097919e-05, "loss": 0.0797, "step": 25804 }, { "epoch": 0.5686206459645121, "grad_norm": 0.5212175846099854, "learning_rate": 1.2407929545293556e-05, "loss": 0.11, "step": 25805 }, { "epoch": 0.5686426812540283, "grad_norm": 0.8742388486862183, "learning_rate": 1.2406875121692007e-05, "loss": 0.0865, "step": 25806 }, { "epoch": 0.5686647165435444, "grad_norm": 0.6045445203781128, "learning_rate": 1.2405820711298633e-05, "loss": 0.0523, "step": 25807 }, { "epoch": 0.5686867518330606, "grad_norm": 0.8305783271789551, "learning_rate": 1.2404766314118805e-05, "loss": 0.0594, "step": 25808 }, { "epoch": 0.5687087871225768, "grad_norm": 0.6634888052940369, "learning_rate": 1.2403711930157889e-05, "loss": 0.0891, "step": 25809 }, { "epoch": 0.5687308224120929, "grad_norm": 0.45206016302108765, "learning_rate": 1.2402657559421265e-05, "loss": 0.1075, "step": 25810 }, { "epoch": 0.5687528577016091, "grad_norm": 0.6629922389984131, "learning_rate": 1.24016032019143e-05, "loss": 0.0836, "step": 25811 }, { "epoch": 0.5687748929911253, "grad_norm": 0.5854262113571167, "learning_rate": 1.2400548857642362e-05, "loss": 0.0861, "step": 25812 }, { "epoch": 0.5687969282806414, "grad_norm": 0.6024172306060791, "learning_rate": 1.2399494526610814e-05, "loss": 0.0649, "step": 25813 }, { "epoch": 0.5688189635701576, "grad_norm": 0.5885273814201355, "learning_rate": 1.2398440208825046e-05, "loss": 0.0697, "step": 25814 }, { "epoch": 0.5688409988596738, "grad_norm": 0.7584085464477539, "learning_rate": 1.2397385904290414e-05, "loss": 0.0713, "step": 25815 }, { "epoch": 0.5688630341491899, "grad_norm": 0.8419628739356995, "learning_rate": 1.239633161301229e-05, "loss": 0.083, "step": 25816 }, { "epoch": 0.5688850694387061, "grad_norm": 0.7177296280860901, "learning_rate": 1.2395277334996045e-05, "loss": 0.093, "step": 25817 }, { "epoch": 0.5689071047282223, "grad_norm": 0.6161792278289795, "learning_rate": 1.2394223070247051e-05, "loss": 0.0663, "step": 25818 }, { "epoch": 0.5689291400177384, "grad_norm": 0.7480020523071289, "learning_rate": 1.2393168818770678e-05, "loss": 0.0665, "step": 25819 }, { "epoch": 0.5689511753072546, "grad_norm": 0.49156704545021057, "learning_rate": 1.2392114580572287e-05, "loss": 0.0609, "step": 25820 }, { "epoch": 0.5689732105967708, "grad_norm": 0.7556797862052917, "learning_rate": 1.239106035565726e-05, "loss": 0.063, "step": 25821 }, { "epoch": 0.5689952458862869, "grad_norm": 0.7530586123466492, "learning_rate": 1.2390006144030963e-05, "loss": 0.0715, "step": 25822 }, { "epoch": 0.5690172811758031, "grad_norm": 0.43867361545562744, "learning_rate": 1.2388951945698764e-05, "loss": 0.0551, "step": 25823 }, { "epoch": 0.5690393164653192, "grad_norm": 0.7637973427772522, "learning_rate": 1.238789776066603e-05, "loss": 0.0859, "step": 25824 }, { "epoch": 0.5690613517548354, "grad_norm": 0.7440693974494934, "learning_rate": 1.2386843588938135e-05, "loss": 0.0782, "step": 25825 }, { "epoch": 0.5690833870443516, "grad_norm": 0.7004427313804626, "learning_rate": 1.2385789430520448e-05, "loss": 0.1015, "step": 25826 }, { "epoch": 0.5691054223338677, "grad_norm": 0.7271105647087097, "learning_rate": 1.2384735285418337e-05, "loss": 0.0872, "step": 25827 }, { "epoch": 0.5691274576233839, "grad_norm": 0.8125648498535156, "learning_rate": 1.2383681153637165e-05, "loss": 0.0722, "step": 25828 }, { "epoch": 0.5691494929129, "grad_norm": 0.591413676738739, "learning_rate": 1.2382627035182314e-05, "loss": 0.0715, "step": 25829 }, { "epoch": 0.5691715282024161, "grad_norm": 0.531745433807373, "learning_rate": 1.2381572930059147e-05, "loss": 0.0471, "step": 25830 }, { "epoch": 0.5691935634919323, "grad_norm": 0.544754147529602, "learning_rate": 1.2380518838273035e-05, "loss": 0.0844, "step": 25831 }, { "epoch": 0.5692155987814485, "grad_norm": 0.3820025324821472, "learning_rate": 1.237946475982934e-05, "loss": 0.0613, "step": 25832 }, { "epoch": 0.5692376340709646, "grad_norm": 0.38253113627433777, "learning_rate": 1.237841069473344e-05, "loss": 0.0899, "step": 25833 }, { "epoch": 0.5692596693604808, "grad_norm": 0.7486114501953125, "learning_rate": 1.2377356642990702e-05, "loss": 0.1108, "step": 25834 }, { "epoch": 0.569281704649997, "grad_norm": 0.6920822858810425, "learning_rate": 1.2376302604606493e-05, "loss": 0.0974, "step": 25835 }, { "epoch": 0.5693037399395131, "grad_norm": 0.6052842140197754, "learning_rate": 1.2375248579586171e-05, "loss": 0.1145, "step": 25836 }, { "epoch": 0.5693257752290293, "grad_norm": 0.5562227368354797, "learning_rate": 1.2374194567935125e-05, "loss": 0.0701, "step": 25837 }, { "epoch": 0.5693478105185454, "grad_norm": 0.737013041973114, "learning_rate": 1.2373140569658715e-05, "loss": 0.0652, "step": 25838 }, { "epoch": 0.5693698458080616, "grad_norm": 0.4733319878578186, "learning_rate": 1.2372086584762304e-05, "loss": 0.0814, "step": 25839 }, { "epoch": 0.5693918810975778, "grad_norm": 0.9093303084373474, "learning_rate": 1.2371032613251268e-05, "loss": 0.062, "step": 25840 }, { "epoch": 0.5694139163870939, "grad_norm": 0.5854032039642334, "learning_rate": 1.2369978655130975e-05, "loss": 0.0663, "step": 25841 }, { "epoch": 0.5694359516766101, "grad_norm": 0.7620295882225037, "learning_rate": 1.2368924710406789e-05, "loss": 0.0944, "step": 25842 }, { "epoch": 0.5694579869661263, "grad_norm": 0.33903321623802185, "learning_rate": 1.2367870779084073e-05, "loss": 0.0628, "step": 25843 }, { "epoch": 0.5694800222556424, "grad_norm": 1.3225581645965576, "learning_rate": 1.2366816861168211e-05, "loss": 0.0738, "step": 25844 }, { "epoch": 0.5695020575451586, "grad_norm": 0.7879644632339478, "learning_rate": 1.2365762956664563e-05, "loss": 0.0901, "step": 25845 }, { "epoch": 0.5695240928346748, "grad_norm": 0.9938070774078369, "learning_rate": 1.2364709065578495e-05, "loss": 0.0688, "step": 25846 }, { "epoch": 0.5695461281241909, "grad_norm": 0.7180012464523315, "learning_rate": 1.2363655187915376e-05, "loss": 0.0839, "step": 25847 }, { "epoch": 0.5695681634137071, "grad_norm": 0.5712265372276306, "learning_rate": 1.2362601323680575e-05, "loss": 0.0913, "step": 25848 }, { "epoch": 0.5695901987032232, "grad_norm": 0.7160034775733948, "learning_rate": 1.2361547472879462e-05, "loss": 0.0779, "step": 25849 }, { "epoch": 0.5696122339927394, "grad_norm": 1.04754638671875, "learning_rate": 1.2360493635517401e-05, "loss": 0.057, "step": 25850 }, { "epoch": 0.5696342692822556, "grad_norm": 0.44828829169273376, "learning_rate": 1.2359439811599757e-05, "loss": 0.0528, "step": 25851 }, { "epoch": 0.5696563045717717, "grad_norm": 0.8820392489433289, "learning_rate": 1.2358386001131907e-05, "loss": 0.0989, "step": 25852 }, { "epoch": 0.5696783398612879, "grad_norm": 0.5195988416671753, "learning_rate": 1.2357332204119215e-05, "loss": 0.0454, "step": 25853 }, { "epoch": 0.569700375150804, "grad_norm": 0.6875737309455872, "learning_rate": 1.2356278420567046e-05, "loss": 0.0808, "step": 25854 }, { "epoch": 0.5697224104403201, "grad_norm": 0.7063564658164978, "learning_rate": 1.2355224650480767e-05, "loss": 0.0884, "step": 25855 }, { "epoch": 0.5697444457298363, "grad_norm": 0.5525670647621155, "learning_rate": 1.2354170893865747e-05, "loss": 0.0796, "step": 25856 }, { "epoch": 0.5697664810193525, "grad_norm": 0.624731183052063, "learning_rate": 1.2353117150727357e-05, "loss": 0.0668, "step": 25857 }, { "epoch": 0.5697885163088686, "grad_norm": 0.7099524140357971, "learning_rate": 1.235206342107096e-05, "loss": 0.0663, "step": 25858 }, { "epoch": 0.5698105515983848, "grad_norm": 0.44222041964530945, "learning_rate": 1.2351009704901915e-05, "loss": 0.0745, "step": 25859 }, { "epoch": 0.569832586887901, "grad_norm": 0.6398889422416687, "learning_rate": 1.2349956002225604e-05, "loss": 0.0777, "step": 25860 }, { "epoch": 0.5698546221774171, "grad_norm": 0.6078173518180847, "learning_rate": 1.2348902313047392e-05, "loss": 0.0883, "step": 25861 }, { "epoch": 0.5698766574669333, "grad_norm": 0.8132861852645874, "learning_rate": 1.2347848637372637e-05, "loss": 0.112, "step": 25862 }, { "epoch": 0.5698986927564494, "grad_norm": 0.6463491320610046, "learning_rate": 1.2346794975206715e-05, "loss": 0.067, "step": 25863 }, { "epoch": 0.5699207280459656, "grad_norm": 0.6521096229553223, "learning_rate": 1.2345741326554989e-05, "loss": 0.0633, "step": 25864 }, { "epoch": 0.5699427633354818, "grad_norm": 0.46276530623435974, "learning_rate": 1.2344687691422825e-05, "loss": 0.0822, "step": 25865 }, { "epoch": 0.5699647986249979, "grad_norm": 0.8033514022827148, "learning_rate": 1.2343634069815584e-05, "loss": 0.0878, "step": 25866 }, { "epoch": 0.5699868339145141, "grad_norm": 0.6566820740699768, "learning_rate": 1.2342580461738647e-05, "loss": 0.0964, "step": 25867 }, { "epoch": 0.5700088692040303, "grad_norm": 0.6028284430503845, "learning_rate": 1.2341526867197371e-05, "loss": 0.0712, "step": 25868 }, { "epoch": 0.5700309044935464, "grad_norm": 0.6797780990600586, "learning_rate": 1.2340473286197123e-05, "loss": 0.0519, "step": 25869 }, { "epoch": 0.5700529397830626, "grad_norm": 0.6527536511421204, "learning_rate": 1.2339419718743271e-05, "loss": 0.0772, "step": 25870 }, { "epoch": 0.5700749750725788, "grad_norm": 0.4134363830089569, "learning_rate": 1.2338366164841182e-05, "loss": 0.0533, "step": 25871 }, { "epoch": 0.5700970103620949, "grad_norm": 0.7847223877906799, "learning_rate": 1.2337312624496222e-05, "loss": 0.0727, "step": 25872 }, { "epoch": 0.5701190456516111, "grad_norm": 0.8108099699020386, "learning_rate": 1.2336259097713758e-05, "loss": 0.0781, "step": 25873 }, { "epoch": 0.5701410809411273, "grad_norm": 0.7502629160881042, "learning_rate": 1.2335205584499146e-05, "loss": 0.0902, "step": 25874 }, { "epoch": 0.5701631162306434, "grad_norm": 0.2983061671257019, "learning_rate": 1.2334152084857768e-05, "loss": 0.0509, "step": 25875 }, { "epoch": 0.5701851515201596, "grad_norm": 0.5111908316612244, "learning_rate": 1.2333098598794985e-05, "loss": 0.0713, "step": 25876 }, { "epoch": 0.5702071868096757, "grad_norm": 0.5507675409317017, "learning_rate": 1.2332045126316158e-05, "loss": 0.0557, "step": 25877 }, { "epoch": 0.5702292220991918, "grad_norm": 0.7634032964706421, "learning_rate": 1.2330991667426651e-05, "loss": 0.0554, "step": 25878 }, { "epoch": 0.570251257388708, "grad_norm": 0.7882044911384583, "learning_rate": 1.2329938222131842e-05, "loss": 0.051, "step": 25879 }, { "epoch": 0.5702732926782241, "grad_norm": 0.6111049056053162, "learning_rate": 1.2328884790437092e-05, "loss": 0.0806, "step": 25880 }, { "epoch": 0.5702953279677403, "grad_norm": 0.7882823944091797, "learning_rate": 1.232783137234776e-05, "loss": 0.078, "step": 25881 }, { "epoch": 0.5703173632572565, "grad_norm": 1.0647753477096558, "learning_rate": 1.232677796786921e-05, "loss": 0.1229, "step": 25882 }, { "epoch": 0.5703393985467726, "grad_norm": 0.8454508185386658, "learning_rate": 1.232572457700682e-05, "loss": 0.0856, "step": 25883 }, { "epoch": 0.5703614338362888, "grad_norm": 0.5924943685531616, "learning_rate": 1.2324671199765947e-05, "loss": 0.0829, "step": 25884 }, { "epoch": 0.570383469125805, "grad_norm": 0.6457887291908264, "learning_rate": 1.2323617836151953e-05, "loss": 0.0843, "step": 25885 }, { "epoch": 0.5704055044153211, "grad_norm": 0.5258154273033142, "learning_rate": 1.2322564486170216e-05, "loss": 0.0738, "step": 25886 }, { "epoch": 0.5704275397048373, "grad_norm": 0.7892983555793762, "learning_rate": 1.2321511149826094e-05, "loss": 0.0612, "step": 25887 }, { "epoch": 0.5704495749943534, "grad_norm": 0.7092400789260864, "learning_rate": 1.232045782712495e-05, "loss": 0.0488, "step": 25888 }, { "epoch": 0.5704716102838696, "grad_norm": 0.362175852060318, "learning_rate": 1.2319404518072148e-05, "loss": 0.0344, "step": 25889 }, { "epoch": 0.5704936455733858, "grad_norm": 0.510649561882019, "learning_rate": 1.231835122267306e-05, "loss": 0.0482, "step": 25890 }, { "epoch": 0.5705156808629019, "grad_norm": 0.7178335785865784, "learning_rate": 1.2317297940933048e-05, "loss": 0.103, "step": 25891 }, { "epoch": 0.5705377161524181, "grad_norm": 0.6460534930229187, "learning_rate": 1.2316244672857475e-05, "loss": 0.0799, "step": 25892 }, { "epoch": 0.5705597514419343, "grad_norm": 1.328956127166748, "learning_rate": 1.2315191418451702e-05, "loss": 0.1005, "step": 25893 }, { "epoch": 0.5705817867314504, "grad_norm": 0.6773186326026917, "learning_rate": 1.2314138177721104e-05, "loss": 0.0724, "step": 25894 }, { "epoch": 0.5706038220209666, "grad_norm": 0.7783244848251343, "learning_rate": 1.2313084950671042e-05, "loss": 0.0827, "step": 25895 }, { "epoch": 0.5706258573104828, "grad_norm": 0.660456120967865, "learning_rate": 1.231203173730688e-05, "loss": 0.0874, "step": 25896 }, { "epoch": 0.5706478925999989, "grad_norm": 0.8836838006973267, "learning_rate": 1.2310978537633976e-05, "loss": 0.083, "step": 25897 }, { "epoch": 0.5706699278895151, "grad_norm": 0.4542904794216156, "learning_rate": 1.2309925351657702e-05, "loss": 0.0793, "step": 25898 }, { "epoch": 0.5706919631790313, "grad_norm": 0.6818686127662659, "learning_rate": 1.2308872179383425e-05, "loss": 0.0637, "step": 25899 }, { "epoch": 0.5707139984685474, "grad_norm": 1.019806146621704, "learning_rate": 1.2307819020816502e-05, "loss": 0.0894, "step": 25900 }, { "epoch": 0.5707360337580636, "grad_norm": 0.6288721561431885, "learning_rate": 1.2306765875962295e-05, "loss": 0.065, "step": 25901 }, { "epoch": 0.5707580690475798, "grad_norm": 0.5273002982139587, "learning_rate": 1.2305712744826182e-05, "loss": 0.0481, "step": 25902 }, { "epoch": 0.5707801043370958, "grad_norm": 0.41802099347114563, "learning_rate": 1.2304659627413517e-05, "loss": 0.0324, "step": 25903 }, { "epoch": 0.570802139626612, "grad_norm": 0.7261552810668945, "learning_rate": 1.2303606523729662e-05, "loss": 0.0817, "step": 25904 }, { "epoch": 0.5708241749161281, "grad_norm": 0.6096245646476746, "learning_rate": 1.2302553433779988e-05, "loss": 0.0581, "step": 25905 }, { "epoch": 0.5708462102056443, "grad_norm": 0.6536520719528198, "learning_rate": 1.2301500357569857e-05, "loss": 0.0852, "step": 25906 }, { "epoch": 0.5708682454951605, "grad_norm": 0.7121023535728455, "learning_rate": 1.230044729510463e-05, "loss": 0.0726, "step": 25907 }, { "epoch": 0.5708902807846766, "grad_norm": 0.44227367639541626, "learning_rate": 1.229939424638967e-05, "loss": 0.0622, "step": 25908 }, { "epoch": 0.5709123160741928, "grad_norm": 0.6951126456260681, "learning_rate": 1.2298341211430345e-05, "loss": 0.0589, "step": 25909 }, { "epoch": 0.570934351363709, "grad_norm": 0.43423330783843994, "learning_rate": 1.229728819023202e-05, "loss": 0.069, "step": 25910 }, { "epoch": 0.5709563866532251, "grad_norm": 0.5998683571815491, "learning_rate": 1.2296235182800054e-05, "loss": 0.0618, "step": 25911 }, { "epoch": 0.5709784219427413, "grad_norm": 0.7781385779380798, "learning_rate": 1.229518218913981e-05, "loss": 0.0875, "step": 25912 }, { "epoch": 0.5710004572322575, "grad_norm": 0.8077639937400818, "learning_rate": 1.2294129209256656e-05, "loss": 0.0768, "step": 25913 }, { "epoch": 0.5710224925217736, "grad_norm": 0.27929532527923584, "learning_rate": 1.2293076243155954e-05, "loss": 0.0673, "step": 25914 }, { "epoch": 0.5710445278112898, "grad_norm": 1.1126233339309692, "learning_rate": 1.2292023290843065e-05, "loss": 0.0667, "step": 25915 }, { "epoch": 0.571066563100806, "grad_norm": 0.5964986085891724, "learning_rate": 1.2290970352323347e-05, "loss": 0.0939, "step": 25916 }, { "epoch": 0.5710885983903221, "grad_norm": 0.7874047756195068, "learning_rate": 1.2289917427602177e-05, "loss": 0.0719, "step": 25917 }, { "epoch": 0.5711106336798383, "grad_norm": 0.6358508467674255, "learning_rate": 1.228886451668491e-05, "loss": 0.0794, "step": 25918 }, { "epoch": 0.5711326689693544, "grad_norm": 0.9884729385375977, "learning_rate": 1.2287811619576911e-05, "loss": 0.0751, "step": 25919 }, { "epoch": 0.5711547042588706, "grad_norm": 0.47904643416404724, "learning_rate": 1.2286758736283538e-05, "loss": 0.0764, "step": 25920 }, { "epoch": 0.5711767395483868, "grad_norm": 0.5636378526687622, "learning_rate": 1.228570586681016e-05, "loss": 0.0705, "step": 25921 }, { "epoch": 0.5711987748379029, "grad_norm": 0.46347999572753906, "learning_rate": 1.2284653011162138e-05, "loss": 0.0711, "step": 25922 }, { "epoch": 0.5712208101274191, "grad_norm": 0.6570718288421631, "learning_rate": 1.2283600169344834e-05, "loss": 0.0682, "step": 25923 }, { "epoch": 0.5712428454169353, "grad_norm": 0.7403622269630432, "learning_rate": 1.2282547341363605e-05, "loss": 0.0615, "step": 25924 }, { "epoch": 0.5712648807064514, "grad_norm": 1.3409526348114014, "learning_rate": 1.2281494527223828e-05, "loss": 0.1262, "step": 25925 }, { "epoch": 0.5712869159959676, "grad_norm": 0.70469731092453, "learning_rate": 1.2280441726930855e-05, "loss": 0.0783, "step": 25926 }, { "epoch": 0.5713089512854838, "grad_norm": 0.2658793330192566, "learning_rate": 1.2279388940490048e-05, "loss": 0.0491, "step": 25927 }, { "epoch": 0.5713309865749998, "grad_norm": 0.6895517110824585, "learning_rate": 1.2278336167906774e-05, "loss": 0.0915, "step": 25928 }, { "epoch": 0.571353021864516, "grad_norm": 0.5278702974319458, "learning_rate": 1.2277283409186396e-05, "loss": 0.0746, "step": 25929 }, { "epoch": 0.5713750571540321, "grad_norm": 0.8388233780860901, "learning_rate": 1.2276230664334273e-05, "loss": 0.0944, "step": 25930 }, { "epoch": 0.5713970924435483, "grad_norm": 0.8456293940544128, "learning_rate": 1.227517793335576e-05, "loss": 0.0889, "step": 25931 }, { "epoch": 0.5714191277330645, "grad_norm": 0.7782309055328369, "learning_rate": 1.2274125216256232e-05, "loss": 0.0851, "step": 25932 }, { "epoch": 0.5714411630225806, "grad_norm": 0.8365254402160645, "learning_rate": 1.2273072513041049e-05, "loss": 0.085, "step": 25933 }, { "epoch": 0.5714631983120968, "grad_norm": 0.49525824189186096, "learning_rate": 1.227201982371557e-05, "loss": 0.0598, "step": 25934 }, { "epoch": 0.571485233601613, "grad_norm": 0.2792324721813202, "learning_rate": 1.2270967148285152e-05, "loss": 0.0675, "step": 25935 }, { "epoch": 0.5715072688911291, "grad_norm": 1.2516558170318604, "learning_rate": 1.2269914486755165e-05, "loss": 0.0517, "step": 25936 }, { "epoch": 0.5715293041806453, "grad_norm": 0.6943846344947815, "learning_rate": 1.2268861839130967e-05, "loss": 0.0616, "step": 25937 }, { "epoch": 0.5715513394701615, "grad_norm": 0.7566600441932678, "learning_rate": 1.2267809205417922e-05, "loss": 0.0766, "step": 25938 }, { "epoch": 0.5715733747596776, "grad_norm": 0.6639004945755005, "learning_rate": 1.2266756585621382e-05, "loss": 0.0371, "step": 25939 }, { "epoch": 0.5715954100491938, "grad_norm": 0.6063829064369202, "learning_rate": 1.2265703979746723e-05, "loss": 0.082, "step": 25940 }, { "epoch": 0.57161744533871, "grad_norm": 0.5236899852752686, "learning_rate": 1.2264651387799302e-05, "loss": 0.0977, "step": 25941 }, { "epoch": 0.5716394806282261, "grad_norm": 0.7037296891212463, "learning_rate": 1.2263598809784477e-05, "loss": 0.0799, "step": 25942 }, { "epoch": 0.5716615159177423, "grad_norm": 0.4086610972881317, "learning_rate": 1.2262546245707606e-05, "loss": 0.092, "step": 25943 }, { "epoch": 0.5716835512072584, "grad_norm": 0.9185287356376648, "learning_rate": 1.226149369557406e-05, "loss": 0.0896, "step": 25944 }, { "epoch": 0.5717055864967746, "grad_norm": 0.5502191781997681, "learning_rate": 1.2260441159389194e-05, "loss": 0.0549, "step": 25945 }, { "epoch": 0.5717276217862908, "grad_norm": 0.9713775515556335, "learning_rate": 1.2259388637158372e-05, "loss": 0.1317, "step": 25946 }, { "epoch": 0.5717496570758069, "grad_norm": 0.6999691724777222, "learning_rate": 1.2258336128886943e-05, "loss": 0.0732, "step": 25947 }, { "epoch": 0.5717716923653231, "grad_norm": 0.47661080956459045, "learning_rate": 1.2257283634580286e-05, "loss": 0.0698, "step": 25948 }, { "epoch": 0.5717937276548393, "grad_norm": 0.6958035230636597, "learning_rate": 1.2256231154243756e-05, "loss": 0.0931, "step": 25949 }, { "epoch": 0.5718157629443554, "grad_norm": 0.5148002505302429, "learning_rate": 1.2255178687882708e-05, "loss": 0.0535, "step": 25950 }, { "epoch": 0.5718377982338716, "grad_norm": 0.8661080002784729, "learning_rate": 1.2254126235502511e-05, "loss": 0.0902, "step": 25951 }, { "epoch": 0.5718598335233877, "grad_norm": 0.9527040719985962, "learning_rate": 1.2253073797108521e-05, "loss": 0.0992, "step": 25952 }, { "epoch": 0.5718818688129038, "grad_norm": 0.6692468523979187, "learning_rate": 1.2252021372706097e-05, "loss": 0.0727, "step": 25953 }, { "epoch": 0.57190390410242, "grad_norm": 0.5978614687919617, "learning_rate": 1.22509689623006e-05, "loss": 0.0634, "step": 25954 }, { "epoch": 0.5719259393919361, "grad_norm": 0.4299805462360382, "learning_rate": 1.2249916565897394e-05, "loss": 0.0447, "step": 25955 }, { "epoch": 0.5719479746814523, "grad_norm": 0.6579075455665588, "learning_rate": 1.2248864183501838e-05, "loss": 0.0654, "step": 25956 }, { "epoch": 0.5719700099709685, "grad_norm": 0.5159657597541809, "learning_rate": 1.2247811815119295e-05, "loss": 0.0707, "step": 25957 }, { "epoch": 0.5719920452604846, "grad_norm": 0.5171762108802795, "learning_rate": 1.2246759460755117e-05, "loss": 0.0676, "step": 25958 }, { "epoch": 0.5720140805500008, "grad_norm": 0.6943364143371582, "learning_rate": 1.2245707120414673e-05, "loss": 0.0583, "step": 25959 }, { "epoch": 0.572036115839517, "grad_norm": 0.648600697517395, "learning_rate": 1.2244654794103321e-05, "loss": 0.0574, "step": 25960 }, { "epoch": 0.5720581511290331, "grad_norm": 0.9321964979171753, "learning_rate": 1.224360248182642e-05, "loss": 0.0611, "step": 25961 }, { "epoch": 0.5720801864185493, "grad_norm": 0.6287356019020081, "learning_rate": 1.224255018358932e-05, "loss": 0.0756, "step": 25962 }, { "epoch": 0.5721022217080655, "grad_norm": 0.5941902995109558, "learning_rate": 1.22414978993974e-05, "loss": 0.0725, "step": 25963 }, { "epoch": 0.5721242569975816, "grad_norm": 0.6780375242233276, "learning_rate": 1.2240445629256008e-05, "loss": 0.057, "step": 25964 }, { "epoch": 0.5721462922870978, "grad_norm": 0.860323965549469, "learning_rate": 1.2239393373170507e-05, "loss": 0.0996, "step": 25965 }, { "epoch": 0.572168327576614, "grad_norm": 0.4261252284049988, "learning_rate": 1.2238341131146252e-05, "loss": 0.0814, "step": 25966 }, { "epoch": 0.5721903628661301, "grad_norm": 0.6781554222106934, "learning_rate": 1.2237288903188612e-05, "loss": 0.0947, "step": 25967 }, { "epoch": 0.5722123981556463, "grad_norm": 0.6635459065437317, "learning_rate": 1.223623668930294e-05, "loss": 0.0449, "step": 25968 }, { "epoch": 0.5722344334451624, "grad_norm": 0.7521753311157227, "learning_rate": 1.2235184489494596e-05, "loss": 0.0833, "step": 25969 }, { "epoch": 0.5722564687346786, "grad_norm": 0.5732497572898865, "learning_rate": 1.2234132303768933e-05, "loss": 0.0734, "step": 25970 }, { "epoch": 0.5722785040241948, "grad_norm": 0.7661308646202087, "learning_rate": 1.2233080132131326e-05, "loss": 0.0958, "step": 25971 }, { "epoch": 0.5723005393137109, "grad_norm": 0.702163577079773, "learning_rate": 1.2232027974587125e-05, "loss": 0.0662, "step": 25972 }, { "epoch": 0.5723225746032271, "grad_norm": 0.33974942564964294, "learning_rate": 1.2230975831141686e-05, "loss": 0.0758, "step": 25973 }, { "epoch": 0.5723446098927433, "grad_norm": 0.750880777835846, "learning_rate": 1.2229923701800376e-05, "loss": 0.0705, "step": 25974 }, { "epoch": 0.5723666451822594, "grad_norm": 0.8717869520187378, "learning_rate": 1.2228871586568547e-05, "loss": 0.0979, "step": 25975 }, { "epoch": 0.5723886804717756, "grad_norm": 0.8110038638114929, "learning_rate": 1.2227819485451565e-05, "loss": 0.0654, "step": 25976 }, { "epoch": 0.5724107157612917, "grad_norm": 0.4309307634830475, "learning_rate": 1.2226767398454775e-05, "loss": 0.0972, "step": 25977 }, { "epoch": 0.5724327510508078, "grad_norm": 0.5143579244613647, "learning_rate": 1.2225715325583556e-05, "loss": 0.0525, "step": 25978 }, { "epoch": 0.572454786340324, "grad_norm": 0.627644419670105, "learning_rate": 1.2224663266843255e-05, "loss": 0.063, "step": 25979 }, { "epoch": 0.5724768216298401, "grad_norm": 0.7487632036209106, "learning_rate": 1.2223611222239232e-05, "loss": 0.0807, "step": 25980 }, { "epoch": 0.5724988569193563, "grad_norm": 0.6995947360992432, "learning_rate": 1.2222559191776838e-05, "loss": 0.0912, "step": 25981 }, { "epoch": 0.5725208922088725, "grad_norm": 0.3744848668575287, "learning_rate": 1.222150717546145e-05, "loss": 0.0665, "step": 25982 }, { "epoch": 0.5725429274983886, "grad_norm": 0.6740745306015015, "learning_rate": 1.2220455173298412e-05, "loss": 0.1047, "step": 25983 }, { "epoch": 0.5725649627879048, "grad_norm": 0.7301942706108093, "learning_rate": 1.2219403185293089e-05, "loss": 0.0631, "step": 25984 }, { "epoch": 0.572586998077421, "grad_norm": 0.9015347957611084, "learning_rate": 1.2218351211450827e-05, "loss": 0.0703, "step": 25985 }, { "epoch": 0.5726090333669371, "grad_norm": 0.5583730936050415, "learning_rate": 1.2217299251777003e-05, "loss": 0.0785, "step": 25986 }, { "epoch": 0.5726310686564533, "grad_norm": 0.5338870882987976, "learning_rate": 1.2216247306276965e-05, "loss": 0.0635, "step": 25987 }, { "epoch": 0.5726531039459695, "grad_norm": 0.30776360630989075, "learning_rate": 1.221519537495607e-05, "loss": 0.0415, "step": 25988 }, { "epoch": 0.5726751392354856, "grad_norm": 0.3499748706817627, "learning_rate": 1.2214143457819676e-05, "loss": 0.0798, "step": 25989 }, { "epoch": 0.5726971745250018, "grad_norm": 0.2671148180961609, "learning_rate": 1.2213091554873147e-05, "loss": 0.0598, "step": 25990 }, { "epoch": 0.572719209814518, "grad_norm": 0.7510787844657898, "learning_rate": 1.2212039666121839e-05, "loss": 0.0711, "step": 25991 }, { "epoch": 0.5727412451040341, "grad_norm": 0.5996262431144714, "learning_rate": 1.221098779157111e-05, "loss": 0.0664, "step": 25992 }, { "epoch": 0.5727632803935503, "grad_norm": 0.680873692035675, "learning_rate": 1.2209935931226305e-05, "loss": 0.0939, "step": 25993 }, { "epoch": 0.5727853156830665, "grad_norm": 0.6561421155929565, "learning_rate": 1.22088840850928e-05, "loss": 0.0575, "step": 25994 }, { "epoch": 0.5728073509725826, "grad_norm": 0.6976696252822876, "learning_rate": 1.2207832253175945e-05, "loss": 0.0653, "step": 25995 }, { "epoch": 0.5728293862620988, "grad_norm": 0.6782151460647583, "learning_rate": 1.2206780435481092e-05, "loss": 0.0712, "step": 25996 }, { "epoch": 0.572851421551615, "grad_norm": 0.5577771067619324, "learning_rate": 1.2205728632013609e-05, "loss": 0.0803, "step": 25997 }, { "epoch": 0.5728734568411311, "grad_norm": 0.37802907824516296, "learning_rate": 1.220467684277885e-05, "loss": 0.0519, "step": 25998 }, { "epoch": 0.5728954921306473, "grad_norm": 0.6974880695343018, "learning_rate": 1.2203625067782171e-05, "loss": 0.067, "step": 25999 }, { "epoch": 0.5729175274201634, "grad_norm": 0.46097999811172485, "learning_rate": 1.2202573307028923e-05, "loss": 0.0722, "step": 26000 }, { "epoch": 0.5729395627096796, "grad_norm": 0.6915579438209534, "learning_rate": 1.2201521560524475e-05, "loss": 0.0819, "step": 26001 }, { "epoch": 0.5729615979991957, "grad_norm": 0.9106741547584534, "learning_rate": 1.2200469828274178e-05, "loss": 0.0714, "step": 26002 }, { "epoch": 0.5729836332887118, "grad_norm": 0.5296803116798401, "learning_rate": 1.219941811028339e-05, "loss": 0.03, "step": 26003 }, { "epoch": 0.573005668578228, "grad_norm": 0.6450569033622742, "learning_rate": 1.2198366406557461e-05, "loss": 0.0655, "step": 26004 }, { "epoch": 0.5730277038677442, "grad_norm": 0.6328442096710205, "learning_rate": 1.2197314717101758e-05, "loss": 0.0632, "step": 26005 }, { "epoch": 0.5730497391572603, "grad_norm": 0.6660107970237732, "learning_rate": 1.2196263041921637e-05, "loss": 0.0624, "step": 26006 }, { "epoch": 0.5730717744467765, "grad_norm": 0.9098265767097473, "learning_rate": 1.219521138102245e-05, "loss": 0.1222, "step": 26007 }, { "epoch": 0.5730938097362926, "grad_norm": 0.668977677822113, "learning_rate": 1.2194159734409553e-05, "loss": 0.0732, "step": 26008 }, { "epoch": 0.5731158450258088, "grad_norm": 0.36738425493240356, "learning_rate": 1.219310810208831e-05, "loss": 0.0611, "step": 26009 }, { "epoch": 0.573137880315325, "grad_norm": 0.6370362043380737, "learning_rate": 1.2192056484064069e-05, "loss": 0.06, "step": 26010 }, { "epoch": 0.5731599156048411, "grad_norm": 0.4377131760120392, "learning_rate": 1.2191004880342193e-05, "loss": 0.0503, "step": 26011 }, { "epoch": 0.5731819508943573, "grad_norm": 0.27269095182418823, "learning_rate": 1.2189953290928026e-05, "loss": 0.0503, "step": 26012 }, { "epoch": 0.5732039861838735, "grad_norm": 0.8626145720481873, "learning_rate": 1.2188901715826943e-05, "loss": 0.0865, "step": 26013 }, { "epoch": 0.5732260214733896, "grad_norm": 0.5234909653663635, "learning_rate": 1.218785015504429e-05, "loss": 0.0726, "step": 26014 }, { "epoch": 0.5732480567629058, "grad_norm": 0.4880286455154419, "learning_rate": 1.2186798608585425e-05, "loss": 0.0849, "step": 26015 }, { "epoch": 0.573270092052422, "grad_norm": 0.8355014324188232, "learning_rate": 1.2185747076455699e-05, "loss": 0.0712, "step": 26016 }, { "epoch": 0.5732921273419381, "grad_norm": 0.7747871279716492, "learning_rate": 1.2184695558660475e-05, "loss": 0.0952, "step": 26017 }, { "epoch": 0.5733141626314543, "grad_norm": 0.6982795596122742, "learning_rate": 1.2183644055205108e-05, "loss": 0.0694, "step": 26018 }, { "epoch": 0.5733361979209705, "grad_norm": 0.5201603174209595, "learning_rate": 1.2182592566094945e-05, "loss": 0.0523, "step": 26019 }, { "epoch": 0.5733582332104866, "grad_norm": 1.0240001678466797, "learning_rate": 1.2181541091335355e-05, "loss": 0.0696, "step": 26020 }, { "epoch": 0.5733802685000028, "grad_norm": 0.4135565161705017, "learning_rate": 1.2180489630931688e-05, "loss": 0.0751, "step": 26021 }, { "epoch": 0.573402303789519, "grad_norm": 1.034379005432129, "learning_rate": 1.2179438184889298e-05, "loss": 0.0543, "step": 26022 }, { "epoch": 0.5734243390790351, "grad_norm": 0.9374235272407532, "learning_rate": 1.217838675321354e-05, "loss": 0.0758, "step": 26023 }, { "epoch": 0.5734463743685513, "grad_norm": 0.6175897717475891, "learning_rate": 1.2177335335909774e-05, "loss": 0.0646, "step": 26024 }, { "epoch": 0.5734684096580674, "grad_norm": 0.7870512008666992, "learning_rate": 1.2176283932983351e-05, "loss": 0.0873, "step": 26025 }, { "epoch": 0.5734904449475835, "grad_norm": 0.3971973657608032, "learning_rate": 1.2175232544439632e-05, "loss": 0.0751, "step": 26026 }, { "epoch": 0.5735124802370997, "grad_norm": 0.81597501039505, "learning_rate": 1.2174181170283959e-05, "loss": 0.0863, "step": 26027 }, { "epoch": 0.5735345155266158, "grad_norm": 0.5750187039375305, "learning_rate": 1.2173129810521704e-05, "loss": 0.0403, "step": 26028 }, { "epoch": 0.573556550816132, "grad_norm": 0.45398637652397156, "learning_rate": 1.2172078465158214e-05, "loss": 0.048, "step": 26029 }, { "epoch": 0.5735785861056482, "grad_norm": 0.9856971502304077, "learning_rate": 1.2171027134198846e-05, "loss": 0.0634, "step": 26030 }, { "epoch": 0.5736006213951643, "grad_norm": 0.6451733708381653, "learning_rate": 1.2169975817648951e-05, "loss": 0.0689, "step": 26031 }, { "epoch": 0.5736226566846805, "grad_norm": 0.5080205798149109, "learning_rate": 1.2168924515513888e-05, "loss": 0.0766, "step": 26032 }, { "epoch": 0.5736446919741967, "grad_norm": 0.3941902220249176, "learning_rate": 1.2167873227799012e-05, "loss": 0.0856, "step": 26033 }, { "epoch": 0.5736667272637128, "grad_norm": 0.7252192497253418, "learning_rate": 1.2166821954509674e-05, "loss": 0.0786, "step": 26034 }, { "epoch": 0.573688762553229, "grad_norm": 0.4863821268081665, "learning_rate": 1.2165770695651227e-05, "loss": 0.072, "step": 26035 }, { "epoch": 0.5737107978427451, "grad_norm": 0.6679124236106873, "learning_rate": 1.2164719451229036e-05, "loss": 0.0453, "step": 26036 }, { "epoch": 0.5737328331322613, "grad_norm": 1.1968638896942139, "learning_rate": 1.216366822124845e-05, "loss": 0.0615, "step": 26037 }, { "epoch": 0.5737548684217775, "grad_norm": 0.5415858626365662, "learning_rate": 1.2162617005714819e-05, "loss": 0.0528, "step": 26038 }, { "epoch": 0.5737769037112936, "grad_norm": 0.7438758015632629, "learning_rate": 1.2161565804633502e-05, "loss": 0.0607, "step": 26039 }, { "epoch": 0.5737989390008098, "grad_norm": 1.1232380867004395, "learning_rate": 1.2160514618009856e-05, "loss": 0.0919, "step": 26040 }, { "epoch": 0.573820974290326, "grad_norm": 0.6298536062240601, "learning_rate": 1.2159463445849229e-05, "loss": 0.0764, "step": 26041 }, { "epoch": 0.5738430095798421, "grad_norm": 0.5109269618988037, "learning_rate": 1.2158412288156971e-05, "loss": 0.0766, "step": 26042 }, { "epoch": 0.5738650448693583, "grad_norm": 0.5385598540306091, "learning_rate": 1.2157361144938452e-05, "loss": 0.1048, "step": 26043 }, { "epoch": 0.5738870801588745, "grad_norm": 0.9285519123077393, "learning_rate": 1.2156310016199015e-05, "loss": 0.0746, "step": 26044 }, { "epoch": 0.5739091154483906, "grad_norm": 0.4952578544616699, "learning_rate": 1.2155258901944017e-05, "loss": 0.0837, "step": 26045 }, { "epoch": 0.5739311507379068, "grad_norm": 0.8110940456390381, "learning_rate": 1.2154207802178808e-05, "loss": 0.0436, "step": 26046 }, { "epoch": 0.573953186027423, "grad_norm": 0.5978460311889648, "learning_rate": 1.2153156716908748e-05, "loss": 0.0741, "step": 26047 }, { "epoch": 0.5739752213169391, "grad_norm": 0.4425406754016876, "learning_rate": 1.2152105646139187e-05, "loss": 0.0371, "step": 26048 }, { "epoch": 0.5739972566064553, "grad_norm": 0.6409921646118164, "learning_rate": 1.215105458987548e-05, "loss": 0.0658, "step": 26049 }, { "epoch": 0.5740192918959715, "grad_norm": 0.5245596766471863, "learning_rate": 1.215000354812297e-05, "loss": 0.0615, "step": 26050 }, { "epoch": 0.5740413271854875, "grad_norm": 0.31180644035339355, "learning_rate": 1.214895252088703e-05, "loss": 0.0601, "step": 26051 }, { "epoch": 0.5740633624750037, "grad_norm": 0.8820672035217285, "learning_rate": 1.2147901508173002e-05, "loss": 0.0639, "step": 26052 }, { "epoch": 0.5740853977645198, "grad_norm": 0.6974349617958069, "learning_rate": 1.2146850509986242e-05, "loss": 0.0736, "step": 26053 }, { "epoch": 0.574107433054036, "grad_norm": 1.5116633176803589, "learning_rate": 1.2145799526332098e-05, "loss": 0.055, "step": 26054 }, { "epoch": 0.5741294683435522, "grad_norm": 0.7125858664512634, "learning_rate": 1.214474855721593e-05, "loss": 0.0637, "step": 26055 }, { "epoch": 0.5741515036330683, "grad_norm": 0.5015868544578552, "learning_rate": 1.2143697602643092e-05, "loss": 0.0729, "step": 26056 }, { "epoch": 0.5741735389225845, "grad_norm": 0.7963918447494507, "learning_rate": 1.2142646662618932e-05, "loss": 0.0928, "step": 26057 }, { "epoch": 0.5741955742121007, "grad_norm": 0.8464738726615906, "learning_rate": 1.2141595737148798e-05, "loss": 0.0722, "step": 26058 }, { "epoch": 0.5742176095016168, "grad_norm": 0.6872653961181641, "learning_rate": 1.2140544826238057e-05, "loss": 0.0704, "step": 26059 }, { "epoch": 0.574239644791133, "grad_norm": 0.7328497171401978, "learning_rate": 1.2139493929892053e-05, "loss": 0.0994, "step": 26060 }, { "epoch": 0.5742616800806492, "grad_norm": 0.5683338642120361, "learning_rate": 1.213844304811614e-05, "loss": 0.0544, "step": 26061 }, { "epoch": 0.5742837153701653, "grad_norm": 0.6358681917190552, "learning_rate": 1.2137392180915671e-05, "loss": 0.0595, "step": 26062 }, { "epoch": 0.5743057506596815, "grad_norm": 1.181702733039856, "learning_rate": 1.2136341328296001e-05, "loss": 0.0988, "step": 26063 }, { "epoch": 0.5743277859491976, "grad_norm": 0.4045034646987915, "learning_rate": 1.2135290490262478e-05, "loss": 0.0465, "step": 26064 }, { "epoch": 0.5743498212387138, "grad_norm": 0.48094990849494934, "learning_rate": 1.2134239666820452e-05, "loss": 0.0685, "step": 26065 }, { "epoch": 0.57437185652823, "grad_norm": 0.8259422779083252, "learning_rate": 1.2133188857975286e-05, "loss": 0.0587, "step": 26066 }, { "epoch": 0.5743938918177461, "grad_norm": 0.5017862319946289, "learning_rate": 1.2132138063732327e-05, "loss": 0.0431, "step": 26067 }, { "epoch": 0.5744159271072623, "grad_norm": 0.6265166997909546, "learning_rate": 1.2131087284096927e-05, "loss": 0.0631, "step": 26068 }, { "epoch": 0.5744379623967785, "grad_norm": 0.29766660928726196, "learning_rate": 1.2130036519074434e-05, "loss": 0.0499, "step": 26069 }, { "epoch": 0.5744599976862946, "grad_norm": 0.6011150479316711, "learning_rate": 1.2128985768670207e-05, "loss": 0.074, "step": 26070 }, { "epoch": 0.5744820329758108, "grad_norm": 0.656158983707428, "learning_rate": 1.2127935032889595e-05, "loss": 0.0645, "step": 26071 }, { "epoch": 0.574504068265327, "grad_norm": 0.4801236689090729, "learning_rate": 1.212688431173795e-05, "loss": 0.0652, "step": 26072 }, { "epoch": 0.5745261035548431, "grad_norm": 0.544737696647644, "learning_rate": 1.2125833605220618e-05, "loss": 0.0844, "step": 26073 }, { "epoch": 0.5745481388443593, "grad_norm": 0.4995628297328949, "learning_rate": 1.2124782913342962e-05, "loss": 0.0727, "step": 26074 }, { "epoch": 0.5745701741338755, "grad_norm": 0.387382835149765, "learning_rate": 1.212373223611033e-05, "loss": 0.0864, "step": 26075 }, { "epoch": 0.5745922094233915, "grad_norm": 0.5391729474067688, "learning_rate": 1.212268157352807e-05, "loss": 0.0698, "step": 26076 }, { "epoch": 0.5746142447129077, "grad_norm": 0.9480449557304382, "learning_rate": 1.2121630925601534e-05, "loss": 0.1036, "step": 26077 }, { "epoch": 0.5746362800024238, "grad_norm": 0.8351890444755554, "learning_rate": 1.2120580292336079e-05, "loss": 0.0699, "step": 26078 }, { "epoch": 0.57465831529194, "grad_norm": 0.49487414956092834, "learning_rate": 1.2119529673737053e-05, "loss": 0.0634, "step": 26079 }, { "epoch": 0.5746803505814562, "grad_norm": 0.5233125686645508, "learning_rate": 1.2118479069809804e-05, "loss": 0.0654, "step": 26080 }, { "epoch": 0.5747023858709723, "grad_norm": 0.6640222668647766, "learning_rate": 1.2117428480559682e-05, "loss": 0.0812, "step": 26081 }, { "epoch": 0.5747244211604885, "grad_norm": 0.31450366973876953, "learning_rate": 1.211637790599205e-05, "loss": 0.0472, "step": 26082 }, { "epoch": 0.5747464564500047, "grad_norm": 0.780010461807251, "learning_rate": 1.211532734611225e-05, "loss": 0.0753, "step": 26083 }, { "epoch": 0.5747684917395208, "grad_norm": 0.36464977264404297, "learning_rate": 1.2114276800925632e-05, "loss": 0.0325, "step": 26084 }, { "epoch": 0.574790527029037, "grad_norm": 0.773219883441925, "learning_rate": 1.2113226270437551e-05, "loss": 0.0562, "step": 26085 }, { "epoch": 0.5748125623185532, "grad_norm": 0.5940426588058472, "learning_rate": 1.211217575465336e-05, "loss": 0.0734, "step": 26086 }, { "epoch": 0.5748345976080693, "grad_norm": 0.6541162729263306, "learning_rate": 1.2111125253578405e-05, "loss": 0.0706, "step": 26087 }, { "epoch": 0.5748566328975855, "grad_norm": 0.407687783241272, "learning_rate": 1.2110074767218032e-05, "loss": 0.0488, "step": 26088 }, { "epoch": 0.5748786681871016, "grad_norm": 0.8083363175392151, "learning_rate": 1.2109024295577603e-05, "loss": 0.0571, "step": 26089 }, { "epoch": 0.5749007034766178, "grad_norm": 0.6648869514465332, "learning_rate": 1.2107973838662464e-05, "loss": 0.068, "step": 26090 }, { "epoch": 0.574922738766134, "grad_norm": 0.5048629641532898, "learning_rate": 1.2106923396477966e-05, "loss": 0.0415, "step": 26091 }, { "epoch": 0.5749447740556501, "grad_norm": 1.161095142364502, "learning_rate": 1.2105872969029451e-05, "loss": 0.1128, "step": 26092 }, { "epoch": 0.5749668093451663, "grad_norm": 0.9443703889846802, "learning_rate": 1.2104822556322288e-05, "loss": 0.1115, "step": 26093 }, { "epoch": 0.5749888446346825, "grad_norm": 1.1390774250030518, "learning_rate": 1.2103772158361813e-05, "loss": 0.0887, "step": 26094 }, { "epoch": 0.5750108799241986, "grad_norm": 0.7946217656135559, "learning_rate": 1.2102721775153378e-05, "loss": 0.0649, "step": 26095 }, { "epoch": 0.5750329152137148, "grad_norm": 0.583951473236084, "learning_rate": 1.2101671406702328e-05, "loss": 0.0531, "step": 26096 }, { "epoch": 0.575054950503231, "grad_norm": 0.25048503279685974, "learning_rate": 1.2100621053014028e-05, "loss": 0.0474, "step": 26097 }, { "epoch": 0.5750769857927471, "grad_norm": 0.5406138300895691, "learning_rate": 1.209957071409382e-05, "loss": 0.0838, "step": 26098 }, { "epoch": 0.5750990210822633, "grad_norm": 0.4260600209236145, "learning_rate": 1.2098520389947051e-05, "loss": 0.0446, "step": 26099 }, { "epoch": 0.5751210563717795, "grad_norm": 0.41320061683654785, "learning_rate": 1.209747008057907e-05, "loss": 0.0738, "step": 26100 }, { "epoch": 0.5751430916612955, "grad_norm": 0.43508249521255493, "learning_rate": 1.2096419785995235e-05, "loss": 0.0596, "step": 26101 }, { "epoch": 0.5751651269508117, "grad_norm": 0.4234005808830261, "learning_rate": 1.209536950620089e-05, "loss": 0.0509, "step": 26102 }, { "epoch": 0.5751871622403278, "grad_norm": 0.596121609210968, "learning_rate": 1.2094319241201387e-05, "loss": 0.0888, "step": 26103 }, { "epoch": 0.575209197529844, "grad_norm": 0.7812461256980896, "learning_rate": 1.2093268991002072e-05, "loss": 0.0824, "step": 26104 }, { "epoch": 0.5752312328193602, "grad_norm": 0.6461201906204224, "learning_rate": 1.2092218755608299e-05, "loss": 0.1005, "step": 26105 }, { "epoch": 0.5752532681088763, "grad_norm": 0.733356237411499, "learning_rate": 1.2091168535025416e-05, "loss": 0.0632, "step": 26106 }, { "epoch": 0.5752753033983925, "grad_norm": 0.5722360610961914, "learning_rate": 1.2090118329258765e-05, "loss": 0.0643, "step": 26107 }, { "epoch": 0.5752973386879087, "grad_norm": 0.7226540446281433, "learning_rate": 1.2089068138313706e-05, "loss": 0.0601, "step": 26108 }, { "epoch": 0.5753193739774248, "grad_norm": 0.6327590942382812, "learning_rate": 1.2088017962195586e-05, "loss": 0.0796, "step": 26109 }, { "epoch": 0.575341409266941, "grad_norm": 0.43259942531585693, "learning_rate": 1.2086967800909751e-05, "loss": 0.0551, "step": 26110 }, { "epoch": 0.5753634445564572, "grad_norm": 0.6913139820098877, "learning_rate": 1.2085917654461549e-05, "loss": 0.0678, "step": 26111 }, { "epoch": 0.5753854798459733, "grad_norm": 0.6965394020080566, "learning_rate": 1.2084867522856333e-05, "loss": 0.0573, "step": 26112 }, { "epoch": 0.5754075151354895, "grad_norm": 0.7675196528434753, "learning_rate": 1.2083817406099453e-05, "loss": 0.0633, "step": 26113 }, { "epoch": 0.5754295504250057, "grad_norm": 0.6089170575141907, "learning_rate": 1.208276730419625e-05, "loss": 0.0897, "step": 26114 }, { "epoch": 0.5754515857145218, "grad_norm": 0.7857113480567932, "learning_rate": 1.2081717217152075e-05, "loss": 0.0804, "step": 26115 }, { "epoch": 0.575473621004038, "grad_norm": 0.6521981358528137, "learning_rate": 1.2080667144972283e-05, "loss": 0.0718, "step": 26116 }, { "epoch": 0.5754956562935541, "grad_norm": 0.32331383228302, "learning_rate": 1.207961708766222e-05, "loss": 0.0404, "step": 26117 }, { "epoch": 0.5755176915830703, "grad_norm": 0.5818246603012085, "learning_rate": 1.2078567045227232e-05, "loss": 0.0702, "step": 26118 }, { "epoch": 0.5755397268725865, "grad_norm": 0.7318677306175232, "learning_rate": 1.2077517017672667e-05, "loss": 0.0576, "step": 26119 }, { "epoch": 0.5755617621621026, "grad_norm": 0.4899287223815918, "learning_rate": 1.2076467005003878e-05, "loss": 0.0933, "step": 26120 }, { "epoch": 0.5755837974516188, "grad_norm": 0.5684465169906616, "learning_rate": 1.2075417007226209e-05, "loss": 0.072, "step": 26121 }, { "epoch": 0.575605832741135, "grad_norm": 0.2946523129940033, "learning_rate": 1.2074367024345008e-05, "loss": 0.0554, "step": 26122 }, { "epoch": 0.5756278680306511, "grad_norm": 0.9099478721618652, "learning_rate": 1.2073317056365621e-05, "loss": 0.1142, "step": 26123 }, { "epoch": 0.5756499033201673, "grad_norm": 0.5683287978172302, "learning_rate": 1.2072267103293406e-05, "loss": 0.0631, "step": 26124 }, { "epoch": 0.5756719386096834, "grad_norm": 1.112606167793274, "learning_rate": 1.2071217165133703e-05, "loss": 0.1005, "step": 26125 }, { "epoch": 0.5756939738991995, "grad_norm": 0.4882214367389679, "learning_rate": 1.2070167241891864e-05, "loss": 0.0575, "step": 26126 }, { "epoch": 0.5757160091887157, "grad_norm": 0.8966158032417297, "learning_rate": 1.2069117333573229e-05, "loss": 0.0717, "step": 26127 }, { "epoch": 0.5757380444782318, "grad_norm": 0.3861054480075836, "learning_rate": 1.2068067440183155e-05, "loss": 0.0804, "step": 26128 }, { "epoch": 0.575760079767748, "grad_norm": 0.9261550307273865, "learning_rate": 1.2067017561726987e-05, "loss": 0.0596, "step": 26129 }, { "epoch": 0.5757821150572642, "grad_norm": 0.4572608172893524, "learning_rate": 1.2065967698210062e-05, "loss": 0.0711, "step": 26130 }, { "epoch": 0.5758041503467803, "grad_norm": 0.9873202443122864, "learning_rate": 1.2064917849637747e-05, "loss": 0.0596, "step": 26131 }, { "epoch": 0.5758261856362965, "grad_norm": 0.6637618541717529, "learning_rate": 1.2063868016015376e-05, "loss": 0.0545, "step": 26132 }, { "epoch": 0.5758482209258127, "grad_norm": 0.8566721677780151, "learning_rate": 1.20628181973483e-05, "loss": 0.0675, "step": 26133 }, { "epoch": 0.5758702562153288, "grad_norm": 0.8478578925132751, "learning_rate": 1.2061768393641865e-05, "loss": 0.0795, "step": 26134 }, { "epoch": 0.575892291504845, "grad_norm": 0.7114365696907043, "learning_rate": 1.206071860490142e-05, "loss": 0.0815, "step": 26135 }, { "epoch": 0.5759143267943612, "grad_norm": 0.6072679162025452, "learning_rate": 1.2059668831132313e-05, "loss": 0.0794, "step": 26136 }, { "epoch": 0.5759363620838773, "grad_norm": 0.48925790190696716, "learning_rate": 1.2058619072339889e-05, "loss": 0.0727, "step": 26137 }, { "epoch": 0.5759583973733935, "grad_norm": 0.64645916223526, "learning_rate": 1.2057569328529489e-05, "loss": 0.0682, "step": 26138 }, { "epoch": 0.5759804326629097, "grad_norm": 0.5457625389099121, "learning_rate": 1.2056519599706473e-05, "loss": 0.0825, "step": 26139 }, { "epoch": 0.5760024679524258, "grad_norm": 0.42195606231689453, "learning_rate": 1.205546988587618e-05, "loss": 0.0576, "step": 26140 }, { "epoch": 0.576024503241942, "grad_norm": 0.4874591529369354, "learning_rate": 1.2054420187043957e-05, "loss": 0.0415, "step": 26141 }, { "epoch": 0.5760465385314582, "grad_norm": 0.5450359582901001, "learning_rate": 1.205337050321515e-05, "loss": 0.0585, "step": 26142 }, { "epoch": 0.5760685738209743, "grad_norm": 0.6357263326644897, "learning_rate": 1.205232083439511e-05, "loss": 0.0828, "step": 26143 }, { "epoch": 0.5760906091104905, "grad_norm": 0.4823504686355591, "learning_rate": 1.2051271180589181e-05, "loss": 0.0594, "step": 26144 }, { "epoch": 0.5761126444000066, "grad_norm": 0.5210872292518616, "learning_rate": 1.2050221541802708e-05, "loss": 0.0767, "step": 26145 }, { "epoch": 0.5761346796895228, "grad_norm": 0.6033445596694946, "learning_rate": 1.2049171918041034e-05, "loss": 0.1154, "step": 26146 }, { "epoch": 0.576156714979039, "grad_norm": 0.5829517841339111, "learning_rate": 1.2048122309309515e-05, "loss": 0.0615, "step": 26147 }, { "epoch": 0.5761787502685551, "grad_norm": 0.6382007598876953, "learning_rate": 1.2047072715613492e-05, "loss": 0.0472, "step": 26148 }, { "epoch": 0.5762007855580713, "grad_norm": 0.6409876942634583, "learning_rate": 1.204602313695831e-05, "loss": 0.0594, "step": 26149 }, { "epoch": 0.5762228208475874, "grad_norm": 0.5521637201309204, "learning_rate": 1.2044973573349314e-05, "loss": 0.0627, "step": 26150 }, { "epoch": 0.5762448561371035, "grad_norm": 0.7359850406646729, "learning_rate": 1.2043924024791857e-05, "loss": 0.111, "step": 26151 }, { "epoch": 0.5762668914266197, "grad_norm": 0.7293401956558228, "learning_rate": 1.2042874491291276e-05, "loss": 0.0757, "step": 26152 }, { "epoch": 0.5762889267161359, "grad_norm": 0.4159054756164551, "learning_rate": 1.2041824972852918e-05, "loss": 0.057, "step": 26153 }, { "epoch": 0.576310962005652, "grad_norm": 0.8679282069206238, "learning_rate": 1.2040775469482138e-05, "loss": 0.0709, "step": 26154 }, { "epoch": 0.5763329972951682, "grad_norm": 0.6149117350578308, "learning_rate": 1.2039725981184275e-05, "loss": 0.0825, "step": 26155 }, { "epoch": 0.5763550325846843, "grad_norm": 0.7199916243553162, "learning_rate": 1.2038676507964674e-05, "loss": 0.0707, "step": 26156 }, { "epoch": 0.5763770678742005, "grad_norm": 0.5349952578544617, "learning_rate": 1.203762704982868e-05, "loss": 0.0838, "step": 26157 }, { "epoch": 0.5763991031637167, "grad_norm": 0.3802933096885681, "learning_rate": 1.2036577606781642e-05, "loss": 0.064, "step": 26158 }, { "epoch": 0.5764211384532328, "grad_norm": 0.6512622833251953, "learning_rate": 1.2035528178828904e-05, "loss": 0.0908, "step": 26159 }, { "epoch": 0.576443173742749, "grad_norm": 0.4760757386684418, "learning_rate": 1.203447876597581e-05, "loss": 0.0686, "step": 26160 }, { "epoch": 0.5764652090322652, "grad_norm": 1.0381171703338623, "learning_rate": 1.2033429368227699e-05, "loss": 0.099, "step": 26161 }, { "epoch": 0.5764872443217813, "grad_norm": 0.9451001286506653, "learning_rate": 1.203237998558993e-05, "loss": 0.0686, "step": 26162 }, { "epoch": 0.5765092796112975, "grad_norm": 0.7391400933265686, "learning_rate": 1.2031330618067842e-05, "loss": 0.0513, "step": 26163 }, { "epoch": 0.5765313149008137, "grad_norm": 1.0532723665237427, "learning_rate": 1.2030281265666778e-05, "loss": 0.0807, "step": 26164 }, { "epoch": 0.5765533501903298, "grad_norm": 0.8141072392463684, "learning_rate": 1.2029231928392083e-05, "loss": 0.1042, "step": 26165 }, { "epoch": 0.576575385479846, "grad_norm": 0.323541522026062, "learning_rate": 1.2028182606249102e-05, "loss": 0.0632, "step": 26166 }, { "epoch": 0.5765974207693622, "grad_norm": 0.8191506862640381, "learning_rate": 1.2027133299243185e-05, "loss": 0.0984, "step": 26167 }, { "epoch": 0.5766194560588783, "grad_norm": 0.6490479111671448, "learning_rate": 1.2026084007379669e-05, "loss": 0.0431, "step": 26168 }, { "epoch": 0.5766414913483945, "grad_norm": 0.6620088815689087, "learning_rate": 1.2025034730663896e-05, "loss": 0.1133, "step": 26169 }, { "epoch": 0.5766635266379107, "grad_norm": 0.4713183343410492, "learning_rate": 1.2023985469101225e-05, "loss": 0.0689, "step": 26170 }, { "epoch": 0.5766855619274268, "grad_norm": 0.7301021814346313, "learning_rate": 1.202293622269699e-05, "loss": 0.0798, "step": 26171 }, { "epoch": 0.576707597216943, "grad_norm": 0.3941041827201843, "learning_rate": 1.2021886991456535e-05, "loss": 0.0568, "step": 26172 }, { "epoch": 0.5767296325064591, "grad_norm": 0.6086083054542542, "learning_rate": 1.2020837775385207e-05, "loss": 0.0544, "step": 26173 }, { "epoch": 0.5767516677959753, "grad_norm": 0.9385622143745422, "learning_rate": 1.2019788574488352e-05, "loss": 0.0866, "step": 26174 }, { "epoch": 0.5767737030854914, "grad_norm": 0.6493523120880127, "learning_rate": 1.2018739388771312e-05, "loss": 0.0631, "step": 26175 }, { "epoch": 0.5767957383750075, "grad_norm": 0.46028804779052734, "learning_rate": 1.2017690218239426e-05, "loss": 0.0533, "step": 26176 }, { "epoch": 0.5768177736645237, "grad_norm": 0.44494301080703735, "learning_rate": 1.2016641062898046e-05, "loss": 0.0726, "step": 26177 }, { "epoch": 0.5768398089540399, "grad_norm": 0.6023353338241577, "learning_rate": 1.2015591922752515e-05, "loss": 0.0726, "step": 26178 }, { "epoch": 0.576861844243556, "grad_norm": 0.4550054371356964, "learning_rate": 1.2014542797808173e-05, "loss": 0.0611, "step": 26179 }, { "epoch": 0.5768838795330722, "grad_norm": 0.8249104022979736, "learning_rate": 1.2013493688070364e-05, "loss": 0.0741, "step": 26180 }, { "epoch": 0.5769059148225884, "grad_norm": 0.6360874176025391, "learning_rate": 1.2012444593544435e-05, "loss": 0.1167, "step": 26181 }, { "epoch": 0.5769279501121045, "grad_norm": 0.39602455496788025, "learning_rate": 1.201139551423573e-05, "loss": 0.0537, "step": 26182 }, { "epoch": 0.5769499854016207, "grad_norm": 0.6614048480987549, "learning_rate": 1.2010346450149588e-05, "loss": 0.0457, "step": 26183 }, { "epoch": 0.5769720206911368, "grad_norm": 0.7782522439956665, "learning_rate": 1.2009297401291351e-05, "loss": 0.0893, "step": 26184 }, { "epoch": 0.576994055980653, "grad_norm": 0.6018733978271484, "learning_rate": 1.200824836766637e-05, "loss": 0.0497, "step": 26185 }, { "epoch": 0.5770160912701692, "grad_norm": 0.7448496222496033, "learning_rate": 1.2007199349279983e-05, "loss": 0.0576, "step": 26186 }, { "epoch": 0.5770381265596853, "grad_norm": 0.5118427276611328, "learning_rate": 1.2006150346137539e-05, "loss": 0.0962, "step": 26187 }, { "epoch": 0.5770601618492015, "grad_norm": 0.5832841992378235, "learning_rate": 1.200510135824437e-05, "loss": 0.0785, "step": 26188 }, { "epoch": 0.5770821971387177, "grad_norm": 0.9809568524360657, "learning_rate": 1.200405238560583e-05, "loss": 0.0741, "step": 26189 }, { "epoch": 0.5771042324282338, "grad_norm": 0.9524828791618347, "learning_rate": 1.2003003428227258e-05, "loss": 0.1046, "step": 26190 }, { "epoch": 0.57712626771775, "grad_norm": 0.7430713176727295, "learning_rate": 1.2001954486113998e-05, "loss": 0.1006, "step": 26191 }, { "epoch": 0.5771483030072662, "grad_norm": 1.0313156843185425, "learning_rate": 1.2000905559271382e-05, "loss": 0.0644, "step": 26192 }, { "epoch": 0.5771703382967823, "grad_norm": 0.6972009539604187, "learning_rate": 1.199985664770477e-05, "loss": 0.0662, "step": 26193 }, { "epoch": 0.5771923735862985, "grad_norm": 0.6503542065620422, "learning_rate": 1.1998807751419497e-05, "loss": 0.0954, "step": 26194 }, { "epoch": 0.5772144088758147, "grad_norm": 0.780325710773468, "learning_rate": 1.1997758870420903e-05, "loss": 0.0964, "step": 26195 }, { "epoch": 0.5772364441653308, "grad_norm": 0.4401824176311493, "learning_rate": 1.1996710004714337e-05, "loss": 0.0674, "step": 26196 }, { "epoch": 0.577258479454847, "grad_norm": 0.8136916160583496, "learning_rate": 1.1995661154305135e-05, "loss": 0.0621, "step": 26197 }, { "epoch": 0.5772805147443631, "grad_norm": 0.6495782136917114, "learning_rate": 1.1994612319198643e-05, "loss": 0.0872, "step": 26198 }, { "epoch": 0.5773025500338792, "grad_norm": 0.6548158526420593, "learning_rate": 1.1993563499400194e-05, "loss": 0.0468, "step": 26199 }, { "epoch": 0.5773245853233954, "grad_norm": 0.7044886946678162, "learning_rate": 1.1992514694915146e-05, "loss": 0.0508, "step": 26200 }, { "epoch": 0.5773466206129115, "grad_norm": 0.4904375672340393, "learning_rate": 1.1991465905748835e-05, "loss": 0.0669, "step": 26201 }, { "epoch": 0.5773686559024277, "grad_norm": 0.5782882571220398, "learning_rate": 1.19904171319066e-05, "loss": 0.0678, "step": 26202 }, { "epoch": 0.5773906911919439, "grad_norm": 0.4621196985244751, "learning_rate": 1.1989368373393778e-05, "loss": 0.0484, "step": 26203 }, { "epoch": 0.57741272648146, "grad_norm": 0.546838641166687, "learning_rate": 1.1988319630215724e-05, "loss": 0.0736, "step": 26204 }, { "epoch": 0.5774347617709762, "grad_norm": 0.9063295125961304, "learning_rate": 1.1987270902377776e-05, "loss": 0.1271, "step": 26205 }, { "epoch": 0.5774567970604924, "grad_norm": 0.8347792625427246, "learning_rate": 1.198622218988527e-05, "loss": 0.0634, "step": 26206 }, { "epoch": 0.5774788323500085, "grad_norm": 1.2032029628753662, "learning_rate": 1.1985173492743542e-05, "loss": 0.0725, "step": 26207 }, { "epoch": 0.5775008676395247, "grad_norm": 0.678665280342102, "learning_rate": 1.1984124810957951e-05, "loss": 0.0837, "step": 26208 }, { "epoch": 0.5775229029290408, "grad_norm": 0.6005915403366089, "learning_rate": 1.198307614453383e-05, "loss": 0.073, "step": 26209 }, { "epoch": 0.577544938218557, "grad_norm": 0.5496377944946289, "learning_rate": 1.1982027493476518e-05, "loss": 0.0581, "step": 26210 }, { "epoch": 0.5775669735080732, "grad_norm": 0.7528368830680847, "learning_rate": 1.1980978857791352e-05, "loss": 0.0851, "step": 26211 }, { "epoch": 0.5775890087975893, "grad_norm": 0.4634128212928772, "learning_rate": 1.1979930237483689e-05, "loss": 0.0627, "step": 26212 }, { "epoch": 0.5776110440871055, "grad_norm": 0.6737926006317139, "learning_rate": 1.1978881632558858e-05, "loss": 0.0776, "step": 26213 }, { "epoch": 0.5776330793766217, "grad_norm": 0.6709041595458984, "learning_rate": 1.1977833043022202e-05, "loss": 0.0892, "step": 26214 }, { "epoch": 0.5776551146661378, "grad_norm": 0.6776806116104126, "learning_rate": 1.1976784468879062e-05, "loss": 0.0986, "step": 26215 }, { "epoch": 0.577677149955654, "grad_norm": 0.9313336610794067, "learning_rate": 1.1975735910134783e-05, "loss": 0.0834, "step": 26216 }, { "epoch": 0.5776991852451702, "grad_norm": 0.5177589654922485, "learning_rate": 1.19746873667947e-05, "loss": 0.0684, "step": 26217 }, { "epoch": 0.5777212205346863, "grad_norm": 0.5241782665252686, "learning_rate": 1.1973638838864152e-05, "loss": 0.0987, "step": 26218 }, { "epoch": 0.5777432558242025, "grad_norm": 0.8305618166923523, "learning_rate": 1.1972590326348492e-05, "loss": 0.0633, "step": 26219 }, { "epoch": 0.5777652911137187, "grad_norm": 0.8204375505447388, "learning_rate": 1.1971541829253052e-05, "loss": 0.077, "step": 26220 }, { "epoch": 0.5777873264032348, "grad_norm": 0.4421525001525879, "learning_rate": 1.1970493347583174e-05, "loss": 0.0848, "step": 26221 }, { "epoch": 0.577809361692751, "grad_norm": 0.3968799114227295, "learning_rate": 1.1969444881344193e-05, "loss": 0.0865, "step": 26222 }, { "epoch": 0.5778313969822672, "grad_norm": 0.8195766806602478, "learning_rate": 1.1968396430541458e-05, "loss": 0.0757, "step": 26223 }, { "epoch": 0.5778534322717832, "grad_norm": 0.34016573429107666, "learning_rate": 1.1967347995180307e-05, "loss": 0.0723, "step": 26224 }, { "epoch": 0.5778754675612994, "grad_norm": 0.8079728484153748, "learning_rate": 1.1966299575266078e-05, "loss": 0.0853, "step": 26225 }, { "epoch": 0.5778975028508155, "grad_norm": 0.7370842099189758, "learning_rate": 1.1965251170804105e-05, "loss": 0.0695, "step": 26226 }, { "epoch": 0.5779195381403317, "grad_norm": 0.8662962317466736, "learning_rate": 1.1964202781799743e-05, "loss": 0.1188, "step": 26227 }, { "epoch": 0.5779415734298479, "grad_norm": 0.5899455547332764, "learning_rate": 1.1963154408258322e-05, "loss": 0.0769, "step": 26228 }, { "epoch": 0.577963608719364, "grad_norm": 0.8740508556365967, "learning_rate": 1.1962106050185188e-05, "loss": 0.1025, "step": 26229 }, { "epoch": 0.5779856440088802, "grad_norm": 0.40135809779167175, "learning_rate": 1.1961057707585668e-05, "loss": 0.0445, "step": 26230 }, { "epoch": 0.5780076792983964, "grad_norm": 0.38076189160346985, "learning_rate": 1.1960009380465118e-05, "loss": 0.0482, "step": 26231 }, { "epoch": 0.5780297145879125, "grad_norm": 0.46817487478256226, "learning_rate": 1.1958961068828871e-05, "loss": 0.087, "step": 26232 }, { "epoch": 0.5780517498774287, "grad_norm": 0.6705121397972107, "learning_rate": 1.1957912772682263e-05, "loss": 0.0882, "step": 26233 }, { "epoch": 0.5780737851669449, "grad_norm": 0.7264730334281921, "learning_rate": 1.1956864492030631e-05, "loss": 0.0659, "step": 26234 }, { "epoch": 0.578095820456461, "grad_norm": 0.7487410306930542, "learning_rate": 1.1955816226879329e-05, "loss": 0.0937, "step": 26235 }, { "epoch": 0.5781178557459772, "grad_norm": 0.7869793176651001, "learning_rate": 1.1954767977233684e-05, "loss": 0.0651, "step": 26236 }, { "epoch": 0.5781398910354933, "grad_norm": 0.19232624769210815, "learning_rate": 1.195371974309904e-05, "loss": 0.0603, "step": 26237 }, { "epoch": 0.5781619263250095, "grad_norm": 0.47003862261772156, "learning_rate": 1.1952671524480733e-05, "loss": 0.0568, "step": 26238 }, { "epoch": 0.5781839616145257, "grad_norm": 0.38034865260124207, "learning_rate": 1.1951623321384106e-05, "loss": 0.0557, "step": 26239 }, { "epoch": 0.5782059969040418, "grad_norm": 0.7429949641227722, "learning_rate": 1.1950575133814496e-05, "loss": 0.0873, "step": 26240 }, { "epoch": 0.578228032193558, "grad_norm": 0.5267323851585388, "learning_rate": 1.1949526961777235e-05, "loss": 0.0609, "step": 26241 }, { "epoch": 0.5782500674830742, "grad_norm": 0.9756843447685242, "learning_rate": 1.1948478805277676e-05, "loss": 0.0891, "step": 26242 }, { "epoch": 0.5782721027725903, "grad_norm": 0.8355703353881836, "learning_rate": 1.1947430664321151e-05, "loss": 0.0496, "step": 26243 }, { "epoch": 0.5782941380621065, "grad_norm": 0.5722808241844177, "learning_rate": 1.1946382538913e-05, "loss": 0.0814, "step": 26244 }, { "epoch": 0.5783161733516227, "grad_norm": 0.391155481338501, "learning_rate": 1.1945334429058555e-05, "loss": 0.1116, "step": 26245 }, { "epoch": 0.5783382086411388, "grad_norm": 0.5247480869293213, "learning_rate": 1.1944286334763164e-05, "loss": 0.0776, "step": 26246 }, { "epoch": 0.578360243930655, "grad_norm": 0.7044984698295593, "learning_rate": 1.1943238256032161e-05, "loss": 0.1036, "step": 26247 }, { "epoch": 0.5783822792201712, "grad_norm": 0.41509294509887695, "learning_rate": 1.1942190192870883e-05, "loss": 0.0626, "step": 26248 }, { "epoch": 0.5784043145096872, "grad_norm": 0.18941658735275269, "learning_rate": 1.1941142145284666e-05, "loss": 0.0844, "step": 26249 }, { "epoch": 0.5784263497992034, "grad_norm": 0.8299175500869751, "learning_rate": 1.1940094113278857e-05, "loss": 0.0814, "step": 26250 }, { "epoch": 0.5784483850887195, "grad_norm": 0.5181794166564941, "learning_rate": 1.1939046096858791e-05, "loss": 0.0765, "step": 26251 }, { "epoch": 0.5784704203782357, "grad_norm": 0.49286872148513794, "learning_rate": 1.1937998096029805e-05, "loss": 0.0572, "step": 26252 }, { "epoch": 0.5784924556677519, "grad_norm": 0.8055290579795837, "learning_rate": 1.1936950110797232e-05, "loss": 0.0931, "step": 26253 }, { "epoch": 0.578514490957268, "grad_norm": 0.648486316204071, "learning_rate": 1.1935902141166419e-05, "loss": 0.0669, "step": 26254 }, { "epoch": 0.5785365262467842, "grad_norm": 0.5567507147789001, "learning_rate": 1.19348541871427e-05, "loss": 0.0801, "step": 26255 }, { "epoch": 0.5785585615363004, "grad_norm": 0.6862525939941406, "learning_rate": 1.1933806248731408e-05, "loss": 0.075, "step": 26256 }, { "epoch": 0.5785805968258165, "grad_norm": 0.5183239579200745, "learning_rate": 1.1932758325937884e-05, "loss": 0.0659, "step": 26257 }, { "epoch": 0.5786026321153327, "grad_norm": 0.6104838252067566, "learning_rate": 1.193171041876747e-05, "loss": 0.0622, "step": 26258 }, { "epoch": 0.5786246674048489, "grad_norm": 0.7210668325424194, "learning_rate": 1.1930662527225502e-05, "loss": 0.0517, "step": 26259 }, { "epoch": 0.578646702694365, "grad_norm": 0.4038248360157013, "learning_rate": 1.1929614651317313e-05, "loss": 0.0567, "step": 26260 }, { "epoch": 0.5786687379838812, "grad_norm": 0.5029165744781494, "learning_rate": 1.1928566791048242e-05, "loss": 0.0636, "step": 26261 }, { "epoch": 0.5786907732733974, "grad_norm": 0.5595335960388184, "learning_rate": 1.192751894642363e-05, "loss": 0.0384, "step": 26262 }, { "epoch": 0.5787128085629135, "grad_norm": 0.7528541088104248, "learning_rate": 1.1926471117448813e-05, "loss": 0.0537, "step": 26263 }, { "epoch": 0.5787348438524297, "grad_norm": 0.5301264524459839, "learning_rate": 1.1925423304129118e-05, "loss": 0.0597, "step": 26264 }, { "epoch": 0.5787568791419458, "grad_norm": 0.5124308466911316, "learning_rate": 1.1924375506469899e-05, "loss": 0.0816, "step": 26265 }, { "epoch": 0.578778914431462, "grad_norm": 0.6790350079536438, "learning_rate": 1.1923327724476484e-05, "loss": 0.0706, "step": 26266 }, { "epoch": 0.5788009497209782, "grad_norm": 0.48989561200141907, "learning_rate": 1.1922279958154212e-05, "loss": 0.0525, "step": 26267 }, { "epoch": 0.5788229850104943, "grad_norm": 0.9330282211303711, "learning_rate": 1.1921232207508413e-05, "loss": 0.0892, "step": 26268 }, { "epoch": 0.5788450203000105, "grad_norm": 0.6389526724815369, "learning_rate": 1.1920184472544432e-05, "loss": 0.0759, "step": 26269 }, { "epoch": 0.5788670555895267, "grad_norm": 0.5447437167167664, "learning_rate": 1.1919136753267607e-05, "loss": 0.0701, "step": 26270 }, { "epoch": 0.5788890908790428, "grad_norm": 0.9897208213806152, "learning_rate": 1.1918089049683267e-05, "loss": 0.0993, "step": 26271 }, { "epoch": 0.578911126168559, "grad_norm": 0.9368630647659302, "learning_rate": 1.1917041361796747e-05, "loss": 0.0799, "step": 26272 }, { "epoch": 0.578933161458075, "grad_norm": 1.12721586227417, "learning_rate": 1.1915993689613397e-05, "loss": 0.0839, "step": 26273 }, { "epoch": 0.5789551967475912, "grad_norm": 0.7266512513160706, "learning_rate": 1.1914946033138542e-05, "loss": 0.0781, "step": 26274 }, { "epoch": 0.5789772320371074, "grad_norm": 0.6628829836845398, "learning_rate": 1.1913898392377522e-05, "loss": 0.0792, "step": 26275 }, { "epoch": 0.5789992673266235, "grad_norm": 0.6441993117332458, "learning_rate": 1.191285076733567e-05, "loss": 0.0786, "step": 26276 }, { "epoch": 0.5790213026161397, "grad_norm": 0.6847323775291443, "learning_rate": 1.1911803158018326e-05, "loss": 0.0598, "step": 26277 }, { "epoch": 0.5790433379056559, "grad_norm": 0.37834638357162476, "learning_rate": 1.1910755564430828e-05, "loss": 0.0667, "step": 26278 }, { "epoch": 0.579065373195172, "grad_norm": 0.6547356843948364, "learning_rate": 1.1909707986578504e-05, "loss": 0.0865, "step": 26279 }, { "epoch": 0.5790874084846882, "grad_norm": 0.6372234225273132, "learning_rate": 1.1908660424466691e-05, "loss": 0.0682, "step": 26280 }, { "epoch": 0.5791094437742044, "grad_norm": 0.7001255750656128, "learning_rate": 1.1907612878100734e-05, "loss": 0.0599, "step": 26281 }, { "epoch": 0.5791314790637205, "grad_norm": 0.7704586982727051, "learning_rate": 1.1906565347485963e-05, "loss": 0.0939, "step": 26282 }, { "epoch": 0.5791535143532367, "grad_norm": 0.6382014155387878, "learning_rate": 1.1905517832627713e-05, "loss": 0.0579, "step": 26283 }, { "epoch": 0.5791755496427529, "grad_norm": 0.9772876501083374, "learning_rate": 1.1904470333531316e-05, "loss": 0.0561, "step": 26284 }, { "epoch": 0.579197584932269, "grad_norm": 1.2276554107666016, "learning_rate": 1.1903422850202115e-05, "loss": 0.126, "step": 26285 }, { "epoch": 0.5792196202217852, "grad_norm": 0.5714072585105896, "learning_rate": 1.1902375382645443e-05, "loss": 0.0732, "step": 26286 }, { "epoch": 0.5792416555113014, "grad_norm": 0.519769549369812, "learning_rate": 1.1901327930866627e-05, "loss": 0.0534, "step": 26287 }, { "epoch": 0.5792636908008175, "grad_norm": 0.7373987436294556, "learning_rate": 1.1900280494871018e-05, "loss": 0.0809, "step": 26288 }, { "epoch": 0.5792857260903337, "grad_norm": 0.39619916677474976, "learning_rate": 1.189923307466394e-05, "loss": 0.0693, "step": 26289 }, { "epoch": 0.5793077613798499, "grad_norm": 0.7919506430625916, "learning_rate": 1.1898185670250732e-05, "loss": 0.0897, "step": 26290 }, { "epoch": 0.579329796669366, "grad_norm": 0.5855886936187744, "learning_rate": 1.1897138281636724e-05, "loss": 0.0457, "step": 26291 }, { "epoch": 0.5793518319588822, "grad_norm": 0.7018643021583557, "learning_rate": 1.1896090908827259e-05, "loss": 0.0845, "step": 26292 }, { "epoch": 0.5793738672483983, "grad_norm": 0.4189361333847046, "learning_rate": 1.1895043551827665e-05, "loss": 0.0365, "step": 26293 }, { "epoch": 0.5793959025379145, "grad_norm": 0.5823041200637817, "learning_rate": 1.1893996210643281e-05, "loss": 0.0679, "step": 26294 }, { "epoch": 0.5794179378274307, "grad_norm": 1.062027096748352, "learning_rate": 1.1892948885279433e-05, "loss": 0.0562, "step": 26295 }, { "epoch": 0.5794399731169468, "grad_norm": 0.41985124349594116, "learning_rate": 1.1891901575741469e-05, "loss": 0.0749, "step": 26296 }, { "epoch": 0.579462008406463, "grad_norm": 1.6030696630477905, "learning_rate": 1.1890854282034716e-05, "loss": 0.0918, "step": 26297 }, { "epoch": 0.5794840436959791, "grad_norm": 0.4755040407180786, "learning_rate": 1.1889807004164512e-05, "loss": 0.0719, "step": 26298 }, { "epoch": 0.5795060789854952, "grad_norm": 0.6810846328735352, "learning_rate": 1.1888759742136182e-05, "loss": 0.0557, "step": 26299 }, { "epoch": 0.5795281142750114, "grad_norm": 0.5291537642478943, "learning_rate": 1.1887712495955074e-05, "loss": 0.0626, "step": 26300 }, { "epoch": 0.5795501495645276, "grad_norm": 0.38859349489212036, "learning_rate": 1.1886665265626512e-05, "loss": 0.0699, "step": 26301 }, { "epoch": 0.5795721848540437, "grad_norm": 0.47846493124961853, "learning_rate": 1.1885618051155836e-05, "loss": 0.0614, "step": 26302 }, { "epoch": 0.5795942201435599, "grad_norm": 0.8910395503044128, "learning_rate": 1.188457085254837e-05, "loss": 0.0865, "step": 26303 }, { "epoch": 0.579616255433076, "grad_norm": 0.7331069707870483, "learning_rate": 1.188352366980946e-05, "loss": 0.0424, "step": 26304 }, { "epoch": 0.5796382907225922, "grad_norm": 0.8983004093170166, "learning_rate": 1.1882476502944436e-05, "loss": 0.0774, "step": 26305 }, { "epoch": 0.5796603260121084, "grad_norm": 0.7860511541366577, "learning_rate": 1.1881429351958626e-05, "loss": 0.0608, "step": 26306 }, { "epoch": 0.5796823613016245, "grad_norm": 0.4713990390300751, "learning_rate": 1.1880382216857377e-05, "loss": 0.0775, "step": 26307 }, { "epoch": 0.5797043965911407, "grad_norm": 0.5063431262969971, "learning_rate": 1.1879335097646011e-05, "loss": 0.0782, "step": 26308 }, { "epoch": 0.5797264318806569, "grad_norm": 0.5825364589691162, "learning_rate": 1.1878287994329865e-05, "loss": 0.0679, "step": 26309 }, { "epoch": 0.579748467170173, "grad_norm": 1.4014970064163208, "learning_rate": 1.1877240906914265e-05, "loss": 0.0951, "step": 26310 }, { "epoch": 0.5797705024596892, "grad_norm": 0.7025596499443054, "learning_rate": 1.1876193835404558e-05, "loss": 0.1127, "step": 26311 }, { "epoch": 0.5797925377492054, "grad_norm": 0.9454183578491211, "learning_rate": 1.1875146779806073e-05, "loss": 0.0691, "step": 26312 }, { "epoch": 0.5798145730387215, "grad_norm": 0.4697429835796356, "learning_rate": 1.187409974012414e-05, "loss": 0.0664, "step": 26313 }, { "epoch": 0.5798366083282377, "grad_norm": 0.4949903190135956, "learning_rate": 1.1873052716364085e-05, "loss": 0.0845, "step": 26314 }, { "epoch": 0.5798586436177539, "grad_norm": 0.5356884598731995, "learning_rate": 1.1872005708531259e-05, "loss": 0.0472, "step": 26315 }, { "epoch": 0.57988067890727, "grad_norm": 0.6959514021873474, "learning_rate": 1.1870958716630984e-05, "loss": 0.0732, "step": 26316 }, { "epoch": 0.5799027141967862, "grad_norm": 0.8350553512573242, "learning_rate": 1.1869911740668597e-05, "loss": 0.0779, "step": 26317 }, { "epoch": 0.5799247494863023, "grad_norm": 0.6467996835708618, "learning_rate": 1.1868864780649419e-05, "loss": 0.073, "step": 26318 }, { "epoch": 0.5799467847758185, "grad_norm": 0.4424208104610443, "learning_rate": 1.1867817836578797e-05, "loss": 0.0587, "step": 26319 }, { "epoch": 0.5799688200653347, "grad_norm": 0.5196837186813354, "learning_rate": 1.186677090846206e-05, "loss": 0.0519, "step": 26320 }, { "epoch": 0.5799908553548508, "grad_norm": 0.5664929151535034, "learning_rate": 1.1865723996304537e-05, "loss": 0.0671, "step": 26321 }, { "epoch": 0.580012890644367, "grad_norm": 0.4783453047275543, "learning_rate": 1.1864677100111556e-05, "loss": 0.0517, "step": 26322 }, { "epoch": 0.5800349259338831, "grad_norm": 0.5394039750099182, "learning_rate": 1.1863630219888464e-05, "loss": 0.0663, "step": 26323 }, { "epoch": 0.5800569612233992, "grad_norm": 0.7661659717559814, "learning_rate": 1.1862583355640584e-05, "loss": 0.1293, "step": 26324 }, { "epoch": 0.5800789965129154, "grad_norm": 0.7526950240135193, "learning_rate": 1.1861536507373249e-05, "loss": 0.0604, "step": 26325 }, { "epoch": 0.5801010318024316, "grad_norm": 0.49352434277534485, "learning_rate": 1.1860489675091789e-05, "loss": 0.036, "step": 26326 }, { "epoch": 0.5801230670919477, "grad_norm": 0.8943597674369812, "learning_rate": 1.1859442858801542e-05, "loss": 0.0791, "step": 26327 }, { "epoch": 0.5801451023814639, "grad_norm": 0.5273654460906982, "learning_rate": 1.1858396058507836e-05, "loss": 0.0662, "step": 26328 }, { "epoch": 0.58016713767098, "grad_norm": 0.8230316042900085, "learning_rate": 1.1857349274215998e-05, "loss": 0.0892, "step": 26329 }, { "epoch": 0.5801891729604962, "grad_norm": 0.7432610392570496, "learning_rate": 1.185630250593137e-05, "loss": 0.0956, "step": 26330 }, { "epoch": 0.5802112082500124, "grad_norm": 0.5114355683326721, "learning_rate": 1.1855255753659283e-05, "loss": 0.0786, "step": 26331 }, { "epoch": 0.5802332435395285, "grad_norm": 0.8578360676765442, "learning_rate": 1.185420901740506e-05, "loss": 0.0827, "step": 26332 }, { "epoch": 0.5802552788290447, "grad_norm": 0.7330741882324219, "learning_rate": 1.1853162297174037e-05, "loss": 0.0964, "step": 26333 }, { "epoch": 0.5802773141185609, "grad_norm": 0.6330624222755432, "learning_rate": 1.1852115592971548e-05, "loss": 0.0943, "step": 26334 }, { "epoch": 0.580299349408077, "grad_norm": 0.9645818471908569, "learning_rate": 1.1851068904802923e-05, "loss": 0.0655, "step": 26335 }, { "epoch": 0.5803213846975932, "grad_norm": 0.5127183198928833, "learning_rate": 1.1850022232673493e-05, "loss": 0.0597, "step": 26336 }, { "epoch": 0.5803434199871094, "grad_norm": 0.6326965093612671, "learning_rate": 1.1848975576588583e-05, "loss": 0.0964, "step": 26337 }, { "epoch": 0.5803654552766255, "grad_norm": 0.8729353547096252, "learning_rate": 1.1847928936553536e-05, "loss": 0.0859, "step": 26338 }, { "epoch": 0.5803874905661417, "grad_norm": 0.2384670227766037, "learning_rate": 1.1846882312573676e-05, "loss": 0.0706, "step": 26339 }, { "epoch": 0.5804095258556579, "grad_norm": 0.29241061210632324, "learning_rate": 1.1845835704654336e-05, "loss": 0.0677, "step": 26340 }, { "epoch": 0.580431561145174, "grad_norm": 0.6632505655288696, "learning_rate": 1.1844789112800843e-05, "loss": 0.0687, "step": 26341 }, { "epoch": 0.5804535964346902, "grad_norm": 0.9240792393684387, "learning_rate": 1.1843742537018534e-05, "loss": 0.08, "step": 26342 }, { "epoch": 0.5804756317242064, "grad_norm": 0.7182821035385132, "learning_rate": 1.1842695977312736e-05, "loss": 0.0982, "step": 26343 }, { "epoch": 0.5804976670137225, "grad_norm": 1.0137051343917847, "learning_rate": 1.1841649433688781e-05, "loss": 0.0837, "step": 26344 }, { "epoch": 0.5805197023032387, "grad_norm": 0.9422242641448975, "learning_rate": 1.1840602906151995e-05, "loss": 0.115, "step": 26345 }, { "epoch": 0.5805417375927548, "grad_norm": 0.6344621181488037, "learning_rate": 1.1839556394707716e-05, "loss": 0.0807, "step": 26346 }, { "epoch": 0.580563772882271, "grad_norm": 0.28340375423431396, "learning_rate": 1.1838509899361272e-05, "loss": 0.051, "step": 26347 }, { "epoch": 0.5805858081717871, "grad_norm": 0.49642711877822876, "learning_rate": 1.1837463420117991e-05, "loss": 0.0702, "step": 26348 }, { "epoch": 0.5806078434613032, "grad_norm": 0.7326763868331909, "learning_rate": 1.1836416956983205e-05, "loss": 0.0491, "step": 26349 }, { "epoch": 0.5806298787508194, "grad_norm": 0.5421097278594971, "learning_rate": 1.1835370509962245e-05, "loss": 0.0853, "step": 26350 }, { "epoch": 0.5806519140403356, "grad_norm": 0.18148738145828247, "learning_rate": 1.183432407906044e-05, "loss": 0.0569, "step": 26351 }, { "epoch": 0.5806739493298517, "grad_norm": 0.583395779132843, "learning_rate": 1.1833277664283113e-05, "loss": 0.0508, "step": 26352 }, { "epoch": 0.5806959846193679, "grad_norm": 0.5150005221366882, "learning_rate": 1.1832231265635609e-05, "loss": 0.0461, "step": 26353 }, { "epoch": 0.580718019908884, "grad_norm": 0.6710169315338135, "learning_rate": 1.1831184883123248e-05, "loss": 0.0641, "step": 26354 }, { "epoch": 0.5807400551984002, "grad_norm": 0.3421506881713867, "learning_rate": 1.1830138516751363e-05, "loss": 0.0607, "step": 26355 }, { "epoch": 0.5807620904879164, "grad_norm": 0.865198016166687, "learning_rate": 1.1829092166525278e-05, "loss": 0.0657, "step": 26356 }, { "epoch": 0.5807841257774325, "grad_norm": 0.5837509036064148, "learning_rate": 1.1828045832450332e-05, "loss": 0.0902, "step": 26357 }, { "epoch": 0.5808061610669487, "grad_norm": 0.737053632736206, "learning_rate": 1.1826999514531847e-05, "loss": 0.0643, "step": 26358 }, { "epoch": 0.5808281963564649, "grad_norm": 0.6266496777534485, "learning_rate": 1.1825953212775157e-05, "loss": 0.074, "step": 26359 }, { "epoch": 0.580850231645981, "grad_norm": 0.7964101433753967, "learning_rate": 1.1824906927185584e-05, "loss": 0.0963, "step": 26360 }, { "epoch": 0.5808722669354972, "grad_norm": 0.743958592414856, "learning_rate": 1.1823860657768468e-05, "loss": 0.0566, "step": 26361 }, { "epoch": 0.5808943022250134, "grad_norm": 0.5329320430755615, "learning_rate": 1.182281440452913e-05, "loss": 0.0607, "step": 26362 }, { "epoch": 0.5809163375145295, "grad_norm": 0.8536403179168701, "learning_rate": 1.1821768167472905e-05, "loss": 0.094, "step": 26363 }, { "epoch": 0.5809383728040457, "grad_norm": 0.33093273639678955, "learning_rate": 1.1820721946605115e-05, "loss": 0.0543, "step": 26364 }, { "epoch": 0.5809604080935619, "grad_norm": 0.7089638710021973, "learning_rate": 1.1819675741931096e-05, "loss": 0.0671, "step": 26365 }, { "epoch": 0.580982443383078, "grad_norm": 0.7247648239135742, "learning_rate": 1.1818629553456174e-05, "loss": 0.0805, "step": 26366 }, { "epoch": 0.5810044786725942, "grad_norm": 0.82901930809021, "learning_rate": 1.1817583381185679e-05, "loss": 0.0833, "step": 26367 }, { "epoch": 0.5810265139621104, "grad_norm": 0.7224381566047668, "learning_rate": 1.181653722512493e-05, "loss": 0.0606, "step": 26368 }, { "epoch": 0.5810485492516265, "grad_norm": 0.6204678416252136, "learning_rate": 1.1815491085279271e-05, "loss": 0.094, "step": 26369 }, { "epoch": 0.5810705845411427, "grad_norm": 0.5826531648635864, "learning_rate": 1.1814444961654024e-05, "loss": 0.0599, "step": 26370 }, { "epoch": 0.5810926198306589, "grad_norm": 0.6402029991149902, "learning_rate": 1.1813398854254515e-05, "loss": 0.0452, "step": 26371 }, { "epoch": 0.5811146551201749, "grad_norm": 0.5513655543327332, "learning_rate": 1.1812352763086074e-05, "loss": 0.0726, "step": 26372 }, { "epoch": 0.5811366904096911, "grad_norm": 0.7667865753173828, "learning_rate": 1.1811306688154033e-05, "loss": 0.0598, "step": 26373 }, { "epoch": 0.5811587256992072, "grad_norm": 0.6272519826889038, "learning_rate": 1.1810260629463715e-05, "loss": 0.0427, "step": 26374 }, { "epoch": 0.5811807609887234, "grad_norm": 0.5409440994262695, "learning_rate": 1.1809214587020445e-05, "loss": 0.0773, "step": 26375 }, { "epoch": 0.5812027962782396, "grad_norm": 0.835416316986084, "learning_rate": 1.1808168560829563e-05, "loss": 0.0793, "step": 26376 }, { "epoch": 0.5812248315677557, "grad_norm": 0.6446353197097778, "learning_rate": 1.1807122550896388e-05, "loss": 0.0863, "step": 26377 }, { "epoch": 0.5812468668572719, "grad_norm": 0.519810140132904, "learning_rate": 1.180607655722625e-05, "loss": 0.0778, "step": 26378 }, { "epoch": 0.5812689021467881, "grad_norm": 0.9440295100212097, "learning_rate": 1.1805030579824476e-05, "loss": 0.0776, "step": 26379 }, { "epoch": 0.5812909374363042, "grad_norm": 0.5649863481521606, "learning_rate": 1.1803984618696397e-05, "loss": 0.0821, "step": 26380 }, { "epoch": 0.5813129727258204, "grad_norm": 0.4046870768070221, "learning_rate": 1.1802938673847338e-05, "loss": 0.0528, "step": 26381 }, { "epoch": 0.5813350080153366, "grad_norm": 0.7048248052597046, "learning_rate": 1.1801892745282628e-05, "loss": 0.0747, "step": 26382 }, { "epoch": 0.5813570433048527, "grad_norm": 0.4981069564819336, "learning_rate": 1.1800846833007584e-05, "loss": 0.0898, "step": 26383 }, { "epoch": 0.5813790785943689, "grad_norm": 0.5749557614326477, "learning_rate": 1.179980093702755e-05, "loss": 0.0508, "step": 26384 }, { "epoch": 0.581401113883885, "grad_norm": 0.8924322128295898, "learning_rate": 1.1798755057347847e-05, "loss": 0.0847, "step": 26385 }, { "epoch": 0.5814231491734012, "grad_norm": 0.7481054067611694, "learning_rate": 1.17977091939738e-05, "loss": 0.0909, "step": 26386 }, { "epoch": 0.5814451844629174, "grad_norm": 0.7245464324951172, "learning_rate": 1.1796663346910735e-05, "loss": 0.0645, "step": 26387 }, { "epoch": 0.5814672197524335, "grad_norm": 0.351173996925354, "learning_rate": 1.1795617516163983e-05, "loss": 0.0607, "step": 26388 }, { "epoch": 0.5814892550419497, "grad_norm": 0.8333442211151123, "learning_rate": 1.179457170173887e-05, "loss": 0.0917, "step": 26389 }, { "epoch": 0.5815112903314659, "grad_norm": 0.518051266670227, "learning_rate": 1.1793525903640723e-05, "loss": 0.1357, "step": 26390 }, { "epoch": 0.581533325620982, "grad_norm": 0.6183807253837585, "learning_rate": 1.1792480121874861e-05, "loss": 0.0746, "step": 26391 }, { "epoch": 0.5815553609104982, "grad_norm": 0.6620652675628662, "learning_rate": 1.1791434356446624e-05, "loss": 0.0844, "step": 26392 }, { "epoch": 0.5815773962000144, "grad_norm": 0.6906409859657288, "learning_rate": 1.1790388607361333e-05, "loss": 0.0934, "step": 26393 }, { "epoch": 0.5815994314895305, "grad_norm": 0.5570943355560303, "learning_rate": 1.1789342874624314e-05, "loss": 0.0879, "step": 26394 }, { "epoch": 0.5816214667790467, "grad_norm": 0.5241018533706665, "learning_rate": 1.178829715824089e-05, "loss": 0.066, "step": 26395 }, { "epoch": 0.5816435020685629, "grad_norm": 0.5256737470626831, "learning_rate": 1.1787251458216394e-05, "loss": 0.0887, "step": 26396 }, { "epoch": 0.5816655373580789, "grad_norm": 0.48518434166908264, "learning_rate": 1.1786205774556149e-05, "loss": 0.0584, "step": 26397 }, { "epoch": 0.5816875726475951, "grad_norm": 0.6581026315689087, "learning_rate": 1.1785160107265473e-05, "loss": 0.0552, "step": 26398 }, { "epoch": 0.5817096079371112, "grad_norm": 0.6611062288284302, "learning_rate": 1.178411445634971e-05, "loss": 0.0966, "step": 26399 }, { "epoch": 0.5817316432266274, "grad_norm": 0.555547833442688, "learning_rate": 1.1783068821814176e-05, "loss": 0.059, "step": 26400 }, { "epoch": 0.5817536785161436, "grad_norm": 0.6888366937637329, "learning_rate": 1.1782023203664196e-05, "loss": 0.0768, "step": 26401 }, { "epoch": 0.5817757138056597, "grad_norm": 0.29253262281417847, "learning_rate": 1.1780977601905097e-05, "loss": 0.0447, "step": 26402 }, { "epoch": 0.5817977490951759, "grad_norm": 0.46523723006248474, "learning_rate": 1.1779932016542206e-05, "loss": 0.0643, "step": 26403 }, { "epoch": 0.5818197843846921, "grad_norm": 0.5832362174987793, "learning_rate": 1.1778886447580847e-05, "loss": 0.0666, "step": 26404 }, { "epoch": 0.5818418196742082, "grad_norm": 0.3985549211502075, "learning_rate": 1.177784089502635e-05, "loss": 0.0721, "step": 26405 }, { "epoch": 0.5818638549637244, "grad_norm": 0.5931625366210938, "learning_rate": 1.177679535888403e-05, "loss": 0.0678, "step": 26406 }, { "epoch": 0.5818858902532406, "grad_norm": 0.9672078490257263, "learning_rate": 1.1775749839159223e-05, "loss": 0.0822, "step": 26407 }, { "epoch": 0.5819079255427567, "grad_norm": 0.33064061403274536, "learning_rate": 1.1774704335857254e-05, "loss": 0.0844, "step": 26408 }, { "epoch": 0.5819299608322729, "grad_norm": 0.5450313091278076, "learning_rate": 1.1773658848983444e-05, "loss": 0.104, "step": 26409 }, { "epoch": 0.581951996121789, "grad_norm": 0.7384366989135742, "learning_rate": 1.1772613378543117e-05, "loss": 0.0692, "step": 26410 }, { "epoch": 0.5819740314113052, "grad_norm": 0.5808643102645874, "learning_rate": 1.1771567924541604e-05, "loss": 0.0887, "step": 26411 }, { "epoch": 0.5819960667008214, "grad_norm": 1.1080371141433716, "learning_rate": 1.1770522486984228e-05, "loss": 0.0681, "step": 26412 }, { "epoch": 0.5820181019903375, "grad_norm": 0.7729416489601135, "learning_rate": 1.1769477065876309e-05, "loss": 0.0924, "step": 26413 }, { "epoch": 0.5820401372798537, "grad_norm": 0.37067991495132446, "learning_rate": 1.1768431661223172e-05, "loss": 0.0568, "step": 26414 }, { "epoch": 0.5820621725693699, "grad_norm": 0.44697684049606323, "learning_rate": 1.1767386273030153e-05, "loss": 0.0778, "step": 26415 }, { "epoch": 0.582084207858886, "grad_norm": 0.43871355056762695, "learning_rate": 1.1766340901302567e-05, "loss": 0.0522, "step": 26416 }, { "epoch": 0.5821062431484022, "grad_norm": 0.4751131236553192, "learning_rate": 1.1765295546045736e-05, "loss": 0.0765, "step": 26417 }, { "epoch": 0.5821282784379184, "grad_norm": 0.4855894446372986, "learning_rate": 1.1764250207264996e-05, "loss": 0.063, "step": 26418 }, { "epoch": 0.5821503137274345, "grad_norm": 0.36529064178466797, "learning_rate": 1.1763204884965665e-05, "loss": 0.0442, "step": 26419 }, { "epoch": 0.5821723490169507, "grad_norm": 0.5350740551948547, "learning_rate": 1.1762159579153065e-05, "loss": 0.0584, "step": 26420 }, { "epoch": 0.5821943843064669, "grad_norm": 0.5457960367202759, "learning_rate": 1.1761114289832518e-05, "loss": 0.0822, "step": 26421 }, { "epoch": 0.5822164195959829, "grad_norm": 0.6140978336334229, "learning_rate": 1.1760069017009358e-05, "loss": 0.0994, "step": 26422 }, { "epoch": 0.5822384548854991, "grad_norm": 0.5223825573921204, "learning_rate": 1.1759023760688904e-05, "loss": 0.0701, "step": 26423 }, { "epoch": 0.5822604901750152, "grad_norm": 0.6660559177398682, "learning_rate": 1.175797852087648e-05, "loss": 0.0892, "step": 26424 }, { "epoch": 0.5822825254645314, "grad_norm": 0.5025088787078857, "learning_rate": 1.1756933297577404e-05, "loss": 0.0527, "step": 26425 }, { "epoch": 0.5823045607540476, "grad_norm": 0.4010156989097595, "learning_rate": 1.1755888090797012e-05, "loss": 0.0565, "step": 26426 }, { "epoch": 0.5823265960435637, "grad_norm": 0.5652672648429871, "learning_rate": 1.175484290054062e-05, "loss": 0.066, "step": 26427 }, { "epoch": 0.5823486313330799, "grad_norm": 0.6466830372810364, "learning_rate": 1.1753797726813557e-05, "loss": 0.0855, "step": 26428 }, { "epoch": 0.5823706666225961, "grad_norm": 0.39165353775024414, "learning_rate": 1.1752752569621135e-05, "loss": 0.0702, "step": 26429 }, { "epoch": 0.5823927019121122, "grad_norm": 0.38382381200790405, "learning_rate": 1.1751707428968691e-05, "loss": 0.0648, "step": 26430 }, { "epoch": 0.5824147372016284, "grad_norm": 0.5620203018188477, "learning_rate": 1.1750662304861542e-05, "loss": 0.0626, "step": 26431 }, { "epoch": 0.5824367724911446, "grad_norm": 0.595984160900116, "learning_rate": 1.1749617197305014e-05, "loss": 0.0573, "step": 26432 }, { "epoch": 0.5824588077806607, "grad_norm": 0.7222681045532227, "learning_rate": 1.1748572106304424e-05, "loss": 0.0613, "step": 26433 }, { "epoch": 0.5824808430701769, "grad_norm": 0.7619910836219788, "learning_rate": 1.1747527031865101e-05, "loss": 0.0637, "step": 26434 }, { "epoch": 0.582502878359693, "grad_norm": 0.5896205306053162, "learning_rate": 1.1746481973992371e-05, "loss": 0.0441, "step": 26435 }, { "epoch": 0.5825249136492092, "grad_norm": 0.6590976119041443, "learning_rate": 1.174543693269155e-05, "loss": 0.0711, "step": 26436 }, { "epoch": 0.5825469489387254, "grad_norm": 0.7231734991073608, "learning_rate": 1.1744391907967965e-05, "loss": 0.0768, "step": 26437 }, { "epoch": 0.5825689842282415, "grad_norm": 0.6400940418243408, "learning_rate": 1.1743346899826936e-05, "loss": 0.0715, "step": 26438 }, { "epoch": 0.5825910195177577, "grad_norm": 0.6642577648162842, "learning_rate": 1.174230190827379e-05, "loss": 0.0504, "step": 26439 }, { "epoch": 0.5826130548072739, "grad_norm": 0.691053032875061, "learning_rate": 1.1741256933313841e-05, "loss": 0.0686, "step": 26440 }, { "epoch": 0.58263509009679, "grad_norm": 0.27740347385406494, "learning_rate": 1.1740211974952424e-05, "loss": 0.0725, "step": 26441 }, { "epoch": 0.5826571253863062, "grad_norm": 0.5846413969993591, "learning_rate": 1.1739167033194856e-05, "loss": 0.0775, "step": 26442 }, { "epoch": 0.5826791606758224, "grad_norm": 0.5560669898986816, "learning_rate": 1.173812210804646e-05, "loss": 0.0485, "step": 26443 }, { "epoch": 0.5827011959653385, "grad_norm": 0.49773144721984863, "learning_rate": 1.1737077199512551e-05, "loss": 0.0635, "step": 26444 }, { "epoch": 0.5827232312548547, "grad_norm": 0.5286538600921631, "learning_rate": 1.1736032307598463e-05, "loss": 0.0523, "step": 26445 }, { "epoch": 0.5827452665443708, "grad_norm": 0.5259466767311096, "learning_rate": 1.1734987432309512e-05, "loss": 0.0809, "step": 26446 }, { "epoch": 0.5827673018338869, "grad_norm": 0.5149869322776794, "learning_rate": 1.1733942573651022e-05, "loss": 0.0645, "step": 26447 }, { "epoch": 0.5827893371234031, "grad_norm": 0.6010295748710632, "learning_rate": 1.1732897731628304e-05, "loss": 0.0832, "step": 26448 }, { "epoch": 0.5828113724129192, "grad_norm": 0.5727414488792419, "learning_rate": 1.1731852906246699e-05, "loss": 0.0755, "step": 26449 }, { "epoch": 0.5828334077024354, "grad_norm": 0.3804965019226074, "learning_rate": 1.1730808097511519e-05, "loss": 0.064, "step": 26450 }, { "epoch": 0.5828554429919516, "grad_norm": 0.41342493891716003, "learning_rate": 1.1729763305428086e-05, "loss": 0.0578, "step": 26451 }, { "epoch": 0.5828774782814677, "grad_norm": 1.005646824836731, "learning_rate": 1.172871853000172e-05, "loss": 0.0803, "step": 26452 }, { "epoch": 0.5828995135709839, "grad_norm": 0.4880230724811554, "learning_rate": 1.1727673771237748e-05, "loss": 0.0881, "step": 26453 }, { "epoch": 0.5829215488605001, "grad_norm": 0.8344451785087585, "learning_rate": 1.1726629029141487e-05, "loss": 0.091, "step": 26454 }, { "epoch": 0.5829435841500162, "grad_norm": 0.4651375412940979, "learning_rate": 1.172558430371826e-05, "loss": 0.0581, "step": 26455 }, { "epoch": 0.5829656194395324, "grad_norm": 0.7137784361839294, "learning_rate": 1.1724539594973384e-05, "loss": 0.08, "step": 26456 }, { "epoch": 0.5829876547290486, "grad_norm": 0.4207805395126343, "learning_rate": 1.1723494902912188e-05, "loss": 0.0633, "step": 26457 }, { "epoch": 0.5830096900185647, "grad_norm": 0.3976367115974426, "learning_rate": 1.172245022753999e-05, "loss": 0.0768, "step": 26458 }, { "epoch": 0.5830317253080809, "grad_norm": 0.662994384765625, "learning_rate": 1.1721405568862111e-05, "loss": 0.0526, "step": 26459 }, { "epoch": 0.5830537605975971, "grad_norm": 0.7169176340103149, "learning_rate": 1.1720360926883869e-05, "loss": 0.0783, "step": 26460 }, { "epoch": 0.5830757958871132, "grad_norm": 0.7287237644195557, "learning_rate": 1.1719316301610589e-05, "loss": 0.0691, "step": 26461 }, { "epoch": 0.5830978311766294, "grad_norm": 0.45942971110343933, "learning_rate": 1.1718271693047592e-05, "loss": 0.0725, "step": 26462 }, { "epoch": 0.5831198664661456, "grad_norm": 0.4465416669845581, "learning_rate": 1.1717227101200189e-05, "loss": 0.0719, "step": 26463 }, { "epoch": 0.5831419017556617, "grad_norm": 0.25969621539115906, "learning_rate": 1.1716182526073716e-05, "loss": 0.0487, "step": 26464 }, { "epoch": 0.5831639370451779, "grad_norm": 0.5971487164497375, "learning_rate": 1.1715137967673487e-05, "loss": 0.0671, "step": 26465 }, { "epoch": 0.583185972334694, "grad_norm": 0.6585903763771057, "learning_rate": 1.171409342600482e-05, "loss": 0.0898, "step": 26466 }, { "epoch": 0.5832080076242102, "grad_norm": 0.6691802144050598, "learning_rate": 1.1713048901073035e-05, "loss": 0.1047, "step": 26467 }, { "epoch": 0.5832300429137264, "grad_norm": 0.6642776727676392, "learning_rate": 1.1712004392883458e-05, "loss": 0.0782, "step": 26468 }, { "epoch": 0.5832520782032425, "grad_norm": 0.8382343053817749, "learning_rate": 1.1710959901441405e-05, "loss": 0.0694, "step": 26469 }, { "epoch": 0.5832741134927587, "grad_norm": 0.6152848601341248, "learning_rate": 1.1709915426752198e-05, "loss": 0.0677, "step": 26470 }, { "epoch": 0.5832961487822748, "grad_norm": 0.8535149097442627, "learning_rate": 1.170887096882115e-05, "loss": 0.1098, "step": 26471 }, { "epoch": 0.5833181840717909, "grad_norm": 0.5172783136367798, "learning_rate": 1.1707826527653592e-05, "loss": 0.0656, "step": 26472 }, { "epoch": 0.5833402193613071, "grad_norm": 0.5225508809089661, "learning_rate": 1.170678210325484e-05, "loss": 0.054, "step": 26473 }, { "epoch": 0.5833622546508233, "grad_norm": 0.5465912222862244, "learning_rate": 1.1705737695630212e-05, "loss": 0.0461, "step": 26474 }, { "epoch": 0.5833842899403394, "grad_norm": 0.4692024290561676, "learning_rate": 1.1704693304785025e-05, "loss": 0.0621, "step": 26475 }, { "epoch": 0.5834063252298556, "grad_norm": 0.624641478061676, "learning_rate": 1.1703648930724606e-05, "loss": 0.0543, "step": 26476 }, { "epoch": 0.5834283605193717, "grad_norm": 0.6390544772148132, "learning_rate": 1.170260457345427e-05, "loss": 0.0843, "step": 26477 }, { "epoch": 0.5834503958088879, "grad_norm": 0.5102810263633728, "learning_rate": 1.1701560232979338e-05, "loss": 0.0564, "step": 26478 }, { "epoch": 0.5834724310984041, "grad_norm": 0.7685039639472961, "learning_rate": 1.170051590930512e-05, "loss": 0.0721, "step": 26479 }, { "epoch": 0.5834944663879202, "grad_norm": 0.5524777173995972, "learning_rate": 1.1699471602436953e-05, "loss": 0.0768, "step": 26480 }, { "epoch": 0.5835165016774364, "grad_norm": 0.6379274725914001, "learning_rate": 1.1698427312380145e-05, "loss": 0.0675, "step": 26481 }, { "epoch": 0.5835385369669526, "grad_norm": 0.7458392381668091, "learning_rate": 1.169738303914002e-05, "loss": 0.0757, "step": 26482 }, { "epoch": 0.5835605722564687, "grad_norm": 0.9555473923683167, "learning_rate": 1.1696338782721887e-05, "loss": 0.0847, "step": 26483 }, { "epoch": 0.5835826075459849, "grad_norm": 0.4673769772052765, "learning_rate": 1.1695294543131077e-05, "loss": 0.057, "step": 26484 }, { "epoch": 0.5836046428355011, "grad_norm": 0.7915361523628235, "learning_rate": 1.1694250320372905e-05, "loss": 0.0603, "step": 26485 }, { "epoch": 0.5836266781250172, "grad_norm": 0.5589632391929626, "learning_rate": 1.1693206114452682e-05, "loss": 0.0655, "step": 26486 }, { "epoch": 0.5836487134145334, "grad_norm": 0.575592577457428, "learning_rate": 1.1692161925375739e-05, "loss": 0.0742, "step": 26487 }, { "epoch": 0.5836707487040496, "grad_norm": 0.5145832300186157, "learning_rate": 1.1691117753147389e-05, "loss": 0.0839, "step": 26488 }, { "epoch": 0.5836927839935657, "grad_norm": 0.8042338490486145, "learning_rate": 1.1690073597772951e-05, "loss": 0.0797, "step": 26489 }, { "epoch": 0.5837148192830819, "grad_norm": 0.7957267761230469, "learning_rate": 1.1689029459257742e-05, "loss": 0.0741, "step": 26490 }, { "epoch": 0.583736854572598, "grad_norm": 0.6970858573913574, "learning_rate": 1.1687985337607081e-05, "loss": 0.0718, "step": 26491 }, { "epoch": 0.5837588898621142, "grad_norm": 0.8108178377151489, "learning_rate": 1.168694123282629e-05, "loss": 0.0733, "step": 26492 }, { "epoch": 0.5837809251516304, "grad_norm": 0.43106821179389954, "learning_rate": 1.1685897144920682e-05, "loss": 0.0564, "step": 26493 }, { "epoch": 0.5838029604411465, "grad_norm": 0.9485469460487366, "learning_rate": 1.1684853073895571e-05, "loss": 0.0897, "step": 26494 }, { "epoch": 0.5838249957306627, "grad_norm": 0.72236168384552, "learning_rate": 1.1683809019756287e-05, "loss": 0.0972, "step": 26495 }, { "epoch": 0.5838470310201788, "grad_norm": 0.43564480543136597, "learning_rate": 1.1682764982508143e-05, "loss": 0.0765, "step": 26496 }, { "epoch": 0.5838690663096949, "grad_norm": 0.7924432158470154, "learning_rate": 1.1681720962156455e-05, "loss": 0.0605, "step": 26497 }, { "epoch": 0.5838911015992111, "grad_norm": 0.7649052143096924, "learning_rate": 1.168067695870654e-05, "loss": 0.0744, "step": 26498 }, { "epoch": 0.5839131368887273, "grad_norm": 0.7680648565292358, "learning_rate": 1.1679632972163718e-05, "loss": 0.1071, "step": 26499 }, { "epoch": 0.5839351721782434, "grad_norm": 0.5444563627243042, "learning_rate": 1.1678589002533306e-05, "loss": 0.0595, "step": 26500 }, { "epoch": 0.5839572074677596, "grad_norm": 0.5260304808616638, "learning_rate": 1.1677545049820622e-05, "loss": 0.0833, "step": 26501 }, { "epoch": 0.5839792427572758, "grad_norm": 0.551483154296875, "learning_rate": 1.1676501114030977e-05, "loss": 0.098, "step": 26502 }, { "epoch": 0.5840012780467919, "grad_norm": 1.0298888683319092, "learning_rate": 1.16754571951697e-05, "loss": 0.0789, "step": 26503 }, { "epoch": 0.5840233133363081, "grad_norm": 0.5468599200248718, "learning_rate": 1.1674413293242102e-05, "loss": 0.0751, "step": 26504 }, { "epoch": 0.5840453486258242, "grad_norm": 0.5537846684455872, "learning_rate": 1.16733694082535e-05, "loss": 0.0386, "step": 26505 }, { "epoch": 0.5840673839153404, "grad_norm": 0.721187949180603, "learning_rate": 1.167232554020921e-05, "loss": 0.0919, "step": 26506 }, { "epoch": 0.5840894192048566, "grad_norm": 0.6738752126693726, "learning_rate": 1.1671281689114551e-05, "loss": 0.0662, "step": 26507 }, { "epoch": 0.5841114544943727, "grad_norm": 0.6043553948402405, "learning_rate": 1.1670237854974842e-05, "loss": 0.0986, "step": 26508 }, { "epoch": 0.5841334897838889, "grad_norm": 0.7137427926063538, "learning_rate": 1.1669194037795388e-05, "loss": 0.0646, "step": 26509 }, { "epoch": 0.5841555250734051, "grad_norm": 0.8161529302597046, "learning_rate": 1.1668150237581523e-05, "loss": 0.0554, "step": 26510 }, { "epoch": 0.5841775603629212, "grad_norm": 0.639976978302002, "learning_rate": 1.1667106454338556e-05, "loss": 0.0534, "step": 26511 }, { "epoch": 0.5841995956524374, "grad_norm": 0.485193133354187, "learning_rate": 1.1666062688071803e-05, "loss": 0.0816, "step": 26512 }, { "epoch": 0.5842216309419536, "grad_norm": 0.561998188495636, "learning_rate": 1.1665018938786577e-05, "loss": 0.0687, "step": 26513 }, { "epoch": 0.5842436662314697, "grad_norm": 0.5784890651702881, "learning_rate": 1.1663975206488202e-05, "loss": 0.0672, "step": 26514 }, { "epoch": 0.5842657015209859, "grad_norm": 0.6722127199172974, "learning_rate": 1.1662931491181986e-05, "loss": 0.0646, "step": 26515 }, { "epoch": 0.5842877368105021, "grad_norm": 0.5935181379318237, "learning_rate": 1.1661887792873253e-05, "loss": 0.0667, "step": 26516 }, { "epoch": 0.5843097721000182, "grad_norm": 0.6717353463172913, "learning_rate": 1.166084411156731e-05, "loss": 0.0907, "step": 26517 }, { "epoch": 0.5843318073895344, "grad_norm": 0.5847674608230591, "learning_rate": 1.1659800447269481e-05, "loss": 0.0389, "step": 26518 }, { "epoch": 0.5843538426790506, "grad_norm": 0.9789474606513977, "learning_rate": 1.1658756799985084e-05, "loss": 0.0978, "step": 26519 }, { "epoch": 0.5843758779685667, "grad_norm": 0.6370474100112915, "learning_rate": 1.1657713169719428e-05, "loss": 0.0885, "step": 26520 }, { "epoch": 0.5843979132580828, "grad_norm": 0.722075879573822, "learning_rate": 1.1656669556477829e-05, "loss": 0.0953, "step": 26521 }, { "epoch": 0.5844199485475989, "grad_norm": 0.7947981953620911, "learning_rate": 1.1655625960265607e-05, "loss": 0.0607, "step": 26522 }, { "epoch": 0.5844419838371151, "grad_norm": 0.3763425052165985, "learning_rate": 1.1654582381088076e-05, "loss": 0.052, "step": 26523 }, { "epoch": 0.5844640191266313, "grad_norm": 0.7427366375923157, "learning_rate": 1.165353881895055e-05, "loss": 0.0884, "step": 26524 }, { "epoch": 0.5844860544161474, "grad_norm": 0.6683576703071594, "learning_rate": 1.1652495273858337e-05, "loss": 0.0822, "step": 26525 }, { "epoch": 0.5845080897056636, "grad_norm": 0.8258944749832153, "learning_rate": 1.165145174581677e-05, "loss": 0.0814, "step": 26526 }, { "epoch": 0.5845301249951798, "grad_norm": 0.6028299331665039, "learning_rate": 1.1650408234831156e-05, "loss": 0.0932, "step": 26527 }, { "epoch": 0.5845521602846959, "grad_norm": 0.9905259609222412, "learning_rate": 1.1649364740906807e-05, "loss": 0.1111, "step": 26528 }, { "epoch": 0.5845741955742121, "grad_norm": 0.7242304682731628, "learning_rate": 1.1648321264049034e-05, "loss": 0.0544, "step": 26529 }, { "epoch": 0.5845962308637283, "grad_norm": 0.4066174328327179, "learning_rate": 1.1647277804263167e-05, "loss": 0.0822, "step": 26530 }, { "epoch": 0.5846182661532444, "grad_norm": 0.605484664440155, "learning_rate": 1.164623436155451e-05, "loss": 0.0622, "step": 26531 }, { "epoch": 0.5846403014427606, "grad_norm": 0.548206627368927, "learning_rate": 1.1645190935928373e-05, "loss": 0.111, "step": 26532 }, { "epoch": 0.5846623367322767, "grad_norm": 0.6789886951446533, "learning_rate": 1.1644147527390085e-05, "loss": 0.0885, "step": 26533 }, { "epoch": 0.5846843720217929, "grad_norm": 0.38815465569496155, "learning_rate": 1.164310413594495e-05, "loss": 0.048, "step": 26534 }, { "epoch": 0.5847064073113091, "grad_norm": 0.5474706292152405, "learning_rate": 1.164206076159829e-05, "loss": 0.0665, "step": 26535 }, { "epoch": 0.5847284426008252, "grad_norm": 0.38159963488578796, "learning_rate": 1.1641017404355405e-05, "loss": 0.0542, "step": 26536 }, { "epoch": 0.5847504778903414, "grad_norm": 0.7558639049530029, "learning_rate": 1.1639974064221629e-05, "loss": 0.0516, "step": 26537 }, { "epoch": 0.5847725131798576, "grad_norm": 0.45330649614334106, "learning_rate": 1.1638930741202266e-05, "loss": 0.1025, "step": 26538 }, { "epoch": 0.5847945484693737, "grad_norm": 0.3870031237602234, "learning_rate": 1.163788743530263e-05, "loss": 0.0725, "step": 26539 }, { "epoch": 0.5848165837588899, "grad_norm": 0.8711735010147095, "learning_rate": 1.1636844146528035e-05, "loss": 0.0562, "step": 26540 }, { "epoch": 0.5848386190484061, "grad_norm": 0.479317307472229, "learning_rate": 1.1635800874883797e-05, "loss": 0.059, "step": 26541 }, { "epoch": 0.5848606543379222, "grad_norm": 0.5455420017242432, "learning_rate": 1.163475762037523e-05, "loss": 0.0699, "step": 26542 }, { "epoch": 0.5848826896274384, "grad_norm": 0.42775267362594604, "learning_rate": 1.1633714383007648e-05, "loss": 0.0696, "step": 26543 }, { "epoch": 0.5849047249169546, "grad_norm": 0.5990744233131409, "learning_rate": 1.1632671162786356e-05, "loss": 0.0533, "step": 26544 }, { "epoch": 0.5849267602064706, "grad_norm": 0.6700082421302795, "learning_rate": 1.1631627959716682e-05, "loss": 0.0725, "step": 26545 }, { "epoch": 0.5849487954959868, "grad_norm": 0.8786799311637878, "learning_rate": 1.1630584773803935e-05, "loss": 0.0552, "step": 26546 }, { "epoch": 0.5849708307855029, "grad_norm": 0.5786840915679932, "learning_rate": 1.1629541605053423e-05, "loss": 0.0608, "step": 26547 }, { "epoch": 0.5849928660750191, "grad_norm": 0.9086062908172607, "learning_rate": 1.1628498453470464e-05, "loss": 0.0653, "step": 26548 }, { "epoch": 0.5850149013645353, "grad_norm": 1.1349071264266968, "learning_rate": 1.1627455319060371e-05, "loss": 0.0641, "step": 26549 }, { "epoch": 0.5850369366540514, "grad_norm": 0.8164853453636169, "learning_rate": 1.1626412201828458e-05, "loss": 0.0808, "step": 26550 }, { "epoch": 0.5850589719435676, "grad_norm": 0.41736841201782227, "learning_rate": 1.1625369101780029e-05, "loss": 0.0634, "step": 26551 }, { "epoch": 0.5850810072330838, "grad_norm": 0.6186199188232422, "learning_rate": 1.1624326018920413e-05, "loss": 0.0593, "step": 26552 }, { "epoch": 0.5851030425225999, "grad_norm": 0.6749297380447388, "learning_rate": 1.1623282953254913e-05, "loss": 0.0957, "step": 26553 }, { "epoch": 0.5851250778121161, "grad_norm": 0.3756633996963501, "learning_rate": 1.1622239904788845e-05, "loss": 0.0792, "step": 26554 }, { "epoch": 0.5851471131016323, "grad_norm": 0.45421290397644043, "learning_rate": 1.1621196873527515e-05, "loss": 0.0663, "step": 26555 }, { "epoch": 0.5851691483911484, "grad_norm": 1.288993239402771, "learning_rate": 1.1620153859476247e-05, "loss": 0.0859, "step": 26556 }, { "epoch": 0.5851911836806646, "grad_norm": 0.6187832355499268, "learning_rate": 1.1619110862640343e-05, "loss": 0.0824, "step": 26557 }, { "epoch": 0.5852132189701807, "grad_norm": 0.4824051558971405, "learning_rate": 1.1618067883025125e-05, "loss": 0.0324, "step": 26558 }, { "epoch": 0.5852352542596969, "grad_norm": 0.7975952625274658, "learning_rate": 1.1617024920635894e-05, "loss": 0.0778, "step": 26559 }, { "epoch": 0.5852572895492131, "grad_norm": 0.6144377589225769, "learning_rate": 1.1615981975477975e-05, "loss": 0.041, "step": 26560 }, { "epoch": 0.5852793248387292, "grad_norm": 0.8115226030349731, "learning_rate": 1.1614939047556675e-05, "loss": 0.0688, "step": 26561 }, { "epoch": 0.5853013601282454, "grad_norm": 0.3853030204772949, "learning_rate": 1.1613896136877304e-05, "loss": 0.06, "step": 26562 }, { "epoch": 0.5853233954177616, "grad_norm": 0.7874189615249634, "learning_rate": 1.1612853243445174e-05, "loss": 0.0715, "step": 26563 }, { "epoch": 0.5853454307072777, "grad_norm": 0.7754642963409424, "learning_rate": 1.16118103672656e-05, "loss": 0.0663, "step": 26564 }, { "epoch": 0.5853674659967939, "grad_norm": 0.24888740479946136, "learning_rate": 1.1610767508343893e-05, "loss": 0.0705, "step": 26565 }, { "epoch": 0.5853895012863101, "grad_norm": 0.46357253193855286, "learning_rate": 1.1609724666685366e-05, "loss": 0.0703, "step": 26566 }, { "epoch": 0.5854115365758262, "grad_norm": 0.7372409105300903, "learning_rate": 1.1608681842295323e-05, "loss": 0.0885, "step": 26567 }, { "epoch": 0.5854335718653424, "grad_norm": 0.605137288570404, "learning_rate": 1.160763903517909e-05, "loss": 0.0547, "step": 26568 }, { "epoch": 0.5854556071548586, "grad_norm": 0.7148146629333496, "learning_rate": 1.1606596245341967e-05, "loss": 0.0779, "step": 26569 }, { "epoch": 0.5854776424443746, "grad_norm": 0.8173667788505554, "learning_rate": 1.160555347278927e-05, "loss": 0.0742, "step": 26570 }, { "epoch": 0.5854996777338908, "grad_norm": 0.2191147804260254, "learning_rate": 1.1604510717526308e-05, "loss": 0.053, "step": 26571 }, { "epoch": 0.5855217130234069, "grad_norm": 0.4406298100948334, "learning_rate": 1.1603467979558395e-05, "loss": 0.0416, "step": 26572 }, { "epoch": 0.5855437483129231, "grad_norm": 0.3292156755924225, "learning_rate": 1.160242525889084e-05, "loss": 0.0451, "step": 26573 }, { "epoch": 0.5855657836024393, "grad_norm": 0.3199172616004944, "learning_rate": 1.1601382555528952e-05, "loss": 0.0634, "step": 26574 }, { "epoch": 0.5855878188919554, "grad_norm": 0.8936137557029724, "learning_rate": 1.160033986947805e-05, "loss": 0.0776, "step": 26575 }, { "epoch": 0.5856098541814716, "grad_norm": 0.6277233958244324, "learning_rate": 1.1599297200743438e-05, "loss": 0.0665, "step": 26576 }, { "epoch": 0.5856318894709878, "grad_norm": 0.41375452280044556, "learning_rate": 1.159825454933043e-05, "loss": 0.0549, "step": 26577 }, { "epoch": 0.5856539247605039, "grad_norm": 0.7632766962051392, "learning_rate": 1.1597211915244334e-05, "loss": 0.0918, "step": 26578 }, { "epoch": 0.5856759600500201, "grad_norm": 0.7492431998252869, "learning_rate": 1.1596169298490466e-05, "loss": 0.0857, "step": 26579 }, { "epoch": 0.5856979953395363, "grad_norm": 0.6900736093521118, "learning_rate": 1.1595126699074131e-05, "loss": 0.0664, "step": 26580 }, { "epoch": 0.5857200306290524, "grad_norm": 0.6409854888916016, "learning_rate": 1.1594084117000643e-05, "loss": 0.068, "step": 26581 }, { "epoch": 0.5857420659185686, "grad_norm": 0.7680370211601257, "learning_rate": 1.1593041552275305e-05, "loss": 0.0789, "step": 26582 }, { "epoch": 0.5857641012080848, "grad_norm": 0.7216267585754395, "learning_rate": 1.1591999004903437e-05, "loss": 0.0803, "step": 26583 }, { "epoch": 0.5857861364976009, "grad_norm": 0.6386614441871643, "learning_rate": 1.1590956474890348e-05, "loss": 0.1034, "step": 26584 }, { "epoch": 0.5858081717871171, "grad_norm": 0.5069708824157715, "learning_rate": 1.1589913962241345e-05, "loss": 0.0545, "step": 26585 }, { "epoch": 0.5858302070766332, "grad_norm": 0.6656899452209473, "learning_rate": 1.1588871466961736e-05, "loss": 0.0689, "step": 26586 }, { "epoch": 0.5858522423661494, "grad_norm": 0.4605351984500885, "learning_rate": 1.1587828989056836e-05, "loss": 0.0652, "step": 26587 }, { "epoch": 0.5858742776556656, "grad_norm": 0.5833455324172974, "learning_rate": 1.1586786528531954e-05, "loss": 0.0563, "step": 26588 }, { "epoch": 0.5858963129451817, "grad_norm": 0.3594664931297302, "learning_rate": 1.1585744085392399e-05, "loss": 0.0513, "step": 26589 }, { "epoch": 0.5859183482346979, "grad_norm": 0.5792624354362488, "learning_rate": 1.1584701659643473e-05, "loss": 0.0593, "step": 26590 }, { "epoch": 0.5859403835242141, "grad_norm": 0.8963255286216736, "learning_rate": 1.1583659251290498e-05, "loss": 0.0499, "step": 26591 }, { "epoch": 0.5859624188137302, "grad_norm": 0.5640210509300232, "learning_rate": 1.158261686033878e-05, "loss": 0.0701, "step": 26592 }, { "epoch": 0.5859844541032464, "grad_norm": 0.9934061765670776, "learning_rate": 1.1581574486793626e-05, "loss": 0.0905, "step": 26593 }, { "epoch": 0.5860064893927626, "grad_norm": 1.2220665216445923, "learning_rate": 1.1580532130660344e-05, "loss": 0.1193, "step": 26594 }, { "epoch": 0.5860285246822786, "grad_norm": 0.5210126042366028, "learning_rate": 1.1579489791944248e-05, "loss": 0.0718, "step": 26595 }, { "epoch": 0.5860505599717948, "grad_norm": 0.647673487663269, "learning_rate": 1.1578447470650646e-05, "loss": 0.0373, "step": 26596 }, { "epoch": 0.586072595261311, "grad_norm": 0.7151232361793518, "learning_rate": 1.1577405166784837e-05, "loss": 0.0797, "step": 26597 }, { "epoch": 0.5860946305508271, "grad_norm": 0.5722622871398926, "learning_rate": 1.1576362880352148e-05, "loss": 0.0811, "step": 26598 }, { "epoch": 0.5861166658403433, "grad_norm": 0.2582981288433075, "learning_rate": 1.1575320611357877e-05, "loss": 0.053, "step": 26599 }, { "epoch": 0.5861387011298594, "grad_norm": 0.5451840162277222, "learning_rate": 1.1574278359807336e-05, "loss": 0.0719, "step": 26600 }, { "epoch": 0.5861607364193756, "grad_norm": 0.4559346139431, "learning_rate": 1.1573236125705827e-05, "loss": 0.0423, "step": 26601 }, { "epoch": 0.5861827717088918, "grad_norm": 0.4694922864437103, "learning_rate": 1.1572193909058668e-05, "loss": 0.0798, "step": 26602 }, { "epoch": 0.5862048069984079, "grad_norm": 0.5772594213485718, "learning_rate": 1.1571151709871164e-05, "loss": 0.0535, "step": 26603 }, { "epoch": 0.5862268422879241, "grad_norm": 0.9329714179039001, "learning_rate": 1.1570109528148623e-05, "loss": 0.0914, "step": 26604 }, { "epoch": 0.5862488775774403, "grad_norm": 0.37294402718544006, "learning_rate": 1.1569067363896346e-05, "loss": 0.0426, "step": 26605 }, { "epoch": 0.5862709128669564, "grad_norm": 0.4675942361354828, "learning_rate": 1.1568025217119656e-05, "loss": 0.0554, "step": 26606 }, { "epoch": 0.5862929481564726, "grad_norm": 0.7276342511177063, "learning_rate": 1.1566983087823854e-05, "loss": 0.0663, "step": 26607 }, { "epoch": 0.5863149834459888, "grad_norm": 0.5478403568267822, "learning_rate": 1.1565940976014249e-05, "loss": 0.0696, "step": 26608 }, { "epoch": 0.5863370187355049, "grad_norm": 0.485005646944046, "learning_rate": 1.1564898881696142e-05, "loss": 0.0483, "step": 26609 }, { "epoch": 0.5863590540250211, "grad_norm": 0.8287637829780579, "learning_rate": 1.1563856804874853e-05, "loss": 0.0663, "step": 26610 }, { "epoch": 0.5863810893145373, "grad_norm": 0.5469222068786621, "learning_rate": 1.1562814745555683e-05, "loss": 0.0717, "step": 26611 }, { "epoch": 0.5864031246040534, "grad_norm": 0.3995700180530548, "learning_rate": 1.1561772703743941e-05, "loss": 0.0952, "step": 26612 }, { "epoch": 0.5864251598935696, "grad_norm": 0.7157912254333496, "learning_rate": 1.1560730679444927e-05, "loss": 0.0697, "step": 26613 }, { "epoch": 0.5864471951830857, "grad_norm": 0.9406808018684387, "learning_rate": 1.1559688672663964e-05, "loss": 0.0854, "step": 26614 }, { "epoch": 0.5864692304726019, "grad_norm": 0.7990658283233643, "learning_rate": 1.1558646683406349e-05, "loss": 0.0563, "step": 26615 }, { "epoch": 0.5864912657621181, "grad_norm": 0.7799823880195618, "learning_rate": 1.1557604711677394e-05, "loss": 0.0891, "step": 26616 }, { "epoch": 0.5865133010516342, "grad_norm": 0.4880264103412628, "learning_rate": 1.1556562757482401e-05, "loss": 0.0564, "step": 26617 }, { "epoch": 0.5865353363411504, "grad_norm": 0.6597219109535217, "learning_rate": 1.1555520820826682e-05, "loss": 0.0936, "step": 26618 }, { "epoch": 0.5865573716306665, "grad_norm": 0.4140585660934448, "learning_rate": 1.1554478901715543e-05, "loss": 0.0393, "step": 26619 }, { "epoch": 0.5865794069201826, "grad_norm": 0.2284037470817566, "learning_rate": 1.1553437000154285e-05, "loss": 0.0733, "step": 26620 }, { "epoch": 0.5866014422096988, "grad_norm": 0.6209779977798462, "learning_rate": 1.1552395116148227e-05, "loss": 0.0817, "step": 26621 }, { "epoch": 0.586623477499215, "grad_norm": 0.9056647419929504, "learning_rate": 1.1551353249702667e-05, "loss": 0.0573, "step": 26622 }, { "epoch": 0.5866455127887311, "grad_norm": 0.7998713850975037, "learning_rate": 1.1550311400822916e-05, "loss": 0.0661, "step": 26623 }, { "epoch": 0.5866675480782473, "grad_norm": 0.8023237586021423, "learning_rate": 1.1549269569514276e-05, "loss": 0.1319, "step": 26624 }, { "epoch": 0.5866895833677634, "grad_norm": 0.4347778558731079, "learning_rate": 1.154822775578206e-05, "loss": 0.073, "step": 26625 }, { "epoch": 0.5867116186572796, "grad_norm": 0.6156967878341675, "learning_rate": 1.154718595963157e-05, "loss": 0.061, "step": 26626 }, { "epoch": 0.5867336539467958, "grad_norm": 1.028569221496582, "learning_rate": 1.1546144181068115e-05, "loss": 0.0973, "step": 26627 }, { "epoch": 0.5867556892363119, "grad_norm": 0.7466961145401001, "learning_rate": 1.1545102420096991e-05, "loss": 0.0591, "step": 26628 }, { "epoch": 0.5867777245258281, "grad_norm": 0.7612287998199463, "learning_rate": 1.1544060676723521e-05, "loss": 0.0683, "step": 26629 }, { "epoch": 0.5867997598153443, "grad_norm": 0.6058725714683533, "learning_rate": 1.1543018950953004e-05, "loss": 0.0912, "step": 26630 }, { "epoch": 0.5868217951048604, "grad_norm": 0.3627963662147522, "learning_rate": 1.1541977242790745e-05, "loss": 0.0475, "step": 26631 }, { "epoch": 0.5868438303943766, "grad_norm": 0.5304856896400452, "learning_rate": 1.1540935552242045e-05, "loss": 0.0656, "step": 26632 }, { "epoch": 0.5868658656838928, "grad_norm": 0.5957834124565125, "learning_rate": 1.1539893879312221e-05, "loss": 0.063, "step": 26633 }, { "epoch": 0.5868879009734089, "grad_norm": 0.40089014172554016, "learning_rate": 1.1538852224006571e-05, "loss": 0.0551, "step": 26634 }, { "epoch": 0.5869099362629251, "grad_norm": 0.7266516089439392, "learning_rate": 1.1537810586330403e-05, "loss": 0.076, "step": 26635 }, { "epoch": 0.5869319715524413, "grad_norm": 0.5897171497344971, "learning_rate": 1.1536768966289017e-05, "loss": 0.0653, "step": 26636 }, { "epoch": 0.5869540068419574, "grad_norm": 0.5295142531394958, "learning_rate": 1.1535727363887728e-05, "loss": 0.063, "step": 26637 }, { "epoch": 0.5869760421314736, "grad_norm": 0.7284359931945801, "learning_rate": 1.1534685779131839e-05, "loss": 0.0818, "step": 26638 }, { "epoch": 0.5869980774209898, "grad_norm": 1.0475763082504272, "learning_rate": 1.1533644212026653e-05, "loss": 0.1065, "step": 26639 }, { "epoch": 0.5870201127105059, "grad_norm": 0.49386516213417053, "learning_rate": 1.1532602662577468e-05, "loss": 0.0644, "step": 26640 }, { "epoch": 0.5870421480000221, "grad_norm": 0.7391321659088135, "learning_rate": 1.1531561130789607e-05, "loss": 0.0597, "step": 26641 }, { "epoch": 0.5870641832895382, "grad_norm": 0.7572419047355652, "learning_rate": 1.1530519616668366e-05, "loss": 0.0806, "step": 26642 }, { "epoch": 0.5870862185790544, "grad_norm": 0.6211380362510681, "learning_rate": 1.1529478120219039e-05, "loss": 0.0798, "step": 26643 }, { "epoch": 0.5871082538685705, "grad_norm": 0.6245557069778442, "learning_rate": 1.152843664144695e-05, "loss": 0.0509, "step": 26644 }, { "epoch": 0.5871302891580866, "grad_norm": 0.5006248950958252, "learning_rate": 1.1527395180357391e-05, "loss": 0.0447, "step": 26645 }, { "epoch": 0.5871523244476028, "grad_norm": 0.48633497953414917, "learning_rate": 1.1526353736955673e-05, "loss": 0.0819, "step": 26646 }, { "epoch": 0.587174359737119, "grad_norm": 0.5027257204055786, "learning_rate": 1.152531231124709e-05, "loss": 0.0692, "step": 26647 }, { "epoch": 0.5871963950266351, "grad_norm": 0.5571626424789429, "learning_rate": 1.1524270903236963e-05, "loss": 0.0712, "step": 26648 }, { "epoch": 0.5872184303161513, "grad_norm": 0.82099848985672, "learning_rate": 1.1523229512930587e-05, "loss": 0.085, "step": 26649 }, { "epoch": 0.5872404656056674, "grad_norm": 0.8506579399108887, "learning_rate": 1.1522188140333268e-05, "loss": 0.0855, "step": 26650 }, { "epoch": 0.5872625008951836, "grad_norm": 0.49251168966293335, "learning_rate": 1.1521146785450304e-05, "loss": 0.0446, "step": 26651 }, { "epoch": 0.5872845361846998, "grad_norm": 0.6905243992805481, "learning_rate": 1.1520105448287012e-05, "loss": 0.0703, "step": 26652 }, { "epoch": 0.5873065714742159, "grad_norm": 0.5943554043769836, "learning_rate": 1.1519064128848686e-05, "loss": 0.0663, "step": 26653 }, { "epoch": 0.5873286067637321, "grad_norm": 0.6755735278129578, "learning_rate": 1.1518022827140635e-05, "loss": 0.0453, "step": 26654 }, { "epoch": 0.5873506420532483, "grad_norm": 0.4466359615325928, "learning_rate": 1.1516981543168154e-05, "loss": 0.0509, "step": 26655 }, { "epoch": 0.5873726773427644, "grad_norm": 0.6753738522529602, "learning_rate": 1.1515940276936562e-05, "loss": 0.0565, "step": 26656 }, { "epoch": 0.5873947126322806, "grad_norm": 0.6989092826843262, "learning_rate": 1.1514899028451152e-05, "loss": 0.0674, "step": 26657 }, { "epoch": 0.5874167479217968, "grad_norm": 0.7058649063110352, "learning_rate": 1.1513857797717231e-05, "loss": 0.0607, "step": 26658 }, { "epoch": 0.5874387832113129, "grad_norm": 0.5107815265655518, "learning_rate": 1.1512816584740098e-05, "loss": 0.0663, "step": 26659 }, { "epoch": 0.5874608185008291, "grad_norm": 0.5780454277992249, "learning_rate": 1.1511775389525062e-05, "loss": 0.0755, "step": 26660 }, { "epoch": 0.5874828537903453, "grad_norm": 0.5898837447166443, "learning_rate": 1.1510734212077427e-05, "loss": 0.0564, "step": 26661 }, { "epoch": 0.5875048890798614, "grad_norm": 0.6461471319198608, "learning_rate": 1.1509693052402493e-05, "loss": 0.0495, "step": 26662 }, { "epoch": 0.5875269243693776, "grad_norm": 0.5208277106285095, "learning_rate": 1.1508651910505556e-05, "loss": 0.0962, "step": 26663 }, { "epoch": 0.5875489596588938, "grad_norm": 0.4008208215236664, "learning_rate": 1.1507610786391936e-05, "loss": 0.0704, "step": 26664 }, { "epoch": 0.5875709949484099, "grad_norm": 0.5030128359794617, "learning_rate": 1.1506569680066925e-05, "loss": 0.0628, "step": 26665 }, { "epoch": 0.5875930302379261, "grad_norm": 0.7402938604354858, "learning_rate": 1.1505528591535827e-05, "loss": 0.0716, "step": 26666 }, { "epoch": 0.5876150655274422, "grad_norm": 0.7417558431625366, "learning_rate": 1.1504487520803946e-05, "loss": 0.0791, "step": 26667 }, { "epoch": 0.5876371008169584, "grad_norm": 0.685173511505127, "learning_rate": 1.1503446467876585e-05, "loss": 0.0704, "step": 26668 }, { "epoch": 0.5876591361064745, "grad_norm": 0.7427347898483276, "learning_rate": 1.1502405432759047e-05, "loss": 0.0859, "step": 26669 }, { "epoch": 0.5876811713959906, "grad_norm": 0.7186024785041809, "learning_rate": 1.1501364415456628e-05, "loss": 0.0672, "step": 26670 }, { "epoch": 0.5877032066855068, "grad_norm": 0.6820163726806641, "learning_rate": 1.1500323415974642e-05, "loss": 0.0425, "step": 26671 }, { "epoch": 0.587725241975023, "grad_norm": 0.43257132172584534, "learning_rate": 1.1499282434318386e-05, "loss": 0.059, "step": 26672 }, { "epoch": 0.5877472772645391, "grad_norm": 0.8401899933815002, "learning_rate": 1.149824147049316e-05, "loss": 0.084, "step": 26673 }, { "epoch": 0.5877693125540553, "grad_norm": 0.5218868851661682, "learning_rate": 1.1497200524504267e-05, "loss": 0.0568, "step": 26674 }, { "epoch": 0.5877913478435715, "grad_norm": 0.3234390318393707, "learning_rate": 1.1496159596357012e-05, "loss": 0.0549, "step": 26675 }, { "epoch": 0.5878133831330876, "grad_norm": 0.8635520935058594, "learning_rate": 1.1495118686056696e-05, "loss": 0.06, "step": 26676 }, { "epoch": 0.5878354184226038, "grad_norm": 0.6388773322105408, "learning_rate": 1.1494077793608619e-05, "loss": 0.0743, "step": 26677 }, { "epoch": 0.58785745371212, "grad_norm": 0.7785149812698364, "learning_rate": 1.1493036919018078e-05, "loss": 0.0579, "step": 26678 }, { "epoch": 0.5878794890016361, "grad_norm": 0.6046472191810608, "learning_rate": 1.1491996062290385e-05, "loss": 0.0632, "step": 26679 }, { "epoch": 0.5879015242911523, "grad_norm": 0.5694724917411804, "learning_rate": 1.149095522343084e-05, "loss": 0.1022, "step": 26680 }, { "epoch": 0.5879235595806684, "grad_norm": 0.7551653385162354, "learning_rate": 1.148991440244474e-05, "loss": 0.0561, "step": 26681 }, { "epoch": 0.5879455948701846, "grad_norm": 0.9038933515548706, "learning_rate": 1.1488873599337387e-05, "loss": 0.0907, "step": 26682 }, { "epoch": 0.5879676301597008, "grad_norm": 0.4486486315727234, "learning_rate": 1.1487832814114085e-05, "loss": 0.0522, "step": 26683 }, { "epoch": 0.5879896654492169, "grad_norm": 0.3326857089996338, "learning_rate": 1.1486792046780133e-05, "loss": 0.0875, "step": 26684 }, { "epoch": 0.5880117007387331, "grad_norm": 0.4638771712779999, "learning_rate": 1.1485751297340828e-05, "loss": 0.0793, "step": 26685 }, { "epoch": 0.5880337360282493, "grad_norm": 0.793373167514801, "learning_rate": 1.1484710565801483e-05, "loss": 0.0908, "step": 26686 }, { "epoch": 0.5880557713177654, "grad_norm": 0.5571010112762451, "learning_rate": 1.1483669852167392e-05, "loss": 0.0738, "step": 26687 }, { "epoch": 0.5880778066072816, "grad_norm": 0.47759580612182617, "learning_rate": 1.1482629156443857e-05, "loss": 0.0468, "step": 26688 }, { "epoch": 0.5880998418967978, "grad_norm": 0.6496675610542297, "learning_rate": 1.1481588478636173e-05, "loss": 0.0391, "step": 26689 }, { "epoch": 0.5881218771863139, "grad_norm": 0.8636976480484009, "learning_rate": 1.1480547818749648e-05, "loss": 0.0669, "step": 26690 }, { "epoch": 0.5881439124758301, "grad_norm": 0.5189411640167236, "learning_rate": 1.1479507176789581e-05, "loss": 0.0578, "step": 26691 }, { "epoch": 0.5881659477653463, "grad_norm": 0.6989296078681946, "learning_rate": 1.1478466552761273e-05, "loss": 0.0805, "step": 26692 }, { "epoch": 0.5881879830548623, "grad_norm": 0.6330820918083191, "learning_rate": 1.1477425946670015e-05, "loss": 0.0616, "step": 26693 }, { "epoch": 0.5882100183443785, "grad_norm": 0.5442829132080078, "learning_rate": 1.1476385358521123e-05, "loss": 0.0544, "step": 26694 }, { "epoch": 0.5882320536338946, "grad_norm": 0.5249534249305725, "learning_rate": 1.147534478831989e-05, "loss": 0.1002, "step": 26695 }, { "epoch": 0.5882540889234108, "grad_norm": 0.8007626533508301, "learning_rate": 1.1474304236071617e-05, "loss": 0.0883, "step": 26696 }, { "epoch": 0.588276124212927, "grad_norm": 1.1471033096313477, "learning_rate": 1.1473263701781599e-05, "loss": 0.0801, "step": 26697 }, { "epoch": 0.5882981595024431, "grad_norm": 0.38371315598487854, "learning_rate": 1.1472223185455143e-05, "loss": 0.0792, "step": 26698 }, { "epoch": 0.5883201947919593, "grad_norm": 0.7035272121429443, "learning_rate": 1.1471182687097545e-05, "loss": 0.0651, "step": 26699 }, { "epoch": 0.5883422300814755, "grad_norm": 1.0083321332931519, "learning_rate": 1.1470142206714107e-05, "loss": 0.1033, "step": 26700 }, { "epoch": 0.5883642653709916, "grad_norm": 1.0756125450134277, "learning_rate": 1.1469101744310124e-05, "loss": 0.0851, "step": 26701 }, { "epoch": 0.5883863006605078, "grad_norm": 0.6302971839904785, "learning_rate": 1.1468061299890901e-05, "loss": 0.0654, "step": 26702 }, { "epoch": 0.588408335950024, "grad_norm": 0.8407626152038574, "learning_rate": 1.1467020873461739e-05, "loss": 0.0535, "step": 26703 }, { "epoch": 0.5884303712395401, "grad_norm": 0.6383237242698669, "learning_rate": 1.1465980465027932e-05, "loss": 0.0588, "step": 26704 }, { "epoch": 0.5884524065290563, "grad_norm": 0.6275511384010315, "learning_rate": 1.1464940074594779e-05, "loss": 0.0706, "step": 26705 }, { "epoch": 0.5884744418185724, "grad_norm": 0.5482282638549805, "learning_rate": 1.1463899702167585e-05, "loss": 0.0707, "step": 26706 }, { "epoch": 0.5884964771080886, "grad_norm": 0.6791716814041138, "learning_rate": 1.1462859347751648e-05, "loss": 0.0804, "step": 26707 }, { "epoch": 0.5885185123976048, "grad_norm": 0.6898348927497864, "learning_rate": 1.1461819011352257e-05, "loss": 0.0565, "step": 26708 }, { "epoch": 0.5885405476871209, "grad_norm": 0.7394020557403564, "learning_rate": 1.1460778692974726e-05, "loss": 0.0695, "step": 26709 }, { "epoch": 0.5885625829766371, "grad_norm": 0.3356005847454071, "learning_rate": 1.1459738392624347e-05, "loss": 0.0647, "step": 26710 }, { "epoch": 0.5885846182661533, "grad_norm": 0.599797248840332, "learning_rate": 1.1458698110306418e-05, "loss": 0.0568, "step": 26711 }, { "epoch": 0.5886066535556694, "grad_norm": 0.7373132705688477, "learning_rate": 1.1457657846026236e-05, "loss": 0.0721, "step": 26712 }, { "epoch": 0.5886286888451856, "grad_norm": 0.2998732030391693, "learning_rate": 1.1456617599789105e-05, "loss": 0.0493, "step": 26713 }, { "epoch": 0.5886507241347018, "grad_norm": 0.44881969690322876, "learning_rate": 1.1455577371600321e-05, "loss": 0.0806, "step": 26714 }, { "epoch": 0.5886727594242179, "grad_norm": 0.4884415864944458, "learning_rate": 1.1454537161465181e-05, "loss": 0.0926, "step": 26715 }, { "epoch": 0.5886947947137341, "grad_norm": 0.3123207986354828, "learning_rate": 1.145349696938898e-05, "loss": 0.0467, "step": 26716 }, { "epoch": 0.5887168300032503, "grad_norm": 0.738351583480835, "learning_rate": 1.1452456795377025e-05, "loss": 0.0683, "step": 26717 }, { "epoch": 0.5887388652927663, "grad_norm": 0.958867609500885, "learning_rate": 1.145141663943461e-05, "loss": 0.0532, "step": 26718 }, { "epoch": 0.5887609005822825, "grad_norm": 0.5910281538963318, "learning_rate": 1.1450376501567035e-05, "loss": 0.0748, "step": 26719 }, { "epoch": 0.5887829358717986, "grad_norm": 0.5243896842002869, "learning_rate": 1.1449336381779593e-05, "loss": 0.0669, "step": 26720 }, { "epoch": 0.5888049711613148, "grad_norm": 0.8469116687774658, "learning_rate": 1.1448296280077586e-05, "loss": 0.0796, "step": 26721 }, { "epoch": 0.588827006450831, "grad_norm": 0.9379822611808777, "learning_rate": 1.144725619646631e-05, "loss": 0.0745, "step": 26722 }, { "epoch": 0.5888490417403471, "grad_norm": 0.5372120141983032, "learning_rate": 1.1446216130951064e-05, "loss": 0.0527, "step": 26723 }, { "epoch": 0.5888710770298633, "grad_norm": 0.3923507630825043, "learning_rate": 1.144517608353714e-05, "loss": 0.0955, "step": 26724 }, { "epoch": 0.5888931123193795, "grad_norm": 0.5298668146133423, "learning_rate": 1.1444136054229847e-05, "loss": 0.059, "step": 26725 }, { "epoch": 0.5889151476088956, "grad_norm": 0.8591517210006714, "learning_rate": 1.1443096043034474e-05, "loss": 0.0803, "step": 26726 }, { "epoch": 0.5889371828984118, "grad_norm": 0.5876525044441223, "learning_rate": 1.1442056049956322e-05, "loss": 0.0838, "step": 26727 }, { "epoch": 0.588959218187928, "grad_norm": 0.4886619746685028, "learning_rate": 1.1441016075000683e-05, "loss": 0.0636, "step": 26728 }, { "epoch": 0.5889812534774441, "grad_norm": 0.455946147441864, "learning_rate": 1.143997611817286e-05, "loss": 0.0604, "step": 26729 }, { "epoch": 0.5890032887669603, "grad_norm": 0.47370073199272156, "learning_rate": 1.1438936179478147e-05, "loss": 0.0634, "step": 26730 }, { "epoch": 0.5890253240564765, "grad_norm": 0.5434890389442444, "learning_rate": 1.1437896258921836e-05, "loss": 0.0774, "step": 26731 }, { "epoch": 0.5890473593459926, "grad_norm": 0.881649911403656, "learning_rate": 1.1436856356509236e-05, "loss": 0.069, "step": 26732 }, { "epoch": 0.5890693946355088, "grad_norm": 0.8283348083496094, "learning_rate": 1.1435816472245637e-05, "loss": 0.0735, "step": 26733 }, { "epoch": 0.589091429925025, "grad_norm": 1.0301579236984253, "learning_rate": 1.1434776606136337e-05, "loss": 0.0805, "step": 26734 }, { "epoch": 0.5891134652145411, "grad_norm": 0.4753168523311615, "learning_rate": 1.1433736758186627e-05, "loss": 0.0844, "step": 26735 }, { "epoch": 0.5891355005040573, "grad_norm": 0.4087769389152527, "learning_rate": 1.1432696928401813e-05, "loss": 0.0512, "step": 26736 }, { "epoch": 0.5891575357935734, "grad_norm": 0.5339642763137817, "learning_rate": 1.1431657116787183e-05, "loss": 0.0644, "step": 26737 }, { "epoch": 0.5891795710830896, "grad_norm": 0.6106019020080566, "learning_rate": 1.143061732334804e-05, "loss": 0.0705, "step": 26738 }, { "epoch": 0.5892016063726058, "grad_norm": 0.6519104242324829, "learning_rate": 1.142957754808967e-05, "loss": 0.0627, "step": 26739 }, { "epoch": 0.5892236416621219, "grad_norm": 0.4876862168312073, "learning_rate": 1.1428537791017383e-05, "loss": 0.0811, "step": 26740 }, { "epoch": 0.5892456769516381, "grad_norm": 0.28518542647361755, "learning_rate": 1.1427498052136468e-05, "loss": 0.0725, "step": 26741 }, { "epoch": 0.5892677122411543, "grad_norm": 0.9562013149261475, "learning_rate": 1.1426458331452222e-05, "loss": 0.1016, "step": 26742 }, { "epoch": 0.5892897475306703, "grad_norm": 0.422031968832016, "learning_rate": 1.1425418628969934e-05, "loss": 0.0709, "step": 26743 }, { "epoch": 0.5893117828201865, "grad_norm": 0.9541630148887634, "learning_rate": 1.1424378944694911e-05, "loss": 0.051, "step": 26744 }, { "epoch": 0.5893338181097026, "grad_norm": 0.8210806250572205, "learning_rate": 1.1423339278632443e-05, "loss": 0.0512, "step": 26745 }, { "epoch": 0.5893558533992188, "grad_norm": 0.501284658908844, "learning_rate": 1.1422299630787826e-05, "loss": 0.0943, "step": 26746 }, { "epoch": 0.589377888688735, "grad_norm": 0.9547885656356812, "learning_rate": 1.1421260001166348e-05, "loss": 0.0686, "step": 26747 }, { "epoch": 0.5893999239782511, "grad_norm": 0.35118794441223145, "learning_rate": 1.142022038977332e-05, "loss": 0.0723, "step": 26748 }, { "epoch": 0.5894219592677673, "grad_norm": 0.693574845790863, "learning_rate": 1.1419180796614029e-05, "loss": 0.0855, "step": 26749 }, { "epoch": 0.5894439945572835, "grad_norm": 0.8143438696861267, "learning_rate": 1.141814122169377e-05, "loss": 0.0625, "step": 26750 }, { "epoch": 0.5894660298467996, "grad_norm": 0.9927691221237183, "learning_rate": 1.141710166501783e-05, "loss": 0.0854, "step": 26751 }, { "epoch": 0.5894880651363158, "grad_norm": 0.5521179437637329, "learning_rate": 1.1416062126591524e-05, "loss": 0.0802, "step": 26752 }, { "epoch": 0.589510100425832, "grad_norm": 0.621245801448822, "learning_rate": 1.1415022606420135e-05, "loss": 0.0465, "step": 26753 }, { "epoch": 0.5895321357153481, "grad_norm": 0.4691474139690399, "learning_rate": 1.1413983104508949e-05, "loss": 0.0518, "step": 26754 }, { "epoch": 0.5895541710048643, "grad_norm": 0.6850621104240417, "learning_rate": 1.1412943620863276e-05, "loss": 0.0894, "step": 26755 }, { "epoch": 0.5895762062943805, "grad_norm": 0.5273554921150208, "learning_rate": 1.1411904155488406e-05, "loss": 0.0469, "step": 26756 }, { "epoch": 0.5895982415838966, "grad_norm": 0.8599420785903931, "learning_rate": 1.1410864708389632e-05, "loss": 0.067, "step": 26757 }, { "epoch": 0.5896202768734128, "grad_norm": 1.0324246883392334, "learning_rate": 1.140982527957224e-05, "loss": 0.0835, "step": 26758 }, { "epoch": 0.589642312162929, "grad_norm": 0.42381957173347473, "learning_rate": 1.1408785869041542e-05, "loss": 0.0751, "step": 26759 }, { "epoch": 0.5896643474524451, "grad_norm": 0.29763713479042053, "learning_rate": 1.1407746476802823e-05, "loss": 0.0689, "step": 26760 }, { "epoch": 0.5896863827419613, "grad_norm": 0.1761743277311325, "learning_rate": 1.1406707102861377e-05, "loss": 0.0581, "step": 26761 }, { "epoch": 0.5897084180314774, "grad_norm": 0.1485271006822586, "learning_rate": 1.1405667747222496e-05, "loss": 0.0612, "step": 26762 }, { "epoch": 0.5897304533209936, "grad_norm": 0.6075643301010132, "learning_rate": 1.140462840989148e-05, "loss": 0.0939, "step": 26763 }, { "epoch": 0.5897524886105098, "grad_norm": 0.6066656708717346, "learning_rate": 1.1403589090873618e-05, "loss": 0.0751, "step": 26764 }, { "epoch": 0.5897745239000259, "grad_norm": 0.4794222414493561, "learning_rate": 1.1402549790174208e-05, "loss": 0.0779, "step": 26765 }, { "epoch": 0.5897965591895421, "grad_norm": 0.9982843399047852, "learning_rate": 1.1401510507798532e-05, "loss": 0.0631, "step": 26766 }, { "epoch": 0.5898185944790583, "grad_norm": 0.5694558620452881, "learning_rate": 1.1400471243751899e-05, "loss": 0.0922, "step": 26767 }, { "epoch": 0.5898406297685743, "grad_norm": 0.9905170202255249, "learning_rate": 1.1399431998039597e-05, "loss": 0.1033, "step": 26768 }, { "epoch": 0.5898626650580905, "grad_norm": 0.7372022271156311, "learning_rate": 1.1398392770666917e-05, "loss": 0.1209, "step": 26769 }, { "epoch": 0.5898847003476066, "grad_norm": 0.5127165913581848, "learning_rate": 1.1397353561639153e-05, "loss": 0.0787, "step": 26770 }, { "epoch": 0.5899067356371228, "grad_norm": 0.5507852435112, "learning_rate": 1.1396314370961604e-05, "loss": 0.0997, "step": 26771 }, { "epoch": 0.589928770926639, "grad_norm": 0.5036808252334595, "learning_rate": 1.1395275198639554e-05, "loss": 0.0608, "step": 26772 }, { "epoch": 0.5899508062161551, "grad_norm": 0.8408970832824707, "learning_rate": 1.1394236044678305e-05, "loss": 0.0539, "step": 26773 }, { "epoch": 0.5899728415056713, "grad_norm": 0.4350977838039398, "learning_rate": 1.1393196909083136e-05, "loss": 0.0745, "step": 26774 }, { "epoch": 0.5899948767951875, "grad_norm": 0.6561722755432129, "learning_rate": 1.1392157791859355e-05, "loss": 0.067, "step": 26775 }, { "epoch": 0.5900169120847036, "grad_norm": 0.773123025894165, "learning_rate": 1.139111869301225e-05, "loss": 0.0874, "step": 26776 }, { "epoch": 0.5900389473742198, "grad_norm": 1.0685116052627563, "learning_rate": 1.139007961254711e-05, "loss": 0.0756, "step": 26777 }, { "epoch": 0.590060982663736, "grad_norm": 0.4810149669647217, "learning_rate": 1.1389040550469235e-05, "loss": 0.0546, "step": 26778 }, { "epoch": 0.5900830179532521, "grad_norm": 0.6898455619812012, "learning_rate": 1.138800150678391e-05, "loss": 0.0644, "step": 26779 }, { "epoch": 0.5901050532427683, "grad_norm": 0.8417910933494568, "learning_rate": 1.1386962481496432e-05, "loss": 0.0811, "step": 26780 }, { "epoch": 0.5901270885322845, "grad_norm": 0.6542761325836182, "learning_rate": 1.1385923474612085e-05, "loss": 0.0748, "step": 26781 }, { "epoch": 0.5901491238218006, "grad_norm": 0.5291407704353333, "learning_rate": 1.1384884486136172e-05, "loss": 0.058, "step": 26782 }, { "epoch": 0.5901711591113168, "grad_norm": 0.5674282908439636, "learning_rate": 1.1383845516073984e-05, "loss": 0.0616, "step": 26783 }, { "epoch": 0.590193194400833, "grad_norm": 0.7449276447296143, "learning_rate": 1.1382806564430808e-05, "loss": 0.0781, "step": 26784 }, { "epoch": 0.5902152296903491, "grad_norm": 0.8315755724906921, "learning_rate": 1.1381767631211935e-05, "loss": 0.1008, "step": 26785 }, { "epoch": 0.5902372649798653, "grad_norm": 1.061316728591919, "learning_rate": 1.1380728716422663e-05, "loss": 0.1183, "step": 26786 }, { "epoch": 0.5902593002693814, "grad_norm": 0.45148271322250366, "learning_rate": 1.1379689820068279e-05, "loss": 0.0332, "step": 26787 }, { "epoch": 0.5902813355588976, "grad_norm": 0.6879762411117554, "learning_rate": 1.1378650942154078e-05, "loss": 0.0669, "step": 26788 }, { "epoch": 0.5903033708484138, "grad_norm": 0.6601232290267944, "learning_rate": 1.1377612082685344e-05, "loss": 0.0674, "step": 26789 }, { "epoch": 0.5903254061379299, "grad_norm": 0.5233439207077026, "learning_rate": 1.1376573241667378e-05, "loss": 0.0731, "step": 26790 }, { "epoch": 0.5903474414274461, "grad_norm": 0.3232407867908478, "learning_rate": 1.1375534419105471e-05, "loss": 0.0468, "step": 26791 }, { "epoch": 0.5903694767169622, "grad_norm": 0.5868828296661377, "learning_rate": 1.1374495615004908e-05, "loss": 0.0501, "step": 26792 }, { "epoch": 0.5903915120064783, "grad_norm": 0.9725292921066284, "learning_rate": 1.1373456829370982e-05, "loss": 0.0836, "step": 26793 }, { "epoch": 0.5904135472959945, "grad_norm": 0.5183890461921692, "learning_rate": 1.1372418062208985e-05, "loss": 0.0479, "step": 26794 }, { "epoch": 0.5904355825855107, "grad_norm": 0.5655269026756287, "learning_rate": 1.137137931352421e-05, "loss": 0.0691, "step": 26795 }, { "epoch": 0.5904576178750268, "grad_norm": 0.6118983626365662, "learning_rate": 1.1370340583321948e-05, "loss": 0.1109, "step": 26796 }, { "epoch": 0.590479653164543, "grad_norm": 0.5515682697296143, "learning_rate": 1.1369301871607478e-05, "loss": 0.0544, "step": 26797 }, { "epoch": 0.5905016884540591, "grad_norm": 0.6105831265449524, "learning_rate": 1.1368263178386107e-05, "loss": 0.0869, "step": 26798 }, { "epoch": 0.5905237237435753, "grad_norm": 0.5831210613250732, "learning_rate": 1.136722450366312e-05, "loss": 0.0586, "step": 26799 }, { "epoch": 0.5905457590330915, "grad_norm": 0.4020920991897583, "learning_rate": 1.1366185847443806e-05, "loss": 0.0426, "step": 26800 }, { "epoch": 0.5905677943226076, "grad_norm": 0.6238850355148315, "learning_rate": 1.1365147209733456e-05, "loss": 0.0935, "step": 26801 }, { "epoch": 0.5905898296121238, "grad_norm": 0.7088773250579834, "learning_rate": 1.1364108590537361e-05, "loss": 0.081, "step": 26802 }, { "epoch": 0.59061186490164, "grad_norm": 0.5705422759056091, "learning_rate": 1.1363069989860812e-05, "loss": 0.0694, "step": 26803 }, { "epoch": 0.5906339001911561, "grad_norm": 0.5768782496452332, "learning_rate": 1.136203140770909e-05, "loss": 0.0638, "step": 26804 }, { "epoch": 0.5906559354806723, "grad_norm": 0.5319054126739502, "learning_rate": 1.1360992844087499e-05, "loss": 0.0972, "step": 26805 }, { "epoch": 0.5906779707701885, "grad_norm": 0.46988704800605774, "learning_rate": 1.1359954299001325e-05, "loss": 0.087, "step": 26806 }, { "epoch": 0.5907000060597046, "grad_norm": 0.3281194269657135, "learning_rate": 1.1358915772455853e-05, "loss": 0.0846, "step": 26807 }, { "epoch": 0.5907220413492208, "grad_norm": 0.5813537240028381, "learning_rate": 1.1357877264456375e-05, "loss": 0.0577, "step": 26808 }, { "epoch": 0.590744076638737, "grad_norm": 0.6706205010414124, "learning_rate": 1.135683877500818e-05, "loss": 0.0602, "step": 26809 }, { "epoch": 0.5907661119282531, "grad_norm": 0.6342522501945496, "learning_rate": 1.1355800304116564e-05, "loss": 0.0738, "step": 26810 }, { "epoch": 0.5907881472177693, "grad_norm": 0.6394270658493042, "learning_rate": 1.135476185178681e-05, "loss": 0.0946, "step": 26811 }, { "epoch": 0.5908101825072855, "grad_norm": 0.9564968943595886, "learning_rate": 1.13537234180242e-05, "loss": 0.0715, "step": 26812 }, { "epoch": 0.5908322177968016, "grad_norm": 0.7303915619850159, "learning_rate": 1.135268500283404e-05, "loss": 0.0732, "step": 26813 }, { "epoch": 0.5908542530863178, "grad_norm": 0.5068123936653137, "learning_rate": 1.135164660622161e-05, "loss": 0.0825, "step": 26814 }, { "epoch": 0.590876288375834, "grad_norm": 0.5720881819725037, "learning_rate": 1.13506082281922e-05, "loss": 0.0903, "step": 26815 }, { "epoch": 0.5908983236653501, "grad_norm": 0.2513143718242645, "learning_rate": 1.1349569868751098e-05, "loss": 0.0564, "step": 26816 }, { "epoch": 0.5909203589548662, "grad_norm": 0.18489597737789154, "learning_rate": 1.1348531527903597e-05, "loss": 0.0469, "step": 26817 }, { "epoch": 0.5909423942443823, "grad_norm": 0.6761639714241028, "learning_rate": 1.1347493205654982e-05, "loss": 0.0872, "step": 26818 }, { "epoch": 0.5909644295338985, "grad_norm": 0.7658985257148743, "learning_rate": 1.1346454902010537e-05, "loss": 0.0717, "step": 26819 }, { "epoch": 0.5909864648234147, "grad_norm": 0.7502865791320801, "learning_rate": 1.1345416616975562e-05, "loss": 0.0684, "step": 26820 }, { "epoch": 0.5910085001129308, "grad_norm": 0.4991939663887024, "learning_rate": 1.134437835055534e-05, "loss": 0.0589, "step": 26821 }, { "epoch": 0.591030535402447, "grad_norm": 0.3378746807575226, "learning_rate": 1.1343340102755159e-05, "loss": 0.0414, "step": 26822 }, { "epoch": 0.5910525706919632, "grad_norm": 0.39225268363952637, "learning_rate": 1.1342301873580306e-05, "loss": 0.0481, "step": 26823 }, { "epoch": 0.5910746059814793, "grad_norm": 0.41382402181625366, "learning_rate": 1.1341263663036073e-05, "loss": 0.0495, "step": 26824 }, { "epoch": 0.5910966412709955, "grad_norm": 0.5681520104408264, "learning_rate": 1.1340225471127747e-05, "loss": 0.0459, "step": 26825 }, { "epoch": 0.5911186765605116, "grad_norm": 0.6692079305648804, "learning_rate": 1.1339187297860614e-05, "loss": 0.057, "step": 26826 }, { "epoch": 0.5911407118500278, "grad_norm": 0.49499014019966125, "learning_rate": 1.133814914323996e-05, "loss": 0.0529, "step": 26827 }, { "epoch": 0.591162747139544, "grad_norm": 0.7953980565071106, "learning_rate": 1.1337111007271077e-05, "loss": 0.0871, "step": 26828 }, { "epoch": 0.5911847824290601, "grad_norm": 0.7406864762306213, "learning_rate": 1.1336072889959255e-05, "loss": 0.1086, "step": 26829 }, { "epoch": 0.5912068177185763, "grad_norm": 0.8366915583610535, "learning_rate": 1.1335034791309779e-05, "loss": 0.094, "step": 26830 }, { "epoch": 0.5912288530080925, "grad_norm": 0.34350717067718506, "learning_rate": 1.1333996711327931e-05, "loss": 0.0503, "step": 26831 }, { "epoch": 0.5912508882976086, "grad_norm": 0.5892648696899414, "learning_rate": 1.1332958650019008e-05, "loss": 0.066, "step": 26832 }, { "epoch": 0.5912729235871248, "grad_norm": 0.33900606632232666, "learning_rate": 1.1331920607388292e-05, "loss": 0.0538, "step": 26833 }, { "epoch": 0.591294958876641, "grad_norm": 0.6300852298736572, "learning_rate": 1.1330882583441074e-05, "loss": 0.0621, "step": 26834 }, { "epoch": 0.5913169941661571, "grad_norm": 0.5076540112495422, "learning_rate": 1.132984457818263e-05, "loss": 0.0821, "step": 26835 }, { "epoch": 0.5913390294556733, "grad_norm": 0.6864676475524902, "learning_rate": 1.1328806591618263e-05, "loss": 0.0891, "step": 26836 }, { "epoch": 0.5913610647451895, "grad_norm": 0.47008734941482544, "learning_rate": 1.1327768623753252e-05, "loss": 0.0377, "step": 26837 }, { "epoch": 0.5913831000347056, "grad_norm": 0.8420394062995911, "learning_rate": 1.1326730674592885e-05, "loss": 0.0509, "step": 26838 }, { "epoch": 0.5914051353242218, "grad_norm": 0.8089789152145386, "learning_rate": 1.1325692744142444e-05, "loss": 0.0939, "step": 26839 }, { "epoch": 0.591427170613738, "grad_norm": 0.5628257393836975, "learning_rate": 1.1324654832407225e-05, "loss": 0.0636, "step": 26840 }, { "epoch": 0.5914492059032541, "grad_norm": 0.6968507170677185, "learning_rate": 1.132361693939251e-05, "loss": 0.0763, "step": 26841 }, { "epoch": 0.5914712411927702, "grad_norm": 0.686751663684845, "learning_rate": 1.1322579065103579e-05, "loss": 0.0491, "step": 26842 }, { "epoch": 0.5914932764822863, "grad_norm": 0.5541079640388489, "learning_rate": 1.132154120954573e-05, "loss": 0.0624, "step": 26843 }, { "epoch": 0.5915153117718025, "grad_norm": 0.7686299085617065, "learning_rate": 1.1320503372724244e-05, "loss": 0.1096, "step": 26844 }, { "epoch": 0.5915373470613187, "grad_norm": 0.5093553066253662, "learning_rate": 1.1319465554644407e-05, "loss": 0.0619, "step": 26845 }, { "epoch": 0.5915593823508348, "grad_norm": 1.2788493633270264, "learning_rate": 1.1318427755311504e-05, "loss": 0.0714, "step": 26846 }, { "epoch": 0.591581417640351, "grad_norm": 0.9215898513793945, "learning_rate": 1.1317389974730826e-05, "loss": 0.0739, "step": 26847 }, { "epoch": 0.5916034529298672, "grad_norm": 0.9234309196472168, "learning_rate": 1.1316352212907656e-05, "loss": 0.0936, "step": 26848 }, { "epoch": 0.5916254882193833, "grad_norm": 0.49519291520118713, "learning_rate": 1.1315314469847277e-05, "loss": 0.0701, "step": 26849 }, { "epoch": 0.5916475235088995, "grad_norm": 0.4222295880317688, "learning_rate": 1.1314276745554972e-05, "loss": 0.0569, "step": 26850 }, { "epoch": 0.5916695587984157, "grad_norm": 0.7645472884178162, "learning_rate": 1.131323904003604e-05, "loss": 0.0895, "step": 26851 }, { "epoch": 0.5916915940879318, "grad_norm": 0.38020747900009155, "learning_rate": 1.1312201353295758e-05, "loss": 0.069, "step": 26852 }, { "epoch": 0.591713629377448, "grad_norm": 0.537749171257019, "learning_rate": 1.131116368533941e-05, "loss": 0.0586, "step": 26853 }, { "epoch": 0.5917356646669641, "grad_norm": 0.528473436832428, "learning_rate": 1.1310126036172278e-05, "loss": 0.0569, "step": 26854 }, { "epoch": 0.5917576999564803, "grad_norm": 0.4854418933391571, "learning_rate": 1.1309088405799664e-05, "loss": 0.1018, "step": 26855 }, { "epoch": 0.5917797352459965, "grad_norm": 0.5085691213607788, "learning_rate": 1.1308050794226837e-05, "loss": 0.0782, "step": 26856 }, { "epoch": 0.5918017705355126, "grad_norm": 0.3867093324661255, "learning_rate": 1.1307013201459086e-05, "loss": 0.0507, "step": 26857 }, { "epoch": 0.5918238058250288, "grad_norm": 0.7605817317962646, "learning_rate": 1.1305975627501692e-05, "loss": 0.0677, "step": 26858 }, { "epoch": 0.591845841114545, "grad_norm": 0.9095923900604248, "learning_rate": 1.1304938072359953e-05, "loss": 0.0614, "step": 26859 }, { "epoch": 0.5918678764040611, "grad_norm": 0.2966926395893097, "learning_rate": 1.1303900536039143e-05, "loss": 0.0558, "step": 26860 }, { "epoch": 0.5918899116935773, "grad_norm": 0.4925277531147003, "learning_rate": 1.1302863018544552e-05, "loss": 0.0838, "step": 26861 }, { "epoch": 0.5919119469830935, "grad_norm": 0.528118371963501, "learning_rate": 1.1301825519881455e-05, "loss": 0.0574, "step": 26862 }, { "epoch": 0.5919339822726096, "grad_norm": 0.8800979852676392, "learning_rate": 1.130078804005515e-05, "loss": 0.0756, "step": 26863 }, { "epoch": 0.5919560175621258, "grad_norm": 0.6460830569267273, "learning_rate": 1.1299750579070916e-05, "loss": 0.0838, "step": 26864 }, { "epoch": 0.591978052851642, "grad_norm": 0.5783953666687012, "learning_rate": 1.1298713136934033e-05, "loss": 0.082, "step": 26865 }, { "epoch": 0.592000088141158, "grad_norm": 0.538648247718811, "learning_rate": 1.129767571364979e-05, "loss": 0.0856, "step": 26866 }, { "epoch": 0.5920221234306742, "grad_norm": 0.6985268592834473, "learning_rate": 1.1296638309223472e-05, "loss": 0.0834, "step": 26867 }, { "epoch": 0.5920441587201903, "grad_norm": 0.7015917301177979, "learning_rate": 1.1295600923660361e-05, "loss": 0.0613, "step": 26868 }, { "epoch": 0.5920661940097065, "grad_norm": 0.4752451181411743, "learning_rate": 1.1294563556965734e-05, "loss": 0.0561, "step": 26869 }, { "epoch": 0.5920882292992227, "grad_norm": 0.7706810832023621, "learning_rate": 1.1293526209144888e-05, "loss": 0.0761, "step": 26870 }, { "epoch": 0.5921102645887388, "grad_norm": 0.38259294629096985, "learning_rate": 1.1292488880203101e-05, "loss": 0.0378, "step": 26871 }, { "epoch": 0.592132299878255, "grad_norm": 0.4389475882053375, "learning_rate": 1.1291451570145655e-05, "loss": 0.0604, "step": 26872 }, { "epoch": 0.5921543351677712, "grad_norm": 0.47630733251571655, "learning_rate": 1.1290414278977834e-05, "loss": 0.0796, "step": 26873 }, { "epoch": 0.5921763704572873, "grad_norm": 0.8967902064323425, "learning_rate": 1.1289377006704924e-05, "loss": 0.0763, "step": 26874 }, { "epoch": 0.5921984057468035, "grad_norm": 0.5547839403152466, "learning_rate": 1.1288339753332208e-05, "loss": 0.065, "step": 26875 }, { "epoch": 0.5922204410363197, "grad_norm": 0.5671049952507019, "learning_rate": 1.1287302518864967e-05, "loss": 0.1253, "step": 26876 }, { "epoch": 0.5922424763258358, "grad_norm": 0.5001572370529175, "learning_rate": 1.1286265303308478e-05, "loss": 0.0744, "step": 26877 }, { "epoch": 0.592264511615352, "grad_norm": 0.6030327081680298, "learning_rate": 1.1285228106668038e-05, "loss": 0.0701, "step": 26878 }, { "epoch": 0.5922865469048681, "grad_norm": 0.6502602100372314, "learning_rate": 1.1284190928948925e-05, "loss": 0.0793, "step": 26879 }, { "epoch": 0.5923085821943843, "grad_norm": 0.8244730830192566, "learning_rate": 1.1283153770156419e-05, "loss": 0.0896, "step": 26880 }, { "epoch": 0.5923306174839005, "grad_norm": 0.418690025806427, "learning_rate": 1.12821166302958e-05, "loss": 0.0398, "step": 26881 }, { "epoch": 0.5923526527734166, "grad_norm": 0.779515266418457, "learning_rate": 1.1281079509372359e-05, "loss": 0.0815, "step": 26882 }, { "epoch": 0.5923746880629328, "grad_norm": 0.5579417943954468, "learning_rate": 1.1280042407391372e-05, "loss": 0.041, "step": 26883 }, { "epoch": 0.592396723352449, "grad_norm": 0.5207383036613464, "learning_rate": 1.1279005324358127e-05, "loss": 0.0555, "step": 26884 }, { "epoch": 0.5924187586419651, "grad_norm": 0.32639768719673157, "learning_rate": 1.1277968260277895e-05, "loss": 0.0583, "step": 26885 }, { "epoch": 0.5924407939314813, "grad_norm": 0.17594152688980103, "learning_rate": 1.1276931215155974e-05, "loss": 0.0548, "step": 26886 }, { "epoch": 0.5924628292209975, "grad_norm": 0.7355865240097046, "learning_rate": 1.1275894188997637e-05, "loss": 0.0684, "step": 26887 }, { "epoch": 0.5924848645105136, "grad_norm": 0.584223210811615, "learning_rate": 1.1274857181808167e-05, "loss": 0.0599, "step": 26888 }, { "epoch": 0.5925068998000298, "grad_norm": 0.7427023649215698, "learning_rate": 1.127382019359285e-05, "loss": 0.0699, "step": 26889 }, { "epoch": 0.592528935089546, "grad_norm": 0.5386685729026794, "learning_rate": 1.1272783224356963e-05, "loss": 0.0439, "step": 26890 }, { "epoch": 0.592550970379062, "grad_norm": 0.7987706065177917, "learning_rate": 1.127174627410579e-05, "loss": 0.0856, "step": 26891 }, { "epoch": 0.5925730056685782, "grad_norm": 0.5879330635070801, "learning_rate": 1.1270709342844607e-05, "loss": 0.0758, "step": 26892 }, { "epoch": 0.5925950409580943, "grad_norm": 0.7955295443534851, "learning_rate": 1.1269672430578707e-05, "loss": 0.1047, "step": 26893 }, { "epoch": 0.5926170762476105, "grad_norm": 0.4068935215473175, "learning_rate": 1.1268635537313366e-05, "loss": 0.0568, "step": 26894 }, { "epoch": 0.5926391115371267, "grad_norm": 0.6649166941642761, "learning_rate": 1.1267598663053866e-05, "loss": 0.0643, "step": 26895 }, { "epoch": 0.5926611468266428, "grad_norm": 0.6243423223495483, "learning_rate": 1.1266561807805482e-05, "loss": 0.0665, "step": 26896 }, { "epoch": 0.592683182116159, "grad_norm": 0.863806962966919, "learning_rate": 1.1265524971573506e-05, "loss": 0.0784, "step": 26897 }, { "epoch": 0.5927052174056752, "grad_norm": 0.6725329756736755, "learning_rate": 1.1264488154363214e-05, "loss": 0.0545, "step": 26898 }, { "epoch": 0.5927272526951913, "grad_norm": 0.42575326561927795, "learning_rate": 1.1263451356179886e-05, "loss": 0.0767, "step": 26899 }, { "epoch": 0.5927492879847075, "grad_norm": 0.8023831844329834, "learning_rate": 1.1262414577028797e-05, "loss": 0.0834, "step": 26900 }, { "epoch": 0.5927713232742237, "grad_norm": 0.475495845079422, "learning_rate": 1.1261377816915243e-05, "loss": 0.0488, "step": 26901 }, { "epoch": 0.5927933585637398, "grad_norm": 0.37999022006988525, "learning_rate": 1.1260341075844496e-05, "loss": 0.0506, "step": 26902 }, { "epoch": 0.592815393853256, "grad_norm": 0.6830474138259888, "learning_rate": 1.1259304353821835e-05, "loss": 0.0701, "step": 26903 }, { "epoch": 0.5928374291427722, "grad_norm": 0.418308824300766, "learning_rate": 1.1258267650852542e-05, "loss": 0.0578, "step": 26904 }, { "epoch": 0.5928594644322883, "grad_norm": 0.7592628002166748, "learning_rate": 1.1257230966941901e-05, "loss": 0.0543, "step": 26905 }, { "epoch": 0.5928814997218045, "grad_norm": 0.6339088678359985, "learning_rate": 1.125619430209519e-05, "loss": 0.0781, "step": 26906 }, { "epoch": 0.5929035350113206, "grad_norm": 0.5875787734985352, "learning_rate": 1.1255157656317688e-05, "loss": 0.0743, "step": 26907 }, { "epoch": 0.5929255703008368, "grad_norm": 0.5846113562583923, "learning_rate": 1.1254121029614673e-05, "loss": 0.0428, "step": 26908 }, { "epoch": 0.592947605590353, "grad_norm": 0.440093457698822, "learning_rate": 1.125308442199143e-05, "loss": 0.0527, "step": 26909 }, { "epoch": 0.5929696408798691, "grad_norm": 0.6228950619697571, "learning_rate": 1.125204783345324e-05, "loss": 0.0767, "step": 26910 }, { "epoch": 0.5929916761693853, "grad_norm": 0.6045171022415161, "learning_rate": 1.1251011264005377e-05, "loss": 0.0736, "step": 26911 }, { "epoch": 0.5930137114589015, "grad_norm": 0.5821372270584106, "learning_rate": 1.1249974713653127e-05, "loss": 0.0998, "step": 26912 }, { "epoch": 0.5930357467484176, "grad_norm": 0.8524096608161926, "learning_rate": 1.1248938182401766e-05, "loss": 0.0876, "step": 26913 }, { "epoch": 0.5930577820379338, "grad_norm": 0.9648377299308777, "learning_rate": 1.1247901670256574e-05, "loss": 0.0861, "step": 26914 }, { "epoch": 0.59307981732745, "grad_norm": 0.565887987613678, "learning_rate": 1.1246865177222825e-05, "loss": 0.0699, "step": 26915 }, { "epoch": 0.593101852616966, "grad_norm": 0.618468701839447, "learning_rate": 1.1245828703305812e-05, "loss": 0.0603, "step": 26916 }, { "epoch": 0.5931238879064822, "grad_norm": 0.6770797371864319, "learning_rate": 1.1244792248510806e-05, "loss": 0.0539, "step": 26917 }, { "epoch": 0.5931459231959983, "grad_norm": 0.4643431603908539, "learning_rate": 1.1243755812843085e-05, "loss": 0.0833, "step": 26918 }, { "epoch": 0.5931679584855145, "grad_norm": 0.5078058242797852, "learning_rate": 1.1242719396307927e-05, "loss": 0.0581, "step": 26919 }, { "epoch": 0.5931899937750307, "grad_norm": 0.5681270360946655, "learning_rate": 1.124168299891062e-05, "loss": 0.0804, "step": 26920 }, { "epoch": 0.5932120290645468, "grad_norm": 0.7953006029129028, "learning_rate": 1.1240646620656434e-05, "loss": 0.0676, "step": 26921 }, { "epoch": 0.593234064354063, "grad_norm": 0.8557927012443542, "learning_rate": 1.123961026155065e-05, "loss": 0.0818, "step": 26922 }, { "epoch": 0.5932560996435792, "grad_norm": 0.6526291370391846, "learning_rate": 1.1238573921598544e-05, "loss": 0.0651, "step": 26923 }, { "epoch": 0.5932781349330953, "grad_norm": 0.7501955032348633, "learning_rate": 1.1237537600805403e-05, "loss": 0.0943, "step": 26924 }, { "epoch": 0.5933001702226115, "grad_norm": 0.7440699934959412, "learning_rate": 1.12365012991765e-05, "loss": 0.1407, "step": 26925 }, { "epoch": 0.5933222055121277, "grad_norm": 0.6716620922088623, "learning_rate": 1.1235465016717115e-05, "loss": 0.0589, "step": 26926 }, { "epoch": 0.5933442408016438, "grad_norm": 0.5430577397346497, "learning_rate": 1.1234428753432522e-05, "loss": 0.0626, "step": 26927 }, { "epoch": 0.59336627609116, "grad_norm": 0.7227008938789368, "learning_rate": 1.1233392509328006e-05, "loss": 0.077, "step": 26928 }, { "epoch": 0.5933883113806762, "grad_norm": 0.4966898560523987, "learning_rate": 1.1232356284408841e-05, "loss": 0.0486, "step": 26929 }, { "epoch": 0.5934103466701923, "grad_norm": 0.3969112038612366, "learning_rate": 1.1231320078680306e-05, "loss": 0.0595, "step": 26930 }, { "epoch": 0.5934323819597085, "grad_norm": 1.2467997074127197, "learning_rate": 1.1230283892147672e-05, "loss": 0.096, "step": 26931 }, { "epoch": 0.5934544172492247, "grad_norm": 0.5855478048324585, "learning_rate": 1.122924772481623e-05, "loss": 0.0556, "step": 26932 }, { "epoch": 0.5934764525387408, "grad_norm": 0.641983687877655, "learning_rate": 1.1228211576691253e-05, "loss": 0.0472, "step": 26933 }, { "epoch": 0.593498487828257, "grad_norm": 0.6818510890007019, "learning_rate": 1.1227175447778013e-05, "loss": 0.0805, "step": 26934 }, { "epoch": 0.5935205231177731, "grad_norm": 0.46554943919181824, "learning_rate": 1.1226139338081794e-05, "loss": 0.0567, "step": 26935 }, { "epoch": 0.5935425584072893, "grad_norm": 0.5222178101539612, "learning_rate": 1.1225103247607871e-05, "loss": 0.0764, "step": 26936 }, { "epoch": 0.5935645936968055, "grad_norm": 0.8074623346328735, "learning_rate": 1.1224067176361524e-05, "loss": 0.0803, "step": 26937 }, { "epoch": 0.5935866289863216, "grad_norm": 0.5229977369308472, "learning_rate": 1.1223031124348019e-05, "loss": 0.0574, "step": 26938 }, { "epoch": 0.5936086642758378, "grad_norm": 0.6771077513694763, "learning_rate": 1.1221995091572647e-05, "loss": 0.0783, "step": 26939 }, { "epoch": 0.593630699565354, "grad_norm": 0.4921366572380066, "learning_rate": 1.1220959078040682e-05, "loss": 0.0435, "step": 26940 }, { "epoch": 0.59365273485487, "grad_norm": 0.725527822971344, "learning_rate": 1.1219923083757398e-05, "loss": 0.0541, "step": 26941 }, { "epoch": 0.5936747701443862, "grad_norm": 0.6844416260719299, "learning_rate": 1.121888710872807e-05, "loss": 0.0708, "step": 26942 }, { "epoch": 0.5936968054339024, "grad_norm": 0.583777666091919, "learning_rate": 1.1217851152957981e-05, "loss": 0.048, "step": 26943 }, { "epoch": 0.5937188407234185, "grad_norm": 1.228163480758667, "learning_rate": 1.1216815216452404e-05, "loss": 0.0584, "step": 26944 }, { "epoch": 0.5937408760129347, "grad_norm": 0.5828555822372437, "learning_rate": 1.1215779299216616e-05, "loss": 0.0828, "step": 26945 }, { "epoch": 0.5937629113024508, "grad_norm": 0.8944284915924072, "learning_rate": 1.1214743401255886e-05, "loss": 0.0672, "step": 26946 }, { "epoch": 0.593784946591967, "grad_norm": 0.7316061854362488, "learning_rate": 1.1213707522575505e-05, "loss": 0.0857, "step": 26947 }, { "epoch": 0.5938069818814832, "grad_norm": 0.7967024445533752, "learning_rate": 1.1212671663180742e-05, "loss": 0.1038, "step": 26948 }, { "epoch": 0.5938290171709993, "grad_norm": 0.284269779920578, "learning_rate": 1.1211635823076874e-05, "loss": 0.0647, "step": 26949 }, { "epoch": 0.5938510524605155, "grad_norm": 0.45016101002693176, "learning_rate": 1.121060000226917e-05, "loss": 0.063, "step": 26950 }, { "epoch": 0.5938730877500317, "grad_norm": 0.5646193027496338, "learning_rate": 1.1209564200762917e-05, "loss": 0.0561, "step": 26951 }, { "epoch": 0.5938951230395478, "grad_norm": 0.7198097109794617, "learning_rate": 1.1208528418563387e-05, "loss": 0.0507, "step": 26952 }, { "epoch": 0.593917158329064, "grad_norm": 0.5072436928749084, "learning_rate": 1.1207492655675848e-05, "loss": 0.0675, "step": 26953 }, { "epoch": 0.5939391936185802, "grad_norm": 0.5847324132919312, "learning_rate": 1.120645691210559e-05, "loss": 0.0401, "step": 26954 }, { "epoch": 0.5939612289080963, "grad_norm": 0.6295363306999207, "learning_rate": 1.120542118785788e-05, "loss": 0.0616, "step": 26955 }, { "epoch": 0.5939832641976125, "grad_norm": 0.9839702844619751, "learning_rate": 1.1204385482937994e-05, "loss": 0.0884, "step": 26956 }, { "epoch": 0.5940052994871287, "grad_norm": 0.4400235414505005, "learning_rate": 1.1203349797351206e-05, "loss": 0.0569, "step": 26957 }, { "epoch": 0.5940273347766448, "grad_norm": 0.7362775206565857, "learning_rate": 1.1202314131102795e-05, "loss": 0.0735, "step": 26958 }, { "epoch": 0.594049370066161, "grad_norm": 0.7471926212310791, "learning_rate": 1.1201278484198038e-05, "loss": 0.085, "step": 26959 }, { "epoch": 0.5940714053556772, "grad_norm": 0.7425081133842468, "learning_rate": 1.1200242856642204e-05, "loss": 0.0974, "step": 26960 }, { "epoch": 0.5940934406451933, "grad_norm": 0.6367300748825073, "learning_rate": 1.1199207248440564e-05, "loss": 0.0581, "step": 26961 }, { "epoch": 0.5941154759347095, "grad_norm": 0.7167508006095886, "learning_rate": 1.1198171659598408e-05, "loss": 0.0432, "step": 26962 }, { "epoch": 0.5941375112242256, "grad_norm": 0.5461434125900269, "learning_rate": 1.1197136090121002e-05, "loss": 0.0701, "step": 26963 }, { "epoch": 0.5941595465137418, "grad_norm": 0.375712126493454, "learning_rate": 1.119610054001362e-05, "loss": 0.0675, "step": 26964 }, { "epoch": 0.5941815818032579, "grad_norm": 0.6164830923080444, "learning_rate": 1.1195065009281531e-05, "loss": 0.0874, "step": 26965 }, { "epoch": 0.594203617092774, "grad_norm": 0.6537025570869446, "learning_rate": 1.1194029497930025e-05, "loss": 0.0445, "step": 26966 }, { "epoch": 0.5942256523822902, "grad_norm": 0.8826873302459717, "learning_rate": 1.1192994005964369e-05, "loss": 0.0842, "step": 26967 }, { "epoch": 0.5942476876718064, "grad_norm": 0.999011218547821, "learning_rate": 1.1191958533389834e-05, "loss": 0.0861, "step": 26968 }, { "epoch": 0.5942697229613225, "grad_norm": 0.569057285785675, "learning_rate": 1.1190923080211689e-05, "loss": 0.0485, "step": 26969 }, { "epoch": 0.5942917582508387, "grad_norm": 0.4973582625389099, "learning_rate": 1.1189887646435222e-05, "loss": 0.0669, "step": 26970 }, { "epoch": 0.5943137935403549, "grad_norm": 0.6024149060249329, "learning_rate": 1.11888522320657e-05, "loss": 0.0493, "step": 26971 }, { "epoch": 0.594335828829871, "grad_norm": 0.4895400106906891, "learning_rate": 1.1187816837108396e-05, "loss": 0.0547, "step": 26972 }, { "epoch": 0.5943578641193872, "grad_norm": 0.5133076906204224, "learning_rate": 1.118678146156858e-05, "loss": 0.0816, "step": 26973 }, { "epoch": 0.5943798994089033, "grad_norm": 0.43234050273895264, "learning_rate": 1.1185746105451539e-05, "loss": 0.0693, "step": 26974 }, { "epoch": 0.5944019346984195, "grad_norm": 0.8867066502571106, "learning_rate": 1.1184710768762538e-05, "loss": 0.1084, "step": 26975 }, { "epoch": 0.5944239699879357, "grad_norm": 0.6337714791297913, "learning_rate": 1.1183675451506846e-05, "loss": 0.0822, "step": 26976 }, { "epoch": 0.5944460052774518, "grad_norm": 0.42436113953590393, "learning_rate": 1.1182640153689745e-05, "loss": 0.0554, "step": 26977 }, { "epoch": 0.594468040566968, "grad_norm": 0.567602276802063, "learning_rate": 1.1181604875316506e-05, "loss": 0.0457, "step": 26978 }, { "epoch": 0.5944900758564842, "grad_norm": 0.5883237719535828, "learning_rate": 1.1180569616392399e-05, "loss": 0.0845, "step": 26979 }, { "epoch": 0.5945121111460003, "grad_norm": 0.6418896317481995, "learning_rate": 1.1179534376922694e-05, "loss": 0.0735, "step": 26980 }, { "epoch": 0.5945341464355165, "grad_norm": 0.3483732342720032, "learning_rate": 1.1178499156912674e-05, "loss": 0.0402, "step": 26981 }, { "epoch": 0.5945561817250327, "grad_norm": 0.6539310216903687, "learning_rate": 1.117746395636761e-05, "loss": 0.072, "step": 26982 }, { "epoch": 0.5945782170145488, "grad_norm": 0.6748249530792236, "learning_rate": 1.117642877529277e-05, "loss": 0.0736, "step": 26983 }, { "epoch": 0.594600252304065, "grad_norm": 0.6115416884422302, "learning_rate": 1.1175393613693425e-05, "loss": 0.0763, "step": 26984 }, { "epoch": 0.5946222875935812, "grad_norm": 0.49890464544296265, "learning_rate": 1.1174358471574854e-05, "loss": 0.063, "step": 26985 }, { "epoch": 0.5946443228830973, "grad_norm": 0.8215507864952087, "learning_rate": 1.1173323348942329e-05, "loss": 0.0677, "step": 26986 }, { "epoch": 0.5946663581726135, "grad_norm": 1.5125905275344849, "learning_rate": 1.1172288245801118e-05, "loss": 0.1282, "step": 26987 }, { "epoch": 0.5946883934621296, "grad_norm": 0.43468260765075684, "learning_rate": 1.1171253162156491e-05, "loss": 0.045, "step": 26988 }, { "epoch": 0.5947104287516458, "grad_norm": 0.6677646636962891, "learning_rate": 1.1170218098013729e-05, "loss": 0.0595, "step": 26989 }, { "epoch": 0.5947324640411619, "grad_norm": 0.5425921082496643, "learning_rate": 1.1169183053378102e-05, "loss": 0.0744, "step": 26990 }, { "epoch": 0.594754499330678, "grad_norm": 0.29169192910194397, "learning_rate": 1.116814802825488e-05, "loss": 0.0301, "step": 26991 }, { "epoch": 0.5947765346201942, "grad_norm": 0.41206955909729004, "learning_rate": 1.116711302264933e-05, "loss": 0.0754, "step": 26992 }, { "epoch": 0.5947985699097104, "grad_norm": 0.7875406742095947, "learning_rate": 1.1166078036566734e-05, "loss": 0.0787, "step": 26993 }, { "epoch": 0.5948206051992265, "grad_norm": 0.7750343084335327, "learning_rate": 1.1165043070012357e-05, "loss": 0.121, "step": 26994 }, { "epoch": 0.5948426404887427, "grad_norm": 0.6873050332069397, "learning_rate": 1.1164008122991473e-05, "loss": 0.0617, "step": 26995 }, { "epoch": 0.5948646757782589, "grad_norm": 0.6391029357910156, "learning_rate": 1.1162973195509342e-05, "loss": 0.0822, "step": 26996 }, { "epoch": 0.594886711067775, "grad_norm": 0.31007543206214905, "learning_rate": 1.1161938287571258e-05, "loss": 0.0717, "step": 26997 }, { "epoch": 0.5949087463572912, "grad_norm": 0.5417841672897339, "learning_rate": 1.1160903399182477e-05, "loss": 0.0696, "step": 26998 }, { "epoch": 0.5949307816468073, "grad_norm": 0.5044780373573303, "learning_rate": 1.1159868530348274e-05, "loss": 0.0561, "step": 26999 }, { "epoch": 0.5949528169363235, "grad_norm": 0.8365468382835388, "learning_rate": 1.115883368107392e-05, "loss": 0.082, "step": 27000 }, { "epoch": 0.5949748522258397, "grad_norm": 0.6580696702003479, "learning_rate": 1.1157798851364687e-05, "loss": 0.0955, "step": 27001 }, { "epoch": 0.5949968875153558, "grad_norm": 0.44864577054977417, "learning_rate": 1.1156764041225844e-05, "loss": 0.0443, "step": 27002 }, { "epoch": 0.595018922804872, "grad_norm": 0.23081068694591522, "learning_rate": 1.1155729250662657e-05, "loss": 0.0637, "step": 27003 }, { "epoch": 0.5950409580943882, "grad_norm": 0.8320671916007996, "learning_rate": 1.1154694479680411e-05, "loss": 0.0709, "step": 27004 }, { "epoch": 0.5950629933839043, "grad_norm": 0.5067625641822815, "learning_rate": 1.1153659728284365e-05, "loss": 0.0748, "step": 27005 }, { "epoch": 0.5950850286734205, "grad_norm": 0.6621452569961548, "learning_rate": 1.1152624996479793e-05, "loss": 0.0738, "step": 27006 }, { "epoch": 0.5951070639629367, "grad_norm": 0.47371307015419006, "learning_rate": 1.1151590284271963e-05, "loss": 0.0485, "step": 27007 }, { "epoch": 0.5951290992524528, "grad_norm": 0.6350097060203552, "learning_rate": 1.1150555591666149e-05, "loss": 0.0715, "step": 27008 }, { "epoch": 0.595151134541969, "grad_norm": 0.3648315966129303, "learning_rate": 1.114952091866762e-05, "loss": 0.0696, "step": 27009 }, { "epoch": 0.5951731698314852, "grad_norm": 0.4710898995399475, "learning_rate": 1.1148486265281646e-05, "loss": 0.0378, "step": 27010 }, { "epoch": 0.5951952051210013, "grad_norm": 0.40017345547676086, "learning_rate": 1.114745163151349e-05, "loss": 0.0616, "step": 27011 }, { "epoch": 0.5952172404105175, "grad_norm": 0.28758886456489563, "learning_rate": 1.1146417017368435e-05, "loss": 0.065, "step": 27012 }, { "epoch": 0.5952392757000337, "grad_norm": 0.5270193815231323, "learning_rate": 1.1145382422851748e-05, "loss": 0.0985, "step": 27013 }, { "epoch": 0.5952613109895498, "grad_norm": 0.7131128907203674, "learning_rate": 1.1144347847968691e-05, "loss": 0.0625, "step": 27014 }, { "epoch": 0.5952833462790659, "grad_norm": 0.6061751842498779, "learning_rate": 1.1143313292724537e-05, "loss": 0.0633, "step": 27015 }, { "epoch": 0.595305381568582, "grad_norm": 0.7804790139198303, "learning_rate": 1.1142278757124559e-05, "loss": 0.0797, "step": 27016 }, { "epoch": 0.5953274168580982, "grad_norm": 0.44059431552886963, "learning_rate": 1.1141244241174024e-05, "loss": 0.0445, "step": 27017 }, { "epoch": 0.5953494521476144, "grad_norm": 0.8402311205863953, "learning_rate": 1.11402097448782e-05, "loss": 0.0701, "step": 27018 }, { "epoch": 0.5953714874371305, "grad_norm": 0.360248863697052, "learning_rate": 1.1139175268242353e-05, "loss": 0.0533, "step": 27019 }, { "epoch": 0.5953935227266467, "grad_norm": 0.5438600778579712, "learning_rate": 1.1138140811271762e-05, "loss": 0.0756, "step": 27020 }, { "epoch": 0.5954155580161629, "grad_norm": 0.7318612933158875, "learning_rate": 1.1137106373971692e-05, "loss": 0.0792, "step": 27021 }, { "epoch": 0.595437593305679, "grad_norm": 0.4772547483444214, "learning_rate": 1.1136071956347407e-05, "loss": 0.0443, "step": 27022 }, { "epoch": 0.5954596285951952, "grad_norm": 0.551801323890686, "learning_rate": 1.1135037558404182e-05, "loss": 0.073, "step": 27023 }, { "epoch": 0.5954816638847114, "grad_norm": 0.5009206533432007, "learning_rate": 1.1134003180147286e-05, "loss": 0.0826, "step": 27024 }, { "epoch": 0.5955036991742275, "grad_norm": 0.8073975443840027, "learning_rate": 1.1132968821581981e-05, "loss": 0.055, "step": 27025 }, { "epoch": 0.5955257344637437, "grad_norm": 0.6252226829528809, "learning_rate": 1.1131934482713533e-05, "loss": 0.0346, "step": 27026 }, { "epoch": 0.5955477697532598, "grad_norm": 0.41142112016677856, "learning_rate": 1.1130900163547228e-05, "loss": 0.0534, "step": 27027 }, { "epoch": 0.595569805042776, "grad_norm": 0.6061493158340454, "learning_rate": 1.1129865864088319e-05, "loss": 0.0773, "step": 27028 }, { "epoch": 0.5955918403322922, "grad_norm": 0.5463695526123047, "learning_rate": 1.1128831584342081e-05, "loss": 0.0773, "step": 27029 }, { "epoch": 0.5956138756218083, "grad_norm": 0.7664284706115723, "learning_rate": 1.1127797324313773e-05, "loss": 0.0585, "step": 27030 }, { "epoch": 0.5956359109113245, "grad_norm": 0.5845158696174622, "learning_rate": 1.1126763084008676e-05, "loss": 0.0711, "step": 27031 }, { "epoch": 0.5956579462008407, "grad_norm": 0.5148389935493469, "learning_rate": 1.112572886343205e-05, "loss": 0.0449, "step": 27032 }, { "epoch": 0.5956799814903568, "grad_norm": 0.36588382720947266, "learning_rate": 1.1124694662589166e-05, "loss": 0.0958, "step": 27033 }, { "epoch": 0.595702016779873, "grad_norm": 1.0215896368026733, "learning_rate": 1.1123660481485281e-05, "loss": 0.0802, "step": 27034 }, { "epoch": 0.5957240520693892, "grad_norm": 0.6579892039299011, "learning_rate": 1.112262632012568e-05, "loss": 0.0727, "step": 27035 }, { "epoch": 0.5957460873589053, "grad_norm": 0.4696877896785736, "learning_rate": 1.1121592178515624e-05, "loss": 0.0386, "step": 27036 }, { "epoch": 0.5957681226484215, "grad_norm": 0.582578182220459, "learning_rate": 1.1120558056660377e-05, "loss": 0.0388, "step": 27037 }, { "epoch": 0.5957901579379377, "grad_norm": 0.914421558380127, "learning_rate": 1.1119523954565207e-05, "loss": 0.0767, "step": 27038 }, { "epoch": 0.5958121932274537, "grad_norm": 0.5147029757499695, "learning_rate": 1.1118489872235383e-05, "loss": 0.0725, "step": 27039 }, { "epoch": 0.5958342285169699, "grad_norm": 0.7068601846694946, "learning_rate": 1.1117455809676172e-05, "loss": 0.0595, "step": 27040 }, { "epoch": 0.595856263806486, "grad_norm": 0.6906028389930725, "learning_rate": 1.1116421766892841e-05, "loss": 0.046, "step": 27041 }, { "epoch": 0.5958782990960022, "grad_norm": 0.7588912844657898, "learning_rate": 1.111538774389065e-05, "loss": 0.0627, "step": 27042 }, { "epoch": 0.5959003343855184, "grad_norm": 0.7712987661361694, "learning_rate": 1.1114353740674877e-05, "loss": 0.0744, "step": 27043 }, { "epoch": 0.5959223696750345, "grad_norm": 0.7757319211959839, "learning_rate": 1.1113319757250787e-05, "loss": 0.0837, "step": 27044 }, { "epoch": 0.5959444049645507, "grad_norm": 0.7115063071250916, "learning_rate": 1.1112285793623638e-05, "loss": 0.0545, "step": 27045 }, { "epoch": 0.5959664402540669, "grad_norm": 0.799196183681488, "learning_rate": 1.1111251849798709e-05, "loss": 0.0932, "step": 27046 }, { "epoch": 0.595988475543583, "grad_norm": 0.504906952381134, "learning_rate": 1.1110217925781257e-05, "loss": 0.0856, "step": 27047 }, { "epoch": 0.5960105108330992, "grad_norm": 0.5353013277053833, "learning_rate": 1.110918402157655e-05, "loss": 0.0671, "step": 27048 }, { "epoch": 0.5960325461226154, "grad_norm": 0.6319162845611572, "learning_rate": 1.110815013718985e-05, "loss": 0.0786, "step": 27049 }, { "epoch": 0.5960545814121315, "grad_norm": 0.6040337085723877, "learning_rate": 1.1107116272626436e-05, "loss": 0.0596, "step": 27050 }, { "epoch": 0.5960766167016477, "grad_norm": 0.589019238948822, "learning_rate": 1.1106082427891566e-05, "loss": 0.0511, "step": 27051 }, { "epoch": 0.5960986519911639, "grad_norm": 0.8306530117988586, "learning_rate": 1.1105048602990503e-05, "loss": 0.0672, "step": 27052 }, { "epoch": 0.59612068728068, "grad_norm": 0.6666667461395264, "learning_rate": 1.1104014797928515e-05, "loss": 0.0685, "step": 27053 }, { "epoch": 0.5961427225701962, "grad_norm": 0.5424021482467651, "learning_rate": 1.1102981012710875e-05, "loss": 0.0493, "step": 27054 }, { "epoch": 0.5961647578597123, "grad_norm": 0.48879218101501465, "learning_rate": 1.1101947247342838e-05, "loss": 0.0911, "step": 27055 }, { "epoch": 0.5961867931492285, "grad_norm": 0.5287322998046875, "learning_rate": 1.1100913501829677e-05, "loss": 0.0488, "step": 27056 }, { "epoch": 0.5962088284387447, "grad_norm": 0.7664710879325867, "learning_rate": 1.1099879776176647e-05, "loss": 0.0811, "step": 27057 }, { "epoch": 0.5962308637282608, "grad_norm": 0.9635661244392395, "learning_rate": 1.1098846070389026e-05, "loss": 0.1214, "step": 27058 }, { "epoch": 0.596252899017777, "grad_norm": 0.3863082528114319, "learning_rate": 1.1097812384472073e-05, "loss": 0.0432, "step": 27059 }, { "epoch": 0.5962749343072932, "grad_norm": 0.6795898675918579, "learning_rate": 1.1096778718431057e-05, "loss": 0.0856, "step": 27060 }, { "epoch": 0.5962969695968093, "grad_norm": 0.3925599157810211, "learning_rate": 1.1095745072271236e-05, "loss": 0.0517, "step": 27061 }, { "epoch": 0.5963190048863255, "grad_norm": 1.071255087852478, "learning_rate": 1.109471144599788e-05, "loss": 0.0649, "step": 27062 }, { "epoch": 0.5963410401758417, "grad_norm": 0.6251417398452759, "learning_rate": 1.1093677839616258e-05, "loss": 0.0779, "step": 27063 }, { "epoch": 0.5963630754653577, "grad_norm": 0.513783872127533, "learning_rate": 1.1092644253131617e-05, "loss": 0.0512, "step": 27064 }, { "epoch": 0.5963851107548739, "grad_norm": 0.7379119396209717, "learning_rate": 1.1091610686549243e-05, "loss": 0.0674, "step": 27065 }, { "epoch": 0.59640714604439, "grad_norm": 0.6554207801818848, "learning_rate": 1.1090577139874393e-05, "loss": 0.0928, "step": 27066 }, { "epoch": 0.5964291813339062, "grad_norm": 0.4749281406402588, "learning_rate": 1.108954361311233e-05, "loss": 0.0514, "step": 27067 }, { "epoch": 0.5964512166234224, "grad_norm": 0.5039067268371582, "learning_rate": 1.1088510106268314e-05, "loss": 0.0769, "step": 27068 }, { "epoch": 0.5964732519129385, "grad_norm": 1.1002954244613647, "learning_rate": 1.1087476619347616e-05, "loss": 0.0553, "step": 27069 }, { "epoch": 0.5964952872024547, "grad_norm": 0.42583587765693665, "learning_rate": 1.1086443152355498e-05, "loss": 0.0498, "step": 27070 }, { "epoch": 0.5965173224919709, "grad_norm": 0.47561025619506836, "learning_rate": 1.1085409705297224e-05, "loss": 0.068, "step": 27071 }, { "epoch": 0.596539357781487, "grad_norm": 0.2982075810432434, "learning_rate": 1.108437627817805e-05, "loss": 0.0682, "step": 27072 }, { "epoch": 0.5965613930710032, "grad_norm": 0.6056956052780151, "learning_rate": 1.1083342871003256e-05, "loss": 0.0572, "step": 27073 }, { "epoch": 0.5965834283605194, "grad_norm": 0.9072754383087158, "learning_rate": 1.1082309483778098e-05, "loss": 0.0654, "step": 27074 }, { "epoch": 0.5966054636500355, "grad_norm": 0.5813214182853699, "learning_rate": 1.1081276116507833e-05, "loss": 0.0841, "step": 27075 }, { "epoch": 0.5966274989395517, "grad_norm": 0.7212943434715271, "learning_rate": 1.1080242769197729e-05, "loss": 0.0612, "step": 27076 }, { "epoch": 0.5966495342290679, "grad_norm": 0.5188328623771667, "learning_rate": 1.1079209441853055e-05, "loss": 0.0429, "step": 27077 }, { "epoch": 0.596671569518584, "grad_norm": 0.479276180267334, "learning_rate": 1.1078176134479075e-05, "loss": 0.042, "step": 27078 }, { "epoch": 0.5966936048081002, "grad_norm": 0.5732184052467346, "learning_rate": 1.107714284708104e-05, "loss": 0.0576, "step": 27079 }, { "epoch": 0.5967156400976164, "grad_norm": 0.6367870569229126, "learning_rate": 1.1076109579664216e-05, "loss": 0.0509, "step": 27080 }, { "epoch": 0.5967376753871325, "grad_norm": 0.566397488117218, "learning_rate": 1.1075076332233877e-05, "loss": 0.0573, "step": 27081 }, { "epoch": 0.5967597106766487, "grad_norm": 0.581415057182312, "learning_rate": 1.1074043104795277e-05, "loss": 0.068, "step": 27082 }, { "epoch": 0.5967817459661648, "grad_norm": 0.5909734964370728, "learning_rate": 1.1073009897353682e-05, "loss": 0.079, "step": 27083 }, { "epoch": 0.596803781255681, "grad_norm": 0.8026273846626282, "learning_rate": 1.1071976709914347e-05, "loss": 0.0674, "step": 27084 }, { "epoch": 0.5968258165451972, "grad_norm": 0.4133816063404083, "learning_rate": 1.1070943542482548e-05, "loss": 0.0535, "step": 27085 }, { "epoch": 0.5968478518347133, "grad_norm": 0.34581252932548523, "learning_rate": 1.1069910395063539e-05, "loss": 0.0736, "step": 27086 }, { "epoch": 0.5968698871242295, "grad_norm": 0.469482958316803, "learning_rate": 1.1068877267662581e-05, "loss": 0.0812, "step": 27087 }, { "epoch": 0.5968919224137457, "grad_norm": 0.6474099159240723, "learning_rate": 1.1067844160284942e-05, "loss": 0.0785, "step": 27088 }, { "epoch": 0.5969139577032617, "grad_norm": 0.6609465479850769, "learning_rate": 1.1066811072935883e-05, "loss": 0.0821, "step": 27089 }, { "epoch": 0.5969359929927779, "grad_norm": 0.7374227046966553, "learning_rate": 1.1065778005620663e-05, "loss": 0.0673, "step": 27090 }, { "epoch": 0.596958028282294, "grad_norm": 1.3004791736602783, "learning_rate": 1.106474495834454e-05, "loss": 0.0671, "step": 27091 }, { "epoch": 0.5969800635718102, "grad_norm": 0.5309193134307861, "learning_rate": 1.1063711931112787e-05, "loss": 0.0564, "step": 27092 }, { "epoch": 0.5970020988613264, "grad_norm": 0.7190597057342529, "learning_rate": 1.1062678923930659e-05, "loss": 0.0749, "step": 27093 }, { "epoch": 0.5970241341508425, "grad_norm": 0.6760700345039368, "learning_rate": 1.1061645936803419e-05, "loss": 0.0706, "step": 27094 }, { "epoch": 0.5970461694403587, "grad_norm": 0.5980477929115295, "learning_rate": 1.1060612969736327e-05, "loss": 0.0555, "step": 27095 }, { "epoch": 0.5970682047298749, "grad_norm": 0.5765116810798645, "learning_rate": 1.1059580022734647e-05, "loss": 0.091, "step": 27096 }, { "epoch": 0.597090240019391, "grad_norm": 0.7012391686439514, "learning_rate": 1.1058547095803639e-05, "loss": 0.0999, "step": 27097 }, { "epoch": 0.5971122753089072, "grad_norm": 0.43317094445228577, "learning_rate": 1.1057514188948565e-05, "loss": 0.052, "step": 27098 }, { "epoch": 0.5971343105984234, "grad_norm": 0.2910851538181305, "learning_rate": 1.1056481302174677e-05, "loss": 0.0623, "step": 27099 }, { "epoch": 0.5971563458879395, "grad_norm": 0.7219305038452148, "learning_rate": 1.1055448435487252e-05, "loss": 0.0628, "step": 27100 }, { "epoch": 0.5971783811774557, "grad_norm": 0.30333301424980164, "learning_rate": 1.1054415588891542e-05, "loss": 0.0455, "step": 27101 }, { "epoch": 0.5972004164669719, "grad_norm": 0.7691927552223206, "learning_rate": 1.1053382762392811e-05, "loss": 0.0846, "step": 27102 }, { "epoch": 0.597222451756488, "grad_norm": 0.6593594551086426, "learning_rate": 1.1052349955996316e-05, "loss": 0.0757, "step": 27103 }, { "epoch": 0.5972444870460042, "grad_norm": 0.7520001530647278, "learning_rate": 1.105131716970732e-05, "loss": 0.0557, "step": 27104 }, { "epoch": 0.5972665223355204, "grad_norm": 0.35780808329582214, "learning_rate": 1.1050284403531085e-05, "loss": 0.0663, "step": 27105 }, { "epoch": 0.5972885576250365, "grad_norm": 0.6681360006332397, "learning_rate": 1.1049251657472868e-05, "loss": 0.059, "step": 27106 }, { "epoch": 0.5973105929145527, "grad_norm": 0.9089576601982117, "learning_rate": 1.1048218931537924e-05, "loss": 0.1159, "step": 27107 }, { "epoch": 0.5973326282040688, "grad_norm": 0.9712944030761719, "learning_rate": 1.1047186225731527e-05, "loss": 0.0835, "step": 27108 }, { "epoch": 0.597354663493585, "grad_norm": 0.672358512878418, "learning_rate": 1.1046153540058933e-05, "loss": 0.0646, "step": 27109 }, { "epoch": 0.5973766987831012, "grad_norm": 0.41075220704078674, "learning_rate": 1.1045120874525396e-05, "loss": 0.0708, "step": 27110 }, { "epoch": 0.5973987340726173, "grad_norm": 0.4308270514011383, "learning_rate": 1.1044088229136179e-05, "loss": 0.0773, "step": 27111 }, { "epoch": 0.5974207693621335, "grad_norm": 0.5896673798561096, "learning_rate": 1.1043055603896544e-05, "loss": 0.0503, "step": 27112 }, { "epoch": 0.5974428046516496, "grad_norm": 0.44673529267311096, "learning_rate": 1.1042022998811747e-05, "loss": 0.0609, "step": 27113 }, { "epoch": 0.5974648399411657, "grad_norm": 0.6124690771102905, "learning_rate": 1.1040990413887043e-05, "loss": 0.0828, "step": 27114 }, { "epoch": 0.5974868752306819, "grad_norm": 0.69339919090271, "learning_rate": 1.1039957849127706e-05, "loss": 0.082, "step": 27115 }, { "epoch": 0.597508910520198, "grad_norm": 0.6972049474716187, "learning_rate": 1.1038925304538987e-05, "loss": 0.0817, "step": 27116 }, { "epoch": 0.5975309458097142, "grad_norm": 0.3508334159851074, "learning_rate": 1.1037892780126145e-05, "loss": 0.0695, "step": 27117 }, { "epoch": 0.5975529810992304, "grad_norm": 0.47699999809265137, "learning_rate": 1.1036860275894435e-05, "loss": 0.1032, "step": 27118 }, { "epoch": 0.5975750163887465, "grad_norm": 0.684144139289856, "learning_rate": 1.1035827791849124e-05, "loss": 0.0729, "step": 27119 }, { "epoch": 0.5975970516782627, "grad_norm": 0.7721728086471558, "learning_rate": 1.1034795327995472e-05, "loss": 0.084, "step": 27120 }, { "epoch": 0.5976190869677789, "grad_norm": 0.38569024205207825, "learning_rate": 1.103376288433873e-05, "loss": 0.0667, "step": 27121 }, { "epoch": 0.597641122257295, "grad_norm": 0.7914241552352905, "learning_rate": 1.1032730460884154e-05, "loss": 0.0652, "step": 27122 }, { "epoch": 0.5976631575468112, "grad_norm": 0.7145240902900696, "learning_rate": 1.1031698057637019e-05, "loss": 0.0648, "step": 27123 }, { "epoch": 0.5976851928363274, "grad_norm": 0.7080488801002502, "learning_rate": 1.1030665674602569e-05, "loss": 0.0753, "step": 27124 }, { "epoch": 0.5977072281258435, "grad_norm": 1.0350052118301392, "learning_rate": 1.102963331178607e-05, "loss": 0.1033, "step": 27125 }, { "epoch": 0.5977292634153597, "grad_norm": 1.1227569580078125, "learning_rate": 1.1028600969192774e-05, "loss": 0.0879, "step": 27126 }, { "epoch": 0.5977512987048759, "grad_norm": 0.43615439534187317, "learning_rate": 1.1027568646827947e-05, "loss": 0.0334, "step": 27127 }, { "epoch": 0.597773333994392, "grad_norm": 1.0580015182495117, "learning_rate": 1.1026536344696841e-05, "loss": 0.0601, "step": 27128 }, { "epoch": 0.5977953692839082, "grad_norm": 0.6881325244903564, "learning_rate": 1.1025504062804717e-05, "loss": 0.1094, "step": 27129 }, { "epoch": 0.5978174045734244, "grad_norm": 0.5297791957855225, "learning_rate": 1.1024471801156828e-05, "loss": 0.0704, "step": 27130 }, { "epoch": 0.5978394398629405, "grad_norm": 0.5428664088249207, "learning_rate": 1.102343955975844e-05, "loss": 0.0858, "step": 27131 }, { "epoch": 0.5978614751524567, "grad_norm": 0.6605676412582397, "learning_rate": 1.1022407338614808e-05, "loss": 0.0851, "step": 27132 }, { "epoch": 0.5978835104419729, "grad_norm": 0.6803346872329712, "learning_rate": 1.1021375137731183e-05, "loss": 0.0715, "step": 27133 }, { "epoch": 0.597905545731489, "grad_norm": 0.6499339938163757, "learning_rate": 1.1020342957112832e-05, "loss": 0.0548, "step": 27134 }, { "epoch": 0.5979275810210052, "grad_norm": 0.5121466517448425, "learning_rate": 1.101931079676501e-05, "loss": 0.0977, "step": 27135 }, { "epoch": 0.5979496163105213, "grad_norm": 0.515532374382019, "learning_rate": 1.1018278656692971e-05, "loss": 0.0743, "step": 27136 }, { "epoch": 0.5979716516000375, "grad_norm": 0.8174362778663635, "learning_rate": 1.1017246536901967e-05, "loss": 0.0829, "step": 27137 }, { "epoch": 0.5979936868895536, "grad_norm": 0.5304317474365234, "learning_rate": 1.101621443739727e-05, "loss": 0.0661, "step": 27138 }, { "epoch": 0.5980157221790697, "grad_norm": 0.7511534094810486, "learning_rate": 1.101518235818413e-05, "loss": 0.108, "step": 27139 }, { "epoch": 0.5980377574685859, "grad_norm": 0.22421881556510925, "learning_rate": 1.1014150299267801e-05, "loss": 0.0561, "step": 27140 }, { "epoch": 0.5980597927581021, "grad_norm": 0.7840405702590942, "learning_rate": 1.101311826065354e-05, "loss": 0.1187, "step": 27141 }, { "epoch": 0.5980818280476182, "grad_norm": 0.6336040496826172, "learning_rate": 1.1012086242346607e-05, "loss": 0.0771, "step": 27142 }, { "epoch": 0.5981038633371344, "grad_norm": 0.47032076120376587, "learning_rate": 1.101105424435226e-05, "loss": 0.0367, "step": 27143 }, { "epoch": 0.5981258986266506, "grad_norm": 0.47514110803604126, "learning_rate": 1.101002226667575e-05, "loss": 0.0624, "step": 27144 }, { "epoch": 0.5981479339161667, "grad_norm": 0.564605176448822, "learning_rate": 1.1008990309322333e-05, "loss": 0.0734, "step": 27145 }, { "epoch": 0.5981699692056829, "grad_norm": 0.3952552378177643, "learning_rate": 1.1007958372297273e-05, "loss": 0.0543, "step": 27146 }, { "epoch": 0.598192004495199, "grad_norm": 0.44161802530288696, "learning_rate": 1.1006926455605823e-05, "loss": 0.0635, "step": 27147 }, { "epoch": 0.5982140397847152, "grad_norm": 0.7188526391983032, "learning_rate": 1.1005894559253235e-05, "loss": 0.0734, "step": 27148 }, { "epoch": 0.5982360750742314, "grad_norm": 0.5803048014640808, "learning_rate": 1.1004862683244768e-05, "loss": 0.0449, "step": 27149 }, { "epoch": 0.5982581103637475, "grad_norm": 0.5942521691322327, "learning_rate": 1.1003830827585677e-05, "loss": 0.0609, "step": 27150 }, { "epoch": 0.5982801456532637, "grad_norm": 0.7692370414733887, "learning_rate": 1.100279899228122e-05, "loss": 0.0714, "step": 27151 }, { "epoch": 0.5983021809427799, "grad_norm": 0.8182560801506042, "learning_rate": 1.1001767177336653e-05, "loss": 0.0833, "step": 27152 }, { "epoch": 0.598324216232296, "grad_norm": 0.4815506041049957, "learning_rate": 1.1000735382757223e-05, "loss": 0.0518, "step": 27153 }, { "epoch": 0.5983462515218122, "grad_norm": 1.5079203844070435, "learning_rate": 1.0999703608548196e-05, "loss": 0.0735, "step": 27154 }, { "epoch": 0.5983682868113284, "grad_norm": 0.8237526416778564, "learning_rate": 1.0998671854714827e-05, "loss": 0.0763, "step": 27155 }, { "epoch": 0.5983903221008445, "grad_norm": 0.3716579079627991, "learning_rate": 1.0997640121262362e-05, "loss": 0.0426, "step": 27156 }, { "epoch": 0.5984123573903607, "grad_norm": 0.6905953288078308, "learning_rate": 1.0996608408196068e-05, "loss": 0.0673, "step": 27157 }, { "epoch": 0.5984343926798769, "grad_norm": 0.8915797472000122, "learning_rate": 1.0995576715521192e-05, "loss": 0.0558, "step": 27158 }, { "epoch": 0.598456427969393, "grad_norm": 0.4414023160934448, "learning_rate": 1.0994545043242993e-05, "loss": 0.0376, "step": 27159 }, { "epoch": 0.5984784632589092, "grad_norm": 0.755345344543457, "learning_rate": 1.0993513391366716e-05, "loss": 0.0713, "step": 27160 }, { "epoch": 0.5985004985484254, "grad_norm": 0.5945268869400024, "learning_rate": 1.099248175989763e-05, "loss": 0.0575, "step": 27161 }, { "epoch": 0.5985225338379415, "grad_norm": 0.686665415763855, "learning_rate": 1.0991450148840985e-05, "loss": 0.0738, "step": 27162 }, { "epoch": 0.5985445691274576, "grad_norm": 0.6030898690223694, "learning_rate": 1.0990418558202034e-05, "loss": 0.0695, "step": 27163 }, { "epoch": 0.5985666044169737, "grad_norm": 0.7269827127456665, "learning_rate": 1.0989386987986025e-05, "loss": 0.0649, "step": 27164 }, { "epoch": 0.5985886397064899, "grad_norm": 0.44011446833610535, "learning_rate": 1.0988355438198224e-05, "loss": 0.0478, "step": 27165 }, { "epoch": 0.5986106749960061, "grad_norm": 0.7083548903465271, "learning_rate": 1.0987323908843881e-05, "loss": 0.0558, "step": 27166 }, { "epoch": 0.5986327102855222, "grad_norm": 0.3760021924972534, "learning_rate": 1.098629239992825e-05, "loss": 0.0748, "step": 27167 }, { "epoch": 0.5986547455750384, "grad_norm": 0.4955733120441437, "learning_rate": 1.0985260911456574e-05, "loss": 0.0817, "step": 27168 }, { "epoch": 0.5986767808645546, "grad_norm": 0.4767915904521942, "learning_rate": 1.0984229443434125e-05, "loss": 0.0394, "step": 27169 }, { "epoch": 0.5986988161540707, "grad_norm": 0.6000245809555054, "learning_rate": 1.0983197995866149e-05, "loss": 0.0703, "step": 27170 }, { "epoch": 0.5987208514435869, "grad_norm": 0.704673707485199, "learning_rate": 1.09821665687579e-05, "loss": 0.1135, "step": 27171 }, { "epoch": 0.598742886733103, "grad_norm": 0.5522792339324951, "learning_rate": 1.0981135162114628e-05, "loss": 0.0541, "step": 27172 }, { "epoch": 0.5987649220226192, "grad_norm": 0.422922283411026, "learning_rate": 1.0980103775941594e-05, "loss": 0.0827, "step": 27173 }, { "epoch": 0.5987869573121354, "grad_norm": 0.7964358925819397, "learning_rate": 1.0979072410244045e-05, "loss": 0.0747, "step": 27174 }, { "epoch": 0.5988089926016515, "grad_norm": 0.6602451205253601, "learning_rate": 1.0978041065027237e-05, "loss": 0.0992, "step": 27175 }, { "epoch": 0.5988310278911677, "grad_norm": 0.597179114818573, "learning_rate": 1.0977009740296416e-05, "loss": 0.0818, "step": 27176 }, { "epoch": 0.5988530631806839, "grad_norm": 0.5734258890151978, "learning_rate": 1.097597843605685e-05, "loss": 0.0575, "step": 27177 }, { "epoch": 0.5988750984702, "grad_norm": 0.4556698501110077, "learning_rate": 1.0974947152313782e-05, "loss": 0.0713, "step": 27178 }, { "epoch": 0.5988971337597162, "grad_norm": 0.4635523557662964, "learning_rate": 1.097391588907246e-05, "loss": 0.0447, "step": 27179 }, { "epoch": 0.5989191690492324, "grad_norm": 0.6394344568252563, "learning_rate": 1.0972884646338152e-05, "loss": 0.0653, "step": 27180 }, { "epoch": 0.5989412043387485, "grad_norm": 0.7283861637115479, "learning_rate": 1.09718534241161e-05, "loss": 0.0604, "step": 27181 }, { "epoch": 0.5989632396282647, "grad_norm": 0.5123888254165649, "learning_rate": 1.097082222241156e-05, "loss": 0.0656, "step": 27182 }, { "epoch": 0.5989852749177809, "grad_norm": 0.621965765953064, "learning_rate": 1.0969791041229776e-05, "loss": 0.0877, "step": 27183 }, { "epoch": 0.599007310207297, "grad_norm": 0.6381653547286987, "learning_rate": 1.0968759880576013e-05, "loss": 0.0782, "step": 27184 }, { "epoch": 0.5990293454968132, "grad_norm": 0.5670404434204102, "learning_rate": 1.0967728740455518e-05, "loss": 0.0595, "step": 27185 }, { "epoch": 0.5990513807863294, "grad_norm": 0.624875545501709, "learning_rate": 1.0966697620873542e-05, "loss": 0.0679, "step": 27186 }, { "epoch": 0.5990734160758455, "grad_norm": 0.9275605082511902, "learning_rate": 1.0965666521835333e-05, "loss": 0.0999, "step": 27187 }, { "epoch": 0.5990954513653616, "grad_norm": 0.5953001976013184, "learning_rate": 1.0964635443346153e-05, "loss": 0.0523, "step": 27188 }, { "epoch": 0.5991174866548777, "grad_norm": 0.6438851356506348, "learning_rate": 1.0963604385411249e-05, "loss": 0.0715, "step": 27189 }, { "epoch": 0.5991395219443939, "grad_norm": 0.6097308993339539, "learning_rate": 1.0962573348035874e-05, "loss": 0.1292, "step": 27190 }, { "epoch": 0.5991615572339101, "grad_norm": 0.8620370626449585, "learning_rate": 1.0961542331225271e-05, "loss": 0.0861, "step": 27191 }, { "epoch": 0.5991835925234262, "grad_norm": 0.3598199486732483, "learning_rate": 1.0960511334984703e-05, "loss": 0.0702, "step": 27192 }, { "epoch": 0.5992056278129424, "grad_norm": 0.4980185329914093, "learning_rate": 1.0959480359319417e-05, "loss": 0.0768, "step": 27193 }, { "epoch": 0.5992276631024586, "grad_norm": 0.7329964637756348, "learning_rate": 1.0958449404234665e-05, "loss": 0.0659, "step": 27194 }, { "epoch": 0.5992496983919747, "grad_norm": 0.6413867473602295, "learning_rate": 1.095741846973569e-05, "loss": 0.061, "step": 27195 }, { "epoch": 0.5992717336814909, "grad_norm": 0.9948105216026306, "learning_rate": 1.095638755582776e-05, "loss": 0.0801, "step": 27196 }, { "epoch": 0.5992937689710071, "grad_norm": 0.5043652057647705, "learning_rate": 1.0955356662516113e-05, "loss": 0.0601, "step": 27197 }, { "epoch": 0.5993158042605232, "grad_norm": 0.4543437957763672, "learning_rate": 1.0954325789806002e-05, "loss": 0.062, "step": 27198 }, { "epoch": 0.5993378395500394, "grad_norm": 0.6354446411132812, "learning_rate": 1.0953294937702682e-05, "loss": 0.0876, "step": 27199 }, { "epoch": 0.5993598748395556, "grad_norm": 0.889039158821106, "learning_rate": 1.0952264106211401e-05, "loss": 0.0819, "step": 27200 }, { "epoch": 0.5993819101290717, "grad_norm": 0.6314791440963745, "learning_rate": 1.0951233295337408e-05, "loss": 0.0702, "step": 27201 }, { "epoch": 0.5994039454185879, "grad_norm": 0.6365063786506653, "learning_rate": 1.095020250508595e-05, "loss": 0.0472, "step": 27202 }, { "epoch": 0.599425980708104, "grad_norm": 0.4066975712776184, "learning_rate": 1.094917173546229e-05, "loss": 0.0678, "step": 27203 }, { "epoch": 0.5994480159976202, "grad_norm": 0.37350431084632874, "learning_rate": 1.0948140986471667e-05, "loss": 0.0588, "step": 27204 }, { "epoch": 0.5994700512871364, "grad_norm": 0.6051561236381531, "learning_rate": 1.0947110258119338e-05, "loss": 0.0683, "step": 27205 }, { "epoch": 0.5994920865766525, "grad_norm": 0.48607856035232544, "learning_rate": 1.0946079550410547e-05, "loss": 0.0651, "step": 27206 }, { "epoch": 0.5995141218661687, "grad_norm": 0.5286346673965454, "learning_rate": 1.0945048863350546e-05, "loss": 0.043, "step": 27207 }, { "epoch": 0.5995361571556849, "grad_norm": 1.0150566101074219, "learning_rate": 1.094401819694459e-05, "loss": 0.0691, "step": 27208 }, { "epoch": 0.599558192445201, "grad_norm": 0.46227988600730896, "learning_rate": 1.0942987551197921e-05, "loss": 0.0798, "step": 27209 }, { "epoch": 0.5995802277347172, "grad_norm": 0.40307489037513733, "learning_rate": 1.0941956926115788e-05, "loss": 0.076, "step": 27210 }, { "epoch": 0.5996022630242334, "grad_norm": 0.5056431889533997, "learning_rate": 1.0940926321703449e-05, "loss": 0.0464, "step": 27211 }, { "epoch": 0.5996242983137494, "grad_norm": 0.7687661051750183, "learning_rate": 1.0939895737966149e-05, "loss": 0.0452, "step": 27212 }, { "epoch": 0.5996463336032656, "grad_norm": 0.38681161403656006, "learning_rate": 1.093886517490914e-05, "loss": 0.0795, "step": 27213 }, { "epoch": 0.5996683688927817, "grad_norm": 0.7616680264472961, "learning_rate": 1.0937834632537661e-05, "loss": 0.082, "step": 27214 }, { "epoch": 0.5996904041822979, "grad_norm": 0.5146868824958801, "learning_rate": 1.0936804110856974e-05, "loss": 0.0474, "step": 27215 }, { "epoch": 0.5997124394718141, "grad_norm": 1.052801489830017, "learning_rate": 1.0935773609872324e-05, "loss": 0.0902, "step": 27216 }, { "epoch": 0.5997344747613302, "grad_norm": 0.7064259648323059, "learning_rate": 1.0934743129588956e-05, "loss": 0.0613, "step": 27217 }, { "epoch": 0.5997565100508464, "grad_norm": 0.5883163213729858, "learning_rate": 1.0933712670012116e-05, "loss": 0.0997, "step": 27218 }, { "epoch": 0.5997785453403626, "grad_norm": 0.5111056566238403, "learning_rate": 1.0932682231147062e-05, "loss": 0.0585, "step": 27219 }, { "epoch": 0.5998005806298787, "grad_norm": 0.6010251641273499, "learning_rate": 1.093165181299904e-05, "loss": 0.1028, "step": 27220 }, { "epoch": 0.5998226159193949, "grad_norm": 0.6151050329208374, "learning_rate": 1.0930621415573294e-05, "loss": 0.0721, "step": 27221 }, { "epoch": 0.5998446512089111, "grad_norm": 0.6239537596702576, "learning_rate": 1.092959103887508e-05, "loss": 0.0669, "step": 27222 }, { "epoch": 0.5998666864984272, "grad_norm": 0.4937116801738739, "learning_rate": 1.0928560682909639e-05, "loss": 0.0536, "step": 27223 }, { "epoch": 0.5998887217879434, "grad_norm": 0.37943214178085327, "learning_rate": 1.0927530347682222e-05, "loss": 0.0499, "step": 27224 }, { "epoch": 0.5999107570774596, "grad_norm": 1.1820075511932373, "learning_rate": 1.0926500033198073e-05, "loss": 0.1055, "step": 27225 }, { "epoch": 0.5999327923669757, "grad_norm": 0.5176035165786743, "learning_rate": 1.0925469739462447e-05, "loss": 0.0549, "step": 27226 }, { "epoch": 0.5999548276564919, "grad_norm": 0.6971333026885986, "learning_rate": 1.092443946648059e-05, "loss": 0.0835, "step": 27227 }, { "epoch": 0.599976862946008, "grad_norm": 0.4748058617115021, "learning_rate": 1.0923409214257749e-05, "loss": 0.0641, "step": 27228 }, { "epoch": 0.5999988982355242, "grad_norm": 0.6440466046333313, "learning_rate": 1.0922378982799166e-05, "loss": 0.0628, "step": 27229 }, { "epoch": 0.6000209335250404, "grad_norm": 0.5762989521026611, "learning_rate": 1.09213487721101e-05, "loss": 0.0625, "step": 27230 }, { "epoch": 0.6000429688145565, "grad_norm": 0.5030083656311035, "learning_rate": 1.0920318582195788e-05, "loss": 0.0601, "step": 27231 }, { "epoch": 0.6000650041040727, "grad_norm": 0.7001329064369202, "learning_rate": 1.0919288413061481e-05, "loss": 0.0852, "step": 27232 }, { "epoch": 0.6000870393935889, "grad_norm": 1.0684751272201538, "learning_rate": 1.0918258264712423e-05, "loss": 0.062, "step": 27233 }, { "epoch": 0.600109074683105, "grad_norm": 0.28390154242515564, "learning_rate": 1.0917228137153868e-05, "loss": 0.055, "step": 27234 }, { "epoch": 0.6001311099726212, "grad_norm": 0.4698553681373596, "learning_rate": 1.0916198030391063e-05, "loss": 0.0479, "step": 27235 }, { "epoch": 0.6001531452621374, "grad_norm": 1.0107959508895874, "learning_rate": 1.0915167944429247e-05, "loss": 0.05, "step": 27236 }, { "epoch": 0.6001751805516534, "grad_norm": 0.5599795579910278, "learning_rate": 1.0914137879273671e-05, "loss": 0.0723, "step": 27237 }, { "epoch": 0.6001972158411696, "grad_norm": 0.366132915019989, "learning_rate": 1.0913107834929583e-05, "loss": 0.0533, "step": 27238 }, { "epoch": 0.6002192511306857, "grad_norm": 0.4363039433956146, "learning_rate": 1.0912077811402229e-05, "loss": 0.0824, "step": 27239 }, { "epoch": 0.6002412864202019, "grad_norm": 0.6499354243278503, "learning_rate": 1.0911047808696856e-05, "loss": 0.1014, "step": 27240 }, { "epoch": 0.6002633217097181, "grad_norm": 0.8181132078170776, "learning_rate": 1.0910017826818703e-05, "loss": 0.058, "step": 27241 }, { "epoch": 0.6002853569992342, "grad_norm": 0.7288647294044495, "learning_rate": 1.0908987865773027e-05, "loss": 0.0845, "step": 27242 }, { "epoch": 0.6003073922887504, "grad_norm": 0.3683481812477112, "learning_rate": 1.090795792556507e-05, "loss": 0.0616, "step": 27243 }, { "epoch": 0.6003294275782666, "grad_norm": 0.4871029853820801, "learning_rate": 1.0906928006200075e-05, "loss": 0.0595, "step": 27244 }, { "epoch": 0.6003514628677827, "grad_norm": 0.3486674726009369, "learning_rate": 1.0905898107683295e-05, "loss": 0.0478, "step": 27245 }, { "epoch": 0.6003734981572989, "grad_norm": 0.8232396245002747, "learning_rate": 1.0904868230019968e-05, "loss": 0.0418, "step": 27246 }, { "epoch": 0.6003955334468151, "grad_norm": 0.8518216609954834, "learning_rate": 1.0903838373215345e-05, "loss": 0.0687, "step": 27247 }, { "epoch": 0.6004175687363312, "grad_norm": 0.520054280757904, "learning_rate": 1.0902808537274664e-05, "loss": 0.0845, "step": 27248 }, { "epoch": 0.6004396040258474, "grad_norm": 1.3321841955184937, "learning_rate": 1.0901778722203182e-05, "loss": 0.1022, "step": 27249 }, { "epoch": 0.6004616393153636, "grad_norm": 0.4073760211467743, "learning_rate": 1.0900748928006138e-05, "loss": 0.0551, "step": 27250 }, { "epoch": 0.6004836746048797, "grad_norm": 0.6210878491401672, "learning_rate": 1.0899719154688779e-05, "loss": 0.0663, "step": 27251 }, { "epoch": 0.6005057098943959, "grad_norm": 0.3961679935455322, "learning_rate": 1.0898689402256344e-05, "loss": 0.0666, "step": 27252 }, { "epoch": 0.600527745183912, "grad_norm": 0.4481985867023468, "learning_rate": 1.089765967071409e-05, "loss": 0.0836, "step": 27253 }, { "epoch": 0.6005497804734282, "grad_norm": 0.6145420074462891, "learning_rate": 1.089662996006725e-05, "loss": 0.0757, "step": 27254 }, { "epoch": 0.6005718157629444, "grad_norm": 0.598138153553009, "learning_rate": 1.089560027032108e-05, "loss": 0.0945, "step": 27255 }, { "epoch": 0.6005938510524605, "grad_norm": 0.3211875855922699, "learning_rate": 1.0894570601480808e-05, "loss": 0.0736, "step": 27256 }, { "epoch": 0.6006158863419767, "grad_norm": 0.8691227436065674, "learning_rate": 1.0893540953551697e-05, "loss": 0.0869, "step": 27257 }, { "epoch": 0.6006379216314929, "grad_norm": 0.6284120082855225, "learning_rate": 1.0892511326538983e-05, "loss": 0.0753, "step": 27258 }, { "epoch": 0.600659956921009, "grad_norm": 1.0308507680892944, "learning_rate": 1.0891481720447912e-05, "loss": 0.0973, "step": 27259 }, { "epoch": 0.6006819922105252, "grad_norm": 0.49432122707366943, "learning_rate": 1.0890452135283725e-05, "loss": 0.0787, "step": 27260 }, { "epoch": 0.6007040275000414, "grad_norm": 0.49257534742355347, "learning_rate": 1.0889422571051674e-05, "loss": 0.056, "step": 27261 }, { "epoch": 0.6007260627895574, "grad_norm": 0.6719111204147339, "learning_rate": 1.0888393027756997e-05, "loss": 0.0789, "step": 27262 }, { "epoch": 0.6007480980790736, "grad_norm": 0.6254735589027405, "learning_rate": 1.0887363505404936e-05, "loss": 0.0858, "step": 27263 }, { "epoch": 0.6007701333685898, "grad_norm": 1.1212925910949707, "learning_rate": 1.0886334004000734e-05, "loss": 0.0895, "step": 27264 }, { "epoch": 0.6007921686581059, "grad_norm": 0.48899829387664795, "learning_rate": 1.0885304523549647e-05, "loss": 0.0588, "step": 27265 }, { "epoch": 0.6008142039476221, "grad_norm": 0.3087001442909241, "learning_rate": 1.0884275064056909e-05, "loss": 0.049, "step": 27266 }, { "epoch": 0.6008362392371382, "grad_norm": 0.5182567834854126, "learning_rate": 1.0883245625527761e-05, "loss": 0.0675, "step": 27267 }, { "epoch": 0.6008582745266544, "grad_norm": 0.6667524576187134, "learning_rate": 1.0882216207967454e-05, "loss": 0.0866, "step": 27268 }, { "epoch": 0.6008803098161706, "grad_norm": 0.40065938234329224, "learning_rate": 1.0881186811381229e-05, "loss": 0.075, "step": 27269 }, { "epoch": 0.6009023451056867, "grad_norm": 0.5037561655044556, "learning_rate": 1.0880157435774328e-05, "loss": 0.0991, "step": 27270 }, { "epoch": 0.6009243803952029, "grad_norm": 0.5638055801391602, "learning_rate": 1.087912808115199e-05, "loss": 0.0509, "step": 27271 }, { "epoch": 0.6009464156847191, "grad_norm": 0.8239140510559082, "learning_rate": 1.0878098747519464e-05, "loss": 0.1134, "step": 27272 }, { "epoch": 0.6009684509742352, "grad_norm": 0.8178867101669312, "learning_rate": 1.0877069434881995e-05, "loss": 0.0907, "step": 27273 }, { "epoch": 0.6009904862637514, "grad_norm": 0.6260616779327393, "learning_rate": 1.0876040143244821e-05, "loss": 0.0743, "step": 27274 }, { "epoch": 0.6010125215532676, "grad_norm": 0.5431815385818481, "learning_rate": 1.0875010872613184e-05, "loss": 0.0765, "step": 27275 }, { "epoch": 0.6010345568427837, "grad_norm": 0.5525479316711426, "learning_rate": 1.0873981622992329e-05, "loss": 0.0547, "step": 27276 }, { "epoch": 0.6010565921322999, "grad_norm": 1.0118299722671509, "learning_rate": 1.0872952394387501e-05, "loss": 0.071, "step": 27277 }, { "epoch": 0.6010786274218161, "grad_norm": 0.3548218607902527, "learning_rate": 1.0871923186803938e-05, "loss": 0.065, "step": 27278 }, { "epoch": 0.6011006627113322, "grad_norm": 0.858841061592102, "learning_rate": 1.0870894000246881e-05, "loss": 0.0889, "step": 27279 }, { "epoch": 0.6011226980008484, "grad_norm": 0.742224931716919, "learning_rate": 1.0869864834721576e-05, "loss": 0.0753, "step": 27280 }, { "epoch": 0.6011447332903646, "grad_norm": 0.3057496249675751, "learning_rate": 1.0868835690233267e-05, "loss": 0.0538, "step": 27281 }, { "epoch": 0.6011667685798807, "grad_norm": 0.8126896023750305, "learning_rate": 1.0867806566787193e-05, "loss": 0.0711, "step": 27282 }, { "epoch": 0.6011888038693969, "grad_norm": 1.0074383020401, "learning_rate": 1.0866777464388594e-05, "loss": 0.0816, "step": 27283 }, { "epoch": 0.601210839158913, "grad_norm": 0.7690545320510864, "learning_rate": 1.0865748383042715e-05, "loss": 0.0806, "step": 27284 }, { "epoch": 0.6012328744484292, "grad_norm": 0.898185133934021, "learning_rate": 1.0864719322754798e-05, "loss": 0.1016, "step": 27285 }, { "epoch": 0.6012549097379453, "grad_norm": 0.6147220730781555, "learning_rate": 1.0863690283530083e-05, "loss": 0.0705, "step": 27286 }, { "epoch": 0.6012769450274614, "grad_norm": 0.6681876182556152, "learning_rate": 1.0862661265373803e-05, "loss": 0.0463, "step": 27287 }, { "epoch": 0.6012989803169776, "grad_norm": 0.72022944688797, "learning_rate": 1.0861632268291215e-05, "loss": 0.0727, "step": 27288 }, { "epoch": 0.6013210156064938, "grad_norm": 0.6074761748313904, "learning_rate": 1.0860603292287554e-05, "loss": 0.072, "step": 27289 }, { "epoch": 0.6013430508960099, "grad_norm": 0.5461824536323547, "learning_rate": 1.0859574337368052e-05, "loss": 0.1086, "step": 27290 }, { "epoch": 0.6013650861855261, "grad_norm": 1.0304335355758667, "learning_rate": 1.0858545403537966e-05, "loss": 0.1097, "step": 27291 }, { "epoch": 0.6013871214750423, "grad_norm": 0.60595703125, "learning_rate": 1.0857516490802532e-05, "loss": 0.0649, "step": 27292 }, { "epoch": 0.6014091567645584, "grad_norm": 0.4877259433269501, "learning_rate": 1.0856487599166985e-05, "loss": 0.1101, "step": 27293 }, { "epoch": 0.6014311920540746, "grad_norm": 0.5946604609489441, "learning_rate": 1.0855458728636562e-05, "loss": 0.0582, "step": 27294 }, { "epoch": 0.6014532273435907, "grad_norm": 1.2230781316757202, "learning_rate": 1.0854429879216516e-05, "loss": 0.0956, "step": 27295 }, { "epoch": 0.6014752626331069, "grad_norm": 0.7984817028045654, "learning_rate": 1.0853401050912082e-05, "loss": 0.0801, "step": 27296 }, { "epoch": 0.6014972979226231, "grad_norm": 0.5687813758850098, "learning_rate": 1.08523722437285e-05, "loss": 0.0882, "step": 27297 }, { "epoch": 0.6015193332121392, "grad_norm": 0.6467859745025635, "learning_rate": 1.0851343457671005e-05, "loss": 0.0528, "step": 27298 }, { "epoch": 0.6015413685016554, "grad_norm": 0.5932033658027649, "learning_rate": 1.0850314692744847e-05, "loss": 0.0984, "step": 27299 }, { "epoch": 0.6015634037911716, "grad_norm": 0.5709741115570068, "learning_rate": 1.0849285948955262e-05, "loss": 0.07, "step": 27300 }, { "epoch": 0.6015854390806877, "grad_norm": 0.5186732411384583, "learning_rate": 1.0848257226307491e-05, "loss": 0.0657, "step": 27301 }, { "epoch": 0.6016074743702039, "grad_norm": 0.56736820936203, "learning_rate": 1.0847228524806767e-05, "loss": 0.0777, "step": 27302 }, { "epoch": 0.6016295096597201, "grad_norm": 0.5370240211486816, "learning_rate": 1.084619984445834e-05, "loss": 0.0476, "step": 27303 }, { "epoch": 0.6016515449492362, "grad_norm": 0.3477632999420166, "learning_rate": 1.0845171185267442e-05, "loss": 0.0644, "step": 27304 }, { "epoch": 0.6016735802387524, "grad_norm": 0.5716574788093567, "learning_rate": 1.0844142547239317e-05, "loss": 0.049, "step": 27305 }, { "epoch": 0.6016956155282686, "grad_norm": 0.5020527243614197, "learning_rate": 1.0843113930379197e-05, "loss": 0.0665, "step": 27306 }, { "epoch": 0.6017176508177847, "grad_norm": 0.7817461490631104, "learning_rate": 1.0842085334692335e-05, "loss": 0.0927, "step": 27307 }, { "epoch": 0.6017396861073009, "grad_norm": 0.5744667649269104, "learning_rate": 1.0841056760183959e-05, "loss": 0.087, "step": 27308 }, { "epoch": 0.601761721396817, "grad_norm": 0.5980382561683655, "learning_rate": 1.0840028206859311e-05, "loss": 0.067, "step": 27309 }, { "epoch": 0.6017837566863332, "grad_norm": 0.6423824429512024, "learning_rate": 1.0838999674723626e-05, "loss": 0.0615, "step": 27310 }, { "epoch": 0.6018057919758493, "grad_norm": 0.3876435458660126, "learning_rate": 1.0837971163782153e-05, "loss": 0.0802, "step": 27311 }, { "epoch": 0.6018278272653654, "grad_norm": 0.9599112272262573, "learning_rate": 1.0836942674040123e-05, "loss": 0.1054, "step": 27312 }, { "epoch": 0.6018498625548816, "grad_norm": 0.709718644618988, "learning_rate": 1.0835914205502769e-05, "loss": 0.0954, "step": 27313 }, { "epoch": 0.6018718978443978, "grad_norm": 0.7264212369918823, "learning_rate": 1.0834885758175346e-05, "loss": 0.0881, "step": 27314 }, { "epoch": 0.6018939331339139, "grad_norm": 0.6074438095092773, "learning_rate": 1.0833857332063082e-05, "loss": 0.0798, "step": 27315 }, { "epoch": 0.6019159684234301, "grad_norm": 0.8007116913795471, "learning_rate": 1.0832828927171216e-05, "loss": 0.0636, "step": 27316 }, { "epoch": 0.6019380037129463, "grad_norm": 0.7912848591804504, "learning_rate": 1.0831800543504984e-05, "loss": 0.0681, "step": 27317 }, { "epoch": 0.6019600390024624, "grad_norm": 0.44714125990867615, "learning_rate": 1.0830772181069632e-05, "loss": 0.0376, "step": 27318 }, { "epoch": 0.6019820742919786, "grad_norm": 0.39349597692489624, "learning_rate": 1.0829743839870391e-05, "loss": 0.0441, "step": 27319 }, { "epoch": 0.6020041095814948, "grad_norm": 0.7950681447982788, "learning_rate": 1.08287155199125e-05, "loss": 0.0914, "step": 27320 }, { "epoch": 0.6020261448710109, "grad_norm": 0.40117764472961426, "learning_rate": 1.0827687221201193e-05, "loss": 0.0639, "step": 27321 }, { "epoch": 0.6020481801605271, "grad_norm": 0.4958101212978363, "learning_rate": 1.0826658943741719e-05, "loss": 0.0725, "step": 27322 }, { "epoch": 0.6020702154500432, "grad_norm": 0.4500519335269928, "learning_rate": 1.0825630687539307e-05, "loss": 0.0714, "step": 27323 }, { "epoch": 0.6020922507395594, "grad_norm": 0.7139012813568115, "learning_rate": 1.0824602452599198e-05, "loss": 0.0877, "step": 27324 }, { "epoch": 0.6021142860290756, "grad_norm": 0.5635547637939453, "learning_rate": 1.0823574238926625e-05, "loss": 0.0706, "step": 27325 }, { "epoch": 0.6021363213185917, "grad_norm": 0.7503998875617981, "learning_rate": 1.0822546046526831e-05, "loss": 0.095, "step": 27326 }, { "epoch": 0.6021583566081079, "grad_norm": 0.5960952639579773, "learning_rate": 1.0821517875405049e-05, "loss": 0.0661, "step": 27327 }, { "epoch": 0.6021803918976241, "grad_norm": 0.6356269717216492, "learning_rate": 1.0820489725566518e-05, "loss": 0.092, "step": 27328 }, { "epoch": 0.6022024271871402, "grad_norm": 0.5295252799987793, "learning_rate": 1.0819461597016469e-05, "loss": 0.0571, "step": 27329 }, { "epoch": 0.6022244624766564, "grad_norm": 0.6649286150932312, "learning_rate": 1.0818433489760145e-05, "loss": 0.0726, "step": 27330 }, { "epoch": 0.6022464977661726, "grad_norm": 1.085579514503479, "learning_rate": 1.0817405403802786e-05, "loss": 0.0855, "step": 27331 }, { "epoch": 0.6022685330556887, "grad_norm": 0.8040066957473755, "learning_rate": 1.0816377339149621e-05, "loss": 0.0782, "step": 27332 }, { "epoch": 0.6022905683452049, "grad_norm": 0.41805222630500793, "learning_rate": 1.0815349295805891e-05, "loss": 0.0963, "step": 27333 }, { "epoch": 0.602312603634721, "grad_norm": 0.4651358127593994, "learning_rate": 1.0814321273776834e-05, "loss": 0.0778, "step": 27334 }, { "epoch": 0.6023346389242372, "grad_norm": 0.6418988704681396, "learning_rate": 1.0813293273067679e-05, "loss": 0.0581, "step": 27335 }, { "epoch": 0.6023566742137533, "grad_norm": 0.5023190379142761, "learning_rate": 1.0812265293683662e-05, "loss": 0.0782, "step": 27336 }, { "epoch": 0.6023787095032694, "grad_norm": 0.5552853345870972, "learning_rate": 1.0811237335630031e-05, "loss": 0.0902, "step": 27337 }, { "epoch": 0.6024007447927856, "grad_norm": 0.3546786904335022, "learning_rate": 1.0810209398912015e-05, "loss": 0.0272, "step": 27338 }, { "epoch": 0.6024227800823018, "grad_norm": 0.7332711815834045, "learning_rate": 1.0809181483534848e-05, "loss": 0.0591, "step": 27339 }, { "epoch": 0.6024448153718179, "grad_norm": 0.9312301874160767, "learning_rate": 1.0808153589503765e-05, "loss": 0.0801, "step": 27340 }, { "epoch": 0.6024668506613341, "grad_norm": 0.7462382316589355, "learning_rate": 1.0807125716824004e-05, "loss": 0.0605, "step": 27341 }, { "epoch": 0.6024888859508503, "grad_norm": 0.8256205916404724, "learning_rate": 1.0806097865500803e-05, "loss": 0.0803, "step": 27342 }, { "epoch": 0.6025109212403664, "grad_norm": 0.5573312044143677, "learning_rate": 1.080507003553939e-05, "loss": 0.0616, "step": 27343 }, { "epoch": 0.6025329565298826, "grad_norm": 0.7409415245056152, "learning_rate": 1.0804042226945004e-05, "loss": 0.0783, "step": 27344 }, { "epoch": 0.6025549918193988, "grad_norm": 1.579885482788086, "learning_rate": 1.0803014439722883e-05, "loss": 0.0542, "step": 27345 }, { "epoch": 0.6025770271089149, "grad_norm": 0.6557390689849854, "learning_rate": 1.0801986673878263e-05, "loss": 0.0743, "step": 27346 }, { "epoch": 0.6025990623984311, "grad_norm": 0.7874905467033386, "learning_rate": 1.0800958929416375e-05, "loss": 0.0693, "step": 27347 }, { "epoch": 0.6026210976879472, "grad_norm": 0.42426788806915283, "learning_rate": 1.0799931206342453e-05, "loss": 0.0507, "step": 27348 }, { "epoch": 0.6026431329774634, "grad_norm": 0.41859737038612366, "learning_rate": 1.0798903504661734e-05, "loss": 0.0582, "step": 27349 }, { "epoch": 0.6026651682669796, "grad_norm": 0.3231491446495056, "learning_rate": 1.0797875824379455e-05, "loss": 0.0363, "step": 27350 }, { "epoch": 0.6026872035564957, "grad_norm": 0.46685126423835754, "learning_rate": 1.0796848165500847e-05, "loss": 0.0879, "step": 27351 }, { "epoch": 0.6027092388460119, "grad_norm": 0.4776749014854431, "learning_rate": 1.0795820528031138e-05, "loss": 0.0481, "step": 27352 }, { "epoch": 0.6027312741355281, "grad_norm": 0.6880795359611511, "learning_rate": 1.0794792911975575e-05, "loss": 0.0715, "step": 27353 }, { "epoch": 0.6027533094250442, "grad_norm": 0.3093605637550354, "learning_rate": 1.0793765317339388e-05, "loss": 0.0398, "step": 27354 }, { "epoch": 0.6027753447145604, "grad_norm": 0.7060909271240234, "learning_rate": 1.079273774412781e-05, "loss": 0.0806, "step": 27355 }, { "epoch": 0.6027973800040766, "grad_norm": 0.664942741394043, "learning_rate": 1.0791710192346072e-05, "loss": 0.0987, "step": 27356 }, { "epoch": 0.6028194152935927, "grad_norm": 0.8711800575256348, "learning_rate": 1.0790682661999414e-05, "loss": 0.0828, "step": 27357 }, { "epoch": 0.6028414505831089, "grad_norm": 0.56276535987854, "learning_rate": 1.0789655153093067e-05, "loss": 0.0558, "step": 27358 }, { "epoch": 0.6028634858726251, "grad_norm": 0.4946288764476776, "learning_rate": 1.0788627665632255e-05, "loss": 0.0675, "step": 27359 }, { "epoch": 0.6028855211621411, "grad_norm": 0.5591802000999451, "learning_rate": 1.078760019962223e-05, "loss": 0.0694, "step": 27360 }, { "epoch": 0.6029075564516573, "grad_norm": 1.0968056917190552, "learning_rate": 1.0786572755068213e-05, "loss": 0.1127, "step": 27361 }, { "epoch": 0.6029295917411734, "grad_norm": 0.3925703763961792, "learning_rate": 1.0785545331975441e-05, "loss": 0.0598, "step": 27362 }, { "epoch": 0.6029516270306896, "grad_norm": 0.5988517999649048, "learning_rate": 1.0784517930349146e-05, "loss": 0.0418, "step": 27363 }, { "epoch": 0.6029736623202058, "grad_norm": 0.5160505175590515, "learning_rate": 1.0783490550194563e-05, "loss": 0.0418, "step": 27364 }, { "epoch": 0.6029956976097219, "grad_norm": 0.4857981204986572, "learning_rate": 1.0782463191516924e-05, "loss": 0.0727, "step": 27365 }, { "epoch": 0.6030177328992381, "grad_norm": 0.6041067242622375, "learning_rate": 1.078143585432146e-05, "loss": 0.0844, "step": 27366 }, { "epoch": 0.6030397681887543, "grad_norm": 0.7409590482711792, "learning_rate": 1.07804085386134e-05, "loss": 0.0981, "step": 27367 }, { "epoch": 0.6030618034782704, "grad_norm": 0.8314995169639587, "learning_rate": 1.077938124439799e-05, "loss": 0.0964, "step": 27368 }, { "epoch": 0.6030838387677866, "grad_norm": 0.5407332181930542, "learning_rate": 1.0778353971680452e-05, "loss": 0.0548, "step": 27369 }, { "epoch": 0.6031058740573028, "grad_norm": 0.7964593768119812, "learning_rate": 1.0777326720466024e-05, "loss": 0.0667, "step": 27370 }, { "epoch": 0.6031279093468189, "grad_norm": 0.6145603060722351, "learning_rate": 1.0776299490759932e-05, "loss": 0.0706, "step": 27371 }, { "epoch": 0.6031499446363351, "grad_norm": 0.44853246212005615, "learning_rate": 1.0775272282567411e-05, "loss": 0.0456, "step": 27372 }, { "epoch": 0.6031719799258513, "grad_norm": 0.6199923157691956, "learning_rate": 1.0774245095893697e-05, "loss": 0.0827, "step": 27373 }, { "epoch": 0.6031940152153674, "grad_norm": 0.6238784790039062, "learning_rate": 1.0773217930744018e-05, "loss": 0.084, "step": 27374 }, { "epoch": 0.6032160505048836, "grad_norm": 0.8862292170524597, "learning_rate": 1.0772190787123599e-05, "loss": 0.0762, "step": 27375 }, { "epoch": 0.6032380857943997, "grad_norm": 0.5688765048980713, "learning_rate": 1.0771163665037688e-05, "loss": 0.0518, "step": 27376 }, { "epoch": 0.6032601210839159, "grad_norm": 0.8897374272346497, "learning_rate": 1.0770136564491508e-05, "loss": 0.0655, "step": 27377 }, { "epoch": 0.6032821563734321, "grad_norm": 0.7177587747573853, "learning_rate": 1.0769109485490288e-05, "loss": 0.0604, "step": 27378 }, { "epoch": 0.6033041916629482, "grad_norm": 0.9579935073852539, "learning_rate": 1.0768082428039262e-05, "loss": 0.084, "step": 27379 }, { "epoch": 0.6033262269524644, "grad_norm": 0.9130014181137085, "learning_rate": 1.0767055392143665e-05, "loss": 0.0614, "step": 27380 }, { "epoch": 0.6033482622419806, "grad_norm": 0.7376378178596497, "learning_rate": 1.0766028377808722e-05, "loss": 0.0504, "step": 27381 }, { "epoch": 0.6033702975314967, "grad_norm": 0.5595086216926575, "learning_rate": 1.0765001385039664e-05, "loss": 0.0625, "step": 27382 }, { "epoch": 0.6033923328210129, "grad_norm": 0.6254955530166626, "learning_rate": 1.0763974413841729e-05, "loss": 0.0568, "step": 27383 }, { "epoch": 0.6034143681105291, "grad_norm": 0.4992423355579376, "learning_rate": 1.0762947464220144e-05, "loss": 0.0466, "step": 27384 }, { "epoch": 0.6034364034000451, "grad_norm": 0.580155074596405, "learning_rate": 1.076192053618014e-05, "loss": 0.0615, "step": 27385 }, { "epoch": 0.6034584386895613, "grad_norm": 0.5179644823074341, "learning_rate": 1.0760893629726943e-05, "loss": 0.0562, "step": 27386 }, { "epoch": 0.6034804739790774, "grad_norm": 0.8153824806213379, "learning_rate": 1.075986674486579e-05, "loss": 0.0832, "step": 27387 }, { "epoch": 0.6035025092685936, "grad_norm": 0.5482880473136902, "learning_rate": 1.0758839881601915e-05, "loss": 0.0787, "step": 27388 }, { "epoch": 0.6035245445581098, "grad_norm": 0.451727956533432, "learning_rate": 1.0757813039940537e-05, "loss": 0.0567, "step": 27389 }, { "epoch": 0.6035465798476259, "grad_norm": 0.4357605576515198, "learning_rate": 1.0756786219886889e-05, "loss": 0.0582, "step": 27390 }, { "epoch": 0.6035686151371421, "grad_norm": 0.5287720561027527, "learning_rate": 1.0755759421446209e-05, "loss": 0.0723, "step": 27391 }, { "epoch": 0.6035906504266583, "grad_norm": 0.6153155565261841, "learning_rate": 1.0754732644623723e-05, "loss": 0.0963, "step": 27392 }, { "epoch": 0.6036126857161744, "grad_norm": 0.5235676765441895, "learning_rate": 1.075370588942466e-05, "loss": 0.08, "step": 27393 }, { "epoch": 0.6036347210056906, "grad_norm": 0.858026921749115, "learning_rate": 1.0752679155854246e-05, "loss": 0.0843, "step": 27394 }, { "epoch": 0.6036567562952068, "grad_norm": 0.4925139248371124, "learning_rate": 1.0751652443917717e-05, "loss": 0.0808, "step": 27395 }, { "epoch": 0.6036787915847229, "grad_norm": 0.5270888209342957, "learning_rate": 1.0750625753620302e-05, "loss": 0.0709, "step": 27396 }, { "epoch": 0.6037008268742391, "grad_norm": 0.5597876310348511, "learning_rate": 1.074959908496723e-05, "loss": 0.0688, "step": 27397 }, { "epoch": 0.6037228621637553, "grad_norm": 0.34865105152130127, "learning_rate": 1.0748572437963719e-05, "loss": 0.0667, "step": 27398 }, { "epoch": 0.6037448974532714, "grad_norm": 0.48698359727859497, "learning_rate": 1.0747545812615017e-05, "loss": 0.0457, "step": 27399 }, { "epoch": 0.6037669327427876, "grad_norm": 0.35443902015686035, "learning_rate": 1.0746519208926345e-05, "loss": 0.0863, "step": 27400 }, { "epoch": 0.6037889680323038, "grad_norm": 0.5072529911994934, "learning_rate": 1.0745492626902925e-05, "loss": 0.0605, "step": 27401 }, { "epoch": 0.6038110033218199, "grad_norm": 0.60857093334198, "learning_rate": 1.0744466066549999e-05, "loss": 0.0447, "step": 27402 }, { "epoch": 0.6038330386113361, "grad_norm": 0.2571641802787781, "learning_rate": 1.074343952787279e-05, "loss": 0.0729, "step": 27403 }, { "epoch": 0.6038550739008522, "grad_norm": 0.5578774213790894, "learning_rate": 1.0742413010876524e-05, "loss": 0.0641, "step": 27404 }, { "epoch": 0.6038771091903684, "grad_norm": 0.8690457940101624, "learning_rate": 1.0741386515566428e-05, "loss": 0.088, "step": 27405 }, { "epoch": 0.6038991444798846, "grad_norm": 0.6948148608207703, "learning_rate": 1.0740360041947738e-05, "loss": 0.0834, "step": 27406 }, { "epoch": 0.6039211797694007, "grad_norm": 0.4352879822254181, "learning_rate": 1.073933359002568e-05, "loss": 0.0595, "step": 27407 }, { "epoch": 0.6039432150589169, "grad_norm": 0.5729131102561951, "learning_rate": 1.0738307159805479e-05, "loss": 0.0912, "step": 27408 }, { "epoch": 0.6039652503484331, "grad_norm": 0.5949945449829102, "learning_rate": 1.073728075129236e-05, "loss": 0.0586, "step": 27409 }, { "epoch": 0.6039872856379491, "grad_norm": 0.6974051594734192, "learning_rate": 1.073625436449156e-05, "loss": 0.0771, "step": 27410 }, { "epoch": 0.6040093209274653, "grad_norm": 0.5379385948181152, "learning_rate": 1.0735227999408307e-05, "loss": 0.0573, "step": 27411 }, { "epoch": 0.6040313562169815, "grad_norm": 0.6240959763526917, "learning_rate": 1.073420165604782e-05, "loss": 0.0653, "step": 27412 }, { "epoch": 0.6040533915064976, "grad_norm": 0.6375243067741394, "learning_rate": 1.0733175334415332e-05, "loss": 0.0822, "step": 27413 }, { "epoch": 0.6040754267960138, "grad_norm": 0.5920178890228271, "learning_rate": 1.0732149034516072e-05, "loss": 0.0968, "step": 27414 }, { "epoch": 0.60409746208553, "grad_norm": 0.5298343896865845, "learning_rate": 1.0731122756355265e-05, "loss": 0.0648, "step": 27415 }, { "epoch": 0.6041194973750461, "grad_norm": 0.9298515915870667, "learning_rate": 1.073009649993814e-05, "loss": 0.1006, "step": 27416 }, { "epoch": 0.6041415326645623, "grad_norm": 0.8829814195632935, "learning_rate": 1.0729070265269918e-05, "loss": 0.1195, "step": 27417 }, { "epoch": 0.6041635679540784, "grad_norm": 0.750735342502594, "learning_rate": 1.0728044052355835e-05, "loss": 0.0773, "step": 27418 }, { "epoch": 0.6041856032435946, "grad_norm": 0.990534782409668, "learning_rate": 1.0727017861201116e-05, "loss": 0.0817, "step": 27419 }, { "epoch": 0.6042076385331108, "grad_norm": 0.8204168081283569, "learning_rate": 1.0725991691810984e-05, "loss": 0.0802, "step": 27420 }, { "epoch": 0.6042296738226269, "grad_norm": 0.7643932104110718, "learning_rate": 1.072496554419067e-05, "loss": 0.071, "step": 27421 }, { "epoch": 0.6042517091121431, "grad_norm": 0.593433141708374, "learning_rate": 1.0723939418345398e-05, "loss": 0.0699, "step": 27422 }, { "epoch": 0.6042737444016593, "grad_norm": 0.4627973139286041, "learning_rate": 1.0722913314280395e-05, "loss": 0.0627, "step": 27423 }, { "epoch": 0.6042957796911754, "grad_norm": 0.692158579826355, "learning_rate": 1.0721887232000884e-05, "loss": 0.0576, "step": 27424 }, { "epoch": 0.6043178149806916, "grad_norm": 0.5774978399276733, "learning_rate": 1.0720861171512101e-05, "loss": 0.0737, "step": 27425 }, { "epoch": 0.6043398502702078, "grad_norm": 0.5679848790168762, "learning_rate": 1.0719835132819268e-05, "loss": 0.0691, "step": 27426 }, { "epoch": 0.6043618855597239, "grad_norm": 0.3716137409210205, "learning_rate": 1.0718809115927607e-05, "loss": 0.0474, "step": 27427 }, { "epoch": 0.6043839208492401, "grad_norm": 0.44191083312034607, "learning_rate": 1.0717783120842346e-05, "loss": 0.0494, "step": 27428 }, { "epoch": 0.6044059561387563, "grad_norm": 0.6197776794433594, "learning_rate": 1.0716757147568714e-05, "loss": 0.0551, "step": 27429 }, { "epoch": 0.6044279914282724, "grad_norm": 0.590268611907959, "learning_rate": 1.0715731196111935e-05, "loss": 0.0866, "step": 27430 }, { "epoch": 0.6044500267177886, "grad_norm": 0.5946065783500671, "learning_rate": 1.0714705266477236e-05, "loss": 0.0612, "step": 27431 }, { "epoch": 0.6044720620073047, "grad_norm": 1.0526877641677856, "learning_rate": 1.0713679358669833e-05, "loss": 0.072, "step": 27432 }, { "epoch": 0.6044940972968209, "grad_norm": 1.066392421722412, "learning_rate": 1.0712653472694966e-05, "loss": 0.0785, "step": 27433 }, { "epoch": 0.6045161325863371, "grad_norm": 0.8136293292045593, "learning_rate": 1.0711627608557853e-05, "loss": 0.0676, "step": 27434 }, { "epoch": 0.6045381678758531, "grad_norm": 0.4661232531070709, "learning_rate": 1.0710601766263722e-05, "loss": 0.0537, "step": 27435 }, { "epoch": 0.6045602031653693, "grad_norm": 1.1769081354141235, "learning_rate": 1.0709575945817794e-05, "loss": 0.0811, "step": 27436 }, { "epoch": 0.6045822384548855, "grad_norm": 0.4326598346233368, "learning_rate": 1.0708550147225298e-05, "loss": 0.0358, "step": 27437 }, { "epoch": 0.6046042737444016, "grad_norm": 0.7902182340621948, "learning_rate": 1.0707524370491457e-05, "loss": 0.091, "step": 27438 }, { "epoch": 0.6046263090339178, "grad_norm": 0.3621528446674347, "learning_rate": 1.0706498615621496e-05, "loss": 0.073, "step": 27439 }, { "epoch": 0.604648344323434, "grad_norm": 0.8594639301300049, "learning_rate": 1.0705472882620635e-05, "loss": 0.0939, "step": 27440 }, { "epoch": 0.6046703796129501, "grad_norm": 0.5985599756240845, "learning_rate": 1.0704447171494108e-05, "loss": 0.0531, "step": 27441 }, { "epoch": 0.6046924149024663, "grad_norm": 0.48854899406433105, "learning_rate": 1.0703421482247137e-05, "loss": 0.0801, "step": 27442 }, { "epoch": 0.6047144501919824, "grad_norm": 0.530646562576294, "learning_rate": 1.0702395814884942e-05, "loss": 0.0532, "step": 27443 }, { "epoch": 0.6047364854814986, "grad_norm": 0.39190995693206787, "learning_rate": 1.070137016941275e-05, "loss": 0.033, "step": 27444 }, { "epoch": 0.6047585207710148, "grad_norm": 0.5511319041252136, "learning_rate": 1.0700344545835785e-05, "loss": 0.0736, "step": 27445 }, { "epoch": 0.6047805560605309, "grad_norm": 0.5460888147354126, "learning_rate": 1.0699318944159271e-05, "loss": 0.064, "step": 27446 }, { "epoch": 0.6048025913500471, "grad_norm": 0.7058736085891724, "learning_rate": 1.0698293364388425e-05, "loss": 0.0953, "step": 27447 }, { "epoch": 0.6048246266395633, "grad_norm": 0.6738972067832947, "learning_rate": 1.0697267806528487e-05, "loss": 0.0868, "step": 27448 }, { "epoch": 0.6048466619290794, "grad_norm": 0.551115095615387, "learning_rate": 1.069624227058467e-05, "loss": 0.0534, "step": 27449 }, { "epoch": 0.6048686972185956, "grad_norm": 0.6862831115722656, "learning_rate": 1.06952167565622e-05, "loss": 0.0494, "step": 27450 }, { "epoch": 0.6048907325081118, "grad_norm": 0.9148639440536499, "learning_rate": 1.0694191264466294e-05, "loss": 0.0641, "step": 27451 }, { "epoch": 0.6049127677976279, "grad_norm": 1.150164008140564, "learning_rate": 1.0693165794302186e-05, "loss": 0.0788, "step": 27452 }, { "epoch": 0.6049348030871441, "grad_norm": 0.8004284501075745, "learning_rate": 1.0692140346075092e-05, "loss": 0.0979, "step": 27453 }, { "epoch": 0.6049568383766603, "grad_norm": 0.8150988221168518, "learning_rate": 1.069111491979024e-05, "loss": 0.0868, "step": 27454 }, { "epoch": 0.6049788736661764, "grad_norm": 0.5285769701004028, "learning_rate": 1.0690089515452841e-05, "loss": 0.081, "step": 27455 }, { "epoch": 0.6050009089556926, "grad_norm": 0.5394686460494995, "learning_rate": 1.0689064133068135e-05, "loss": 0.0926, "step": 27456 }, { "epoch": 0.6050229442452087, "grad_norm": 0.6107406616210938, "learning_rate": 1.0688038772641336e-05, "loss": 0.0789, "step": 27457 }, { "epoch": 0.6050449795347249, "grad_norm": 0.6788753867149353, "learning_rate": 1.0687013434177669e-05, "loss": 0.1042, "step": 27458 }, { "epoch": 0.605067014824241, "grad_norm": 0.6560816764831543, "learning_rate": 1.0685988117682353e-05, "loss": 0.0847, "step": 27459 }, { "epoch": 0.6050890501137571, "grad_norm": 0.2653464078903198, "learning_rate": 1.0684962823160616e-05, "loss": 0.0524, "step": 27460 }, { "epoch": 0.6051110854032733, "grad_norm": 0.5200828313827515, "learning_rate": 1.0683937550617675e-05, "loss": 0.0402, "step": 27461 }, { "epoch": 0.6051331206927895, "grad_norm": 0.4706677496433258, "learning_rate": 1.0682912300058757e-05, "loss": 0.0679, "step": 27462 }, { "epoch": 0.6051551559823056, "grad_norm": 0.2343003898859024, "learning_rate": 1.0681887071489074e-05, "loss": 0.0694, "step": 27463 }, { "epoch": 0.6051771912718218, "grad_norm": 0.47313234210014343, "learning_rate": 1.068086186491386e-05, "loss": 0.0509, "step": 27464 }, { "epoch": 0.605199226561338, "grad_norm": 0.570645272731781, "learning_rate": 1.0679836680338334e-05, "loss": 0.0642, "step": 27465 }, { "epoch": 0.6052212618508541, "grad_norm": 0.6865499019622803, "learning_rate": 1.0678811517767717e-05, "loss": 0.0769, "step": 27466 }, { "epoch": 0.6052432971403703, "grad_norm": 1.2140744924545288, "learning_rate": 1.067778637720723e-05, "loss": 0.0981, "step": 27467 }, { "epoch": 0.6052653324298864, "grad_norm": 0.3870353400707245, "learning_rate": 1.0676761258662094e-05, "loss": 0.0512, "step": 27468 }, { "epoch": 0.6052873677194026, "grad_norm": 0.5504345893859863, "learning_rate": 1.0675736162137533e-05, "loss": 0.0524, "step": 27469 }, { "epoch": 0.6053094030089188, "grad_norm": 0.5842806696891785, "learning_rate": 1.0674711087638758e-05, "loss": 0.0611, "step": 27470 }, { "epoch": 0.6053314382984349, "grad_norm": 0.7434036135673523, "learning_rate": 1.067368603517101e-05, "loss": 0.1062, "step": 27471 }, { "epoch": 0.6053534735879511, "grad_norm": 0.5446159243583679, "learning_rate": 1.0672661004739495e-05, "loss": 0.0768, "step": 27472 }, { "epoch": 0.6053755088774673, "grad_norm": 0.6750325560569763, "learning_rate": 1.0671635996349439e-05, "loss": 0.0775, "step": 27473 }, { "epoch": 0.6053975441669834, "grad_norm": 0.6950846910476685, "learning_rate": 1.0670611010006059e-05, "loss": 0.0627, "step": 27474 }, { "epoch": 0.6054195794564996, "grad_norm": 0.6224101185798645, "learning_rate": 1.066958604571458e-05, "loss": 0.0536, "step": 27475 }, { "epoch": 0.6054416147460158, "grad_norm": 0.4912177324295044, "learning_rate": 1.0668561103480223e-05, "loss": 0.0477, "step": 27476 }, { "epoch": 0.6054636500355319, "grad_norm": 0.9309821724891663, "learning_rate": 1.0667536183308207e-05, "loss": 0.0706, "step": 27477 }, { "epoch": 0.6054856853250481, "grad_norm": 0.4282284677028656, "learning_rate": 1.0666511285203746e-05, "loss": 0.0642, "step": 27478 }, { "epoch": 0.6055077206145643, "grad_norm": 0.8371802568435669, "learning_rate": 1.0665486409172074e-05, "loss": 0.0748, "step": 27479 }, { "epoch": 0.6055297559040804, "grad_norm": 0.4084945619106293, "learning_rate": 1.0664461555218404e-05, "loss": 0.073, "step": 27480 }, { "epoch": 0.6055517911935966, "grad_norm": 0.8184430003166199, "learning_rate": 1.0663436723347953e-05, "loss": 0.0964, "step": 27481 }, { "epoch": 0.6055738264831128, "grad_norm": 0.5339424014091492, "learning_rate": 1.0662411913565945e-05, "loss": 0.0401, "step": 27482 }, { "epoch": 0.6055958617726289, "grad_norm": 0.6696807146072388, "learning_rate": 1.0661387125877599e-05, "loss": 0.0777, "step": 27483 }, { "epoch": 0.605617897062145, "grad_norm": 0.7098996043205261, "learning_rate": 1.0660362360288137e-05, "loss": 0.0656, "step": 27484 }, { "epoch": 0.6056399323516611, "grad_norm": 1.015161395072937, "learning_rate": 1.0659337616802776e-05, "loss": 0.0854, "step": 27485 }, { "epoch": 0.6056619676411773, "grad_norm": 0.43838608264923096, "learning_rate": 1.065831289542673e-05, "loss": 0.0624, "step": 27486 }, { "epoch": 0.6056840029306935, "grad_norm": 0.5574376583099365, "learning_rate": 1.0657288196165232e-05, "loss": 0.0448, "step": 27487 }, { "epoch": 0.6057060382202096, "grad_norm": 0.7380116581916809, "learning_rate": 1.0656263519023493e-05, "loss": 0.0487, "step": 27488 }, { "epoch": 0.6057280735097258, "grad_norm": 0.6949096322059631, "learning_rate": 1.065523886400673e-05, "loss": 0.0675, "step": 27489 }, { "epoch": 0.605750108799242, "grad_norm": 0.3686431050300598, "learning_rate": 1.065421423112017e-05, "loss": 0.0599, "step": 27490 }, { "epoch": 0.6057721440887581, "grad_norm": 0.4905130863189697, "learning_rate": 1.0653189620369024e-05, "loss": 0.0778, "step": 27491 }, { "epoch": 0.6057941793782743, "grad_norm": 0.5413377285003662, "learning_rate": 1.0652165031758516e-05, "loss": 0.0695, "step": 27492 }, { "epoch": 0.6058162146677905, "grad_norm": 0.6876312494277954, "learning_rate": 1.0651140465293858e-05, "loss": 0.0631, "step": 27493 }, { "epoch": 0.6058382499573066, "grad_norm": 0.6723707914352417, "learning_rate": 1.0650115920980278e-05, "loss": 0.049, "step": 27494 }, { "epoch": 0.6058602852468228, "grad_norm": 0.6738752722740173, "learning_rate": 1.064909139882299e-05, "loss": 0.0714, "step": 27495 }, { "epoch": 0.605882320536339, "grad_norm": 0.48071175813674927, "learning_rate": 1.0648066898827214e-05, "loss": 0.0888, "step": 27496 }, { "epoch": 0.6059043558258551, "grad_norm": 0.8253468871116638, "learning_rate": 1.0647042420998165e-05, "loss": 0.1077, "step": 27497 }, { "epoch": 0.6059263911153713, "grad_norm": 0.606809139251709, "learning_rate": 1.0646017965341064e-05, "loss": 0.0634, "step": 27498 }, { "epoch": 0.6059484264048874, "grad_norm": 0.7589455246925354, "learning_rate": 1.0644993531861129e-05, "loss": 0.0753, "step": 27499 }, { "epoch": 0.6059704616944036, "grad_norm": 0.49480387568473816, "learning_rate": 1.0643969120563577e-05, "loss": 0.0474, "step": 27500 }, { "epoch": 0.6059924969839198, "grad_norm": 0.6150256991386414, "learning_rate": 1.064294473145362e-05, "loss": 0.0629, "step": 27501 }, { "epoch": 0.6060145322734359, "grad_norm": 0.5661637783050537, "learning_rate": 1.0641920364536487e-05, "loss": 0.064, "step": 27502 }, { "epoch": 0.6060365675629521, "grad_norm": 0.517906129360199, "learning_rate": 1.0640896019817394e-05, "loss": 0.0482, "step": 27503 }, { "epoch": 0.6060586028524683, "grad_norm": 0.4797566533088684, "learning_rate": 1.0639871697301553e-05, "loss": 0.0525, "step": 27504 }, { "epoch": 0.6060806381419844, "grad_norm": 0.7693437337875366, "learning_rate": 1.0638847396994179e-05, "loss": 0.0706, "step": 27505 }, { "epoch": 0.6061026734315006, "grad_norm": 0.6361269354820251, "learning_rate": 1.0637823118900498e-05, "loss": 0.0704, "step": 27506 }, { "epoch": 0.6061247087210168, "grad_norm": 0.6674347519874573, "learning_rate": 1.0636798863025725e-05, "loss": 0.0676, "step": 27507 }, { "epoch": 0.6061467440105329, "grad_norm": 0.7113150358200073, "learning_rate": 1.0635774629375073e-05, "loss": 0.0716, "step": 27508 }, { "epoch": 0.606168779300049, "grad_norm": 0.5222017765045166, "learning_rate": 1.0634750417953754e-05, "loss": 0.073, "step": 27509 }, { "epoch": 0.6061908145895651, "grad_norm": 0.4717728793621063, "learning_rate": 1.0633726228767001e-05, "loss": 0.0826, "step": 27510 }, { "epoch": 0.6062128498790813, "grad_norm": 0.5279482007026672, "learning_rate": 1.0632702061820022e-05, "loss": 0.0548, "step": 27511 }, { "epoch": 0.6062348851685975, "grad_norm": 0.7071927785873413, "learning_rate": 1.0631677917118023e-05, "loss": 0.1041, "step": 27512 }, { "epoch": 0.6062569204581136, "grad_norm": 0.9018656611442566, "learning_rate": 1.0630653794666241e-05, "loss": 0.0758, "step": 27513 }, { "epoch": 0.6062789557476298, "grad_norm": 0.9899122714996338, "learning_rate": 1.0629629694469879e-05, "loss": 0.0685, "step": 27514 }, { "epoch": 0.606300991037146, "grad_norm": 0.5835126638412476, "learning_rate": 1.0628605616534165e-05, "loss": 0.0833, "step": 27515 }, { "epoch": 0.6063230263266621, "grad_norm": 0.7791885733604431, "learning_rate": 1.0627581560864292e-05, "loss": 0.0902, "step": 27516 }, { "epoch": 0.6063450616161783, "grad_norm": 0.5881463885307312, "learning_rate": 1.0626557527465497e-05, "loss": 0.0509, "step": 27517 }, { "epoch": 0.6063670969056945, "grad_norm": 0.49747899174690247, "learning_rate": 1.0625533516342993e-05, "loss": 0.0696, "step": 27518 }, { "epoch": 0.6063891321952106, "grad_norm": 0.6490692496299744, "learning_rate": 1.062450952750199e-05, "loss": 0.0734, "step": 27519 }, { "epoch": 0.6064111674847268, "grad_norm": 0.9760433435440063, "learning_rate": 1.0623485560947703e-05, "loss": 0.1126, "step": 27520 }, { "epoch": 0.606433202774243, "grad_norm": 0.5375094413757324, "learning_rate": 1.0622461616685355e-05, "loss": 0.0788, "step": 27521 }, { "epoch": 0.6064552380637591, "grad_norm": 0.7744367718696594, "learning_rate": 1.0621437694720158e-05, "loss": 0.0832, "step": 27522 }, { "epoch": 0.6064772733532753, "grad_norm": 1.1359456777572632, "learning_rate": 1.0620413795057325e-05, "loss": 0.0933, "step": 27523 }, { "epoch": 0.6064993086427914, "grad_norm": 0.6067039966583252, "learning_rate": 1.0619389917702072e-05, "loss": 0.0569, "step": 27524 }, { "epoch": 0.6065213439323076, "grad_norm": 0.9432395696640015, "learning_rate": 1.0618366062659618e-05, "loss": 0.0816, "step": 27525 }, { "epoch": 0.6065433792218238, "grad_norm": 0.555641770362854, "learning_rate": 1.0617342229935175e-05, "loss": 0.0636, "step": 27526 }, { "epoch": 0.6065654145113399, "grad_norm": 0.9523861408233643, "learning_rate": 1.0616318419533958e-05, "loss": 0.0886, "step": 27527 }, { "epoch": 0.6065874498008561, "grad_norm": 0.9656967520713806, "learning_rate": 1.0615294631461175e-05, "loss": 0.0762, "step": 27528 }, { "epoch": 0.6066094850903723, "grad_norm": 0.5016576647758484, "learning_rate": 1.0614270865722056e-05, "loss": 0.0649, "step": 27529 }, { "epoch": 0.6066315203798884, "grad_norm": 0.4613308012485504, "learning_rate": 1.0613247122321807e-05, "loss": 0.0742, "step": 27530 }, { "epoch": 0.6066535556694046, "grad_norm": 0.4339369833469391, "learning_rate": 1.061222340126564e-05, "loss": 0.0387, "step": 27531 }, { "epoch": 0.6066755909589208, "grad_norm": 0.504482090473175, "learning_rate": 1.0611199702558774e-05, "loss": 0.0736, "step": 27532 }, { "epoch": 0.6066976262484368, "grad_norm": 0.6196631193161011, "learning_rate": 1.061017602620642e-05, "loss": 0.0616, "step": 27533 }, { "epoch": 0.606719661537953, "grad_norm": 0.6178650856018066, "learning_rate": 1.0609152372213796e-05, "loss": 0.1057, "step": 27534 }, { "epoch": 0.6067416968274691, "grad_norm": 0.3533112108707428, "learning_rate": 1.0608128740586106e-05, "loss": 0.0414, "step": 27535 }, { "epoch": 0.6067637321169853, "grad_norm": 0.6343440413475037, "learning_rate": 1.0607105131328576e-05, "loss": 0.0666, "step": 27536 }, { "epoch": 0.6067857674065015, "grad_norm": 0.6012681722640991, "learning_rate": 1.060608154444642e-05, "loss": 0.0634, "step": 27537 }, { "epoch": 0.6068078026960176, "grad_norm": 0.5628222227096558, "learning_rate": 1.0605057979944843e-05, "loss": 0.0571, "step": 27538 }, { "epoch": 0.6068298379855338, "grad_norm": 0.41859206557273865, "learning_rate": 1.060403443782906e-05, "loss": 0.0858, "step": 27539 }, { "epoch": 0.60685187327505, "grad_norm": 0.45422691106796265, "learning_rate": 1.0603010918104291e-05, "loss": 0.0458, "step": 27540 }, { "epoch": 0.6068739085645661, "grad_norm": 0.42962750792503357, "learning_rate": 1.0601987420775745e-05, "loss": 0.0465, "step": 27541 }, { "epoch": 0.6068959438540823, "grad_norm": 0.4884352684020996, "learning_rate": 1.0600963945848634e-05, "loss": 0.0554, "step": 27542 }, { "epoch": 0.6069179791435985, "grad_norm": 0.7604690194129944, "learning_rate": 1.0599940493328168e-05, "loss": 0.0809, "step": 27543 }, { "epoch": 0.6069400144331146, "grad_norm": 0.6830998659133911, "learning_rate": 1.0598917063219572e-05, "loss": 0.0531, "step": 27544 }, { "epoch": 0.6069620497226308, "grad_norm": 0.3228684365749359, "learning_rate": 1.0597893655528049e-05, "loss": 0.0853, "step": 27545 }, { "epoch": 0.606984085012147, "grad_norm": 0.6986021399497986, "learning_rate": 1.0596870270258814e-05, "loss": 0.0802, "step": 27546 }, { "epoch": 0.6070061203016631, "grad_norm": 0.6413440108299255, "learning_rate": 1.0595846907417078e-05, "loss": 0.0814, "step": 27547 }, { "epoch": 0.6070281555911793, "grad_norm": 0.42912280559539795, "learning_rate": 1.059482356700806e-05, "loss": 0.0389, "step": 27548 }, { "epoch": 0.6070501908806955, "grad_norm": 0.48291707038879395, "learning_rate": 1.0593800249036965e-05, "loss": 0.0475, "step": 27549 }, { "epoch": 0.6070722261702116, "grad_norm": 0.6718374490737915, "learning_rate": 1.059277695350901e-05, "loss": 0.0764, "step": 27550 }, { "epoch": 0.6070942614597278, "grad_norm": 0.7430005669593811, "learning_rate": 1.0591753680429397e-05, "loss": 0.0827, "step": 27551 }, { "epoch": 0.6071162967492439, "grad_norm": 0.4403294324874878, "learning_rate": 1.0590730429803354e-05, "loss": 0.0549, "step": 27552 }, { "epoch": 0.6071383320387601, "grad_norm": 0.4892165958881378, "learning_rate": 1.0589707201636085e-05, "loss": 0.0671, "step": 27553 }, { "epoch": 0.6071603673282763, "grad_norm": 0.5594351291656494, "learning_rate": 1.0588683995932802e-05, "loss": 0.0691, "step": 27554 }, { "epoch": 0.6071824026177924, "grad_norm": 0.8158180117607117, "learning_rate": 1.0587660812698715e-05, "loss": 0.0815, "step": 27555 }, { "epoch": 0.6072044379073086, "grad_norm": 0.830268144607544, "learning_rate": 1.0586637651939041e-05, "loss": 0.0859, "step": 27556 }, { "epoch": 0.6072264731968248, "grad_norm": 0.6840503811836243, "learning_rate": 1.0585614513658986e-05, "loss": 0.0673, "step": 27557 }, { "epoch": 0.6072485084863408, "grad_norm": 0.6507549285888672, "learning_rate": 1.0584591397863758e-05, "loss": 0.0929, "step": 27558 }, { "epoch": 0.607270543775857, "grad_norm": 0.38370224833488464, "learning_rate": 1.0583568304558581e-05, "loss": 0.0626, "step": 27559 }, { "epoch": 0.6072925790653732, "grad_norm": 0.6108515858650208, "learning_rate": 1.0582545233748658e-05, "loss": 0.073, "step": 27560 }, { "epoch": 0.6073146143548893, "grad_norm": 0.49854353070259094, "learning_rate": 1.05815221854392e-05, "loss": 0.048, "step": 27561 }, { "epoch": 0.6073366496444055, "grad_norm": 0.795619547367096, "learning_rate": 1.058049915963542e-05, "loss": 0.074, "step": 27562 }, { "epoch": 0.6073586849339216, "grad_norm": 0.6727539300918579, "learning_rate": 1.0579476156342526e-05, "loss": 0.0577, "step": 27563 }, { "epoch": 0.6073807202234378, "grad_norm": 0.5736938118934631, "learning_rate": 1.057845317556573e-05, "loss": 0.069, "step": 27564 }, { "epoch": 0.607402755512954, "grad_norm": 0.4761611819267273, "learning_rate": 1.0577430217310248e-05, "loss": 0.0559, "step": 27565 }, { "epoch": 0.6074247908024701, "grad_norm": 0.3552177846431732, "learning_rate": 1.0576407281581275e-05, "loss": 0.0538, "step": 27566 }, { "epoch": 0.6074468260919863, "grad_norm": 0.6153734922409058, "learning_rate": 1.057538436838404e-05, "loss": 0.0477, "step": 27567 }, { "epoch": 0.6074688613815025, "grad_norm": 0.6333913803100586, "learning_rate": 1.0574361477723745e-05, "loss": 0.0713, "step": 27568 }, { "epoch": 0.6074908966710186, "grad_norm": 0.6205344200134277, "learning_rate": 1.0573338609605599e-05, "loss": 0.0528, "step": 27569 }, { "epoch": 0.6075129319605348, "grad_norm": 0.7419619560241699, "learning_rate": 1.0572315764034811e-05, "loss": 0.0675, "step": 27570 }, { "epoch": 0.607534967250051, "grad_norm": 0.5909939408302307, "learning_rate": 1.0571292941016596e-05, "loss": 0.0613, "step": 27571 }, { "epoch": 0.6075570025395671, "grad_norm": 0.5980557799339294, "learning_rate": 1.0570270140556162e-05, "loss": 0.0482, "step": 27572 }, { "epoch": 0.6075790378290833, "grad_norm": 0.856846272945404, "learning_rate": 1.0569247362658716e-05, "loss": 0.0527, "step": 27573 }, { "epoch": 0.6076010731185995, "grad_norm": 0.39580026268959045, "learning_rate": 1.0568224607329464e-05, "loss": 0.0672, "step": 27574 }, { "epoch": 0.6076231084081156, "grad_norm": 1.1171830892562866, "learning_rate": 1.0567201874573626e-05, "loss": 0.097, "step": 27575 }, { "epoch": 0.6076451436976318, "grad_norm": 0.6784657835960388, "learning_rate": 1.0566179164396406e-05, "loss": 0.0682, "step": 27576 }, { "epoch": 0.607667178987148, "grad_norm": 0.6673368811607361, "learning_rate": 1.0565156476803015e-05, "loss": 0.0562, "step": 27577 }, { "epoch": 0.6076892142766641, "grad_norm": 0.8991091251373291, "learning_rate": 1.0564133811798655e-05, "loss": 0.0857, "step": 27578 }, { "epoch": 0.6077112495661803, "grad_norm": 1.0701284408569336, "learning_rate": 1.0563111169388543e-05, "loss": 0.0826, "step": 27579 }, { "epoch": 0.6077332848556964, "grad_norm": 0.4137219488620758, "learning_rate": 1.0562088549577886e-05, "loss": 0.0978, "step": 27580 }, { "epoch": 0.6077553201452126, "grad_norm": 0.49239978194236755, "learning_rate": 1.0561065952371883e-05, "loss": 0.0523, "step": 27581 }, { "epoch": 0.6077773554347288, "grad_norm": 0.5516796708106995, "learning_rate": 1.0560043377775761e-05, "loss": 0.0817, "step": 27582 }, { "epoch": 0.6077993907242448, "grad_norm": 0.707292377948761, "learning_rate": 1.0559020825794719e-05, "loss": 0.0618, "step": 27583 }, { "epoch": 0.607821426013761, "grad_norm": 0.26793837547302246, "learning_rate": 1.0557998296433965e-05, "loss": 0.0513, "step": 27584 }, { "epoch": 0.6078434613032772, "grad_norm": 0.6365960240364075, "learning_rate": 1.0556975789698702e-05, "loss": 0.0353, "step": 27585 }, { "epoch": 0.6078654965927933, "grad_norm": 0.6694371700286865, "learning_rate": 1.055595330559415e-05, "loss": 0.0706, "step": 27586 }, { "epoch": 0.6078875318823095, "grad_norm": 0.7533157467842102, "learning_rate": 1.055493084412551e-05, "loss": 0.0737, "step": 27587 }, { "epoch": 0.6079095671718256, "grad_norm": 0.8017809391021729, "learning_rate": 1.055390840529799e-05, "loss": 0.0698, "step": 27588 }, { "epoch": 0.6079316024613418, "grad_norm": 0.7238321900367737, "learning_rate": 1.0552885989116792e-05, "loss": 0.0529, "step": 27589 }, { "epoch": 0.607953637750858, "grad_norm": 0.3671545088291168, "learning_rate": 1.055186359558714e-05, "loss": 0.0999, "step": 27590 }, { "epoch": 0.6079756730403741, "grad_norm": 0.4420166313648224, "learning_rate": 1.0550841224714229e-05, "loss": 0.063, "step": 27591 }, { "epoch": 0.6079977083298903, "grad_norm": 0.7307660579681396, "learning_rate": 1.054981887650327e-05, "loss": 0.0739, "step": 27592 }, { "epoch": 0.6080197436194065, "grad_norm": 0.6184145212173462, "learning_rate": 1.0548796550959466e-05, "loss": 0.0846, "step": 27593 }, { "epoch": 0.6080417789089226, "grad_norm": 0.6546812057495117, "learning_rate": 1.0547774248088034e-05, "loss": 0.0728, "step": 27594 }, { "epoch": 0.6080638141984388, "grad_norm": 0.542641818523407, "learning_rate": 1.0546751967894172e-05, "loss": 0.0398, "step": 27595 }, { "epoch": 0.608085849487955, "grad_norm": 0.34718120098114014, "learning_rate": 1.0545729710383093e-05, "loss": 0.0488, "step": 27596 }, { "epoch": 0.6081078847774711, "grad_norm": 0.5744396448135376, "learning_rate": 1.0544707475559992e-05, "loss": 0.0541, "step": 27597 }, { "epoch": 0.6081299200669873, "grad_norm": 0.6747889518737793, "learning_rate": 1.0543685263430094e-05, "loss": 0.0789, "step": 27598 }, { "epoch": 0.6081519553565035, "grad_norm": 0.45409566164016724, "learning_rate": 1.0542663073998594e-05, "loss": 0.0616, "step": 27599 }, { "epoch": 0.6081739906460196, "grad_norm": 0.5490611791610718, "learning_rate": 1.05416409072707e-05, "loss": 0.0569, "step": 27600 }, { "epoch": 0.6081960259355358, "grad_norm": 1.289622187614441, "learning_rate": 1.0540618763251622e-05, "loss": 0.0907, "step": 27601 }, { "epoch": 0.608218061225052, "grad_norm": 0.6420066356658936, "learning_rate": 1.0539596641946564e-05, "loss": 0.1071, "step": 27602 }, { "epoch": 0.6082400965145681, "grad_norm": 0.7391630411148071, "learning_rate": 1.0538574543360733e-05, "loss": 0.09, "step": 27603 }, { "epoch": 0.6082621318040843, "grad_norm": 0.7770840525627136, "learning_rate": 1.0537552467499328e-05, "loss": 0.0816, "step": 27604 }, { "epoch": 0.6082841670936004, "grad_norm": 0.8258451223373413, "learning_rate": 1.0536530414367564e-05, "loss": 0.0733, "step": 27605 }, { "epoch": 0.6083062023831166, "grad_norm": 1.011423110961914, "learning_rate": 1.0535508383970646e-05, "loss": 0.0952, "step": 27606 }, { "epoch": 0.6083282376726328, "grad_norm": 0.7005531787872314, "learning_rate": 1.053448637631378e-05, "loss": 0.0498, "step": 27607 }, { "epoch": 0.6083502729621488, "grad_norm": 0.9277834892272949, "learning_rate": 1.0533464391402166e-05, "loss": 0.1019, "step": 27608 }, { "epoch": 0.608372308251665, "grad_norm": 0.797252893447876, "learning_rate": 1.0532442429241016e-05, "loss": 0.0877, "step": 27609 }, { "epoch": 0.6083943435411812, "grad_norm": 0.46935299038887024, "learning_rate": 1.053142048983553e-05, "loss": 0.0516, "step": 27610 }, { "epoch": 0.6084163788306973, "grad_norm": 0.7735949754714966, "learning_rate": 1.0530398573190918e-05, "loss": 0.066, "step": 27611 }, { "epoch": 0.6084384141202135, "grad_norm": 0.7748066782951355, "learning_rate": 1.0529376679312374e-05, "loss": 0.0557, "step": 27612 }, { "epoch": 0.6084604494097297, "grad_norm": 0.6803348660469055, "learning_rate": 1.0528354808205121e-05, "loss": 0.0655, "step": 27613 }, { "epoch": 0.6084824846992458, "grad_norm": 1.4571789503097534, "learning_rate": 1.0527332959874355e-05, "loss": 0.0972, "step": 27614 }, { "epoch": 0.608504519988762, "grad_norm": 0.4890754520893097, "learning_rate": 1.052631113432528e-05, "loss": 0.0635, "step": 27615 }, { "epoch": 0.6085265552782781, "grad_norm": 0.7979118824005127, "learning_rate": 1.0525289331563096e-05, "loss": 0.0871, "step": 27616 }, { "epoch": 0.6085485905677943, "grad_norm": 1.1878653764724731, "learning_rate": 1.0524267551593017e-05, "loss": 0.1344, "step": 27617 }, { "epoch": 0.6085706258573105, "grad_norm": 0.48724526166915894, "learning_rate": 1.0523245794420245e-05, "loss": 0.0809, "step": 27618 }, { "epoch": 0.6085926611468266, "grad_norm": 0.6602634191513062, "learning_rate": 1.0522224060049984e-05, "loss": 0.0634, "step": 27619 }, { "epoch": 0.6086146964363428, "grad_norm": 0.4855128228664398, "learning_rate": 1.0521202348487427e-05, "loss": 0.0851, "step": 27620 }, { "epoch": 0.608636731725859, "grad_norm": 0.8115999698638916, "learning_rate": 1.0520180659737798e-05, "loss": 0.0928, "step": 27621 }, { "epoch": 0.6086587670153751, "grad_norm": 0.48173093795776367, "learning_rate": 1.0519158993806289e-05, "loss": 0.0649, "step": 27622 }, { "epoch": 0.6086808023048913, "grad_norm": 1.4036191701889038, "learning_rate": 1.05181373506981e-05, "loss": 0.1046, "step": 27623 }, { "epoch": 0.6087028375944075, "grad_norm": 0.5170337557792664, "learning_rate": 1.0517115730418446e-05, "loss": 0.071, "step": 27624 }, { "epoch": 0.6087248728839236, "grad_norm": 0.7548834085464478, "learning_rate": 1.0516094132972529e-05, "loss": 0.0988, "step": 27625 }, { "epoch": 0.6087469081734398, "grad_norm": 0.6401799321174622, "learning_rate": 1.0515072558365547e-05, "loss": 0.0723, "step": 27626 }, { "epoch": 0.608768943462956, "grad_norm": 0.5681948065757751, "learning_rate": 1.05140510066027e-05, "loss": 0.0959, "step": 27627 }, { "epoch": 0.6087909787524721, "grad_norm": 0.5233856439590454, "learning_rate": 1.0513029477689205e-05, "loss": 0.0787, "step": 27628 }, { "epoch": 0.6088130140419883, "grad_norm": 0.6065052151679993, "learning_rate": 1.0512007971630253e-05, "loss": 0.0707, "step": 27629 }, { "epoch": 0.6088350493315045, "grad_norm": 0.35313349962234497, "learning_rate": 1.051098648843105e-05, "loss": 0.0865, "step": 27630 }, { "epoch": 0.6088570846210206, "grad_norm": 0.689078688621521, "learning_rate": 1.0509965028096798e-05, "loss": 0.0594, "step": 27631 }, { "epoch": 0.6088791199105367, "grad_norm": 0.5714724063873291, "learning_rate": 1.0508943590632704e-05, "loss": 0.052, "step": 27632 }, { "epoch": 0.6089011552000528, "grad_norm": 1.6192339658737183, "learning_rate": 1.0507922176043972e-05, "loss": 0.0963, "step": 27633 }, { "epoch": 0.608923190489569, "grad_norm": 0.41484248638153076, "learning_rate": 1.0506900784335802e-05, "loss": 0.0562, "step": 27634 }, { "epoch": 0.6089452257790852, "grad_norm": 0.7591561675071716, "learning_rate": 1.050587941551339e-05, "loss": 0.0678, "step": 27635 }, { "epoch": 0.6089672610686013, "grad_norm": 0.7248894572257996, "learning_rate": 1.0504858069581947e-05, "loss": 0.0875, "step": 27636 }, { "epoch": 0.6089892963581175, "grad_norm": 0.659690260887146, "learning_rate": 1.0503836746546672e-05, "loss": 0.0746, "step": 27637 }, { "epoch": 0.6090113316476337, "grad_norm": 0.8994207382202148, "learning_rate": 1.0502815446412768e-05, "loss": 0.0642, "step": 27638 }, { "epoch": 0.6090333669371498, "grad_norm": 0.6486017107963562, "learning_rate": 1.050179416918543e-05, "loss": 0.0659, "step": 27639 }, { "epoch": 0.609055402226666, "grad_norm": 0.6956456899642944, "learning_rate": 1.0500772914869874e-05, "loss": 0.0647, "step": 27640 }, { "epoch": 0.6090774375161822, "grad_norm": 0.8659897446632385, "learning_rate": 1.0499751683471293e-05, "loss": 0.0782, "step": 27641 }, { "epoch": 0.6090994728056983, "grad_norm": 0.7190722823143005, "learning_rate": 1.0498730474994891e-05, "loss": 0.0725, "step": 27642 }, { "epoch": 0.6091215080952145, "grad_norm": 0.5998321175575256, "learning_rate": 1.0497709289445865e-05, "loss": 0.0998, "step": 27643 }, { "epoch": 0.6091435433847306, "grad_norm": 0.5638530850410461, "learning_rate": 1.0496688126829424e-05, "loss": 0.0662, "step": 27644 }, { "epoch": 0.6091655786742468, "grad_norm": 0.726597011089325, "learning_rate": 1.0495666987150764e-05, "loss": 0.074, "step": 27645 }, { "epoch": 0.609187613963763, "grad_norm": 0.3484947979450226, "learning_rate": 1.0494645870415081e-05, "loss": 0.046, "step": 27646 }, { "epoch": 0.6092096492532791, "grad_norm": 0.7917883992195129, "learning_rate": 1.0493624776627589e-05, "loss": 0.0624, "step": 27647 }, { "epoch": 0.6092316845427953, "grad_norm": 0.5936821103096008, "learning_rate": 1.0492603705793483e-05, "loss": 0.0691, "step": 27648 }, { "epoch": 0.6092537198323115, "grad_norm": 1.0073273181915283, "learning_rate": 1.0491582657917963e-05, "loss": 0.0848, "step": 27649 }, { "epoch": 0.6092757551218276, "grad_norm": 0.3988194167613983, "learning_rate": 1.0490561633006227e-05, "loss": 0.0623, "step": 27650 }, { "epoch": 0.6092977904113438, "grad_norm": 0.8675968647003174, "learning_rate": 1.048954063106348e-05, "loss": 0.0737, "step": 27651 }, { "epoch": 0.60931982570086, "grad_norm": 0.9684017300605774, "learning_rate": 1.0488519652094926e-05, "loss": 0.0616, "step": 27652 }, { "epoch": 0.6093418609903761, "grad_norm": 0.9715219736099243, "learning_rate": 1.0487498696105757e-05, "loss": 0.1013, "step": 27653 }, { "epoch": 0.6093638962798923, "grad_norm": 0.5332239270210266, "learning_rate": 1.0486477763101172e-05, "loss": 0.0747, "step": 27654 }, { "epoch": 0.6093859315694085, "grad_norm": 0.29831692576408386, "learning_rate": 1.0485456853086381e-05, "loss": 0.0332, "step": 27655 }, { "epoch": 0.6094079668589246, "grad_norm": 0.6200970411300659, "learning_rate": 1.0484435966066579e-05, "loss": 0.0662, "step": 27656 }, { "epoch": 0.6094300021484407, "grad_norm": 1.069173812866211, "learning_rate": 1.0483415102046967e-05, "loss": 0.0861, "step": 27657 }, { "epoch": 0.6094520374379568, "grad_norm": 0.6740472316741943, "learning_rate": 1.0482394261032742e-05, "loss": 0.0714, "step": 27658 }, { "epoch": 0.609474072727473, "grad_norm": 0.5354859828948975, "learning_rate": 1.0481373443029106e-05, "loss": 0.0691, "step": 27659 }, { "epoch": 0.6094961080169892, "grad_norm": 0.505122721195221, "learning_rate": 1.048035264804126e-05, "loss": 0.0397, "step": 27660 }, { "epoch": 0.6095181433065053, "grad_norm": 0.3756946325302124, "learning_rate": 1.0479331876074401e-05, "loss": 0.0812, "step": 27661 }, { "epoch": 0.6095401785960215, "grad_norm": 0.8690335750579834, "learning_rate": 1.0478311127133724e-05, "loss": 0.0686, "step": 27662 }, { "epoch": 0.6095622138855377, "grad_norm": 0.5576131939888, "learning_rate": 1.0477290401224438e-05, "loss": 0.0683, "step": 27663 }, { "epoch": 0.6095842491750538, "grad_norm": 0.6901389956474304, "learning_rate": 1.0476269698351736e-05, "loss": 0.0943, "step": 27664 }, { "epoch": 0.60960628446457, "grad_norm": 0.5926460027694702, "learning_rate": 1.0475249018520821e-05, "loss": 0.0528, "step": 27665 }, { "epoch": 0.6096283197540862, "grad_norm": 0.7034847736358643, "learning_rate": 1.0474228361736886e-05, "loss": 0.0636, "step": 27666 }, { "epoch": 0.6096503550436023, "grad_norm": 0.4007423222064972, "learning_rate": 1.0473207728005132e-05, "loss": 0.0517, "step": 27667 }, { "epoch": 0.6096723903331185, "grad_norm": 0.6900485754013062, "learning_rate": 1.0472187117330762e-05, "loss": 0.0754, "step": 27668 }, { "epoch": 0.6096944256226347, "grad_norm": 0.5179333686828613, "learning_rate": 1.0471166529718965e-05, "loss": 0.0722, "step": 27669 }, { "epoch": 0.6097164609121508, "grad_norm": 0.7098696231842041, "learning_rate": 1.047014596517495e-05, "loss": 0.087, "step": 27670 }, { "epoch": 0.609738496201667, "grad_norm": 1.0234750509262085, "learning_rate": 1.0469125423703913e-05, "loss": 0.093, "step": 27671 }, { "epoch": 0.6097605314911831, "grad_norm": 1.0843344926834106, "learning_rate": 1.0468104905311048e-05, "loss": 0.0661, "step": 27672 }, { "epoch": 0.6097825667806993, "grad_norm": 0.5632185339927673, "learning_rate": 1.0467084410001551e-05, "loss": 0.0596, "step": 27673 }, { "epoch": 0.6098046020702155, "grad_norm": 0.5990949273109436, "learning_rate": 1.0466063937780629e-05, "loss": 0.0729, "step": 27674 }, { "epoch": 0.6098266373597316, "grad_norm": 0.752920925617218, "learning_rate": 1.0465043488653473e-05, "loss": 0.0766, "step": 27675 }, { "epoch": 0.6098486726492478, "grad_norm": 0.9922420382499695, "learning_rate": 1.0464023062625282e-05, "loss": 0.0458, "step": 27676 }, { "epoch": 0.609870707938764, "grad_norm": 0.8398566842079163, "learning_rate": 1.046300265970125e-05, "loss": 0.0611, "step": 27677 }, { "epoch": 0.6098927432282801, "grad_norm": 0.9048905372619629, "learning_rate": 1.0461982279886584e-05, "loss": 0.0815, "step": 27678 }, { "epoch": 0.6099147785177963, "grad_norm": 0.5216219425201416, "learning_rate": 1.0460961923186475e-05, "loss": 0.0652, "step": 27679 }, { "epoch": 0.6099368138073125, "grad_norm": 0.3404020369052887, "learning_rate": 1.0459941589606121e-05, "loss": 0.058, "step": 27680 }, { "epoch": 0.6099588490968286, "grad_norm": 0.8194948434829712, "learning_rate": 1.0458921279150716e-05, "loss": 0.1239, "step": 27681 }, { "epoch": 0.6099808843863447, "grad_norm": 0.3890875577926636, "learning_rate": 1.0457900991825463e-05, "loss": 0.067, "step": 27682 }, { "epoch": 0.6100029196758608, "grad_norm": 0.6628458499908447, "learning_rate": 1.0456880727635556e-05, "loss": 0.0747, "step": 27683 }, { "epoch": 0.610024954965377, "grad_norm": 0.4197603166103363, "learning_rate": 1.0455860486586191e-05, "loss": 0.0706, "step": 27684 }, { "epoch": 0.6100469902548932, "grad_norm": 0.9589965343475342, "learning_rate": 1.045484026868256e-05, "loss": 0.1079, "step": 27685 }, { "epoch": 0.6100690255444093, "grad_norm": 0.7233623266220093, "learning_rate": 1.0453820073929872e-05, "loss": 0.0547, "step": 27686 }, { "epoch": 0.6100910608339255, "grad_norm": 0.5408604145050049, "learning_rate": 1.0452799902333314e-05, "loss": 0.0767, "step": 27687 }, { "epoch": 0.6101130961234417, "grad_norm": 0.39358529448509216, "learning_rate": 1.0451779753898085e-05, "loss": 0.0728, "step": 27688 }, { "epoch": 0.6101351314129578, "grad_norm": 0.6463825106620789, "learning_rate": 1.0450759628629377e-05, "loss": 0.0805, "step": 27689 }, { "epoch": 0.610157166702474, "grad_norm": 0.8764424920082092, "learning_rate": 1.0449739526532395e-05, "loss": 0.0766, "step": 27690 }, { "epoch": 0.6101792019919902, "grad_norm": 0.8528406620025635, "learning_rate": 1.0448719447612329e-05, "loss": 0.0719, "step": 27691 }, { "epoch": 0.6102012372815063, "grad_norm": 0.4803021252155304, "learning_rate": 1.044769939187437e-05, "loss": 0.059, "step": 27692 }, { "epoch": 0.6102232725710225, "grad_norm": 0.8512217402458191, "learning_rate": 1.0446679359323721e-05, "loss": 0.0951, "step": 27693 }, { "epoch": 0.6102453078605387, "grad_norm": 0.5075903534889221, "learning_rate": 1.044565934996558e-05, "loss": 0.0505, "step": 27694 }, { "epoch": 0.6102673431500548, "grad_norm": 0.8362900018692017, "learning_rate": 1.0444639363805138e-05, "loss": 0.0564, "step": 27695 }, { "epoch": 0.610289378439571, "grad_norm": 0.4170192778110504, "learning_rate": 1.0443619400847585e-05, "loss": 0.0573, "step": 27696 }, { "epoch": 0.6103114137290871, "grad_norm": 0.9475013613700867, "learning_rate": 1.0442599461098127e-05, "loss": 0.0773, "step": 27697 }, { "epoch": 0.6103334490186033, "grad_norm": 0.8701891303062439, "learning_rate": 1.0441579544561953e-05, "loss": 0.0694, "step": 27698 }, { "epoch": 0.6103554843081195, "grad_norm": 0.42660731077194214, "learning_rate": 1.0440559651244259e-05, "loss": 0.093, "step": 27699 }, { "epoch": 0.6103775195976356, "grad_norm": 0.5838785767555237, "learning_rate": 1.0439539781150234e-05, "loss": 0.0675, "step": 27700 }, { "epoch": 0.6103995548871518, "grad_norm": 0.6189916133880615, "learning_rate": 1.0438519934285083e-05, "loss": 0.0899, "step": 27701 }, { "epoch": 0.610421590176668, "grad_norm": 1.4258171319961548, "learning_rate": 1.0437500110653995e-05, "loss": 0.0821, "step": 27702 }, { "epoch": 0.6104436254661841, "grad_norm": 0.8665676712989807, "learning_rate": 1.043648031026217e-05, "loss": 0.0647, "step": 27703 }, { "epoch": 0.6104656607557003, "grad_norm": 0.4988258481025696, "learning_rate": 1.0435460533114791e-05, "loss": 0.06, "step": 27704 }, { "epoch": 0.6104876960452165, "grad_norm": 0.5073662996292114, "learning_rate": 1.0434440779217064e-05, "loss": 0.0704, "step": 27705 }, { "epoch": 0.6105097313347325, "grad_norm": 0.5636813044548035, "learning_rate": 1.0433421048574178e-05, "loss": 0.0721, "step": 27706 }, { "epoch": 0.6105317666242487, "grad_norm": 1.5379858016967773, "learning_rate": 1.0432401341191327e-05, "loss": 0.0748, "step": 27707 }, { "epoch": 0.6105538019137648, "grad_norm": 1.14046049118042, "learning_rate": 1.0431381657073699e-05, "loss": 0.0696, "step": 27708 }, { "epoch": 0.610575837203281, "grad_norm": 0.2566708028316498, "learning_rate": 1.0430361996226502e-05, "loss": 0.048, "step": 27709 }, { "epoch": 0.6105978724927972, "grad_norm": 0.6014915108680725, "learning_rate": 1.0429342358654919e-05, "loss": 0.0773, "step": 27710 }, { "epoch": 0.6106199077823133, "grad_norm": 0.5975717902183533, "learning_rate": 1.0428322744364145e-05, "loss": 0.0586, "step": 27711 }, { "epoch": 0.6106419430718295, "grad_norm": 0.46857520937919617, "learning_rate": 1.0427303153359378e-05, "loss": 0.0581, "step": 27712 }, { "epoch": 0.6106639783613457, "grad_norm": 0.8217782974243164, "learning_rate": 1.0426283585645807e-05, "loss": 0.0728, "step": 27713 }, { "epoch": 0.6106860136508618, "grad_norm": 0.8179941773414612, "learning_rate": 1.0425264041228629e-05, "loss": 0.1027, "step": 27714 }, { "epoch": 0.610708048940378, "grad_norm": 0.7415961623191833, "learning_rate": 1.0424244520113025e-05, "loss": 0.0965, "step": 27715 }, { "epoch": 0.6107300842298942, "grad_norm": 0.683483898639679, "learning_rate": 1.0423225022304204e-05, "loss": 0.0668, "step": 27716 }, { "epoch": 0.6107521195194103, "grad_norm": 0.5685106515884399, "learning_rate": 1.0422205547807354e-05, "loss": 0.1066, "step": 27717 }, { "epoch": 0.6107741548089265, "grad_norm": 0.7881841063499451, "learning_rate": 1.0421186096627666e-05, "loss": 0.1071, "step": 27718 }, { "epoch": 0.6107961900984427, "grad_norm": 0.34937137365341187, "learning_rate": 1.0420166668770328e-05, "loss": 0.0923, "step": 27719 }, { "epoch": 0.6108182253879588, "grad_norm": 1.03269624710083, "learning_rate": 1.041914726424054e-05, "loss": 0.0657, "step": 27720 }, { "epoch": 0.610840260677475, "grad_norm": 0.591639518737793, "learning_rate": 1.0418127883043494e-05, "loss": 0.0785, "step": 27721 }, { "epoch": 0.6108622959669912, "grad_norm": 0.8157667517662048, "learning_rate": 1.0417108525184377e-05, "loss": 0.0765, "step": 27722 }, { "epoch": 0.6108843312565073, "grad_norm": 0.5137091279029846, "learning_rate": 1.0416089190668379e-05, "loss": 0.0438, "step": 27723 }, { "epoch": 0.6109063665460235, "grad_norm": 0.310558021068573, "learning_rate": 1.04150698795007e-05, "loss": 0.0559, "step": 27724 }, { "epoch": 0.6109284018355396, "grad_norm": 0.6696640849113464, "learning_rate": 1.0414050591686533e-05, "loss": 0.0618, "step": 27725 }, { "epoch": 0.6109504371250558, "grad_norm": 1.057604432106018, "learning_rate": 1.0413031327231065e-05, "loss": 0.0904, "step": 27726 }, { "epoch": 0.610972472414572, "grad_norm": 0.799045205116272, "learning_rate": 1.0412012086139479e-05, "loss": 0.0673, "step": 27727 }, { "epoch": 0.6109945077040881, "grad_norm": 0.6000784635543823, "learning_rate": 1.0410992868416987e-05, "loss": 0.0699, "step": 27728 }, { "epoch": 0.6110165429936043, "grad_norm": 0.7695683240890503, "learning_rate": 1.0409973674068767e-05, "loss": 0.0694, "step": 27729 }, { "epoch": 0.6110385782831205, "grad_norm": 0.7601064443588257, "learning_rate": 1.040895450310001e-05, "loss": 0.0672, "step": 27730 }, { "epoch": 0.6110606135726365, "grad_norm": 0.7303980588912964, "learning_rate": 1.0407935355515904e-05, "loss": 0.0874, "step": 27731 }, { "epoch": 0.6110826488621527, "grad_norm": 0.6345312595367432, "learning_rate": 1.0406916231321653e-05, "loss": 0.0664, "step": 27732 }, { "epoch": 0.6111046841516689, "grad_norm": 0.26659074425697327, "learning_rate": 1.040589713052244e-05, "loss": 0.0428, "step": 27733 }, { "epoch": 0.611126719441185, "grad_norm": 0.4266596734523773, "learning_rate": 1.0404878053123449e-05, "loss": 0.0727, "step": 27734 }, { "epoch": 0.6111487547307012, "grad_norm": 0.6967199444770813, "learning_rate": 1.0403858999129887e-05, "loss": 0.0688, "step": 27735 }, { "epoch": 0.6111707900202173, "grad_norm": 0.5619440674781799, "learning_rate": 1.0402839968546935e-05, "loss": 0.0614, "step": 27736 }, { "epoch": 0.6111928253097335, "grad_norm": 0.49572497606277466, "learning_rate": 1.0401820961379784e-05, "loss": 0.069, "step": 27737 }, { "epoch": 0.6112148605992497, "grad_norm": 0.9755753874778748, "learning_rate": 1.0400801977633623e-05, "loss": 0.0788, "step": 27738 }, { "epoch": 0.6112368958887658, "grad_norm": 0.3220522701740265, "learning_rate": 1.0399783017313643e-05, "loss": 0.0534, "step": 27739 }, { "epoch": 0.611258931178282, "grad_norm": 1.0160975456237793, "learning_rate": 1.0398764080425039e-05, "loss": 0.0548, "step": 27740 }, { "epoch": 0.6112809664677982, "grad_norm": 0.40412938594818115, "learning_rate": 1.0397745166972995e-05, "loss": 0.054, "step": 27741 }, { "epoch": 0.6113030017573143, "grad_norm": 0.43412718176841736, "learning_rate": 1.0396726276962695e-05, "loss": 0.0828, "step": 27742 }, { "epoch": 0.6113250370468305, "grad_norm": 0.7888892889022827, "learning_rate": 1.0395707410399345e-05, "loss": 0.07, "step": 27743 }, { "epoch": 0.6113470723363467, "grad_norm": 0.4236994683742523, "learning_rate": 1.0394688567288125e-05, "loss": 0.0643, "step": 27744 }, { "epoch": 0.6113691076258628, "grad_norm": 0.5114601254463196, "learning_rate": 1.0393669747634228e-05, "loss": 0.0543, "step": 27745 }, { "epoch": 0.611391142915379, "grad_norm": 0.5198055505752563, "learning_rate": 1.0392650951442836e-05, "loss": 0.0797, "step": 27746 }, { "epoch": 0.6114131782048952, "grad_norm": 0.9842044711112976, "learning_rate": 1.0391632178719146e-05, "loss": 0.1089, "step": 27747 }, { "epoch": 0.6114352134944113, "grad_norm": 1.1003375053405762, "learning_rate": 1.0390613429468344e-05, "loss": 0.0663, "step": 27748 }, { "epoch": 0.6114572487839275, "grad_norm": 0.6837799549102783, "learning_rate": 1.0389594703695621e-05, "loss": 0.0702, "step": 27749 }, { "epoch": 0.6114792840734437, "grad_norm": 0.9929779171943665, "learning_rate": 1.038857600140616e-05, "loss": 0.0862, "step": 27750 }, { "epoch": 0.6115013193629598, "grad_norm": 0.388212651014328, "learning_rate": 1.0387557322605159e-05, "loss": 0.0589, "step": 27751 }, { "epoch": 0.611523354652476, "grad_norm": 0.8316110968589783, "learning_rate": 1.03865386672978e-05, "loss": 0.0722, "step": 27752 }, { "epoch": 0.6115453899419921, "grad_norm": 0.6318506002426147, "learning_rate": 1.0385520035489275e-05, "loss": 0.0595, "step": 27753 }, { "epoch": 0.6115674252315083, "grad_norm": 0.9235460162162781, "learning_rate": 1.0384501427184768e-05, "loss": 0.0816, "step": 27754 }, { "epoch": 0.6115894605210245, "grad_norm": 0.5622523427009583, "learning_rate": 1.0383482842389474e-05, "loss": 0.0698, "step": 27755 }, { "epoch": 0.6116114958105405, "grad_norm": 0.3938816487789154, "learning_rate": 1.0382464281108574e-05, "loss": 0.0511, "step": 27756 }, { "epoch": 0.6116335311000567, "grad_norm": 0.4031446576118469, "learning_rate": 1.0381445743347256e-05, "loss": 0.065, "step": 27757 }, { "epoch": 0.6116555663895729, "grad_norm": 0.786758542060852, "learning_rate": 1.0380427229110718e-05, "loss": 0.0703, "step": 27758 }, { "epoch": 0.611677601679089, "grad_norm": 1.1564440727233887, "learning_rate": 1.0379408738404143e-05, "loss": 0.0775, "step": 27759 }, { "epoch": 0.6116996369686052, "grad_norm": 0.7315996289253235, "learning_rate": 1.0378390271232714e-05, "loss": 0.0404, "step": 27760 }, { "epoch": 0.6117216722581214, "grad_norm": 0.572909951210022, "learning_rate": 1.0377371827601619e-05, "loss": 0.0561, "step": 27761 }, { "epoch": 0.6117437075476375, "grad_norm": 0.5917204022407532, "learning_rate": 1.0376353407516051e-05, "loss": 0.0514, "step": 27762 }, { "epoch": 0.6117657428371537, "grad_norm": 0.5526331067085266, "learning_rate": 1.0375335010981196e-05, "loss": 0.0645, "step": 27763 }, { "epoch": 0.6117877781266698, "grad_norm": 0.49699220061302185, "learning_rate": 1.037431663800224e-05, "loss": 0.0543, "step": 27764 }, { "epoch": 0.611809813416186, "grad_norm": 0.9500656127929688, "learning_rate": 1.0373298288584363e-05, "loss": 0.0863, "step": 27765 }, { "epoch": 0.6118318487057022, "grad_norm": 0.4309259057044983, "learning_rate": 1.0372279962732764e-05, "loss": 0.0815, "step": 27766 }, { "epoch": 0.6118538839952183, "grad_norm": 0.6861669421195984, "learning_rate": 1.0371261660452626e-05, "loss": 0.0438, "step": 27767 }, { "epoch": 0.6118759192847345, "grad_norm": 0.5657063126564026, "learning_rate": 1.0370243381749134e-05, "loss": 0.0595, "step": 27768 }, { "epoch": 0.6118979545742507, "grad_norm": 0.31000033020973206, "learning_rate": 1.036922512662747e-05, "loss": 0.0634, "step": 27769 }, { "epoch": 0.6119199898637668, "grad_norm": 0.5227342247962952, "learning_rate": 1.0368206895092833e-05, "loss": 0.0908, "step": 27770 }, { "epoch": 0.611942025153283, "grad_norm": 0.5020371675491333, "learning_rate": 1.0367188687150398e-05, "loss": 0.0651, "step": 27771 }, { "epoch": 0.6119640604427992, "grad_norm": 0.38419973850250244, "learning_rate": 1.036617050280536e-05, "loss": 0.0493, "step": 27772 }, { "epoch": 0.6119860957323153, "grad_norm": 0.49910932779312134, "learning_rate": 1.036515234206289e-05, "loss": 0.0699, "step": 27773 }, { "epoch": 0.6120081310218315, "grad_norm": 0.41393956542015076, "learning_rate": 1.0364134204928191e-05, "loss": 0.0579, "step": 27774 }, { "epoch": 0.6120301663113477, "grad_norm": 0.620244026184082, "learning_rate": 1.0363116091406444e-05, "loss": 0.0833, "step": 27775 }, { "epoch": 0.6120522016008638, "grad_norm": 0.5947390198707581, "learning_rate": 1.0362098001502833e-05, "loss": 0.0601, "step": 27776 }, { "epoch": 0.61207423689038, "grad_norm": 0.46562665700912476, "learning_rate": 1.0361079935222539e-05, "loss": 0.0614, "step": 27777 }, { "epoch": 0.6120962721798962, "grad_norm": 0.5251209139823914, "learning_rate": 1.0360061892570759e-05, "loss": 0.0923, "step": 27778 }, { "epoch": 0.6121183074694123, "grad_norm": 0.980787456035614, "learning_rate": 1.035904387355267e-05, "loss": 0.0889, "step": 27779 }, { "epoch": 0.6121403427589284, "grad_norm": 0.9408190250396729, "learning_rate": 1.0358025878173453e-05, "loss": 0.0883, "step": 27780 }, { "epoch": 0.6121623780484445, "grad_norm": 1.1074779033660889, "learning_rate": 1.0357007906438305e-05, "loss": 0.0804, "step": 27781 }, { "epoch": 0.6121844133379607, "grad_norm": 0.596882700920105, "learning_rate": 1.0355989958352406e-05, "loss": 0.0734, "step": 27782 }, { "epoch": 0.6122064486274769, "grad_norm": 0.7313040494918823, "learning_rate": 1.0354972033920942e-05, "loss": 0.0631, "step": 27783 }, { "epoch": 0.612228483916993, "grad_norm": 0.6949928998947144, "learning_rate": 1.0353954133149092e-05, "loss": 0.0596, "step": 27784 }, { "epoch": 0.6122505192065092, "grad_norm": 0.618651807308197, "learning_rate": 1.0352936256042047e-05, "loss": 0.0673, "step": 27785 }, { "epoch": 0.6122725544960254, "grad_norm": 0.5519576668739319, "learning_rate": 1.0351918402604991e-05, "loss": 0.0703, "step": 27786 }, { "epoch": 0.6122945897855415, "grad_norm": 0.49255287647247314, "learning_rate": 1.0350900572843106e-05, "loss": 0.0684, "step": 27787 }, { "epoch": 0.6123166250750577, "grad_norm": 0.8179574012756348, "learning_rate": 1.0349882766761573e-05, "loss": 0.0603, "step": 27788 }, { "epoch": 0.6123386603645739, "grad_norm": 0.4543008506298065, "learning_rate": 1.0348864984365586e-05, "loss": 0.09, "step": 27789 }, { "epoch": 0.61236069565409, "grad_norm": 0.6904951333999634, "learning_rate": 1.0347847225660324e-05, "loss": 0.0754, "step": 27790 }, { "epoch": 0.6123827309436062, "grad_norm": 0.6092172265052795, "learning_rate": 1.034682949065097e-05, "loss": 0.0615, "step": 27791 }, { "epoch": 0.6124047662331223, "grad_norm": 0.4917641580104828, "learning_rate": 1.0345811779342706e-05, "loss": 0.0551, "step": 27792 }, { "epoch": 0.6124268015226385, "grad_norm": 0.39681699872016907, "learning_rate": 1.034479409174072e-05, "loss": 0.0301, "step": 27793 }, { "epoch": 0.6124488368121547, "grad_norm": 0.48664313554763794, "learning_rate": 1.0343776427850196e-05, "loss": 0.055, "step": 27794 }, { "epoch": 0.6124708721016708, "grad_norm": 0.5619152784347534, "learning_rate": 1.0342758787676316e-05, "loss": 0.0882, "step": 27795 }, { "epoch": 0.612492907391187, "grad_norm": 0.52338045835495, "learning_rate": 1.0341741171224254e-05, "loss": 0.0627, "step": 27796 }, { "epoch": 0.6125149426807032, "grad_norm": 0.8092452883720398, "learning_rate": 1.0340723578499212e-05, "loss": 0.0912, "step": 27797 }, { "epoch": 0.6125369779702193, "grad_norm": 0.8564836382865906, "learning_rate": 1.033970600950636e-05, "loss": 0.0766, "step": 27798 }, { "epoch": 0.6125590132597355, "grad_norm": 0.8050920963287354, "learning_rate": 1.0338688464250886e-05, "loss": 0.0851, "step": 27799 }, { "epoch": 0.6125810485492517, "grad_norm": 0.30363938212394714, "learning_rate": 1.0337670942737967e-05, "loss": 0.0757, "step": 27800 }, { "epoch": 0.6126030838387678, "grad_norm": 0.6876044273376465, "learning_rate": 1.0336653444972794e-05, "loss": 0.0756, "step": 27801 }, { "epoch": 0.612625119128284, "grad_norm": 0.8820897340774536, "learning_rate": 1.0335635970960546e-05, "loss": 0.0922, "step": 27802 }, { "epoch": 0.6126471544178002, "grad_norm": 0.6414750218391418, "learning_rate": 1.0334618520706396e-05, "loss": 0.0571, "step": 27803 }, { "epoch": 0.6126691897073163, "grad_norm": 0.6843357086181641, "learning_rate": 1.0333601094215547e-05, "loss": 0.0611, "step": 27804 }, { "epoch": 0.6126912249968324, "grad_norm": 0.36438000202178955, "learning_rate": 1.0332583691493167e-05, "loss": 0.0683, "step": 27805 }, { "epoch": 0.6127132602863485, "grad_norm": 0.6177613735198975, "learning_rate": 1.033156631254444e-05, "loss": 0.0831, "step": 27806 }, { "epoch": 0.6127352955758647, "grad_norm": 0.646679699420929, "learning_rate": 1.0330548957374549e-05, "loss": 0.0756, "step": 27807 }, { "epoch": 0.6127573308653809, "grad_norm": 0.439628005027771, "learning_rate": 1.0329531625988678e-05, "loss": 0.0693, "step": 27808 }, { "epoch": 0.612779366154897, "grad_norm": 0.575124204158783, "learning_rate": 1.0328514318392007e-05, "loss": 0.0713, "step": 27809 }, { "epoch": 0.6128014014444132, "grad_norm": 0.5966833233833313, "learning_rate": 1.0327497034589718e-05, "loss": 0.0738, "step": 27810 }, { "epoch": 0.6128234367339294, "grad_norm": 0.5061553120613098, "learning_rate": 1.0326479774586984e-05, "loss": 0.0337, "step": 27811 }, { "epoch": 0.6128454720234455, "grad_norm": 0.6187899708747864, "learning_rate": 1.0325462538389002e-05, "loss": 0.0814, "step": 27812 }, { "epoch": 0.6128675073129617, "grad_norm": 0.34189507365226746, "learning_rate": 1.0324445326000947e-05, "loss": 0.0728, "step": 27813 }, { "epoch": 0.6128895426024779, "grad_norm": 0.5011139512062073, "learning_rate": 1.0323428137427997e-05, "loss": 0.07, "step": 27814 }, { "epoch": 0.612911577891994, "grad_norm": 0.3806522786617279, "learning_rate": 1.0322410972675333e-05, "loss": 0.0681, "step": 27815 }, { "epoch": 0.6129336131815102, "grad_norm": 0.5803001523017883, "learning_rate": 1.0321393831748143e-05, "loss": 0.073, "step": 27816 }, { "epoch": 0.6129556484710263, "grad_norm": 0.5764724612236023, "learning_rate": 1.0320376714651602e-05, "loss": 0.0647, "step": 27817 }, { "epoch": 0.6129776837605425, "grad_norm": 0.7280099391937256, "learning_rate": 1.0319359621390893e-05, "loss": 0.0667, "step": 27818 }, { "epoch": 0.6129997190500587, "grad_norm": 1.0155272483825684, "learning_rate": 1.0318342551971187e-05, "loss": 0.1061, "step": 27819 }, { "epoch": 0.6130217543395748, "grad_norm": 0.49068301916122437, "learning_rate": 1.0317325506397679e-05, "loss": 0.057, "step": 27820 }, { "epoch": 0.613043789629091, "grad_norm": 0.5936392545700073, "learning_rate": 1.0316308484675546e-05, "loss": 0.051, "step": 27821 }, { "epoch": 0.6130658249186072, "grad_norm": 0.5500221252441406, "learning_rate": 1.0315291486809962e-05, "loss": 0.0911, "step": 27822 }, { "epoch": 0.6130878602081233, "grad_norm": 0.4802442789077759, "learning_rate": 1.031427451280611e-05, "loss": 0.0462, "step": 27823 }, { "epoch": 0.6131098954976395, "grad_norm": 0.8746041059494019, "learning_rate": 1.0313257562669173e-05, "loss": 0.0814, "step": 27824 }, { "epoch": 0.6131319307871557, "grad_norm": 0.6492656469345093, "learning_rate": 1.031224063640433e-05, "loss": 0.0762, "step": 27825 }, { "epoch": 0.6131539660766718, "grad_norm": 0.5101556181907654, "learning_rate": 1.031122373401675e-05, "loss": 0.0703, "step": 27826 }, { "epoch": 0.613176001366188, "grad_norm": 0.4940117597579956, "learning_rate": 1.031020685551163e-05, "loss": 0.0613, "step": 27827 }, { "epoch": 0.6131980366557042, "grad_norm": 0.5260211825370789, "learning_rate": 1.0309190000894142e-05, "loss": 0.0604, "step": 27828 }, { "epoch": 0.6132200719452203, "grad_norm": 0.4187453091144562, "learning_rate": 1.0308173170169461e-05, "loss": 0.0609, "step": 27829 }, { "epoch": 0.6132421072347364, "grad_norm": 0.38958895206451416, "learning_rate": 1.0307156363342772e-05, "loss": 0.0433, "step": 27830 }, { "epoch": 0.6132641425242525, "grad_norm": 0.5711638331413269, "learning_rate": 1.0306139580419252e-05, "loss": 0.0545, "step": 27831 }, { "epoch": 0.6132861778137687, "grad_norm": 0.5939701795578003, "learning_rate": 1.030512282140408e-05, "loss": 0.0657, "step": 27832 }, { "epoch": 0.6133082131032849, "grad_norm": 0.5271154046058655, "learning_rate": 1.0304106086302437e-05, "loss": 0.0726, "step": 27833 }, { "epoch": 0.613330248392801, "grad_norm": 0.49910488724708557, "learning_rate": 1.0303089375119492e-05, "loss": 0.0488, "step": 27834 }, { "epoch": 0.6133522836823172, "grad_norm": 0.6520431637763977, "learning_rate": 1.030207268786044e-05, "loss": 0.0695, "step": 27835 }, { "epoch": 0.6133743189718334, "grad_norm": 0.6412220001220703, "learning_rate": 1.0301056024530449e-05, "loss": 0.0769, "step": 27836 }, { "epoch": 0.6133963542613495, "grad_norm": 0.8735799193382263, "learning_rate": 1.0300039385134698e-05, "loss": 0.0891, "step": 27837 }, { "epoch": 0.6134183895508657, "grad_norm": 1.0639103651046753, "learning_rate": 1.0299022769678362e-05, "loss": 0.1024, "step": 27838 }, { "epoch": 0.6134404248403819, "grad_norm": 1.075481653213501, "learning_rate": 1.029800617816663e-05, "loss": 0.1134, "step": 27839 }, { "epoch": 0.613462460129898, "grad_norm": 0.8883654475212097, "learning_rate": 1.0296989610604677e-05, "loss": 0.0946, "step": 27840 }, { "epoch": 0.6134844954194142, "grad_norm": 0.421979159116745, "learning_rate": 1.0295973066997677e-05, "loss": 0.0515, "step": 27841 }, { "epoch": 0.6135065307089304, "grad_norm": 0.6694885492324829, "learning_rate": 1.02949565473508e-05, "loss": 0.1062, "step": 27842 }, { "epoch": 0.6135285659984465, "grad_norm": 0.6313768029212952, "learning_rate": 1.029394005166924e-05, "loss": 0.0755, "step": 27843 }, { "epoch": 0.6135506012879627, "grad_norm": 0.48176148533821106, "learning_rate": 1.0292923579958166e-05, "loss": 0.0676, "step": 27844 }, { "epoch": 0.6135726365774788, "grad_norm": 0.6078301668167114, "learning_rate": 1.029190713222275e-05, "loss": 0.0638, "step": 27845 }, { "epoch": 0.613594671866995, "grad_norm": 0.6327294111251831, "learning_rate": 1.0290890708468182e-05, "loss": 0.0752, "step": 27846 }, { "epoch": 0.6136167071565112, "grad_norm": 0.7296442985534668, "learning_rate": 1.0289874308699633e-05, "loss": 0.0754, "step": 27847 }, { "epoch": 0.6136387424460273, "grad_norm": 0.6048155426979065, "learning_rate": 1.0288857932922281e-05, "loss": 0.0894, "step": 27848 }, { "epoch": 0.6136607777355435, "grad_norm": 0.5862122178077698, "learning_rate": 1.0287841581141296e-05, "loss": 0.0692, "step": 27849 }, { "epoch": 0.6136828130250597, "grad_norm": 0.6538885831832886, "learning_rate": 1.0286825253361867e-05, "loss": 0.0545, "step": 27850 }, { "epoch": 0.6137048483145758, "grad_norm": 0.9338893294334412, "learning_rate": 1.0285808949589161e-05, "loss": 0.068, "step": 27851 }, { "epoch": 0.613726883604092, "grad_norm": 0.6515185236930847, "learning_rate": 1.0284792669828362e-05, "loss": 0.0854, "step": 27852 }, { "epoch": 0.6137489188936082, "grad_norm": 0.49298155307769775, "learning_rate": 1.0283776414084634e-05, "loss": 0.0661, "step": 27853 }, { "epoch": 0.6137709541831243, "grad_norm": 0.5828709006309509, "learning_rate": 1.0282760182363168e-05, "loss": 0.058, "step": 27854 }, { "epoch": 0.6137929894726404, "grad_norm": 0.5742314457893372, "learning_rate": 1.0281743974669135e-05, "loss": 0.0867, "step": 27855 }, { "epoch": 0.6138150247621565, "grad_norm": 0.7071617841720581, "learning_rate": 1.0280727791007707e-05, "loss": 0.104, "step": 27856 }, { "epoch": 0.6138370600516727, "grad_norm": 1.0138518810272217, "learning_rate": 1.027971163138406e-05, "loss": 0.0596, "step": 27857 }, { "epoch": 0.6138590953411889, "grad_norm": 0.6212118268013, "learning_rate": 1.027869549580338e-05, "loss": 0.0449, "step": 27858 }, { "epoch": 0.613881130630705, "grad_norm": 0.806391179561615, "learning_rate": 1.0277679384270834e-05, "loss": 0.0749, "step": 27859 }, { "epoch": 0.6139031659202212, "grad_norm": 0.9202309846878052, "learning_rate": 1.02766632967916e-05, "loss": 0.1048, "step": 27860 }, { "epoch": 0.6139252012097374, "grad_norm": 0.5187145471572876, "learning_rate": 1.0275647233370842e-05, "loss": 0.0544, "step": 27861 }, { "epoch": 0.6139472364992535, "grad_norm": 0.843611478805542, "learning_rate": 1.0274631194013757e-05, "loss": 0.0846, "step": 27862 }, { "epoch": 0.6139692717887697, "grad_norm": 0.7799819111824036, "learning_rate": 1.0273615178725509e-05, "loss": 0.0792, "step": 27863 }, { "epoch": 0.6139913070782859, "grad_norm": 0.4330635070800781, "learning_rate": 1.0272599187511273e-05, "loss": 0.0371, "step": 27864 }, { "epoch": 0.614013342367802, "grad_norm": 0.6321922540664673, "learning_rate": 1.0271583220376218e-05, "loss": 0.0766, "step": 27865 }, { "epoch": 0.6140353776573182, "grad_norm": 0.34046393632888794, "learning_rate": 1.0270567277325532e-05, "loss": 0.0547, "step": 27866 }, { "epoch": 0.6140574129468344, "grad_norm": 0.8634783625602722, "learning_rate": 1.0269551358364382e-05, "loss": 0.0852, "step": 27867 }, { "epoch": 0.6140794482363505, "grad_norm": 0.7374686002731323, "learning_rate": 1.0268535463497937e-05, "loss": 0.0776, "step": 27868 }, { "epoch": 0.6141014835258667, "grad_norm": 0.6006520986557007, "learning_rate": 1.0267519592731383e-05, "loss": 0.0574, "step": 27869 }, { "epoch": 0.6141235188153829, "grad_norm": 1.0308042764663696, "learning_rate": 1.0266503746069892e-05, "loss": 0.1023, "step": 27870 }, { "epoch": 0.614145554104899, "grad_norm": 0.4644821286201477, "learning_rate": 1.0265487923518635e-05, "loss": 0.0691, "step": 27871 }, { "epoch": 0.6141675893944152, "grad_norm": 0.31451472640037537, "learning_rate": 1.0264472125082783e-05, "loss": 0.0464, "step": 27872 }, { "epoch": 0.6141896246839313, "grad_norm": 0.5745331048965454, "learning_rate": 1.0263456350767516e-05, "loss": 0.0559, "step": 27873 }, { "epoch": 0.6142116599734475, "grad_norm": 0.6460068225860596, "learning_rate": 1.0262440600578008e-05, "loss": 0.0803, "step": 27874 }, { "epoch": 0.6142336952629637, "grad_norm": 0.3629221022129059, "learning_rate": 1.026142487451943e-05, "loss": 0.0423, "step": 27875 }, { "epoch": 0.6142557305524798, "grad_norm": 0.48764702677726746, "learning_rate": 1.026040917259695e-05, "loss": 0.0486, "step": 27876 }, { "epoch": 0.614277765841996, "grad_norm": 0.7298341989517212, "learning_rate": 1.0259393494815753e-05, "loss": 0.0723, "step": 27877 }, { "epoch": 0.6142998011315122, "grad_norm": 1.0537030696868896, "learning_rate": 1.0258377841181005e-05, "loss": 0.1229, "step": 27878 }, { "epoch": 0.6143218364210282, "grad_norm": 0.6460652351379395, "learning_rate": 1.0257362211697884e-05, "loss": 0.065, "step": 27879 }, { "epoch": 0.6143438717105444, "grad_norm": 0.5733794569969177, "learning_rate": 1.0256346606371559e-05, "loss": 0.0609, "step": 27880 }, { "epoch": 0.6143659070000606, "grad_norm": 0.585905909538269, "learning_rate": 1.0255331025207206e-05, "loss": 0.0713, "step": 27881 }, { "epoch": 0.6143879422895767, "grad_norm": 0.5981658101081848, "learning_rate": 1.0254315468209995e-05, "loss": 0.0714, "step": 27882 }, { "epoch": 0.6144099775790929, "grad_norm": 0.41781431436538696, "learning_rate": 1.0253299935385102e-05, "loss": 0.0638, "step": 27883 }, { "epoch": 0.614432012868609, "grad_norm": 0.28955233097076416, "learning_rate": 1.0252284426737689e-05, "loss": 0.0701, "step": 27884 }, { "epoch": 0.6144540481581252, "grad_norm": 0.6526974439620972, "learning_rate": 1.0251268942272948e-05, "loss": 0.0716, "step": 27885 }, { "epoch": 0.6144760834476414, "grad_norm": 0.7973842620849609, "learning_rate": 1.0250253481996037e-05, "loss": 0.0593, "step": 27886 }, { "epoch": 0.6144981187371575, "grad_norm": 0.5047240257263184, "learning_rate": 1.0249238045912132e-05, "loss": 0.0676, "step": 27887 }, { "epoch": 0.6145201540266737, "grad_norm": 0.8250647187232971, "learning_rate": 1.0248222634026406e-05, "loss": 0.0957, "step": 27888 }, { "epoch": 0.6145421893161899, "grad_norm": 0.6000949144363403, "learning_rate": 1.0247207246344029e-05, "loss": 0.0532, "step": 27889 }, { "epoch": 0.614564224605706, "grad_norm": 0.34334784746170044, "learning_rate": 1.0246191882870175e-05, "loss": 0.0458, "step": 27890 }, { "epoch": 0.6145862598952222, "grad_norm": 0.557317316532135, "learning_rate": 1.024517654361001e-05, "loss": 0.0927, "step": 27891 }, { "epoch": 0.6146082951847384, "grad_norm": 0.9846892356872559, "learning_rate": 1.0244161228568716e-05, "loss": 0.0807, "step": 27892 }, { "epoch": 0.6146303304742545, "grad_norm": 0.5235311985015869, "learning_rate": 1.024314593775146e-05, "loss": 0.0593, "step": 27893 }, { "epoch": 0.6146523657637707, "grad_norm": 0.5799112319946289, "learning_rate": 1.0242130671163412e-05, "loss": 0.0676, "step": 27894 }, { "epoch": 0.6146744010532869, "grad_norm": 0.6187227964401245, "learning_rate": 1.024111542880974e-05, "loss": 0.0807, "step": 27895 }, { "epoch": 0.614696436342803, "grad_norm": 0.7884029746055603, "learning_rate": 1.0240100210695623e-05, "loss": 0.0588, "step": 27896 }, { "epoch": 0.6147184716323192, "grad_norm": 0.4861750304698944, "learning_rate": 1.0239085016826227e-05, "loss": 0.0476, "step": 27897 }, { "epoch": 0.6147405069218354, "grad_norm": 0.6708417534828186, "learning_rate": 1.0238069847206728e-05, "loss": 0.0896, "step": 27898 }, { "epoch": 0.6147625422113515, "grad_norm": 0.573625922203064, "learning_rate": 1.023705470184228e-05, "loss": 0.0517, "step": 27899 }, { "epoch": 0.6147845775008677, "grad_norm": 0.711025059223175, "learning_rate": 1.0236039580738079e-05, "loss": 0.0904, "step": 27900 }, { "epoch": 0.6148066127903838, "grad_norm": 0.9137370586395264, "learning_rate": 1.023502448389928e-05, "loss": 0.0989, "step": 27901 }, { "epoch": 0.6148286480799, "grad_norm": 0.4048032760620117, "learning_rate": 1.0234009411331056e-05, "loss": 0.0361, "step": 27902 }, { "epoch": 0.6148506833694162, "grad_norm": 0.5630102157592773, "learning_rate": 1.0232994363038574e-05, "loss": 0.1051, "step": 27903 }, { "epoch": 0.6148727186589322, "grad_norm": 0.24427758157253265, "learning_rate": 1.0231979339027013e-05, "loss": 0.0629, "step": 27904 }, { "epoch": 0.6148947539484484, "grad_norm": 0.3020617961883545, "learning_rate": 1.0230964339301538e-05, "loss": 0.0602, "step": 27905 }, { "epoch": 0.6149167892379646, "grad_norm": 0.9727991819381714, "learning_rate": 1.022994936386732e-05, "loss": 0.0957, "step": 27906 }, { "epoch": 0.6149388245274807, "grad_norm": 0.7170429825782776, "learning_rate": 1.022893441272952e-05, "loss": 0.0619, "step": 27907 }, { "epoch": 0.6149608598169969, "grad_norm": 0.8217720985412598, "learning_rate": 1.0227919485893321e-05, "loss": 0.0746, "step": 27908 }, { "epoch": 0.614982895106513, "grad_norm": 0.4001714587211609, "learning_rate": 1.0226904583363888e-05, "loss": 0.079, "step": 27909 }, { "epoch": 0.6150049303960292, "grad_norm": 0.6681176424026489, "learning_rate": 1.022588970514639e-05, "loss": 0.0594, "step": 27910 }, { "epoch": 0.6150269656855454, "grad_norm": 0.6270403265953064, "learning_rate": 1.0224874851245994e-05, "loss": 0.065, "step": 27911 }, { "epoch": 0.6150490009750615, "grad_norm": 0.3985772728919983, "learning_rate": 1.0223860021667871e-05, "loss": 0.0576, "step": 27912 }, { "epoch": 0.6150710362645777, "grad_norm": 0.409403532743454, "learning_rate": 1.022284521641719e-05, "loss": 0.0647, "step": 27913 }, { "epoch": 0.6150930715540939, "grad_norm": 0.48629769682884216, "learning_rate": 1.0221830435499116e-05, "loss": 0.0526, "step": 27914 }, { "epoch": 0.61511510684361, "grad_norm": 0.4341413676738739, "learning_rate": 1.0220815678918827e-05, "loss": 0.0633, "step": 27915 }, { "epoch": 0.6151371421331262, "grad_norm": 0.6365233063697815, "learning_rate": 1.0219800946681488e-05, "loss": 0.0852, "step": 27916 }, { "epoch": 0.6151591774226424, "grad_norm": 0.5469187498092651, "learning_rate": 1.0218786238792264e-05, "loss": 0.0502, "step": 27917 }, { "epoch": 0.6151812127121585, "grad_norm": 0.5518983602523804, "learning_rate": 1.0217771555256322e-05, "loss": 0.062, "step": 27918 }, { "epoch": 0.6152032480016747, "grad_norm": 0.6432774066925049, "learning_rate": 1.021675689607884e-05, "loss": 0.0544, "step": 27919 }, { "epoch": 0.6152252832911909, "grad_norm": 0.30111706256866455, "learning_rate": 1.0215742261264977e-05, "loss": 0.0835, "step": 27920 }, { "epoch": 0.615247318580707, "grad_norm": 0.5519589781761169, "learning_rate": 1.0214727650819906e-05, "loss": 0.0607, "step": 27921 }, { "epoch": 0.6152693538702232, "grad_norm": 0.690156877040863, "learning_rate": 1.0213713064748787e-05, "loss": 0.096, "step": 27922 }, { "epoch": 0.6152913891597394, "grad_norm": 0.5095083117485046, "learning_rate": 1.02126985030568e-05, "loss": 0.0515, "step": 27923 }, { "epoch": 0.6153134244492555, "grad_norm": 0.44376009702682495, "learning_rate": 1.0211683965749105e-05, "loss": 0.0725, "step": 27924 }, { "epoch": 0.6153354597387717, "grad_norm": 0.45380762219429016, "learning_rate": 1.0210669452830875e-05, "loss": 0.078, "step": 27925 }, { "epoch": 0.6153574950282878, "grad_norm": 0.38069841265678406, "learning_rate": 1.0209654964307268e-05, "loss": 0.0578, "step": 27926 }, { "epoch": 0.615379530317804, "grad_norm": 0.692473828792572, "learning_rate": 1.0208640500183459e-05, "loss": 0.1041, "step": 27927 }, { "epoch": 0.6154015656073202, "grad_norm": 0.5081688165664673, "learning_rate": 1.0207626060464615e-05, "loss": 0.0515, "step": 27928 }, { "epoch": 0.6154236008968362, "grad_norm": 0.7528545260429382, "learning_rate": 1.0206611645155902e-05, "loss": 0.0607, "step": 27929 }, { "epoch": 0.6154456361863524, "grad_norm": 0.4338120222091675, "learning_rate": 1.0205597254262478e-05, "loss": 0.072, "step": 27930 }, { "epoch": 0.6154676714758686, "grad_norm": 0.5128802061080933, "learning_rate": 1.0204582887789528e-05, "loss": 0.0444, "step": 27931 }, { "epoch": 0.6154897067653847, "grad_norm": 0.7313927412033081, "learning_rate": 1.0203568545742204e-05, "loss": 0.0711, "step": 27932 }, { "epoch": 0.6155117420549009, "grad_norm": 0.4362621605396271, "learning_rate": 1.0202554228125682e-05, "loss": 0.0805, "step": 27933 }, { "epoch": 0.615533777344417, "grad_norm": 0.5783857107162476, "learning_rate": 1.0201539934945117e-05, "loss": 0.065, "step": 27934 }, { "epoch": 0.6155558126339332, "grad_norm": 0.8108988404273987, "learning_rate": 1.0200525666205687e-05, "loss": 0.0766, "step": 27935 }, { "epoch": 0.6155778479234494, "grad_norm": 0.7962053418159485, "learning_rate": 1.0199511421912555e-05, "loss": 0.0637, "step": 27936 }, { "epoch": 0.6155998832129655, "grad_norm": 0.5968043208122253, "learning_rate": 1.0198497202070878e-05, "loss": 0.066, "step": 27937 }, { "epoch": 0.6156219185024817, "grad_norm": 0.6775868535041809, "learning_rate": 1.0197483006685833e-05, "loss": 0.049, "step": 27938 }, { "epoch": 0.6156439537919979, "grad_norm": 0.6131284236907959, "learning_rate": 1.0196468835762585e-05, "loss": 0.055, "step": 27939 }, { "epoch": 0.615665989081514, "grad_norm": 0.5192201733589172, "learning_rate": 1.01954546893063e-05, "loss": 0.0542, "step": 27940 }, { "epoch": 0.6156880243710302, "grad_norm": 0.559158980846405, "learning_rate": 1.0194440567322134e-05, "loss": 0.0641, "step": 27941 }, { "epoch": 0.6157100596605464, "grad_norm": 0.41780683398246765, "learning_rate": 1.0193426469815263e-05, "loss": 0.0629, "step": 27942 }, { "epoch": 0.6157320949500625, "grad_norm": 0.7115435004234314, "learning_rate": 1.0192412396790846e-05, "loss": 0.0917, "step": 27943 }, { "epoch": 0.6157541302395787, "grad_norm": 0.6049517393112183, "learning_rate": 1.0191398348254054e-05, "loss": 0.0907, "step": 27944 }, { "epoch": 0.6157761655290949, "grad_norm": 0.9267072677612305, "learning_rate": 1.0190384324210042e-05, "loss": 0.0713, "step": 27945 }, { "epoch": 0.615798200818611, "grad_norm": 0.6578006148338318, "learning_rate": 1.0189370324663987e-05, "loss": 0.0706, "step": 27946 }, { "epoch": 0.6158202361081272, "grad_norm": 0.4903297424316406, "learning_rate": 1.0188356349621048e-05, "loss": 0.0473, "step": 27947 }, { "epoch": 0.6158422713976434, "grad_norm": 0.7589625120162964, "learning_rate": 1.0187342399086393e-05, "loss": 0.1057, "step": 27948 }, { "epoch": 0.6158643066871595, "grad_norm": 0.8309835195541382, "learning_rate": 1.0186328473065175e-05, "loss": 0.0782, "step": 27949 }, { "epoch": 0.6158863419766757, "grad_norm": 0.5918761491775513, "learning_rate": 1.0185314571562577e-05, "loss": 0.0519, "step": 27950 }, { "epoch": 0.6159083772661919, "grad_norm": 0.7166769504547119, "learning_rate": 1.018430069458375e-05, "loss": 0.0701, "step": 27951 }, { "epoch": 0.615930412555708, "grad_norm": 0.5916315913200378, "learning_rate": 1.0183286842133866e-05, "loss": 0.083, "step": 27952 }, { "epoch": 0.6159524478452241, "grad_norm": 0.5103068947792053, "learning_rate": 1.0182273014218077e-05, "loss": 0.0461, "step": 27953 }, { "epoch": 0.6159744831347402, "grad_norm": 0.863844633102417, "learning_rate": 1.0181259210841559e-05, "loss": 0.069, "step": 27954 }, { "epoch": 0.6159965184242564, "grad_norm": 0.589802622795105, "learning_rate": 1.0180245432009474e-05, "loss": 0.0887, "step": 27955 }, { "epoch": 0.6160185537137726, "grad_norm": 0.7011656761169434, "learning_rate": 1.0179231677726984e-05, "loss": 0.0622, "step": 27956 }, { "epoch": 0.6160405890032887, "grad_norm": 0.677167534828186, "learning_rate": 1.0178217947999242e-05, "loss": 0.0899, "step": 27957 }, { "epoch": 0.6160626242928049, "grad_norm": 0.38062021136283875, "learning_rate": 1.0177204242831433e-05, "loss": 0.0421, "step": 27958 }, { "epoch": 0.6160846595823211, "grad_norm": 0.5513104796409607, "learning_rate": 1.0176190562228707e-05, "loss": 0.0679, "step": 27959 }, { "epoch": 0.6161066948718372, "grad_norm": 1.0506482124328613, "learning_rate": 1.0175176906196226e-05, "loss": 0.1262, "step": 27960 }, { "epoch": 0.6161287301613534, "grad_norm": 0.365496963262558, "learning_rate": 1.0174163274739159e-05, "loss": 0.0792, "step": 27961 }, { "epoch": 0.6161507654508696, "grad_norm": 0.8947935104370117, "learning_rate": 1.0173149667862665e-05, "loss": 0.066, "step": 27962 }, { "epoch": 0.6161728007403857, "grad_norm": 0.7069127559661865, "learning_rate": 1.017213608557191e-05, "loss": 0.0639, "step": 27963 }, { "epoch": 0.6161948360299019, "grad_norm": 0.5349571704864502, "learning_rate": 1.0171122527872048e-05, "loss": 0.0627, "step": 27964 }, { "epoch": 0.616216871319418, "grad_norm": 0.6906598806381226, "learning_rate": 1.0170108994768256e-05, "loss": 0.0618, "step": 27965 }, { "epoch": 0.6162389066089342, "grad_norm": 1.0136810541152954, "learning_rate": 1.0169095486265688e-05, "loss": 0.0692, "step": 27966 }, { "epoch": 0.6162609418984504, "grad_norm": 0.4484235644340515, "learning_rate": 1.0168082002369506e-05, "loss": 0.0585, "step": 27967 }, { "epoch": 0.6162829771879665, "grad_norm": 0.7160497903823853, "learning_rate": 1.0167068543084871e-05, "loss": 0.0697, "step": 27968 }, { "epoch": 0.6163050124774827, "grad_norm": 0.5068565011024475, "learning_rate": 1.016605510841695e-05, "loss": 0.0623, "step": 27969 }, { "epoch": 0.6163270477669989, "grad_norm": 0.6602235436439514, "learning_rate": 1.0165041698370905e-05, "loss": 0.0581, "step": 27970 }, { "epoch": 0.616349083056515, "grad_norm": 0.8775470852851868, "learning_rate": 1.0164028312951894e-05, "loss": 0.0667, "step": 27971 }, { "epoch": 0.6163711183460312, "grad_norm": 0.644320011138916, "learning_rate": 1.0163014952165073e-05, "loss": 0.1011, "step": 27972 }, { "epoch": 0.6163931536355474, "grad_norm": 0.4504467844963074, "learning_rate": 1.0162001616015619e-05, "loss": 0.0441, "step": 27973 }, { "epoch": 0.6164151889250635, "grad_norm": 1.0403543710708618, "learning_rate": 1.0160988304508682e-05, "loss": 0.0553, "step": 27974 }, { "epoch": 0.6164372242145797, "grad_norm": 0.748796820640564, "learning_rate": 1.0159975017649427e-05, "loss": 0.0893, "step": 27975 }, { "epoch": 0.6164592595040959, "grad_norm": 0.31429263949394226, "learning_rate": 1.0158961755443013e-05, "loss": 0.0614, "step": 27976 }, { "epoch": 0.616481294793612, "grad_norm": 0.7008322477340698, "learning_rate": 1.0157948517894605e-05, "loss": 0.1041, "step": 27977 }, { "epoch": 0.6165033300831281, "grad_norm": 0.43859198689460754, "learning_rate": 1.0156935305009363e-05, "loss": 0.0564, "step": 27978 }, { "epoch": 0.6165253653726442, "grad_norm": 0.432919442653656, "learning_rate": 1.0155922116792437e-05, "loss": 0.0503, "step": 27979 }, { "epoch": 0.6165474006621604, "grad_norm": 0.3656366169452667, "learning_rate": 1.0154908953249007e-05, "loss": 0.089, "step": 27980 }, { "epoch": 0.6165694359516766, "grad_norm": 0.743061363697052, "learning_rate": 1.0153895814384222e-05, "loss": 0.097, "step": 27981 }, { "epoch": 0.6165914712411927, "grad_norm": 0.6190357804298401, "learning_rate": 1.0152882700203245e-05, "loss": 0.0532, "step": 27982 }, { "epoch": 0.6166135065307089, "grad_norm": 0.3737018406391144, "learning_rate": 1.0151869610711235e-05, "loss": 0.0729, "step": 27983 }, { "epoch": 0.6166355418202251, "grad_norm": 0.6716538667678833, "learning_rate": 1.0150856545913352e-05, "loss": 0.0609, "step": 27984 }, { "epoch": 0.6166575771097412, "grad_norm": 0.4144507646560669, "learning_rate": 1.0149843505814759e-05, "loss": 0.0409, "step": 27985 }, { "epoch": 0.6166796123992574, "grad_norm": 0.7414059638977051, "learning_rate": 1.0148830490420613e-05, "loss": 0.089, "step": 27986 }, { "epoch": 0.6167016476887736, "grad_norm": 0.6056740880012512, "learning_rate": 1.0147817499736069e-05, "loss": 0.0609, "step": 27987 }, { "epoch": 0.6167236829782897, "grad_norm": 0.9003593325614929, "learning_rate": 1.0146804533766299e-05, "loss": 0.0988, "step": 27988 }, { "epoch": 0.6167457182678059, "grad_norm": 0.7264014482498169, "learning_rate": 1.0145791592516454e-05, "loss": 0.094, "step": 27989 }, { "epoch": 0.616767753557322, "grad_norm": 0.4152570068836212, "learning_rate": 1.0144778675991699e-05, "loss": 0.0541, "step": 27990 }, { "epoch": 0.6167897888468382, "grad_norm": 0.503076434135437, "learning_rate": 1.0143765784197184e-05, "loss": 0.0419, "step": 27991 }, { "epoch": 0.6168118241363544, "grad_norm": 0.6718035340309143, "learning_rate": 1.0142752917138079e-05, "loss": 0.067, "step": 27992 }, { "epoch": 0.6168338594258705, "grad_norm": 0.6641693115234375, "learning_rate": 1.0141740074819534e-05, "loss": 0.07, "step": 27993 }, { "epoch": 0.6168558947153867, "grad_norm": 0.7041161060333252, "learning_rate": 1.0140727257246716e-05, "loss": 0.0477, "step": 27994 }, { "epoch": 0.6168779300049029, "grad_norm": 0.6830660104751587, "learning_rate": 1.0139714464424772e-05, "loss": 0.0918, "step": 27995 }, { "epoch": 0.616899965294419, "grad_norm": 0.6040757894515991, "learning_rate": 1.0138701696358874e-05, "loss": 0.0602, "step": 27996 }, { "epoch": 0.6169220005839352, "grad_norm": 0.8025246858596802, "learning_rate": 1.0137688953054177e-05, "loss": 0.0812, "step": 27997 }, { "epoch": 0.6169440358734514, "grad_norm": 0.5701500177383423, "learning_rate": 1.0136676234515837e-05, "loss": 0.0764, "step": 27998 }, { "epoch": 0.6169660711629675, "grad_norm": 0.6447591185569763, "learning_rate": 1.0135663540749009e-05, "loss": 0.0522, "step": 27999 }, { "epoch": 0.6169881064524837, "grad_norm": 0.3973510265350342, "learning_rate": 1.013465087175886e-05, "loss": 0.0449, "step": 28000 }, { "epoch": 0.6170101417419999, "grad_norm": 0.4374709725379944, "learning_rate": 1.013363822755054e-05, "loss": 0.0472, "step": 28001 }, { "epoch": 0.617032177031516, "grad_norm": 0.34789392352104187, "learning_rate": 1.0132625608129205e-05, "loss": 0.0477, "step": 28002 }, { "epoch": 0.6170542123210321, "grad_norm": 0.8266943097114563, "learning_rate": 1.0131613013500025e-05, "loss": 0.0695, "step": 28003 }, { "epoch": 0.6170762476105482, "grad_norm": 0.4483788311481476, "learning_rate": 1.0130600443668152e-05, "loss": 0.039, "step": 28004 }, { "epoch": 0.6170982829000644, "grad_norm": 0.8364949822425842, "learning_rate": 1.012958789863874e-05, "loss": 0.0835, "step": 28005 }, { "epoch": 0.6171203181895806, "grad_norm": 0.7782479524612427, "learning_rate": 1.0128575378416946e-05, "loss": 0.0561, "step": 28006 }, { "epoch": 0.6171423534790967, "grad_norm": 0.5592374205589294, "learning_rate": 1.0127562883007931e-05, "loss": 0.0459, "step": 28007 }, { "epoch": 0.6171643887686129, "grad_norm": 0.6087893843650818, "learning_rate": 1.0126550412416854e-05, "loss": 0.079, "step": 28008 }, { "epoch": 0.6171864240581291, "grad_norm": 1.0342040061950684, "learning_rate": 1.012553796664887e-05, "loss": 0.1068, "step": 28009 }, { "epoch": 0.6172084593476452, "grad_norm": 0.5896892547607422, "learning_rate": 1.0124525545709126e-05, "loss": 0.0491, "step": 28010 }, { "epoch": 0.6172304946371614, "grad_norm": 0.6371198892593384, "learning_rate": 1.0123513149602794e-05, "loss": 0.081, "step": 28011 }, { "epoch": 0.6172525299266776, "grad_norm": 0.48241984844207764, "learning_rate": 1.0122500778335025e-05, "loss": 0.0618, "step": 28012 }, { "epoch": 0.6172745652161937, "grad_norm": 0.442243754863739, "learning_rate": 1.0121488431910975e-05, "loss": 0.0682, "step": 28013 }, { "epoch": 0.6172966005057099, "grad_norm": 0.5225282907485962, "learning_rate": 1.0120476110335798e-05, "loss": 0.0382, "step": 28014 }, { "epoch": 0.6173186357952261, "grad_norm": 1.263721227645874, "learning_rate": 1.0119463813614657e-05, "loss": 0.0736, "step": 28015 }, { "epoch": 0.6173406710847422, "grad_norm": 0.7764096260070801, "learning_rate": 1.01184515417527e-05, "loss": 0.0779, "step": 28016 }, { "epoch": 0.6173627063742584, "grad_norm": 0.6269257068634033, "learning_rate": 1.0117439294755091e-05, "loss": 0.063, "step": 28017 }, { "epoch": 0.6173847416637745, "grad_norm": 1.012450933456421, "learning_rate": 1.0116427072626974e-05, "loss": 0.0852, "step": 28018 }, { "epoch": 0.6174067769532907, "grad_norm": 0.659375011920929, "learning_rate": 1.0115414875373519e-05, "loss": 0.0695, "step": 28019 }, { "epoch": 0.6174288122428069, "grad_norm": 0.7471081018447876, "learning_rate": 1.0114402702999875e-05, "loss": 0.0777, "step": 28020 }, { "epoch": 0.617450847532323, "grad_norm": 0.7590731978416443, "learning_rate": 1.0113390555511198e-05, "loss": 0.0563, "step": 28021 }, { "epoch": 0.6174728828218392, "grad_norm": 0.8951531648635864, "learning_rate": 1.0112378432912642e-05, "loss": 0.0415, "step": 28022 }, { "epoch": 0.6174949181113554, "grad_norm": 0.7157657146453857, "learning_rate": 1.0111366335209365e-05, "loss": 0.0832, "step": 28023 }, { "epoch": 0.6175169534008715, "grad_norm": 0.7109593749046326, "learning_rate": 1.0110354262406521e-05, "loss": 0.0896, "step": 28024 }, { "epoch": 0.6175389886903877, "grad_norm": 0.811690628528595, "learning_rate": 1.0109342214509259e-05, "loss": 0.0989, "step": 28025 }, { "epoch": 0.6175610239799039, "grad_norm": 0.8023287057876587, "learning_rate": 1.0108330191522745e-05, "loss": 0.1052, "step": 28026 }, { "epoch": 0.6175830592694199, "grad_norm": 0.668948769569397, "learning_rate": 1.010731819345213e-05, "loss": 0.0455, "step": 28027 }, { "epoch": 0.6176050945589361, "grad_norm": 0.9164110422134399, "learning_rate": 1.0106306220302565e-05, "loss": 0.0958, "step": 28028 }, { "epoch": 0.6176271298484522, "grad_norm": 0.4940963387489319, "learning_rate": 1.0105294272079204e-05, "loss": 0.0651, "step": 28029 }, { "epoch": 0.6176491651379684, "grad_norm": 0.4041127562522888, "learning_rate": 1.0104282348787209e-05, "loss": 0.0736, "step": 28030 }, { "epoch": 0.6176712004274846, "grad_norm": 0.5248773694038391, "learning_rate": 1.0103270450431726e-05, "loss": 0.0683, "step": 28031 }, { "epoch": 0.6176932357170007, "grad_norm": 0.6417053937911987, "learning_rate": 1.0102258577017915e-05, "loss": 0.087, "step": 28032 }, { "epoch": 0.6177152710065169, "grad_norm": 0.597811222076416, "learning_rate": 1.0101246728550922e-05, "loss": 0.0748, "step": 28033 }, { "epoch": 0.6177373062960331, "grad_norm": 0.5636328458786011, "learning_rate": 1.010023490503591e-05, "loss": 0.0576, "step": 28034 }, { "epoch": 0.6177593415855492, "grad_norm": 0.42286214232444763, "learning_rate": 1.0099223106478031e-05, "loss": 0.063, "step": 28035 }, { "epoch": 0.6177813768750654, "grad_norm": 0.5235769152641296, "learning_rate": 1.0098211332882437e-05, "loss": 0.0682, "step": 28036 }, { "epoch": 0.6178034121645816, "grad_norm": 2.7880821228027344, "learning_rate": 1.0097199584254278e-05, "loss": 0.0504, "step": 28037 }, { "epoch": 0.6178254474540977, "grad_norm": 0.7840868234634399, "learning_rate": 1.0096187860598711e-05, "loss": 0.0639, "step": 28038 }, { "epoch": 0.6178474827436139, "grad_norm": 0.5427347421646118, "learning_rate": 1.0095176161920892e-05, "loss": 0.0865, "step": 28039 }, { "epoch": 0.6178695180331301, "grad_norm": 0.8757701516151428, "learning_rate": 1.0094164488225968e-05, "loss": 0.075, "step": 28040 }, { "epoch": 0.6178915533226462, "grad_norm": 0.46073228120803833, "learning_rate": 1.0093152839519091e-05, "loss": 0.0393, "step": 28041 }, { "epoch": 0.6179135886121624, "grad_norm": 0.5981262922286987, "learning_rate": 1.0092141215805425e-05, "loss": 0.0777, "step": 28042 }, { "epoch": 0.6179356239016786, "grad_norm": 0.5669394731521606, "learning_rate": 1.0091129617090112e-05, "loss": 0.07, "step": 28043 }, { "epoch": 0.6179576591911947, "grad_norm": 0.5586397647857666, "learning_rate": 1.009011804337831e-05, "loss": 0.053, "step": 28044 }, { "epoch": 0.6179796944807109, "grad_norm": 0.48274460434913635, "learning_rate": 1.0089106494675169e-05, "loss": 0.0601, "step": 28045 }, { "epoch": 0.618001729770227, "grad_norm": 0.6238202452659607, "learning_rate": 1.0088094970985842e-05, "loss": 0.0914, "step": 28046 }, { "epoch": 0.6180237650597432, "grad_norm": 0.7525864243507385, "learning_rate": 1.0087083472315482e-05, "loss": 0.0781, "step": 28047 }, { "epoch": 0.6180458003492594, "grad_norm": 0.9944842457771301, "learning_rate": 1.0086071998669235e-05, "loss": 0.0714, "step": 28048 }, { "epoch": 0.6180678356387755, "grad_norm": 0.6828859448432922, "learning_rate": 1.0085060550052264e-05, "loss": 0.0596, "step": 28049 }, { "epoch": 0.6180898709282917, "grad_norm": 0.4191952049732208, "learning_rate": 1.0084049126469716e-05, "loss": 0.0588, "step": 28050 }, { "epoch": 0.6181119062178079, "grad_norm": 0.5056589245796204, "learning_rate": 1.0083037727926741e-05, "loss": 0.0755, "step": 28051 }, { "epoch": 0.6181339415073239, "grad_norm": 0.4562208354473114, "learning_rate": 1.0082026354428486e-05, "loss": 0.08, "step": 28052 }, { "epoch": 0.6181559767968401, "grad_norm": 0.2066866010427475, "learning_rate": 1.0081015005980117e-05, "loss": 0.0674, "step": 28053 }, { "epoch": 0.6181780120863563, "grad_norm": 0.3052188456058502, "learning_rate": 1.0080003682586772e-05, "loss": 0.0459, "step": 28054 }, { "epoch": 0.6182000473758724, "grad_norm": 0.5979238152503967, "learning_rate": 1.007899238425361e-05, "loss": 0.0466, "step": 28055 }, { "epoch": 0.6182220826653886, "grad_norm": 0.8973085880279541, "learning_rate": 1.0077981110985769e-05, "loss": 0.0503, "step": 28056 }, { "epoch": 0.6182441179549047, "grad_norm": 0.2968192994594574, "learning_rate": 1.0076969862788416e-05, "loss": 0.0544, "step": 28057 }, { "epoch": 0.6182661532444209, "grad_norm": 0.7105609774589539, "learning_rate": 1.0075958639666698e-05, "loss": 0.0609, "step": 28058 }, { "epoch": 0.6182881885339371, "grad_norm": 0.6973702311515808, "learning_rate": 1.007494744162576e-05, "loss": 0.0606, "step": 28059 }, { "epoch": 0.6183102238234532, "grad_norm": 1.0025489330291748, "learning_rate": 1.0073936268670752e-05, "loss": 0.0721, "step": 28060 }, { "epoch": 0.6183322591129694, "grad_norm": 0.994909405708313, "learning_rate": 1.0072925120806834e-05, "loss": 0.1012, "step": 28061 }, { "epoch": 0.6183542944024856, "grad_norm": 1.5877608060836792, "learning_rate": 1.0071913998039148e-05, "loss": 0.0878, "step": 28062 }, { "epoch": 0.6183763296920017, "grad_norm": 1.1003416776657104, "learning_rate": 1.007090290037285e-05, "loss": 0.1056, "step": 28063 }, { "epoch": 0.6183983649815179, "grad_norm": 0.9078717231750488, "learning_rate": 1.0069891827813082e-05, "loss": 0.0894, "step": 28064 }, { "epoch": 0.6184204002710341, "grad_norm": 0.5324978232383728, "learning_rate": 1.0068880780365001e-05, "loss": 0.0716, "step": 28065 }, { "epoch": 0.6184424355605502, "grad_norm": 0.6859064698219299, "learning_rate": 1.0067869758033755e-05, "loss": 0.0766, "step": 28066 }, { "epoch": 0.6184644708500664, "grad_norm": 0.4981812536716461, "learning_rate": 1.0066858760824494e-05, "loss": 0.0382, "step": 28067 }, { "epoch": 0.6184865061395826, "grad_norm": 0.5681538581848145, "learning_rate": 1.0065847788742359e-05, "loss": 0.0946, "step": 28068 }, { "epoch": 0.6185085414290987, "grad_norm": 1.0053342580795288, "learning_rate": 1.0064836841792516e-05, "loss": 0.0618, "step": 28069 }, { "epoch": 0.6185305767186149, "grad_norm": 0.6344769597053528, "learning_rate": 1.0063825919980102e-05, "loss": 0.0691, "step": 28070 }, { "epoch": 0.618552612008131, "grad_norm": 0.358243465423584, "learning_rate": 1.006281502331027e-05, "loss": 0.0496, "step": 28071 }, { "epoch": 0.6185746472976472, "grad_norm": 0.7574688792228699, "learning_rate": 1.006180415178817e-05, "loss": 0.0605, "step": 28072 }, { "epoch": 0.6185966825871634, "grad_norm": 0.4006938636302948, "learning_rate": 1.0060793305418947e-05, "loss": 0.0838, "step": 28073 }, { "epoch": 0.6186187178766795, "grad_norm": 0.7133623361587524, "learning_rate": 1.0059782484207757e-05, "loss": 0.0725, "step": 28074 }, { "epoch": 0.6186407531661957, "grad_norm": 0.3559725880622864, "learning_rate": 1.0058771688159734e-05, "loss": 0.0559, "step": 28075 }, { "epoch": 0.6186627884557119, "grad_norm": 0.4828013777732849, "learning_rate": 1.0057760917280045e-05, "loss": 0.0719, "step": 28076 }, { "epoch": 0.6186848237452279, "grad_norm": 0.7705528140068054, "learning_rate": 1.0056750171573828e-05, "loss": 0.0575, "step": 28077 }, { "epoch": 0.6187068590347441, "grad_norm": 0.321058988571167, "learning_rate": 1.0055739451046235e-05, "loss": 0.069, "step": 28078 }, { "epoch": 0.6187288943242603, "grad_norm": 0.6445663571357727, "learning_rate": 1.005472875570241e-05, "loss": 0.0855, "step": 28079 }, { "epoch": 0.6187509296137764, "grad_norm": 0.7387852668762207, "learning_rate": 1.0053718085547503e-05, "loss": 0.0539, "step": 28080 }, { "epoch": 0.6187729649032926, "grad_norm": 0.7290488481521606, "learning_rate": 1.0052707440586663e-05, "loss": 0.0492, "step": 28081 }, { "epoch": 0.6187950001928088, "grad_norm": 0.3002476990222931, "learning_rate": 1.005169682082504e-05, "loss": 0.047, "step": 28082 }, { "epoch": 0.6188170354823249, "grad_norm": 0.649614155292511, "learning_rate": 1.0050686226267769e-05, "loss": 0.0737, "step": 28083 }, { "epoch": 0.6188390707718411, "grad_norm": 0.6818152070045471, "learning_rate": 1.0049675656920015e-05, "loss": 0.0459, "step": 28084 }, { "epoch": 0.6188611060613572, "grad_norm": 0.5872601270675659, "learning_rate": 1.0048665112786918e-05, "loss": 0.083, "step": 28085 }, { "epoch": 0.6188831413508734, "grad_norm": 0.48638564348220825, "learning_rate": 1.0047654593873622e-05, "loss": 0.0576, "step": 28086 }, { "epoch": 0.6189051766403896, "grad_norm": 0.396562784910202, "learning_rate": 1.0046644100185277e-05, "loss": 0.0621, "step": 28087 }, { "epoch": 0.6189272119299057, "grad_norm": 0.6935272812843323, "learning_rate": 1.0045633631727031e-05, "loss": 0.0769, "step": 28088 }, { "epoch": 0.6189492472194219, "grad_norm": 0.9424105882644653, "learning_rate": 1.0044623188504031e-05, "loss": 0.1026, "step": 28089 }, { "epoch": 0.6189712825089381, "grad_norm": 0.8290119171142578, "learning_rate": 1.0043612770521421e-05, "loss": 0.0701, "step": 28090 }, { "epoch": 0.6189933177984542, "grad_norm": 0.6439336538314819, "learning_rate": 1.0042602377784343e-05, "loss": 0.054, "step": 28091 }, { "epoch": 0.6190153530879704, "grad_norm": 0.7123641967773438, "learning_rate": 1.0041592010297954e-05, "loss": 0.0554, "step": 28092 }, { "epoch": 0.6190373883774866, "grad_norm": 0.7934586405754089, "learning_rate": 1.00405816680674e-05, "loss": 0.0805, "step": 28093 }, { "epoch": 0.6190594236670027, "grad_norm": 0.4752329885959625, "learning_rate": 1.0039571351097817e-05, "loss": 0.0728, "step": 28094 }, { "epoch": 0.6190814589565189, "grad_norm": 0.5668179392814636, "learning_rate": 1.0038561059394361e-05, "loss": 0.0669, "step": 28095 }, { "epoch": 0.6191034942460351, "grad_norm": 0.5013530254364014, "learning_rate": 1.0037550792962174e-05, "loss": 0.0777, "step": 28096 }, { "epoch": 0.6191255295355512, "grad_norm": 0.700093150138855, "learning_rate": 1.00365405518064e-05, "loss": 0.065, "step": 28097 }, { "epoch": 0.6191475648250674, "grad_norm": 0.7623233795166016, "learning_rate": 1.0035530335932184e-05, "loss": 0.0774, "step": 28098 }, { "epoch": 0.6191696001145836, "grad_norm": 0.6248090863227844, "learning_rate": 1.0034520145344677e-05, "loss": 0.0625, "step": 28099 }, { "epoch": 0.6191916354040997, "grad_norm": 0.4814813435077667, "learning_rate": 1.0033509980049025e-05, "loss": 0.069, "step": 28100 }, { "epoch": 0.6192136706936159, "grad_norm": 0.49621397256851196, "learning_rate": 1.0032499840050368e-05, "loss": 0.0665, "step": 28101 }, { "epoch": 0.6192357059831319, "grad_norm": 0.4352709949016571, "learning_rate": 1.003148972535385e-05, "loss": 0.0463, "step": 28102 }, { "epoch": 0.6192577412726481, "grad_norm": 0.6365904211997986, "learning_rate": 1.003047963596462e-05, "loss": 0.0886, "step": 28103 }, { "epoch": 0.6192797765621643, "grad_norm": 0.5324413180351257, "learning_rate": 1.0029469571887824e-05, "loss": 0.0647, "step": 28104 }, { "epoch": 0.6193018118516804, "grad_norm": 0.6387512683868408, "learning_rate": 1.0028459533128607e-05, "loss": 0.093, "step": 28105 }, { "epoch": 0.6193238471411966, "grad_norm": 0.8502814173698425, "learning_rate": 1.0027449519692103e-05, "loss": 0.0881, "step": 28106 }, { "epoch": 0.6193458824307128, "grad_norm": 0.5308400988578796, "learning_rate": 1.002643953158347e-05, "loss": 0.0679, "step": 28107 }, { "epoch": 0.6193679177202289, "grad_norm": 0.4293590784072876, "learning_rate": 1.002542956880785e-05, "loss": 0.0716, "step": 28108 }, { "epoch": 0.6193899530097451, "grad_norm": 0.7563161253929138, "learning_rate": 1.0024419631370385e-05, "loss": 0.0456, "step": 28109 }, { "epoch": 0.6194119882992613, "grad_norm": 0.997406005859375, "learning_rate": 1.0023409719276216e-05, "loss": 0.0682, "step": 28110 }, { "epoch": 0.6194340235887774, "grad_norm": 0.6169237494468689, "learning_rate": 1.0022399832530491e-05, "loss": 0.0365, "step": 28111 }, { "epoch": 0.6194560588782936, "grad_norm": 0.5181167721748352, "learning_rate": 1.0021389971138353e-05, "loss": 0.0647, "step": 28112 }, { "epoch": 0.6194780941678097, "grad_norm": 0.6640421748161316, "learning_rate": 1.0020380135104942e-05, "loss": 0.0846, "step": 28113 }, { "epoch": 0.6195001294573259, "grad_norm": 0.39584457874298096, "learning_rate": 1.001937032443541e-05, "loss": 0.0525, "step": 28114 }, { "epoch": 0.6195221647468421, "grad_norm": 0.713019073009491, "learning_rate": 1.0018360539134895e-05, "loss": 0.0609, "step": 28115 }, { "epoch": 0.6195442000363582, "grad_norm": 0.6348722577095032, "learning_rate": 1.0017350779208542e-05, "loss": 0.0525, "step": 28116 }, { "epoch": 0.6195662353258744, "grad_norm": 0.45545774698257446, "learning_rate": 1.0016341044661489e-05, "loss": 0.0762, "step": 28117 }, { "epoch": 0.6195882706153906, "grad_norm": 0.5651720762252808, "learning_rate": 1.0015331335498887e-05, "loss": 0.0641, "step": 28118 }, { "epoch": 0.6196103059049067, "grad_norm": 0.9047722220420837, "learning_rate": 1.0014321651725877e-05, "loss": 0.0802, "step": 28119 }, { "epoch": 0.6196323411944229, "grad_norm": 0.5867617130279541, "learning_rate": 1.00133119933476e-05, "loss": 0.0743, "step": 28120 }, { "epoch": 0.6196543764839391, "grad_norm": 0.41747549176216125, "learning_rate": 1.0012302360369193e-05, "loss": 0.0615, "step": 28121 }, { "epoch": 0.6196764117734552, "grad_norm": 0.5930919647216797, "learning_rate": 1.0011292752795809e-05, "loss": 0.0723, "step": 28122 }, { "epoch": 0.6196984470629714, "grad_norm": 0.9974141120910645, "learning_rate": 1.0010283170632588e-05, "loss": 0.0722, "step": 28123 }, { "epoch": 0.6197204823524876, "grad_norm": 0.4869840145111084, "learning_rate": 1.0009273613884669e-05, "loss": 0.0461, "step": 28124 }, { "epoch": 0.6197425176420037, "grad_norm": 0.8079690933227539, "learning_rate": 1.0008264082557195e-05, "loss": 0.0867, "step": 28125 }, { "epoch": 0.6197645529315198, "grad_norm": 0.567587673664093, "learning_rate": 1.000725457665531e-05, "loss": 0.0811, "step": 28126 }, { "epoch": 0.6197865882210359, "grad_norm": 0.6774776577949524, "learning_rate": 1.0006245096184156e-05, "loss": 0.0804, "step": 28127 }, { "epoch": 0.6198086235105521, "grad_norm": 0.7639543414115906, "learning_rate": 1.0005235641148871e-05, "loss": 0.0589, "step": 28128 }, { "epoch": 0.6198306588000683, "grad_norm": 0.5316359996795654, "learning_rate": 1.0004226211554596e-05, "loss": 0.0379, "step": 28129 }, { "epoch": 0.6198526940895844, "grad_norm": 0.5631542205810547, "learning_rate": 1.0003216807406479e-05, "loss": 0.0633, "step": 28130 }, { "epoch": 0.6198747293791006, "grad_norm": 0.3805120587348938, "learning_rate": 1.0002207428709661e-05, "loss": 0.0557, "step": 28131 }, { "epoch": 0.6198967646686168, "grad_norm": 0.39289504289627075, "learning_rate": 1.0001198075469282e-05, "loss": 0.0745, "step": 28132 }, { "epoch": 0.6199187999581329, "grad_norm": 0.747077226638794, "learning_rate": 1.0000188747690475e-05, "loss": 0.0626, "step": 28133 }, { "epoch": 0.6199408352476491, "grad_norm": 0.5210311412811279, "learning_rate": 9.99917944537839e-06, "loss": 0.0645, "step": 28134 }, { "epoch": 0.6199628705371653, "grad_norm": 0.6701743006706238, "learning_rate": 9.99817016853817e-06, "loss": 0.0665, "step": 28135 }, { "epoch": 0.6199849058266814, "grad_norm": 0.42265191674232483, "learning_rate": 9.997160917174943e-06, "loss": 0.0679, "step": 28136 }, { "epoch": 0.6200069411161976, "grad_norm": 0.6092590093612671, "learning_rate": 9.996151691293865e-06, "loss": 0.0637, "step": 28137 }, { "epoch": 0.6200289764057137, "grad_norm": 0.4827515780925751, "learning_rate": 9.99514249090007e-06, "loss": 0.0556, "step": 28138 }, { "epoch": 0.6200510116952299, "grad_norm": 0.3322387635707855, "learning_rate": 9.994133315998697e-06, "loss": 0.0603, "step": 28139 }, { "epoch": 0.6200730469847461, "grad_norm": 0.5685486793518066, "learning_rate": 9.993124166594886e-06, "loss": 0.0766, "step": 28140 }, { "epoch": 0.6200950822742622, "grad_norm": 0.7086492776870728, "learning_rate": 9.99211504269378e-06, "loss": 0.0725, "step": 28141 }, { "epoch": 0.6201171175637784, "grad_norm": 0.6872324347496033, "learning_rate": 9.991105944300517e-06, "loss": 0.0795, "step": 28142 }, { "epoch": 0.6201391528532946, "grad_norm": 0.8145871758460999, "learning_rate": 9.990096871420239e-06, "loss": 0.0561, "step": 28143 }, { "epoch": 0.6201611881428107, "grad_norm": 0.6146195530891418, "learning_rate": 9.989087824058078e-06, "loss": 0.0867, "step": 28144 }, { "epoch": 0.6201832234323269, "grad_norm": 0.6493242383003235, "learning_rate": 9.988078802219187e-06, "loss": 0.1022, "step": 28145 }, { "epoch": 0.6202052587218431, "grad_norm": 0.7195794582366943, "learning_rate": 9.987069805908696e-06, "loss": 0.089, "step": 28146 }, { "epoch": 0.6202272940113592, "grad_norm": 0.9318844676017761, "learning_rate": 9.986060835131746e-06, "loss": 0.1039, "step": 28147 }, { "epoch": 0.6202493293008754, "grad_norm": 0.6402902007102966, "learning_rate": 9.985051889893475e-06, "loss": 0.058, "step": 28148 }, { "epoch": 0.6202713645903916, "grad_norm": 0.8883438110351562, "learning_rate": 9.984042970199028e-06, "loss": 0.0867, "step": 28149 }, { "epoch": 0.6202933998799077, "grad_norm": 0.5346550345420837, "learning_rate": 9.983034076053539e-06, "loss": 0.056, "step": 28150 }, { "epoch": 0.6203154351694238, "grad_norm": 0.5439575910568237, "learning_rate": 9.982025207462147e-06, "loss": 0.0459, "step": 28151 }, { "epoch": 0.6203374704589399, "grad_norm": 0.4845324158668518, "learning_rate": 9.981016364429986e-06, "loss": 0.0565, "step": 28152 }, { "epoch": 0.6203595057484561, "grad_norm": 0.663676917552948, "learning_rate": 9.980007546962207e-06, "loss": 0.0658, "step": 28153 }, { "epoch": 0.6203815410379723, "grad_norm": 0.841755747795105, "learning_rate": 9.978998755063941e-06, "loss": 0.0856, "step": 28154 }, { "epoch": 0.6204035763274884, "grad_norm": 0.7976645231246948, "learning_rate": 9.977989988740326e-06, "loss": 0.0697, "step": 28155 }, { "epoch": 0.6204256116170046, "grad_norm": 0.4741429388523102, "learning_rate": 9.9769812479965e-06, "loss": 0.0576, "step": 28156 }, { "epoch": 0.6204476469065208, "grad_norm": 0.6536064743995667, "learning_rate": 9.975972532837603e-06, "loss": 0.0895, "step": 28157 }, { "epoch": 0.6204696821960369, "grad_norm": 0.5226643085479736, "learning_rate": 9.974963843268772e-06, "loss": 0.0752, "step": 28158 }, { "epoch": 0.6204917174855531, "grad_norm": 0.3101845383644104, "learning_rate": 9.97395517929514e-06, "loss": 0.0589, "step": 28159 }, { "epoch": 0.6205137527750693, "grad_norm": 0.5088985562324524, "learning_rate": 9.972946540921854e-06, "loss": 0.0623, "step": 28160 }, { "epoch": 0.6205357880645854, "grad_norm": 0.8766254782676697, "learning_rate": 9.971937928154048e-06, "loss": 0.076, "step": 28161 }, { "epoch": 0.6205578233541016, "grad_norm": 0.4252890348434448, "learning_rate": 9.97092934099686e-06, "loss": 0.0476, "step": 28162 }, { "epoch": 0.6205798586436178, "grad_norm": 0.8926199078559875, "learning_rate": 9.969920779455418e-06, "loss": 0.0894, "step": 28163 }, { "epoch": 0.6206018939331339, "grad_norm": 0.6832946538925171, "learning_rate": 9.96891224353487e-06, "loss": 0.0714, "step": 28164 }, { "epoch": 0.6206239292226501, "grad_norm": 0.9679276347160339, "learning_rate": 9.967903733240357e-06, "loss": 0.0797, "step": 28165 }, { "epoch": 0.6206459645121662, "grad_norm": 0.6087633967399597, "learning_rate": 9.966895248577005e-06, "loss": 0.074, "step": 28166 }, { "epoch": 0.6206679998016824, "grad_norm": 0.513449490070343, "learning_rate": 9.965886789549948e-06, "loss": 0.0585, "step": 28167 }, { "epoch": 0.6206900350911986, "grad_norm": 0.8730002641677856, "learning_rate": 9.964878356164334e-06, "loss": 0.0771, "step": 28168 }, { "epoch": 0.6207120703807147, "grad_norm": 0.37753185629844666, "learning_rate": 9.963869948425296e-06, "loss": 0.0636, "step": 28169 }, { "epoch": 0.6207341056702309, "grad_norm": 0.574479877948761, "learning_rate": 9.96286156633797e-06, "loss": 0.0438, "step": 28170 }, { "epoch": 0.6207561409597471, "grad_norm": 0.41807764768600464, "learning_rate": 9.961853209907482e-06, "loss": 0.0457, "step": 28171 }, { "epoch": 0.6207781762492632, "grad_norm": 0.4284111261367798, "learning_rate": 9.960844879138982e-06, "loss": 0.0573, "step": 28172 }, { "epoch": 0.6208002115387794, "grad_norm": 0.5423948764801025, "learning_rate": 9.959836574037606e-06, "loss": 0.0658, "step": 28173 }, { "epoch": 0.6208222468282956, "grad_norm": 0.770373523235321, "learning_rate": 9.958828294608485e-06, "loss": 0.0477, "step": 28174 }, { "epoch": 0.6208442821178117, "grad_norm": 0.5807511806488037, "learning_rate": 9.957820040856749e-06, "loss": 0.0873, "step": 28175 }, { "epoch": 0.6208663174073278, "grad_norm": 0.6003568172454834, "learning_rate": 9.956811812787543e-06, "loss": 0.0605, "step": 28176 }, { "epoch": 0.620888352696844, "grad_norm": 0.6371371746063232, "learning_rate": 9.955803610406e-06, "loss": 0.0576, "step": 28177 }, { "epoch": 0.6209103879863601, "grad_norm": 0.6047666072845459, "learning_rate": 9.954795433717252e-06, "loss": 0.0662, "step": 28178 }, { "epoch": 0.6209324232758763, "grad_norm": 1.0726656913757324, "learning_rate": 9.95378728272643e-06, "loss": 0.1065, "step": 28179 }, { "epoch": 0.6209544585653924, "grad_norm": 0.6131795048713684, "learning_rate": 9.952779157438683e-06, "loss": 0.0524, "step": 28180 }, { "epoch": 0.6209764938549086, "grad_norm": 0.5253931283950806, "learning_rate": 9.951771057859138e-06, "loss": 0.0733, "step": 28181 }, { "epoch": 0.6209985291444248, "grad_norm": 0.3602239489555359, "learning_rate": 9.950762983992927e-06, "loss": 0.0814, "step": 28182 }, { "epoch": 0.6210205644339409, "grad_norm": 0.35540640354156494, "learning_rate": 9.94975493584519e-06, "loss": 0.0418, "step": 28183 }, { "epoch": 0.6210425997234571, "grad_norm": 0.4973467290401459, "learning_rate": 9.948746913421063e-06, "loss": 0.0748, "step": 28184 }, { "epoch": 0.6210646350129733, "grad_norm": 0.761410117149353, "learning_rate": 9.947738916725673e-06, "loss": 0.0578, "step": 28185 }, { "epoch": 0.6210866703024894, "grad_norm": 1.051889181137085, "learning_rate": 9.946730945764152e-06, "loss": 0.0877, "step": 28186 }, { "epoch": 0.6211087055920056, "grad_norm": 0.6531800031661987, "learning_rate": 9.945723000541647e-06, "loss": 0.0749, "step": 28187 }, { "epoch": 0.6211307408815218, "grad_norm": 1.388148546218872, "learning_rate": 9.944715081063287e-06, "loss": 0.0556, "step": 28188 }, { "epoch": 0.6211527761710379, "grad_norm": 0.64451003074646, "learning_rate": 9.9437071873342e-06, "loss": 0.0904, "step": 28189 }, { "epoch": 0.6211748114605541, "grad_norm": 0.5303982496261597, "learning_rate": 9.942699319359523e-06, "loss": 0.0476, "step": 28190 }, { "epoch": 0.6211968467500703, "grad_norm": 0.6391161680221558, "learning_rate": 9.941691477144395e-06, "loss": 0.0902, "step": 28191 }, { "epoch": 0.6212188820395864, "grad_norm": 0.82655930519104, "learning_rate": 9.940683660693942e-06, "loss": 0.079, "step": 28192 }, { "epoch": 0.6212409173291026, "grad_norm": 0.7049407958984375, "learning_rate": 9.9396758700133e-06, "loss": 0.0475, "step": 28193 }, { "epoch": 0.6212629526186187, "grad_norm": 0.6598679423332214, "learning_rate": 9.938668105107598e-06, "loss": 0.096, "step": 28194 }, { "epoch": 0.6212849879081349, "grad_norm": 0.6333200335502625, "learning_rate": 9.937660365981978e-06, "loss": 0.0608, "step": 28195 }, { "epoch": 0.6213070231976511, "grad_norm": 0.6264514327049255, "learning_rate": 9.936652652641569e-06, "loss": 0.0681, "step": 28196 }, { "epoch": 0.6213290584871672, "grad_norm": 0.48584428429603577, "learning_rate": 9.935644965091502e-06, "loss": 0.0767, "step": 28197 }, { "epoch": 0.6213510937766834, "grad_norm": 0.5342967510223389, "learning_rate": 9.93463730333691e-06, "loss": 0.0738, "step": 28198 }, { "epoch": 0.6213731290661996, "grad_norm": 0.4535995423793793, "learning_rate": 9.93362966738293e-06, "loss": 0.0673, "step": 28199 }, { "epoch": 0.6213951643557156, "grad_norm": 0.6146999597549438, "learning_rate": 9.932622057234688e-06, "loss": 0.0664, "step": 28200 }, { "epoch": 0.6214171996452318, "grad_norm": 0.8528140783309937, "learning_rate": 9.931614472897322e-06, "loss": 0.053, "step": 28201 }, { "epoch": 0.621439234934748, "grad_norm": 0.5810939073562622, "learning_rate": 9.930606914375954e-06, "loss": 0.05, "step": 28202 }, { "epoch": 0.6214612702242641, "grad_norm": 0.49435409903526306, "learning_rate": 9.929599381675727e-06, "loss": 0.0815, "step": 28203 }, { "epoch": 0.6214833055137803, "grad_norm": 0.6258494853973389, "learning_rate": 9.928591874801774e-06, "loss": 0.1107, "step": 28204 }, { "epoch": 0.6215053408032964, "grad_norm": 0.8200482130050659, "learning_rate": 9.927584393759218e-06, "loss": 0.078, "step": 28205 }, { "epoch": 0.6215273760928126, "grad_norm": 0.8792429566383362, "learning_rate": 9.926576938553194e-06, "loss": 0.0669, "step": 28206 }, { "epoch": 0.6215494113823288, "grad_norm": 0.6695019006729126, "learning_rate": 9.925569509188837e-06, "loss": 0.0485, "step": 28207 }, { "epoch": 0.6215714466718449, "grad_norm": 0.600229799747467, "learning_rate": 9.924562105671278e-06, "loss": 0.0653, "step": 28208 }, { "epoch": 0.6215934819613611, "grad_norm": 0.6275995373725891, "learning_rate": 9.923554728005636e-06, "loss": 0.0625, "step": 28209 }, { "epoch": 0.6216155172508773, "grad_norm": 0.4683842658996582, "learning_rate": 9.922547376197058e-06, "loss": 0.0592, "step": 28210 }, { "epoch": 0.6216375525403934, "grad_norm": 0.5469003915786743, "learning_rate": 9.921540050250673e-06, "loss": 0.0439, "step": 28211 }, { "epoch": 0.6216595878299096, "grad_norm": 0.8567116856575012, "learning_rate": 9.920532750171604e-06, "loss": 0.0816, "step": 28212 }, { "epoch": 0.6216816231194258, "grad_norm": 0.897243082523346, "learning_rate": 9.919525475964985e-06, "loss": 0.1011, "step": 28213 }, { "epoch": 0.6217036584089419, "grad_norm": 0.719128429889679, "learning_rate": 9.918518227635948e-06, "loss": 0.0783, "step": 28214 }, { "epoch": 0.6217256936984581, "grad_norm": 1.162548303604126, "learning_rate": 9.917511005189623e-06, "loss": 0.0843, "step": 28215 }, { "epoch": 0.6217477289879743, "grad_norm": 0.7557588219642639, "learning_rate": 9.91650380863114e-06, "loss": 0.0666, "step": 28216 }, { "epoch": 0.6217697642774904, "grad_norm": 0.8527674674987793, "learning_rate": 9.915496637965623e-06, "loss": 0.0863, "step": 28217 }, { "epoch": 0.6217917995670066, "grad_norm": 0.4527962803840637, "learning_rate": 9.914489493198216e-06, "loss": 0.0674, "step": 28218 }, { "epoch": 0.6218138348565228, "grad_norm": 0.5083518028259277, "learning_rate": 9.913482374334039e-06, "loss": 0.0884, "step": 28219 }, { "epoch": 0.6218358701460389, "grad_norm": 0.9969472289085388, "learning_rate": 9.912475281378225e-06, "loss": 0.0729, "step": 28220 }, { "epoch": 0.6218579054355551, "grad_norm": 0.33168500661849976, "learning_rate": 9.911468214335898e-06, "loss": 0.0571, "step": 28221 }, { "epoch": 0.6218799407250712, "grad_norm": 0.9101342558860779, "learning_rate": 9.910461173212196e-06, "loss": 0.1001, "step": 28222 }, { "epoch": 0.6219019760145874, "grad_norm": 0.5729103088378906, "learning_rate": 9.909454158012246e-06, "loss": 0.0704, "step": 28223 }, { "epoch": 0.6219240113041036, "grad_norm": 0.4075016975402832, "learning_rate": 9.908447168741174e-06, "loss": 0.0562, "step": 28224 }, { "epoch": 0.6219460465936196, "grad_norm": 0.5805746912956238, "learning_rate": 9.907440205404103e-06, "loss": 0.0417, "step": 28225 }, { "epoch": 0.6219680818831358, "grad_norm": 0.6934046149253845, "learning_rate": 9.906433268006179e-06, "loss": 0.0572, "step": 28226 }, { "epoch": 0.621990117172652, "grad_norm": 1.0108846426010132, "learning_rate": 9.905426356552522e-06, "loss": 0.1024, "step": 28227 }, { "epoch": 0.6220121524621681, "grad_norm": 0.7146432995796204, "learning_rate": 9.904419471048254e-06, "loss": 0.059, "step": 28228 }, { "epoch": 0.6220341877516843, "grad_norm": 0.5413402915000916, "learning_rate": 9.903412611498516e-06, "loss": 0.0815, "step": 28229 }, { "epoch": 0.6220562230412005, "grad_norm": 0.46859458088874817, "learning_rate": 9.90240577790843e-06, "loss": 0.0757, "step": 28230 }, { "epoch": 0.6220782583307166, "grad_norm": 1.1755449771881104, "learning_rate": 9.901398970283127e-06, "loss": 0.0861, "step": 28231 }, { "epoch": 0.6221002936202328, "grad_norm": 0.6749512553215027, "learning_rate": 9.900392188627723e-06, "loss": 0.0386, "step": 28232 }, { "epoch": 0.622122328909749, "grad_norm": 0.745578944683075, "learning_rate": 9.899385432947365e-06, "loss": 0.0869, "step": 28233 }, { "epoch": 0.6221443641992651, "grad_norm": 0.5941500067710876, "learning_rate": 9.898378703247171e-06, "loss": 0.0728, "step": 28234 }, { "epoch": 0.6221663994887813, "grad_norm": 0.6965965032577515, "learning_rate": 9.897371999532271e-06, "loss": 0.0632, "step": 28235 }, { "epoch": 0.6221884347782974, "grad_norm": 0.6418644189834595, "learning_rate": 9.896365321807788e-06, "loss": 0.0827, "step": 28236 }, { "epoch": 0.6222104700678136, "grad_norm": 0.7702134251594543, "learning_rate": 9.895358670078857e-06, "loss": 0.05, "step": 28237 }, { "epoch": 0.6222325053573298, "grad_norm": 0.8261523246765137, "learning_rate": 9.894352044350601e-06, "loss": 0.0801, "step": 28238 }, { "epoch": 0.6222545406468459, "grad_norm": 0.620690643787384, "learning_rate": 9.893345444628145e-06, "loss": 0.0777, "step": 28239 }, { "epoch": 0.6222765759363621, "grad_norm": 0.5759385228157043, "learning_rate": 9.892338870916617e-06, "loss": 0.0623, "step": 28240 }, { "epoch": 0.6222986112258783, "grad_norm": 0.5601291060447693, "learning_rate": 9.891332323221151e-06, "loss": 0.0965, "step": 28241 }, { "epoch": 0.6223206465153944, "grad_norm": 0.7981700897216797, "learning_rate": 9.890325801546867e-06, "loss": 0.0882, "step": 28242 }, { "epoch": 0.6223426818049106, "grad_norm": 0.5529627799987793, "learning_rate": 9.889319305898899e-06, "loss": 0.0412, "step": 28243 }, { "epoch": 0.6223647170944268, "grad_norm": 0.4538933336734772, "learning_rate": 9.888312836282359e-06, "loss": 0.0985, "step": 28244 }, { "epoch": 0.6223867523839429, "grad_norm": 0.7433047890663147, "learning_rate": 9.88730639270239e-06, "loss": 0.0809, "step": 28245 }, { "epoch": 0.6224087876734591, "grad_norm": 0.6164242029190063, "learning_rate": 9.886299975164109e-06, "loss": 0.0921, "step": 28246 }, { "epoch": 0.6224308229629752, "grad_norm": 0.5270110964775085, "learning_rate": 9.88529358367264e-06, "loss": 0.079, "step": 28247 }, { "epoch": 0.6224528582524914, "grad_norm": 0.569933295249939, "learning_rate": 9.884287218233118e-06, "loss": 0.0622, "step": 28248 }, { "epoch": 0.6224748935420076, "grad_norm": 1.062769889831543, "learning_rate": 9.883280878850665e-06, "loss": 0.0783, "step": 28249 }, { "epoch": 0.6224969288315236, "grad_norm": 0.3958136737346649, "learning_rate": 9.882274565530408e-06, "loss": 0.0418, "step": 28250 }, { "epoch": 0.6225189641210398, "grad_norm": 0.38663002848625183, "learning_rate": 9.881268278277466e-06, "loss": 0.0752, "step": 28251 }, { "epoch": 0.622540999410556, "grad_norm": 0.6133289337158203, "learning_rate": 9.880262017096974e-06, "loss": 0.0646, "step": 28252 }, { "epoch": 0.6225630347000721, "grad_norm": 0.7998802661895752, "learning_rate": 9.879255781994052e-06, "loss": 0.0727, "step": 28253 }, { "epoch": 0.6225850699895883, "grad_norm": 0.5232685208320618, "learning_rate": 9.878249572973825e-06, "loss": 0.0675, "step": 28254 }, { "epoch": 0.6226071052791045, "grad_norm": 0.7632432579994202, "learning_rate": 9.877243390041417e-06, "loss": 0.0599, "step": 28255 }, { "epoch": 0.6226291405686206, "grad_norm": 0.30722174048423767, "learning_rate": 9.876237233201958e-06, "loss": 0.0879, "step": 28256 }, { "epoch": 0.6226511758581368, "grad_norm": 0.520899772644043, "learning_rate": 9.87523110246057e-06, "loss": 0.0798, "step": 28257 }, { "epoch": 0.622673211147653, "grad_norm": 0.44340264797210693, "learning_rate": 9.874224997822383e-06, "loss": 0.0832, "step": 28258 }, { "epoch": 0.6226952464371691, "grad_norm": 0.5624269843101501, "learning_rate": 9.87321891929251e-06, "loss": 0.067, "step": 28259 }, { "epoch": 0.6227172817266853, "grad_norm": 0.9322489500045776, "learning_rate": 9.872212866876084e-06, "loss": 0.0656, "step": 28260 }, { "epoch": 0.6227393170162014, "grad_norm": 0.660863995552063, "learning_rate": 9.871206840578229e-06, "loss": 0.0676, "step": 28261 }, { "epoch": 0.6227613523057176, "grad_norm": 0.8296516537666321, "learning_rate": 9.870200840404068e-06, "loss": 0.0686, "step": 28262 }, { "epoch": 0.6227833875952338, "grad_norm": 0.7028059363365173, "learning_rate": 9.869194866358718e-06, "loss": 0.0541, "step": 28263 }, { "epoch": 0.6228054228847499, "grad_norm": 0.346358984708786, "learning_rate": 9.868188918447319e-06, "loss": 0.078, "step": 28264 }, { "epoch": 0.6228274581742661, "grad_norm": 0.6404757499694824, "learning_rate": 9.867182996674984e-06, "loss": 0.0593, "step": 28265 }, { "epoch": 0.6228494934637823, "grad_norm": 0.6563915014266968, "learning_rate": 9.866177101046837e-06, "loss": 0.0481, "step": 28266 }, { "epoch": 0.6228715287532984, "grad_norm": 0.45795875787734985, "learning_rate": 9.865171231568e-06, "loss": 0.0744, "step": 28267 }, { "epoch": 0.6228935640428146, "grad_norm": 0.6454609036445618, "learning_rate": 9.864165388243603e-06, "loss": 0.0725, "step": 28268 }, { "epoch": 0.6229155993323308, "grad_norm": 0.6800025105476379, "learning_rate": 9.863159571078766e-06, "loss": 0.0966, "step": 28269 }, { "epoch": 0.6229376346218469, "grad_norm": 0.6236079335212708, "learning_rate": 9.862153780078608e-06, "loss": 0.0762, "step": 28270 }, { "epoch": 0.6229596699113631, "grad_norm": 0.7499012351036072, "learning_rate": 9.861148015248259e-06, "loss": 0.0605, "step": 28271 }, { "epoch": 0.6229817052008793, "grad_norm": 0.5788694024085999, "learning_rate": 9.860142276592841e-06, "loss": 0.0601, "step": 28272 }, { "epoch": 0.6230037404903954, "grad_norm": 0.6140907406806946, "learning_rate": 9.859136564117474e-06, "loss": 0.0614, "step": 28273 }, { "epoch": 0.6230257757799116, "grad_norm": 0.5783770084381104, "learning_rate": 9.858130877827272e-06, "loss": 0.0706, "step": 28274 }, { "epoch": 0.6230478110694276, "grad_norm": 0.671502947807312, "learning_rate": 9.857125217727377e-06, "loss": 0.0831, "step": 28275 }, { "epoch": 0.6230698463589438, "grad_norm": 0.44187039136886597, "learning_rate": 9.8561195838229e-06, "loss": 0.0782, "step": 28276 }, { "epoch": 0.62309188164846, "grad_norm": 0.5165448784828186, "learning_rate": 9.855113976118966e-06, "loss": 0.0557, "step": 28277 }, { "epoch": 0.6231139169379761, "grad_norm": 0.5761714577674866, "learning_rate": 9.854108394620688e-06, "loss": 0.0775, "step": 28278 }, { "epoch": 0.6231359522274923, "grad_norm": 0.4929935932159424, "learning_rate": 9.8531028393332e-06, "loss": 0.0606, "step": 28279 }, { "epoch": 0.6231579875170085, "grad_norm": 0.5646442770957947, "learning_rate": 9.85209731026162e-06, "loss": 0.0714, "step": 28280 }, { "epoch": 0.6231800228065246, "grad_norm": 0.43627220392227173, "learning_rate": 9.85109180741107e-06, "loss": 0.0533, "step": 28281 }, { "epoch": 0.6232020580960408, "grad_norm": 0.33787912130355835, "learning_rate": 9.850086330786663e-06, "loss": 0.0834, "step": 28282 }, { "epoch": 0.623224093385557, "grad_norm": 0.5513687133789062, "learning_rate": 9.849080880393534e-06, "loss": 0.0504, "step": 28283 }, { "epoch": 0.6232461286750731, "grad_norm": 0.6059659123420715, "learning_rate": 9.848075456236799e-06, "loss": 0.0657, "step": 28284 }, { "epoch": 0.6232681639645893, "grad_norm": 0.6925519108772278, "learning_rate": 9.847070058321577e-06, "loss": 0.0998, "step": 28285 }, { "epoch": 0.6232901992541054, "grad_norm": 0.6195046305656433, "learning_rate": 9.846064686652988e-06, "loss": 0.0778, "step": 28286 }, { "epoch": 0.6233122345436216, "grad_norm": 0.7545205354690552, "learning_rate": 9.84505934123616e-06, "loss": 0.0459, "step": 28287 }, { "epoch": 0.6233342698331378, "grad_norm": 0.661433219909668, "learning_rate": 9.844054022076208e-06, "loss": 0.0721, "step": 28288 }, { "epoch": 0.6233563051226539, "grad_norm": 0.4575781226158142, "learning_rate": 9.843048729178252e-06, "loss": 0.0658, "step": 28289 }, { "epoch": 0.6233783404121701, "grad_norm": 0.5636612772941589, "learning_rate": 9.84204346254741e-06, "loss": 0.0755, "step": 28290 }, { "epoch": 0.6234003757016863, "grad_norm": 0.4480888247489929, "learning_rate": 9.841038222188812e-06, "loss": 0.0797, "step": 28291 }, { "epoch": 0.6234224109912024, "grad_norm": 0.9551331400871277, "learning_rate": 9.840033008107573e-06, "loss": 0.0582, "step": 28292 }, { "epoch": 0.6234444462807186, "grad_norm": 0.8353686928749084, "learning_rate": 9.83902782030881e-06, "loss": 0.0489, "step": 28293 }, { "epoch": 0.6234664815702348, "grad_norm": 0.8503962755203247, "learning_rate": 9.838022658797647e-06, "loss": 0.0862, "step": 28294 }, { "epoch": 0.6234885168597509, "grad_norm": 0.49985507130622864, "learning_rate": 9.837017523579206e-06, "loss": 0.0581, "step": 28295 }, { "epoch": 0.6235105521492671, "grad_norm": 0.614017128944397, "learning_rate": 9.836012414658601e-06, "loss": 0.0433, "step": 28296 }, { "epoch": 0.6235325874387833, "grad_norm": 0.6267802119255066, "learning_rate": 9.83500733204095e-06, "loss": 0.0471, "step": 28297 }, { "epoch": 0.6235546227282994, "grad_norm": 1.0879085063934326, "learning_rate": 9.83400227573138e-06, "loss": 0.0772, "step": 28298 }, { "epoch": 0.6235766580178155, "grad_norm": 0.5295196175575256, "learning_rate": 9.832997245735008e-06, "loss": 0.0816, "step": 28299 }, { "epoch": 0.6235986933073316, "grad_norm": 0.6741745471954346, "learning_rate": 9.83199224205695e-06, "loss": 0.0845, "step": 28300 }, { "epoch": 0.6236207285968478, "grad_norm": 0.5895499587059021, "learning_rate": 9.830987264702327e-06, "loss": 0.075, "step": 28301 }, { "epoch": 0.623642763886364, "grad_norm": 0.5184118747711182, "learning_rate": 9.829982313676258e-06, "loss": 0.064, "step": 28302 }, { "epoch": 0.6236647991758801, "grad_norm": 0.4063763916492462, "learning_rate": 9.828977388983861e-06, "loss": 0.0586, "step": 28303 }, { "epoch": 0.6236868344653963, "grad_norm": 0.7489948868751526, "learning_rate": 9.827972490630258e-06, "loss": 0.0509, "step": 28304 }, { "epoch": 0.6237088697549125, "grad_norm": 0.7577168941497803, "learning_rate": 9.826967618620556e-06, "loss": 0.0802, "step": 28305 }, { "epoch": 0.6237309050444286, "grad_norm": 0.6011010408401489, "learning_rate": 9.82596277295989e-06, "loss": 0.105, "step": 28306 }, { "epoch": 0.6237529403339448, "grad_norm": 0.9611320495605469, "learning_rate": 9.824957953653368e-06, "loss": 0.0886, "step": 28307 }, { "epoch": 0.623774975623461, "grad_norm": 0.597027599811554, "learning_rate": 9.82395316070611e-06, "loss": 0.0558, "step": 28308 }, { "epoch": 0.6237970109129771, "grad_norm": 1.1212153434753418, "learning_rate": 9.822948394123234e-06, "loss": 0.0591, "step": 28309 }, { "epoch": 0.6238190462024933, "grad_norm": 0.7897434234619141, "learning_rate": 9.821943653909861e-06, "loss": 0.0872, "step": 28310 }, { "epoch": 0.6238410814920095, "grad_norm": 0.5295702815055847, "learning_rate": 9.820938940071102e-06, "loss": 0.061, "step": 28311 }, { "epoch": 0.6238631167815256, "grad_norm": 0.553920567035675, "learning_rate": 9.819934252612081e-06, "loss": 0.0908, "step": 28312 }, { "epoch": 0.6238851520710418, "grad_norm": 0.7838656902313232, "learning_rate": 9.818929591537906e-06, "loss": 0.0422, "step": 28313 }, { "epoch": 0.623907187360558, "grad_norm": 0.5562828183174133, "learning_rate": 9.817924956853707e-06, "loss": 0.0734, "step": 28314 }, { "epoch": 0.6239292226500741, "grad_norm": 0.9291930198669434, "learning_rate": 9.816920348564594e-06, "loss": 0.0826, "step": 28315 }, { "epoch": 0.6239512579395903, "grad_norm": 0.5709720253944397, "learning_rate": 9.815915766675683e-06, "loss": 0.0486, "step": 28316 }, { "epoch": 0.6239732932291064, "grad_norm": 0.5101991295814514, "learning_rate": 9.814911211192095e-06, "loss": 0.0997, "step": 28317 }, { "epoch": 0.6239953285186226, "grad_norm": 0.8359918594360352, "learning_rate": 9.813906682118947e-06, "loss": 0.0625, "step": 28318 }, { "epoch": 0.6240173638081388, "grad_norm": 0.7311578392982483, "learning_rate": 9.812902179461353e-06, "loss": 0.0682, "step": 28319 }, { "epoch": 0.6240393990976549, "grad_norm": 0.7320888042449951, "learning_rate": 9.811897703224422e-06, "loss": 0.065, "step": 28320 }, { "epoch": 0.6240614343871711, "grad_norm": 0.6313776969909668, "learning_rate": 9.810893253413283e-06, "loss": 0.0407, "step": 28321 }, { "epoch": 0.6240834696766873, "grad_norm": 0.367703914642334, "learning_rate": 9.80988883003305e-06, "loss": 0.0685, "step": 28322 }, { "epoch": 0.6241055049662034, "grad_norm": 0.5304425954818726, "learning_rate": 9.808884433088835e-06, "loss": 0.0546, "step": 28323 }, { "epoch": 0.6241275402557195, "grad_norm": 0.317097932100296, "learning_rate": 9.807880062585752e-06, "loss": 0.0734, "step": 28324 }, { "epoch": 0.6241495755452356, "grad_norm": 0.7596717476844788, "learning_rate": 9.806875718528924e-06, "loss": 0.0701, "step": 28325 }, { "epoch": 0.6241716108347518, "grad_norm": 0.7162284255027771, "learning_rate": 9.805871400923462e-06, "loss": 0.0664, "step": 28326 }, { "epoch": 0.624193646124268, "grad_norm": 0.8049184083938599, "learning_rate": 9.804867109774482e-06, "loss": 0.0728, "step": 28327 }, { "epoch": 0.6242156814137841, "grad_norm": 0.40609169006347656, "learning_rate": 9.803862845087095e-06, "loss": 0.067, "step": 28328 }, { "epoch": 0.6242377167033003, "grad_norm": 0.789763867855072, "learning_rate": 9.802858606866426e-06, "loss": 0.122, "step": 28329 }, { "epoch": 0.6242597519928165, "grad_norm": 0.508499026298523, "learning_rate": 9.801854395117586e-06, "loss": 0.0841, "step": 28330 }, { "epoch": 0.6242817872823326, "grad_norm": 0.43617212772369385, "learning_rate": 9.800850209845687e-06, "loss": 0.037, "step": 28331 }, { "epoch": 0.6243038225718488, "grad_norm": 0.6009004712104797, "learning_rate": 9.799846051055844e-06, "loss": 0.0694, "step": 28332 }, { "epoch": 0.624325857861365, "grad_norm": 0.6104342937469482, "learning_rate": 9.798841918753177e-06, "loss": 0.071, "step": 28333 }, { "epoch": 0.6243478931508811, "grad_norm": 0.37343481183052063, "learning_rate": 9.797837812942798e-06, "loss": 0.0646, "step": 28334 }, { "epoch": 0.6243699284403973, "grad_norm": 0.5275858044624329, "learning_rate": 9.79683373362982e-06, "loss": 0.0687, "step": 28335 }, { "epoch": 0.6243919637299135, "grad_norm": 0.6301142573356628, "learning_rate": 9.79582968081935e-06, "loss": 0.0861, "step": 28336 }, { "epoch": 0.6244139990194296, "grad_norm": 0.7775791883468628, "learning_rate": 9.794825654516517e-06, "loss": 0.0601, "step": 28337 }, { "epoch": 0.6244360343089458, "grad_norm": 0.7905901670455933, "learning_rate": 9.793821654726429e-06, "loss": 0.0635, "step": 28338 }, { "epoch": 0.624458069598462, "grad_norm": 0.6995081901550293, "learning_rate": 9.792817681454197e-06, "loss": 0.0691, "step": 28339 }, { "epoch": 0.6244801048879781, "grad_norm": 0.5215581655502319, "learning_rate": 9.791813734704939e-06, "loss": 0.0679, "step": 28340 }, { "epoch": 0.6245021401774943, "grad_norm": 1.078814148902893, "learning_rate": 9.790809814483766e-06, "loss": 0.0991, "step": 28341 }, { "epoch": 0.6245241754670104, "grad_norm": 0.5389086604118347, "learning_rate": 9.789805920795793e-06, "loss": 0.0836, "step": 28342 }, { "epoch": 0.6245462107565266, "grad_norm": 0.35805433988571167, "learning_rate": 9.788802053646124e-06, "loss": 0.0572, "step": 28343 }, { "epoch": 0.6245682460460428, "grad_norm": 0.5652586221694946, "learning_rate": 9.787798213039888e-06, "loss": 0.082, "step": 28344 }, { "epoch": 0.6245902813355589, "grad_norm": 0.8604792952537537, "learning_rate": 9.786794398982191e-06, "loss": 0.0611, "step": 28345 }, { "epoch": 0.6246123166250751, "grad_norm": 0.38579943776130676, "learning_rate": 9.785790611478146e-06, "loss": 0.0793, "step": 28346 }, { "epoch": 0.6246343519145913, "grad_norm": 0.5685332417488098, "learning_rate": 9.784786850532862e-06, "loss": 0.064, "step": 28347 }, { "epoch": 0.6246563872041074, "grad_norm": 0.49935752153396606, "learning_rate": 9.783783116151458e-06, "loss": 0.0706, "step": 28348 }, { "epoch": 0.6246784224936235, "grad_norm": 0.6953653693199158, "learning_rate": 9.782779408339044e-06, "loss": 0.0861, "step": 28349 }, { "epoch": 0.6247004577831397, "grad_norm": 0.662343442440033, "learning_rate": 9.781775727100734e-06, "loss": 0.0695, "step": 28350 }, { "epoch": 0.6247224930726558, "grad_norm": 0.49188482761383057, "learning_rate": 9.78077207244163e-06, "loss": 0.0686, "step": 28351 }, { "epoch": 0.624744528362172, "grad_norm": 0.5875769853591919, "learning_rate": 9.779768444366859e-06, "loss": 0.0834, "step": 28352 }, { "epoch": 0.6247665636516881, "grad_norm": 0.5951518416404724, "learning_rate": 9.778764842881527e-06, "loss": 0.0906, "step": 28353 }, { "epoch": 0.6247885989412043, "grad_norm": 0.5539989471435547, "learning_rate": 9.777761267990745e-06, "loss": 0.0627, "step": 28354 }, { "epoch": 0.6248106342307205, "grad_norm": 0.7170536518096924, "learning_rate": 9.776757719699624e-06, "loss": 0.0716, "step": 28355 }, { "epoch": 0.6248326695202366, "grad_norm": 1.0409996509552002, "learning_rate": 9.775754198013279e-06, "loss": 0.0748, "step": 28356 }, { "epoch": 0.6248547048097528, "grad_norm": 0.4466022253036499, "learning_rate": 9.774750702936817e-06, "loss": 0.0665, "step": 28357 }, { "epoch": 0.624876740099269, "grad_norm": 0.4613041281700134, "learning_rate": 9.773747234475348e-06, "loss": 0.054, "step": 28358 }, { "epoch": 0.6248987753887851, "grad_norm": 0.926547646522522, "learning_rate": 9.772743792633994e-06, "loss": 0.0876, "step": 28359 }, { "epoch": 0.6249208106783013, "grad_norm": 0.9901221394538879, "learning_rate": 9.771740377417857e-06, "loss": 0.0842, "step": 28360 }, { "epoch": 0.6249428459678175, "grad_norm": 0.7677416205406189, "learning_rate": 9.77073698883205e-06, "loss": 0.0682, "step": 28361 }, { "epoch": 0.6249648812573336, "grad_norm": 0.8771845698356628, "learning_rate": 9.76973362688168e-06, "loss": 0.0607, "step": 28362 }, { "epoch": 0.6249869165468498, "grad_norm": 0.5562412142753601, "learning_rate": 9.768730291571865e-06, "loss": 0.0776, "step": 28363 }, { "epoch": 0.625008951836366, "grad_norm": 0.885694146156311, "learning_rate": 9.767726982907714e-06, "loss": 0.1182, "step": 28364 }, { "epoch": 0.6250309871258821, "grad_norm": 0.6385494470596313, "learning_rate": 9.766723700894333e-06, "loss": 0.0701, "step": 28365 }, { "epoch": 0.6250530224153983, "grad_norm": 0.5800747871398926, "learning_rate": 9.765720445536826e-06, "loss": 0.0775, "step": 28366 }, { "epoch": 0.6250750577049144, "grad_norm": 0.8016673922538757, "learning_rate": 9.764717216840322e-06, "loss": 0.0548, "step": 28367 }, { "epoch": 0.6250970929944306, "grad_norm": 0.30312296748161316, "learning_rate": 9.763714014809918e-06, "loss": 0.0653, "step": 28368 }, { "epoch": 0.6251191282839468, "grad_norm": 0.596488356590271, "learning_rate": 9.76271083945073e-06, "loss": 0.0851, "step": 28369 }, { "epoch": 0.6251411635734629, "grad_norm": 0.41802069544792175, "learning_rate": 9.761707690767858e-06, "loss": 0.0739, "step": 28370 }, { "epoch": 0.6251631988629791, "grad_norm": 0.5416663885116577, "learning_rate": 9.760704568766421e-06, "loss": 0.0851, "step": 28371 }, { "epoch": 0.6251852341524953, "grad_norm": 0.6318672895431519, "learning_rate": 9.759701473451526e-06, "loss": 0.0583, "step": 28372 }, { "epoch": 0.6252072694420113, "grad_norm": 0.6729865074157715, "learning_rate": 9.758698404828279e-06, "loss": 0.0883, "step": 28373 }, { "epoch": 0.6252293047315275, "grad_norm": 0.9535123705863953, "learning_rate": 9.757695362901787e-06, "loss": 0.0779, "step": 28374 }, { "epoch": 0.6252513400210437, "grad_norm": 0.31623899936676025, "learning_rate": 9.756692347677171e-06, "loss": 0.0537, "step": 28375 }, { "epoch": 0.6252733753105598, "grad_norm": 0.6178817749023438, "learning_rate": 9.755689359159532e-06, "loss": 0.0883, "step": 28376 }, { "epoch": 0.625295410600076, "grad_norm": 0.5347301959991455, "learning_rate": 9.75468639735398e-06, "loss": 0.0911, "step": 28377 }, { "epoch": 0.6253174458895921, "grad_norm": 0.9206291437149048, "learning_rate": 9.753683462265618e-06, "loss": 0.0489, "step": 28378 }, { "epoch": 0.6253394811791083, "grad_norm": 0.803728461265564, "learning_rate": 9.752680553899563e-06, "loss": 0.0691, "step": 28379 }, { "epoch": 0.6253615164686245, "grad_norm": 0.5229538679122925, "learning_rate": 9.751677672260923e-06, "loss": 0.0864, "step": 28380 }, { "epoch": 0.6253835517581406, "grad_norm": 0.8380779027938843, "learning_rate": 9.750674817354793e-06, "loss": 0.0661, "step": 28381 }, { "epoch": 0.6254055870476568, "grad_norm": 0.5447230935096741, "learning_rate": 9.749671989186299e-06, "loss": 0.0818, "step": 28382 }, { "epoch": 0.625427622337173, "grad_norm": 0.479436993598938, "learning_rate": 9.74866918776054e-06, "loss": 0.0701, "step": 28383 }, { "epoch": 0.6254496576266891, "grad_norm": 0.4476025700569153, "learning_rate": 9.747666413082625e-06, "loss": 0.0666, "step": 28384 }, { "epoch": 0.6254716929162053, "grad_norm": 0.517419159412384, "learning_rate": 9.746663665157657e-06, "loss": 0.0571, "step": 28385 }, { "epoch": 0.6254937282057215, "grad_norm": 0.5670214295387268, "learning_rate": 9.74566094399075e-06, "loss": 0.0618, "step": 28386 }, { "epoch": 0.6255157634952376, "grad_norm": 0.6003810167312622, "learning_rate": 9.744658249587015e-06, "loss": 0.0602, "step": 28387 }, { "epoch": 0.6255377987847538, "grad_norm": 0.483309268951416, "learning_rate": 9.743655581951552e-06, "loss": 0.0636, "step": 28388 }, { "epoch": 0.62555983407427, "grad_norm": 0.38253888487815857, "learning_rate": 9.742652941089462e-06, "loss": 0.0664, "step": 28389 }, { "epoch": 0.6255818693637861, "grad_norm": 0.4305785596370697, "learning_rate": 9.741650327005867e-06, "loss": 0.0545, "step": 28390 }, { "epoch": 0.6256039046533023, "grad_norm": 0.7231389880180359, "learning_rate": 9.740647739705866e-06, "loss": 0.0521, "step": 28391 }, { "epoch": 0.6256259399428185, "grad_norm": 0.5904259085655212, "learning_rate": 9.739645179194566e-06, "loss": 0.0867, "step": 28392 }, { "epoch": 0.6256479752323346, "grad_norm": 0.3557097613811493, "learning_rate": 9.738642645477067e-06, "loss": 0.0426, "step": 28393 }, { "epoch": 0.6256700105218508, "grad_norm": 0.42971739172935486, "learning_rate": 9.73764013855849e-06, "loss": 0.0755, "step": 28394 }, { "epoch": 0.625692045811367, "grad_norm": 0.6861227750778198, "learning_rate": 9.736637658443933e-06, "loss": 0.0498, "step": 28395 }, { "epoch": 0.6257140811008831, "grad_norm": 0.5732776522636414, "learning_rate": 9.735635205138504e-06, "loss": 0.0731, "step": 28396 }, { "epoch": 0.6257361163903993, "grad_norm": 0.5859937071800232, "learning_rate": 9.734632778647304e-06, "loss": 0.0742, "step": 28397 }, { "epoch": 0.6257581516799153, "grad_norm": 0.5030409097671509, "learning_rate": 9.733630378975446e-06, "loss": 0.0728, "step": 28398 }, { "epoch": 0.6257801869694315, "grad_norm": 0.9010734558105469, "learning_rate": 9.732628006128032e-06, "loss": 0.0815, "step": 28399 }, { "epoch": 0.6258022222589477, "grad_norm": 0.5190117359161377, "learning_rate": 9.731625660110169e-06, "loss": 0.0604, "step": 28400 }, { "epoch": 0.6258242575484638, "grad_norm": 0.7348718643188477, "learning_rate": 9.730623340926954e-06, "loss": 0.078, "step": 28401 }, { "epoch": 0.62584629283798, "grad_norm": 0.47437289357185364, "learning_rate": 9.729621048583508e-06, "loss": 0.0757, "step": 28402 }, { "epoch": 0.6258683281274962, "grad_norm": 0.726138710975647, "learning_rate": 9.728618783084926e-06, "loss": 0.077, "step": 28403 }, { "epoch": 0.6258903634170123, "grad_norm": 0.7351050972938538, "learning_rate": 9.727616544436314e-06, "loss": 0.0839, "step": 28404 }, { "epoch": 0.6259123987065285, "grad_norm": 0.6457184553146362, "learning_rate": 9.726614332642782e-06, "loss": 0.0607, "step": 28405 }, { "epoch": 0.6259344339960446, "grad_norm": 0.25988298654556274, "learning_rate": 9.725612147709429e-06, "loss": 0.0492, "step": 28406 }, { "epoch": 0.6259564692855608, "grad_norm": 0.266861230134964, "learning_rate": 9.724609989641364e-06, "loss": 0.0515, "step": 28407 }, { "epoch": 0.625978504575077, "grad_norm": 0.7408170104026794, "learning_rate": 9.72360785844368e-06, "loss": 0.0679, "step": 28408 }, { "epoch": 0.6260005398645931, "grad_norm": 0.5085207223892212, "learning_rate": 9.722605754121498e-06, "loss": 0.0669, "step": 28409 }, { "epoch": 0.6260225751541093, "grad_norm": 0.24824057519435883, "learning_rate": 9.721603676679917e-06, "loss": 0.0501, "step": 28410 }, { "epoch": 0.6260446104436255, "grad_norm": 0.9344639778137207, "learning_rate": 9.720601626124035e-06, "loss": 0.0668, "step": 28411 }, { "epoch": 0.6260666457331416, "grad_norm": 0.7050876617431641, "learning_rate": 9.71959960245896e-06, "loss": 0.064, "step": 28412 }, { "epoch": 0.6260886810226578, "grad_norm": 0.47225067019462585, "learning_rate": 9.718597605689798e-06, "loss": 0.0774, "step": 28413 }, { "epoch": 0.626110716312174, "grad_norm": 0.6254698038101196, "learning_rate": 9.717595635821651e-06, "loss": 0.0697, "step": 28414 }, { "epoch": 0.6261327516016901, "grad_norm": 0.6665502190589905, "learning_rate": 9.71659369285962e-06, "loss": 0.0497, "step": 28415 }, { "epoch": 0.6261547868912063, "grad_norm": 0.615896999835968, "learning_rate": 9.715591776808808e-06, "loss": 0.0701, "step": 28416 }, { "epoch": 0.6261768221807225, "grad_norm": 0.6484266519546509, "learning_rate": 9.714589887674323e-06, "loss": 0.0737, "step": 28417 }, { "epoch": 0.6261988574702386, "grad_norm": 0.6076663732528687, "learning_rate": 9.71358802546127e-06, "loss": 0.0774, "step": 28418 }, { "epoch": 0.6262208927597548, "grad_norm": 0.6254674792289734, "learning_rate": 9.712586190174744e-06, "loss": 0.0937, "step": 28419 }, { "epoch": 0.626242928049271, "grad_norm": 0.8042711019515991, "learning_rate": 9.711584381819852e-06, "loss": 0.0922, "step": 28420 }, { "epoch": 0.6262649633387871, "grad_norm": 0.8584667444229126, "learning_rate": 9.710582600401697e-06, "loss": 0.0943, "step": 28421 }, { "epoch": 0.6262869986283033, "grad_norm": 0.6303531527519226, "learning_rate": 9.709580845925382e-06, "loss": 0.0634, "step": 28422 }, { "epoch": 0.6263090339178193, "grad_norm": 0.660967230796814, "learning_rate": 9.70857911839601e-06, "loss": 0.0687, "step": 28423 }, { "epoch": 0.6263310692073355, "grad_norm": 0.21618561446666718, "learning_rate": 9.707577417818675e-06, "loss": 0.0571, "step": 28424 }, { "epoch": 0.6263531044968517, "grad_norm": 0.5877669453620911, "learning_rate": 9.706575744198492e-06, "loss": 0.0897, "step": 28425 }, { "epoch": 0.6263751397863678, "grad_norm": 0.9952393770217896, "learning_rate": 9.705574097540556e-06, "loss": 0.0597, "step": 28426 }, { "epoch": 0.626397175075884, "grad_norm": 0.5950127243995667, "learning_rate": 9.70457247784997e-06, "loss": 0.054, "step": 28427 }, { "epoch": 0.6264192103654002, "grad_norm": 0.5390651822090149, "learning_rate": 9.703570885131838e-06, "loss": 0.0902, "step": 28428 }, { "epoch": 0.6264412456549163, "grad_norm": 1.0459035634994507, "learning_rate": 9.702569319391262e-06, "loss": 0.1364, "step": 28429 }, { "epoch": 0.6264632809444325, "grad_norm": 0.49497073888778687, "learning_rate": 9.701567780633337e-06, "loss": 0.0734, "step": 28430 }, { "epoch": 0.6264853162339487, "grad_norm": 0.6896334886550903, "learning_rate": 9.700566268863165e-06, "loss": 0.082, "step": 28431 }, { "epoch": 0.6265073515234648, "grad_norm": 0.6558977365493774, "learning_rate": 9.699564784085856e-06, "loss": 0.0975, "step": 28432 }, { "epoch": 0.626529386812981, "grad_norm": 1.0685378313064575, "learning_rate": 9.698563326306506e-06, "loss": 0.1103, "step": 28433 }, { "epoch": 0.6265514221024971, "grad_norm": 0.47689342498779297, "learning_rate": 9.697561895530215e-06, "loss": 0.0621, "step": 28434 }, { "epoch": 0.6265734573920133, "grad_norm": 0.5165132284164429, "learning_rate": 9.696560491762083e-06, "loss": 0.0778, "step": 28435 }, { "epoch": 0.6265954926815295, "grad_norm": 0.5529267191886902, "learning_rate": 9.695559115007215e-06, "loss": 0.0665, "step": 28436 }, { "epoch": 0.6266175279710456, "grad_norm": 0.728291928768158, "learning_rate": 9.694557765270708e-06, "loss": 0.0772, "step": 28437 }, { "epoch": 0.6266395632605618, "grad_norm": 0.6202312111854553, "learning_rate": 9.693556442557664e-06, "loss": 0.079, "step": 28438 }, { "epoch": 0.626661598550078, "grad_norm": 0.7303644418716431, "learning_rate": 9.692555146873176e-06, "loss": 0.0522, "step": 28439 }, { "epoch": 0.6266836338395941, "grad_norm": 0.5144819617271423, "learning_rate": 9.691553878222359e-06, "loss": 0.0722, "step": 28440 }, { "epoch": 0.6267056691291103, "grad_norm": 0.38614293932914734, "learning_rate": 9.690552636610302e-06, "loss": 0.0732, "step": 28441 }, { "epoch": 0.6267277044186265, "grad_norm": 0.8868459463119507, "learning_rate": 9.689551422042108e-06, "loss": 0.0918, "step": 28442 }, { "epoch": 0.6267497397081426, "grad_norm": 0.6198542714118958, "learning_rate": 9.688550234522873e-06, "loss": 0.045, "step": 28443 }, { "epoch": 0.6267717749976588, "grad_norm": 0.3068044185638428, "learning_rate": 9.687549074057706e-06, "loss": 0.0566, "step": 28444 }, { "epoch": 0.626793810287175, "grad_norm": 0.3490036129951477, "learning_rate": 9.686547940651697e-06, "loss": 0.0417, "step": 28445 }, { "epoch": 0.6268158455766911, "grad_norm": 0.8276517987251282, "learning_rate": 9.68554683430995e-06, "loss": 0.1194, "step": 28446 }, { "epoch": 0.6268378808662072, "grad_norm": 0.5548545718193054, "learning_rate": 9.684545755037554e-06, "loss": 0.0592, "step": 28447 }, { "epoch": 0.6268599161557233, "grad_norm": 0.8944973945617676, "learning_rate": 9.683544702839627e-06, "loss": 0.0811, "step": 28448 }, { "epoch": 0.6268819514452395, "grad_norm": 0.5254254937171936, "learning_rate": 9.682543677721256e-06, "loss": 0.0699, "step": 28449 }, { "epoch": 0.6269039867347557, "grad_norm": 0.46179598569869995, "learning_rate": 9.681542679687538e-06, "loss": 0.0536, "step": 28450 }, { "epoch": 0.6269260220242718, "grad_norm": 0.5661124587059021, "learning_rate": 9.680541708743579e-06, "loss": 0.0717, "step": 28451 }, { "epoch": 0.626948057313788, "grad_norm": 0.6593547463417053, "learning_rate": 9.679540764894474e-06, "loss": 0.1397, "step": 28452 }, { "epoch": 0.6269700926033042, "grad_norm": 0.37175825238227844, "learning_rate": 9.67853984814532e-06, "loss": 0.0602, "step": 28453 }, { "epoch": 0.6269921278928203, "grad_norm": 0.6890600323677063, "learning_rate": 9.67753895850121e-06, "loss": 0.0656, "step": 28454 }, { "epoch": 0.6270141631823365, "grad_norm": 0.5709977149963379, "learning_rate": 9.676538095967254e-06, "loss": 0.0644, "step": 28455 }, { "epoch": 0.6270361984718527, "grad_norm": 0.6741833686828613, "learning_rate": 9.675537260548548e-06, "loss": 0.0615, "step": 28456 }, { "epoch": 0.6270582337613688, "grad_norm": 0.7482651472091675, "learning_rate": 9.674536452250181e-06, "loss": 0.0662, "step": 28457 }, { "epoch": 0.627080269050885, "grad_norm": 0.5069732069969177, "learning_rate": 9.673535671077252e-06, "loss": 0.055, "step": 28458 }, { "epoch": 0.6271023043404012, "grad_norm": 0.8638057708740234, "learning_rate": 9.67253491703487e-06, "loss": 0.1075, "step": 28459 }, { "epoch": 0.6271243396299173, "grad_norm": 0.6911851763725281, "learning_rate": 9.67153419012812e-06, "loss": 0.0569, "step": 28460 }, { "epoch": 0.6271463749194335, "grad_norm": 0.3441036641597748, "learning_rate": 9.670533490362108e-06, "loss": 0.0726, "step": 28461 }, { "epoch": 0.6271684102089496, "grad_norm": 0.6669664978981018, "learning_rate": 9.669532817741916e-06, "loss": 0.0837, "step": 28462 }, { "epoch": 0.6271904454984658, "grad_norm": 0.5854548811912537, "learning_rate": 9.66853217227266e-06, "loss": 0.0802, "step": 28463 }, { "epoch": 0.627212480787982, "grad_norm": 0.530055582523346, "learning_rate": 9.66753155395943e-06, "loss": 0.0691, "step": 28464 }, { "epoch": 0.6272345160774981, "grad_norm": 0.4828106760978699, "learning_rate": 9.66653096280732e-06, "loss": 0.0584, "step": 28465 }, { "epoch": 0.6272565513670143, "grad_norm": 0.6087651252746582, "learning_rate": 9.665530398821424e-06, "loss": 0.0764, "step": 28466 }, { "epoch": 0.6272785866565305, "grad_norm": 0.6460369229316711, "learning_rate": 9.664529862006848e-06, "loss": 0.0688, "step": 28467 }, { "epoch": 0.6273006219460466, "grad_norm": 0.7336316704750061, "learning_rate": 9.66352935236868e-06, "loss": 0.0454, "step": 28468 }, { "epoch": 0.6273226572355628, "grad_norm": 0.3293171226978302, "learning_rate": 9.66252886991202e-06, "loss": 0.0421, "step": 28469 }, { "epoch": 0.627344692525079, "grad_norm": 0.8345069885253906, "learning_rate": 9.661528414641954e-06, "loss": 0.0831, "step": 28470 }, { "epoch": 0.6273667278145951, "grad_norm": 0.707209587097168, "learning_rate": 9.660527986563596e-06, "loss": 0.0516, "step": 28471 }, { "epoch": 0.6273887631041112, "grad_norm": 0.7710392475128174, "learning_rate": 9.659527585682031e-06, "loss": 0.0688, "step": 28472 }, { "epoch": 0.6274107983936273, "grad_norm": 0.6336825489997864, "learning_rate": 9.65852721200235e-06, "loss": 0.0747, "step": 28473 }, { "epoch": 0.6274328336831435, "grad_norm": 0.5656847953796387, "learning_rate": 9.657526865529661e-06, "loss": 0.08, "step": 28474 }, { "epoch": 0.6274548689726597, "grad_norm": 0.7412046790122986, "learning_rate": 9.656526546269052e-06, "loss": 0.0473, "step": 28475 }, { "epoch": 0.6274769042621758, "grad_norm": 0.8728975057601929, "learning_rate": 9.655526254225618e-06, "loss": 0.0835, "step": 28476 }, { "epoch": 0.627498939551692, "grad_norm": 0.6708171367645264, "learning_rate": 9.654525989404448e-06, "loss": 0.0966, "step": 28477 }, { "epoch": 0.6275209748412082, "grad_norm": 0.6191352009773254, "learning_rate": 9.65352575181065e-06, "loss": 0.0537, "step": 28478 }, { "epoch": 0.6275430101307243, "grad_norm": 0.4279232323169708, "learning_rate": 9.652525541449314e-06, "loss": 0.0649, "step": 28479 }, { "epoch": 0.6275650454202405, "grad_norm": 0.648085355758667, "learning_rate": 9.65152535832553e-06, "loss": 0.0585, "step": 28480 }, { "epoch": 0.6275870807097567, "grad_norm": 0.5263855457305908, "learning_rate": 9.650525202444395e-06, "loss": 0.0782, "step": 28481 }, { "epoch": 0.6276091159992728, "grad_norm": 0.5400418639183044, "learning_rate": 9.649525073811006e-06, "loss": 0.0662, "step": 28482 }, { "epoch": 0.627631151288789, "grad_norm": 0.7561823129653931, "learning_rate": 9.648524972430454e-06, "loss": 0.0732, "step": 28483 }, { "epoch": 0.6276531865783052, "grad_norm": 0.5941981077194214, "learning_rate": 9.647524898307835e-06, "loss": 0.0694, "step": 28484 }, { "epoch": 0.6276752218678213, "grad_norm": 0.6311002969741821, "learning_rate": 9.646524851448236e-06, "loss": 0.0882, "step": 28485 }, { "epoch": 0.6276972571573375, "grad_norm": 0.6035071611404419, "learning_rate": 9.645524831856761e-06, "loss": 0.0628, "step": 28486 }, { "epoch": 0.6277192924468536, "grad_norm": 0.5467849969863892, "learning_rate": 9.644524839538501e-06, "loss": 0.0832, "step": 28487 }, { "epoch": 0.6277413277363698, "grad_norm": 0.44510629773139954, "learning_rate": 9.643524874498549e-06, "loss": 0.0613, "step": 28488 }, { "epoch": 0.627763363025886, "grad_norm": 0.6976636648178101, "learning_rate": 9.642524936741988e-06, "loss": 0.0743, "step": 28489 }, { "epoch": 0.6277853983154021, "grad_norm": 0.4086267650127411, "learning_rate": 9.641525026273931e-06, "loss": 0.0508, "step": 28490 }, { "epoch": 0.6278074336049183, "grad_norm": 0.5376019477844238, "learning_rate": 9.640525143099459e-06, "loss": 0.0619, "step": 28491 }, { "epoch": 0.6278294688944345, "grad_norm": 0.7283265590667725, "learning_rate": 9.63952528722366e-06, "loss": 0.0451, "step": 28492 }, { "epoch": 0.6278515041839506, "grad_norm": 0.4314379394054413, "learning_rate": 9.638525458651637e-06, "loss": 0.0435, "step": 28493 }, { "epoch": 0.6278735394734668, "grad_norm": 0.4688006639480591, "learning_rate": 9.63752565738848e-06, "loss": 0.0638, "step": 28494 }, { "epoch": 0.627895574762983, "grad_norm": 1.1247456073760986, "learning_rate": 9.63652588343928e-06, "loss": 0.0836, "step": 28495 }, { "epoch": 0.6279176100524991, "grad_norm": 0.579052746295929, "learning_rate": 9.635526136809125e-06, "loss": 0.0716, "step": 28496 }, { "epoch": 0.6279396453420152, "grad_norm": 0.34406203031539917, "learning_rate": 9.634526417503118e-06, "loss": 0.0646, "step": 28497 }, { "epoch": 0.6279616806315313, "grad_norm": 0.6830251216888428, "learning_rate": 9.633526725526343e-06, "loss": 0.0875, "step": 28498 }, { "epoch": 0.6279837159210475, "grad_norm": 0.4645766317844391, "learning_rate": 9.632527060883895e-06, "loss": 0.0611, "step": 28499 }, { "epoch": 0.6280057512105637, "grad_norm": 0.4362412691116333, "learning_rate": 9.631527423580865e-06, "loss": 0.0539, "step": 28500 }, { "epoch": 0.6280277865000798, "grad_norm": 1.0881829261779785, "learning_rate": 9.630527813622343e-06, "loss": 0.106, "step": 28501 }, { "epoch": 0.628049821789596, "grad_norm": 0.8006959557533264, "learning_rate": 9.629528231013427e-06, "loss": 0.0958, "step": 28502 }, { "epoch": 0.6280718570791122, "grad_norm": 0.557784914970398, "learning_rate": 9.6285286757592e-06, "loss": 0.0398, "step": 28503 }, { "epoch": 0.6280938923686283, "grad_norm": 0.4083236753940582, "learning_rate": 9.627529147864751e-06, "loss": 0.0671, "step": 28504 }, { "epoch": 0.6281159276581445, "grad_norm": 0.6545677185058594, "learning_rate": 9.626529647335183e-06, "loss": 0.0744, "step": 28505 }, { "epoch": 0.6281379629476607, "grad_norm": 0.6234292387962341, "learning_rate": 9.625530174175583e-06, "loss": 0.0731, "step": 28506 }, { "epoch": 0.6281599982371768, "grad_norm": 0.7123268842697144, "learning_rate": 9.624530728391037e-06, "loss": 0.0864, "step": 28507 }, { "epoch": 0.628182033526693, "grad_norm": 0.7342138886451721, "learning_rate": 9.62353130998664e-06, "loss": 0.0545, "step": 28508 }, { "epoch": 0.6282040688162092, "grad_norm": 0.5187153220176697, "learning_rate": 9.62253191896748e-06, "loss": 0.0795, "step": 28509 }, { "epoch": 0.6282261041057253, "grad_norm": 0.7977135181427002, "learning_rate": 9.621532555338651e-06, "loss": 0.0634, "step": 28510 }, { "epoch": 0.6282481393952415, "grad_norm": 0.5766661763191223, "learning_rate": 9.62053321910524e-06, "loss": 0.0827, "step": 28511 }, { "epoch": 0.6282701746847577, "grad_norm": 0.9582946300506592, "learning_rate": 9.61953391027233e-06, "loss": 0.0835, "step": 28512 }, { "epoch": 0.6282922099742738, "grad_norm": 0.9691402912139893, "learning_rate": 9.618534628845028e-06, "loss": 0.1021, "step": 28513 }, { "epoch": 0.62831424526379, "grad_norm": 0.8908339738845825, "learning_rate": 9.617535374828413e-06, "loss": 0.0833, "step": 28514 }, { "epoch": 0.6283362805533061, "grad_norm": 0.4844323992729187, "learning_rate": 9.616536148227574e-06, "loss": 0.0615, "step": 28515 }, { "epoch": 0.6283583158428223, "grad_norm": 0.904493510723114, "learning_rate": 9.615536949047606e-06, "loss": 0.095, "step": 28516 }, { "epoch": 0.6283803511323385, "grad_norm": 0.7621241211891174, "learning_rate": 9.614537777293598e-06, "loss": 0.1019, "step": 28517 }, { "epoch": 0.6284023864218546, "grad_norm": 0.6179270148277283, "learning_rate": 9.613538632970634e-06, "loss": 0.0736, "step": 28518 }, { "epoch": 0.6284244217113708, "grad_norm": 0.6646213531494141, "learning_rate": 9.612539516083803e-06, "loss": 0.0733, "step": 28519 }, { "epoch": 0.628446457000887, "grad_norm": 0.48732322454452515, "learning_rate": 9.6115404266382e-06, "loss": 0.0544, "step": 28520 }, { "epoch": 0.6284684922904031, "grad_norm": 0.48025262355804443, "learning_rate": 9.610541364638915e-06, "loss": 0.0627, "step": 28521 }, { "epoch": 0.6284905275799192, "grad_norm": 0.4969489574432373, "learning_rate": 9.60954233009103e-06, "loss": 0.0704, "step": 28522 }, { "epoch": 0.6285125628694354, "grad_norm": 0.7302926182746887, "learning_rate": 9.608543322999633e-06, "loss": 0.0672, "step": 28523 }, { "epoch": 0.6285345981589515, "grad_norm": 0.2896801233291626, "learning_rate": 9.607544343369821e-06, "loss": 0.062, "step": 28524 }, { "epoch": 0.6285566334484677, "grad_norm": 0.4453813135623932, "learning_rate": 9.606545391206678e-06, "loss": 0.0616, "step": 28525 }, { "epoch": 0.6285786687379838, "grad_norm": 0.3960232138633728, "learning_rate": 9.60554646651529e-06, "loss": 0.0622, "step": 28526 }, { "epoch": 0.6286007040275, "grad_norm": 0.41567716002464294, "learning_rate": 9.60454756930074e-06, "loss": 0.0732, "step": 28527 }, { "epoch": 0.6286227393170162, "grad_norm": 0.4530395269393921, "learning_rate": 9.60354869956813e-06, "loss": 0.059, "step": 28528 }, { "epoch": 0.6286447746065323, "grad_norm": 0.3750220239162445, "learning_rate": 9.602549857322538e-06, "loss": 0.0378, "step": 28529 }, { "epoch": 0.6286668098960485, "grad_norm": 0.7437469959259033, "learning_rate": 9.601551042569056e-06, "loss": 0.0803, "step": 28530 }, { "epoch": 0.6286888451855647, "grad_norm": 0.4884951114654541, "learning_rate": 9.600552255312765e-06, "loss": 0.084, "step": 28531 }, { "epoch": 0.6287108804750808, "grad_norm": 0.3347514867782593, "learning_rate": 9.599553495558762e-06, "loss": 0.0432, "step": 28532 }, { "epoch": 0.628732915764597, "grad_norm": 0.9339747428894043, "learning_rate": 9.598554763312127e-06, "loss": 0.1024, "step": 28533 }, { "epoch": 0.6287549510541132, "grad_norm": 0.4937501549720764, "learning_rate": 9.597556058577948e-06, "loss": 0.0766, "step": 28534 }, { "epoch": 0.6287769863436293, "grad_norm": 0.4978227913379669, "learning_rate": 9.59655738136131e-06, "loss": 0.062, "step": 28535 }, { "epoch": 0.6287990216331455, "grad_norm": 1.024762511253357, "learning_rate": 9.595558731667307e-06, "loss": 0.0983, "step": 28536 }, { "epoch": 0.6288210569226617, "grad_norm": 0.5031881928443909, "learning_rate": 9.594560109501023e-06, "loss": 0.0548, "step": 28537 }, { "epoch": 0.6288430922121778, "grad_norm": 0.39887523651123047, "learning_rate": 9.59356151486754e-06, "loss": 0.0614, "step": 28538 }, { "epoch": 0.628865127501694, "grad_norm": 0.5178136825561523, "learning_rate": 9.592562947771948e-06, "loss": 0.0747, "step": 28539 }, { "epoch": 0.6288871627912102, "grad_norm": 0.535135805606842, "learning_rate": 9.591564408219334e-06, "loss": 0.0424, "step": 28540 }, { "epoch": 0.6289091980807263, "grad_norm": 1.0577880144119263, "learning_rate": 9.59056589621478e-06, "loss": 0.0713, "step": 28541 }, { "epoch": 0.6289312333702425, "grad_norm": 0.6146073341369629, "learning_rate": 9.589567411763375e-06, "loss": 0.0636, "step": 28542 }, { "epoch": 0.6289532686597586, "grad_norm": 0.5179976224899292, "learning_rate": 9.588568954870205e-06, "loss": 0.0581, "step": 28543 }, { "epoch": 0.6289753039492748, "grad_norm": 0.620376706123352, "learning_rate": 9.587570525540358e-06, "loss": 0.0568, "step": 28544 }, { "epoch": 0.628997339238791, "grad_norm": 0.4844074845314026, "learning_rate": 9.586572123778914e-06, "loss": 0.0707, "step": 28545 }, { "epoch": 0.629019374528307, "grad_norm": 0.3449406623840332, "learning_rate": 9.585573749590961e-06, "loss": 0.084, "step": 28546 }, { "epoch": 0.6290414098178232, "grad_norm": 0.5299129486083984, "learning_rate": 9.584575402981586e-06, "loss": 0.0511, "step": 28547 }, { "epoch": 0.6290634451073394, "grad_norm": 0.584281861782074, "learning_rate": 9.583577083955872e-06, "loss": 0.0788, "step": 28548 }, { "epoch": 0.6290854803968555, "grad_norm": 0.7006945610046387, "learning_rate": 9.582578792518905e-06, "loss": 0.0619, "step": 28549 }, { "epoch": 0.6291075156863717, "grad_norm": 0.6515779495239258, "learning_rate": 9.581580528675762e-06, "loss": 0.0577, "step": 28550 }, { "epoch": 0.6291295509758879, "grad_norm": 0.7527509927749634, "learning_rate": 9.580582292431544e-06, "loss": 0.104, "step": 28551 }, { "epoch": 0.629151586265404, "grad_norm": 0.6646721959114075, "learning_rate": 9.579584083791323e-06, "loss": 0.0558, "step": 28552 }, { "epoch": 0.6291736215549202, "grad_norm": 0.8272057771682739, "learning_rate": 9.57858590276019e-06, "loss": 0.0787, "step": 28553 }, { "epoch": 0.6291956568444363, "grad_norm": 0.6238859295845032, "learning_rate": 9.577587749343222e-06, "loss": 0.0678, "step": 28554 }, { "epoch": 0.6292176921339525, "grad_norm": 0.8429195880889893, "learning_rate": 9.57658962354551e-06, "loss": 0.0638, "step": 28555 }, { "epoch": 0.6292397274234687, "grad_norm": 0.339977502822876, "learning_rate": 9.575591525372134e-06, "loss": 0.0506, "step": 28556 }, { "epoch": 0.6292617627129848, "grad_norm": 0.6413233280181885, "learning_rate": 9.57459345482818e-06, "loss": 0.0834, "step": 28557 }, { "epoch": 0.629283798002501, "grad_norm": 0.6878997087478638, "learning_rate": 9.573595411918728e-06, "loss": 0.0719, "step": 28558 }, { "epoch": 0.6293058332920172, "grad_norm": 0.799411952495575, "learning_rate": 9.572597396648867e-06, "loss": 0.0711, "step": 28559 }, { "epoch": 0.6293278685815333, "grad_norm": 0.8091575503349304, "learning_rate": 9.571599409023678e-06, "loss": 0.0985, "step": 28560 }, { "epoch": 0.6293499038710495, "grad_norm": 0.6683472990989685, "learning_rate": 9.570601449048241e-06, "loss": 0.0831, "step": 28561 }, { "epoch": 0.6293719391605657, "grad_norm": 0.5222938060760498, "learning_rate": 9.569603516727647e-06, "loss": 0.0478, "step": 28562 }, { "epoch": 0.6293939744500818, "grad_norm": 0.483058899641037, "learning_rate": 9.568605612066974e-06, "loss": 0.0749, "step": 28563 }, { "epoch": 0.629416009739598, "grad_norm": 0.4406452775001526, "learning_rate": 9.567607735071303e-06, "loss": 0.0405, "step": 28564 }, { "epoch": 0.6294380450291142, "grad_norm": 0.5363232493400574, "learning_rate": 9.566609885745714e-06, "loss": 0.0821, "step": 28565 }, { "epoch": 0.6294600803186303, "grad_norm": 0.5506299138069153, "learning_rate": 9.565612064095303e-06, "loss": 0.0579, "step": 28566 }, { "epoch": 0.6294821156081465, "grad_norm": 0.47845128178596497, "learning_rate": 9.56461427012514e-06, "loss": 0.047, "step": 28567 }, { "epoch": 0.6295041508976627, "grad_norm": 0.5499203205108643, "learning_rate": 9.563616503840315e-06, "loss": 0.0589, "step": 28568 }, { "epoch": 0.6295261861871788, "grad_norm": 0.16122956573963165, "learning_rate": 9.5626187652459e-06, "loss": 0.0286, "step": 28569 }, { "epoch": 0.629548221476695, "grad_norm": 0.7163575887680054, "learning_rate": 9.56162105434699e-06, "loss": 0.0668, "step": 28570 }, { "epoch": 0.629570256766211, "grad_norm": 0.5171126127243042, "learning_rate": 9.560623371148657e-06, "loss": 0.0893, "step": 28571 }, { "epoch": 0.6295922920557272, "grad_norm": 0.5506144165992737, "learning_rate": 9.559625715655986e-06, "loss": 0.0635, "step": 28572 }, { "epoch": 0.6296143273452434, "grad_norm": 0.3124551475048065, "learning_rate": 9.558628087874054e-06, "loss": 0.0448, "step": 28573 }, { "epoch": 0.6296363626347595, "grad_norm": 0.5761705636978149, "learning_rate": 9.557630487807952e-06, "loss": 0.0517, "step": 28574 }, { "epoch": 0.6296583979242757, "grad_norm": 0.9356751441955566, "learning_rate": 9.556632915462757e-06, "loss": 0.0866, "step": 28575 }, { "epoch": 0.6296804332137919, "grad_norm": 0.5354818105697632, "learning_rate": 9.55563537084355e-06, "loss": 0.0445, "step": 28576 }, { "epoch": 0.629702468503308, "grad_norm": 0.7904765009880066, "learning_rate": 9.554637853955408e-06, "loss": 0.095, "step": 28577 }, { "epoch": 0.6297245037928242, "grad_norm": 0.7588754892349243, "learning_rate": 9.55364036480342e-06, "loss": 0.0617, "step": 28578 }, { "epoch": 0.6297465390823404, "grad_norm": 0.47154501080513, "learning_rate": 9.55264290339266e-06, "loss": 0.0671, "step": 28579 }, { "epoch": 0.6297685743718565, "grad_norm": 0.7360115647315979, "learning_rate": 9.551645469728211e-06, "loss": 0.0526, "step": 28580 }, { "epoch": 0.6297906096613727, "grad_norm": 0.5237723588943481, "learning_rate": 9.55064806381515e-06, "loss": 0.0847, "step": 28581 }, { "epoch": 0.6298126449508888, "grad_norm": 0.7716083526611328, "learning_rate": 9.549650685658562e-06, "loss": 0.0588, "step": 28582 }, { "epoch": 0.629834680240405, "grad_norm": 0.5106964707374573, "learning_rate": 9.548653335263528e-06, "loss": 0.0695, "step": 28583 }, { "epoch": 0.6298567155299212, "grad_norm": 0.9449944496154785, "learning_rate": 9.547656012635124e-06, "loss": 0.0755, "step": 28584 }, { "epoch": 0.6298787508194373, "grad_norm": 0.3932661712169647, "learning_rate": 9.546658717778435e-06, "loss": 0.0834, "step": 28585 }, { "epoch": 0.6299007861089535, "grad_norm": 0.7027552127838135, "learning_rate": 9.545661450698536e-06, "loss": 0.1128, "step": 28586 }, { "epoch": 0.6299228213984697, "grad_norm": 0.6491318345069885, "learning_rate": 9.544664211400508e-06, "loss": 0.0681, "step": 28587 }, { "epoch": 0.6299448566879858, "grad_norm": 0.3721587657928467, "learning_rate": 9.543666999889424e-06, "loss": 0.0708, "step": 28588 }, { "epoch": 0.629966891977502, "grad_norm": 0.7577312588691711, "learning_rate": 9.542669816170378e-06, "loss": 0.0814, "step": 28589 }, { "epoch": 0.6299889272670182, "grad_norm": 0.7073965072631836, "learning_rate": 9.541672660248437e-06, "loss": 0.0527, "step": 28590 }, { "epoch": 0.6300109625565343, "grad_norm": 0.5999566912651062, "learning_rate": 9.540675532128688e-06, "loss": 0.077, "step": 28591 }, { "epoch": 0.6300329978460505, "grad_norm": 0.7573804259300232, "learning_rate": 9.539678431816201e-06, "loss": 0.0529, "step": 28592 }, { "epoch": 0.6300550331355667, "grad_norm": 0.4319053888320923, "learning_rate": 9.538681359316064e-06, "loss": 0.0747, "step": 28593 }, { "epoch": 0.6300770684250828, "grad_norm": 0.8088918328285217, "learning_rate": 9.537684314633351e-06, "loss": 0.0847, "step": 28594 }, { "epoch": 0.630099103714599, "grad_norm": 1.0225918292999268, "learning_rate": 9.536687297773143e-06, "loss": 0.0886, "step": 28595 }, { "epoch": 0.630121139004115, "grad_norm": 0.3541446030139923, "learning_rate": 9.535690308740508e-06, "loss": 0.0859, "step": 28596 }, { "epoch": 0.6301431742936312, "grad_norm": 0.7014816999435425, "learning_rate": 9.534693347540537e-06, "loss": 0.0769, "step": 28597 }, { "epoch": 0.6301652095831474, "grad_norm": 0.5956636667251587, "learning_rate": 9.533696414178307e-06, "loss": 0.0584, "step": 28598 }, { "epoch": 0.6301872448726635, "grad_norm": 0.8476608991622925, "learning_rate": 9.53269950865889e-06, "loss": 0.0878, "step": 28599 }, { "epoch": 0.6302092801621797, "grad_norm": 0.8707630038261414, "learning_rate": 9.531702630987361e-06, "loss": 0.0651, "step": 28600 }, { "epoch": 0.6302313154516959, "grad_norm": 0.7277440428733826, "learning_rate": 9.530705781168811e-06, "loss": 0.0676, "step": 28601 }, { "epoch": 0.630253350741212, "grad_norm": 0.777949869632721, "learning_rate": 9.529708959208309e-06, "loss": 0.0562, "step": 28602 }, { "epoch": 0.6302753860307282, "grad_norm": 0.6532401442527771, "learning_rate": 9.52871216511093e-06, "loss": 0.0572, "step": 28603 }, { "epoch": 0.6302974213202444, "grad_norm": 0.7028501033782959, "learning_rate": 9.527715398881748e-06, "loss": 0.0667, "step": 28604 }, { "epoch": 0.6303194566097605, "grad_norm": 0.8210911750793457, "learning_rate": 9.526718660525853e-06, "loss": 0.0875, "step": 28605 }, { "epoch": 0.6303414918992767, "grad_norm": 0.6200248599052429, "learning_rate": 9.525721950048313e-06, "loss": 0.0852, "step": 28606 }, { "epoch": 0.6303635271887928, "grad_norm": 0.607881486415863, "learning_rate": 9.524725267454204e-06, "loss": 0.0603, "step": 28607 }, { "epoch": 0.630385562478309, "grad_norm": 0.33504512906074524, "learning_rate": 9.52372861274861e-06, "loss": 0.0513, "step": 28608 }, { "epoch": 0.6304075977678252, "grad_norm": 0.37646937370300293, "learning_rate": 9.5227319859366e-06, "loss": 0.0838, "step": 28609 }, { "epoch": 0.6304296330573413, "grad_norm": 0.4159776270389557, "learning_rate": 9.521735387023257e-06, "loss": 0.0446, "step": 28610 }, { "epoch": 0.6304516683468575, "grad_norm": 0.6864007115364075, "learning_rate": 9.52073881601365e-06, "loss": 0.088, "step": 28611 }, { "epoch": 0.6304737036363737, "grad_norm": 0.5880887508392334, "learning_rate": 9.519742272912858e-06, "loss": 0.0679, "step": 28612 }, { "epoch": 0.6304957389258898, "grad_norm": 0.5774903297424316, "learning_rate": 9.51874575772596e-06, "loss": 0.0817, "step": 28613 }, { "epoch": 0.630517774215406, "grad_norm": 0.41775888204574585, "learning_rate": 9.517749270458028e-06, "loss": 0.054, "step": 28614 }, { "epoch": 0.6305398095049222, "grad_norm": 0.630747377872467, "learning_rate": 9.516752811114133e-06, "loss": 0.0611, "step": 28615 }, { "epoch": 0.6305618447944383, "grad_norm": 0.5323003530502319, "learning_rate": 9.515756379699362e-06, "loss": 0.0678, "step": 28616 }, { "epoch": 0.6305838800839545, "grad_norm": 0.5238466858863831, "learning_rate": 9.514759976218786e-06, "loss": 0.0644, "step": 28617 }, { "epoch": 0.6306059153734707, "grad_norm": 0.6025353074073792, "learning_rate": 9.513763600677478e-06, "loss": 0.0778, "step": 28618 }, { "epoch": 0.6306279506629868, "grad_norm": 0.7024614214897156, "learning_rate": 9.512767253080512e-06, "loss": 0.0808, "step": 28619 }, { "epoch": 0.6306499859525029, "grad_norm": 0.4198058247566223, "learning_rate": 9.511770933432966e-06, "loss": 0.048, "step": 28620 }, { "epoch": 0.630672021242019, "grad_norm": 0.35408979654312134, "learning_rate": 9.510774641739916e-06, "loss": 0.0602, "step": 28621 }, { "epoch": 0.6306940565315352, "grad_norm": 0.6239506602287292, "learning_rate": 9.509778378006432e-06, "loss": 0.0965, "step": 28622 }, { "epoch": 0.6307160918210514, "grad_norm": 0.8081855177879333, "learning_rate": 9.508782142237586e-06, "loss": 0.107, "step": 28623 }, { "epoch": 0.6307381271105675, "grad_norm": 0.589801013469696, "learning_rate": 9.507785934438461e-06, "loss": 0.0849, "step": 28624 }, { "epoch": 0.6307601624000837, "grad_norm": 0.8907326459884644, "learning_rate": 9.506789754614131e-06, "loss": 0.105, "step": 28625 }, { "epoch": 0.6307821976895999, "grad_norm": 0.8759546279907227, "learning_rate": 9.505793602769662e-06, "loss": 0.0874, "step": 28626 }, { "epoch": 0.630804232979116, "grad_norm": 0.7075084447860718, "learning_rate": 9.504797478910134e-06, "loss": 0.0752, "step": 28627 }, { "epoch": 0.6308262682686322, "grad_norm": 0.6751074194908142, "learning_rate": 9.50380138304062e-06, "loss": 0.06, "step": 28628 }, { "epoch": 0.6308483035581484, "grad_norm": 0.5911741852760315, "learning_rate": 9.502805315166196e-06, "loss": 0.0812, "step": 28629 }, { "epoch": 0.6308703388476645, "grad_norm": 0.3832422196865082, "learning_rate": 9.501809275291922e-06, "loss": 0.0628, "step": 28630 }, { "epoch": 0.6308923741371807, "grad_norm": 0.8177272081375122, "learning_rate": 9.500813263422889e-06, "loss": 0.07, "step": 28631 }, { "epoch": 0.6309144094266969, "grad_norm": 0.5945386290550232, "learning_rate": 9.499817279564163e-06, "loss": 0.0682, "step": 28632 }, { "epoch": 0.630936444716213, "grad_norm": 0.21357502043247223, "learning_rate": 9.498821323720817e-06, "loss": 0.0852, "step": 28633 }, { "epoch": 0.6309584800057292, "grad_norm": 0.9425374865531921, "learning_rate": 9.497825395897922e-06, "loss": 0.125, "step": 28634 }, { "epoch": 0.6309805152952453, "grad_norm": 0.5606797933578491, "learning_rate": 9.496829496100554e-06, "loss": 0.0794, "step": 28635 }, { "epoch": 0.6310025505847615, "grad_norm": 0.49289876222610474, "learning_rate": 9.495833624333786e-06, "loss": 0.0723, "step": 28636 }, { "epoch": 0.6310245858742777, "grad_norm": 0.5774416327476501, "learning_rate": 9.494837780602688e-06, "loss": 0.0562, "step": 28637 }, { "epoch": 0.6310466211637938, "grad_norm": 0.520252525806427, "learning_rate": 9.493841964912328e-06, "loss": 0.0506, "step": 28638 }, { "epoch": 0.63106865645331, "grad_norm": 0.44813787937164307, "learning_rate": 9.49284617726779e-06, "loss": 0.0635, "step": 28639 }, { "epoch": 0.6310906917428262, "grad_norm": 0.41434618830680847, "learning_rate": 9.491850417674138e-06, "loss": 0.0571, "step": 28640 }, { "epoch": 0.6311127270323423, "grad_norm": 0.5364694595336914, "learning_rate": 9.490854686136445e-06, "loss": 0.0728, "step": 28641 }, { "epoch": 0.6311347623218585, "grad_norm": 0.5939018726348877, "learning_rate": 9.489858982659782e-06, "loss": 0.0507, "step": 28642 }, { "epoch": 0.6311567976113747, "grad_norm": 0.5514753460884094, "learning_rate": 9.488863307249227e-06, "loss": 0.0587, "step": 28643 }, { "epoch": 0.6311788329008908, "grad_norm": 0.6711148023605347, "learning_rate": 9.487867659909841e-06, "loss": 0.0596, "step": 28644 }, { "epoch": 0.6312008681904069, "grad_norm": 0.836162269115448, "learning_rate": 9.486872040646705e-06, "loss": 0.0731, "step": 28645 }, { "epoch": 0.631222903479923, "grad_norm": 0.7369115352630615, "learning_rate": 9.485876449464878e-06, "loss": 0.0975, "step": 28646 }, { "epoch": 0.6312449387694392, "grad_norm": 0.7500380277633667, "learning_rate": 9.484880886369445e-06, "loss": 0.0667, "step": 28647 }, { "epoch": 0.6312669740589554, "grad_norm": 0.6061663627624512, "learning_rate": 9.483885351365475e-06, "loss": 0.0643, "step": 28648 }, { "epoch": 0.6312890093484715, "grad_norm": 0.5520516633987427, "learning_rate": 9.482889844458028e-06, "loss": 0.0751, "step": 28649 }, { "epoch": 0.6313110446379877, "grad_norm": 0.2538762092590332, "learning_rate": 9.481894365652185e-06, "loss": 0.0403, "step": 28650 }, { "epoch": 0.6313330799275039, "grad_norm": 0.7306513786315918, "learning_rate": 9.480898914953016e-06, "loss": 0.0747, "step": 28651 }, { "epoch": 0.63135511521702, "grad_norm": 0.6040765047073364, "learning_rate": 9.479903492365585e-06, "loss": 0.0454, "step": 28652 }, { "epoch": 0.6313771505065362, "grad_norm": 0.8789158463478088, "learning_rate": 9.478908097894959e-06, "loss": 0.068, "step": 28653 }, { "epoch": 0.6313991857960524, "grad_norm": 0.7435877323150635, "learning_rate": 9.477912731546224e-06, "loss": 0.071, "step": 28654 }, { "epoch": 0.6314212210855685, "grad_norm": 0.5439354181289673, "learning_rate": 9.476917393324439e-06, "loss": 0.0679, "step": 28655 }, { "epoch": 0.6314432563750847, "grad_norm": 0.6797075867652893, "learning_rate": 9.475922083234675e-06, "loss": 0.0701, "step": 28656 }, { "epoch": 0.6314652916646009, "grad_norm": 0.44313523173332214, "learning_rate": 9.474926801281998e-06, "loss": 0.0758, "step": 28657 }, { "epoch": 0.631487326954117, "grad_norm": 0.5239609479904175, "learning_rate": 9.473931547471488e-06, "loss": 0.0618, "step": 28658 }, { "epoch": 0.6315093622436332, "grad_norm": 0.7518147230148315, "learning_rate": 9.472936321808207e-06, "loss": 0.0761, "step": 28659 }, { "epoch": 0.6315313975331494, "grad_norm": 0.48884665966033936, "learning_rate": 9.471941124297223e-06, "loss": 0.0449, "step": 28660 }, { "epoch": 0.6315534328226655, "grad_norm": 0.20833349227905273, "learning_rate": 9.470945954943602e-06, "loss": 0.0587, "step": 28661 }, { "epoch": 0.6315754681121817, "grad_norm": 0.6124933958053589, "learning_rate": 9.469950813752425e-06, "loss": 0.0658, "step": 28662 }, { "epoch": 0.6315975034016978, "grad_norm": 0.5731445550918579, "learning_rate": 9.468955700728752e-06, "loss": 0.0462, "step": 28663 }, { "epoch": 0.631619538691214, "grad_norm": 0.6226403713226318, "learning_rate": 9.467960615877656e-06, "loss": 0.0336, "step": 28664 }, { "epoch": 0.6316415739807302, "grad_norm": 0.9066391587257385, "learning_rate": 9.4669655592042e-06, "loss": 0.0975, "step": 28665 }, { "epoch": 0.6316636092702463, "grad_norm": 0.8611087799072266, "learning_rate": 9.465970530713457e-06, "loss": 0.1015, "step": 28666 }, { "epoch": 0.6316856445597625, "grad_norm": 0.41511622071266174, "learning_rate": 9.464975530410495e-06, "loss": 0.0538, "step": 28667 }, { "epoch": 0.6317076798492787, "grad_norm": 0.5599185824394226, "learning_rate": 9.46398055830038e-06, "loss": 0.0668, "step": 28668 }, { "epoch": 0.6317297151387948, "grad_norm": 0.5125434398651123, "learning_rate": 9.462985614388175e-06, "loss": 0.0725, "step": 28669 }, { "epoch": 0.6317517504283109, "grad_norm": 0.4479248523712158, "learning_rate": 9.461990698678958e-06, "loss": 0.0949, "step": 28670 }, { "epoch": 0.631773785717827, "grad_norm": 0.5998595356941223, "learning_rate": 9.460995811177795e-06, "loss": 0.0569, "step": 28671 }, { "epoch": 0.6317958210073432, "grad_norm": 0.29930365085601807, "learning_rate": 9.460000951889745e-06, "loss": 0.0637, "step": 28672 }, { "epoch": 0.6318178562968594, "grad_norm": 0.5981830358505249, "learning_rate": 9.459006120819885e-06, "loss": 0.0596, "step": 28673 }, { "epoch": 0.6318398915863755, "grad_norm": 0.5107643604278564, "learning_rate": 9.458011317973277e-06, "loss": 0.0781, "step": 28674 }, { "epoch": 0.6318619268758917, "grad_norm": 0.5668628215789795, "learning_rate": 9.45701654335499e-06, "loss": 0.0621, "step": 28675 }, { "epoch": 0.6318839621654079, "grad_norm": 0.5472388863563538, "learning_rate": 9.456021796970084e-06, "loss": 0.0606, "step": 28676 }, { "epoch": 0.631905997454924, "grad_norm": 0.6636098027229309, "learning_rate": 9.455027078823638e-06, "loss": 0.0946, "step": 28677 }, { "epoch": 0.6319280327444402, "grad_norm": 0.6092434525489807, "learning_rate": 9.454032388920714e-06, "loss": 0.0557, "step": 28678 }, { "epoch": 0.6319500680339564, "grad_norm": 0.8517777323722839, "learning_rate": 9.453037727266377e-06, "loss": 0.0692, "step": 28679 }, { "epoch": 0.6319721033234725, "grad_norm": 0.8793733716011047, "learning_rate": 9.452043093865687e-06, "loss": 0.074, "step": 28680 }, { "epoch": 0.6319941386129887, "grad_norm": 0.38068512082099915, "learning_rate": 9.45104848872372e-06, "loss": 0.0536, "step": 28681 }, { "epoch": 0.6320161739025049, "grad_norm": 0.8544293642044067, "learning_rate": 9.450053911845543e-06, "loss": 0.1161, "step": 28682 }, { "epoch": 0.632038209192021, "grad_norm": 0.8035117387771606, "learning_rate": 9.449059363236215e-06, "loss": 0.0612, "step": 28683 }, { "epoch": 0.6320602444815372, "grad_norm": 0.8630263209342957, "learning_rate": 9.448064842900797e-06, "loss": 0.0903, "step": 28684 }, { "epoch": 0.6320822797710534, "grad_norm": 0.39847978949546814, "learning_rate": 9.44707035084437e-06, "loss": 0.0465, "step": 28685 }, { "epoch": 0.6321043150605695, "grad_norm": 0.8287862539291382, "learning_rate": 9.44607588707199e-06, "loss": 0.0779, "step": 28686 }, { "epoch": 0.6321263503500857, "grad_norm": 0.6668208241462708, "learning_rate": 9.445081451588724e-06, "loss": 0.0687, "step": 28687 }, { "epoch": 0.6321483856396019, "grad_norm": 0.29378506541252136, "learning_rate": 9.444087044399633e-06, "loss": 0.0538, "step": 28688 }, { "epoch": 0.632170420929118, "grad_norm": 0.7952916622161865, "learning_rate": 9.443092665509791e-06, "loss": 0.073, "step": 28689 }, { "epoch": 0.6321924562186342, "grad_norm": 0.49693140387535095, "learning_rate": 9.442098314924258e-06, "loss": 0.053, "step": 28690 }, { "epoch": 0.6322144915081503, "grad_norm": 0.45528969168663025, "learning_rate": 9.441103992648099e-06, "loss": 0.0617, "step": 28691 }, { "epoch": 0.6322365267976665, "grad_norm": 0.5941597819328308, "learning_rate": 9.440109698686371e-06, "loss": 0.0519, "step": 28692 }, { "epoch": 0.6322585620871827, "grad_norm": 0.7347244024276733, "learning_rate": 9.439115433044151e-06, "loss": 0.065, "step": 28693 }, { "epoch": 0.6322805973766988, "grad_norm": 0.49185052514076233, "learning_rate": 9.438121195726498e-06, "loss": 0.0626, "step": 28694 }, { "epoch": 0.6323026326662149, "grad_norm": 0.6770230531692505, "learning_rate": 9.437126986738475e-06, "loss": 0.0618, "step": 28695 }, { "epoch": 0.6323246679557311, "grad_norm": 0.7844902276992798, "learning_rate": 9.436132806085149e-06, "loss": 0.0661, "step": 28696 }, { "epoch": 0.6323467032452472, "grad_norm": 0.5114526152610779, "learning_rate": 9.435138653771585e-06, "loss": 0.0802, "step": 28697 }, { "epoch": 0.6323687385347634, "grad_norm": 0.6797733902931213, "learning_rate": 9.434144529802839e-06, "loss": 0.0911, "step": 28698 }, { "epoch": 0.6323907738242796, "grad_norm": 0.896957516670227, "learning_rate": 9.433150434183977e-06, "loss": 0.0836, "step": 28699 }, { "epoch": 0.6324128091137957, "grad_norm": 0.5616770386695862, "learning_rate": 9.432156366920069e-06, "loss": 0.0991, "step": 28700 }, { "epoch": 0.6324348444033119, "grad_norm": 1.3014832735061646, "learning_rate": 9.431162328016177e-06, "loss": 0.083, "step": 28701 }, { "epoch": 0.632456879692828, "grad_norm": 0.3721569776535034, "learning_rate": 9.43016831747736e-06, "loss": 0.0559, "step": 28702 }, { "epoch": 0.6324789149823442, "grad_norm": 0.5732460021972656, "learning_rate": 9.42917433530868e-06, "loss": 0.0974, "step": 28703 }, { "epoch": 0.6325009502718604, "grad_norm": 0.7034109830856323, "learning_rate": 9.428180381515206e-06, "loss": 0.0986, "step": 28704 }, { "epoch": 0.6325229855613765, "grad_norm": 0.9904528856277466, "learning_rate": 9.427186456101996e-06, "loss": 0.0927, "step": 28705 }, { "epoch": 0.6325450208508927, "grad_norm": 0.37595635652542114, "learning_rate": 9.426192559074113e-06, "loss": 0.0496, "step": 28706 }, { "epoch": 0.6325670561404089, "grad_norm": 0.4193871021270752, "learning_rate": 9.425198690436615e-06, "loss": 0.0807, "step": 28707 }, { "epoch": 0.632589091429925, "grad_norm": 0.4784640073776245, "learning_rate": 9.424204850194575e-06, "loss": 0.0606, "step": 28708 }, { "epoch": 0.6326111267194412, "grad_norm": 0.7236883640289307, "learning_rate": 9.42321103835305e-06, "loss": 0.0647, "step": 28709 }, { "epoch": 0.6326331620089574, "grad_norm": 0.5365292429924011, "learning_rate": 9.422217254917102e-06, "loss": 0.082, "step": 28710 }, { "epoch": 0.6326551972984735, "grad_norm": 0.34208834171295166, "learning_rate": 9.42122349989179e-06, "loss": 0.0609, "step": 28711 }, { "epoch": 0.6326772325879897, "grad_norm": 0.5150541067123413, "learning_rate": 9.42022977328218e-06, "loss": 0.0633, "step": 28712 }, { "epoch": 0.6326992678775059, "grad_norm": 0.6556693911552429, "learning_rate": 9.419236075093336e-06, "loss": 0.0987, "step": 28713 }, { "epoch": 0.632721303167022, "grad_norm": 0.4759860336780548, "learning_rate": 9.418242405330311e-06, "loss": 0.0622, "step": 28714 }, { "epoch": 0.6327433384565382, "grad_norm": 0.9999051690101624, "learning_rate": 9.417248763998167e-06, "loss": 0.0737, "step": 28715 }, { "epoch": 0.6327653737460543, "grad_norm": 0.6451361775398254, "learning_rate": 9.416255151101972e-06, "loss": 0.0677, "step": 28716 }, { "epoch": 0.6327874090355705, "grad_norm": 0.6684938073158264, "learning_rate": 9.415261566646784e-06, "loss": 0.0634, "step": 28717 }, { "epoch": 0.6328094443250867, "grad_norm": 0.5567149519920349, "learning_rate": 9.41426801063766e-06, "loss": 0.0611, "step": 28718 }, { "epoch": 0.6328314796146027, "grad_norm": 0.7705526351928711, "learning_rate": 9.41327448307967e-06, "loss": 0.0823, "step": 28719 }, { "epoch": 0.6328535149041189, "grad_norm": 0.6977760791778564, "learning_rate": 9.412280983977868e-06, "loss": 0.1095, "step": 28720 }, { "epoch": 0.6328755501936351, "grad_norm": 0.6766390800476074, "learning_rate": 9.411287513337315e-06, "loss": 0.059, "step": 28721 }, { "epoch": 0.6328975854831512, "grad_norm": 0.45993098616600037, "learning_rate": 9.410294071163069e-06, "loss": 0.0547, "step": 28722 }, { "epoch": 0.6329196207726674, "grad_norm": 0.5729150176048279, "learning_rate": 9.409300657460196e-06, "loss": 0.0489, "step": 28723 }, { "epoch": 0.6329416560621836, "grad_norm": 1.148195743560791, "learning_rate": 9.408307272233752e-06, "loss": 0.09, "step": 28724 }, { "epoch": 0.6329636913516997, "grad_norm": 0.8304580450057983, "learning_rate": 9.407313915488798e-06, "loss": 0.0689, "step": 28725 }, { "epoch": 0.6329857266412159, "grad_norm": 0.42354056239128113, "learning_rate": 9.406320587230388e-06, "loss": 0.066, "step": 28726 }, { "epoch": 0.633007761930732, "grad_norm": 0.7562679648399353, "learning_rate": 9.40532728746359e-06, "loss": 0.0735, "step": 28727 }, { "epoch": 0.6330297972202482, "grad_norm": 0.6706198453903198, "learning_rate": 9.404334016193464e-06, "loss": 0.0682, "step": 28728 }, { "epoch": 0.6330518325097644, "grad_norm": 0.46934980154037476, "learning_rate": 9.403340773425064e-06, "loss": 0.0804, "step": 28729 }, { "epoch": 0.6330738677992805, "grad_norm": 0.7286443710327148, "learning_rate": 9.402347559163447e-06, "loss": 0.0714, "step": 28730 }, { "epoch": 0.6330959030887967, "grad_norm": 0.43304333090782166, "learning_rate": 9.401354373413679e-06, "loss": 0.082, "step": 28731 }, { "epoch": 0.6331179383783129, "grad_norm": 0.912553071975708, "learning_rate": 9.400361216180815e-06, "loss": 0.1036, "step": 28732 }, { "epoch": 0.633139973667829, "grad_norm": 0.44183945655822754, "learning_rate": 9.399368087469915e-06, "loss": 0.0875, "step": 28733 }, { "epoch": 0.6331620089573452, "grad_norm": 0.8290121555328369, "learning_rate": 9.39837498728603e-06, "loss": 0.0818, "step": 28734 }, { "epoch": 0.6331840442468614, "grad_norm": 0.7501239776611328, "learning_rate": 9.397381915634233e-06, "loss": 0.0716, "step": 28735 }, { "epoch": 0.6332060795363775, "grad_norm": 0.7400346398353577, "learning_rate": 9.39638887251957e-06, "loss": 0.0881, "step": 28736 }, { "epoch": 0.6332281148258937, "grad_norm": 0.4636003375053406, "learning_rate": 9.395395857947107e-06, "loss": 0.0329, "step": 28737 }, { "epoch": 0.6332501501154099, "grad_norm": 0.6471626162528992, "learning_rate": 9.394402871921894e-06, "loss": 0.0785, "step": 28738 }, { "epoch": 0.633272185404926, "grad_norm": 0.49001070857048035, "learning_rate": 9.393409914448996e-06, "loss": 0.0389, "step": 28739 }, { "epoch": 0.6332942206944422, "grad_norm": 0.43377578258514404, "learning_rate": 9.392416985533467e-06, "loss": 0.0757, "step": 28740 }, { "epoch": 0.6333162559839584, "grad_norm": 0.15095379948616028, "learning_rate": 9.391424085180361e-06, "loss": 0.0453, "step": 28741 }, { "epoch": 0.6333382912734745, "grad_norm": 0.6753629446029663, "learning_rate": 9.390431213394746e-06, "loss": 0.0611, "step": 28742 }, { "epoch": 0.6333603265629907, "grad_norm": 0.7231504917144775, "learning_rate": 9.38943837018167e-06, "loss": 0.0965, "step": 28743 }, { "epoch": 0.6333823618525067, "grad_norm": 0.7380147576332092, "learning_rate": 9.388445555546197e-06, "loss": 0.0981, "step": 28744 }, { "epoch": 0.6334043971420229, "grad_norm": 0.5248290300369263, "learning_rate": 9.387452769493373e-06, "loss": 0.072, "step": 28745 }, { "epoch": 0.6334264324315391, "grad_norm": 1.2641181945800781, "learning_rate": 9.386460012028269e-06, "loss": 0.0701, "step": 28746 }, { "epoch": 0.6334484677210552, "grad_norm": 0.4944826662540436, "learning_rate": 9.38546728315593e-06, "loss": 0.0586, "step": 28747 }, { "epoch": 0.6334705030105714, "grad_norm": 0.6613225340843201, "learning_rate": 9.38447458288142e-06, "loss": 0.0814, "step": 28748 }, { "epoch": 0.6334925383000876, "grad_norm": 0.39987143874168396, "learning_rate": 9.383481911209783e-06, "loss": 0.0592, "step": 28749 }, { "epoch": 0.6335145735896037, "grad_norm": 0.4205353260040283, "learning_rate": 9.382489268146095e-06, "loss": 0.0698, "step": 28750 }, { "epoch": 0.6335366088791199, "grad_norm": 0.600962221622467, "learning_rate": 9.381496653695397e-06, "loss": 0.075, "step": 28751 }, { "epoch": 0.633558644168636, "grad_norm": 0.7463709712028503, "learning_rate": 9.380504067862754e-06, "loss": 0.0612, "step": 28752 }, { "epoch": 0.6335806794581522, "grad_norm": 0.784541130065918, "learning_rate": 9.37951151065321e-06, "loss": 0.097, "step": 28753 }, { "epoch": 0.6336027147476684, "grad_norm": 0.6604847311973572, "learning_rate": 9.378518982071833e-06, "loss": 0.07, "step": 28754 }, { "epoch": 0.6336247500371845, "grad_norm": 0.5757310390472412, "learning_rate": 9.377526482123673e-06, "loss": 0.0909, "step": 28755 }, { "epoch": 0.6336467853267007, "grad_norm": 1.020519733428955, "learning_rate": 9.376534010813785e-06, "loss": 0.1084, "step": 28756 }, { "epoch": 0.6336688206162169, "grad_norm": 0.45548442006111145, "learning_rate": 9.37554156814722e-06, "loss": 0.08, "step": 28757 }, { "epoch": 0.633690855905733, "grad_norm": 0.922652006149292, "learning_rate": 9.374549154129043e-06, "loss": 0.1008, "step": 28758 }, { "epoch": 0.6337128911952492, "grad_norm": 0.5027663707733154, "learning_rate": 9.373556768764303e-06, "loss": 0.063, "step": 28759 }, { "epoch": 0.6337349264847654, "grad_norm": 0.7774708867073059, "learning_rate": 9.372564412058055e-06, "loss": 0.0851, "step": 28760 }, { "epoch": 0.6337569617742815, "grad_norm": 0.5971846580505371, "learning_rate": 9.371572084015353e-06, "loss": 0.0536, "step": 28761 }, { "epoch": 0.6337789970637977, "grad_norm": 0.6135631799697876, "learning_rate": 9.370579784641258e-06, "loss": 0.0644, "step": 28762 }, { "epoch": 0.6338010323533139, "grad_norm": 0.4718305766582489, "learning_rate": 9.369587513940816e-06, "loss": 0.0676, "step": 28763 }, { "epoch": 0.63382306764283, "grad_norm": 0.5960426330566406, "learning_rate": 9.368595271919077e-06, "loss": 0.092, "step": 28764 }, { "epoch": 0.6338451029323462, "grad_norm": 0.6891011595726013, "learning_rate": 9.367603058581108e-06, "loss": 0.1044, "step": 28765 }, { "epoch": 0.6338671382218624, "grad_norm": 0.663206160068512, "learning_rate": 9.36661087393196e-06, "loss": 0.0612, "step": 28766 }, { "epoch": 0.6338891735113785, "grad_norm": 0.7708880305290222, "learning_rate": 9.365618717976681e-06, "loss": 0.1158, "step": 28767 }, { "epoch": 0.6339112088008947, "grad_norm": 0.9386197924613953, "learning_rate": 9.364626590720325e-06, "loss": 0.1017, "step": 28768 }, { "epoch": 0.6339332440904107, "grad_norm": 0.6894943118095398, "learning_rate": 9.363634492167951e-06, "loss": 0.089, "step": 28769 }, { "epoch": 0.6339552793799269, "grad_norm": 0.8465619087219238, "learning_rate": 9.36264242232461e-06, "loss": 0.0725, "step": 28770 }, { "epoch": 0.6339773146694431, "grad_norm": 0.5896265506744385, "learning_rate": 9.361650381195352e-06, "loss": 0.0742, "step": 28771 }, { "epoch": 0.6339993499589592, "grad_norm": 0.46306559443473816, "learning_rate": 9.360658368785226e-06, "loss": 0.0596, "step": 28772 }, { "epoch": 0.6340213852484754, "grad_norm": 1.491736888885498, "learning_rate": 9.359666385099298e-06, "loss": 0.0771, "step": 28773 }, { "epoch": 0.6340434205379916, "grad_norm": 1.0527610778808594, "learning_rate": 9.358674430142614e-06, "loss": 0.0839, "step": 28774 }, { "epoch": 0.6340654558275077, "grad_norm": 0.2908685505390167, "learning_rate": 9.357682503920225e-06, "loss": 0.0456, "step": 28775 }, { "epoch": 0.6340874911170239, "grad_norm": 1.136845588684082, "learning_rate": 9.356690606437183e-06, "loss": 0.0912, "step": 28776 }, { "epoch": 0.6341095264065401, "grad_norm": 1.2917819023132324, "learning_rate": 9.355698737698543e-06, "loss": 0.092, "step": 28777 }, { "epoch": 0.6341315616960562, "grad_norm": 0.7073729038238525, "learning_rate": 9.35470689770936e-06, "loss": 0.054, "step": 28778 }, { "epoch": 0.6341535969855724, "grad_norm": 0.5078063011169434, "learning_rate": 9.353715086474679e-06, "loss": 0.0904, "step": 28779 }, { "epoch": 0.6341756322750886, "grad_norm": 0.7007667422294617, "learning_rate": 9.35272330399955e-06, "loss": 0.0953, "step": 28780 }, { "epoch": 0.6341976675646047, "grad_norm": 0.4074913561344147, "learning_rate": 9.351731550289034e-06, "loss": 0.0618, "step": 28781 }, { "epoch": 0.6342197028541209, "grad_norm": 0.5123398900032043, "learning_rate": 9.35073982534818e-06, "loss": 0.0657, "step": 28782 }, { "epoch": 0.634241738143637, "grad_norm": 0.6126477122306824, "learning_rate": 9.349748129182032e-06, "loss": 0.0624, "step": 28783 }, { "epoch": 0.6342637734331532, "grad_norm": 0.6457950472831726, "learning_rate": 9.348756461795652e-06, "loss": 0.0622, "step": 28784 }, { "epoch": 0.6342858087226694, "grad_norm": 0.4538591802120209, "learning_rate": 9.347764823194085e-06, "loss": 0.0471, "step": 28785 }, { "epoch": 0.6343078440121855, "grad_norm": 0.5894665718078613, "learning_rate": 9.346773213382381e-06, "loss": 0.053, "step": 28786 }, { "epoch": 0.6343298793017017, "grad_norm": 0.6143890023231506, "learning_rate": 9.345781632365589e-06, "loss": 0.0531, "step": 28787 }, { "epoch": 0.6343519145912179, "grad_norm": 0.928703784942627, "learning_rate": 9.344790080148768e-06, "loss": 0.1027, "step": 28788 }, { "epoch": 0.634373949880734, "grad_norm": 0.6419295072555542, "learning_rate": 9.343798556736963e-06, "loss": 0.0635, "step": 28789 }, { "epoch": 0.6343959851702502, "grad_norm": 0.5546474456787109, "learning_rate": 9.342807062135227e-06, "loss": 0.1007, "step": 28790 }, { "epoch": 0.6344180204597664, "grad_norm": 0.8518295288085938, "learning_rate": 9.341815596348605e-06, "loss": 0.0539, "step": 28791 }, { "epoch": 0.6344400557492825, "grad_norm": 0.38213038444519043, "learning_rate": 9.340824159382152e-06, "loss": 0.0473, "step": 28792 }, { "epoch": 0.6344620910387986, "grad_norm": 1.047037124633789, "learning_rate": 9.339832751240916e-06, "loss": 0.097, "step": 28793 }, { "epoch": 0.6344841263283147, "grad_norm": 0.8155907988548279, "learning_rate": 9.338841371929948e-06, "loss": 0.0874, "step": 28794 }, { "epoch": 0.6345061616178309, "grad_norm": 0.40879273414611816, "learning_rate": 9.337850021454292e-06, "loss": 0.0571, "step": 28795 }, { "epoch": 0.6345281969073471, "grad_norm": 0.6627787947654724, "learning_rate": 9.336858699819006e-06, "loss": 0.09, "step": 28796 }, { "epoch": 0.6345502321968632, "grad_norm": 0.6183951497077942, "learning_rate": 9.335867407029136e-06, "loss": 0.083, "step": 28797 }, { "epoch": 0.6345722674863794, "grad_norm": 0.43794625997543335, "learning_rate": 9.334876143089732e-06, "loss": 0.0688, "step": 28798 }, { "epoch": 0.6345943027758956, "grad_norm": 0.5152803063392639, "learning_rate": 9.333884908005836e-06, "loss": 0.0922, "step": 28799 }, { "epoch": 0.6346163380654117, "grad_norm": 0.5837780833244324, "learning_rate": 9.332893701782507e-06, "loss": 0.0891, "step": 28800 }, { "epoch": 0.6346383733549279, "grad_norm": 1.1186728477478027, "learning_rate": 9.33190252442479e-06, "loss": 0.0699, "step": 28801 }, { "epoch": 0.6346604086444441, "grad_norm": 0.6913357377052307, "learning_rate": 9.330911375937733e-06, "loss": 0.0774, "step": 28802 }, { "epoch": 0.6346824439339602, "grad_norm": 0.9493900537490845, "learning_rate": 9.329920256326378e-06, "loss": 0.0781, "step": 28803 }, { "epoch": 0.6347044792234764, "grad_norm": 0.4243963956832886, "learning_rate": 9.328929165595786e-06, "loss": 0.0629, "step": 28804 }, { "epoch": 0.6347265145129926, "grad_norm": 0.5358547568321228, "learning_rate": 9.327938103750995e-06, "loss": 0.0569, "step": 28805 }, { "epoch": 0.6347485498025087, "grad_norm": 1.1683942079544067, "learning_rate": 9.326947070797059e-06, "loss": 0.0813, "step": 28806 }, { "epoch": 0.6347705850920249, "grad_norm": 0.8148009777069092, "learning_rate": 9.325956066739023e-06, "loss": 0.083, "step": 28807 }, { "epoch": 0.634792620381541, "grad_norm": 0.4783269762992859, "learning_rate": 9.32496509158194e-06, "loss": 0.0812, "step": 28808 }, { "epoch": 0.6348146556710572, "grad_norm": 0.6686287522315979, "learning_rate": 9.323974145330848e-06, "loss": 0.0886, "step": 28809 }, { "epoch": 0.6348366909605734, "grad_norm": 0.7956913113594055, "learning_rate": 9.322983227990795e-06, "loss": 0.065, "step": 28810 }, { "epoch": 0.6348587262500895, "grad_norm": 0.4865133762359619, "learning_rate": 9.32199233956684e-06, "loss": 0.0443, "step": 28811 }, { "epoch": 0.6348807615396057, "grad_norm": 0.6366092562675476, "learning_rate": 9.321001480064021e-06, "loss": 0.0747, "step": 28812 }, { "epoch": 0.6349027968291219, "grad_norm": 0.5217769145965576, "learning_rate": 9.320010649487388e-06, "loss": 0.0657, "step": 28813 }, { "epoch": 0.634924832118638, "grad_norm": 0.7110841870307922, "learning_rate": 9.319019847841981e-06, "loss": 0.0776, "step": 28814 }, { "epoch": 0.6349468674081542, "grad_norm": 0.44933003187179565, "learning_rate": 9.318029075132861e-06, "loss": 0.0658, "step": 28815 }, { "epoch": 0.6349689026976704, "grad_norm": 0.4856027066707611, "learning_rate": 9.317038331365062e-06, "loss": 0.0548, "step": 28816 }, { "epoch": 0.6349909379871865, "grad_norm": 0.5418129563331604, "learning_rate": 9.316047616543634e-06, "loss": 0.0544, "step": 28817 }, { "epoch": 0.6350129732767026, "grad_norm": 0.5512354373931885, "learning_rate": 9.315056930673618e-06, "loss": 0.0826, "step": 28818 }, { "epoch": 0.6350350085662188, "grad_norm": 0.35711753368377686, "learning_rate": 9.314066273760074e-06, "loss": 0.1072, "step": 28819 }, { "epoch": 0.6350570438557349, "grad_norm": 0.6130273938179016, "learning_rate": 9.313075645808037e-06, "loss": 0.0532, "step": 28820 }, { "epoch": 0.6350790791452511, "grad_norm": 0.46396005153656006, "learning_rate": 9.312085046822556e-06, "loss": 0.0516, "step": 28821 }, { "epoch": 0.6351011144347672, "grad_norm": 0.6224817037582397, "learning_rate": 9.311094476808668e-06, "loss": 0.0478, "step": 28822 }, { "epoch": 0.6351231497242834, "grad_norm": 0.7356776595115662, "learning_rate": 9.310103935771433e-06, "loss": 0.0695, "step": 28823 }, { "epoch": 0.6351451850137996, "grad_norm": 0.9756724834442139, "learning_rate": 9.309113423715892e-06, "loss": 0.0653, "step": 28824 }, { "epoch": 0.6351672203033157, "grad_norm": 0.5583028793334961, "learning_rate": 9.30812294064709e-06, "loss": 0.0693, "step": 28825 }, { "epoch": 0.6351892555928319, "grad_norm": 0.6547118425369263, "learning_rate": 9.30713248657006e-06, "loss": 0.0753, "step": 28826 }, { "epoch": 0.6352112908823481, "grad_norm": 0.7961422204971313, "learning_rate": 9.306142061489864e-06, "loss": 0.0514, "step": 28827 }, { "epoch": 0.6352333261718642, "grad_norm": 0.45798590779304504, "learning_rate": 9.305151665411541e-06, "loss": 0.0628, "step": 28828 }, { "epoch": 0.6352553614613804, "grad_norm": 0.7603698372840881, "learning_rate": 9.304161298340125e-06, "loss": 0.0822, "step": 28829 }, { "epoch": 0.6352773967508966, "grad_norm": 0.45668917894363403, "learning_rate": 9.30317096028068e-06, "loss": 0.0426, "step": 28830 }, { "epoch": 0.6352994320404127, "grad_norm": 0.8253921866416931, "learning_rate": 9.302180651238236e-06, "loss": 0.0769, "step": 28831 }, { "epoch": 0.6353214673299289, "grad_norm": 0.6897025108337402, "learning_rate": 9.301190371217845e-06, "loss": 0.0886, "step": 28832 }, { "epoch": 0.635343502619445, "grad_norm": 0.5895110964775085, "learning_rate": 9.300200120224544e-06, "loss": 0.0508, "step": 28833 }, { "epoch": 0.6353655379089612, "grad_norm": 0.3619552552700043, "learning_rate": 9.29920989826338e-06, "loss": 0.0508, "step": 28834 }, { "epoch": 0.6353875731984774, "grad_norm": 0.5490251183509827, "learning_rate": 9.298219705339401e-06, "loss": 0.0646, "step": 28835 }, { "epoch": 0.6354096084879935, "grad_norm": 0.6597049236297607, "learning_rate": 9.297229541457643e-06, "loss": 0.0645, "step": 28836 }, { "epoch": 0.6354316437775097, "grad_norm": 0.6849963665008545, "learning_rate": 9.296239406623148e-06, "loss": 0.1002, "step": 28837 }, { "epoch": 0.6354536790670259, "grad_norm": 0.5207592248916626, "learning_rate": 9.295249300840974e-06, "loss": 0.0791, "step": 28838 }, { "epoch": 0.635475714356542, "grad_norm": 0.34535717964172363, "learning_rate": 9.29425922411615e-06, "loss": 0.0553, "step": 28839 }, { "epoch": 0.6354977496460582, "grad_norm": 0.43873074650764465, "learning_rate": 9.293269176453726e-06, "loss": 0.0708, "step": 28840 }, { "epoch": 0.6355197849355744, "grad_norm": 0.5156026482582092, "learning_rate": 9.292279157858737e-06, "loss": 0.0533, "step": 28841 }, { "epoch": 0.6355418202250905, "grad_norm": 0.7205814123153687, "learning_rate": 9.291289168336235e-06, "loss": 0.0762, "step": 28842 }, { "epoch": 0.6355638555146066, "grad_norm": 0.6194491386413574, "learning_rate": 9.290299207891259e-06, "loss": 0.0598, "step": 28843 }, { "epoch": 0.6355858908041228, "grad_norm": 0.7593346834182739, "learning_rate": 9.28930927652885e-06, "loss": 0.0522, "step": 28844 }, { "epoch": 0.6356079260936389, "grad_norm": 0.460065633058548, "learning_rate": 9.288319374254046e-06, "loss": 0.0689, "step": 28845 }, { "epoch": 0.6356299613831551, "grad_norm": 1.0469557046890259, "learning_rate": 9.2873295010719e-06, "loss": 0.0808, "step": 28846 }, { "epoch": 0.6356519966726712, "grad_norm": 0.5767181515693665, "learning_rate": 9.28633965698745e-06, "loss": 0.0634, "step": 28847 }, { "epoch": 0.6356740319621874, "grad_norm": 0.5922737121582031, "learning_rate": 9.285349842005732e-06, "loss": 0.0499, "step": 28848 }, { "epoch": 0.6356960672517036, "grad_norm": 0.7717549204826355, "learning_rate": 9.284360056131792e-06, "loss": 0.0635, "step": 28849 }, { "epoch": 0.6357181025412197, "grad_norm": 1.0150984525680542, "learning_rate": 9.283370299370674e-06, "loss": 0.1109, "step": 28850 }, { "epoch": 0.6357401378307359, "grad_norm": 0.7726606726646423, "learning_rate": 9.282380571727413e-06, "loss": 0.1004, "step": 28851 }, { "epoch": 0.6357621731202521, "grad_norm": 0.7398383617401123, "learning_rate": 9.281390873207051e-06, "loss": 0.0934, "step": 28852 }, { "epoch": 0.6357842084097682, "grad_norm": 0.6362610459327698, "learning_rate": 9.280401203814638e-06, "loss": 0.0746, "step": 28853 }, { "epoch": 0.6358062436992844, "grad_norm": 0.645976722240448, "learning_rate": 9.279411563555209e-06, "loss": 0.0689, "step": 28854 }, { "epoch": 0.6358282789888006, "grad_norm": 0.31574851274490356, "learning_rate": 9.278421952433802e-06, "loss": 0.0764, "step": 28855 }, { "epoch": 0.6358503142783167, "grad_norm": 0.6128930449485779, "learning_rate": 9.27743237045546e-06, "loss": 0.079, "step": 28856 }, { "epoch": 0.6358723495678329, "grad_norm": 0.49046310782432556, "learning_rate": 9.276442817625225e-06, "loss": 0.0696, "step": 28857 }, { "epoch": 0.6358943848573491, "grad_norm": 0.6601660251617432, "learning_rate": 9.275453293948137e-06, "loss": 0.0716, "step": 28858 }, { "epoch": 0.6359164201468652, "grad_norm": 0.5143558382987976, "learning_rate": 9.274463799429234e-06, "loss": 0.1018, "step": 28859 }, { "epoch": 0.6359384554363814, "grad_norm": 0.737108588218689, "learning_rate": 9.273474334073551e-06, "loss": 0.0605, "step": 28860 }, { "epoch": 0.6359604907258976, "grad_norm": 0.5407135486602783, "learning_rate": 9.272484897886142e-06, "loss": 0.0647, "step": 28861 }, { "epoch": 0.6359825260154137, "grad_norm": 0.8670469522476196, "learning_rate": 9.271495490872038e-06, "loss": 0.0497, "step": 28862 }, { "epoch": 0.6360045613049299, "grad_norm": 0.3164878189563751, "learning_rate": 9.270506113036278e-06, "loss": 0.0516, "step": 28863 }, { "epoch": 0.636026596594446, "grad_norm": 0.5021749138832092, "learning_rate": 9.2695167643839e-06, "loss": 0.0579, "step": 28864 }, { "epoch": 0.6360486318839622, "grad_norm": 0.9875089526176453, "learning_rate": 9.26852744491995e-06, "loss": 0.0734, "step": 28865 }, { "epoch": 0.6360706671734784, "grad_norm": 0.6815924048423767, "learning_rate": 9.267538154649462e-06, "loss": 0.071, "step": 28866 }, { "epoch": 0.6360927024629944, "grad_norm": 0.4895314574241638, "learning_rate": 9.266548893577477e-06, "loss": 0.0621, "step": 28867 }, { "epoch": 0.6361147377525106, "grad_norm": 0.980891227722168, "learning_rate": 9.265559661709028e-06, "loss": 0.0707, "step": 28868 }, { "epoch": 0.6361367730420268, "grad_norm": 1.1250734329223633, "learning_rate": 9.264570459049163e-06, "loss": 0.0903, "step": 28869 }, { "epoch": 0.6361588083315429, "grad_norm": 0.48953723907470703, "learning_rate": 9.263581285602917e-06, "loss": 0.0799, "step": 28870 }, { "epoch": 0.6361808436210591, "grad_norm": 0.5620253682136536, "learning_rate": 9.262592141375326e-06, "loss": 0.0765, "step": 28871 }, { "epoch": 0.6362028789105753, "grad_norm": 0.6622792482376099, "learning_rate": 9.26160302637143e-06, "loss": 0.0577, "step": 28872 }, { "epoch": 0.6362249142000914, "grad_norm": 0.40920764207839966, "learning_rate": 9.260613940596267e-06, "loss": 0.0494, "step": 28873 }, { "epoch": 0.6362469494896076, "grad_norm": 0.5534982681274414, "learning_rate": 9.259624884054876e-06, "loss": 0.053, "step": 28874 }, { "epoch": 0.6362689847791237, "grad_norm": 0.6803330779075623, "learning_rate": 9.258635856752288e-06, "loss": 0.0764, "step": 28875 }, { "epoch": 0.6362910200686399, "grad_norm": 0.6113854050636292, "learning_rate": 9.257646858693552e-06, "loss": 0.0705, "step": 28876 }, { "epoch": 0.6363130553581561, "grad_norm": 0.7793799042701721, "learning_rate": 9.2566578898837e-06, "loss": 0.0656, "step": 28877 }, { "epoch": 0.6363350906476722, "grad_norm": 0.441417396068573, "learning_rate": 9.25566895032777e-06, "loss": 0.0381, "step": 28878 }, { "epoch": 0.6363571259371884, "grad_norm": 0.44857242703437805, "learning_rate": 9.254680040030794e-06, "loss": 0.0759, "step": 28879 }, { "epoch": 0.6363791612267046, "grad_norm": 0.8722696900367737, "learning_rate": 9.253691158997818e-06, "loss": 0.0676, "step": 28880 }, { "epoch": 0.6364011965162207, "grad_norm": 0.3036493957042694, "learning_rate": 9.252702307233872e-06, "loss": 0.059, "step": 28881 }, { "epoch": 0.6364232318057369, "grad_norm": 0.41459259390830994, "learning_rate": 9.251713484743998e-06, "loss": 0.0811, "step": 28882 }, { "epoch": 0.6364452670952531, "grad_norm": 0.7676960229873657, "learning_rate": 9.250724691533223e-06, "loss": 0.0463, "step": 28883 }, { "epoch": 0.6364673023847692, "grad_norm": 1.7508163452148438, "learning_rate": 9.249735927606595e-06, "loss": 0.0463, "step": 28884 }, { "epoch": 0.6364893376742854, "grad_norm": 0.5143614411354065, "learning_rate": 9.248747192969144e-06, "loss": 0.0464, "step": 28885 }, { "epoch": 0.6365113729638016, "grad_norm": 0.6989606618881226, "learning_rate": 9.247758487625912e-06, "loss": 0.0966, "step": 28886 }, { "epoch": 0.6365334082533177, "grad_norm": 0.7277719974517822, "learning_rate": 9.246769811581924e-06, "loss": 0.0569, "step": 28887 }, { "epoch": 0.6365554435428339, "grad_norm": 0.6655658483505249, "learning_rate": 9.245781164842226e-06, "loss": 0.0977, "step": 28888 }, { "epoch": 0.63657747883235, "grad_norm": 0.8049619793891907, "learning_rate": 9.24479254741185e-06, "loss": 0.0838, "step": 28889 }, { "epoch": 0.6365995141218662, "grad_norm": 0.4125240743160248, "learning_rate": 9.243803959295834e-06, "loss": 0.0491, "step": 28890 }, { "epoch": 0.6366215494113824, "grad_norm": 0.743241012096405, "learning_rate": 9.242815400499202e-06, "loss": 0.0825, "step": 28891 }, { "epoch": 0.6366435847008984, "grad_norm": 0.6452037692070007, "learning_rate": 9.241826871027007e-06, "loss": 0.0948, "step": 28892 }, { "epoch": 0.6366656199904146, "grad_norm": 0.4887159764766693, "learning_rate": 9.240838370884276e-06, "loss": 0.0537, "step": 28893 }, { "epoch": 0.6366876552799308, "grad_norm": 0.4383789002895355, "learning_rate": 9.23984990007604e-06, "loss": 0.0875, "step": 28894 }, { "epoch": 0.6367096905694469, "grad_norm": 0.4766273498535156, "learning_rate": 9.238861458607337e-06, "loss": 0.0499, "step": 28895 }, { "epoch": 0.6367317258589631, "grad_norm": 0.4428754150867462, "learning_rate": 9.237873046483204e-06, "loss": 0.0803, "step": 28896 }, { "epoch": 0.6367537611484793, "grad_norm": 0.6965101957321167, "learning_rate": 9.236884663708674e-06, "loss": 0.0538, "step": 28897 }, { "epoch": 0.6367757964379954, "grad_norm": 0.6017125248908997, "learning_rate": 9.235896310288775e-06, "loss": 0.0625, "step": 28898 }, { "epoch": 0.6367978317275116, "grad_norm": 0.48037007451057434, "learning_rate": 9.234907986228554e-06, "loss": 0.0688, "step": 28899 }, { "epoch": 0.6368198670170278, "grad_norm": 0.477859765291214, "learning_rate": 9.233919691533036e-06, "loss": 0.0556, "step": 28900 }, { "epoch": 0.6368419023065439, "grad_norm": 0.32291874289512634, "learning_rate": 9.23293142620726e-06, "loss": 0.0527, "step": 28901 }, { "epoch": 0.6368639375960601, "grad_norm": 0.6468617916107178, "learning_rate": 9.231943190256248e-06, "loss": 0.0623, "step": 28902 }, { "epoch": 0.6368859728855762, "grad_norm": 0.7378361821174622, "learning_rate": 9.23095498368505e-06, "loss": 0.0997, "step": 28903 }, { "epoch": 0.6369080081750924, "grad_norm": 0.5338612198829651, "learning_rate": 9.22996680649869e-06, "loss": 0.0646, "step": 28904 }, { "epoch": 0.6369300434646086, "grad_norm": 0.5835714340209961, "learning_rate": 9.228978658702204e-06, "loss": 0.0793, "step": 28905 }, { "epoch": 0.6369520787541247, "grad_norm": 0.6854124665260315, "learning_rate": 9.22799054030062e-06, "loss": 0.074, "step": 28906 }, { "epoch": 0.6369741140436409, "grad_norm": 0.9758768081665039, "learning_rate": 9.227002451298979e-06, "loss": 0.0705, "step": 28907 }, { "epoch": 0.6369961493331571, "grad_norm": 0.5745678544044495, "learning_rate": 9.22601439170231e-06, "loss": 0.0785, "step": 28908 }, { "epoch": 0.6370181846226732, "grad_norm": 0.714379072189331, "learning_rate": 9.225026361515647e-06, "loss": 0.0599, "step": 28909 }, { "epoch": 0.6370402199121894, "grad_norm": 0.27516740560531616, "learning_rate": 9.22403836074402e-06, "loss": 0.0472, "step": 28910 }, { "epoch": 0.6370622552017056, "grad_norm": 0.2642854154109955, "learning_rate": 9.223050389392464e-06, "loss": 0.0736, "step": 28911 }, { "epoch": 0.6370842904912217, "grad_norm": 0.47035712003707886, "learning_rate": 9.22206244746601e-06, "loss": 0.0538, "step": 28912 }, { "epoch": 0.6371063257807379, "grad_norm": 0.7779277563095093, "learning_rate": 9.221074534969692e-06, "loss": 0.0784, "step": 28913 }, { "epoch": 0.6371283610702541, "grad_norm": 0.32366809248924255, "learning_rate": 9.220086651908532e-06, "loss": 0.0626, "step": 28914 }, { "epoch": 0.6371503963597702, "grad_norm": 0.671649694442749, "learning_rate": 9.21909879828758e-06, "loss": 0.0705, "step": 28915 }, { "epoch": 0.6371724316492864, "grad_norm": 0.512750506401062, "learning_rate": 9.218110974111853e-06, "loss": 0.0551, "step": 28916 }, { "epoch": 0.6371944669388024, "grad_norm": 0.5903160572052002, "learning_rate": 9.217123179386387e-06, "loss": 0.0589, "step": 28917 }, { "epoch": 0.6372165022283186, "grad_norm": 0.7841660976409912, "learning_rate": 9.216135414116216e-06, "loss": 0.0912, "step": 28918 }, { "epoch": 0.6372385375178348, "grad_norm": 0.7653253078460693, "learning_rate": 9.21514767830637e-06, "loss": 0.0971, "step": 28919 }, { "epoch": 0.6372605728073509, "grad_norm": 0.8067522048950195, "learning_rate": 9.214159971961879e-06, "loss": 0.0636, "step": 28920 }, { "epoch": 0.6372826080968671, "grad_norm": 0.667494535446167, "learning_rate": 9.213172295087765e-06, "loss": 0.0733, "step": 28921 }, { "epoch": 0.6373046433863833, "grad_norm": 0.5004851222038269, "learning_rate": 9.212184647689077e-06, "loss": 0.0894, "step": 28922 }, { "epoch": 0.6373266786758994, "grad_norm": 0.6023637056350708, "learning_rate": 9.211197029770834e-06, "loss": 0.11, "step": 28923 }, { "epoch": 0.6373487139654156, "grad_norm": 0.2704354524612427, "learning_rate": 9.210209441338069e-06, "loss": 0.0474, "step": 28924 }, { "epoch": 0.6373707492549318, "grad_norm": 0.6750156283378601, "learning_rate": 9.209221882395806e-06, "loss": 0.0734, "step": 28925 }, { "epoch": 0.6373927845444479, "grad_norm": 0.9949541091918945, "learning_rate": 9.208234352949092e-06, "loss": 0.066, "step": 28926 }, { "epoch": 0.6374148198339641, "grad_norm": 0.6268846988677979, "learning_rate": 9.20724685300294e-06, "loss": 0.0746, "step": 28927 }, { "epoch": 0.6374368551234803, "grad_norm": 0.6052133440971375, "learning_rate": 9.206259382562391e-06, "loss": 0.0683, "step": 28928 }, { "epoch": 0.6374588904129964, "grad_norm": 0.4059365391731262, "learning_rate": 9.20527194163246e-06, "loss": 0.0449, "step": 28929 }, { "epoch": 0.6374809257025126, "grad_norm": 0.5696539878845215, "learning_rate": 9.204284530218192e-06, "loss": 0.0804, "step": 28930 }, { "epoch": 0.6375029609920287, "grad_norm": 0.7015724778175354, "learning_rate": 9.203297148324614e-06, "loss": 0.0653, "step": 28931 }, { "epoch": 0.6375249962815449, "grad_norm": 0.4454018473625183, "learning_rate": 9.20230979595675e-06, "loss": 0.046, "step": 28932 }, { "epoch": 0.6375470315710611, "grad_norm": 0.40419724583625793, "learning_rate": 9.201322473119626e-06, "loss": 0.0432, "step": 28933 }, { "epoch": 0.6375690668605772, "grad_norm": 0.8199970722198486, "learning_rate": 9.20033517981828e-06, "loss": 0.1003, "step": 28934 }, { "epoch": 0.6375911021500934, "grad_norm": 0.3569979667663574, "learning_rate": 9.19934791605774e-06, "loss": 0.038, "step": 28935 }, { "epoch": 0.6376131374396096, "grad_norm": 0.8168128132820129, "learning_rate": 9.198360681843029e-06, "loss": 0.0805, "step": 28936 }, { "epoch": 0.6376351727291257, "grad_norm": 0.39201781153678894, "learning_rate": 9.197373477179176e-06, "loss": 0.0874, "step": 28937 }, { "epoch": 0.6376572080186419, "grad_norm": 0.5869046449661255, "learning_rate": 9.196386302071215e-06, "loss": 0.0502, "step": 28938 }, { "epoch": 0.6376792433081581, "grad_norm": 0.39812734723091125, "learning_rate": 9.19539915652417e-06, "loss": 0.0855, "step": 28939 }, { "epoch": 0.6377012785976742, "grad_norm": 0.7843215465545654, "learning_rate": 9.194412040543065e-06, "loss": 0.0882, "step": 28940 }, { "epoch": 0.6377233138871904, "grad_norm": 0.47930270433425903, "learning_rate": 9.193424954132938e-06, "loss": 0.0608, "step": 28941 }, { "epoch": 0.6377453491767064, "grad_norm": 0.5031996965408325, "learning_rate": 9.192437897298813e-06, "loss": 0.0779, "step": 28942 }, { "epoch": 0.6377673844662226, "grad_norm": 0.32092249393463135, "learning_rate": 9.191450870045713e-06, "loss": 0.0641, "step": 28943 }, { "epoch": 0.6377894197557388, "grad_norm": 0.4443189203739166, "learning_rate": 9.190463872378667e-06, "loss": 0.0581, "step": 28944 }, { "epoch": 0.6378114550452549, "grad_norm": 0.850629985332489, "learning_rate": 9.189476904302707e-06, "loss": 0.0971, "step": 28945 }, { "epoch": 0.6378334903347711, "grad_norm": 0.4909750819206238, "learning_rate": 9.188489965822855e-06, "loss": 0.0322, "step": 28946 }, { "epoch": 0.6378555256242873, "grad_norm": 0.6538116931915283, "learning_rate": 9.187503056944143e-06, "loss": 0.0751, "step": 28947 }, { "epoch": 0.6378775609138034, "grad_norm": 1.1798725128173828, "learning_rate": 9.186516177671586e-06, "loss": 0.1089, "step": 28948 }, { "epoch": 0.6378995962033196, "grad_norm": 0.38745325803756714, "learning_rate": 9.185529328010229e-06, "loss": 0.0629, "step": 28949 }, { "epoch": 0.6379216314928358, "grad_norm": 0.4486517310142517, "learning_rate": 9.184542507965086e-06, "loss": 0.0735, "step": 28950 }, { "epoch": 0.6379436667823519, "grad_norm": 0.5745106935501099, "learning_rate": 9.18355571754119e-06, "loss": 0.074, "step": 28951 }, { "epoch": 0.6379657020718681, "grad_norm": 0.6566959023475647, "learning_rate": 9.182568956743556e-06, "loss": 0.0859, "step": 28952 }, { "epoch": 0.6379877373613843, "grad_norm": 0.41670289635658264, "learning_rate": 9.181582225577223e-06, "loss": 0.0605, "step": 28953 }, { "epoch": 0.6380097726509004, "grad_norm": 0.5064093470573425, "learning_rate": 9.180595524047212e-06, "loss": 0.0574, "step": 28954 }, { "epoch": 0.6380318079404166, "grad_norm": 0.68116295337677, "learning_rate": 9.179608852158548e-06, "loss": 0.058, "step": 28955 }, { "epoch": 0.6380538432299327, "grad_norm": 0.7971975207328796, "learning_rate": 9.178622209916253e-06, "loss": 0.0946, "step": 28956 }, { "epoch": 0.6380758785194489, "grad_norm": 0.7340357303619385, "learning_rate": 9.177635597325362e-06, "loss": 0.0771, "step": 28957 }, { "epoch": 0.6380979138089651, "grad_norm": 0.5005595088005066, "learning_rate": 9.176649014390895e-06, "loss": 0.0743, "step": 28958 }, { "epoch": 0.6381199490984812, "grad_norm": 0.6811121106147766, "learning_rate": 9.175662461117878e-06, "loss": 0.0602, "step": 28959 }, { "epoch": 0.6381419843879974, "grad_norm": 0.6147828102111816, "learning_rate": 9.17467593751133e-06, "loss": 0.0598, "step": 28960 }, { "epoch": 0.6381640196775136, "grad_norm": 0.6972508430480957, "learning_rate": 9.173689443576286e-06, "loss": 0.0724, "step": 28961 }, { "epoch": 0.6381860549670297, "grad_norm": 0.5043680667877197, "learning_rate": 9.172702979317767e-06, "loss": 0.0613, "step": 28962 }, { "epoch": 0.6382080902565459, "grad_norm": 0.6131271123886108, "learning_rate": 9.171716544740788e-06, "loss": 0.0653, "step": 28963 }, { "epoch": 0.6382301255460621, "grad_norm": 0.19080506265163422, "learning_rate": 9.17073013985039e-06, "loss": 0.06, "step": 28964 }, { "epoch": 0.6382521608355782, "grad_norm": 0.42830246686935425, "learning_rate": 9.169743764651587e-06, "loss": 0.0571, "step": 28965 }, { "epoch": 0.6382741961250943, "grad_norm": 0.9241989850997925, "learning_rate": 9.168757419149406e-06, "loss": 0.0603, "step": 28966 }, { "epoch": 0.6382962314146104, "grad_norm": 0.45846226811408997, "learning_rate": 9.16777110334887e-06, "loss": 0.0656, "step": 28967 }, { "epoch": 0.6383182667041266, "grad_norm": 0.24982155859470367, "learning_rate": 9.166784817255004e-06, "loss": 0.0524, "step": 28968 }, { "epoch": 0.6383403019936428, "grad_norm": 0.6326924562454224, "learning_rate": 9.165798560872831e-06, "loss": 0.0592, "step": 28969 }, { "epoch": 0.6383623372831589, "grad_norm": 0.8554636836051941, "learning_rate": 9.164812334207376e-06, "loss": 0.1138, "step": 28970 }, { "epoch": 0.6383843725726751, "grad_norm": 0.971062958240509, "learning_rate": 9.163826137263654e-06, "loss": 0.0915, "step": 28971 }, { "epoch": 0.6384064078621913, "grad_norm": 0.8803234696388245, "learning_rate": 9.162839970046702e-06, "loss": 0.1183, "step": 28972 }, { "epoch": 0.6384284431517074, "grad_norm": 0.33844542503356934, "learning_rate": 9.161853832561534e-06, "loss": 0.0661, "step": 28973 }, { "epoch": 0.6384504784412236, "grad_norm": 1.085652232170105, "learning_rate": 9.160867724813176e-06, "loss": 0.0916, "step": 28974 }, { "epoch": 0.6384725137307398, "grad_norm": 0.32624366879463196, "learning_rate": 9.159881646806649e-06, "loss": 0.0712, "step": 28975 }, { "epoch": 0.6384945490202559, "grad_norm": 0.7017723917961121, "learning_rate": 9.158895598546977e-06, "loss": 0.0821, "step": 28976 }, { "epoch": 0.6385165843097721, "grad_norm": 0.40195783972740173, "learning_rate": 9.157909580039183e-06, "loss": 0.0843, "step": 28977 }, { "epoch": 0.6385386195992883, "grad_norm": 0.7556120157241821, "learning_rate": 9.156923591288286e-06, "loss": 0.0519, "step": 28978 }, { "epoch": 0.6385606548888044, "grad_norm": 0.6488223075866699, "learning_rate": 9.155937632299307e-06, "loss": 0.0624, "step": 28979 }, { "epoch": 0.6385826901783206, "grad_norm": 0.7551695108413696, "learning_rate": 9.154951703077276e-06, "loss": 0.0757, "step": 28980 }, { "epoch": 0.6386047254678368, "grad_norm": 0.797755777835846, "learning_rate": 9.153965803627211e-06, "loss": 0.0539, "step": 28981 }, { "epoch": 0.6386267607573529, "grad_norm": 0.8758081197738647, "learning_rate": 9.152979933954133e-06, "loss": 0.0694, "step": 28982 }, { "epoch": 0.6386487960468691, "grad_norm": 0.6617651581764221, "learning_rate": 9.151994094063061e-06, "loss": 0.0475, "step": 28983 }, { "epoch": 0.6386708313363852, "grad_norm": 0.5311601161956787, "learning_rate": 9.151008283959023e-06, "loss": 0.0683, "step": 28984 }, { "epoch": 0.6386928666259014, "grad_norm": 0.3972625732421875, "learning_rate": 9.150022503647035e-06, "loss": 0.0638, "step": 28985 }, { "epoch": 0.6387149019154176, "grad_norm": 0.6737836003303528, "learning_rate": 9.149036753132113e-06, "loss": 0.0968, "step": 28986 }, { "epoch": 0.6387369372049337, "grad_norm": 0.3748568296432495, "learning_rate": 9.148051032419292e-06, "loss": 0.0567, "step": 28987 }, { "epoch": 0.6387589724944499, "grad_norm": 0.39089611172676086, "learning_rate": 9.147065341513583e-06, "loss": 0.0764, "step": 28988 }, { "epoch": 0.6387810077839661, "grad_norm": 0.6317412853240967, "learning_rate": 9.146079680420011e-06, "loss": 0.0674, "step": 28989 }, { "epoch": 0.6388030430734822, "grad_norm": 0.47803112864494324, "learning_rate": 9.145094049143588e-06, "loss": 0.0777, "step": 28990 }, { "epoch": 0.6388250783629983, "grad_norm": 0.4786798655986786, "learning_rate": 9.144108447689349e-06, "loss": 0.0548, "step": 28991 }, { "epoch": 0.6388471136525145, "grad_norm": 0.3986700773239136, "learning_rate": 9.1431228760623e-06, "loss": 0.0702, "step": 28992 }, { "epoch": 0.6388691489420306, "grad_norm": 0.877689003944397, "learning_rate": 9.142137334267471e-06, "loss": 0.092, "step": 28993 }, { "epoch": 0.6388911842315468, "grad_norm": 0.952286422252655, "learning_rate": 9.14115182230987e-06, "loss": 0.0578, "step": 28994 }, { "epoch": 0.638913219521063, "grad_norm": 0.6470154523849487, "learning_rate": 9.140166340194532e-06, "loss": 0.0695, "step": 28995 }, { "epoch": 0.6389352548105791, "grad_norm": 0.6178921461105347, "learning_rate": 9.139180887926467e-06, "loss": 0.0625, "step": 28996 }, { "epoch": 0.6389572901000953, "grad_norm": 0.6465515494346619, "learning_rate": 9.138195465510696e-06, "loss": 0.0504, "step": 28997 }, { "epoch": 0.6389793253896114, "grad_norm": 0.5523656606674194, "learning_rate": 9.137210072952241e-06, "loss": 0.0626, "step": 28998 }, { "epoch": 0.6390013606791276, "grad_norm": 0.5950409770011902, "learning_rate": 9.136224710256116e-06, "loss": 0.0556, "step": 28999 }, { "epoch": 0.6390233959686438, "grad_norm": 0.8543254733085632, "learning_rate": 9.135239377427346e-06, "loss": 0.0841, "step": 29000 }, { "epoch": 0.6390454312581599, "grad_norm": 0.4719155430793762, "learning_rate": 9.134254074470946e-06, "loss": 0.0593, "step": 29001 }, { "epoch": 0.6390674665476761, "grad_norm": 0.6702840924263, "learning_rate": 9.133268801391929e-06, "loss": 0.0741, "step": 29002 }, { "epoch": 0.6390895018371923, "grad_norm": 0.6325222253799438, "learning_rate": 9.132283558195328e-06, "loss": 0.059, "step": 29003 }, { "epoch": 0.6391115371267084, "grad_norm": 0.5719678997993469, "learning_rate": 9.13129834488615e-06, "loss": 0.0686, "step": 29004 }, { "epoch": 0.6391335724162246, "grad_norm": 0.4920744299888611, "learning_rate": 9.130313161469417e-06, "loss": 0.0653, "step": 29005 }, { "epoch": 0.6391556077057408, "grad_norm": 1.2008676528930664, "learning_rate": 9.129328007950149e-06, "loss": 0.0894, "step": 29006 }, { "epoch": 0.6391776429952569, "grad_norm": 0.5850052237510681, "learning_rate": 9.128342884333361e-06, "loss": 0.0926, "step": 29007 }, { "epoch": 0.6391996782847731, "grad_norm": 0.6745502948760986, "learning_rate": 9.127357790624069e-06, "loss": 0.0768, "step": 29008 }, { "epoch": 0.6392217135742893, "grad_norm": 0.7423413395881653, "learning_rate": 9.12637272682729e-06, "loss": 0.0825, "step": 29009 }, { "epoch": 0.6392437488638054, "grad_norm": 0.5615985989570618, "learning_rate": 9.125387692948049e-06, "loss": 0.0522, "step": 29010 }, { "epoch": 0.6392657841533216, "grad_norm": 0.516708493232727, "learning_rate": 9.12440268899136e-06, "loss": 0.0644, "step": 29011 }, { "epoch": 0.6392878194428377, "grad_norm": 0.5523332953453064, "learning_rate": 9.123417714962238e-06, "loss": 0.0601, "step": 29012 }, { "epoch": 0.6393098547323539, "grad_norm": 0.4511767029762268, "learning_rate": 9.122432770865697e-06, "loss": 0.0608, "step": 29013 }, { "epoch": 0.6393318900218701, "grad_norm": 0.8244299292564392, "learning_rate": 9.121447856706762e-06, "loss": 0.0727, "step": 29014 }, { "epoch": 0.6393539253113862, "grad_norm": 0.7940091490745544, "learning_rate": 9.120462972490442e-06, "loss": 0.1048, "step": 29015 }, { "epoch": 0.6393759606009023, "grad_norm": 0.5637664794921875, "learning_rate": 9.119478118221762e-06, "loss": 0.0673, "step": 29016 }, { "epoch": 0.6393979958904185, "grad_norm": 0.5346958041191101, "learning_rate": 9.118493293905725e-06, "loss": 0.0763, "step": 29017 }, { "epoch": 0.6394200311799346, "grad_norm": 0.6063160300254822, "learning_rate": 9.117508499547361e-06, "loss": 0.0671, "step": 29018 }, { "epoch": 0.6394420664694508, "grad_norm": 0.6513326168060303, "learning_rate": 9.116523735151683e-06, "loss": 0.0724, "step": 29019 }, { "epoch": 0.639464101758967, "grad_norm": 0.5916464328765869, "learning_rate": 9.115539000723703e-06, "loss": 0.0653, "step": 29020 }, { "epoch": 0.6394861370484831, "grad_norm": 0.5869520902633667, "learning_rate": 9.114554296268433e-06, "loss": 0.0749, "step": 29021 }, { "epoch": 0.6395081723379993, "grad_norm": 0.39195239543914795, "learning_rate": 9.113569621790902e-06, "loss": 0.0657, "step": 29022 }, { "epoch": 0.6395302076275154, "grad_norm": 0.5756052136421204, "learning_rate": 9.112584977296112e-06, "loss": 0.0705, "step": 29023 }, { "epoch": 0.6395522429170316, "grad_norm": 0.9153767228126526, "learning_rate": 9.11160036278909e-06, "loss": 0.0888, "step": 29024 }, { "epoch": 0.6395742782065478, "grad_norm": 0.32598862051963806, "learning_rate": 9.110615778274834e-06, "loss": 0.0745, "step": 29025 }, { "epoch": 0.6395963134960639, "grad_norm": 0.45391789078712463, "learning_rate": 9.109631223758378e-06, "loss": 0.1007, "step": 29026 }, { "epoch": 0.6396183487855801, "grad_norm": 0.5050930380821228, "learning_rate": 9.108646699244728e-06, "loss": 0.048, "step": 29027 }, { "epoch": 0.6396403840750963, "grad_norm": 0.5257003903388977, "learning_rate": 9.107662204738897e-06, "loss": 0.0569, "step": 29028 }, { "epoch": 0.6396624193646124, "grad_norm": 0.9180954098701477, "learning_rate": 9.106677740245905e-06, "loss": 0.0931, "step": 29029 }, { "epoch": 0.6396844546541286, "grad_norm": 0.7622851729393005, "learning_rate": 9.105693305770763e-06, "loss": 0.0715, "step": 29030 }, { "epoch": 0.6397064899436448, "grad_norm": 0.49445563554763794, "learning_rate": 9.104708901318487e-06, "loss": 0.0651, "step": 29031 }, { "epoch": 0.6397285252331609, "grad_norm": 0.4308789074420929, "learning_rate": 9.103724526894081e-06, "loss": 0.0556, "step": 29032 }, { "epoch": 0.6397505605226771, "grad_norm": 0.561345636844635, "learning_rate": 9.102740182502577e-06, "loss": 0.0684, "step": 29033 }, { "epoch": 0.6397725958121933, "grad_norm": 0.4637523591518402, "learning_rate": 9.101755868148979e-06, "loss": 0.0463, "step": 29034 }, { "epoch": 0.6397946311017094, "grad_norm": 0.43095463514328003, "learning_rate": 9.100771583838303e-06, "loss": 0.0678, "step": 29035 }, { "epoch": 0.6398166663912256, "grad_norm": 0.8054553866386414, "learning_rate": 9.09978732957555e-06, "loss": 0.0842, "step": 29036 }, { "epoch": 0.6398387016807418, "grad_norm": 0.7967904210090637, "learning_rate": 9.098803105365754e-06, "loss": 0.0729, "step": 29037 }, { "epoch": 0.6398607369702579, "grad_norm": 0.5902122259140015, "learning_rate": 9.09781891121392e-06, "loss": 0.0821, "step": 29038 }, { "epoch": 0.6398827722597741, "grad_norm": 0.449729859828949, "learning_rate": 9.096834747125056e-06, "loss": 0.0802, "step": 29039 }, { "epoch": 0.6399048075492901, "grad_norm": 0.8200892806053162, "learning_rate": 9.09585061310417e-06, "loss": 0.0717, "step": 29040 }, { "epoch": 0.6399268428388063, "grad_norm": 0.6038699746131897, "learning_rate": 9.094866509156292e-06, "loss": 0.0738, "step": 29041 }, { "epoch": 0.6399488781283225, "grad_norm": 1.2477954626083374, "learning_rate": 9.093882435286427e-06, "loss": 0.0979, "step": 29042 }, { "epoch": 0.6399709134178386, "grad_norm": 0.45443475246429443, "learning_rate": 9.092898391499582e-06, "loss": 0.0711, "step": 29043 }, { "epoch": 0.6399929487073548, "grad_norm": 0.5111546516418457, "learning_rate": 9.091914377800769e-06, "loss": 0.06, "step": 29044 }, { "epoch": 0.640014983996871, "grad_norm": 0.5848685503005981, "learning_rate": 9.09093039419501e-06, "loss": 0.0875, "step": 29045 }, { "epoch": 0.6400370192863871, "grad_norm": 0.7559820413589478, "learning_rate": 9.08994644068731e-06, "loss": 0.0527, "step": 29046 }, { "epoch": 0.6400590545759033, "grad_norm": 0.7212026119232178, "learning_rate": 9.088962517282685e-06, "loss": 0.0587, "step": 29047 }, { "epoch": 0.6400810898654195, "grad_norm": 1.898751139640808, "learning_rate": 9.087978623986138e-06, "loss": 0.0633, "step": 29048 }, { "epoch": 0.6401031251549356, "grad_norm": 0.4104599058628082, "learning_rate": 9.086994760802689e-06, "loss": 0.0628, "step": 29049 }, { "epoch": 0.6401251604444518, "grad_norm": 1.0105713605880737, "learning_rate": 9.086010927737348e-06, "loss": 0.0821, "step": 29050 }, { "epoch": 0.6401471957339679, "grad_norm": 0.6531338691711426, "learning_rate": 9.085027124795118e-06, "loss": 0.0895, "step": 29051 }, { "epoch": 0.6401692310234841, "grad_norm": 1.0485485792160034, "learning_rate": 9.084043351981023e-06, "loss": 0.0773, "step": 29052 }, { "epoch": 0.6401912663130003, "grad_norm": 0.5229544043540955, "learning_rate": 9.083059609300068e-06, "loss": 0.05, "step": 29053 }, { "epoch": 0.6402133016025164, "grad_norm": 0.7070122957229614, "learning_rate": 9.082075896757263e-06, "loss": 0.0583, "step": 29054 }, { "epoch": 0.6402353368920326, "grad_norm": 0.5401912927627563, "learning_rate": 9.081092214357617e-06, "loss": 0.0615, "step": 29055 }, { "epoch": 0.6402573721815488, "grad_norm": 0.555738091468811, "learning_rate": 9.080108562106143e-06, "loss": 0.104, "step": 29056 }, { "epoch": 0.6402794074710649, "grad_norm": 0.7155393362045288, "learning_rate": 9.07912494000785e-06, "loss": 0.0569, "step": 29057 }, { "epoch": 0.6403014427605811, "grad_norm": 0.8784344792366028, "learning_rate": 9.078141348067752e-06, "loss": 0.0643, "step": 29058 }, { "epoch": 0.6403234780500973, "grad_norm": 0.6234875321388245, "learning_rate": 9.077157786290847e-06, "loss": 0.0828, "step": 29059 }, { "epoch": 0.6403455133396134, "grad_norm": 0.1613437980413437, "learning_rate": 9.07617425468216e-06, "loss": 0.0806, "step": 29060 }, { "epoch": 0.6403675486291296, "grad_norm": 0.6550316214561462, "learning_rate": 9.075190753246695e-06, "loss": 0.0703, "step": 29061 }, { "epoch": 0.6403895839186458, "grad_norm": 0.6056686043739319, "learning_rate": 9.07420728198946e-06, "loss": 0.0604, "step": 29062 }, { "epoch": 0.6404116192081619, "grad_norm": 0.6912994384765625, "learning_rate": 9.073223840915463e-06, "loss": 0.0743, "step": 29063 }, { "epoch": 0.6404336544976781, "grad_norm": 0.637714684009552, "learning_rate": 9.072240430029715e-06, "loss": 0.0937, "step": 29064 }, { "epoch": 0.6404556897871941, "grad_norm": 0.5450294017791748, "learning_rate": 9.07125704933723e-06, "loss": 0.0658, "step": 29065 }, { "epoch": 0.6404777250767103, "grad_norm": 0.48537692427635193, "learning_rate": 9.07027369884301e-06, "loss": 0.049, "step": 29066 }, { "epoch": 0.6404997603662265, "grad_norm": 0.42231640219688416, "learning_rate": 9.069290378552057e-06, "loss": 0.0653, "step": 29067 }, { "epoch": 0.6405217956557426, "grad_norm": 0.7985686659812927, "learning_rate": 9.068307088469398e-06, "loss": 0.0492, "step": 29068 }, { "epoch": 0.6405438309452588, "grad_norm": 0.7729217410087585, "learning_rate": 9.067323828600031e-06, "loss": 0.0352, "step": 29069 }, { "epoch": 0.640565866234775, "grad_norm": 0.32626307010650635, "learning_rate": 9.066340598948965e-06, "loss": 0.0706, "step": 29070 }, { "epoch": 0.6405879015242911, "grad_norm": 0.5033950805664062, "learning_rate": 9.065357399521206e-06, "loss": 0.0813, "step": 29071 }, { "epoch": 0.6406099368138073, "grad_norm": 0.5390064716339111, "learning_rate": 9.064374230321767e-06, "loss": 0.0552, "step": 29072 }, { "epoch": 0.6406319721033235, "grad_norm": 0.42454344034194946, "learning_rate": 9.06339109135565e-06, "loss": 0.0449, "step": 29073 }, { "epoch": 0.6406540073928396, "grad_norm": 0.6141131520271301, "learning_rate": 9.062407982627863e-06, "loss": 0.0656, "step": 29074 }, { "epoch": 0.6406760426823558, "grad_norm": 0.4897069036960602, "learning_rate": 9.061424904143419e-06, "loss": 0.0551, "step": 29075 }, { "epoch": 0.640698077971872, "grad_norm": 0.6430425643920898, "learning_rate": 9.060441855907324e-06, "loss": 0.0726, "step": 29076 }, { "epoch": 0.6407201132613881, "grad_norm": 0.7898452281951904, "learning_rate": 9.059458837924585e-06, "loss": 0.076, "step": 29077 }, { "epoch": 0.6407421485509043, "grad_norm": 0.7373579144477844, "learning_rate": 9.058475850200203e-06, "loss": 0.0837, "step": 29078 }, { "epoch": 0.6407641838404204, "grad_norm": 0.4188007116317749, "learning_rate": 9.057492892739193e-06, "loss": 0.0528, "step": 29079 }, { "epoch": 0.6407862191299366, "grad_norm": 0.3364483118057251, "learning_rate": 9.05650996554656e-06, "loss": 0.0391, "step": 29080 }, { "epoch": 0.6408082544194528, "grad_norm": 0.4905546009540558, "learning_rate": 9.055527068627308e-06, "loss": 0.0515, "step": 29081 }, { "epoch": 0.6408302897089689, "grad_norm": 0.5638499855995178, "learning_rate": 9.054544201986436e-06, "loss": 0.0538, "step": 29082 }, { "epoch": 0.6408523249984851, "grad_norm": 0.6473427414894104, "learning_rate": 9.053561365628967e-06, "loss": 0.0557, "step": 29083 }, { "epoch": 0.6408743602880013, "grad_norm": 0.5005666017532349, "learning_rate": 9.052578559559899e-06, "loss": 0.0447, "step": 29084 }, { "epoch": 0.6408963955775174, "grad_norm": 1.014551043510437, "learning_rate": 9.051595783784236e-06, "loss": 0.0893, "step": 29085 }, { "epoch": 0.6409184308670336, "grad_norm": 0.5850516557693481, "learning_rate": 9.050613038306984e-06, "loss": 0.086, "step": 29086 }, { "epoch": 0.6409404661565498, "grad_norm": 0.4025411903858185, "learning_rate": 9.049630323133152e-06, "loss": 0.0427, "step": 29087 }, { "epoch": 0.6409625014460659, "grad_norm": 0.763215959072113, "learning_rate": 9.048647638267744e-06, "loss": 0.0927, "step": 29088 }, { "epoch": 0.6409845367355821, "grad_norm": 0.7243875861167908, "learning_rate": 9.047664983715765e-06, "loss": 0.068, "step": 29089 }, { "epoch": 0.6410065720250981, "grad_norm": 0.7415657043457031, "learning_rate": 9.046682359482214e-06, "loss": 0.0874, "step": 29090 }, { "epoch": 0.6410286073146143, "grad_norm": 0.409842312335968, "learning_rate": 9.045699765572108e-06, "loss": 0.046, "step": 29091 }, { "epoch": 0.6410506426041305, "grad_norm": 0.6994452476501465, "learning_rate": 9.044717201990448e-06, "loss": 0.0546, "step": 29092 }, { "epoch": 0.6410726778936466, "grad_norm": 0.5512510538101196, "learning_rate": 9.043734668742235e-06, "loss": 0.0511, "step": 29093 }, { "epoch": 0.6410947131831628, "grad_norm": 0.81577467918396, "learning_rate": 9.042752165832473e-06, "loss": 0.0859, "step": 29094 }, { "epoch": 0.641116748472679, "grad_norm": 0.4701675772666931, "learning_rate": 9.04176969326617e-06, "loss": 0.046, "step": 29095 }, { "epoch": 0.6411387837621951, "grad_norm": 0.5655841827392578, "learning_rate": 9.04078725104833e-06, "loss": 0.0473, "step": 29096 }, { "epoch": 0.6411608190517113, "grad_norm": 0.6729776859283447, "learning_rate": 9.039804839183949e-06, "loss": 0.0515, "step": 29097 }, { "epoch": 0.6411828543412275, "grad_norm": 0.5486229658126831, "learning_rate": 9.038822457678047e-06, "loss": 0.062, "step": 29098 }, { "epoch": 0.6412048896307436, "grad_norm": 0.6869468688964844, "learning_rate": 9.037840106535616e-06, "loss": 0.0669, "step": 29099 }, { "epoch": 0.6412269249202598, "grad_norm": 0.3928307294845581, "learning_rate": 9.036857785761664e-06, "loss": 0.0587, "step": 29100 }, { "epoch": 0.641248960209776, "grad_norm": 0.5154814124107361, "learning_rate": 9.035875495361189e-06, "loss": 0.0572, "step": 29101 }, { "epoch": 0.6412709954992921, "grad_norm": 0.455369770526886, "learning_rate": 9.034893235339202e-06, "loss": 0.0482, "step": 29102 }, { "epoch": 0.6412930307888083, "grad_norm": 0.6818335056304932, "learning_rate": 9.033911005700703e-06, "loss": 0.0757, "step": 29103 }, { "epoch": 0.6413150660783244, "grad_norm": 0.3183644413948059, "learning_rate": 9.032928806450694e-06, "loss": 0.0427, "step": 29104 }, { "epoch": 0.6413371013678406, "grad_norm": 0.5299171209335327, "learning_rate": 9.031946637594173e-06, "loss": 0.0701, "step": 29105 }, { "epoch": 0.6413591366573568, "grad_norm": 0.6783010363578796, "learning_rate": 9.030964499136153e-06, "loss": 0.074, "step": 29106 }, { "epoch": 0.6413811719468729, "grad_norm": 0.7638279795646667, "learning_rate": 9.029982391081631e-06, "loss": 0.0674, "step": 29107 }, { "epoch": 0.6414032072363891, "grad_norm": 0.6129043102264404, "learning_rate": 9.029000313435612e-06, "loss": 0.0941, "step": 29108 }, { "epoch": 0.6414252425259053, "grad_norm": 0.5193517804145813, "learning_rate": 9.028018266203092e-06, "loss": 0.0754, "step": 29109 }, { "epoch": 0.6414472778154214, "grad_norm": 0.7390658259391785, "learning_rate": 9.027036249389082e-06, "loss": 0.0652, "step": 29110 }, { "epoch": 0.6414693131049376, "grad_norm": 0.5749096274375916, "learning_rate": 9.026054262998577e-06, "loss": 0.0606, "step": 29111 }, { "epoch": 0.6414913483944538, "grad_norm": 0.47970789670944214, "learning_rate": 9.025072307036583e-06, "loss": 0.0587, "step": 29112 }, { "epoch": 0.6415133836839699, "grad_norm": 0.5717050433158875, "learning_rate": 9.024090381508093e-06, "loss": 0.0845, "step": 29113 }, { "epoch": 0.641535418973486, "grad_norm": 0.5432242751121521, "learning_rate": 9.023108486418121e-06, "loss": 0.0732, "step": 29114 }, { "epoch": 0.6415574542630021, "grad_norm": 0.4283613860607147, "learning_rate": 9.022126621771664e-06, "loss": 0.0586, "step": 29115 }, { "epoch": 0.6415794895525183, "grad_norm": 0.48479199409484863, "learning_rate": 9.021144787573719e-06, "loss": 0.0684, "step": 29116 }, { "epoch": 0.6416015248420345, "grad_norm": 0.6184899806976318, "learning_rate": 9.020162983829288e-06, "loss": 0.056, "step": 29117 }, { "epoch": 0.6416235601315506, "grad_norm": 0.32844746112823486, "learning_rate": 9.019181210543378e-06, "loss": 0.045, "step": 29118 }, { "epoch": 0.6416455954210668, "grad_norm": 0.7038609981536865, "learning_rate": 9.018199467720985e-06, "loss": 0.0481, "step": 29119 }, { "epoch": 0.641667630710583, "grad_norm": 0.1434410810470581, "learning_rate": 9.017217755367103e-06, "loss": 0.0366, "step": 29120 }, { "epoch": 0.6416896660000991, "grad_norm": 0.5432190895080566, "learning_rate": 9.016236073486746e-06, "loss": 0.0427, "step": 29121 }, { "epoch": 0.6417117012896153, "grad_norm": 0.5886496305465698, "learning_rate": 9.015254422084906e-06, "loss": 0.0907, "step": 29122 }, { "epoch": 0.6417337365791315, "grad_norm": 0.5140863656997681, "learning_rate": 9.014272801166585e-06, "loss": 0.0582, "step": 29123 }, { "epoch": 0.6417557718686476, "grad_norm": 0.6968666315078735, "learning_rate": 9.013291210736779e-06, "loss": 0.1055, "step": 29124 }, { "epoch": 0.6417778071581638, "grad_norm": 0.5808618068695068, "learning_rate": 9.012309650800494e-06, "loss": 0.0574, "step": 29125 }, { "epoch": 0.64179984244768, "grad_norm": 0.830231249332428, "learning_rate": 9.011328121362728e-06, "loss": 0.0856, "step": 29126 }, { "epoch": 0.6418218777371961, "grad_norm": 0.26585525274276733, "learning_rate": 9.010346622428478e-06, "loss": 0.0628, "step": 29127 }, { "epoch": 0.6418439130267123, "grad_norm": 0.6479138731956482, "learning_rate": 9.00936515400274e-06, "loss": 0.0599, "step": 29128 }, { "epoch": 0.6418659483162285, "grad_norm": 0.5127253532409668, "learning_rate": 9.008383716090522e-06, "loss": 0.0688, "step": 29129 }, { "epoch": 0.6418879836057446, "grad_norm": 0.43989190459251404, "learning_rate": 9.00740230869682e-06, "loss": 0.092, "step": 29130 }, { "epoch": 0.6419100188952608, "grad_norm": 0.8083718419075012, "learning_rate": 9.006420931826633e-06, "loss": 0.0813, "step": 29131 }, { "epoch": 0.641932054184777, "grad_norm": 0.4278149902820587, "learning_rate": 9.005439585484954e-06, "loss": 0.0647, "step": 29132 }, { "epoch": 0.6419540894742931, "grad_norm": 0.22435560822486877, "learning_rate": 9.004458269676788e-06, "loss": 0.036, "step": 29133 }, { "epoch": 0.6419761247638093, "grad_norm": 0.6165581941604614, "learning_rate": 9.00347698440713e-06, "loss": 0.0444, "step": 29134 }, { "epoch": 0.6419981600533254, "grad_norm": 0.5856360793113708, "learning_rate": 9.00249572968098e-06, "loss": 0.0546, "step": 29135 }, { "epoch": 0.6420201953428416, "grad_norm": 0.7230313420295715, "learning_rate": 9.00151450550333e-06, "loss": 0.0935, "step": 29136 }, { "epoch": 0.6420422306323578, "grad_norm": 0.5367080569267273, "learning_rate": 9.00053331187919e-06, "loss": 0.0808, "step": 29137 }, { "epoch": 0.6420642659218739, "grad_norm": 0.47824156284332275, "learning_rate": 8.99955214881355e-06, "loss": 0.058, "step": 29138 }, { "epoch": 0.64208630121139, "grad_norm": 0.348531037569046, "learning_rate": 8.998571016311403e-06, "loss": 0.0582, "step": 29139 }, { "epoch": 0.6421083365009062, "grad_norm": 0.42598673701286316, "learning_rate": 8.997589914377762e-06, "loss": 0.0448, "step": 29140 }, { "epoch": 0.6421303717904223, "grad_norm": 0.5396230220794678, "learning_rate": 8.99660884301761e-06, "loss": 0.0709, "step": 29141 }, { "epoch": 0.6421524070799385, "grad_norm": 0.5455288887023926, "learning_rate": 8.995627802235946e-06, "loss": 0.0347, "step": 29142 }, { "epoch": 0.6421744423694546, "grad_norm": 0.509505569934845, "learning_rate": 8.994646792037767e-06, "loss": 0.0839, "step": 29143 }, { "epoch": 0.6421964776589708, "grad_norm": 0.8137147426605225, "learning_rate": 8.993665812428076e-06, "loss": 0.0782, "step": 29144 }, { "epoch": 0.642218512948487, "grad_norm": 0.7826604843139648, "learning_rate": 8.992684863411868e-06, "loss": 0.0843, "step": 29145 }, { "epoch": 0.6422405482380031, "grad_norm": 0.6147779822349548, "learning_rate": 8.991703944994136e-06, "loss": 0.063, "step": 29146 }, { "epoch": 0.6422625835275193, "grad_norm": 0.7411148548126221, "learning_rate": 8.990723057179872e-06, "loss": 0.067, "step": 29147 }, { "epoch": 0.6422846188170355, "grad_norm": 0.6530736684799194, "learning_rate": 8.989742199974085e-06, "loss": 0.1023, "step": 29148 }, { "epoch": 0.6423066541065516, "grad_norm": 0.8885211944580078, "learning_rate": 8.988761373381763e-06, "loss": 0.0814, "step": 29149 }, { "epoch": 0.6423286893960678, "grad_norm": 0.46242383122444153, "learning_rate": 8.987780577407907e-06, "loss": 0.0631, "step": 29150 }, { "epoch": 0.642350724685584, "grad_norm": 1.0092462301254272, "learning_rate": 8.986799812057497e-06, "loss": 0.0697, "step": 29151 }, { "epoch": 0.6423727599751001, "grad_norm": 0.6022233963012695, "learning_rate": 8.985819077335548e-06, "loss": 0.036, "step": 29152 }, { "epoch": 0.6423947952646163, "grad_norm": 0.49831870198249817, "learning_rate": 8.984838373247047e-06, "loss": 0.0634, "step": 29153 }, { "epoch": 0.6424168305541325, "grad_norm": 0.6148033738136292, "learning_rate": 8.983857699796989e-06, "loss": 0.0847, "step": 29154 }, { "epoch": 0.6424388658436486, "grad_norm": 0.24543894827365875, "learning_rate": 8.982877056990364e-06, "loss": 0.0362, "step": 29155 }, { "epoch": 0.6424609011331648, "grad_norm": 0.4960460364818573, "learning_rate": 8.98189644483218e-06, "loss": 0.0613, "step": 29156 }, { "epoch": 0.642482936422681, "grad_norm": 0.6479189991950989, "learning_rate": 8.980915863327425e-06, "loss": 0.0842, "step": 29157 }, { "epoch": 0.6425049717121971, "grad_norm": 0.32230377197265625, "learning_rate": 8.979935312481092e-06, "loss": 0.0683, "step": 29158 }, { "epoch": 0.6425270070017133, "grad_norm": 0.8330153822898865, "learning_rate": 8.978954792298172e-06, "loss": 0.0714, "step": 29159 }, { "epoch": 0.6425490422912294, "grad_norm": 0.3380027413368225, "learning_rate": 8.977974302783669e-06, "loss": 0.0561, "step": 29160 }, { "epoch": 0.6425710775807456, "grad_norm": 0.7606369256973267, "learning_rate": 8.976993843942574e-06, "loss": 0.0641, "step": 29161 }, { "epoch": 0.6425931128702618, "grad_norm": 0.6658729314804077, "learning_rate": 8.97601341577987e-06, "loss": 0.0695, "step": 29162 }, { "epoch": 0.6426151481597779, "grad_norm": 0.5124139189720154, "learning_rate": 8.975033018300567e-06, "loss": 0.0579, "step": 29163 }, { "epoch": 0.642637183449294, "grad_norm": 0.683372974395752, "learning_rate": 8.974052651509652e-06, "loss": 0.0527, "step": 29164 }, { "epoch": 0.6426592187388102, "grad_norm": 0.47082898020744324, "learning_rate": 8.973072315412116e-06, "loss": 0.0547, "step": 29165 }, { "epoch": 0.6426812540283263, "grad_norm": 0.5263024568557739, "learning_rate": 8.972092010012954e-06, "loss": 0.0684, "step": 29166 }, { "epoch": 0.6427032893178425, "grad_norm": 0.6027927994728088, "learning_rate": 8.971111735317163e-06, "loss": 0.0661, "step": 29167 }, { "epoch": 0.6427253246073587, "grad_norm": 0.5360273122787476, "learning_rate": 8.970131491329732e-06, "loss": 0.0686, "step": 29168 }, { "epoch": 0.6427473598968748, "grad_norm": 0.5925490856170654, "learning_rate": 8.969151278055656e-06, "loss": 0.0694, "step": 29169 }, { "epoch": 0.642769395186391, "grad_norm": 1.0375192165374756, "learning_rate": 8.96817109549992e-06, "loss": 0.0807, "step": 29170 }, { "epoch": 0.6427914304759071, "grad_norm": 0.5693178176879883, "learning_rate": 8.967190943667528e-06, "loss": 0.0574, "step": 29171 }, { "epoch": 0.6428134657654233, "grad_norm": 0.9056345224380493, "learning_rate": 8.96621082256347e-06, "loss": 0.0993, "step": 29172 }, { "epoch": 0.6428355010549395, "grad_norm": 0.5967090725898743, "learning_rate": 8.96523073219273e-06, "loss": 0.0493, "step": 29173 }, { "epoch": 0.6428575363444556, "grad_norm": 0.7039657235145569, "learning_rate": 8.964250672560308e-06, "loss": 0.0761, "step": 29174 }, { "epoch": 0.6428795716339718, "grad_norm": 0.7641739845275879, "learning_rate": 8.963270643671196e-06, "loss": 0.0653, "step": 29175 }, { "epoch": 0.642901606923488, "grad_norm": 0.7530319690704346, "learning_rate": 8.962290645530385e-06, "loss": 0.0605, "step": 29176 }, { "epoch": 0.6429236422130041, "grad_norm": 0.48499271273612976, "learning_rate": 8.961310678142864e-06, "loss": 0.0999, "step": 29177 }, { "epoch": 0.6429456775025203, "grad_norm": 0.48973140120506287, "learning_rate": 8.96033074151362e-06, "loss": 0.0531, "step": 29178 }, { "epoch": 0.6429677127920365, "grad_norm": 0.4532783627510071, "learning_rate": 8.959350835647656e-06, "loss": 0.0621, "step": 29179 }, { "epoch": 0.6429897480815526, "grad_norm": 0.6561151742935181, "learning_rate": 8.958370960549958e-06, "loss": 0.064, "step": 29180 }, { "epoch": 0.6430117833710688, "grad_norm": 0.6892368197441101, "learning_rate": 8.957391116225516e-06, "loss": 0.0735, "step": 29181 }, { "epoch": 0.643033818660585, "grad_norm": 0.5610438585281372, "learning_rate": 8.956411302679319e-06, "loss": 0.0859, "step": 29182 }, { "epoch": 0.6430558539501011, "grad_norm": 0.558312177658081, "learning_rate": 8.955431519916362e-06, "loss": 0.0672, "step": 29183 }, { "epoch": 0.6430778892396173, "grad_norm": 0.45879313349723816, "learning_rate": 8.954451767941637e-06, "loss": 0.0478, "step": 29184 }, { "epoch": 0.6430999245291334, "grad_norm": 0.809954047203064, "learning_rate": 8.95347204676012e-06, "loss": 0.0683, "step": 29185 }, { "epoch": 0.6431219598186496, "grad_norm": 0.39287233352661133, "learning_rate": 8.952492356376821e-06, "loss": 0.0478, "step": 29186 }, { "epoch": 0.6431439951081658, "grad_norm": 0.5330654978752136, "learning_rate": 8.951512696796721e-06, "loss": 0.0782, "step": 29187 }, { "epoch": 0.6431660303976819, "grad_norm": 0.4994993507862091, "learning_rate": 8.95053306802481e-06, "loss": 0.0604, "step": 29188 }, { "epoch": 0.643188065687198, "grad_norm": 0.3608059883117676, "learning_rate": 8.949553470066075e-06, "loss": 0.0639, "step": 29189 }, { "epoch": 0.6432101009767142, "grad_norm": 0.4348536431789398, "learning_rate": 8.948573902925513e-06, "loss": 0.0681, "step": 29190 }, { "epoch": 0.6432321362662303, "grad_norm": 0.5076552033424377, "learning_rate": 8.947594366608108e-06, "loss": 0.0732, "step": 29191 }, { "epoch": 0.6432541715557465, "grad_norm": 0.42370089888572693, "learning_rate": 8.946614861118851e-06, "loss": 0.0665, "step": 29192 }, { "epoch": 0.6432762068452627, "grad_norm": 0.69890958070755, "learning_rate": 8.945635386462723e-06, "loss": 0.0706, "step": 29193 }, { "epoch": 0.6432982421347788, "grad_norm": 0.5318859219551086, "learning_rate": 8.944655942644728e-06, "loss": 0.0453, "step": 29194 }, { "epoch": 0.643320277424295, "grad_norm": 1.3679094314575195, "learning_rate": 8.943676529669846e-06, "loss": 0.0638, "step": 29195 }, { "epoch": 0.6433423127138111, "grad_norm": 0.6161647439002991, "learning_rate": 8.94269714754307e-06, "loss": 0.0532, "step": 29196 }, { "epoch": 0.6433643480033273, "grad_norm": 0.6255645155906677, "learning_rate": 8.941717796269381e-06, "loss": 0.0582, "step": 29197 }, { "epoch": 0.6433863832928435, "grad_norm": 0.6406059265136719, "learning_rate": 8.940738475853775e-06, "loss": 0.0606, "step": 29198 }, { "epoch": 0.6434084185823596, "grad_norm": 0.49923425912857056, "learning_rate": 8.939759186301236e-06, "loss": 0.0806, "step": 29199 }, { "epoch": 0.6434304538718758, "grad_norm": 0.4630814790725708, "learning_rate": 8.938779927616756e-06, "loss": 0.0594, "step": 29200 }, { "epoch": 0.643452489161392, "grad_norm": 0.5735921263694763, "learning_rate": 8.93780069980531e-06, "loss": 0.0783, "step": 29201 }, { "epoch": 0.6434745244509081, "grad_norm": 0.5713361501693726, "learning_rate": 8.936821502871905e-06, "loss": 0.0674, "step": 29202 }, { "epoch": 0.6434965597404243, "grad_norm": 0.5361645817756653, "learning_rate": 8.93584233682152e-06, "loss": 0.0554, "step": 29203 }, { "epoch": 0.6435185950299405, "grad_norm": 0.5711652636528015, "learning_rate": 8.93486320165914e-06, "loss": 0.0665, "step": 29204 }, { "epoch": 0.6435406303194566, "grad_norm": 0.32051682472229004, "learning_rate": 8.933884097389752e-06, "loss": 0.0994, "step": 29205 }, { "epoch": 0.6435626656089728, "grad_norm": 0.6166269183158875, "learning_rate": 8.932905024018347e-06, "loss": 0.0962, "step": 29206 }, { "epoch": 0.643584700898489, "grad_norm": 0.8292335271835327, "learning_rate": 8.93192598154991e-06, "loss": 0.0992, "step": 29207 }, { "epoch": 0.6436067361880051, "grad_norm": 0.24942438304424286, "learning_rate": 8.930946969989423e-06, "loss": 0.0474, "step": 29208 }, { "epoch": 0.6436287714775213, "grad_norm": 0.49932730197906494, "learning_rate": 8.929967989341884e-06, "loss": 0.0787, "step": 29209 }, { "epoch": 0.6436508067670375, "grad_norm": 0.9966827630996704, "learning_rate": 8.928989039612272e-06, "loss": 0.0579, "step": 29210 }, { "epoch": 0.6436728420565536, "grad_norm": 0.7062992453575134, "learning_rate": 8.928010120805576e-06, "loss": 0.071, "step": 29211 }, { "epoch": 0.6436948773460698, "grad_norm": 0.6102541089057922, "learning_rate": 8.927031232926777e-06, "loss": 0.0431, "step": 29212 }, { "epoch": 0.6437169126355858, "grad_norm": 0.661281168460846, "learning_rate": 8.926052375980866e-06, "loss": 0.0605, "step": 29213 }, { "epoch": 0.643738947925102, "grad_norm": 0.6264732480049133, "learning_rate": 8.92507354997283e-06, "loss": 0.0739, "step": 29214 }, { "epoch": 0.6437609832146182, "grad_norm": 0.6252748370170593, "learning_rate": 8.924094754907648e-06, "loss": 0.0508, "step": 29215 }, { "epoch": 0.6437830185041343, "grad_norm": 0.7302530407905579, "learning_rate": 8.923115990790305e-06, "loss": 0.0824, "step": 29216 }, { "epoch": 0.6438050537936505, "grad_norm": 0.31374597549438477, "learning_rate": 8.9221372576258e-06, "loss": 0.0509, "step": 29217 }, { "epoch": 0.6438270890831667, "grad_norm": 0.9650818109512329, "learning_rate": 8.921158555419106e-06, "loss": 0.0793, "step": 29218 }, { "epoch": 0.6438491243726828, "grad_norm": 0.6435657143592834, "learning_rate": 8.920179884175212e-06, "loss": 0.0577, "step": 29219 }, { "epoch": 0.643871159662199, "grad_norm": 0.6493266820907593, "learning_rate": 8.919201243899099e-06, "loss": 0.0574, "step": 29220 }, { "epoch": 0.6438931949517152, "grad_norm": 0.8085549473762512, "learning_rate": 8.91822263459576e-06, "loss": 0.0705, "step": 29221 }, { "epoch": 0.6439152302412313, "grad_norm": 1.240112066268921, "learning_rate": 8.917244056270171e-06, "loss": 0.1181, "step": 29222 }, { "epoch": 0.6439372655307475, "grad_norm": 0.7242254018783569, "learning_rate": 8.916265508927321e-06, "loss": 0.0486, "step": 29223 }, { "epoch": 0.6439593008202636, "grad_norm": 0.6324657797813416, "learning_rate": 8.915286992572189e-06, "loss": 0.0806, "step": 29224 }, { "epoch": 0.6439813361097798, "grad_norm": 1.180126667022705, "learning_rate": 8.914308507209767e-06, "loss": 0.0926, "step": 29225 }, { "epoch": 0.644003371399296, "grad_norm": 0.6464278101921082, "learning_rate": 8.913330052845036e-06, "loss": 0.0542, "step": 29226 }, { "epoch": 0.6440254066888121, "grad_norm": 0.7946217060089111, "learning_rate": 8.91235162948298e-06, "loss": 0.0849, "step": 29227 }, { "epoch": 0.6440474419783283, "grad_norm": 0.9975650310516357, "learning_rate": 8.911373237128576e-06, "loss": 0.0955, "step": 29228 }, { "epoch": 0.6440694772678445, "grad_norm": 0.5234448313713074, "learning_rate": 8.910394875786818e-06, "loss": 0.099, "step": 29229 }, { "epoch": 0.6440915125573606, "grad_norm": 0.4133621156215668, "learning_rate": 8.909416545462684e-06, "loss": 0.0366, "step": 29230 }, { "epoch": 0.6441135478468768, "grad_norm": 0.5459402203559875, "learning_rate": 8.908438246161152e-06, "loss": 0.0639, "step": 29231 }, { "epoch": 0.644135583136393, "grad_norm": 0.7258583903312683, "learning_rate": 8.907459977887215e-06, "loss": 0.0551, "step": 29232 }, { "epoch": 0.6441576184259091, "grad_norm": 0.5185603499412537, "learning_rate": 8.906481740645853e-06, "loss": 0.0557, "step": 29233 }, { "epoch": 0.6441796537154253, "grad_norm": 0.4760051369667053, "learning_rate": 8.905503534442048e-06, "loss": 0.0672, "step": 29234 }, { "epoch": 0.6442016890049415, "grad_norm": 0.8643801808357239, "learning_rate": 8.904525359280776e-06, "loss": 0.0556, "step": 29235 }, { "epoch": 0.6442237242944576, "grad_norm": 1.0324844121932983, "learning_rate": 8.903547215167028e-06, "loss": 0.0638, "step": 29236 }, { "epoch": 0.6442457595839738, "grad_norm": 1.0670760869979858, "learning_rate": 8.902569102105787e-06, "loss": 0.081, "step": 29237 }, { "epoch": 0.6442677948734898, "grad_norm": 0.5433164238929749, "learning_rate": 8.901591020102028e-06, "loss": 0.0512, "step": 29238 }, { "epoch": 0.644289830163006, "grad_norm": 0.4970652461051941, "learning_rate": 8.90061296916073e-06, "loss": 0.0445, "step": 29239 }, { "epoch": 0.6443118654525222, "grad_norm": 0.5918079018592834, "learning_rate": 8.899634949286888e-06, "loss": 0.093, "step": 29240 }, { "epoch": 0.6443339007420383, "grad_norm": 0.5678342580795288, "learning_rate": 8.898656960485477e-06, "loss": 0.0664, "step": 29241 }, { "epoch": 0.6443559360315545, "grad_norm": 0.23425321280956268, "learning_rate": 8.897679002761477e-06, "loss": 0.0721, "step": 29242 }, { "epoch": 0.6443779713210707, "grad_norm": 0.655169665813446, "learning_rate": 8.896701076119868e-06, "loss": 0.1008, "step": 29243 }, { "epoch": 0.6444000066105868, "grad_norm": 0.8081951141357422, "learning_rate": 8.895723180565635e-06, "loss": 0.0868, "step": 29244 }, { "epoch": 0.644422041900103, "grad_norm": 0.6760775446891785, "learning_rate": 8.894745316103758e-06, "loss": 0.0588, "step": 29245 }, { "epoch": 0.6444440771896192, "grad_norm": 0.6103394031524658, "learning_rate": 8.893767482739216e-06, "loss": 0.0311, "step": 29246 }, { "epoch": 0.6444661124791353, "grad_norm": 0.549345850944519, "learning_rate": 8.892789680476987e-06, "loss": 0.0529, "step": 29247 }, { "epoch": 0.6444881477686515, "grad_norm": 0.4877746105194092, "learning_rate": 8.891811909322058e-06, "loss": 0.0819, "step": 29248 }, { "epoch": 0.6445101830581677, "grad_norm": 0.8777573704719543, "learning_rate": 8.890834169279408e-06, "loss": 0.0639, "step": 29249 }, { "epoch": 0.6445322183476838, "grad_norm": 0.5005985498428345, "learning_rate": 8.889856460354015e-06, "loss": 0.0682, "step": 29250 }, { "epoch": 0.6445542536372, "grad_norm": 0.5034152865409851, "learning_rate": 8.888878782550859e-06, "loss": 0.0672, "step": 29251 }, { "epoch": 0.6445762889267161, "grad_norm": 0.47369739413261414, "learning_rate": 8.887901135874921e-06, "loss": 0.0646, "step": 29252 }, { "epoch": 0.6445983242162323, "grad_norm": 0.6736736297607422, "learning_rate": 8.88692352033118e-06, "loss": 0.0696, "step": 29253 }, { "epoch": 0.6446203595057485, "grad_norm": 0.4743441939353943, "learning_rate": 8.88594593592461e-06, "loss": 0.085, "step": 29254 }, { "epoch": 0.6446423947952646, "grad_norm": 0.646905243396759, "learning_rate": 8.884968382660202e-06, "loss": 0.0545, "step": 29255 }, { "epoch": 0.6446644300847808, "grad_norm": 0.7941100597381592, "learning_rate": 8.88399086054293e-06, "loss": 0.0607, "step": 29256 }, { "epoch": 0.644686465374297, "grad_norm": 0.39489439129829407, "learning_rate": 8.883013369577771e-06, "loss": 0.0472, "step": 29257 }, { "epoch": 0.6447085006638131, "grad_norm": 0.4674449563026428, "learning_rate": 8.8820359097697e-06, "loss": 0.0824, "step": 29258 }, { "epoch": 0.6447305359533293, "grad_norm": 0.4274633526802063, "learning_rate": 8.881058481123705e-06, "loss": 0.0658, "step": 29259 }, { "epoch": 0.6447525712428455, "grad_norm": 0.8869708776473999, "learning_rate": 8.880081083644764e-06, "loss": 0.1057, "step": 29260 }, { "epoch": 0.6447746065323616, "grad_norm": 0.7257350087165833, "learning_rate": 8.87910371733785e-06, "loss": 0.0911, "step": 29261 }, { "epoch": 0.6447966418218778, "grad_norm": 1.196028470993042, "learning_rate": 8.878126382207937e-06, "loss": 0.0758, "step": 29262 }, { "epoch": 0.6448186771113938, "grad_norm": 0.7906181812286377, "learning_rate": 8.877149078260016e-06, "loss": 0.0621, "step": 29263 }, { "epoch": 0.64484071240091, "grad_norm": 0.9142982363700867, "learning_rate": 8.876171805499056e-06, "loss": 0.0743, "step": 29264 }, { "epoch": 0.6448627476904262, "grad_norm": 0.7897620797157288, "learning_rate": 8.87519456393004e-06, "loss": 0.0664, "step": 29265 }, { "epoch": 0.6448847829799423, "grad_norm": 0.4828214943408966, "learning_rate": 8.874217353557934e-06, "loss": 0.0588, "step": 29266 }, { "epoch": 0.6449068182694585, "grad_norm": 0.7214249968528748, "learning_rate": 8.87324017438773e-06, "loss": 0.0765, "step": 29267 }, { "epoch": 0.6449288535589747, "grad_norm": 0.5996409058570862, "learning_rate": 8.8722630264244e-06, "loss": 0.0589, "step": 29268 }, { "epoch": 0.6449508888484908, "grad_norm": 0.5520323514938354, "learning_rate": 8.871285909672919e-06, "loss": 0.0405, "step": 29269 }, { "epoch": 0.644972924138007, "grad_norm": 0.7093064785003662, "learning_rate": 8.87030882413826e-06, "loss": 0.0536, "step": 29270 }, { "epoch": 0.6449949594275232, "grad_norm": 0.745803952217102, "learning_rate": 8.869331769825413e-06, "loss": 0.1214, "step": 29271 }, { "epoch": 0.6450169947170393, "grad_norm": 0.46751731634140015, "learning_rate": 8.868354746739346e-06, "loss": 0.0464, "step": 29272 }, { "epoch": 0.6450390300065555, "grad_norm": 0.6777152419090271, "learning_rate": 8.867377754885029e-06, "loss": 0.0646, "step": 29273 }, { "epoch": 0.6450610652960717, "grad_norm": 0.9623287320137024, "learning_rate": 8.866400794267452e-06, "loss": 0.0761, "step": 29274 }, { "epoch": 0.6450831005855878, "grad_norm": 0.42350777983665466, "learning_rate": 8.865423864891586e-06, "loss": 0.0676, "step": 29275 }, { "epoch": 0.645105135875104, "grad_norm": 0.5625650882720947, "learning_rate": 8.864446966762405e-06, "loss": 0.0613, "step": 29276 }, { "epoch": 0.6451271711646202, "grad_norm": 0.7975074052810669, "learning_rate": 8.863470099884882e-06, "loss": 0.0973, "step": 29277 }, { "epoch": 0.6451492064541363, "grad_norm": 0.7414839267730713, "learning_rate": 8.862493264264e-06, "loss": 0.0847, "step": 29278 }, { "epoch": 0.6451712417436525, "grad_norm": 0.7794817686080933, "learning_rate": 8.86151645990473e-06, "loss": 0.084, "step": 29279 }, { "epoch": 0.6451932770331686, "grad_norm": 0.7829513549804688, "learning_rate": 8.86053968681205e-06, "loss": 0.0712, "step": 29280 }, { "epoch": 0.6452153123226848, "grad_norm": 0.48600533604621887, "learning_rate": 8.859562944990927e-06, "loss": 0.0779, "step": 29281 }, { "epoch": 0.645237347612201, "grad_norm": 0.4283404052257538, "learning_rate": 8.858586234446348e-06, "loss": 0.0901, "step": 29282 }, { "epoch": 0.6452593829017171, "grad_norm": 0.48864349722862244, "learning_rate": 8.857609555183282e-06, "loss": 0.0303, "step": 29283 }, { "epoch": 0.6452814181912333, "grad_norm": 0.6393144130706787, "learning_rate": 8.856632907206708e-06, "loss": 0.1058, "step": 29284 }, { "epoch": 0.6453034534807495, "grad_norm": 0.6254740953445435, "learning_rate": 8.855656290521591e-06, "loss": 0.053, "step": 29285 }, { "epoch": 0.6453254887702656, "grad_norm": 0.9583222270011902, "learning_rate": 8.854679705132915e-06, "loss": 0.1193, "step": 29286 }, { "epoch": 0.6453475240597817, "grad_norm": 0.41857200860977173, "learning_rate": 8.85370315104565e-06, "loss": 0.0674, "step": 29287 }, { "epoch": 0.6453695593492978, "grad_norm": 0.6390953660011292, "learning_rate": 8.852726628264773e-06, "loss": 0.0758, "step": 29288 }, { "epoch": 0.645391594638814, "grad_norm": 0.8144104480743408, "learning_rate": 8.851750136795247e-06, "loss": 0.081, "step": 29289 }, { "epoch": 0.6454136299283302, "grad_norm": 0.773746907711029, "learning_rate": 8.850773676642061e-06, "loss": 0.0503, "step": 29290 }, { "epoch": 0.6454356652178463, "grad_norm": 0.3096487820148468, "learning_rate": 8.849797247810185e-06, "loss": 0.066, "step": 29291 }, { "epoch": 0.6454577005073625, "grad_norm": 0.5312384366989136, "learning_rate": 8.848820850304587e-06, "loss": 0.0593, "step": 29292 }, { "epoch": 0.6454797357968787, "grad_norm": 0.7477273941040039, "learning_rate": 8.847844484130238e-06, "loss": 0.069, "step": 29293 }, { "epoch": 0.6455017710863948, "grad_norm": 0.473574161529541, "learning_rate": 8.84686814929212e-06, "loss": 0.04, "step": 29294 }, { "epoch": 0.645523806375911, "grad_norm": 0.5632855296134949, "learning_rate": 8.845891845795205e-06, "loss": 0.0677, "step": 29295 }, { "epoch": 0.6455458416654272, "grad_norm": 0.3768930733203888, "learning_rate": 8.844915573644456e-06, "loss": 0.0362, "step": 29296 }, { "epoch": 0.6455678769549433, "grad_norm": 0.7784128189086914, "learning_rate": 8.843939332844858e-06, "loss": 0.0742, "step": 29297 }, { "epoch": 0.6455899122444595, "grad_norm": 0.8600291013717651, "learning_rate": 8.842963123401378e-06, "loss": 0.0602, "step": 29298 }, { "epoch": 0.6456119475339757, "grad_norm": 0.4257594048976898, "learning_rate": 8.841986945318987e-06, "loss": 0.0617, "step": 29299 }, { "epoch": 0.6456339828234918, "grad_norm": 0.6003799438476562, "learning_rate": 8.841010798602657e-06, "loss": 0.0732, "step": 29300 }, { "epoch": 0.645656018113008, "grad_norm": 0.6685855984687805, "learning_rate": 8.840034683257365e-06, "loss": 0.0714, "step": 29301 }, { "epoch": 0.6456780534025242, "grad_norm": 0.7371921539306641, "learning_rate": 8.839058599288082e-06, "loss": 0.0906, "step": 29302 }, { "epoch": 0.6457000886920403, "grad_norm": 0.6909886598587036, "learning_rate": 8.838082546699774e-06, "loss": 0.0792, "step": 29303 }, { "epoch": 0.6457221239815565, "grad_norm": 0.8908343315124512, "learning_rate": 8.83710652549741e-06, "loss": 0.0736, "step": 29304 }, { "epoch": 0.6457441592710726, "grad_norm": 0.40338432788848877, "learning_rate": 8.836130535685974e-06, "loss": 0.0565, "step": 29305 }, { "epoch": 0.6457661945605888, "grad_norm": 0.6762663125991821, "learning_rate": 8.83515457727043e-06, "loss": 0.0614, "step": 29306 }, { "epoch": 0.645788229850105, "grad_norm": 0.3781116306781769, "learning_rate": 8.834178650255751e-06, "loss": 0.0667, "step": 29307 }, { "epoch": 0.6458102651396211, "grad_norm": 0.272805392742157, "learning_rate": 8.833202754646902e-06, "loss": 0.067, "step": 29308 }, { "epoch": 0.6458323004291373, "grad_norm": 0.641384482383728, "learning_rate": 8.832226890448862e-06, "loss": 0.0517, "step": 29309 }, { "epoch": 0.6458543357186535, "grad_norm": 0.4539758861064911, "learning_rate": 8.831251057666598e-06, "loss": 0.0497, "step": 29310 }, { "epoch": 0.6458763710081696, "grad_norm": 0.6356293559074402, "learning_rate": 8.83027525630508e-06, "loss": 0.0667, "step": 29311 }, { "epoch": 0.6458984062976857, "grad_norm": 1.1870988607406616, "learning_rate": 8.829299486369272e-06, "loss": 0.0729, "step": 29312 }, { "epoch": 0.6459204415872019, "grad_norm": 0.4762943685054779, "learning_rate": 8.828323747864157e-06, "loss": 0.0568, "step": 29313 }, { "epoch": 0.645942476876718, "grad_norm": 0.4747787117958069, "learning_rate": 8.8273480407947e-06, "loss": 0.0567, "step": 29314 }, { "epoch": 0.6459645121662342, "grad_norm": 0.6694813370704651, "learning_rate": 8.826372365165869e-06, "loss": 0.0676, "step": 29315 }, { "epoch": 0.6459865474557503, "grad_norm": 0.47697678208351135, "learning_rate": 8.82539672098263e-06, "loss": 0.0499, "step": 29316 }, { "epoch": 0.6460085827452665, "grad_norm": 0.8787892460823059, "learning_rate": 8.824421108249959e-06, "loss": 0.0773, "step": 29317 }, { "epoch": 0.6460306180347827, "grad_norm": 0.5821210741996765, "learning_rate": 8.823445526972826e-06, "loss": 0.0632, "step": 29318 }, { "epoch": 0.6460526533242988, "grad_norm": 0.8134395480155945, "learning_rate": 8.822469977156189e-06, "loss": 0.0486, "step": 29319 }, { "epoch": 0.646074688613815, "grad_norm": 0.6267827153205872, "learning_rate": 8.82149445880503e-06, "loss": 0.0651, "step": 29320 }, { "epoch": 0.6460967239033312, "grad_norm": 0.33651402592658997, "learning_rate": 8.820518971924315e-06, "loss": 0.0668, "step": 29321 }, { "epoch": 0.6461187591928473, "grad_norm": 0.34969255328178406, "learning_rate": 8.81954351651901e-06, "loss": 0.0428, "step": 29322 }, { "epoch": 0.6461407944823635, "grad_norm": 0.3509806990623474, "learning_rate": 8.81856809259408e-06, "loss": 0.0594, "step": 29323 }, { "epoch": 0.6461628297718797, "grad_norm": 0.6875845193862915, "learning_rate": 8.817592700154502e-06, "loss": 0.0721, "step": 29324 }, { "epoch": 0.6461848650613958, "grad_norm": 0.659718930721283, "learning_rate": 8.816617339205238e-06, "loss": 0.0601, "step": 29325 }, { "epoch": 0.646206900350912, "grad_norm": 1.102986454963684, "learning_rate": 8.815642009751259e-06, "loss": 0.1074, "step": 29326 }, { "epoch": 0.6462289356404282, "grad_norm": 1.0366182327270508, "learning_rate": 8.814666711797526e-06, "loss": 0.0858, "step": 29327 }, { "epoch": 0.6462509709299443, "grad_norm": 0.7045690417289734, "learning_rate": 8.813691445349016e-06, "loss": 0.0594, "step": 29328 }, { "epoch": 0.6462730062194605, "grad_norm": 0.7196946740150452, "learning_rate": 8.812716210410694e-06, "loss": 0.0927, "step": 29329 }, { "epoch": 0.6462950415089767, "grad_norm": 0.5218203663825989, "learning_rate": 8.811741006987527e-06, "loss": 0.054, "step": 29330 }, { "epoch": 0.6463170767984928, "grad_norm": 0.5044500827789307, "learning_rate": 8.810765835084478e-06, "loss": 0.0559, "step": 29331 }, { "epoch": 0.646339112088009, "grad_norm": 0.8486998081207275, "learning_rate": 8.809790694706519e-06, "loss": 0.0887, "step": 29332 }, { "epoch": 0.6463611473775251, "grad_norm": 0.5553270578384399, "learning_rate": 8.808815585858619e-06, "loss": 0.0811, "step": 29333 }, { "epoch": 0.6463831826670413, "grad_norm": 0.625575065612793, "learning_rate": 8.807840508545737e-06, "loss": 0.0776, "step": 29334 }, { "epoch": 0.6464052179565575, "grad_norm": 0.3021237552165985, "learning_rate": 8.80686546277284e-06, "loss": 0.0368, "step": 29335 }, { "epoch": 0.6464272532460736, "grad_norm": 0.978047251701355, "learning_rate": 8.805890448544906e-06, "loss": 0.0832, "step": 29336 }, { "epoch": 0.6464492885355897, "grad_norm": 0.5994841456413269, "learning_rate": 8.80491546586689e-06, "loss": 0.0486, "step": 29337 }, { "epoch": 0.6464713238251059, "grad_norm": 0.31919756531715393, "learning_rate": 8.803940514743762e-06, "loss": 0.0659, "step": 29338 }, { "epoch": 0.646493359114622, "grad_norm": 0.23364073038101196, "learning_rate": 8.802965595180487e-06, "loss": 0.0543, "step": 29339 }, { "epoch": 0.6465153944041382, "grad_norm": 0.715977668762207, "learning_rate": 8.801990707182032e-06, "loss": 0.0638, "step": 29340 }, { "epoch": 0.6465374296936544, "grad_norm": 0.521850049495697, "learning_rate": 8.801015850753364e-06, "loss": 0.0535, "step": 29341 }, { "epoch": 0.6465594649831705, "grad_norm": 1.013060212135315, "learning_rate": 8.80004102589944e-06, "loss": 0.0592, "step": 29342 }, { "epoch": 0.6465815002726867, "grad_norm": 0.6891651153564453, "learning_rate": 8.799066232625236e-06, "loss": 0.0538, "step": 29343 }, { "epoch": 0.6466035355622028, "grad_norm": 0.7284569144248962, "learning_rate": 8.798091470935714e-06, "loss": 0.0728, "step": 29344 }, { "epoch": 0.646625570851719, "grad_norm": 0.5325649380683899, "learning_rate": 8.797116740835839e-06, "loss": 0.0969, "step": 29345 }, { "epoch": 0.6466476061412352, "grad_norm": 0.4856135845184326, "learning_rate": 8.79614204233057e-06, "loss": 0.0652, "step": 29346 }, { "epoch": 0.6466696414307513, "grad_norm": 0.5805360674858093, "learning_rate": 8.79516737542488e-06, "loss": 0.0759, "step": 29347 }, { "epoch": 0.6466916767202675, "grad_norm": 0.3632025718688965, "learning_rate": 8.79419274012373e-06, "loss": 0.0295, "step": 29348 }, { "epoch": 0.6467137120097837, "grad_norm": 0.8371103405952454, "learning_rate": 8.793218136432083e-06, "loss": 0.0744, "step": 29349 }, { "epoch": 0.6467357472992998, "grad_norm": 0.6724328994750977, "learning_rate": 8.7922435643549e-06, "loss": 0.059, "step": 29350 }, { "epoch": 0.646757782588816, "grad_norm": 0.6386737823486328, "learning_rate": 8.791269023897154e-06, "loss": 0.0571, "step": 29351 }, { "epoch": 0.6467798178783322, "grad_norm": 0.861091136932373, "learning_rate": 8.790294515063804e-06, "loss": 0.0947, "step": 29352 }, { "epoch": 0.6468018531678483, "grad_norm": 0.7322707176208496, "learning_rate": 8.789320037859817e-06, "loss": 0.0525, "step": 29353 }, { "epoch": 0.6468238884573645, "grad_norm": 0.7267383933067322, "learning_rate": 8.788345592290148e-06, "loss": 0.0654, "step": 29354 }, { "epoch": 0.6468459237468807, "grad_norm": 0.7897248268127441, "learning_rate": 8.78737117835977e-06, "loss": 0.0653, "step": 29355 }, { "epoch": 0.6468679590363968, "grad_norm": 0.36228659749031067, "learning_rate": 8.78639679607364e-06, "loss": 0.0329, "step": 29356 }, { "epoch": 0.646889994325913, "grad_norm": 0.5344017744064331, "learning_rate": 8.785422445436725e-06, "loss": 0.057, "step": 29357 }, { "epoch": 0.6469120296154292, "grad_norm": 0.7154894471168518, "learning_rate": 8.78444812645398e-06, "loss": 0.0969, "step": 29358 }, { "epoch": 0.6469340649049453, "grad_norm": 0.4573213458061218, "learning_rate": 8.783473839130378e-06, "loss": 0.0379, "step": 29359 }, { "epoch": 0.6469561001944615, "grad_norm": 0.32304733991622925, "learning_rate": 8.78249958347088e-06, "loss": 0.0468, "step": 29360 }, { "epoch": 0.6469781354839776, "grad_norm": 0.7623056769371033, "learning_rate": 8.781525359480445e-06, "loss": 0.0974, "step": 29361 }, { "epoch": 0.6470001707734937, "grad_norm": 0.5113190412521362, "learning_rate": 8.780551167164029e-06, "loss": 0.0629, "step": 29362 }, { "epoch": 0.6470222060630099, "grad_norm": 0.8888111114501953, "learning_rate": 8.779577006526608e-06, "loss": 0.0753, "step": 29363 }, { "epoch": 0.647044241352526, "grad_norm": 0.6464504599571228, "learning_rate": 8.778602877573137e-06, "loss": 0.0996, "step": 29364 }, { "epoch": 0.6470662766420422, "grad_norm": 0.5573119521141052, "learning_rate": 8.777628780308571e-06, "loss": 0.063, "step": 29365 }, { "epoch": 0.6470883119315584, "grad_norm": 0.4381412863731384, "learning_rate": 8.776654714737884e-06, "loss": 0.0502, "step": 29366 }, { "epoch": 0.6471103472210745, "grad_norm": 0.7754063606262207, "learning_rate": 8.775680680866032e-06, "loss": 0.0835, "step": 29367 }, { "epoch": 0.6471323825105907, "grad_norm": 0.40213292837142944, "learning_rate": 8.774706678697975e-06, "loss": 0.0474, "step": 29368 }, { "epoch": 0.6471544178001069, "grad_norm": 1.216496467590332, "learning_rate": 8.773732708238672e-06, "loss": 0.0827, "step": 29369 }, { "epoch": 0.647176453089623, "grad_norm": 0.39611169695854187, "learning_rate": 8.77275876949309e-06, "loss": 0.0779, "step": 29370 }, { "epoch": 0.6471984883791392, "grad_norm": 0.4850797951221466, "learning_rate": 8.771784862466188e-06, "loss": 0.0581, "step": 29371 }, { "epoch": 0.6472205236686553, "grad_norm": 0.594176709651947, "learning_rate": 8.770810987162927e-06, "loss": 0.1112, "step": 29372 }, { "epoch": 0.6472425589581715, "grad_norm": 0.7195760011672974, "learning_rate": 8.769837143588261e-06, "loss": 0.0716, "step": 29373 }, { "epoch": 0.6472645942476877, "grad_norm": 0.37679576873779297, "learning_rate": 8.76886333174716e-06, "loss": 0.0637, "step": 29374 }, { "epoch": 0.6472866295372038, "grad_norm": 0.9959304928779602, "learning_rate": 8.767889551644579e-06, "loss": 0.0845, "step": 29375 }, { "epoch": 0.64730866482672, "grad_norm": 0.44358351826667786, "learning_rate": 8.766915803285476e-06, "loss": 0.0662, "step": 29376 }, { "epoch": 0.6473307001162362, "grad_norm": 1.122791051864624, "learning_rate": 8.765942086674808e-06, "loss": 0.069, "step": 29377 }, { "epoch": 0.6473527354057523, "grad_norm": 0.5137711763381958, "learning_rate": 8.764968401817549e-06, "loss": 0.059, "step": 29378 }, { "epoch": 0.6473747706952685, "grad_norm": 0.49405109882354736, "learning_rate": 8.763994748718648e-06, "loss": 0.0642, "step": 29379 }, { "epoch": 0.6473968059847847, "grad_norm": 0.7387972474098206, "learning_rate": 8.763021127383064e-06, "loss": 0.0812, "step": 29380 }, { "epoch": 0.6474188412743008, "grad_norm": 0.6091345548629761, "learning_rate": 8.762047537815757e-06, "loss": 0.0476, "step": 29381 }, { "epoch": 0.647440876563817, "grad_norm": 0.7181914448738098, "learning_rate": 8.76107398002169e-06, "loss": 0.0728, "step": 29382 }, { "epoch": 0.6474629118533332, "grad_norm": 0.6527799367904663, "learning_rate": 8.760100454005817e-06, "loss": 0.0853, "step": 29383 }, { "epoch": 0.6474849471428493, "grad_norm": 0.4906454086303711, "learning_rate": 8.759126959773099e-06, "loss": 0.0564, "step": 29384 }, { "epoch": 0.6475069824323655, "grad_norm": 0.6989119648933411, "learning_rate": 8.75815349732849e-06, "loss": 0.0487, "step": 29385 }, { "epoch": 0.6475290177218815, "grad_norm": 0.598080575466156, "learning_rate": 8.757180066676958e-06, "loss": 0.0686, "step": 29386 }, { "epoch": 0.6475510530113977, "grad_norm": 0.6131172776222229, "learning_rate": 8.756206667823451e-06, "loss": 0.092, "step": 29387 }, { "epoch": 0.6475730883009139, "grad_norm": 0.5649336576461792, "learning_rate": 8.755233300772932e-06, "loss": 0.0766, "step": 29388 }, { "epoch": 0.64759512359043, "grad_norm": 0.665473997592926, "learning_rate": 8.75425996553036e-06, "loss": 0.0759, "step": 29389 }, { "epoch": 0.6476171588799462, "grad_norm": 0.7437668442726135, "learning_rate": 8.753286662100696e-06, "loss": 0.0782, "step": 29390 }, { "epoch": 0.6476391941694624, "grad_norm": 0.36691805720329285, "learning_rate": 8.752313390488885e-06, "loss": 0.0451, "step": 29391 }, { "epoch": 0.6476612294589785, "grad_norm": 0.6217715740203857, "learning_rate": 8.751340150699894e-06, "loss": 0.0593, "step": 29392 }, { "epoch": 0.6476832647484947, "grad_norm": 0.8211564421653748, "learning_rate": 8.750366942738684e-06, "loss": 0.089, "step": 29393 }, { "epoch": 0.6477053000380109, "grad_norm": 0.620707631111145, "learning_rate": 8.7493937666102e-06, "loss": 0.0878, "step": 29394 }, { "epoch": 0.647727335327527, "grad_norm": 0.5961384177207947, "learning_rate": 8.748420622319415e-06, "loss": 0.0665, "step": 29395 }, { "epoch": 0.6477493706170432, "grad_norm": 0.6312364339828491, "learning_rate": 8.74744750987126e-06, "loss": 0.0842, "step": 29396 }, { "epoch": 0.6477714059065593, "grad_norm": 0.7614144682884216, "learning_rate": 8.746474429270718e-06, "loss": 0.0782, "step": 29397 }, { "epoch": 0.6477934411960755, "grad_norm": 0.7165561318397522, "learning_rate": 8.745501380522734e-06, "loss": 0.0744, "step": 29398 }, { "epoch": 0.6478154764855917, "grad_norm": 0.45860427618026733, "learning_rate": 8.74452836363227e-06, "loss": 0.0798, "step": 29399 }, { "epoch": 0.6478375117751078, "grad_norm": 0.5259403586387634, "learning_rate": 8.74355537860427e-06, "loss": 0.0671, "step": 29400 }, { "epoch": 0.647859547064624, "grad_norm": 0.6437835693359375, "learning_rate": 8.742582425443698e-06, "loss": 0.0635, "step": 29401 }, { "epoch": 0.6478815823541402, "grad_norm": 0.6244063377380371, "learning_rate": 8.741609504155517e-06, "loss": 0.0778, "step": 29402 }, { "epoch": 0.6479036176436563, "grad_norm": 0.5377675294876099, "learning_rate": 8.740636614744667e-06, "loss": 0.0824, "step": 29403 }, { "epoch": 0.6479256529331725, "grad_norm": 0.6048275828361511, "learning_rate": 8.73966375721611e-06, "loss": 0.0633, "step": 29404 }, { "epoch": 0.6479476882226887, "grad_norm": 0.38369494676589966, "learning_rate": 8.738690931574803e-06, "loss": 0.0339, "step": 29405 }, { "epoch": 0.6479697235122048, "grad_norm": 0.7727310061454773, "learning_rate": 8.73771813782571e-06, "loss": 0.0768, "step": 29406 }, { "epoch": 0.647991758801721, "grad_norm": 0.772241473197937, "learning_rate": 8.736745375973767e-06, "loss": 0.0865, "step": 29407 }, { "epoch": 0.6480137940912372, "grad_norm": 0.515593945980072, "learning_rate": 8.735772646023942e-06, "loss": 0.071, "step": 29408 }, { "epoch": 0.6480358293807533, "grad_norm": 0.17349109053611755, "learning_rate": 8.734799947981189e-06, "loss": 0.0405, "step": 29409 }, { "epoch": 0.6480578646702695, "grad_norm": 0.5307999849319458, "learning_rate": 8.733827281850455e-06, "loss": 0.0748, "step": 29410 }, { "epoch": 0.6480798999597855, "grad_norm": 0.644987165927887, "learning_rate": 8.732854647636697e-06, "loss": 0.0617, "step": 29411 }, { "epoch": 0.6481019352493017, "grad_norm": 0.7727707028388977, "learning_rate": 8.731882045344873e-06, "loss": 0.0774, "step": 29412 }, { "epoch": 0.6481239705388179, "grad_norm": 0.8619797825813293, "learning_rate": 8.730909474979942e-06, "loss": 0.0612, "step": 29413 }, { "epoch": 0.648146005828334, "grad_norm": 0.34019699692726135, "learning_rate": 8.729936936546841e-06, "loss": 0.056, "step": 29414 }, { "epoch": 0.6481680411178502, "grad_norm": 0.3169873058795929, "learning_rate": 8.728964430050535e-06, "loss": 0.0735, "step": 29415 }, { "epoch": 0.6481900764073664, "grad_norm": 0.6733106970787048, "learning_rate": 8.727991955495983e-06, "loss": 0.0512, "step": 29416 }, { "epoch": 0.6482121116968825, "grad_norm": 1.0253232717514038, "learning_rate": 8.727019512888124e-06, "loss": 0.0994, "step": 29417 }, { "epoch": 0.6482341469863987, "grad_norm": 0.7829288244247437, "learning_rate": 8.726047102231926e-06, "loss": 0.0609, "step": 29418 }, { "epoch": 0.6482561822759149, "grad_norm": 0.7579444050788879, "learning_rate": 8.72507472353232e-06, "loss": 0.0586, "step": 29419 }, { "epoch": 0.648278217565431, "grad_norm": 0.5758728981018066, "learning_rate": 8.724102376794288e-06, "loss": 0.0603, "step": 29420 }, { "epoch": 0.6483002528549472, "grad_norm": 0.7497898936271667, "learning_rate": 8.723130062022761e-06, "loss": 0.0813, "step": 29421 }, { "epoch": 0.6483222881444634, "grad_norm": 0.6234102845191956, "learning_rate": 8.722157779222707e-06, "loss": 0.0866, "step": 29422 }, { "epoch": 0.6483443234339795, "grad_norm": 0.8048511743545532, "learning_rate": 8.721185528399059e-06, "loss": 0.0734, "step": 29423 }, { "epoch": 0.6483663587234957, "grad_norm": 0.5212385058403015, "learning_rate": 8.720213309556783e-06, "loss": 0.0512, "step": 29424 }, { "epoch": 0.6483883940130118, "grad_norm": 0.34445270895957947, "learning_rate": 8.719241122700834e-06, "loss": 0.0507, "step": 29425 }, { "epoch": 0.648410429302528, "grad_norm": 0.6534369587898254, "learning_rate": 8.718268967836151e-06, "loss": 0.0707, "step": 29426 }, { "epoch": 0.6484324645920442, "grad_norm": 0.5250872373580933, "learning_rate": 8.717296844967692e-06, "loss": 0.0725, "step": 29427 }, { "epoch": 0.6484544998815603, "grad_norm": 0.4742356538772583, "learning_rate": 8.716324754100411e-06, "loss": 0.0485, "step": 29428 }, { "epoch": 0.6484765351710765, "grad_norm": 0.6308332681655884, "learning_rate": 8.715352695239264e-06, "loss": 0.0673, "step": 29429 }, { "epoch": 0.6484985704605927, "grad_norm": 0.681174099445343, "learning_rate": 8.714380668389187e-06, "loss": 0.0804, "step": 29430 }, { "epoch": 0.6485206057501088, "grad_norm": 0.5653868913650513, "learning_rate": 8.713408673555144e-06, "loss": 0.0717, "step": 29431 }, { "epoch": 0.648542641039625, "grad_norm": 0.9355242252349854, "learning_rate": 8.712436710742083e-06, "loss": 0.0682, "step": 29432 }, { "epoch": 0.6485646763291412, "grad_norm": 0.794373631477356, "learning_rate": 8.71146477995495e-06, "loss": 0.0663, "step": 29433 }, { "epoch": 0.6485867116186573, "grad_norm": 0.4489222466945648, "learning_rate": 8.7104928811987e-06, "loss": 0.0966, "step": 29434 }, { "epoch": 0.6486087469081735, "grad_norm": 0.5457040071487427, "learning_rate": 8.709521014478281e-06, "loss": 0.0648, "step": 29435 }, { "epoch": 0.6486307821976895, "grad_norm": 0.9329577684402466, "learning_rate": 8.708549179798652e-06, "loss": 0.1012, "step": 29436 }, { "epoch": 0.6486528174872057, "grad_norm": 0.45990118384361267, "learning_rate": 8.707577377164749e-06, "loss": 0.0605, "step": 29437 }, { "epoch": 0.6486748527767219, "grad_norm": 0.710735559463501, "learning_rate": 8.706605606581528e-06, "loss": 0.0616, "step": 29438 }, { "epoch": 0.648696888066238, "grad_norm": 0.7761887907981873, "learning_rate": 8.705633868053946e-06, "loss": 0.054, "step": 29439 }, { "epoch": 0.6487189233557542, "grad_norm": 0.3435896337032318, "learning_rate": 8.70466216158694e-06, "loss": 0.0425, "step": 29440 }, { "epoch": 0.6487409586452704, "grad_norm": 0.4998951852321625, "learning_rate": 8.703690487185472e-06, "loss": 0.0616, "step": 29441 }, { "epoch": 0.6487629939347865, "grad_norm": 0.6452510952949524, "learning_rate": 8.702718844854473e-06, "loss": 0.0695, "step": 29442 }, { "epoch": 0.6487850292243027, "grad_norm": 0.7455813884735107, "learning_rate": 8.701747234598914e-06, "loss": 0.0924, "step": 29443 }, { "epoch": 0.6488070645138189, "grad_norm": 0.6226750016212463, "learning_rate": 8.700775656423729e-06, "loss": 0.0578, "step": 29444 }, { "epoch": 0.648829099803335, "grad_norm": 0.3842966854572296, "learning_rate": 8.699804110333877e-06, "loss": 0.0683, "step": 29445 }, { "epoch": 0.6488511350928512, "grad_norm": 0.5231687426567078, "learning_rate": 8.698832596334295e-06, "loss": 0.0559, "step": 29446 }, { "epoch": 0.6488731703823674, "grad_norm": 0.49066850543022156, "learning_rate": 8.697861114429937e-06, "loss": 0.059, "step": 29447 }, { "epoch": 0.6488952056718835, "grad_norm": 0.819331705570221, "learning_rate": 8.69688966462576e-06, "loss": 0.0914, "step": 29448 }, { "epoch": 0.6489172409613997, "grad_norm": 0.47235000133514404, "learning_rate": 8.695918246926698e-06, "loss": 0.0608, "step": 29449 }, { "epoch": 0.6489392762509159, "grad_norm": 0.8318274021148682, "learning_rate": 8.694946861337702e-06, "loss": 0.0506, "step": 29450 }, { "epoch": 0.648961311540432, "grad_norm": 0.44539615511894226, "learning_rate": 8.693975507863723e-06, "loss": 0.0615, "step": 29451 }, { "epoch": 0.6489833468299482, "grad_norm": 0.9307433366775513, "learning_rate": 8.693004186509718e-06, "loss": 0.0789, "step": 29452 }, { "epoch": 0.6490053821194643, "grad_norm": 0.5375788807868958, "learning_rate": 8.692032897280615e-06, "loss": 0.0862, "step": 29453 }, { "epoch": 0.6490274174089805, "grad_norm": 0.713366687297821, "learning_rate": 8.691061640181371e-06, "loss": 0.0583, "step": 29454 }, { "epoch": 0.6490494526984967, "grad_norm": 0.502821147441864, "learning_rate": 8.690090415216943e-06, "loss": 0.0631, "step": 29455 }, { "epoch": 0.6490714879880128, "grad_norm": 0.6445714831352234, "learning_rate": 8.689119222392258e-06, "loss": 0.1002, "step": 29456 }, { "epoch": 0.649093523277529, "grad_norm": 0.6179112195968628, "learning_rate": 8.688148061712275e-06, "loss": 0.0465, "step": 29457 }, { "epoch": 0.6491155585670452, "grad_norm": 0.9794260263442993, "learning_rate": 8.687176933181939e-06, "loss": 0.0889, "step": 29458 }, { "epoch": 0.6491375938565613, "grad_norm": 0.745703935623169, "learning_rate": 8.686205836806199e-06, "loss": 0.0643, "step": 29459 }, { "epoch": 0.6491596291460774, "grad_norm": 0.4075687527656555, "learning_rate": 8.685234772589996e-06, "loss": 0.0482, "step": 29460 }, { "epoch": 0.6491816644355936, "grad_norm": 0.829887866973877, "learning_rate": 8.684263740538276e-06, "loss": 0.0769, "step": 29461 }, { "epoch": 0.6492036997251097, "grad_norm": 0.736856997013092, "learning_rate": 8.683292740655996e-06, "loss": 0.111, "step": 29462 }, { "epoch": 0.6492257350146259, "grad_norm": 0.7419724464416504, "learning_rate": 8.682321772948087e-06, "loss": 0.0693, "step": 29463 }, { "epoch": 0.649247770304142, "grad_norm": 0.6767110228538513, "learning_rate": 8.681350837419504e-06, "loss": 0.0568, "step": 29464 }, { "epoch": 0.6492698055936582, "grad_norm": 0.49814504384994507, "learning_rate": 8.680379934075181e-06, "loss": 0.0681, "step": 29465 }, { "epoch": 0.6492918408831744, "grad_norm": 0.6029045581817627, "learning_rate": 8.679409062920082e-06, "loss": 0.0615, "step": 29466 }, { "epoch": 0.6493138761726905, "grad_norm": 0.5020776391029358, "learning_rate": 8.678438223959137e-06, "loss": 0.0696, "step": 29467 }, { "epoch": 0.6493359114622067, "grad_norm": 0.5545324683189392, "learning_rate": 8.6774674171973e-06, "loss": 0.0591, "step": 29468 }, { "epoch": 0.6493579467517229, "grad_norm": 0.6655788421630859, "learning_rate": 8.676496642639502e-06, "loss": 0.0516, "step": 29469 }, { "epoch": 0.649379982041239, "grad_norm": 0.4336581230163574, "learning_rate": 8.67552590029071e-06, "loss": 0.0778, "step": 29470 }, { "epoch": 0.6494020173307552, "grad_norm": 1.0052329301834106, "learning_rate": 8.674555190155854e-06, "loss": 0.1169, "step": 29471 }, { "epoch": 0.6494240526202714, "grad_norm": 0.9021075963973999, "learning_rate": 8.673584512239875e-06, "loss": 0.0969, "step": 29472 }, { "epoch": 0.6494460879097875, "grad_norm": 0.4713400602340698, "learning_rate": 8.672613866547724e-06, "loss": 0.0654, "step": 29473 }, { "epoch": 0.6494681231993037, "grad_norm": 0.6590830087661743, "learning_rate": 8.671643253084342e-06, "loss": 0.0529, "step": 29474 }, { "epoch": 0.6494901584888199, "grad_norm": 0.615534245967865, "learning_rate": 8.67067267185468e-06, "loss": 0.0452, "step": 29475 }, { "epoch": 0.649512193778336, "grad_norm": 0.7506146430969238, "learning_rate": 8.669702122863665e-06, "loss": 0.0771, "step": 29476 }, { "epoch": 0.6495342290678522, "grad_norm": 0.5649219155311584, "learning_rate": 8.668731606116264e-06, "loss": 0.0519, "step": 29477 }, { "epoch": 0.6495562643573684, "grad_norm": 0.4061354398727417, "learning_rate": 8.667761121617402e-06, "loss": 0.0663, "step": 29478 }, { "epoch": 0.6495782996468845, "grad_norm": 0.7816275358200073, "learning_rate": 8.666790669372033e-06, "loss": 0.0492, "step": 29479 }, { "epoch": 0.6496003349364007, "grad_norm": 0.369783878326416, "learning_rate": 8.665820249385093e-06, "loss": 0.0631, "step": 29480 }, { "epoch": 0.6496223702259168, "grad_norm": 0.6294427514076233, "learning_rate": 8.664849861661524e-06, "loss": 0.085, "step": 29481 }, { "epoch": 0.649644405515433, "grad_norm": 0.3108054995536804, "learning_rate": 8.663879506206276e-06, "loss": 0.0442, "step": 29482 }, { "epoch": 0.6496664408049492, "grad_norm": 0.5135547518730164, "learning_rate": 8.662909183024285e-06, "loss": 0.0549, "step": 29483 }, { "epoch": 0.6496884760944653, "grad_norm": 0.7709153294563293, "learning_rate": 8.661938892120494e-06, "loss": 0.0715, "step": 29484 }, { "epoch": 0.6497105113839814, "grad_norm": 0.6990309357643127, "learning_rate": 8.66096863349985e-06, "loss": 0.0866, "step": 29485 }, { "epoch": 0.6497325466734976, "grad_norm": 0.7232251167297363, "learning_rate": 8.659998407167294e-06, "loss": 0.0684, "step": 29486 }, { "epoch": 0.6497545819630137, "grad_norm": 0.8639442920684814, "learning_rate": 8.65902821312776e-06, "loss": 0.0843, "step": 29487 }, { "epoch": 0.6497766172525299, "grad_norm": 0.8024423718452454, "learning_rate": 8.658058051386197e-06, "loss": 0.0662, "step": 29488 }, { "epoch": 0.649798652542046, "grad_norm": 0.7087864875793457, "learning_rate": 8.65708792194755e-06, "loss": 0.079, "step": 29489 }, { "epoch": 0.6498206878315622, "grad_norm": 0.5133628249168396, "learning_rate": 8.656117824816752e-06, "loss": 0.0888, "step": 29490 }, { "epoch": 0.6498427231210784, "grad_norm": 0.5711522102355957, "learning_rate": 8.655147759998751e-06, "loss": 0.0504, "step": 29491 }, { "epoch": 0.6498647584105945, "grad_norm": 0.4889763295650482, "learning_rate": 8.654177727498475e-06, "loss": 0.0898, "step": 29492 }, { "epoch": 0.6498867937001107, "grad_norm": 0.9016135334968567, "learning_rate": 8.653207727320884e-06, "loss": 0.0729, "step": 29493 }, { "epoch": 0.6499088289896269, "grad_norm": 0.6543372869491577, "learning_rate": 8.652237759470906e-06, "loss": 0.0697, "step": 29494 }, { "epoch": 0.649930864279143, "grad_norm": 0.7569371461868286, "learning_rate": 8.651267823953491e-06, "loss": 0.0759, "step": 29495 }, { "epoch": 0.6499528995686592, "grad_norm": 0.6100904941558838, "learning_rate": 8.650297920773568e-06, "loss": 0.0836, "step": 29496 }, { "epoch": 0.6499749348581754, "grad_norm": 0.7453212141990662, "learning_rate": 8.649328049936079e-06, "loss": 0.0871, "step": 29497 }, { "epoch": 0.6499969701476915, "grad_norm": 0.6790060997009277, "learning_rate": 8.648358211445978e-06, "loss": 0.0416, "step": 29498 }, { "epoch": 0.6500190054372077, "grad_norm": 0.5578454732894897, "learning_rate": 8.647388405308179e-06, "loss": 0.0474, "step": 29499 }, { "epoch": 0.6500410407267239, "grad_norm": 0.38211649656295776, "learning_rate": 8.646418631527652e-06, "loss": 0.0627, "step": 29500 }, { "epoch": 0.65006307601624, "grad_norm": 0.31740885972976685, "learning_rate": 8.645448890109314e-06, "loss": 0.047, "step": 29501 }, { "epoch": 0.6500851113057562, "grad_norm": 0.5205592513084412, "learning_rate": 8.64447918105812e-06, "loss": 0.0595, "step": 29502 }, { "epoch": 0.6501071465952724, "grad_norm": 0.4376424551010132, "learning_rate": 8.643509504378997e-06, "loss": 0.0562, "step": 29503 }, { "epoch": 0.6501291818847885, "grad_norm": 0.5347938537597656, "learning_rate": 8.642539860076886e-06, "loss": 0.066, "step": 29504 }, { "epoch": 0.6501512171743047, "grad_norm": 0.6652262806892395, "learning_rate": 8.641570248156737e-06, "loss": 0.1082, "step": 29505 }, { "epoch": 0.6501732524638208, "grad_norm": 0.744914174079895, "learning_rate": 8.640600668623472e-06, "loss": 0.0707, "step": 29506 }, { "epoch": 0.650195287753337, "grad_norm": 0.8480209112167358, "learning_rate": 8.639631121482039e-06, "loss": 0.1018, "step": 29507 }, { "epoch": 0.6502173230428532, "grad_norm": 0.48650044202804565, "learning_rate": 8.638661606737375e-06, "loss": 0.0647, "step": 29508 }, { "epoch": 0.6502393583323693, "grad_norm": 0.5191234946250916, "learning_rate": 8.637692124394425e-06, "loss": 0.0566, "step": 29509 }, { "epoch": 0.6502613936218854, "grad_norm": 0.4874219298362732, "learning_rate": 8.636722674458114e-06, "loss": 0.0745, "step": 29510 }, { "epoch": 0.6502834289114016, "grad_norm": 0.5616570115089417, "learning_rate": 8.635753256933388e-06, "loss": 0.0615, "step": 29511 }, { "epoch": 0.6503054642009177, "grad_norm": 0.5810053944587708, "learning_rate": 8.634783871825187e-06, "loss": 0.0862, "step": 29512 }, { "epoch": 0.6503274994904339, "grad_norm": 0.813881516456604, "learning_rate": 8.633814519138442e-06, "loss": 0.0626, "step": 29513 }, { "epoch": 0.6503495347799501, "grad_norm": 0.26598790287971497, "learning_rate": 8.6328451988781e-06, "loss": 0.0514, "step": 29514 }, { "epoch": 0.6503715700694662, "grad_norm": 0.7714415788650513, "learning_rate": 8.631875911049078e-06, "loss": 0.072, "step": 29515 }, { "epoch": 0.6503936053589824, "grad_norm": 0.7247015237808228, "learning_rate": 8.630906655656338e-06, "loss": 0.0688, "step": 29516 }, { "epoch": 0.6504156406484985, "grad_norm": 0.43724068999290466, "learning_rate": 8.6299374327048e-06, "loss": 0.0699, "step": 29517 }, { "epoch": 0.6504376759380147, "grad_norm": 0.494606614112854, "learning_rate": 8.628968242199412e-06, "loss": 0.074, "step": 29518 }, { "epoch": 0.6504597112275309, "grad_norm": 0.8109219670295715, "learning_rate": 8.627999084145103e-06, "loss": 0.0889, "step": 29519 }, { "epoch": 0.650481746517047, "grad_norm": 0.3463107645511627, "learning_rate": 8.62702995854681e-06, "loss": 0.0641, "step": 29520 }, { "epoch": 0.6505037818065632, "grad_norm": 0.5864566564559937, "learning_rate": 8.626060865409475e-06, "loss": 0.045, "step": 29521 }, { "epoch": 0.6505258170960794, "grad_norm": 0.3098049759864807, "learning_rate": 8.625091804738017e-06, "loss": 0.0479, "step": 29522 }, { "epoch": 0.6505478523855955, "grad_norm": 0.4509558081626892, "learning_rate": 8.6241227765374e-06, "loss": 0.0596, "step": 29523 }, { "epoch": 0.6505698876751117, "grad_norm": 0.5370528697967529, "learning_rate": 8.623153780812537e-06, "loss": 0.0717, "step": 29524 }, { "epoch": 0.6505919229646279, "grad_norm": 0.69777911901474, "learning_rate": 8.622184817568379e-06, "loss": 0.1063, "step": 29525 }, { "epoch": 0.650613958254144, "grad_norm": 0.5200010538101196, "learning_rate": 8.621215886809846e-06, "loss": 0.0729, "step": 29526 }, { "epoch": 0.6506359935436602, "grad_norm": 0.5840783715248108, "learning_rate": 8.620246988541882e-06, "loss": 0.0596, "step": 29527 }, { "epoch": 0.6506580288331764, "grad_norm": 0.6472237706184387, "learning_rate": 8.619278122769428e-06, "loss": 0.0497, "step": 29528 }, { "epoch": 0.6506800641226925, "grad_norm": 0.6692939400672913, "learning_rate": 8.618309289497404e-06, "loss": 0.033, "step": 29529 }, { "epoch": 0.6507020994122087, "grad_norm": 0.8769432306289673, "learning_rate": 8.617340488730754e-06, "loss": 0.0848, "step": 29530 }, { "epoch": 0.6507241347017249, "grad_norm": 0.23674245178699493, "learning_rate": 8.616371720474412e-06, "loss": 0.0463, "step": 29531 }, { "epoch": 0.650746169991241, "grad_norm": 0.5280583500862122, "learning_rate": 8.615402984733317e-06, "loss": 0.0694, "step": 29532 }, { "epoch": 0.6507682052807572, "grad_norm": 0.6690802574157715, "learning_rate": 8.614434281512392e-06, "loss": 0.0581, "step": 29533 }, { "epoch": 0.6507902405702732, "grad_norm": 0.8484873175621033, "learning_rate": 8.61346561081658e-06, "loss": 0.0881, "step": 29534 }, { "epoch": 0.6508122758597894, "grad_norm": 0.5925693511962891, "learning_rate": 8.612496972650817e-06, "loss": 0.05, "step": 29535 }, { "epoch": 0.6508343111493056, "grad_norm": 0.6271569132804871, "learning_rate": 8.611528367020024e-06, "loss": 0.0637, "step": 29536 }, { "epoch": 0.6508563464388217, "grad_norm": 0.4794800877571106, "learning_rate": 8.610559793929153e-06, "loss": 0.0755, "step": 29537 }, { "epoch": 0.6508783817283379, "grad_norm": 0.7650815844535828, "learning_rate": 8.609591253383111e-06, "loss": 0.0836, "step": 29538 }, { "epoch": 0.6509004170178541, "grad_norm": 0.666560173034668, "learning_rate": 8.608622745386862e-06, "loss": 0.0532, "step": 29539 }, { "epoch": 0.6509224523073702, "grad_norm": 0.45639365911483765, "learning_rate": 8.607654269945316e-06, "loss": 0.0527, "step": 29540 }, { "epoch": 0.6509444875968864, "grad_norm": 0.30015286803245544, "learning_rate": 8.606685827063418e-06, "loss": 0.065, "step": 29541 }, { "epoch": 0.6509665228864026, "grad_norm": 0.5765168070793152, "learning_rate": 8.605717416746101e-06, "loss": 0.071, "step": 29542 }, { "epoch": 0.6509885581759187, "grad_norm": 0.47100594639778137, "learning_rate": 8.604749038998291e-06, "loss": 0.047, "step": 29543 }, { "epoch": 0.6510105934654349, "grad_norm": 0.5898016095161438, "learning_rate": 8.603780693824929e-06, "loss": 0.0862, "step": 29544 }, { "epoch": 0.651032628754951, "grad_norm": 0.8331994414329529, "learning_rate": 8.602812381230928e-06, "loss": 0.0916, "step": 29545 }, { "epoch": 0.6510546640444672, "grad_norm": 0.7259198427200317, "learning_rate": 8.601844101221245e-06, "loss": 0.0547, "step": 29546 }, { "epoch": 0.6510766993339834, "grad_norm": 0.5211461186408997, "learning_rate": 8.600875853800797e-06, "loss": 0.0506, "step": 29547 }, { "epoch": 0.6510987346234995, "grad_norm": 1.0025008916854858, "learning_rate": 8.599907638974526e-06, "loss": 0.0667, "step": 29548 }, { "epoch": 0.6511207699130157, "grad_norm": 0.679732620716095, "learning_rate": 8.598939456747351e-06, "loss": 0.0614, "step": 29549 }, { "epoch": 0.6511428052025319, "grad_norm": 0.5674912929534912, "learning_rate": 8.597971307124211e-06, "loss": 0.0666, "step": 29550 }, { "epoch": 0.651164840492048, "grad_norm": 0.5471999645233154, "learning_rate": 8.597003190110039e-06, "loss": 0.0606, "step": 29551 }, { "epoch": 0.6511868757815642, "grad_norm": 0.6202850341796875, "learning_rate": 8.596035105709759e-06, "loss": 0.0699, "step": 29552 }, { "epoch": 0.6512089110710804, "grad_norm": 0.3515145480632782, "learning_rate": 8.595067053928306e-06, "loss": 0.0471, "step": 29553 }, { "epoch": 0.6512309463605965, "grad_norm": 0.4663906395435333, "learning_rate": 8.594099034770613e-06, "loss": 0.0795, "step": 29554 }, { "epoch": 0.6512529816501127, "grad_norm": 0.5565744042396545, "learning_rate": 8.593131048241612e-06, "loss": 0.0497, "step": 29555 }, { "epoch": 0.6512750169396289, "grad_norm": 0.96010822057724, "learning_rate": 8.592163094346225e-06, "loss": 0.0911, "step": 29556 }, { "epoch": 0.651297052229145, "grad_norm": 0.7211761474609375, "learning_rate": 8.591195173089388e-06, "loss": 0.0498, "step": 29557 }, { "epoch": 0.6513190875186612, "grad_norm": 0.46970853209495544, "learning_rate": 8.590227284476037e-06, "loss": 0.1008, "step": 29558 }, { "epoch": 0.6513411228081772, "grad_norm": 0.5887886881828308, "learning_rate": 8.58925942851109e-06, "loss": 0.0821, "step": 29559 }, { "epoch": 0.6513631580976934, "grad_norm": 0.815052330493927, "learning_rate": 8.588291605199487e-06, "loss": 0.1142, "step": 29560 }, { "epoch": 0.6513851933872096, "grad_norm": 0.7178100347518921, "learning_rate": 8.587323814546142e-06, "loss": 0.0505, "step": 29561 }, { "epoch": 0.6514072286767257, "grad_norm": 0.624991238117218, "learning_rate": 8.586356056556006e-06, "loss": 0.0708, "step": 29562 }, { "epoch": 0.6514292639662419, "grad_norm": 0.41217029094696045, "learning_rate": 8.585388331233996e-06, "loss": 0.0711, "step": 29563 }, { "epoch": 0.6514512992557581, "grad_norm": 0.4151468276977539, "learning_rate": 8.584420638585037e-06, "loss": 0.0445, "step": 29564 }, { "epoch": 0.6514733345452742, "grad_norm": 0.7813737988471985, "learning_rate": 8.583452978614076e-06, "loss": 0.0638, "step": 29565 }, { "epoch": 0.6514953698347904, "grad_norm": 0.790521502494812, "learning_rate": 8.58248535132602e-06, "loss": 0.0891, "step": 29566 }, { "epoch": 0.6515174051243066, "grad_norm": 0.4710286557674408, "learning_rate": 8.581517756725814e-06, "loss": 0.0711, "step": 29567 }, { "epoch": 0.6515394404138227, "grad_norm": 1.0711520910263062, "learning_rate": 8.58055019481837e-06, "loss": 0.0913, "step": 29568 }, { "epoch": 0.6515614757033389, "grad_norm": 0.5059563517570496, "learning_rate": 8.579582665608636e-06, "loss": 0.0767, "step": 29569 }, { "epoch": 0.651583510992855, "grad_norm": 0.5900108218193054, "learning_rate": 8.578615169101524e-06, "loss": 0.0753, "step": 29570 }, { "epoch": 0.6516055462823712, "grad_norm": 0.529310405254364, "learning_rate": 8.577647705301976e-06, "loss": 0.0519, "step": 29571 }, { "epoch": 0.6516275815718874, "grad_norm": 1.0703688859939575, "learning_rate": 8.576680274214905e-06, "loss": 0.1014, "step": 29572 }, { "epoch": 0.6516496168614035, "grad_norm": 0.7411280274391174, "learning_rate": 8.575712875845247e-06, "loss": 0.0761, "step": 29573 }, { "epoch": 0.6516716521509197, "grad_norm": 0.7091873288154602, "learning_rate": 8.574745510197933e-06, "loss": 0.0727, "step": 29574 }, { "epoch": 0.6516936874404359, "grad_norm": 0.4911133050918579, "learning_rate": 8.573778177277881e-06, "loss": 0.0557, "step": 29575 }, { "epoch": 0.651715722729952, "grad_norm": 0.6238369345664978, "learning_rate": 8.572810877090023e-06, "loss": 0.0695, "step": 29576 }, { "epoch": 0.6517377580194682, "grad_norm": 0.8692204356193542, "learning_rate": 8.571843609639284e-06, "loss": 0.0975, "step": 29577 }, { "epoch": 0.6517597933089844, "grad_norm": 0.5336938500404358, "learning_rate": 8.570876374930598e-06, "loss": 0.0755, "step": 29578 }, { "epoch": 0.6517818285985005, "grad_norm": 0.5284310579299927, "learning_rate": 8.569909172968883e-06, "loss": 0.0333, "step": 29579 }, { "epoch": 0.6518038638880167, "grad_norm": 0.5804840326309204, "learning_rate": 8.568942003759066e-06, "loss": 0.078, "step": 29580 }, { "epoch": 0.6518258991775329, "grad_norm": 0.5248724818229675, "learning_rate": 8.567974867306083e-06, "loss": 0.0489, "step": 29581 }, { "epoch": 0.651847934467049, "grad_norm": 0.4079732894897461, "learning_rate": 8.567007763614847e-06, "loss": 0.0431, "step": 29582 }, { "epoch": 0.6518699697565652, "grad_norm": 0.9103054404258728, "learning_rate": 8.566040692690297e-06, "loss": 0.0609, "step": 29583 }, { "epoch": 0.6518920050460812, "grad_norm": 0.536626398563385, "learning_rate": 8.565073654537339e-06, "loss": 0.0593, "step": 29584 }, { "epoch": 0.6519140403355974, "grad_norm": 0.5419570803642273, "learning_rate": 8.564106649160926e-06, "loss": 0.0524, "step": 29585 }, { "epoch": 0.6519360756251136, "grad_norm": 0.6092745661735535, "learning_rate": 8.56313967656596e-06, "loss": 0.0453, "step": 29586 }, { "epoch": 0.6519581109146297, "grad_norm": 0.6627971529960632, "learning_rate": 8.562172736757376e-06, "loss": 0.079, "step": 29587 }, { "epoch": 0.6519801462041459, "grad_norm": 0.5615323781967163, "learning_rate": 8.561205829740098e-06, "loss": 0.0446, "step": 29588 }, { "epoch": 0.6520021814936621, "grad_norm": 0.5515269637107849, "learning_rate": 8.560238955519059e-06, "loss": 0.0527, "step": 29589 }, { "epoch": 0.6520242167831782, "grad_norm": 0.6627988815307617, "learning_rate": 8.55927211409917e-06, "loss": 0.0805, "step": 29590 }, { "epoch": 0.6520462520726944, "grad_norm": 0.7277311086654663, "learning_rate": 8.558305305485359e-06, "loss": 0.056, "step": 29591 }, { "epoch": 0.6520682873622106, "grad_norm": 0.6147410869598389, "learning_rate": 8.557338529682564e-06, "loss": 0.0699, "step": 29592 }, { "epoch": 0.6520903226517267, "grad_norm": 0.5216792821884155, "learning_rate": 8.556371786695691e-06, "loss": 0.0845, "step": 29593 }, { "epoch": 0.6521123579412429, "grad_norm": 0.63823401927948, "learning_rate": 8.555405076529677e-06, "loss": 0.0499, "step": 29594 }, { "epoch": 0.6521343932307591, "grad_norm": 0.9051418900489807, "learning_rate": 8.554438399189428e-06, "loss": 0.0617, "step": 29595 }, { "epoch": 0.6521564285202752, "grad_norm": 0.6663199663162231, "learning_rate": 8.553471754679894e-06, "loss": 0.0571, "step": 29596 }, { "epoch": 0.6521784638097914, "grad_norm": 0.4668358266353607, "learning_rate": 8.552505143005979e-06, "loss": 0.053, "step": 29597 }, { "epoch": 0.6522004990993076, "grad_norm": 0.7020087838172913, "learning_rate": 8.55153856417262e-06, "loss": 0.0645, "step": 29598 }, { "epoch": 0.6522225343888237, "grad_norm": 0.8685455918312073, "learning_rate": 8.550572018184724e-06, "loss": 0.0582, "step": 29599 }, { "epoch": 0.6522445696783399, "grad_norm": 0.5465570092201233, "learning_rate": 8.549605505047227e-06, "loss": 0.0708, "step": 29600 }, { "epoch": 0.652266604967856, "grad_norm": 0.5643116235733032, "learning_rate": 8.548639024765051e-06, "loss": 0.0773, "step": 29601 }, { "epoch": 0.6522886402573722, "grad_norm": 0.5726333260536194, "learning_rate": 8.547672577343111e-06, "loss": 0.0724, "step": 29602 }, { "epoch": 0.6523106755468884, "grad_norm": 0.9052760004997253, "learning_rate": 8.546706162786335e-06, "loss": 0.0405, "step": 29603 }, { "epoch": 0.6523327108364045, "grad_norm": 0.7618281245231628, "learning_rate": 8.545739781099644e-06, "loss": 0.0958, "step": 29604 }, { "epoch": 0.6523547461259207, "grad_norm": 0.7030799388885498, "learning_rate": 8.544773432287967e-06, "loss": 0.0675, "step": 29605 }, { "epoch": 0.6523767814154369, "grad_norm": 0.9679622054100037, "learning_rate": 8.543807116356217e-06, "loss": 0.0741, "step": 29606 }, { "epoch": 0.652398816704953, "grad_norm": 0.8018149137496948, "learning_rate": 8.542840833309318e-06, "loss": 0.0773, "step": 29607 }, { "epoch": 0.6524208519944692, "grad_norm": 0.5168930888175964, "learning_rate": 8.541874583152198e-06, "loss": 0.0645, "step": 29608 }, { "epoch": 0.6524428872839853, "grad_norm": 0.6017668843269348, "learning_rate": 8.54090836588977e-06, "loss": 0.0555, "step": 29609 }, { "epoch": 0.6524649225735014, "grad_norm": 0.5291872620582581, "learning_rate": 8.539942181526958e-06, "loss": 0.0673, "step": 29610 }, { "epoch": 0.6524869578630176, "grad_norm": 0.42659270763397217, "learning_rate": 8.538976030068684e-06, "loss": 0.0774, "step": 29611 }, { "epoch": 0.6525089931525337, "grad_norm": 0.4618780016899109, "learning_rate": 8.538009911519878e-06, "loss": 0.0733, "step": 29612 }, { "epoch": 0.6525310284420499, "grad_norm": 1.1450822353363037, "learning_rate": 8.537043825885446e-06, "loss": 0.0946, "step": 29613 }, { "epoch": 0.6525530637315661, "grad_norm": 0.5307796001434326, "learning_rate": 8.536077773170314e-06, "loss": 0.0747, "step": 29614 }, { "epoch": 0.6525750990210822, "grad_norm": 0.3847907483577728, "learning_rate": 8.53511175337941e-06, "loss": 0.0589, "step": 29615 }, { "epoch": 0.6525971343105984, "grad_norm": 0.29583483934402466, "learning_rate": 8.534145766517644e-06, "loss": 0.0498, "step": 29616 }, { "epoch": 0.6526191696001146, "grad_norm": 0.3763725161552429, "learning_rate": 8.533179812589947e-06, "loss": 0.0377, "step": 29617 }, { "epoch": 0.6526412048896307, "grad_norm": 0.5466420650482178, "learning_rate": 8.53221389160122e-06, "loss": 0.0489, "step": 29618 }, { "epoch": 0.6526632401791469, "grad_norm": 0.6698148250579834, "learning_rate": 8.531248003556407e-06, "loss": 0.0724, "step": 29619 }, { "epoch": 0.6526852754686631, "grad_norm": 0.3490809500217438, "learning_rate": 8.530282148460414e-06, "loss": 0.05, "step": 29620 }, { "epoch": 0.6527073107581792, "grad_norm": 0.6682440638542175, "learning_rate": 8.529316326318168e-06, "loss": 0.0632, "step": 29621 }, { "epoch": 0.6527293460476954, "grad_norm": 1.283198356628418, "learning_rate": 8.528350537134576e-06, "loss": 0.077, "step": 29622 }, { "epoch": 0.6527513813372116, "grad_norm": 0.8722422122955322, "learning_rate": 8.527384780914567e-06, "loss": 0.0927, "step": 29623 }, { "epoch": 0.6527734166267277, "grad_norm": 0.8719724416732788, "learning_rate": 8.526419057663064e-06, "loss": 0.0617, "step": 29624 }, { "epoch": 0.6527954519162439, "grad_norm": 0.6255172491073608, "learning_rate": 8.525453367384974e-06, "loss": 0.0585, "step": 29625 }, { "epoch": 0.65281748720576, "grad_norm": 0.9249759316444397, "learning_rate": 8.524487710085222e-06, "loss": 0.0993, "step": 29626 }, { "epoch": 0.6528395224952762, "grad_norm": 0.47955453395843506, "learning_rate": 8.523522085768726e-06, "loss": 0.0595, "step": 29627 }, { "epoch": 0.6528615577847924, "grad_norm": 0.7360513210296631, "learning_rate": 8.522556494440411e-06, "loss": 0.0654, "step": 29628 }, { "epoch": 0.6528835930743085, "grad_norm": 0.7028582692146301, "learning_rate": 8.521590936105183e-06, "loss": 0.0973, "step": 29629 }, { "epoch": 0.6529056283638247, "grad_norm": 0.7553717494010925, "learning_rate": 8.520625410767968e-06, "loss": 0.0754, "step": 29630 }, { "epoch": 0.6529276636533409, "grad_norm": 0.5674732327461243, "learning_rate": 8.519659918433686e-06, "loss": 0.0842, "step": 29631 }, { "epoch": 0.652949698942857, "grad_norm": 0.6581563353538513, "learning_rate": 8.518694459107249e-06, "loss": 0.0422, "step": 29632 }, { "epoch": 0.6529717342323731, "grad_norm": 0.5174444317817688, "learning_rate": 8.517729032793573e-06, "loss": 0.071, "step": 29633 }, { "epoch": 0.6529937695218893, "grad_norm": 0.40411049127578735, "learning_rate": 8.516763639497578e-06, "loss": 0.0439, "step": 29634 }, { "epoch": 0.6530158048114054, "grad_norm": 0.5324333310127258, "learning_rate": 8.515798279224191e-06, "loss": 0.0685, "step": 29635 }, { "epoch": 0.6530378401009216, "grad_norm": 0.8978704810142517, "learning_rate": 8.514832951978315e-06, "loss": 0.0759, "step": 29636 }, { "epoch": 0.6530598753904377, "grad_norm": 0.24661536514759064, "learning_rate": 8.513867657764871e-06, "loss": 0.0525, "step": 29637 }, { "epoch": 0.6530819106799539, "grad_norm": 0.9304923415184021, "learning_rate": 8.512902396588783e-06, "loss": 0.0951, "step": 29638 }, { "epoch": 0.6531039459694701, "grad_norm": 0.5185096859931946, "learning_rate": 8.511937168454955e-06, "loss": 0.0635, "step": 29639 }, { "epoch": 0.6531259812589862, "grad_norm": 0.43900394439697266, "learning_rate": 8.510971973368319e-06, "loss": 0.052, "step": 29640 }, { "epoch": 0.6531480165485024, "grad_norm": 0.29952746629714966, "learning_rate": 8.51000681133377e-06, "loss": 0.0668, "step": 29641 }, { "epoch": 0.6531700518380186, "grad_norm": 0.5714691281318665, "learning_rate": 8.509041682356246e-06, "loss": 0.0634, "step": 29642 }, { "epoch": 0.6531920871275347, "grad_norm": 0.5575326085090637, "learning_rate": 8.508076586440649e-06, "loss": 0.0569, "step": 29643 }, { "epoch": 0.6532141224170509, "grad_norm": 0.6631789803504944, "learning_rate": 8.507111523591904e-06, "loss": 0.0935, "step": 29644 }, { "epoch": 0.6532361577065671, "grad_norm": 0.4792655408382416, "learning_rate": 8.506146493814918e-06, "loss": 0.074, "step": 29645 }, { "epoch": 0.6532581929960832, "grad_norm": 0.564453661441803, "learning_rate": 8.505181497114607e-06, "loss": 0.0581, "step": 29646 }, { "epoch": 0.6532802282855994, "grad_norm": 0.8470273613929749, "learning_rate": 8.504216533495898e-06, "loss": 0.0679, "step": 29647 }, { "epoch": 0.6533022635751156, "grad_norm": 0.5528258085250854, "learning_rate": 8.503251602963691e-06, "loss": 0.0642, "step": 29648 }, { "epoch": 0.6533242988646317, "grad_norm": 0.5999699234962463, "learning_rate": 8.502286705522907e-06, "loss": 0.0789, "step": 29649 }, { "epoch": 0.6533463341541479, "grad_norm": 0.7295509576797485, "learning_rate": 8.50132184117846e-06, "loss": 0.0521, "step": 29650 }, { "epoch": 0.653368369443664, "grad_norm": 0.4312455356121063, "learning_rate": 8.500357009935274e-06, "loss": 0.0807, "step": 29651 }, { "epoch": 0.6533904047331802, "grad_norm": 0.6201755404472351, "learning_rate": 8.499392211798249e-06, "loss": 0.0606, "step": 29652 }, { "epoch": 0.6534124400226964, "grad_norm": 0.6503158211708069, "learning_rate": 8.498427446772304e-06, "loss": 0.0622, "step": 29653 }, { "epoch": 0.6534344753122125, "grad_norm": 0.5834872722625732, "learning_rate": 8.497462714862362e-06, "loss": 0.0628, "step": 29654 }, { "epoch": 0.6534565106017287, "grad_norm": 0.6041195392608643, "learning_rate": 8.496498016073323e-06, "loss": 0.0565, "step": 29655 }, { "epoch": 0.6534785458912449, "grad_norm": 0.5963331460952759, "learning_rate": 8.495533350410106e-06, "loss": 0.0687, "step": 29656 }, { "epoch": 0.653500581180761, "grad_norm": 0.8500590324401855, "learning_rate": 8.494568717877625e-06, "loss": 0.0689, "step": 29657 }, { "epoch": 0.6535226164702771, "grad_norm": 0.5614062547683716, "learning_rate": 8.4936041184808e-06, "loss": 0.0531, "step": 29658 }, { "epoch": 0.6535446517597933, "grad_norm": 0.5721983909606934, "learning_rate": 8.492639552224535e-06, "loss": 0.0916, "step": 29659 }, { "epoch": 0.6535666870493094, "grad_norm": 0.7780472040176392, "learning_rate": 8.491675019113746e-06, "loss": 0.068, "step": 29660 }, { "epoch": 0.6535887223388256, "grad_norm": 0.6349728107452393, "learning_rate": 8.490710519153352e-06, "loss": 0.0521, "step": 29661 }, { "epoch": 0.6536107576283418, "grad_norm": 0.6036689877510071, "learning_rate": 8.48974605234825e-06, "loss": 0.0589, "step": 29662 }, { "epoch": 0.6536327929178579, "grad_norm": 0.7063007950782776, "learning_rate": 8.488781618703372e-06, "loss": 0.0546, "step": 29663 }, { "epoch": 0.6536548282073741, "grad_norm": 0.4689702093601227, "learning_rate": 8.487817218223607e-06, "loss": 0.0475, "step": 29664 }, { "epoch": 0.6536768634968902, "grad_norm": 0.6974638104438782, "learning_rate": 8.486852850913895e-06, "loss": 0.0866, "step": 29665 }, { "epoch": 0.6536988987864064, "grad_norm": 0.3993763327598572, "learning_rate": 8.485888516779126e-06, "loss": 0.0635, "step": 29666 }, { "epoch": 0.6537209340759226, "grad_norm": 0.473416268825531, "learning_rate": 8.484924215824227e-06, "loss": 0.0667, "step": 29667 }, { "epoch": 0.6537429693654387, "grad_norm": 0.500725269317627, "learning_rate": 8.483959948054096e-06, "loss": 0.0446, "step": 29668 }, { "epoch": 0.6537650046549549, "grad_norm": 0.4588640332221985, "learning_rate": 8.48299571347365e-06, "loss": 0.1028, "step": 29669 }, { "epoch": 0.6537870399444711, "grad_norm": 0.5226457715034485, "learning_rate": 8.482031512087811e-06, "loss": 0.043, "step": 29670 }, { "epoch": 0.6538090752339872, "grad_norm": 0.453691691160202, "learning_rate": 8.481067343901474e-06, "loss": 0.0431, "step": 29671 }, { "epoch": 0.6538311105235034, "grad_norm": 0.6264708638191223, "learning_rate": 8.480103208919557e-06, "loss": 0.0604, "step": 29672 }, { "epoch": 0.6538531458130196, "grad_norm": 0.5675815343856812, "learning_rate": 8.479139107146967e-06, "loss": 0.0718, "step": 29673 }, { "epoch": 0.6538751811025357, "grad_norm": 0.3891987204551697, "learning_rate": 8.478175038588627e-06, "loss": 0.0566, "step": 29674 }, { "epoch": 0.6538972163920519, "grad_norm": 0.5060631036758423, "learning_rate": 8.477211003249433e-06, "loss": 0.0679, "step": 29675 }, { "epoch": 0.6539192516815681, "grad_norm": 0.6639193892478943, "learning_rate": 8.4762470011343e-06, "loss": 0.0562, "step": 29676 }, { "epoch": 0.6539412869710842, "grad_norm": 0.4480217397212982, "learning_rate": 8.475283032248145e-06, "loss": 0.0664, "step": 29677 }, { "epoch": 0.6539633222606004, "grad_norm": 0.6004055142402649, "learning_rate": 8.47431909659587e-06, "loss": 0.0685, "step": 29678 }, { "epoch": 0.6539853575501166, "grad_norm": 0.5505895018577576, "learning_rate": 8.473355194182382e-06, "loss": 0.0713, "step": 29679 }, { "epoch": 0.6540073928396327, "grad_norm": 0.5154781937599182, "learning_rate": 8.472391325012599e-06, "loss": 0.0315, "step": 29680 }, { "epoch": 0.6540294281291489, "grad_norm": 0.5687407851219177, "learning_rate": 8.471427489091432e-06, "loss": 0.0648, "step": 29681 }, { "epoch": 0.654051463418665, "grad_norm": 0.4398599863052368, "learning_rate": 8.470463686423781e-06, "loss": 0.0601, "step": 29682 }, { "epoch": 0.6540734987081811, "grad_norm": 0.715986967086792, "learning_rate": 8.469499917014558e-06, "loss": 0.0533, "step": 29683 }, { "epoch": 0.6540955339976973, "grad_norm": 0.5890606641769409, "learning_rate": 8.46853618086868e-06, "loss": 0.0612, "step": 29684 }, { "epoch": 0.6541175692872134, "grad_norm": 0.8248634338378906, "learning_rate": 8.467572477991046e-06, "loss": 0.0692, "step": 29685 }, { "epoch": 0.6541396045767296, "grad_norm": 0.584222137928009, "learning_rate": 8.466608808386574e-06, "loss": 0.0737, "step": 29686 }, { "epoch": 0.6541616398662458, "grad_norm": 0.523508608341217, "learning_rate": 8.465645172060153e-06, "loss": 0.0645, "step": 29687 }, { "epoch": 0.6541836751557619, "grad_norm": 0.5868017077445984, "learning_rate": 8.464681569016719e-06, "loss": 0.0569, "step": 29688 }, { "epoch": 0.6542057104452781, "grad_norm": 0.4780116677284241, "learning_rate": 8.46371799926116e-06, "loss": 0.0531, "step": 29689 }, { "epoch": 0.6542277457347943, "grad_norm": 0.4318888783454895, "learning_rate": 8.462754462798394e-06, "loss": 0.0725, "step": 29690 }, { "epoch": 0.6542497810243104, "grad_norm": 0.8868631720542908, "learning_rate": 8.461790959633315e-06, "loss": 0.0878, "step": 29691 }, { "epoch": 0.6542718163138266, "grad_norm": 0.6799576282501221, "learning_rate": 8.460827489770853e-06, "loss": 0.09, "step": 29692 }, { "epoch": 0.6542938516033427, "grad_norm": 0.5977356433868408, "learning_rate": 8.459864053215902e-06, "loss": 0.0525, "step": 29693 }, { "epoch": 0.6543158868928589, "grad_norm": 0.5262459516525269, "learning_rate": 8.458900649973366e-06, "loss": 0.0556, "step": 29694 }, { "epoch": 0.6543379221823751, "grad_norm": 0.6144779324531555, "learning_rate": 8.457937280048156e-06, "loss": 0.0515, "step": 29695 }, { "epoch": 0.6543599574718912, "grad_norm": 0.3614288866519928, "learning_rate": 8.456973943445178e-06, "loss": 0.0612, "step": 29696 }, { "epoch": 0.6543819927614074, "grad_norm": 0.5203102231025696, "learning_rate": 8.456010640169347e-06, "loss": 0.0523, "step": 29697 }, { "epoch": 0.6544040280509236, "grad_norm": 0.7586902976036072, "learning_rate": 8.455047370225551e-06, "loss": 0.0626, "step": 29698 }, { "epoch": 0.6544260633404397, "grad_norm": 0.7165920734405518, "learning_rate": 8.454084133618721e-06, "loss": 0.0529, "step": 29699 }, { "epoch": 0.6544480986299559, "grad_norm": 0.48722198605537415, "learning_rate": 8.453120930353745e-06, "loss": 0.0416, "step": 29700 }, { "epoch": 0.6544701339194721, "grad_norm": 0.5327393412590027, "learning_rate": 8.45215776043554e-06, "loss": 0.1117, "step": 29701 }, { "epoch": 0.6544921692089882, "grad_norm": 0.6260470151901245, "learning_rate": 8.451194623869003e-06, "loss": 0.0695, "step": 29702 }, { "epoch": 0.6545142044985044, "grad_norm": 0.6496924757957458, "learning_rate": 8.45023152065904e-06, "loss": 0.0553, "step": 29703 }, { "epoch": 0.6545362397880206, "grad_norm": 0.7260130047798157, "learning_rate": 8.44926845081057e-06, "loss": 0.0617, "step": 29704 }, { "epoch": 0.6545582750775367, "grad_norm": 0.6833932995796204, "learning_rate": 8.44830541432848e-06, "loss": 0.0766, "step": 29705 }, { "epoch": 0.6545803103670529, "grad_norm": 0.8596020340919495, "learning_rate": 8.447342411217683e-06, "loss": 0.0909, "step": 29706 }, { "epoch": 0.6546023456565689, "grad_norm": 0.635859489440918, "learning_rate": 8.446379441483086e-06, "loss": 0.0684, "step": 29707 }, { "epoch": 0.6546243809460851, "grad_norm": 0.6328935027122498, "learning_rate": 8.4454165051296e-06, "loss": 0.0575, "step": 29708 }, { "epoch": 0.6546464162356013, "grad_norm": 1.0107735395431519, "learning_rate": 8.444453602162114e-06, "loss": 0.0703, "step": 29709 }, { "epoch": 0.6546684515251174, "grad_norm": 0.3592230975627899, "learning_rate": 8.443490732585544e-06, "loss": 0.0466, "step": 29710 }, { "epoch": 0.6546904868146336, "grad_norm": 1.0110594034194946, "learning_rate": 8.442527896404797e-06, "loss": 0.0942, "step": 29711 }, { "epoch": 0.6547125221041498, "grad_norm": 0.5602120161056519, "learning_rate": 8.441565093624766e-06, "loss": 0.0582, "step": 29712 }, { "epoch": 0.6547345573936659, "grad_norm": 0.46051979064941406, "learning_rate": 8.440602324250364e-06, "loss": 0.0705, "step": 29713 }, { "epoch": 0.6547565926831821, "grad_norm": 0.7945659160614014, "learning_rate": 8.439639588286482e-06, "loss": 0.0839, "step": 29714 }, { "epoch": 0.6547786279726983, "grad_norm": 0.34554487466812134, "learning_rate": 8.438676885738047e-06, "loss": 0.0546, "step": 29715 }, { "epoch": 0.6548006632622144, "grad_norm": 0.7146708965301514, "learning_rate": 8.43771421660994e-06, "loss": 0.0735, "step": 29716 }, { "epoch": 0.6548226985517306, "grad_norm": 0.49405521154403687, "learning_rate": 8.43675158090708e-06, "loss": 0.0758, "step": 29717 }, { "epoch": 0.6548447338412468, "grad_norm": 0.36918768286705017, "learning_rate": 8.43578897863436e-06, "loss": 0.0519, "step": 29718 }, { "epoch": 0.6548667691307629, "grad_norm": 0.6749398112297058, "learning_rate": 8.434826409796685e-06, "loss": 0.0689, "step": 29719 }, { "epoch": 0.6548888044202791, "grad_norm": 0.7780599594116211, "learning_rate": 8.433863874398967e-06, "loss": 0.0679, "step": 29720 }, { "epoch": 0.6549108397097952, "grad_norm": 0.5971508026123047, "learning_rate": 8.432901372446087e-06, "loss": 0.033, "step": 29721 }, { "epoch": 0.6549328749993114, "grad_norm": 0.8166192173957825, "learning_rate": 8.431938903942975e-06, "loss": 0.0564, "step": 29722 }, { "epoch": 0.6549549102888276, "grad_norm": 0.9040237665176392, "learning_rate": 8.430976468894516e-06, "loss": 0.0895, "step": 29723 }, { "epoch": 0.6549769455783437, "grad_norm": 0.8455283045768738, "learning_rate": 8.43001406730562e-06, "loss": 0.0939, "step": 29724 }, { "epoch": 0.6549989808678599, "grad_norm": 0.8056415915489197, "learning_rate": 8.429051699181181e-06, "loss": 0.0729, "step": 29725 }, { "epoch": 0.6550210161573761, "grad_norm": 0.4965808391571045, "learning_rate": 8.428089364526104e-06, "loss": 0.0551, "step": 29726 }, { "epoch": 0.6550430514468922, "grad_norm": 0.5583090782165527, "learning_rate": 8.427127063345298e-06, "loss": 0.0695, "step": 29727 }, { "epoch": 0.6550650867364084, "grad_norm": 0.8333091139793396, "learning_rate": 8.426164795643652e-06, "loss": 0.0795, "step": 29728 }, { "epoch": 0.6550871220259246, "grad_norm": 0.5459692478179932, "learning_rate": 8.425202561426078e-06, "loss": 0.0545, "step": 29729 }, { "epoch": 0.6551091573154407, "grad_norm": 0.5388851761817932, "learning_rate": 8.424240360697469e-06, "loss": 0.0791, "step": 29730 }, { "epoch": 0.6551311926049569, "grad_norm": 0.46079739928245544, "learning_rate": 8.423278193462736e-06, "loss": 0.0432, "step": 29731 }, { "epoch": 0.655153227894473, "grad_norm": 0.6603855490684509, "learning_rate": 8.42231605972677e-06, "loss": 0.0623, "step": 29732 }, { "epoch": 0.6551752631839891, "grad_norm": 0.8473606109619141, "learning_rate": 8.421353959494474e-06, "loss": 0.0955, "step": 29733 }, { "epoch": 0.6551972984735053, "grad_norm": 0.2752107083797455, "learning_rate": 8.420391892770756e-06, "loss": 0.0813, "step": 29734 }, { "epoch": 0.6552193337630214, "grad_norm": 0.5468947887420654, "learning_rate": 8.419429859560506e-06, "loss": 0.0844, "step": 29735 }, { "epoch": 0.6552413690525376, "grad_norm": 0.6286178827285767, "learning_rate": 8.418467859868633e-06, "loss": 0.0675, "step": 29736 }, { "epoch": 0.6552634043420538, "grad_norm": 0.4737311601638794, "learning_rate": 8.41750589370002e-06, "loss": 0.0594, "step": 29737 }, { "epoch": 0.6552854396315699, "grad_norm": 0.613509476184845, "learning_rate": 8.416543961059592e-06, "loss": 0.0743, "step": 29738 }, { "epoch": 0.6553074749210861, "grad_norm": 1.2876895666122437, "learning_rate": 8.41558206195223e-06, "loss": 0.079, "step": 29739 }, { "epoch": 0.6553295102106023, "grad_norm": 0.532828688621521, "learning_rate": 8.414620196382846e-06, "loss": 0.0605, "step": 29740 }, { "epoch": 0.6553515455001184, "grad_norm": 0.6737989783287048, "learning_rate": 8.413658364356329e-06, "loss": 0.0764, "step": 29741 }, { "epoch": 0.6553735807896346, "grad_norm": 0.8201552033424377, "learning_rate": 8.412696565877578e-06, "loss": 0.0788, "step": 29742 }, { "epoch": 0.6553956160791508, "grad_norm": 0.7040356397628784, "learning_rate": 8.411734800951501e-06, "loss": 0.0606, "step": 29743 }, { "epoch": 0.6554176513686669, "grad_norm": 0.5082671642303467, "learning_rate": 8.410773069582983e-06, "loss": 0.0423, "step": 29744 }, { "epoch": 0.6554396866581831, "grad_norm": 0.387760192155838, "learning_rate": 8.40981137177694e-06, "loss": 0.0472, "step": 29745 }, { "epoch": 0.6554617219476992, "grad_norm": 0.5354046821594238, "learning_rate": 8.408849707538258e-06, "loss": 0.0834, "step": 29746 }, { "epoch": 0.6554837572372154, "grad_norm": 0.8391699194908142, "learning_rate": 8.407888076871845e-06, "loss": 0.0918, "step": 29747 }, { "epoch": 0.6555057925267316, "grad_norm": 0.3218845725059509, "learning_rate": 8.406926479782586e-06, "loss": 0.0634, "step": 29748 }, { "epoch": 0.6555278278162477, "grad_norm": 0.583448052406311, "learning_rate": 8.405964916275386e-06, "loss": 0.063, "step": 29749 }, { "epoch": 0.6555498631057639, "grad_norm": 0.5259602069854736, "learning_rate": 8.405003386355148e-06, "loss": 0.0535, "step": 29750 }, { "epoch": 0.6555718983952801, "grad_norm": 0.480887770652771, "learning_rate": 8.404041890026757e-06, "loss": 0.0457, "step": 29751 }, { "epoch": 0.6555939336847962, "grad_norm": 1.192599892616272, "learning_rate": 8.403080427295121e-06, "loss": 0.1229, "step": 29752 }, { "epoch": 0.6556159689743124, "grad_norm": 0.4369385540485382, "learning_rate": 8.402118998165133e-06, "loss": 0.0592, "step": 29753 }, { "epoch": 0.6556380042638286, "grad_norm": 0.5719690918922424, "learning_rate": 8.401157602641696e-06, "loss": 0.0765, "step": 29754 }, { "epoch": 0.6556600395533447, "grad_norm": 0.42215704917907715, "learning_rate": 8.400196240729698e-06, "loss": 0.0414, "step": 29755 }, { "epoch": 0.6556820748428609, "grad_norm": 0.37505146861076355, "learning_rate": 8.399234912434035e-06, "loss": 0.0531, "step": 29756 }, { "epoch": 0.655704110132377, "grad_norm": 0.7058194279670715, "learning_rate": 8.39827361775962e-06, "loss": 0.0547, "step": 29757 }, { "epoch": 0.6557261454218931, "grad_norm": 0.9363012313842773, "learning_rate": 8.397312356711327e-06, "loss": 0.079, "step": 29758 }, { "epoch": 0.6557481807114093, "grad_norm": 0.48971161246299744, "learning_rate": 8.396351129294073e-06, "loss": 0.0738, "step": 29759 }, { "epoch": 0.6557702160009254, "grad_norm": 0.6246453523635864, "learning_rate": 8.395389935512729e-06, "loss": 0.0715, "step": 29760 }, { "epoch": 0.6557922512904416, "grad_norm": 0.819644033908844, "learning_rate": 8.394428775372219e-06, "loss": 0.0631, "step": 29761 }, { "epoch": 0.6558142865799578, "grad_norm": 0.3055703341960907, "learning_rate": 8.393467648877417e-06, "loss": 0.0564, "step": 29762 }, { "epoch": 0.6558363218694739, "grad_norm": 0.5463985204696655, "learning_rate": 8.392506556033234e-06, "loss": 0.0432, "step": 29763 }, { "epoch": 0.6558583571589901, "grad_norm": 0.5329760909080505, "learning_rate": 8.391545496844554e-06, "loss": 0.0676, "step": 29764 }, { "epoch": 0.6558803924485063, "grad_norm": 0.36874696612358093, "learning_rate": 8.390584471316278e-06, "loss": 0.0446, "step": 29765 }, { "epoch": 0.6559024277380224, "grad_norm": 0.38660070300102234, "learning_rate": 8.389623479453301e-06, "loss": 0.0544, "step": 29766 }, { "epoch": 0.6559244630275386, "grad_norm": 0.7239922285079956, "learning_rate": 8.388662521260507e-06, "loss": 0.0536, "step": 29767 }, { "epoch": 0.6559464983170548, "grad_norm": 0.48056498169898987, "learning_rate": 8.387701596742811e-06, "loss": 0.0664, "step": 29768 }, { "epoch": 0.6559685336065709, "grad_norm": 0.3028604984283447, "learning_rate": 8.386740705905091e-06, "loss": 0.0527, "step": 29769 }, { "epoch": 0.6559905688960871, "grad_norm": 0.7200682759284973, "learning_rate": 8.385779848752254e-06, "loss": 0.0627, "step": 29770 }, { "epoch": 0.6560126041856033, "grad_norm": 0.5995777249336243, "learning_rate": 8.384819025289178e-06, "loss": 0.0697, "step": 29771 }, { "epoch": 0.6560346394751194, "grad_norm": 0.5742031931877136, "learning_rate": 8.38385823552077e-06, "loss": 0.0439, "step": 29772 }, { "epoch": 0.6560566747646356, "grad_norm": 0.5503401756286621, "learning_rate": 8.382897479451923e-06, "loss": 0.0803, "step": 29773 }, { "epoch": 0.6560787100541517, "grad_norm": 0.41654765605926514, "learning_rate": 8.381936757087523e-06, "loss": 0.0672, "step": 29774 }, { "epoch": 0.6561007453436679, "grad_norm": 0.5573477745056152, "learning_rate": 8.380976068432469e-06, "loss": 0.0729, "step": 29775 }, { "epoch": 0.6561227806331841, "grad_norm": 0.6553379893302917, "learning_rate": 8.38001541349165e-06, "loss": 0.0681, "step": 29776 }, { "epoch": 0.6561448159227002, "grad_norm": 0.5224235653877258, "learning_rate": 8.379054792269972e-06, "loss": 0.0606, "step": 29777 }, { "epoch": 0.6561668512122164, "grad_norm": 0.9184803366661072, "learning_rate": 8.37809420477231e-06, "loss": 0.0682, "step": 29778 }, { "epoch": 0.6561888865017326, "grad_norm": 0.43628066778182983, "learning_rate": 8.377133651003565e-06, "loss": 0.0434, "step": 29779 }, { "epoch": 0.6562109217912487, "grad_norm": 0.712033212184906, "learning_rate": 8.376173130968636e-06, "loss": 0.0734, "step": 29780 }, { "epoch": 0.6562329570807649, "grad_norm": 0.589730978012085, "learning_rate": 8.375212644672404e-06, "loss": 0.0451, "step": 29781 }, { "epoch": 0.656254992370281, "grad_norm": 0.6334479451179504, "learning_rate": 8.374252192119774e-06, "loss": 0.0696, "step": 29782 }, { "epoch": 0.6562770276597971, "grad_norm": 0.44242438673973083, "learning_rate": 8.373291773315616e-06, "loss": 0.0534, "step": 29783 }, { "epoch": 0.6562990629493133, "grad_norm": 0.7174253463745117, "learning_rate": 8.37233138826485e-06, "loss": 0.1157, "step": 29784 }, { "epoch": 0.6563210982388294, "grad_norm": 0.42005494236946106, "learning_rate": 8.371371036972348e-06, "loss": 0.0509, "step": 29785 }, { "epoch": 0.6563431335283456, "grad_norm": 0.5603684186935425, "learning_rate": 8.370410719443007e-06, "loss": 0.0635, "step": 29786 }, { "epoch": 0.6563651688178618, "grad_norm": 0.4006815254688263, "learning_rate": 8.369450435681725e-06, "loss": 0.0581, "step": 29787 }, { "epoch": 0.6563872041073779, "grad_norm": 0.7855117321014404, "learning_rate": 8.368490185693382e-06, "loss": 0.0935, "step": 29788 }, { "epoch": 0.6564092393968941, "grad_norm": 0.6435012221336365, "learning_rate": 8.36752996948288e-06, "loss": 0.0844, "step": 29789 }, { "epoch": 0.6564312746864103, "grad_norm": 0.5133028030395508, "learning_rate": 8.366569787055094e-06, "loss": 0.0635, "step": 29790 }, { "epoch": 0.6564533099759264, "grad_norm": 0.38251498341560364, "learning_rate": 8.365609638414937e-06, "loss": 0.056, "step": 29791 }, { "epoch": 0.6564753452654426, "grad_norm": 0.2801281213760376, "learning_rate": 8.36464952356728e-06, "loss": 0.0563, "step": 29792 }, { "epoch": 0.6564973805549588, "grad_norm": 0.6583516001701355, "learning_rate": 8.363689442517028e-06, "loss": 0.0725, "step": 29793 }, { "epoch": 0.6565194158444749, "grad_norm": 0.7795079350471497, "learning_rate": 8.362729395269055e-06, "loss": 0.0708, "step": 29794 }, { "epoch": 0.6565414511339911, "grad_norm": 1.3075751066207886, "learning_rate": 8.361769381828269e-06, "loss": 0.0899, "step": 29795 }, { "epoch": 0.6565634864235073, "grad_norm": 0.3108053505420685, "learning_rate": 8.360809402199552e-06, "loss": 0.0552, "step": 29796 }, { "epoch": 0.6565855217130234, "grad_norm": 0.5306977033615112, "learning_rate": 8.359849456387788e-06, "loss": 0.0509, "step": 29797 }, { "epoch": 0.6566075570025396, "grad_norm": 0.9043187499046326, "learning_rate": 8.35888954439787e-06, "loss": 0.0667, "step": 29798 }, { "epoch": 0.6566295922920558, "grad_norm": 0.8000920414924622, "learning_rate": 8.35792966623469e-06, "loss": 0.0782, "step": 29799 }, { "epoch": 0.6566516275815719, "grad_norm": 0.7129954099655151, "learning_rate": 8.356969821903142e-06, "loss": 0.0946, "step": 29800 }, { "epoch": 0.6566736628710881, "grad_norm": 0.5634738802909851, "learning_rate": 8.356010011408102e-06, "loss": 0.0663, "step": 29801 }, { "epoch": 0.6566956981606042, "grad_norm": 1.042641520500183, "learning_rate": 8.355050234754465e-06, "loss": 0.0913, "step": 29802 }, { "epoch": 0.6567177334501204, "grad_norm": 0.7103409171104431, "learning_rate": 8.354090491947123e-06, "loss": 0.0435, "step": 29803 }, { "epoch": 0.6567397687396366, "grad_norm": 0.4183961749076843, "learning_rate": 8.353130782990966e-06, "loss": 0.0534, "step": 29804 }, { "epoch": 0.6567618040291527, "grad_norm": 0.9624403119087219, "learning_rate": 8.35217110789088e-06, "loss": 0.0932, "step": 29805 }, { "epoch": 0.6567838393186688, "grad_norm": 0.4843646287918091, "learning_rate": 8.351211466651739e-06, "loss": 0.0618, "step": 29806 }, { "epoch": 0.656805874608185, "grad_norm": 0.8840489983558655, "learning_rate": 8.350251859278453e-06, "loss": 0.0729, "step": 29807 }, { "epoch": 0.6568279098977011, "grad_norm": 0.466124564409256, "learning_rate": 8.349292285775895e-06, "loss": 0.0512, "step": 29808 }, { "epoch": 0.6568499451872173, "grad_norm": 0.5090118050575256, "learning_rate": 8.348332746148962e-06, "loss": 0.0543, "step": 29809 }, { "epoch": 0.6568719804767335, "grad_norm": 0.5864994525909424, "learning_rate": 8.347373240402531e-06, "loss": 0.0443, "step": 29810 }, { "epoch": 0.6568940157662496, "grad_norm": 0.5278935432434082, "learning_rate": 8.346413768541507e-06, "loss": 0.0644, "step": 29811 }, { "epoch": 0.6569160510557658, "grad_norm": 0.40730148553848267, "learning_rate": 8.345454330570755e-06, "loss": 0.0538, "step": 29812 }, { "epoch": 0.656938086345282, "grad_norm": 0.6206030249595642, "learning_rate": 8.344494926495175e-06, "loss": 0.0822, "step": 29813 }, { "epoch": 0.6569601216347981, "grad_norm": 0.37967997789382935, "learning_rate": 8.343535556319655e-06, "loss": 0.059, "step": 29814 }, { "epoch": 0.6569821569243143, "grad_norm": 0.6358316540718079, "learning_rate": 8.342576220049076e-06, "loss": 0.0898, "step": 29815 }, { "epoch": 0.6570041922138304, "grad_norm": 0.5640339851379395, "learning_rate": 8.34161691768833e-06, "loss": 0.1047, "step": 29816 }, { "epoch": 0.6570262275033466, "grad_norm": 0.5626301765441895, "learning_rate": 8.340657649242287e-06, "loss": 0.0812, "step": 29817 }, { "epoch": 0.6570482627928628, "grad_norm": 0.37446266412734985, "learning_rate": 8.339698414715858e-06, "loss": 0.0463, "step": 29818 }, { "epoch": 0.6570702980823789, "grad_norm": 0.5136838555335999, "learning_rate": 8.338739214113912e-06, "loss": 0.0514, "step": 29819 }, { "epoch": 0.6570923333718951, "grad_norm": 0.7659078240394592, "learning_rate": 8.337780047441343e-06, "loss": 0.108, "step": 29820 }, { "epoch": 0.6571143686614113, "grad_norm": 0.5226189494132996, "learning_rate": 8.336820914703027e-06, "loss": 0.075, "step": 29821 }, { "epoch": 0.6571364039509274, "grad_norm": 0.8229920268058777, "learning_rate": 8.335861815903855e-06, "loss": 0.0947, "step": 29822 }, { "epoch": 0.6571584392404436, "grad_norm": 0.8801852464675903, "learning_rate": 8.334902751048721e-06, "loss": 0.0747, "step": 29823 }, { "epoch": 0.6571804745299598, "grad_norm": 0.5230880379676819, "learning_rate": 8.333943720142493e-06, "loss": 0.0572, "step": 29824 }, { "epoch": 0.6572025098194759, "grad_norm": 0.520942211151123, "learning_rate": 8.332984723190065e-06, "loss": 0.0673, "step": 29825 }, { "epoch": 0.6572245451089921, "grad_norm": 0.6713346242904663, "learning_rate": 8.332025760196321e-06, "loss": 0.0584, "step": 29826 }, { "epoch": 0.6572465803985083, "grad_norm": 0.6742032766342163, "learning_rate": 8.331066831166153e-06, "loss": 0.0471, "step": 29827 }, { "epoch": 0.6572686156880244, "grad_norm": 0.3815763592720032, "learning_rate": 8.330107936104429e-06, "loss": 0.0651, "step": 29828 }, { "epoch": 0.6572906509775406, "grad_norm": 0.6912234425544739, "learning_rate": 8.329149075016043e-06, "loss": 0.0854, "step": 29829 }, { "epoch": 0.6573126862670567, "grad_norm": 0.7799020409584045, "learning_rate": 8.328190247905885e-06, "loss": 0.0796, "step": 29830 }, { "epoch": 0.6573347215565728, "grad_norm": 0.5001466870307922, "learning_rate": 8.327231454778824e-06, "loss": 0.0777, "step": 29831 }, { "epoch": 0.657356756846089, "grad_norm": 0.5405755043029785, "learning_rate": 8.326272695639752e-06, "loss": 0.0729, "step": 29832 }, { "epoch": 0.6573787921356051, "grad_norm": 0.6564484238624573, "learning_rate": 8.325313970493553e-06, "loss": 0.0706, "step": 29833 }, { "epoch": 0.6574008274251213, "grad_norm": 0.871969997882843, "learning_rate": 8.324355279345113e-06, "loss": 0.0528, "step": 29834 }, { "epoch": 0.6574228627146375, "grad_norm": 0.25198641419410706, "learning_rate": 8.323396622199306e-06, "loss": 0.0727, "step": 29835 }, { "epoch": 0.6574448980041536, "grad_norm": 0.6773068904876709, "learning_rate": 8.322437999061024e-06, "loss": 0.0672, "step": 29836 }, { "epoch": 0.6574669332936698, "grad_norm": 0.4145055115222931, "learning_rate": 8.321479409935146e-06, "loss": 0.0362, "step": 29837 }, { "epoch": 0.657488968583186, "grad_norm": 0.793716311454773, "learning_rate": 8.320520854826552e-06, "loss": 0.0725, "step": 29838 }, { "epoch": 0.6575110038727021, "grad_norm": 0.6825066804885864, "learning_rate": 8.319562333740133e-06, "loss": 0.0578, "step": 29839 }, { "epoch": 0.6575330391622183, "grad_norm": 0.665851891040802, "learning_rate": 8.318603846680754e-06, "loss": 0.0501, "step": 29840 }, { "epoch": 0.6575550744517344, "grad_norm": 0.8938705325126648, "learning_rate": 8.31764539365332e-06, "loss": 0.0896, "step": 29841 }, { "epoch": 0.6575771097412506, "grad_norm": 0.4919280409812927, "learning_rate": 8.316686974662694e-06, "loss": 0.0895, "step": 29842 }, { "epoch": 0.6575991450307668, "grad_norm": 0.3672615885734558, "learning_rate": 8.315728589713771e-06, "loss": 0.0784, "step": 29843 }, { "epoch": 0.6576211803202829, "grad_norm": 0.6520594358444214, "learning_rate": 8.314770238811422e-06, "loss": 0.0633, "step": 29844 }, { "epoch": 0.6576432156097991, "grad_norm": 0.768505871295929, "learning_rate": 8.313811921960535e-06, "loss": 0.0875, "step": 29845 }, { "epoch": 0.6576652508993153, "grad_norm": 0.834527313709259, "learning_rate": 8.312853639165991e-06, "loss": 0.0764, "step": 29846 }, { "epoch": 0.6576872861888314, "grad_norm": 0.5930628180503845, "learning_rate": 8.311895390432665e-06, "loss": 0.0621, "step": 29847 }, { "epoch": 0.6577093214783476, "grad_norm": 0.42023375630378723, "learning_rate": 8.310937175765445e-06, "loss": 0.0622, "step": 29848 }, { "epoch": 0.6577313567678638, "grad_norm": 0.42537784576416016, "learning_rate": 8.309978995169206e-06, "loss": 0.0655, "step": 29849 }, { "epoch": 0.6577533920573799, "grad_norm": 0.8330705165863037, "learning_rate": 8.309020848648838e-06, "loss": 0.0752, "step": 29850 }, { "epoch": 0.6577754273468961, "grad_norm": 0.7344960570335388, "learning_rate": 8.308062736209208e-06, "loss": 0.0637, "step": 29851 }, { "epoch": 0.6577974626364123, "grad_norm": 0.4832344055175781, "learning_rate": 8.307104657855206e-06, "loss": 0.0773, "step": 29852 }, { "epoch": 0.6578194979259284, "grad_norm": 0.4255650043487549, "learning_rate": 8.306146613591711e-06, "loss": 0.0463, "step": 29853 }, { "epoch": 0.6578415332154446, "grad_norm": 0.9275473356246948, "learning_rate": 8.305188603423597e-06, "loss": 0.0753, "step": 29854 }, { "epoch": 0.6578635685049607, "grad_norm": 0.7593587636947632, "learning_rate": 8.304230627355748e-06, "loss": 0.0849, "step": 29855 }, { "epoch": 0.6578856037944768, "grad_norm": 0.5280478596687317, "learning_rate": 8.30327268539304e-06, "loss": 0.0838, "step": 29856 }, { "epoch": 0.657907639083993, "grad_norm": 0.7006723284721375, "learning_rate": 8.302314777540365e-06, "loss": 0.0693, "step": 29857 }, { "epoch": 0.6579296743735091, "grad_norm": 0.7209031581878662, "learning_rate": 8.301356903802584e-06, "loss": 0.0574, "step": 29858 }, { "epoch": 0.6579517096630253, "grad_norm": 1.2201837301254272, "learning_rate": 8.300399064184586e-06, "loss": 0.0791, "step": 29859 }, { "epoch": 0.6579737449525415, "grad_norm": 0.8573526740074158, "learning_rate": 8.299441258691253e-06, "loss": 0.0843, "step": 29860 }, { "epoch": 0.6579957802420576, "grad_norm": 0.376747190952301, "learning_rate": 8.298483487327452e-06, "loss": 0.0423, "step": 29861 }, { "epoch": 0.6580178155315738, "grad_norm": 0.9107188582420349, "learning_rate": 8.297525750098076e-06, "loss": 0.0933, "step": 29862 }, { "epoch": 0.65803985082109, "grad_norm": 0.4479820430278778, "learning_rate": 8.296568047007984e-06, "loss": 0.064, "step": 29863 }, { "epoch": 0.6580618861106061, "grad_norm": 0.4086337685585022, "learning_rate": 8.295610378062076e-06, "loss": 0.0521, "step": 29864 }, { "epoch": 0.6580839214001223, "grad_norm": 0.8024001121520996, "learning_rate": 8.294652743265215e-06, "loss": 0.0896, "step": 29865 }, { "epoch": 0.6581059566896384, "grad_norm": 0.7726771831512451, "learning_rate": 8.293695142622287e-06, "loss": 0.0807, "step": 29866 }, { "epoch": 0.6581279919791546, "grad_norm": 0.3247227668762207, "learning_rate": 8.292737576138162e-06, "loss": 0.0328, "step": 29867 }, { "epoch": 0.6581500272686708, "grad_norm": 0.6226441264152527, "learning_rate": 8.291780043817722e-06, "loss": 0.0639, "step": 29868 }, { "epoch": 0.6581720625581869, "grad_norm": 0.4126734435558319, "learning_rate": 8.290822545665845e-06, "loss": 0.0694, "step": 29869 }, { "epoch": 0.6581940978477031, "grad_norm": 0.9730339646339417, "learning_rate": 8.289865081687404e-06, "loss": 0.0633, "step": 29870 }, { "epoch": 0.6582161331372193, "grad_norm": 0.6646396517753601, "learning_rate": 8.28890765188728e-06, "loss": 0.1039, "step": 29871 }, { "epoch": 0.6582381684267354, "grad_norm": 1.0220227241516113, "learning_rate": 8.287950256270344e-06, "loss": 0.079, "step": 29872 }, { "epoch": 0.6582602037162516, "grad_norm": 0.6028305292129517, "learning_rate": 8.286992894841486e-06, "loss": 0.0497, "step": 29873 }, { "epoch": 0.6582822390057678, "grad_norm": 0.772732675075531, "learning_rate": 8.286035567605564e-06, "loss": 0.0537, "step": 29874 }, { "epoch": 0.6583042742952839, "grad_norm": 0.5196695923805237, "learning_rate": 8.285078274567464e-06, "loss": 0.088, "step": 29875 }, { "epoch": 0.6583263095848001, "grad_norm": 0.641342282295227, "learning_rate": 8.284121015732068e-06, "loss": 0.0572, "step": 29876 }, { "epoch": 0.6583483448743163, "grad_norm": 0.6520572304725647, "learning_rate": 8.28316379110424e-06, "loss": 0.0454, "step": 29877 }, { "epoch": 0.6583703801638324, "grad_norm": 0.6761374473571777, "learning_rate": 8.282206600688861e-06, "loss": 0.047, "step": 29878 }, { "epoch": 0.6583924154533486, "grad_norm": 0.5891076922416687, "learning_rate": 8.281249444490804e-06, "loss": 0.0837, "step": 29879 }, { "epoch": 0.6584144507428646, "grad_norm": 0.7488772869110107, "learning_rate": 8.280292322514953e-06, "loss": 0.0708, "step": 29880 }, { "epoch": 0.6584364860323808, "grad_norm": 0.38953009247779846, "learning_rate": 8.27933523476617e-06, "loss": 0.0526, "step": 29881 }, { "epoch": 0.658458521321897, "grad_norm": 0.6715235114097595, "learning_rate": 8.278378181249338e-06, "loss": 0.0576, "step": 29882 }, { "epoch": 0.6584805566114131, "grad_norm": 0.43141546845436096, "learning_rate": 8.277421161969336e-06, "loss": 0.0561, "step": 29883 }, { "epoch": 0.6585025919009293, "grad_norm": 0.7076886892318726, "learning_rate": 8.276464176931026e-06, "loss": 0.0629, "step": 29884 }, { "epoch": 0.6585246271904455, "grad_norm": 0.7071518301963806, "learning_rate": 8.275507226139297e-06, "loss": 0.0889, "step": 29885 }, { "epoch": 0.6585466624799616, "grad_norm": 0.28545162081718445, "learning_rate": 8.274550309599001e-06, "loss": 0.0691, "step": 29886 }, { "epoch": 0.6585686977694778, "grad_norm": 0.9950355887413025, "learning_rate": 8.273593427315043e-06, "loss": 0.0946, "step": 29887 }, { "epoch": 0.658590733058994, "grad_norm": 0.7261971235275269, "learning_rate": 8.272636579292271e-06, "loss": 0.0732, "step": 29888 }, { "epoch": 0.6586127683485101, "grad_norm": 0.45902055501937866, "learning_rate": 8.271679765535576e-06, "loss": 0.0585, "step": 29889 }, { "epoch": 0.6586348036380263, "grad_norm": 0.7293508648872375, "learning_rate": 8.270722986049816e-06, "loss": 0.0508, "step": 29890 }, { "epoch": 0.6586568389275425, "grad_norm": 0.6655222177505493, "learning_rate": 8.269766240839873e-06, "loss": 0.0526, "step": 29891 }, { "epoch": 0.6586788742170586, "grad_norm": 0.8224964141845703, "learning_rate": 8.268809529910628e-06, "loss": 0.0542, "step": 29892 }, { "epoch": 0.6587009095065748, "grad_norm": 0.49480390548706055, "learning_rate": 8.267852853266937e-06, "loss": 0.0703, "step": 29893 }, { "epoch": 0.658722944796091, "grad_norm": 0.45269712805747986, "learning_rate": 8.266896210913683e-06, "loss": 0.0928, "step": 29894 }, { "epoch": 0.6587449800856071, "grad_norm": 0.6597258448600769, "learning_rate": 8.265939602855737e-06, "loss": 0.0818, "step": 29895 }, { "epoch": 0.6587670153751233, "grad_norm": 0.5580911040306091, "learning_rate": 8.264983029097977e-06, "loss": 0.0695, "step": 29896 }, { "epoch": 0.6587890506646394, "grad_norm": 0.6953344345092773, "learning_rate": 8.264026489645264e-06, "loss": 0.0713, "step": 29897 }, { "epoch": 0.6588110859541556, "grad_norm": 0.6894206404685974, "learning_rate": 8.263069984502475e-06, "loss": 0.0834, "step": 29898 }, { "epoch": 0.6588331212436718, "grad_norm": 0.7432254552841187, "learning_rate": 8.262113513674493e-06, "loss": 0.049, "step": 29899 }, { "epoch": 0.6588551565331879, "grad_norm": 0.5017489194869995, "learning_rate": 8.261157077166172e-06, "loss": 0.0403, "step": 29900 }, { "epoch": 0.6588771918227041, "grad_norm": 0.5462755560874939, "learning_rate": 8.260200674982391e-06, "loss": 0.0504, "step": 29901 }, { "epoch": 0.6588992271122203, "grad_norm": 0.5828810930252075, "learning_rate": 8.259244307128023e-06, "loss": 0.0794, "step": 29902 }, { "epoch": 0.6589212624017364, "grad_norm": 0.7408183813095093, "learning_rate": 8.258287973607943e-06, "loss": 0.0905, "step": 29903 }, { "epoch": 0.6589432976912526, "grad_norm": 0.5490250587463379, "learning_rate": 8.257331674427014e-06, "loss": 0.0653, "step": 29904 }, { "epoch": 0.6589653329807686, "grad_norm": 0.5060736536979675, "learning_rate": 8.256375409590108e-06, "loss": 0.0845, "step": 29905 }, { "epoch": 0.6589873682702848, "grad_norm": 0.5985687971115112, "learning_rate": 8.255419179102106e-06, "loss": 0.0661, "step": 29906 }, { "epoch": 0.659009403559801, "grad_norm": 0.34770652651786804, "learning_rate": 8.254462982967864e-06, "loss": 0.0521, "step": 29907 }, { "epoch": 0.6590314388493171, "grad_norm": 0.7777348160743713, "learning_rate": 8.253506821192265e-06, "loss": 0.0692, "step": 29908 }, { "epoch": 0.6590534741388333, "grad_norm": 0.7106577754020691, "learning_rate": 8.252550693780162e-06, "loss": 0.0795, "step": 29909 }, { "epoch": 0.6590755094283495, "grad_norm": 0.7746191024780273, "learning_rate": 8.251594600736448e-06, "loss": 0.0741, "step": 29910 }, { "epoch": 0.6590975447178656, "grad_norm": 0.7130312323570251, "learning_rate": 8.250638542065978e-06, "loss": 0.0869, "step": 29911 }, { "epoch": 0.6591195800073818, "grad_norm": 0.3942544460296631, "learning_rate": 8.249682517773628e-06, "loss": 0.0433, "step": 29912 }, { "epoch": 0.659141615296898, "grad_norm": 1.036953330039978, "learning_rate": 8.248726527864254e-06, "loss": 0.0856, "step": 29913 }, { "epoch": 0.6591636505864141, "grad_norm": 0.6460341215133667, "learning_rate": 8.247770572342748e-06, "loss": 0.0434, "step": 29914 }, { "epoch": 0.6591856858759303, "grad_norm": 0.6010212898254395, "learning_rate": 8.246814651213962e-06, "loss": 0.0645, "step": 29915 }, { "epoch": 0.6592077211654465, "grad_norm": 0.7154436111450195, "learning_rate": 8.245858764482775e-06, "loss": 0.0607, "step": 29916 }, { "epoch": 0.6592297564549626, "grad_norm": 0.35344332456588745, "learning_rate": 8.244902912154045e-06, "loss": 0.0526, "step": 29917 }, { "epoch": 0.6592517917444788, "grad_norm": 0.5518077611923218, "learning_rate": 8.243947094232649e-06, "loss": 0.0816, "step": 29918 }, { "epoch": 0.659273827033995, "grad_norm": 0.6600801348686218, "learning_rate": 8.242991310723456e-06, "loss": 0.0566, "step": 29919 }, { "epoch": 0.6592958623235111, "grad_norm": 0.7678220868110657, "learning_rate": 8.242035561631324e-06, "loss": 0.1105, "step": 29920 }, { "epoch": 0.6593178976130273, "grad_norm": 0.5767521858215332, "learning_rate": 8.241079846961138e-06, "loss": 0.074, "step": 29921 }, { "epoch": 0.6593399329025434, "grad_norm": 0.7659057378768921, "learning_rate": 8.240124166717751e-06, "loss": 0.0877, "step": 29922 }, { "epoch": 0.6593619681920596, "grad_norm": 0.4667942523956299, "learning_rate": 8.239168520906041e-06, "loss": 0.0647, "step": 29923 }, { "epoch": 0.6593840034815758, "grad_norm": 0.2837893068790436, "learning_rate": 8.238212909530868e-06, "loss": 0.0634, "step": 29924 }, { "epoch": 0.6594060387710919, "grad_norm": 0.611774206161499, "learning_rate": 8.237257332597101e-06, "loss": 0.0651, "step": 29925 }, { "epoch": 0.6594280740606081, "grad_norm": 0.32744014263153076, "learning_rate": 8.236301790109616e-06, "loss": 0.0402, "step": 29926 }, { "epoch": 0.6594501093501243, "grad_norm": 0.6122581958770752, "learning_rate": 8.235346282073267e-06, "loss": 0.0914, "step": 29927 }, { "epoch": 0.6594721446396404, "grad_norm": 1.9183146953582764, "learning_rate": 8.234390808492926e-06, "loss": 0.0621, "step": 29928 }, { "epoch": 0.6594941799291566, "grad_norm": 0.5906895995140076, "learning_rate": 8.233435369373462e-06, "loss": 0.0673, "step": 29929 }, { "epoch": 0.6595162152186727, "grad_norm": 0.5758576989173889, "learning_rate": 8.232479964719744e-06, "loss": 0.0789, "step": 29930 }, { "epoch": 0.6595382505081888, "grad_norm": 0.4259355366230011, "learning_rate": 8.23152459453663e-06, "loss": 0.0509, "step": 29931 }, { "epoch": 0.659560285797705, "grad_norm": 0.6236084699630737, "learning_rate": 8.230569258828992e-06, "loss": 0.0815, "step": 29932 }, { "epoch": 0.6595823210872211, "grad_norm": 0.7204151153564453, "learning_rate": 8.229613957601698e-06, "loss": 0.0623, "step": 29933 }, { "epoch": 0.6596043563767373, "grad_norm": 0.3715079426765442, "learning_rate": 8.228658690859606e-06, "loss": 0.0776, "step": 29934 }, { "epoch": 0.6596263916662535, "grad_norm": 0.6060999035835266, "learning_rate": 8.227703458607592e-06, "loss": 0.0478, "step": 29935 }, { "epoch": 0.6596484269557696, "grad_norm": 0.26916173100471497, "learning_rate": 8.226748260850505e-06, "loss": 0.0552, "step": 29936 }, { "epoch": 0.6596704622452858, "grad_norm": 0.5669161677360535, "learning_rate": 8.225793097593233e-06, "loss": 0.0719, "step": 29937 }, { "epoch": 0.659692497534802, "grad_norm": 0.6116467714309692, "learning_rate": 8.224837968840621e-06, "loss": 0.067, "step": 29938 }, { "epoch": 0.6597145328243181, "grad_norm": 0.28151771426200867, "learning_rate": 8.223882874597551e-06, "loss": 0.0607, "step": 29939 }, { "epoch": 0.6597365681138343, "grad_norm": 0.9786117672920227, "learning_rate": 8.222927814868871e-06, "loss": 0.0551, "step": 29940 }, { "epoch": 0.6597586034033505, "grad_norm": 0.9383606910705566, "learning_rate": 8.221972789659457e-06, "loss": 0.0515, "step": 29941 }, { "epoch": 0.6597806386928666, "grad_norm": 0.6323683857917786, "learning_rate": 8.221017798974173e-06, "loss": 0.0756, "step": 29942 }, { "epoch": 0.6598026739823828, "grad_norm": 0.7661622762680054, "learning_rate": 8.22006284281787e-06, "loss": 0.0844, "step": 29943 }, { "epoch": 0.659824709271899, "grad_norm": 0.40189671516418457, "learning_rate": 8.219107921195435e-06, "loss": 0.0481, "step": 29944 }, { "epoch": 0.6598467445614151, "grad_norm": 0.8716545701026917, "learning_rate": 8.218153034111713e-06, "loss": 0.0583, "step": 29945 }, { "epoch": 0.6598687798509313, "grad_norm": 0.47384509444236755, "learning_rate": 8.21719818157158e-06, "loss": 0.0601, "step": 29946 }, { "epoch": 0.6598908151404475, "grad_norm": 0.4157542288303375, "learning_rate": 8.216243363579887e-06, "loss": 0.0629, "step": 29947 }, { "epoch": 0.6599128504299636, "grad_norm": 0.8096088767051697, "learning_rate": 8.215288580141508e-06, "loss": 0.0794, "step": 29948 }, { "epoch": 0.6599348857194798, "grad_norm": 0.9580713510513306, "learning_rate": 8.214333831261306e-06, "loss": 0.089, "step": 29949 }, { "epoch": 0.659956921008996, "grad_norm": 0.684634268283844, "learning_rate": 8.213379116944133e-06, "loss": 0.0748, "step": 29950 }, { "epoch": 0.6599789562985121, "grad_norm": 1.011801838874817, "learning_rate": 8.212424437194862e-06, "loss": 0.0963, "step": 29951 }, { "epoch": 0.6600009915880283, "grad_norm": 0.7705655097961426, "learning_rate": 8.211469792018352e-06, "loss": 0.0459, "step": 29952 }, { "epoch": 0.6600230268775444, "grad_norm": 0.5124901533126831, "learning_rate": 8.210515181419473e-06, "loss": 0.0694, "step": 29953 }, { "epoch": 0.6600450621670605, "grad_norm": 0.5380269289016724, "learning_rate": 8.209560605403074e-06, "loss": 0.1033, "step": 29954 }, { "epoch": 0.6600670974565767, "grad_norm": 0.7383142709732056, "learning_rate": 8.208606063974026e-06, "loss": 0.0776, "step": 29955 }, { "epoch": 0.6600891327460928, "grad_norm": 0.44043341279029846, "learning_rate": 8.207651557137192e-06, "loss": 0.0796, "step": 29956 }, { "epoch": 0.660111168035609, "grad_norm": 0.6341392397880554, "learning_rate": 8.206697084897427e-06, "loss": 0.0472, "step": 29957 }, { "epoch": 0.6601332033251252, "grad_norm": 0.6289066672325134, "learning_rate": 8.205742647259605e-06, "loss": 0.072, "step": 29958 }, { "epoch": 0.6601552386146413, "grad_norm": 0.7007399201393127, "learning_rate": 8.204788244228564e-06, "loss": 0.0863, "step": 29959 }, { "epoch": 0.6601772739041575, "grad_norm": 0.9026179909706116, "learning_rate": 8.203833875809192e-06, "loss": 0.0654, "step": 29960 }, { "epoch": 0.6601993091936736, "grad_norm": 0.5220969915390015, "learning_rate": 8.202879542006335e-06, "loss": 0.1029, "step": 29961 }, { "epoch": 0.6602213444831898, "grad_norm": 0.43079715967178345, "learning_rate": 8.201925242824863e-06, "loss": 0.0628, "step": 29962 }, { "epoch": 0.660243379772706, "grad_norm": 0.6813726425170898, "learning_rate": 8.200970978269623e-06, "loss": 0.0537, "step": 29963 }, { "epoch": 0.6602654150622221, "grad_norm": 0.3006575107574463, "learning_rate": 8.200016748345484e-06, "loss": 0.0745, "step": 29964 }, { "epoch": 0.6602874503517383, "grad_norm": 0.7163293957710266, "learning_rate": 8.199062553057314e-06, "loss": 0.0824, "step": 29965 }, { "epoch": 0.6603094856412545, "grad_norm": 0.5666275024414062, "learning_rate": 8.198108392409952e-06, "loss": 0.0708, "step": 29966 }, { "epoch": 0.6603315209307706, "grad_norm": 0.2238692045211792, "learning_rate": 8.197154266408285e-06, "loss": 0.054, "step": 29967 }, { "epoch": 0.6603535562202868, "grad_norm": 0.5846579074859619, "learning_rate": 8.196200175057151e-06, "loss": 0.0777, "step": 29968 }, { "epoch": 0.660375591509803, "grad_norm": 0.3780103325843811, "learning_rate": 8.195246118361424e-06, "loss": 0.0616, "step": 29969 }, { "epoch": 0.6603976267993191, "grad_norm": 0.5953421592712402, "learning_rate": 8.194292096325954e-06, "loss": 0.0665, "step": 29970 }, { "epoch": 0.6604196620888353, "grad_norm": 0.4146706461906433, "learning_rate": 8.193338108955602e-06, "loss": 0.038, "step": 29971 }, { "epoch": 0.6604416973783515, "grad_norm": 0.49700379371643066, "learning_rate": 8.192384156255235e-06, "loss": 0.0839, "step": 29972 }, { "epoch": 0.6604637326678676, "grad_norm": 0.7287879586219788, "learning_rate": 8.1914302382297e-06, "loss": 0.0627, "step": 29973 }, { "epoch": 0.6604857679573838, "grad_norm": 0.8597564101219177, "learning_rate": 8.190476354883862e-06, "loss": 0.0795, "step": 29974 }, { "epoch": 0.6605078032469, "grad_norm": 1.0835121870040894, "learning_rate": 8.189522506222582e-06, "loss": 0.0661, "step": 29975 }, { "epoch": 0.6605298385364161, "grad_norm": 0.40199410915374756, "learning_rate": 8.18856869225072e-06, "loss": 0.0452, "step": 29976 }, { "epoch": 0.6605518738259323, "grad_norm": 0.4462449550628662, "learning_rate": 8.187614912973123e-06, "loss": 0.0799, "step": 29977 }, { "epoch": 0.6605739091154484, "grad_norm": 0.4194018542766571, "learning_rate": 8.186661168394659e-06, "loss": 0.0584, "step": 29978 }, { "epoch": 0.6605959444049645, "grad_norm": 0.6723372936248779, "learning_rate": 8.185707458520187e-06, "loss": 0.0616, "step": 29979 }, { "epoch": 0.6606179796944807, "grad_norm": 0.5901980996131897, "learning_rate": 8.184753783354557e-06, "loss": 0.0612, "step": 29980 }, { "epoch": 0.6606400149839968, "grad_norm": 0.6008793115615845, "learning_rate": 8.183800142902633e-06, "loss": 0.1267, "step": 29981 }, { "epoch": 0.660662050273513, "grad_norm": 0.4433329999446869, "learning_rate": 8.18284653716926e-06, "loss": 0.0608, "step": 29982 }, { "epoch": 0.6606840855630292, "grad_norm": 0.7805973887443542, "learning_rate": 8.181892966159317e-06, "loss": 0.0817, "step": 29983 }, { "epoch": 0.6607061208525453, "grad_norm": 0.576387345790863, "learning_rate": 8.180939429877644e-06, "loss": 0.0863, "step": 29984 }, { "epoch": 0.6607281561420615, "grad_norm": 0.457011342048645, "learning_rate": 8.17998592832911e-06, "loss": 0.0791, "step": 29985 }, { "epoch": 0.6607501914315776, "grad_norm": 0.6766293048858643, "learning_rate": 8.17903246151856e-06, "loss": 0.0922, "step": 29986 }, { "epoch": 0.6607722267210938, "grad_norm": 0.5557034015655518, "learning_rate": 8.17807902945085e-06, "loss": 0.0853, "step": 29987 }, { "epoch": 0.66079426201061, "grad_norm": 0.81475430727005, "learning_rate": 8.177125632130851e-06, "loss": 0.1014, "step": 29988 }, { "epoch": 0.6608162973001261, "grad_norm": 0.43361154198646545, "learning_rate": 8.176172269563399e-06, "loss": 0.0524, "step": 29989 }, { "epoch": 0.6608383325896423, "grad_norm": 0.4366273283958435, "learning_rate": 8.17521894175337e-06, "loss": 0.0686, "step": 29990 }, { "epoch": 0.6608603678791585, "grad_norm": 0.3583178222179413, "learning_rate": 8.174265648705607e-06, "loss": 0.0744, "step": 29991 }, { "epoch": 0.6608824031686746, "grad_norm": 0.734864354133606, "learning_rate": 8.173312390424975e-06, "loss": 0.081, "step": 29992 }, { "epoch": 0.6609044384581908, "grad_norm": 0.58091139793396, "learning_rate": 8.172359166916317e-06, "loss": 0.0629, "step": 29993 }, { "epoch": 0.660926473747707, "grad_norm": 0.58420729637146, "learning_rate": 8.171405978184496e-06, "loss": 0.0889, "step": 29994 }, { "epoch": 0.6609485090372231, "grad_norm": 0.605457603931427, "learning_rate": 8.17045282423437e-06, "loss": 0.0635, "step": 29995 }, { "epoch": 0.6609705443267393, "grad_norm": 0.5311223864555359, "learning_rate": 8.169499705070785e-06, "loss": 0.069, "step": 29996 }, { "epoch": 0.6609925796162555, "grad_norm": 0.5679288506507874, "learning_rate": 8.168546620698604e-06, "loss": 0.0729, "step": 29997 }, { "epoch": 0.6610146149057716, "grad_norm": 0.46530911326408386, "learning_rate": 8.167593571122673e-06, "loss": 0.0503, "step": 29998 }, { "epoch": 0.6610366501952878, "grad_norm": 0.5558481812477112, "learning_rate": 8.166640556347862e-06, "loss": 0.0703, "step": 29999 }, { "epoch": 0.661058685484804, "grad_norm": 0.8127792477607727, "learning_rate": 8.165687576379009e-06, "loss": 0.073, "step": 30000 }, { "epoch": 0.6610807207743201, "grad_norm": 0.6967863440513611, "learning_rate": 8.164734631220971e-06, "loss": 0.054, "step": 30001 }, { "epoch": 0.6611027560638363, "grad_norm": 0.47219738364219666, "learning_rate": 8.163781720878612e-06, "loss": 0.0751, "step": 30002 }, { "epoch": 0.6611247913533524, "grad_norm": 0.3995513319969177, "learning_rate": 8.162828845356771e-06, "loss": 0.0563, "step": 30003 }, { "epoch": 0.6611468266428685, "grad_norm": 0.861587405204773, "learning_rate": 8.16187600466032e-06, "loss": 0.0881, "step": 30004 }, { "epoch": 0.6611688619323847, "grad_norm": 0.5421255230903625, "learning_rate": 8.160923198794086e-06, "loss": 0.0725, "step": 30005 }, { "epoch": 0.6611908972219008, "grad_norm": 0.880224347114563, "learning_rate": 8.15997042776295e-06, "loss": 0.0763, "step": 30006 }, { "epoch": 0.661212932511417, "grad_norm": 0.5404092669487, "learning_rate": 8.159017691571746e-06, "loss": 0.0565, "step": 30007 }, { "epoch": 0.6612349678009332, "grad_norm": 0.3568679094314575, "learning_rate": 8.15806499022534e-06, "loss": 0.0713, "step": 30008 }, { "epoch": 0.6612570030904493, "grad_norm": 0.7891409993171692, "learning_rate": 8.157112323728572e-06, "loss": 0.0848, "step": 30009 }, { "epoch": 0.6612790383799655, "grad_norm": 0.3162190914154053, "learning_rate": 8.156159692086299e-06, "loss": 0.0762, "step": 30010 }, { "epoch": 0.6613010736694817, "grad_norm": 0.5127366185188293, "learning_rate": 8.155207095303384e-06, "loss": 0.0394, "step": 30011 }, { "epoch": 0.6613231089589978, "grad_norm": 0.6604472994804382, "learning_rate": 8.154254533384655e-06, "loss": 0.0687, "step": 30012 }, { "epoch": 0.661345144248514, "grad_norm": 0.640338122844696, "learning_rate": 8.153302006334992e-06, "loss": 0.0798, "step": 30013 }, { "epoch": 0.6613671795380301, "grad_norm": 0.6505697965621948, "learning_rate": 8.152349514159227e-06, "loss": 0.0513, "step": 30014 }, { "epoch": 0.6613892148275463, "grad_norm": 0.5431728363037109, "learning_rate": 8.151397056862221e-06, "loss": 0.0681, "step": 30015 }, { "epoch": 0.6614112501170625, "grad_norm": 0.5020766258239746, "learning_rate": 8.150444634448815e-06, "loss": 0.058, "step": 30016 }, { "epoch": 0.6614332854065786, "grad_norm": 0.5173346996307373, "learning_rate": 8.149492246923875e-06, "loss": 0.0569, "step": 30017 }, { "epoch": 0.6614553206960948, "grad_norm": 0.5254485011100769, "learning_rate": 8.148539894292248e-06, "loss": 0.0601, "step": 30018 }, { "epoch": 0.661477355985611, "grad_norm": 0.44736653566360474, "learning_rate": 8.147587576558774e-06, "loss": 0.0692, "step": 30019 }, { "epoch": 0.6614993912751271, "grad_norm": 0.6402691602706909, "learning_rate": 8.14663529372831e-06, "loss": 0.0714, "step": 30020 }, { "epoch": 0.6615214265646433, "grad_norm": 0.6572522521018982, "learning_rate": 8.145683045805708e-06, "loss": 0.0792, "step": 30021 }, { "epoch": 0.6615434618541595, "grad_norm": 0.8855138421058655, "learning_rate": 8.144730832795827e-06, "loss": 0.0727, "step": 30022 }, { "epoch": 0.6615654971436756, "grad_norm": 0.31753459572792053, "learning_rate": 8.143778654703498e-06, "loss": 0.0732, "step": 30023 }, { "epoch": 0.6615875324331918, "grad_norm": 0.34203940629959106, "learning_rate": 8.142826511533584e-06, "loss": 0.0556, "step": 30024 }, { "epoch": 0.661609567722708, "grad_norm": 0.5415038466453552, "learning_rate": 8.141874403290929e-06, "loss": 0.0759, "step": 30025 }, { "epoch": 0.6616316030122241, "grad_norm": 0.6687982082366943, "learning_rate": 8.140922329980394e-06, "loss": 0.0718, "step": 30026 }, { "epoch": 0.6616536383017403, "grad_norm": 0.415513813495636, "learning_rate": 8.139970291606811e-06, "loss": 0.0374, "step": 30027 }, { "epoch": 0.6616756735912565, "grad_norm": 0.5338436365127563, "learning_rate": 8.139018288175037e-06, "loss": 0.0649, "step": 30028 }, { "epoch": 0.6616977088807725, "grad_norm": 0.5490196943283081, "learning_rate": 8.13806631968993e-06, "loss": 0.0739, "step": 30029 }, { "epoch": 0.6617197441702887, "grad_norm": 0.9445651173591614, "learning_rate": 8.137114386156327e-06, "loss": 0.0944, "step": 30030 }, { "epoch": 0.6617417794598048, "grad_norm": 0.7184964418411255, "learning_rate": 8.136162487579086e-06, "loss": 0.0682, "step": 30031 }, { "epoch": 0.661763814749321, "grad_norm": 0.5933856964111328, "learning_rate": 8.135210623963035e-06, "loss": 0.0616, "step": 30032 }, { "epoch": 0.6617858500388372, "grad_norm": 0.502006471157074, "learning_rate": 8.13425879531305e-06, "loss": 0.0676, "step": 30033 }, { "epoch": 0.6618078853283533, "grad_norm": 0.6908990144729614, "learning_rate": 8.133307001633962e-06, "loss": 0.0552, "step": 30034 }, { "epoch": 0.6618299206178695, "grad_norm": 0.6480762958526611, "learning_rate": 8.132355242930624e-06, "loss": 0.0586, "step": 30035 }, { "epoch": 0.6618519559073857, "grad_norm": 0.668793797492981, "learning_rate": 8.131403519207888e-06, "loss": 0.0579, "step": 30036 }, { "epoch": 0.6618739911969018, "grad_norm": 0.9643717408180237, "learning_rate": 8.130451830470592e-06, "loss": 0.1024, "step": 30037 }, { "epoch": 0.661896026486418, "grad_norm": 0.4901857376098633, "learning_rate": 8.129500176723596e-06, "loss": 0.0631, "step": 30038 }, { "epoch": 0.6619180617759342, "grad_norm": 0.5134938955307007, "learning_rate": 8.128548557971726e-06, "loss": 0.0639, "step": 30039 }, { "epoch": 0.6619400970654503, "grad_norm": 0.4889891445636749, "learning_rate": 8.127596974219856e-06, "loss": 0.0699, "step": 30040 }, { "epoch": 0.6619621323549665, "grad_norm": 0.38708510994911194, "learning_rate": 8.126645425472812e-06, "loss": 0.0273, "step": 30041 }, { "epoch": 0.6619841676444826, "grad_norm": 0.3738133907318115, "learning_rate": 8.125693911735456e-06, "loss": 0.0491, "step": 30042 }, { "epoch": 0.6620062029339988, "grad_norm": 0.42910680174827576, "learning_rate": 8.124742433012623e-06, "loss": 0.0594, "step": 30043 }, { "epoch": 0.662028238223515, "grad_norm": 0.8165558576583862, "learning_rate": 8.12379098930916e-06, "loss": 0.0871, "step": 30044 }, { "epoch": 0.6620502735130311, "grad_norm": 0.7675994038581848, "learning_rate": 8.122839580629926e-06, "loss": 0.0611, "step": 30045 }, { "epoch": 0.6620723088025473, "grad_norm": 0.69384765625, "learning_rate": 8.121888206979751e-06, "loss": 0.0622, "step": 30046 }, { "epoch": 0.6620943440920635, "grad_norm": 0.5722920894622803, "learning_rate": 8.120936868363487e-06, "loss": 0.0707, "step": 30047 }, { "epoch": 0.6621163793815796, "grad_norm": 0.5186632871627808, "learning_rate": 8.11998556478598e-06, "loss": 0.0407, "step": 30048 }, { "epoch": 0.6621384146710958, "grad_norm": 0.632248044013977, "learning_rate": 8.11903429625208e-06, "loss": 0.0954, "step": 30049 }, { "epoch": 0.662160449960612, "grad_norm": 0.8272548913955688, "learning_rate": 8.118083062766627e-06, "loss": 0.085, "step": 30050 }, { "epoch": 0.6621824852501281, "grad_norm": 0.5574532151222229, "learning_rate": 8.117131864334465e-06, "loss": 0.0831, "step": 30051 }, { "epoch": 0.6622045205396443, "grad_norm": 0.4182036221027374, "learning_rate": 8.116180700960446e-06, "loss": 0.0926, "step": 30052 }, { "epoch": 0.6622265558291603, "grad_norm": 0.6114389300346375, "learning_rate": 8.115229572649407e-06, "loss": 0.0582, "step": 30053 }, { "epoch": 0.6622485911186765, "grad_norm": 0.5796387195587158, "learning_rate": 8.11427847940619e-06, "loss": 0.0558, "step": 30054 }, { "epoch": 0.6622706264081927, "grad_norm": 0.5685899257659912, "learning_rate": 8.11332742123565e-06, "loss": 0.0816, "step": 30055 }, { "epoch": 0.6622926616977088, "grad_norm": 0.4302363097667694, "learning_rate": 8.112376398142634e-06, "loss": 0.0422, "step": 30056 }, { "epoch": 0.662314696987225, "grad_norm": 0.7403646111488342, "learning_rate": 8.111425410131967e-06, "loss": 0.0484, "step": 30057 }, { "epoch": 0.6623367322767412, "grad_norm": 0.6787663102149963, "learning_rate": 8.110474457208507e-06, "loss": 0.0608, "step": 30058 }, { "epoch": 0.6623587675662573, "grad_norm": 0.44392672181129456, "learning_rate": 8.109523539377103e-06, "loss": 0.0577, "step": 30059 }, { "epoch": 0.6623808028557735, "grad_norm": 0.45447415113449097, "learning_rate": 8.108572656642583e-06, "loss": 0.0524, "step": 30060 }, { "epoch": 0.6624028381452897, "grad_norm": 0.34190642833709717, "learning_rate": 8.107621809009803e-06, "loss": 0.0497, "step": 30061 }, { "epoch": 0.6624248734348058, "grad_norm": 0.5075180530548096, "learning_rate": 8.106670996483589e-06, "loss": 0.0681, "step": 30062 }, { "epoch": 0.662446908724322, "grad_norm": 0.8502563238143921, "learning_rate": 8.105720219068811e-06, "loss": 0.0589, "step": 30063 }, { "epoch": 0.6624689440138382, "grad_norm": 0.5328960418701172, "learning_rate": 8.104769476770288e-06, "loss": 0.0679, "step": 30064 }, { "epoch": 0.6624909793033543, "grad_norm": 0.6257779002189636, "learning_rate": 8.103818769592878e-06, "loss": 0.0404, "step": 30065 }, { "epoch": 0.6625130145928705, "grad_norm": 1.1591602563858032, "learning_rate": 8.102868097541411e-06, "loss": 0.1086, "step": 30066 }, { "epoch": 0.6625350498823867, "grad_norm": 0.4470134377479553, "learning_rate": 8.101917460620738e-06, "loss": 0.0631, "step": 30067 }, { "epoch": 0.6625570851719028, "grad_norm": 0.5017547607421875, "learning_rate": 8.1009668588357e-06, "loss": 0.0457, "step": 30068 }, { "epoch": 0.662579120461419, "grad_norm": 0.723875105381012, "learning_rate": 8.100016292191134e-06, "loss": 0.0737, "step": 30069 }, { "epoch": 0.6626011557509351, "grad_norm": 0.8324646949768066, "learning_rate": 8.099065760691887e-06, "loss": 0.0689, "step": 30070 }, { "epoch": 0.6626231910404513, "grad_norm": 1.232269287109375, "learning_rate": 8.098115264342795e-06, "loss": 0.0558, "step": 30071 }, { "epoch": 0.6626452263299675, "grad_norm": 0.4553792476654053, "learning_rate": 8.09716480314871e-06, "loss": 0.0333, "step": 30072 }, { "epoch": 0.6626672616194836, "grad_norm": 0.29045170545578003, "learning_rate": 8.096214377114461e-06, "loss": 0.0616, "step": 30073 }, { "epoch": 0.6626892969089998, "grad_norm": 0.5156482458114624, "learning_rate": 8.095263986244895e-06, "loss": 0.0766, "step": 30074 }, { "epoch": 0.662711332198516, "grad_norm": 0.46167799830436707, "learning_rate": 8.094313630544857e-06, "loss": 0.0614, "step": 30075 }, { "epoch": 0.6627333674880321, "grad_norm": 0.3684232532978058, "learning_rate": 8.093363310019176e-06, "loss": 0.0867, "step": 30076 }, { "epoch": 0.6627554027775483, "grad_norm": 0.5051157474517822, "learning_rate": 8.092413024672699e-06, "loss": 0.0496, "step": 30077 }, { "epoch": 0.6627774380670644, "grad_norm": 0.7561963200569153, "learning_rate": 8.091462774510267e-06, "loss": 0.0922, "step": 30078 }, { "epoch": 0.6627994733565805, "grad_norm": 0.40555235743522644, "learning_rate": 8.090512559536729e-06, "loss": 0.0366, "step": 30079 }, { "epoch": 0.6628215086460967, "grad_norm": 0.683869481086731, "learning_rate": 8.089562379756906e-06, "loss": 0.075, "step": 30080 }, { "epoch": 0.6628435439356128, "grad_norm": 0.6031070947647095, "learning_rate": 8.088612235175649e-06, "loss": 0.0564, "step": 30081 }, { "epoch": 0.662865579225129, "grad_norm": 0.6732048988342285, "learning_rate": 8.087662125797803e-06, "loss": 0.0916, "step": 30082 }, { "epoch": 0.6628876145146452, "grad_norm": 0.7122674584388733, "learning_rate": 8.086712051628192e-06, "loss": 0.0683, "step": 30083 }, { "epoch": 0.6629096498041613, "grad_norm": 0.3892683982849121, "learning_rate": 8.085762012671671e-06, "loss": 0.0765, "step": 30084 }, { "epoch": 0.6629316850936775, "grad_norm": 0.9293707609176636, "learning_rate": 8.084812008933059e-06, "loss": 0.0431, "step": 30085 }, { "epoch": 0.6629537203831937, "grad_norm": 0.6147621870040894, "learning_rate": 8.08386204041722e-06, "loss": 0.0857, "step": 30086 }, { "epoch": 0.6629757556727098, "grad_norm": 0.6057674884796143, "learning_rate": 8.082912107128974e-06, "loss": 0.0795, "step": 30087 }, { "epoch": 0.662997790962226, "grad_norm": 0.5352656841278076, "learning_rate": 8.081962209073175e-06, "loss": 0.0611, "step": 30088 }, { "epoch": 0.6630198262517422, "grad_norm": 0.5065726041793823, "learning_rate": 8.081012346254643e-06, "loss": 0.0631, "step": 30089 }, { "epoch": 0.6630418615412583, "grad_norm": 0.6463107466697693, "learning_rate": 8.080062518678227e-06, "loss": 0.0669, "step": 30090 }, { "epoch": 0.6630638968307745, "grad_norm": 0.3980267643928528, "learning_rate": 8.079112726348768e-06, "loss": 0.0986, "step": 30091 }, { "epoch": 0.6630859321202907, "grad_norm": 0.7310867309570312, "learning_rate": 8.078162969271095e-06, "loss": 0.0953, "step": 30092 }, { "epoch": 0.6631079674098068, "grad_norm": 0.6499132513999939, "learning_rate": 8.077213247450049e-06, "loss": 0.0855, "step": 30093 }, { "epoch": 0.663130002699323, "grad_norm": 0.8025670051574707, "learning_rate": 8.076263560890465e-06, "loss": 0.0522, "step": 30094 }, { "epoch": 0.6631520379888391, "grad_norm": 0.5731543302536011, "learning_rate": 8.075313909597192e-06, "loss": 0.096, "step": 30095 }, { "epoch": 0.6631740732783553, "grad_norm": 0.8374671339988708, "learning_rate": 8.074364293575053e-06, "loss": 0.0843, "step": 30096 }, { "epoch": 0.6631961085678715, "grad_norm": 0.5818093419075012, "learning_rate": 8.07341471282889e-06, "loss": 0.0697, "step": 30097 }, { "epoch": 0.6632181438573876, "grad_norm": 0.3360807001590729, "learning_rate": 8.072465167363546e-06, "loss": 0.0524, "step": 30098 }, { "epoch": 0.6632401791469038, "grad_norm": 0.7691823840141296, "learning_rate": 8.071515657183846e-06, "loss": 0.0592, "step": 30099 }, { "epoch": 0.66326221443642, "grad_norm": 0.6130815744400024, "learning_rate": 8.070566182294633e-06, "loss": 0.0653, "step": 30100 }, { "epoch": 0.6632842497259361, "grad_norm": 0.6884428858757019, "learning_rate": 8.069616742700742e-06, "loss": 0.0657, "step": 30101 }, { "epoch": 0.6633062850154523, "grad_norm": 0.530191957950592, "learning_rate": 8.068667338407016e-06, "loss": 0.0671, "step": 30102 }, { "epoch": 0.6633283203049684, "grad_norm": 0.5592462420463562, "learning_rate": 8.067717969418275e-06, "loss": 0.0735, "step": 30103 }, { "epoch": 0.6633503555944845, "grad_norm": 0.38321614265441895, "learning_rate": 8.066768635739369e-06, "loss": 0.0381, "step": 30104 }, { "epoch": 0.6633723908840007, "grad_norm": 0.5765629410743713, "learning_rate": 8.065819337375127e-06, "loss": 0.0454, "step": 30105 }, { "epoch": 0.6633944261735168, "grad_norm": 0.5171369910240173, "learning_rate": 8.064870074330384e-06, "loss": 0.0588, "step": 30106 }, { "epoch": 0.663416461463033, "grad_norm": 0.5952056050300598, "learning_rate": 8.063920846609982e-06, "loss": 0.0413, "step": 30107 }, { "epoch": 0.6634384967525492, "grad_norm": 0.34013527631759644, "learning_rate": 8.062971654218739e-06, "loss": 0.0587, "step": 30108 }, { "epoch": 0.6634605320420653, "grad_norm": 0.4375278949737549, "learning_rate": 8.062022497161515e-06, "loss": 0.0549, "step": 30109 }, { "epoch": 0.6634825673315815, "grad_norm": 0.8782413601875305, "learning_rate": 8.061073375443123e-06, "loss": 0.0999, "step": 30110 }, { "epoch": 0.6635046026210977, "grad_norm": 0.3081183135509491, "learning_rate": 8.06012428906841e-06, "loss": 0.0675, "step": 30111 }, { "epoch": 0.6635266379106138, "grad_norm": 1.1342054605484009, "learning_rate": 8.0591752380422e-06, "loss": 0.1072, "step": 30112 }, { "epoch": 0.66354867320013, "grad_norm": 0.8172969222068787, "learning_rate": 8.058226222369333e-06, "loss": 0.0978, "step": 30113 }, { "epoch": 0.6635707084896462, "grad_norm": 0.4194396734237671, "learning_rate": 8.057277242054649e-06, "loss": 0.0652, "step": 30114 }, { "epoch": 0.6635927437791623, "grad_norm": 0.7315161824226379, "learning_rate": 8.056328297102969e-06, "loss": 0.0757, "step": 30115 }, { "epoch": 0.6636147790686785, "grad_norm": 1.0449907779693604, "learning_rate": 8.055379387519132e-06, "loss": 0.0962, "step": 30116 }, { "epoch": 0.6636368143581947, "grad_norm": 0.5568273067474365, "learning_rate": 8.05443051330797e-06, "loss": 0.0483, "step": 30117 }, { "epoch": 0.6636588496477108, "grad_norm": 0.5066906809806824, "learning_rate": 8.053481674474326e-06, "loss": 0.0562, "step": 30118 }, { "epoch": 0.663680884937227, "grad_norm": 0.8060829043388367, "learning_rate": 8.05253287102302e-06, "loss": 0.0757, "step": 30119 }, { "epoch": 0.6637029202267432, "grad_norm": 0.3675348460674286, "learning_rate": 8.051584102958886e-06, "loss": 0.0323, "step": 30120 }, { "epoch": 0.6637249555162593, "grad_norm": 0.551795244216919, "learning_rate": 8.05063537028677e-06, "loss": 0.0924, "step": 30121 }, { "epoch": 0.6637469908057755, "grad_norm": 0.5923575162887573, "learning_rate": 8.049686673011485e-06, "loss": 0.064, "step": 30122 }, { "epoch": 0.6637690260952916, "grad_norm": 0.2972698509693146, "learning_rate": 8.048738011137875e-06, "loss": 0.0657, "step": 30123 }, { "epoch": 0.6637910613848078, "grad_norm": 0.8640674948692322, "learning_rate": 8.04778938467077e-06, "loss": 0.0655, "step": 30124 }, { "epoch": 0.663813096674324, "grad_norm": 0.3609670400619507, "learning_rate": 8.046840793615006e-06, "loss": 0.0622, "step": 30125 }, { "epoch": 0.6638351319638401, "grad_norm": 0.5894284844398499, "learning_rate": 8.045892237975406e-06, "loss": 0.0695, "step": 30126 }, { "epoch": 0.6638571672533562, "grad_norm": 0.443890780210495, "learning_rate": 8.044943717756806e-06, "loss": 0.0544, "step": 30127 }, { "epoch": 0.6638792025428724, "grad_norm": 0.5810574293136597, "learning_rate": 8.043995232964038e-06, "loss": 0.0656, "step": 30128 }, { "epoch": 0.6639012378323885, "grad_norm": 0.3652328848838806, "learning_rate": 8.043046783601936e-06, "loss": 0.0668, "step": 30129 }, { "epoch": 0.6639232731219047, "grad_norm": 0.6177383661270142, "learning_rate": 8.042098369675327e-06, "loss": 0.0601, "step": 30130 }, { "epoch": 0.6639453084114209, "grad_norm": 0.7553442120552063, "learning_rate": 8.041149991189033e-06, "loss": 0.0897, "step": 30131 }, { "epoch": 0.663967343700937, "grad_norm": 0.4898662865161896, "learning_rate": 8.040201648147903e-06, "loss": 0.037, "step": 30132 }, { "epoch": 0.6639893789904532, "grad_norm": 0.7935041785240173, "learning_rate": 8.039253340556753e-06, "loss": 0.0739, "step": 30133 }, { "epoch": 0.6640114142799693, "grad_norm": 0.4419742524623871, "learning_rate": 8.038305068420426e-06, "loss": 0.0727, "step": 30134 }, { "epoch": 0.6640334495694855, "grad_norm": 0.5067313313484192, "learning_rate": 8.037356831743731e-06, "loss": 0.0738, "step": 30135 }, { "epoch": 0.6640554848590017, "grad_norm": 0.7054399251937866, "learning_rate": 8.036408630531523e-06, "loss": 0.0756, "step": 30136 }, { "epoch": 0.6640775201485178, "grad_norm": 0.5092825293540955, "learning_rate": 8.035460464788617e-06, "loss": 0.0686, "step": 30137 }, { "epoch": 0.664099555438034, "grad_norm": 0.6113763451576233, "learning_rate": 8.034512334519849e-06, "loss": 0.0752, "step": 30138 }, { "epoch": 0.6641215907275502, "grad_norm": 0.6719275116920471, "learning_rate": 8.033564239730038e-06, "loss": 0.0607, "step": 30139 }, { "epoch": 0.6641436260170663, "grad_norm": 0.23080497980117798, "learning_rate": 8.032616180424022e-06, "loss": 0.0473, "step": 30140 }, { "epoch": 0.6641656613065825, "grad_norm": 0.7184138298034668, "learning_rate": 8.031668156606633e-06, "loss": 0.0808, "step": 30141 }, { "epoch": 0.6641876965960987, "grad_norm": 0.3066205084323883, "learning_rate": 8.030720168282689e-06, "loss": 0.0923, "step": 30142 }, { "epoch": 0.6642097318856148, "grad_norm": 0.48925963044166565, "learning_rate": 8.029772215457024e-06, "loss": 0.0466, "step": 30143 }, { "epoch": 0.664231767175131, "grad_norm": 0.8674362897872925, "learning_rate": 8.028824298134466e-06, "loss": 0.091, "step": 30144 }, { "epoch": 0.6642538024646472, "grad_norm": 0.48842620849609375, "learning_rate": 8.027876416319852e-06, "loss": 0.0787, "step": 30145 }, { "epoch": 0.6642758377541633, "grad_norm": 0.8426886796951294, "learning_rate": 8.026928570017995e-06, "loss": 0.0714, "step": 30146 }, { "epoch": 0.6642978730436795, "grad_norm": 0.5500150918960571, "learning_rate": 8.025980759233731e-06, "loss": 0.0693, "step": 30147 }, { "epoch": 0.6643199083331957, "grad_norm": 0.6321598887443542, "learning_rate": 8.025032983971893e-06, "loss": 0.0532, "step": 30148 }, { "epoch": 0.6643419436227118, "grad_norm": 0.6162724494934082, "learning_rate": 8.024085244237294e-06, "loss": 0.0743, "step": 30149 }, { "epoch": 0.664363978912228, "grad_norm": 0.37855473160743713, "learning_rate": 8.02313754003477e-06, "loss": 0.0621, "step": 30150 }, { "epoch": 0.6643860142017441, "grad_norm": 0.5335651636123657, "learning_rate": 8.02218987136915e-06, "loss": 0.0913, "step": 30151 }, { "epoch": 0.6644080494912602, "grad_norm": 0.8397790789604187, "learning_rate": 8.021242238245264e-06, "loss": 0.0708, "step": 30152 }, { "epoch": 0.6644300847807764, "grad_norm": 0.676030695438385, "learning_rate": 8.020294640667927e-06, "loss": 0.1099, "step": 30153 }, { "epoch": 0.6644521200702925, "grad_norm": 0.39192327857017517, "learning_rate": 8.019347078641973e-06, "loss": 0.0568, "step": 30154 }, { "epoch": 0.6644741553598087, "grad_norm": 0.5319584012031555, "learning_rate": 8.018399552172233e-06, "loss": 0.0567, "step": 30155 }, { "epoch": 0.6644961906493249, "grad_norm": 0.4985470473766327, "learning_rate": 8.017452061263522e-06, "loss": 0.082, "step": 30156 }, { "epoch": 0.664518225938841, "grad_norm": 0.17224080860614777, "learning_rate": 8.016504605920678e-06, "loss": 0.073, "step": 30157 }, { "epoch": 0.6645402612283572, "grad_norm": 0.9226914644241333, "learning_rate": 8.01555718614851e-06, "loss": 0.1234, "step": 30158 }, { "epoch": 0.6645622965178734, "grad_norm": 0.6911149621009827, "learning_rate": 8.014609801951864e-06, "loss": 0.0655, "step": 30159 }, { "epoch": 0.6645843318073895, "grad_norm": 0.41949939727783203, "learning_rate": 8.013662453335552e-06, "loss": 0.063, "step": 30160 }, { "epoch": 0.6646063670969057, "grad_norm": 0.4088610112667084, "learning_rate": 8.012715140304409e-06, "loss": 0.0772, "step": 30161 }, { "epoch": 0.6646284023864218, "grad_norm": 0.638411283493042, "learning_rate": 8.01176786286325e-06, "loss": 0.0668, "step": 30162 }, { "epoch": 0.664650437675938, "grad_norm": 0.5042524337768555, "learning_rate": 8.010820621016904e-06, "loss": 0.0617, "step": 30163 }, { "epoch": 0.6646724729654542, "grad_norm": 0.5019571185112, "learning_rate": 8.0098734147702e-06, "loss": 0.0647, "step": 30164 }, { "epoch": 0.6646945082549703, "grad_norm": 0.5295626521110535, "learning_rate": 8.008926244127957e-06, "loss": 0.0663, "step": 30165 }, { "epoch": 0.6647165435444865, "grad_norm": 0.6082005500793457, "learning_rate": 8.007979109094999e-06, "loss": 0.056, "step": 30166 }, { "epoch": 0.6647385788340027, "grad_norm": 0.5570471882820129, "learning_rate": 8.007032009676154e-06, "loss": 0.065, "step": 30167 }, { "epoch": 0.6647606141235188, "grad_norm": 0.779254674911499, "learning_rate": 8.00608494587625e-06, "loss": 0.0353, "step": 30168 }, { "epoch": 0.664782649413035, "grad_norm": 0.7125229239463806, "learning_rate": 8.005137917700098e-06, "loss": 0.0817, "step": 30169 }, { "epoch": 0.6648046847025512, "grad_norm": 0.5080644488334656, "learning_rate": 8.004190925152532e-06, "loss": 0.0819, "step": 30170 }, { "epoch": 0.6648267199920673, "grad_norm": 0.35247135162353516, "learning_rate": 8.003243968238379e-06, "loss": 0.0607, "step": 30171 }, { "epoch": 0.6648487552815835, "grad_norm": 0.7098241448402405, "learning_rate": 8.002297046962452e-06, "loss": 0.0655, "step": 30172 }, { "epoch": 0.6648707905710997, "grad_norm": 0.3654900789260864, "learning_rate": 8.001350161329577e-06, "loss": 0.0569, "step": 30173 }, { "epoch": 0.6648928258606158, "grad_norm": 0.7289548516273499, "learning_rate": 8.000403311344577e-06, "loss": 0.0732, "step": 30174 }, { "epoch": 0.664914861150132, "grad_norm": 0.6433737277984619, "learning_rate": 7.999456497012282e-06, "loss": 0.0678, "step": 30175 }, { "epoch": 0.6649368964396482, "grad_norm": 0.5764634013175964, "learning_rate": 7.998509718337507e-06, "loss": 0.0583, "step": 30176 }, { "epoch": 0.6649589317291642, "grad_norm": 0.6799163818359375, "learning_rate": 7.997562975325073e-06, "loss": 0.0626, "step": 30177 }, { "epoch": 0.6649809670186804, "grad_norm": 0.5079267024993896, "learning_rate": 7.996616267979814e-06, "loss": 0.0616, "step": 30178 }, { "epoch": 0.6650030023081965, "grad_norm": 0.6921132206916809, "learning_rate": 7.995669596306537e-06, "loss": 0.0525, "step": 30179 }, { "epoch": 0.6650250375977127, "grad_norm": 0.23060880601406097, "learning_rate": 7.994722960310077e-06, "loss": 0.0393, "step": 30180 }, { "epoch": 0.6650470728872289, "grad_norm": 0.5987605452537537, "learning_rate": 7.993776359995237e-06, "loss": 0.0642, "step": 30181 }, { "epoch": 0.665069108176745, "grad_norm": 0.510581910610199, "learning_rate": 7.992829795366863e-06, "loss": 0.0799, "step": 30182 }, { "epoch": 0.6650911434662612, "grad_norm": 0.6610016822814941, "learning_rate": 7.991883266429759e-06, "loss": 0.0908, "step": 30183 }, { "epoch": 0.6651131787557774, "grad_norm": 0.5742360353469849, "learning_rate": 7.990936773188757e-06, "loss": 0.0577, "step": 30184 }, { "epoch": 0.6651352140452935, "grad_norm": 0.4472756087779999, "learning_rate": 7.989990315648668e-06, "loss": 0.0582, "step": 30185 }, { "epoch": 0.6651572493348097, "grad_norm": 0.6728844046592712, "learning_rate": 7.989043893814316e-06, "loss": 0.0867, "step": 30186 }, { "epoch": 0.6651792846243259, "grad_norm": 0.47825396060943604, "learning_rate": 7.988097507690528e-06, "loss": 0.0614, "step": 30187 }, { "epoch": 0.665201319913842, "grad_norm": 0.6560361385345459, "learning_rate": 7.987151157282109e-06, "loss": 0.053, "step": 30188 }, { "epoch": 0.6652233552033582, "grad_norm": 0.4165879189968109, "learning_rate": 7.986204842593903e-06, "loss": 0.0658, "step": 30189 }, { "epoch": 0.6652453904928743, "grad_norm": 0.33111152052879333, "learning_rate": 7.98525856363071e-06, "loss": 0.067, "step": 30190 }, { "epoch": 0.6652674257823905, "grad_norm": 0.34892502427101135, "learning_rate": 7.984312320397362e-06, "loss": 0.0482, "step": 30191 }, { "epoch": 0.6652894610719067, "grad_norm": 0.3667739927768707, "learning_rate": 7.983366112898666e-06, "loss": 0.0608, "step": 30192 }, { "epoch": 0.6653114963614228, "grad_norm": 0.5739907026290894, "learning_rate": 7.98241994113945e-06, "loss": 0.0982, "step": 30193 }, { "epoch": 0.665333531650939, "grad_norm": 0.4958727955818176, "learning_rate": 7.981473805124538e-06, "loss": 0.0854, "step": 30194 }, { "epoch": 0.6653555669404552, "grad_norm": 0.5714600086212158, "learning_rate": 7.98052770485874e-06, "loss": 0.0562, "step": 30195 }, { "epoch": 0.6653776022299713, "grad_norm": 0.5861298441886902, "learning_rate": 7.979581640346876e-06, "loss": 0.071, "step": 30196 }, { "epoch": 0.6653996375194875, "grad_norm": 0.5143241882324219, "learning_rate": 7.978635611593769e-06, "loss": 0.0595, "step": 30197 }, { "epoch": 0.6654216728090037, "grad_norm": 0.7006990909576416, "learning_rate": 7.977689618604241e-06, "loss": 0.0892, "step": 30198 }, { "epoch": 0.6654437080985198, "grad_norm": 0.34577861428260803, "learning_rate": 7.9767436613831e-06, "loss": 0.0405, "step": 30199 }, { "epoch": 0.665465743388036, "grad_norm": 0.7601593732833862, "learning_rate": 7.97579773993517e-06, "loss": 0.0687, "step": 30200 }, { "epoch": 0.665487778677552, "grad_norm": 0.7692229747772217, "learning_rate": 7.974851854265273e-06, "loss": 0.0661, "step": 30201 }, { "epoch": 0.6655098139670682, "grad_norm": 0.7069346904754639, "learning_rate": 7.973906004378218e-06, "loss": 0.0615, "step": 30202 }, { "epoch": 0.6655318492565844, "grad_norm": 0.32715949416160583, "learning_rate": 7.972960190278833e-06, "loss": 0.0387, "step": 30203 }, { "epoch": 0.6655538845461005, "grad_norm": 0.39462724328041077, "learning_rate": 7.972014411971919e-06, "loss": 0.0676, "step": 30204 }, { "epoch": 0.6655759198356167, "grad_norm": 0.7097411751747131, "learning_rate": 7.971068669462315e-06, "loss": 0.0526, "step": 30205 }, { "epoch": 0.6655979551251329, "grad_norm": 0.48949483036994934, "learning_rate": 7.970122962754823e-06, "loss": 0.0465, "step": 30206 }, { "epoch": 0.665619990414649, "grad_norm": 0.6529222726821899, "learning_rate": 7.969177291854269e-06, "loss": 0.0789, "step": 30207 }, { "epoch": 0.6656420257041652, "grad_norm": 0.518174946308136, "learning_rate": 7.968231656765458e-06, "loss": 0.0599, "step": 30208 }, { "epoch": 0.6656640609936814, "grad_norm": 0.4825466275215149, "learning_rate": 7.967286057493218e-06, "loss": 0.0524, "step": 30209 }, { "epoch": 0.6656860962831975, "grad_norm": 0.6659343838691711, "learning_rate": 7.966340494042363e-06, "loss": 0.0781, "step": 30210 }, { "epoch": 0.6657081315727137, "grad_norm": 0.4336722493171692, "learning_rate": 7.9653949664177e-06, "loss": 0.0533, "step": 30211 }, { "epoch": 0.6657301668622299, "grad_norm": 0.5171765685081482, "learning_rate": 7.964449474624062e-06, "loss": 0.0577, "step": 30212 }, { "epoch": 0.665752202151746, "grad_norm": 0.22632384300231934, "learning_rate": 7.963504018666248e-06, "loss": 0.0499, "step": 30213 }, { "epoch": 0.6657742374412622, "grad_norm": 0.3708365559577942, "learning_rate": 7.96255859854909e-06, "loss": 0.065, "step": 30214 }, { "epoch": 0.6657962727307783, "grad_norm": 0.5177217125892639, "learning_rate": 7.96161321427739e-06, "loss": 0.0902, "step": 30215 }, { "epoch": 0.6658183080202945, "grad_norm": 0.4001362919807434, "learning_rate": 7.960667865855965e-06, "loss": 0.0591, "step": 30216 }, { "epoch": 0.6658403433098107, "grad_norm": 0.4842592775821686, "learning_rate": 7.95972255328964e-06, "loss": 0.0717, "step": 30217 }, { "epoch": 0.6658623785993268, "grad_norm": 0.6810946464538574, "learning_rate": 7.958777276583218e-06, "loss": 0.0608, "step": 30218 }, { "epoch": 0.665884413888843, "grad_norm": 0.33698570728302, "learning_rate": 7.957832035741519e-06, "loss": 0.0738, "step": 30219 }, { "epoch": 0.6659064491783592, "grad_norm": 0.7979221343994141, "learning_rate": 7.956886830769357e-06, "loss": 0.0564, "step": 30220 }, { "epoch": 0.6659284844678753, "grad_norm": 0.6152392029762268, "learning_rate": 7.955941661671555e-06, "loss": 0.0622, "step": 30221 }, { "epoch": 0.6659505197573915, "grad_norm": 0.5745826959609985, "learning_rate": 7.954996528452914e-06, "loss": 0.0649, "step": 30222 }, { "epoch": 0.6659725550469077, "grad_norm": 0.6210574507713318, "learning_rate": 7.95405143111825e-06, "loss": 0.077, "step": 30223 }, { "epoch": 0.6659945903364238, "grad_norm": 0.6595615744590759, "learning_rate": 7.953106369672388e-06, "loss": 0.0633, "step": 30224 }, { "epoch": 0.66601662562594, "grad_norm": 0.5880247354507446, "learning_rate": 7.952161344120128e-06, "loss": 0.0587, "step": 30225 }, { "epoch": 0.666038660915456, "grad_norm": 0.8956829309463501, "learning_rate": 7.951216354466297e-06, "loss": 0.0677, "step": 30226 }, { "epoch": 0.6660606962049722, "grad_norm": 0.8440481424331665, "learning_rate": 7.950271400715688e-06, "loss": 0.0911, "step": 30227 }, { "epoch": 0.6660827314944884, "grad_norm": 0.49796798825263977, "learning_rate": 7.94932648287314e-06, "loss": 0.0426, "step": 30228 }, { "epoch": 0.6661047667840045, "grad_norm": 0.48079344630241394, "learning_rate": 7.948381600943445e-06, "loss": 0.0566, "step": 30229 }, { "epoch": 0.6661268020735207, "grad_norm": 0.4618808329105377, "learning_rate": 7.947436754931433e-06, "loss": 0.0464, "step": 30230 }, { "epoch": 0.6661488373630369, "grad_norm": 0.704221785068512, "learning_rate": 7.946491944841898e-06, "loss": 0.0868, "step": 30231 }, { "epoch": 0.666170872652553, "grad_norm": 0.44106897711753845, "learning_rate": 7.945547170679664e-06, "loss": 0.0624, "step": 30232 }, { "epoch": 0.6661929079420692, "grad_norm": 0.43302178382873535, "learning_rate": 7.944602432449546e-06, "loss": 0.1035, "step": 30233 }, { "epoch": 0.6662149432315854, "grad_norm": 0.8236019015312195, "learning_rate": 7.94365773015634e-06, "loss": 0.0663, "step": 30234 }, { "epoch": 0.6662369785211015, "grad_norm": 0.7068830728530884, "learning_rate": 7.942713063804881e-06, "loss": 0.0659, "step": 30235 }, { "epoch": 0.6662590138106177, "grad_norm": 0.6639029383659363, "learning_rate": 7.941768433399962e-06, "loss": 0.0581, "step": 30236 }, { "epoch": 0.6662810491001339, "grad_norm": 0.49675753712654114, "learning_rate": 7.940823838946408e-06, "loss": 0.0349, "step": 30237 }, { "epoch": 0.66630308438965, "grad_norm": 0.5494840145111084, "learning_rate": 7.939879280449013e-06, "loss": 0.0558, "step": 30238 }, { "epoch": 0.6663251196791662, "grad_norm": 0.5872800946235657, "learning_rate": 7.938934757912609e-06, "loss": 0.0845, "step": 30239 }, { "epoch": 0.6663471549686824, "grad_norm": 0.5975741147994995, "learning_rate": 7.937990271341992e-06, "loss": 0.0407, "step": 30240 }, { "epoch": 0.6663691902581985, "grad_norm": 0.7471925020217896, "learning_rate": 7.937045820741983e-06, "loss": 0.0455, "step": 30241 }, { "epoch": 0.6663912255477147, "grad_norm": 1.1700589656829834, "learning_rate": 7.93610140611738e-06, "loss": 0.0942, "step": 30242 }, { "epoch": 0.6664132608372308, "grad_norm": 0.9633393883705139, "learning_rate": 7.935157027473001e-06, "loss": 0.0795, "step": 30243 }, { "epoch": 0.666435296126747, "grad_norm": 0.44056034088134766, "learning_rate": 7.934212684813663e-06, "loss": 0.0478, "step": 30244 }, { "epoch": 0.6664573314162632, "grad_norm": 0.5039975643157959, "learning_rate": 7.933268378144161e-06, "loss": 0.0647, "step": 30245 }, { "epoch": 0.6664793667057793, "grad_norm": 0.4467349052429199, "learning_rate": 7.932324107469314e-06, "loss": 0.0705, "step": 30246 }, { "epoch": 0.6665014019952955, "grad_norm": 0.7151424288749695, "learning_rate": 7.93137987279393e-06, "loss": 0.0694, "step": 30247 }, { "epoch": 0.6665234372848117, "grad_norm": 0.7842806577682495, "learning_rate": 7.930435674122825e-06, "loss": 0.0588, "step": 30248 }, { "epoch": 0.6665454725743278, "grad_norm": 0.4155711233615875, "learning_rate": 7.929491511460794e-06, "loss": 0.0919, "step": 30249 }, { "epoch": 0.666567507863844, "grad_norm": 0.683881402015686, "learning_rate": 7.928547384812657e-06, "loss": 0.052, "step": 30250 }, { "epoch": 0.66658954315336, "grad_norm": 0.8580072522163391, "learning_rate": 7.927603294183223e-06, "loss": 0.0821, "step": 30251 }, { "epoch": 0.6666115784428762, "grad_norm": 0.5339551568031311, "learning_rate": 7.926659239577294e-06, "loss": 0.0427, "step": 30252 }, { "epoch": 0.6666336137323924, "grad_norm": 0.5049219727516174, "learning_rate": 7.925715220999689e-06, "loss": 0.0694, "step": 30253 }, { "epoch": 0.6666556490219085, "grad_norm": 0.5974043011665344, "learning_rate": 7.924771238455197e-06, "loss": 0.0642, "step": 30254 }, { "epoch": 0.6666776843114247, "grad_norm": 0.5311304330825806, "learning_rate": 7.923827291948653e-06, "loss": 0.0599, "step": 30255 }, { "epoch": 0.6666997196009409, "grad_norm": 0.5350398421287537, "learning_rate": 7.922883381484842e-06, "loss": 0.0634, "step": 30256 }, { "epoch": 0.666721754890457, "grad_norm": 0.502596378326416, "learning_rate": 7.921939507068582e-06, "loss": 0.0561, "step": 30257 }, { "epoch": 0.6667437901799732, "grad_norm": 0.7091743350028992, "learning_rate": 7.920995668704687e-06, "loss": 0.0554, "step": 30258 }, { "epoch": 0.6667658254694894, "grad_norm": 0.43047741055488586, "learning_rate": 7.920051866397952e-06, "loss": 0.0595, "step": 30259 }, { "epoch": 0.6667878607590055, "grad_norm": 0.6062369346618652, "learning_rate": 7.919108100153193e-06, "loss": 0.0928, "step": 30260 }, { "epoch": 0.6668098960485217, "grad_norm": 0.3317069411277771, "learning_rate": 7.918164369975204e-06, "loss": 0.0485, "step": 30261 }, { "epoch": 0.6668319313380379, "grad_norm": 1.0417866706848145, "learning_rate": 7.917220675868811e-06, "loss": 0.0869, "step": 30262 }, { "epoch": 0.666853966627554, "grad_norm": 0.383897989988327, "learning_rate": 7.916277017838805e-06, "loss": 0.0491, "step": 30263 }, { "epoch": 0.6668760019170702, "grad_norm": 0.4832267463207245, "learning_rate": 7.915333395890007e-06, "loss": 0.0727, "step": 30264 }, { "epoch": 0.6668980372065864, "grad_norm": 0.6033110022544861, "learning_rate": 7.914389810027208e-06, "loss": 0.0466, "step": 30265 }, { "epoch": 0.6669200724961025, "grad_norm": 0.7468018531799316, "learning_rate": 7.91344626025522e-06, "loss": 0.0583, "step": 30266 }, { "epoch": 0.6669421077856187, "grad_norm": 0.27665501832962036, "learning_rate": 7.91250274657886e-06, "loss": 0.0369, "step": 30267 }, { "epoch": 0.6669641430751349, "grad_norm": 0.4532582461833954, "learning_rate": 7.911559269002914e-06, "loss": 0.0385, "step": 30268 }, { "epoch": 0.666986178364651, "grad_norm": 0.4438585638999939, "learning_rate": 7.910615827532198e-06, "loss": 0.0727, "step": 30269 }, { "epoch": 0.6670082136541672, "grad_norm": 1.088437557220459, "learning_rate": 7.909672422171519e-06, "loss": 0.0662, "step": 30270 }, { "epoch": 0.6670302489436833, "grad_norm": 0.6302713751792908, "learning_rate": 7.908729052925687e-06, "loss": 0.0589, "step": 30271 }, { "epoch": 0.6670522842331995, "grad_norm": 0.8342604041099548, "learning_rate": 7.907785719799493e-06, "loss": 0.0792, "step": 30272 }, { "epoch": 0.6670743195227157, "grad_norm": 0.5402256846427917, "learning_rate": 7.906842422797751e-06, "loss": 0.0743, "step": 30273 }, { "epoch": 0.6670963548122318, "grad_norm": 0.9328230619430542, "learning_rate": 7.905899161925268e-06, "loss": 0.0709, "step": 30274 }, { "epoch": 0.667118390101748, "grad_norm": 0.9060380458831787, "learning_rate": 7.904955937186841e-06, "loss": 0.0634, "step": 30275 }, { "epoch": 0.6671404253912641, "grad_norm": 1.2381150722503662, "learning_rate": 7.904012748587284e-06, "loss": 0.0706, "step": 30276 }, { "epoch": 0.6671624606807802, "grad_norm": 0.45398950576782227, "learning_rate": 7.90306959613138e-06, "loss": 0.0761, "step": 30277 }, { "epoch": 0.6671844959702964, "grad_norm": 0.5052972435951233, "learning_rate": 7.902126479823964e-06, "loss": 0.0428, "step": 30278 }, { "epoch": 0.6672065312598126, "grad_norm": 0.7737687826156616, "learning_rate": 7.901183399669816e-06, "loss": 0.0599, "step": 30279 }, { "epoch": 0.6672285665493287, "grad_norm": 0.6052579879760742, "learning_rate": 7.900240355673748e-06, "loss": 0.0798, "step": 30280 }, { "epoch": 0.6672506018388449, "grad_norm": 0.6528362035751343, "learning_rate": 7.899297347840569e-06, "loss": 0.0804, "step": 30281 }, { "epoch": 0.667272637128361, "grad_norm": 0.518446147441864, "learning_rate": 7.898354376175072e-06, "loss": 0.065, "step": 30282 }, { "epoch": 0.6672946724178772, "grad_norm": 0.6100512146949768, "learning_rate": 7.897411440682067e-06, "loss": 0.0619, "step": 30283 }, { "epoch": 0.6673167077073934, "grad_norm": 1.6932988166809082, "learning_rate": 7.896468541366345e-06, "loss": 0.0623, "step": 30284 }, { "epoch": 0.6673387429969095, "grad_norm": 0.7292336821556091, "learning_rate": 7.89552567823273e-06, "loss": 0.0616, "step": 30285 }, { "epoch": 0.6673607782864257, "grad_norm": 0.8537284135818481, "learning_rate": 7.894582851286004e-06, "loss": 0.0925, "step": 30286 }, { "epoch": 0.6673828135759419, "grad_norm": 0.7804514765739441, "learning_rate": 7.893640060530986e-06, "loss": 0.0905, "step": 30287 }, { "epoch": 0.667404848865458, "grad_norm": 0.5147693157196045, "learning_rate": 7.892697305972465e-06, "loss": 0.0618, "step": 30288 }, { "epoch": 0.6674268841549742, "grad_norm": 0.4584188163280487, "learning_rate": 7.891754587615247e-06, "loss": 0.0507, "step": 30289 }, { "epoch": 0.6674489194444904, "grad_norm": 0.7382861971855164, "learning_rate": 7.890811905464137e-06, "loss": 0.0709, "step": 30290 }, { "epoch": 0.6674709547340065, "grad_norm": 0.4906814396381378, "learning_rate": 7.889869259523931e-06, "loss": 0.0617, "step": 30291 }, { "epoch": 0.6674929900235227, "grad_norm": 0.828931987285614, "learning_rate": 7.888926649799434e-06, "loss": 0.0706, "step": 30292 }, { "epoch": 0.6675150253130389, "grad_norm": 0.5601048469543457, "learning_rate": 7.887984076295447e-06, "loss": 0.0657, "step": 30293 }, { "epoch": 0.667537060602555, "grad_norm": 0.4960385262966156, "learning_rate": 7.887041539016776e-06, "loss": 0.0681, "step": 30294 }, { "epoch": 0.6675590958920712, "grad_norm": 0.8918830156326294, "learning_rate": 7.88609903796821e-06, "loss": 0.0971, "step": 30295 }, { "epoch": 0.6675811311815874, "grad_norm": 0.5793234705924988, "learning_rate": 7.88515657315456e-06, "loss": 0.0561, "step": 30296 }, { "epoch": 0.6676031664711035, "grad_norm": 0.7950706481933594, "learning_rate": 7.884214144580626e-06, "loss": 0.0876, "step": 30297 }, { "epoch": 0.6676252017606197, "grad_norm": 0.44824960827827454, "learning_rate": 7.883271752251199e-06, "loss": 0.0919, "step": 30298 }, { "epoch": 0.6676472370501358, "grad_norm": 0.48330894112586975, "learning_rate": 7.882329396171084e-06, "loss": 0.0501, "step": 30299 }, { "epoch": 0.6676692723396519, "grad_norm": 0.5289071798324585, "learning_rate": 7.881387076345085e-06, "loss": 0.0589, "step": 30300 }, { "epoch": 0.6676913076291681, "grad_norm": 0.5767229795455933, "learning_rate": 7.880444792778004e-06, "loss": 0.0809, "step": 30301 }, { "epoch": 0.6677133429186842, "grad_norm": 0.6394321918487549, "learning_rate": 7.879502545474629e-06, "loss": 0.0747, "step": 30302 }, { "epoch": 0.6677353782082004, "grad_norm": 0.4915946125984192, "learning_rate": 7.878560334439767e-06, "loss": 0.0968, "step": 30303 }, { "epoch": 0.6677574134977166, "grad_norm": 0.774538516998291, "learning_rate": 7.877618159678221e-06, "loss": 0.0823, "step": 30304 }, { "epoch": 0.6677794487872327, "grad_norm": 0.44588324427604675, "learning_rate": 7.876676021194778e-06, "loss": 0.05, "step": 30305 }, { "epoch": 0.6678014840767489, "grad_norm": 0.40826600790023804, "learning_rate": 7.875733918994251e-06, "loss": 0.0782, "step": 30306 }, { "epoch": 0.667823519366265, "grad_norm": 0.731641948223114, "learning_rate": 7.87479185308142e-06, "loss": 0.1115, "step": 30307 }, { "epoch": 0.6678455546557812, "grad_norm": 0.3539169132709503, "learning_rate": 7.873849823461106e-06, "loss": 0.0418, "step": 30308 }, { "epoch": 0.6678675899452974, "grad_norm": 0.37914255261421204, "learning_rate": 7.87290783013809e-06, "loss": 0.0316, "step": 30309 }, { "epoch": 0.6678896252348135, "grad_norm": 0.7547375559806824, "learning_rate": 7.871965873117182e-06, "loss": 0.0713, "step": 30310 }, { "epoch": 0.6679116605243297, "grad_norm": 0.5004739165306091, "learning_rate": 7.871023952403171e-06, "loss": 0.0653, "step": 30311 }, { "epoch": 0.6679336958138459, "grad_norm": 0.419122576713562, "learning_rate": 7.870082068000854e-06, "loss": 0.0679, "step": 30312 }, { "epoch": 0.667955731103362, "grad_norm": 0.49269407987594604, "learning_rate": 7.869140219915042e-06, "loss": 0.0573, "step": 30313 }, { "epoch": 0.6679777663928782, "grad_norm": 1.023725986480713, "learning_rate": 7.868198408150515e-06, "loss": 0.1035, "step": 30314 }, { "epoch": 0.6679998016823944, "grad_norm": 0.5214253067970276, "learning_rate": 7.867256632712078e-06, "loss": 0.0565, "step": 30315 }, { "epoch": 0.6680218369719105, "grad_norm": 0.5663609504699707, "learning_rate": 7.866314893604528e-06, "loss": 0.0608, "step": 30316 }, { "epoch": 0.6680438722614267, "grad_norm": 0.6375688314437866, "learning_rate": 7.865373190832666e-06, "loss": 0.0675, "step": 30317 }, { "epoch": 0.6680659075509429, "grad_norm": 0.5087165832519531, "learning_rate": 7.864431524401281e-06, "loss": 0.0714, "step": 30318 }, { "epoch": 0.668087942840459, "grad_norm": 1.2727563381195068, "learning_rate": 7.863489894315172e-06, "loss": 0.1094, "step": 30319 }, { "epoch": 0.6681099781299752, "grad_norm": 0.5561681389808655, "learning_rate": 7.86254830057914e-06, "loss": 0.0492, "step": 30320 }, { "epoch": 0.6681320134194914, "grad_norm": 0.5690568685531616, "learning_rate": 7.861606743197971e-06, "loss": 0.0582, "step": 30321 }, { "epoch": 0.6681540487090075, "grad_norm": 0.5684069991111755, "learning_rate": 7.86066522217647e-06, "loss": 0.0602, "step": 30322 }, { "epoch": 0.6681760839985237, "grad_norm": 0.803907573223114, "learning_rate": 7.859723737519426e-06, "loss": 0.0723, "step": 30323 }, { "epoch": 0.6681981192880398, "grad_norm": 0.30109068751335144, "learning_rate": 7.858782289231644e-06, "loss": 0.0695, "step": 30324 }, { "epoch": 0.6682201545775559, "grad_norm": 0.3785274028778076, "learning_rate": 7.85784087731791e-06, "loss": 0.0521, "step": 30325 }, { "epoch": 0.6682421898670721, "grad_norm": 0.6158485412597656, "learning_rate": 7.85689950178302e-06, "loss": 0.0465, "step": 30326 }, { "epoch": 0.6682642251565882, "grad_norm": 0.6515088677406311, "learning_rate": 7.855958162631778e-06, "loss": 0.0512, "step": 30327 }, { "epoch": 0.6682862604461044, "grad_norm": 0.5577932000160217, "learning_rate": 7.855016859868965e-06, "loss": 0.0877, "step": 30328 }, { "epoch": 0.6683082957356206, "grad_norm": 0.7558075785636902, "learning_rate": 7.854075593499391e-06, "loss": 0.0749, "step": 30329 }, { "epoch": 0.6683303310251367, "grad_norm": 0.41825807094573975, "learning_rate": 7.853134363527829e-06, "loss": 0.0607, "step": 30330 }, { "epoch": 0.6683523663146529, "grad_norm": 0.5226463079452515, "learning_rate": 7.852193169959097e-06, "loss": 0.0562, "step": 30331 }, { "epoch": 0.668374401604169, "grad_norm": 0.6976802349090576, "learning_rate": 7.851252012797972e-06, "loss": 0.0466, "step": 30332 }, { "epoch": 0.6683964368936852, "grad_norm": 0.47070327401161194, "learning_rate": 7.850310892049263e-06, "loss": 0.0421, "step": 30333 }, { "epoch": 0.6684184721832014, "grad_norm": 0.7517290711402893, "learning_rate": 7.849369807717745e-06, "loss": 0.0468, "step": 30334 }, { "epoch": 0.6684405074727175, "grad_norm": 0.6270301938056946, "learning_rate": 7.848428759808223e-06, "loss": 0.0629, "step": 30335 }, { "epoch": 0.6684625427622337, "grad_norm": 0.6826831698417664, "learning_rate": 7.847487748325494e-06, "loss": 0.077, "step": 30336 }, { "epoch": 0.6684845780517499, "grad_norm": 1.1230882406234741, "learning_rate": 7.846546773274339e-06, "loss": 0.0625, "step": 30337 }, { "epoch": 0.668506613341266, "grad_norm": 0.8001531958580017, "learning_rate": 7.845605834659556e-06, "loss": 0.0448, "step": 30338 }, { "epoch": 0.6685286486307822, "grad_norm": 0.6513940095901489, "learning_rate": 7.84466493248594e-06, "loss": 0.0583, "step": 30339 }, { "epoch": 0.6685506839202984, "grad_norm": 0.6145708560943604, "learning_rate": 7.84372406675829e-06, "loss": 0.0329, "step": 30340 }, { "epoch": 0.6685727192098145, "grad_norm": 0.9172884821891785, "learning_rate": 7.842783237481385e-06, "loss": 0.0697, "step": 30341 }, { "epoch": 0.6685947544993307, "grad_norm": 0.8692417740821838, "learning_rate": 7.841842444660022e-06, "loss": 0.0987, "step": 30342 }, { "epoch": 0.6686167897888469, "grad_norm": 0.7355563044548035, "learning_rate": 7.840901688299e-06, "loss": 0.0542, "step": 30343 }, { "epoch": 0.668638825078363, "grad_norm": 0.6909927129745483, "learning_rate": 7.839960968403097e-06, "loss": 0.0452, "step": 30344 }, { "epoch": 0.6686608603678792, "grad_norm": 0.535193920135498, "learning_rate": 7.839020284977114e-06, "loss": 0.0583, "step": 30345 }, { "epoch": 0.6686828956573954, "grad_norm": 0.7486248016357422, "learning_rate": 7.838079638025841e-06, "loss": 0.055, "step": 30346 }, { "epoch": 0.6687049309469115, "grad_norm": 0.4988804757595062, "learning_rate": 7.837139027554071e-06, "loss": 0.0615, "step": 30347 }, { "epoch": 0.6687269662364277, "grad_norm": 0.5397612452507019, "learning_rate": 7.836198453566591e-06, "loss": 0.0776, "step": 30348 }, { "epoch": 0.6687490015259439, "grad_norm": 0.7766932249069214, "learning_rate": 7.835257916068194e-06, "loss": 0.0664, "step": 30349 }, { "epoch": 0.6687710368154599, "grad_norm": 0.4851859211921692, "learning_rate": 7.83431741506367e-06, "loss": 0.0581, "step": 30350 }, { "epoch": 0.6687930721049761, "grad_norm": 0.7150999903678894, "learning_rate": 7.833376950557815e-06, "loss": 0.0663, "step": 30351 }, { "epoch": 0.6688151073944922, "grad_norm": 0.32529422640800476, "learning_rate": 7.83243652255541e-06, "loss": 0.0394, "step": 30352 }, { "epoch": 0.6688371426840084, "grad_norm": 0.6640315651893616, "learning_rate": 7.831496131061247e-06, "loss": 0.0486, "step": 30353 }, { "epoch": 0.6688591779735246, "grad_norm": 0.6834275126457214, "learning_rate": 7.830555776080126e-06, "loss": 0.0642, "step": 30354 }, { "epoch": 0.6688812132630407, "grad_norm": 0.6417447328567505, "learning_rate": 7.82961545761682e-06, "loss": 0.0386, "step": 30355 }, { "epoch": 0.6689032485525569, "grad_norm": 0.7060016393661499, "learning_rate": 7.828675175676138e-06, "loss": 0.0601, "step": 30356 }, { "epoch": 0.6689252838420731, "grad_norm": 0.5691671371459961, "learning_rate": 7.827734930262844e-06, "loss": 0.0476, "step": 30357 }, { "epoch": 0.6689473191315892, "grad_norm": 0.6150729656219482, "learning_rate": 7.826794721381753e-06, "loss": 0.0682, "step": 30358 }, { "epoch": 0.6689693544211054, "grad_norm": 0.5121498703956604, "learning_rate": 7.82585454903764e-06, "loss": 0.0667, "step": 30359 }, { "epoch": 0.6689913897106216, "grad_norm": 0.9601742625236511, "learning_rate": 7.824914413235303e-06, "loss": 0.0679, "step": 30360 }, { "epoch": 0.6690134250001377, "grad_norm": 0.6365983486175537, "learning_rate": 7.823974313979517e-06, "loss": 0.0578, "step": 30361 }, { "epoch": 0.6690354602896539, "grad_norm": 0.3935086727142334, "learning_rate": 7.823034251275077e-06, "loss": 0.0901, "step": 30362 }, { "epoch": 0.66905749557917, "grad_norm": 0.46130722761154175, "learning_rate": 7.82209422512678e-06, "loss": 0.0596, "step": 30363 }, { "epoch": 0.6690795308686862, "grad_norm": 0.7355388402938843, "learning_rate": 7.821154235539398e-06, "loss": 0.0748, "step": 30364 }, { "epoch": 0.6691015661582024, "grad_norm": 0.6525198817253113, "learning_rate": 7.82021428251773e-06, "loss": 0.0567, "step": 30365 }, { "epoch": 0.6691236014477185, "grad_norm": 0.4621434211730957, "learning_rate": 7.81927436606656e-06, "loss": 0.0534, "step": 30366 }, { "epoch": 0.6691456367372347, "grad_norm": 0.5739046335220337, "learning_rate": 7.81833448619068e-06, "loss": 0.0594, "step": 30367 }, { "epoch": 0.6691676720267509, "grad_norm": 0.4744110107421875, "learning_rate": 7.81739464289487e-06, "loss": 0.077, "step": 30368 }, { "epoch": 0.669189707316267, "grad_norm": 0.33983564376831055, "learning_rate": 7.81645483618392e-06, "loss": 0.0533, "step": 30369 }, { "epoch": 0.6692117426057832, "grad_norm": 0.7321736812591553, "learning_rate": 7.815515066062623e-06, "loss": 0.0791, "step": 30370 }, { "epoch": 0.6692337778952994, "grad_norm": 0.5600382685661316, "learning_rate": 7.814575332535755e-06, "loss": 0.0538, "step": 30371 }, { "epoch": 0.6692558131848155, "grad_norm": 0.7018420100212097, "learning_rate": 7.813635635608108e-06, "loss": 0.07, "step": 30372 }, { "epoch": 0.6692778484743317, "grad_norm": 0.42469415068626404, "learning_rate": 7.81269597528447e-06, "loss": 0.0408, "step": 30373 }, { "epoch": 0.6692998837638477, "grad_norm": 0.6731351613998413, "learning_rate": 7.811756351569627e-06, "loss": 0.0873, "step": 30374 }, { "epoch": 0.6693219190533639, "grad_norm": 0.5914905071258545, "learning_rate": 7.810816764468362e-06, "loss": 0.0607, "step": 30375 }, { "epoch": 0.6693439543428801, "grad_norm": 0.4094356894493103, "learning_rate": 7.80987721398546e-06, "loss": 0.0555, "step": 30376 }, { "epoch": 0.6693659896323962, "grad_norm": 0.6073101758956909, "learning_rate": 7.808937700125716e-06, "loss": 0.0888, "step": 30377 }, { "epoch": 0.6693880249219124, "grad_norm": 0.6140521764755249, "learning_rate": 7.807998222893901e-06, "loss": 0.0806, "step": 30378 }, { "epoch": 0.6694100602114286, "grad_norm": 0.6124979257583618, "learning_rate": 7.807058782294814e-06, "loss": 0.0736, "step": 30379 }, { "epoch": 0.6694320955009447, "grad_norm": 0.5153200030326843, "learning_rate": 7.806119378333226e-06, "loss": 0.0498, "step": 30380 }, { "epoch": 0.6694541307904609, "grad_norm": 0.7530480623245239, "learning_rate": 7.805180011013937e-06, "loss": 0.0762, "step": 30381 }, { "epoch": 0.6694761660799771, "grad_norm": 0.7053812742233276, "learning_rate": 7.80424068034172e-06, "loss": 0.0782, "step": 30382 }, { "epoch": 0.6694982013694932, "grad_norm": 0.5509883165359497, "learning_rate": 7.803301386321368e-06, "loss": 0.0571, "step": 30383 }, { "epoch": 0.6695202366590094, "grad_norm": 0.6876006126403809, "learning_rate": 7.802362128957657e-06, "loss": 0.0897, "step": 30384 }, { "epoch": 0.6695422719485256, "grad_norm": 0.576981246471405, "learning_rate": 7.801422908255375e-06, "loss": 0.066, "step": 30385 }, { "epoch": 0.6695643072380417, "grad_norm": 0.4453639090061188, "learning_rate": 7.800483724219314e-06, "loss": 0.0725, "step": 30386 }, { "epoch": 0.6695863425275579, "grad_norm": 0.5816269516944885, "learning_rate": 7.799544576854242e-06, "loss": 0.0488, "step": 30387 }, { "epoch": 0.669608377817074, "grad_norm": 0.47382932901382446, "learning_rate": 7.79860546616495e-06, "loss": 0.0437, "step": 30388 }, { "epoch": 0.6696304131065902, "grad_norm": 0.643779993057251, "learning_rate": 7.797666392156224e-06, "loss": 0.0586, "step": 30389 }, { "epoch": 0.6696524483961064, "grad_norm": 0.4519062638282776, "learning_rate": 7.79672735483285e-06, "loss": 0.0703, "step": 30390 }, { "epoch": 0.6696744836856225, "grad_norm": 0.8212293386459351, "learning_rate": 7.7957883541996e-06, "loss": 0.0698, "step": 30391 }, { "epoch": 0.6696965189751387, "grad_norm": 0.690229058265686, "learning_rate": 7.794849390261263e-06, "loss": 0.0668, "step": 30392 }, { "epoch": 0.6697185542646549, "grad_norm": 0.9076440930366516, "learning_rate": 7.79391046302263e-06, "loss": 0.0796, "step": 30393 }, { "epoch": 0.669740589554171, "grad_norm": 0.623411238193512, "learning_rate": 7.792971572488468e-06, "loss": 0.0706, "step": 30394 }, { "epoch": 0.6697626248436872, "grad_norm": 0.4387105405330658, "learning_rate": 7.792032718663566e-06, "loss": 0.0626, "step": 30395 }, { "epoch": 0.6697846601332034, "grad_norm": 0.25085246562957764, "learning_rate": 7.791093901552709e-06, "loss": 0.0513, "step": 30396 }, { "epoch": 0.6698066954227195, "grad_norm": 0.5887228846549988, "learning_rate": 7.790155121160678e-06, "loss": 0.0571, "step": 30397 }, { "epoch": 0.6698287307122357, "grad_norm": 0.5142008662223816, "learning_rate": 7.789216377492251e-06, "loss": 0.0661, "step": 30398 }, { "epoch": 0.6698507660017518, "grad_norm": 0.49084532260894775, "learning_rate": 7.78827767055221e-06, "loss": 0.057, "step": 30399 }, { "epoch": 0.6698728012912679, "grad_norm": 0.48541489243507385, "learning_rate": 7.787339000345343e-06, "loss": 0.0633, "step": 30400 }, { "epoch": 0.6698948365807841, "grad_norm": 0.4190142750740051, "learning_rate": 7.786400366876425e-06, "loss": 0.0677, "step": 30401 }, { "epoch": 0.6699168718703002, "grad_norm": 0.7713552117347717, "learning_rate": 7.78546177015024e-06, "loss": 0.0832, "step": 30402 }, { "epoch": 0.6699389071598164, "grad_norm": 0.5955221652984619, "learning_rate": 7.784523210171554e-06, "loss": 0.0828, "step": 30403 }, { "epoch": 0.6699609424493326, "grad_norm": 0.6819889545440674, "learning_rate": 7.783584686945176e-06, "loss": 0.0516, "step": 30404 }, { "epoch": 0.6699829777388487, "grad_norm": 0.39695051312446594, "learning_rate": 7.782646200475861e-06, "loss": 0.0608, "step": 30405 }, { "epoch": 0.6700050130283649, "grad_norm": 0.5230328440666199, "learning_rate": 7.781707750768408e-06, "loss": 0.0489, "step": 30406 }, { "epoch": 0.6700270483178811, "grad_norm": 0.9995536208152771, "learning_rate": 7.78076933782758e-06, "loss": 0.0935, "step": 30407 }, { "epoch": 0.6700490836073972, "grad_norm": 0.5161808729171753, "learning_rate": 7.779830961658169e-06, "loss": 0.0504, "step": 30408 }, { "epoch": 0.6700711188969134, "grad_norm": 0.39746400713920593, "learning_rate": 7.778892622264953e-06, "loss": 0.0395, "step": 30409 }, { "epoch": 0.6700931541864296, "grad_norm": 0.7763957381248474, "learning_rate": 7.777954319652704e-06, "loss": 0.0917, "step": 30410 }, { "epoch": 0.6701151894759457, "grad_norm": 0.24555827677249908, "learning_rate": 7.777016053826209e-06, "loss": 0.0551, "step": 30411 }, { "epoch": 0.6701372247654619, "grad_norm": 0.6975486874580383, "learning_rate": 7.77607782479024e-06, "loss": 0.068, "step": 30412 }, { "epoch": 0.6701592600549781, "grad_norm": 0.46582090854644775, "learning_rate": 7.77513963254959e-06, "loss": 0.0472, "step": 30413 }, { "epoch": 0.6701812953444942, "grad_norm": 0.6806658506393433, "learning_rate": 7.77420147710902e-06, "loss": 0.0935, "step": 30414 }, { "epoch": 0.6702033306340104, "grad_norm": 0.38982564210891724, "learning_rate": 7.773263358473318e-06, "loss": 0.0613, "step": 30415 }, { "epoch": 0.6702253659235266, "grad_norm": 0.5754742622375488, "learning_rate": 7.772325276647266e-06, "loss": 0.0491, "step": 30416 }, { "epoch": 0.6702474012130427, "grad_norm": 0.46169111132621765, "learning_rate": 7.771387231635631e-06, "loss": 0.0704, "step": 30417 }, { "epoch": 0.6702694365025589, "grad_norm": 0.5499849915504456, "learning_rate": 7.770449223443199e-06, "loss": 0.0359, "step": 30418 }, { "epoch": 0.670291471792075, "grad_norm": 0.8450044989585876, "learning_rate": 7.769511252074744e-06, "loss": 0.0949, "step": 30419 }, { "epoch": 0.6703135070815912, "grad_norm": 0.5638020634651184, "learning_rate": 7.768573317535052e-06, "loss": 0.0627, "step": 30420 }, { "epoch": 0.6703355423711074, "grad_norm": 0.4715692400932312, "learning_rate": 7.767635419828888e-06, "loss": 0.0458, "step": 30421 }, { "epoch": 0.6703575776606235, "grad_norm": 0.3849133849143982, "learning_rate": 7.766697558961035e-06, "loss": 0.0803, "step": 30422 }, { "epoch": 0.6703796129501397, "grad_norm": 0.5768628716468811, "learning_rate": 7.765759734936276e-06, "loss": 0.0695, "step": 30423 }, { "epoch": 0.6704016482396558, "grad_norm": 0.6458317637443542, "learning_rate": 7.764821947759376e-06, "loss": 0.0588, "step": 30424 }, { "epoch": 0.6704236835291719, "grad_norm": 0.5678613185882568, "learning_rate": 7.763884197435123e-06, "loss": 0.0939, "step": 30425 }, { "epoch": 0.6704457188186881, "grad_norm": 0.8537544012069702, "learning_rate": 7.762946483968279e-06, "loss": 0.0851, "step": 30426 }, { "epoch": 0.6704677541082043, "grad_norm": 0.47503674030303955, "learning_rate": 7.76200880736364e-06, "loss": 0.0546, "step": 30427 }, { "epoch": 0.6704897893977204, "grad_norm": 0.36247313022613525, "learning_rate": 7.761071167625964e-06, "loss": 0.0399, "step": 30428 }, { "epoch": 0.6705118246872366, "grad_norm": 0.5342184901237488, "learning_rate": 7.76013356476004e-06, "loss": 0.0527, "step": 30429 }, { "epoch": 0.6705338599767527, "grad_norm": 0.6696807146072388, "learning_rate": 7.759195998770636e-06, "loss": 0.085, "step": 30430 }, { "epoch": 0.6705558952662689, "grad_norm": 0.671588122844696, "learning_rate": 7.758258469662525e-06, "loss": 0.0784, "step": 30431 }, { "epoch": 0.6705779305557851, "grad_norm": 0.7317181825637817, "learning_rate": 7.757320977440496e-06, "loss": 0.073, "step": 30432 }, { "epoch": 0.6705999658453012, "grad_norm": 1.0152466297149658, "learning_rate": 7.7563835221093e-06, "loss": 0.0857, "step": 30433 }, { "epoch": 0.6706220011348174, "grad_norm": 0.7247437834739685, "learning_rate": 7.755446103673742e-06, "loss": 0.0867, "step": 30434 }, { "epoch": 0.6706440364243336, "grad_norm": 0.4255792498588562, "learning_rate": 7.754508722138573e-06, "loss": 0.051, "step": 30435 }, { "epoch": 0.6706660717138497, "grad_norm": 0.5316193103790283, "learning_rate": 7.753571377508585e-06, "loss": 0.0741, "step": 30436 }, { "epoch": 0.6706881070033659, "grad_norm": 0.22242267429828644, "learning_rate": 7.752634069788536e-06, "loss": 0.0653, "step": 30437 }, { "epoch": 0.6707101422928821, "grad_norm": 0.5659898519515991, "learning_rate": 7.75169679898321e-06, "loss": 0.0373, "step": 30438 }, { "epoch": 0.6707321775823982, "grad_norm": 0.3659557104110718, "learning_rate": 7.750759565097385e-06, "loss": 0.0501, "step": 30439 }, { "epoch": 0.6707542128719144, "grad_norm": 0.7246183753013611, "learning_rate": 7.749822368135821e-06, "loss": 0.0752, "step": 30440 }, { "epoch": 0.6707762481614306, "grad_norm": 0.4637109935283661, "learning_rate": 7.748885208103302e-06, "loss": 0.0585, "step": 30441 }, { "epoch": 0.6707982834509467, "grad_norm": 0.6209097504615784, "learning_rate": 7.747948085004597e-06, "loss": 0.0624, "step": 30442 }, { "epoch": 0.6708203187404629, "grad_norm": 0.5400435328483582, "learning_rate": 7.747010998844488e-06, "loss": 0.0492, "step": 30443 }, { "epoch": 0.670842354029979, "grad_norm": 0.6539201140403748, "learning_rate": 7.746073949627734e-06, "loss": 0.0477, "step": 30444 }, { "epoch": 0.6708643893194952, "grad_norm": 0.5277726650238037, "learning_rate": 7.745136937359118e-06, "loss": 0.0746, "step": 30445 }, { "epoch": 0.6708864246090114, "grad_norm": 0.801616370677948, "learning_rate": 7.744199962043413e-06, "loss": 0.0822, "step": 30446 }, { "epoch": 0.6709084598985275, "grad_norm": 0.27989327907562256, "learning_rate": 7.743263023685385e-06, "loss": 0.036, "step": 30447 }, { "epoch": 0.6709304951880437, "grad_norm": 0.8153789043426514, "learning_rate": 7.742326122289814e-06, "loss": 0.0741, "step": 30448 }, { "epoch": 0.6709525304775598, "grad_norm": 0.5409011840820312, "learning_rate": 7.741389257861456e-06, "loss": 0.0749, "step": 30449 }, { "epoch": 0.6709745657670759, "grad_norm": 1.1074100732803345, "learning_rate": 7.740452430405106e-06, "loss": 0.0566, "step": 30450 }, { "epoch": 0.6709966010565921, "grad_norm": 0.5315245985984802, "learning_rate": 7.739515639925521e-06, "loss": 0.0341, "step": 30451 }, { "epoch": 0.6710186363461083, "grad_norm": 0.4880834221839905, "learning_rate": 7.738578886427481e-06, "loss": 0.0536, "step": 30452 }, { "epoch": 0.6710406716356244, "grad_norm": 0.5636239051818848, "learning_rate": 7.737642169915741e-06, "loss": 0.0601, "step": 30453 }, { "epoch": 0.6710627069251406, "grad_norm": 0.6667870879173279, "learning_rate": 7.736705490395097e-06, "loss": 0.0606, "step": 30454 }, { "epoch": 0.6710847422146567, "grad_norm": 0.6053143739700317, "learning_rate": 7.735768847870304e-06, "loss": 0.0723, "step": 30455 }, { "epoch": 0.6711067775041729, "grad_norm": 0.856726348400116, "learning_rate": 7.734832242346126e-06, "loss": 0.0573, "step": 30456 }, { "epoch": 0.6711288127936891, "grad_norm": 0.8594453930854797, "learning_rate": 7.733895673827354e-06, "loss": 0.1044, "step": 30457 }, { "epoch": 0.6711508480832052, "grad_norm": 0.4947969913482666, "learning_rate": 7.732959142318743e-06, "loss": 0.0493, "step": 30458 }, { "epoch": 0.6711728833727214, "grad_norm": 0.627606987953186, "learning_rate": 7.732022647825074e-06, "loss": 0.088, "step": 30459 }, { "epoch": 0.6711949186622376, "grad_norm": 0.604461669921875, "learning_rate": 7.731086190351098e-06, "loss": 0.0532, "step": 30460 }, { "epoch": 0.6712169539517537, "grad_norm": 0.5163666605949402, "learning_rate": 7.730149769901608e-06, "loss": 0.0442, "step": 30461 }, { "epoch": 0.6712389892412699, "grad_norm": 0.9025904536247253, "learning_rate": 7.72921338648136e-06, "loss": 0.1029, "step": 30462 }, { "epoch": 0.6712610245307861, "grad_norm": 0.4805316925048828, "learning_rate": 7.728277040095134e-06, "loss": 0.0574, "step": 30463 }, { "epoch": 0.6712830598203022, "grad_norm": 0.4320527911186218, "learning_rate": 7.727340730747684e-06, "loss": 0.0885, "step": 30464 }, { "epoch": 0.6713050951098184, "grad_norm": 0.6364196538925171, "learning_rate": 7.72640445844379e-06, "loss": 0.0654, "step": 30465 }, { "epoch": 0.6713271303993346, "grad_norm": 0.6487234234809875, "learning_rate": 7.725468223188223e-06, "loss": 0.0851, "step": 30466 }, { "epoch": 0.6713491656888507, "grad_norm": 0.41677969694137573, "learning_rate": 7.724532024985742e-06, "loss": 0.0385, "step": 30467 }, { "epoch": 0.6713712009783669, "grad_norm": 0.7062296271324158, "learning_rate": 7.723595863841121e-06, "loss": 0.0887, "step": 30468 }, { "epoch": 0.671393236267883, "grad_norm": 0.5121783018112183, "learning_rate": 7.722659739759126e-06, "loss": 0.0506, "step": 30469 }, { "epoch": 0.6714152715573992, "grad_norm": 0.8718351721763611, "learning_rate": 7.721723652744537e-06, "loss": 0.0722, "step": 30470 }, { "epoch": 0.6714373068469154, "grad_norm": 0.6634183526039124, "learning_rate": 7.720787602802103e-06, "loss": 0.0626, "step": 30471 }, { "epoch": 0.6714593421364315, "grad_norm": 0.5538361072540283, "learning_rate": 7.719851589936602e-06, "loss": 0.0286, "step": 30472 }, { "epoch": 0.6714813774259476, "grad_norm": 0.742091715335846, "learning_rate": 7.718915614152808e-06, "loss": 0.0932, "step": 30473 }, { "epoch": 0.6715034127154638, "grad_norm": 0.6357536315917969, "learning_rate": 7.717979675455476e-06, "loss": 0.0727, "step": 30474 }, { "epoch": 0.6715254480049799, "grad_norm": 0.7118062376976013, "learning_rate": 7.717043773849382e-06, "loss": 0.0837, "step": 30475 }, { "epoch": 0.6715474832944961, "grad_norm": 0.47443467378616333, "learning_rate": 7.716107909339278e-06, "loss": 0.0676, "step": 30476 }, { "epoch": 0.6715695185840123, "grad_norm": 0.8058796525001526, "learning_rate": 7.715172081929955e-06, "loss": 0.0732, "step": 30477 }, { "epoch": 0.6715915538735284, "grad_norm": 0.5211679935455322, "learning_rate": 7.714236291626161e-06, "loss": 0.0351, "step": 30478 }, { "epoch": 0.6716135891630446, "grad_norm": 0.9507594704627991, "learning_rate": 7.713300538432667e-06, "loss": 0.0724, "step": 30479 }, { "epoch": 0.6716356244525608, "grad_norm": 0.44331878423690796, "learning_rate": 7.71236482235425e-06, "loss": 0.0616, "step": 30480 }, { "epoch": 0.6716576597420769, "grad_norm": 0.8939385414123535, "learning_rate": 7.711429143395657e-06, "loss": 0.072, "step": 30481 }, { "epoch": 0.6716796950315931, "grad_norm": 0.5712407231330872, "learning_rate": 7.710493501561674e-06, "loss": 0.0823, "step": 30482 }, { "epoch": 0.6717017303211092, "grad_norm": 0.49752277135849, "learning_rate": 7.70955789685704e-06, "loss": 0.0694, "step": 30483 }, { "epoch": 0.6717237656106254, "grad_norm": 0.7944138646125793, "learning_rate": 7.708622329286553e-06, "loss": 0.0693, "step": 30484 }, { "epoch": 0.6717458009001416, "grad_norm": 0.8713082075119019, "learning_rate": 7.707686798854954e-06, "loss": 0.124, "step": 30485 }, { "epoch": 0.6717678361896577, "grad_norm": 0.3146873712539673, "learning_rate": 7.706751305567024e-06, "loss": 0.0614, "step": 30486 }, { "epoch": 0.6717898714791739, "grad_norm": 0.44623255729675293, "learning_rate": 7.705815849427514e-06, "loss": 0.0577, "step": 30487 }, { "epoch": 0.6718119067686901, "grad_norm": 0.9850293397903442, "learning_rate": 7.704880430441195e-06, "loss": 0.1076, "step": 30488 }, { "epoch": 0.6718339420582062, "grad_norm": 0.4521414041519165, "learning_rate": 7.703945048612838e-06, "loss": 0.0473, "step": 30489 }, { "epoch": 0.6718559773477224, "grad_norm": 0.4979498088359833, "learning_rate": 7.703009703947197e-06, "loss": 0.0311, "step": 30490 }, { "epoch": 0.6718780126372386, "grad_norm": 0.6293535232543945, "learning_rate": 7.702074396449037e-06, "loss": 0.0866, "step": 30491 }, { "epoch": 0.6719000479267547, "grad_norm": 0.6320685744285583, "learning_rate": 7.701139126123128e-06, "loss": 0.0796, "step": 30492 }, { "epoch": 0.6719220832162709, "grad_norm": 0.7242635488510132, "learning_rate": 7.700203892974237e-06, "loss": 0.0838, "step": 30493 }, { "epoch": 0.6719441185057871, "grad_norm": 0.5233807563781738, "learning_rate": 7.699268697007115e-06, "loss": 0.05, "step": 30494 }, { "epoch": 0.6719661537953032, "grad_norm": 0.6371440887451172, "learning_rate": 7.698333538226536e-06, "loss": 0.0778, "step": 30495 }, { "epoch": 0.6719881890848194, "grad_norm": 0.7794768810272217, "learning_rate": 7.697398416637263e-06, "loss": 0.101, "step": 30496 }, { "epoch": 0.6720102243743356, "grad_norm": 0.7142640948295593, "learning_rate": 7.69646333224405e-06, "loss": 0.0596, "step": 30497 }, { "epoch": 0.6720322596638516, "grad_norm": 0.5620648264884949, "learning_rate": 7.695528285051672e-06, "loss": 0.0513, "step": 30498 }, { "epoch": 0.6720542949533678, "grad_norm": 0.4745623469352722, "learning_rate": 7.694593275064875e-06, "loss": 0.0488, "step": 30499 }, { "epoch": 0.6720763302428839, "grad_norm": 0.7382572293281555, "learning_rate": 7.69365830228844e-06, "loss": 0.0652, "step": 30500 }, { "epoch": 0.6720983655324001, "grad_norm": 0.5734326243400574, "learning_rate": 7.692723366727117e-06, "loss": 0.0849, "step": 30501 }, { "epoch": 0.6721204008219163, "grad_norm": 0.40858757495880127, "learning_rate": 7.691788468385674e-06, "loss": 0.0393, "step": 30502 }, { "epoch": 0.6721424361114324, "grad_norm": 0.6564210653305054, "learning_rate": 7.690853607268875e-06, "loss": 0.1101, "step": 30503 }, { "epoch": 0.6721644714009486, "grad_norm": 0.7448659539222717, "learning_rate": 7.689918783381474e-06, "loss": 0.1028, "step": 30504 }, { "epoch": 0.6721865066904648, "grad_norm": 0.809820830821991, "learning_rate": 7.688983996728242e-06, "loss": 0.0987, "step": 30505 }, { "epoch": 0.6722085419799809, "grad_norm": 0.5220997333526611, "learning_rate": 7.688049247313923e-06, "loss": 0.0548, "step": 30506 }, { "epoch": 0.6722305772694971, "grad_norm": 0.5542840361595154, "learning_rate": 7.687114535143301e-06, "loss": 0.071, "step": 30507 }, { "epoch": 0.6722526125590133, "grad_norm": 0.7330867052078247, "learning_rate": 7.686179860221122e-06, "loss": 0.0604, "step": 30508 }, { "epoch": 0.6722746478485294, "grad_norm": 0.7087156772613525, "learning_rate": 7.685245222552154e-06, "loss": 0.0742, "step": 30509 }, { "epoch": 0.6722966831380456, "grad_norm": 0.8851640224456787, "learning_rate": 7.684310622141152e-06, "loss": 0.0557, "step": 30510 }, { "epoch": 0.6723187184275617, "grad_norm": 0.609280526638031, "learning_rate": 7.683376058992878e-06, "loss": 0.073, "step": 30511 }, { "epoch": 0.6723407537170779, "grad_norm": 0.7641345858573914, "learning_rate": 7.682441533112098e-06, "loss": 0.0739, "step": 30512 }, { "epoch": 0.6723627890065941, "grad_norm": 0.4383664131164551, "learning_rate": 7.681507044503562e-06, "loss": 0.0608, "step": 30513 }, { "epoch": 0.6723848242961102, "grad_norm": 0.6636109948158264, "learning_rate": 7.680572593172036e-06, "loss": 0.0731, "step": 30514 }, { "epoch": 0.6724068595856264, "grad_norm": 0.4665834903717041, "learning_rate": 7.679638179122278e-06, "loss": 0.0408, "step": 30515 }, { "epoch": 0.6724288948751426, "grad_norm": 0.7022764682769775, "learning_rate": 7.678703802359052e-06, "loss": 0.0895, "step": 30516 }, { "epoch": 0.6724509301646587, "grad_norm": 0.6967598795890808, "learning_rate": 7.67776946288711e-06, "loss": 0.0698, "step": 30517 }, { "epoch": 0.6724729654541749, "grad_norm": 0.5132508873939514, "learning_rate": 7.676835160711215e-06, "loss": 0.0582, "step": 30518 }, { "epoch": 0.6724950007436911, "grad_norm": 0.7337456941604614, "learning_rate": 7.675900895836132e-06, "loss": 0.082, "step": 30519 }, { "epoch": 0.6725170360332072, "grad_norm": 0.767055094242096, "learning_rate": 7.674966668266604e-06, "loss": 0.0676, "step": 30520 }, { "epoch": 0.6725390713227234, "grad_norm": 0.5111063718795776, "learning_rate": 7.674032478007408e-06, "loss": 0.0541, "step": 30521 }, { "epoch": 0.6725611066122396, "grad_norm": 0.6283869743347168, "learning_rate": 7.673098325063278e-06, "loss": 0.0995, "step": 30522 }, { "epoch": 0.6725831419017556, "grad_norm": 0.6031152009963989, "learning_rate": 7.672164209439004e-06, "loss": 0.0647, "step": 30523 }, { "epoch": 0.6726051771912718, "grad_norm": 0.7581173181533813, "learning_rate": 7.671230131139317e-06, "loss": 0.0744, "step": 30524 }, { "epoch": 0.6726272124807879, "grad_norm": 0.38880735635757446, "learning_rate": 7.670296090168987e-06, "loss": 0.0423, "step": 30525 }, { "epoch": 0.6726492477703041, "grad_norm": 0.4424193799495697, "learning_rate": 7.669362086532775e-06, "loss": 0.0374, "step": 30526 }, { "epoch": 0.6726712830598203, "grad_norm": 0.6038225293159485, "learning_rate": 7.668428120235427e-06, "loss": 0.1129, "step": 30527 }, { "epoch": 0.6726933183493364, "grad_norm": 0.659882664680481, "learning_rate": 7.667494191281711e-06, "loss": 0.0655, "step": 30528 }, { "epoch": 0.6727153536388526, "grad_norm": 0.5168578028678894, "learning_rate": 7.666560299676366e-06, "loss": 0.0479, "step": 30529 }, { "epoch": 0.6727373889283688, "grad_norm": 0.9227148294448853, "learning_rate": 7.665626445424177e-06, "loss": 0.0637, "step": 30530 }, { "epoch": 0.6727594242178849, "grad_norm": 0.30639174580574036, "learning_rate": 7.66469262852988e-06, "loss": 0.045, "step": 30531 }, { "epoch": 0.6727814595074011, "grad_norm": 0.6620888113975525, "learning_rate": 7.66375884899824e-06, "loss": 0.0666, "step": 30532 }, { "epoch": 0.6728034947969173, "grad_norm": 0.39345189929008484, "learning_rate": 7.662825106834006e-06, "loss": 0.0539, "step": 30533 }, { "epoch": 0.6728255300864334, "grad_norm": 0.7575960755348206, "learning_rate": 7.661891402041936e-06, "loss": 0.1049, "step": 30534 }, { "epoch": 0.6728475653759496, "grad_norm": 0.6327129602432251, "learning_rate": 7.660957734626797e-06, "loss": 0.0468, "step": 30535 }, { "epoch": 0.6728696006654658, "grad_norm": 0.7102874517440796, "learning_rate": 7.660024104593328e-06, "loss": 0.0644, "step": 30536 }, { "epoch": 0.6728916359549819, "grad_norm": 0.5282904505729675, "learning_rate": 7.659090511946292e-06, "loss": 0.0692, "step": 30537 }, { "epoch": 0.6729136712444981, "grad_norm": 0.6637132167816162, "learning_rate": 7.658156956690446e-06, "loss": 0.0604, "step": 30538 }, { "epoch": 0.6729357065340142, "grad_norm": 0.5623141527175903, "learning_rate": 7.65722343883055e-06, "loss": 0.0646, "step": 30539 }, { "epoch": 0.6729577418235304, "grad_norm": 0.9723220467567444, "learning_rate": 7.656289958371344e-06, "loss": 0.0755, "step": 30540 }, { "epoch": 0.6729797771130466, "grad_norm": 0.760419487953186, "learning_rate": 7.655356515317592e-06, "loss": 0.0615, "step": 30541 }, { "epoch": 0.6730018124025627, "grad_norm": 0.4957161247730255, "learning_rate": 7.654423109674055e-06, "loss": 0.0393, "step": 30542 }, { "epoch": 0.6730238476920789, "grad_norm": 0.690187394618988, "learning_rate": 7.653489741445475e-06, "loss": 0.0862, "step": 30543 }, { "epoch": 0.6730458829815951, "grad_norm": 0.5267322659492493, "learning_rate": 7.652556410636616e-06, "loss": 0.0784, "step": 30544 }, { "epoch": 0.6730679182711112, "grad_norm": 0.4110381007194519, "learning_rate": 7.651623117252214e-06, "loss": 0.0547, "step": 30545 }, { "epoch": 0.6730899535606274, "grad_norm": 0.5777493119239807, "learning_rate": 7.650689861297048e-06, "loss": 0.0732, "step": 30546 }, { "epoch": 0.6731119888501435, "grad_norm": 0.5613062977790833, "learning_rate": 7.649756642775855e-06, "loss": 0.0805, "step": 30547 }, { "epoch": 0.6731340241396596, "grad_norm": 0.7558556795120239, "learning_rate": 7.648823461693394e-06, "loss": 0.0894, "step": 30548 }, { "epoch": 0.6731560594291758, "grad_norm": 0.3846440315246582, "learning_rate": 7.647890318054422e-06, "loss": 0.0437, "step": 30549 }, { "epoch": 0.6731780947186919, "grad_norm": 1.6034623384475708, "learning_rate": 7.64695721186368e-06, "loss": 0.0789, "step": 30550 }, { "epoch": 0.6732001300082081, "grad_norm": 0.5439102649688721, "learning_rate": 7.646024143125932e-06, "loss": 0.0443, "step": 30551 }, { "epoch": 0.6732221652977243, "grad_norm": 0.5370419025421143, "learning_rate": 7.64509111184592e-06, "loss": 0.0615, "step": 30552 }, { "epoch": 0.6732442005872404, "grad_norm": 0.541006326675415, "learning_rate": 7.64415811802841e-06, "loss": 0.0413, "step": 30553 }, { "epoch": 0.6732662358767566, "grad_norm": 0.5034012794494629, "learning_rate": 7.643225161678144e-06, "loss": 0.062, "step": 30554 }, { "epoch": 0.6732882711662728, "grad_norm": 0.6757359504699707, "learning_rate": 7.642292242799883e-06, "loss": 0.0639, "step": 30555 }, { "epoch": 0.6733103064557889, "grad_norm": 0.44507837295532227, "learning_rate": 7.641359361398366e-06, "loss": 0.0403, "step": 30556 }, { "epoch": 0.6733323417453051, "grad_norm": 0.22937999665737152, "learning_rate": 7.640426517478353e-06, "loss": 0.0624, "step": 30557 }, { "epoch": 0.6733543770348213, "grad_norm": 0.8056277632713318, "learning_rate": 7.6394937110446e-06, "loss": 0.0963, "step": 30558 }, { "epoch": 0.6733764123243374, "grad_norm": 0.5066694617271423, "learning_rate": 7.638560942101847e-06, "loss": 0.0513, "step": 30559 }, { "epoch": 0.6733984476138536, "grad_norm": 0.41676685214042664, "learning_rate": 7.637628210654849e-06, "loss": 0.061, "step": 30560 }, { "epoch": 0.6734204829033698, "grad_norm": 0.5837411284446716, "learning_rate": 7.63669551670836e-06, "loss": 0.0827, "step": 30561 }, { "epoch": 0.6734425181928859, "grad_norm": 0.7050990462303162, "learning_rate": 7.635762860267134e-06, "loss": 0.0807, "step": 30562 }, { "epoch": 0.6734645534824021, "grad_norm": 0.27611786127090454, "learning_rate": 7.634830241335913e-06, "loss": 0.0463, "step": 30563 }, { "epoch": 0.6734865887719182, "grad_norm": 0.6871940493583679, "learning_rate": 7.633897659919449e-06, "loss": 0.0738, "step": 30564 }, { "epoch": 0.6735086240614344, "grad_norm": 0.7352144718170166, "learning_rate": 7.632965116022496e-06, "loss": 0.055, "step": 30565 }, { "epoch": 0.6735306593509506, "grad_norm": 0.662131130695343, "learning_rate": 7.632032609649806e-06, "loss": 0.0766, "step": 30566 }, { "epoch": 0.6735526946404667, "grad_norm": 0.6204635500907898, "learning_rate": 7.63110014080612e-06, "loss": 0.0504, "step": 30567 }, { "epoch": 0.6735747299299829, "grad_norm": 0.40280914306640625, "learning_rate": 7.630167709496193e-06, "loss": 0.0623, "step": 30568 }, { "epoch": 0.6735967652194991, "grad_norm": 0.4289589822292328, "learning_rate": 7.629235315724778e-06, "loss": 0.0478, "step": 30569 }, { "epoch": 0.6736188005090152, "grad_norm": 0.46624770760536194, "learning_rate": 7.628302959496616e-06, "loss": 0.068, "step": 30570 }, { "epoch": 0.6736408357985314, "grad_norm": 0.9274663925170898, "learning_rate": 7.627370640816461e-06, "loss": 0.0591, "step": 30571 }, { "epoch": 0.6736628710880475, "grad_norm": 0.7234925627708435, "learning_rate": 7.626438359689059e-06, "loss": 0.0735, "step": 30572 }, { "epoch": 0.6736849063775636, "grad_norm": 0.7687249779701233, "learning_rate": 7.625506116119167e-06, "loss": 0.0841, "step": 30573 }, { "epoch": 0.6737069416670798, "grad_norm": 0.5480857491493225, "learning_rate": 7.624573910111521e-06, "loss": 0.0901, "step": 30574 }, { "epoch": 0.673728976956596, "grad_norm": 0.421678364276886, "learning_rate": 7.623641741670874e-06, "loss": 0.0831, "step": 30575 }, { "epoch": 0.6737510122461121, "grad_norm": 0.7757835388183594, "learning_rate": 7.6227096108019825e-06, "loss": 0.0626, "step": 30576 }, { "epoch": 0.6737730475356283, "grad_norm": 0.898057222366333, "learning_rate": 7.621777517509579e-06, "loss": 0.0792, "step": 30577 }, { "epoch": 0.6737950828251444, "grad_norm": 0.5816705226898193, "learning_rate": 7.620845461798428e-06, "loss": 0.0783, "step": 30578 }, { "epoch": 0.6738171181146606, "grad_norm": 0.8091368079185486, "learning_rate": 7.619913443673254e-06, "loss": 0.0675, "step": 30579 }, { "epoch": 0.6738391534041768, "grad_norm": 0.45346546173095703, "learning_rate": 7.618981463138831e-06, "loss": 0.0471, "step": 30580 }, { "epoch": 0.6738611886936929, "grad_norm": 0.5838016271591187, "learning_rate": 7.618049520199888e-06, "loss": 0.0675, "step": 30581 }, { "epoch": 0.6738832239832091, "grad_norm": 0.7108403444290161, "learning_rate": 7.617117614861181e-06, "loss": 0.0964, "step": 30582 }, { "epoch": 0.6739052592727253, "grad_norm": 0.690091609954834, "learning_rate": 7.61618574712745e-06, "loss": 0.0541, "step": 30583 }, { "epoch": 0.6739272945622414, "grad_norm": 0.6419645547866821, "learning_rate": 7.6152539170034425e-06, "loss": 0.0598, "step": 30584 }, { "epoch": 0.6739493298517576, "grad_norm": 0.7344974279403687, "learning_rate": 7.614322124493913e-06, "loss": 0.0738, "step": 30585 }, { "epoch": 0.6739713651412738, "grad_norm": 0.606364369392395, "learning_rate": 7.6133903696035965e-06, "loss": 0.0471, "step": 30586 }, { "epoch": 0.6739934004307899, "grad_norm": 0.6472631096839905, "learning_rate": 7.612458652337244e-06, "loss": 0.0806, "step": 30587 }, { "epoch": 0.6740154357203061, "grad_norm": 0.6087425351142883, "learning_rate": 7.6115269726996e-06, "loss": 0.0498, "step": 30588 }, { "epoch": 0.6740374710098223, "grad_norm": 0.8517835736274719, "learning_rate": 7.610595330695418e-06, "loss": 0.1075, "step": 30589 }, { "epoch": 0.6740595062993384, "grad_norm": 0.40296030044555664, "learning_rate": 7.609663726329429e-06, "loss": 0.0618, "step": 30590 }, { "epoch": 0.6740815415888546, "grad_norm": 0.45494741201400757, "learning_rate": 7.6087321596063875e-06, "loss": 0.0669, "step": 30591 }, { "epoch": 0.6741035768783707, "grad_norm": 0.7783616185188293, "learning_rate": 7.607800630531041e-06, "loss": 0.087, "step": 30592 }, { "epoch": 0.6741256121678869, "grad_norm": 0.6653575897216797, "learning_rate": 7.606869139108125e-06, "loss": 0.0668, "step": 30593 }, { "epoch": 0.6741476474574031, "grad_norm": 0.8232609629631042, "learning_rate": 7.605937685342388e-06, "loss": 0.061, "step": 30594 }, { "epoch": 0.6741696827469192, "grad_norm": 1.0212687253952026, "learning_rate": 7.605006269238577e-06, "loss": 0.0935, "step": 30595 }, { "epoch": 0.6741917180364354, "grad_norm": 0.5143566131591797, "learning_rate": 7.604074890801439e-06, "loss": 0.058, "step": 30596 }, { "epoch": 0.6742137533259515, "grad_norm": 0.44799554347991943, "learning_rate": 7.603143550035707e-06, "loss": 0.0695, "step": 30597 }, { "epoch": 0.6742357886154676, "grad_norm": 0.5243732929229736, "learning_rate": 7.6022122469461345e-06, "loss": 0.0526, "step": 30598 }, { "epoch": 0.6742578239049838, "grad_norm": 0.34399500489234924, "learning_rate": 7.601280981537466e-06, "loss": 0.0495, "step": 30599 }, { "epoch": 0.6742798591945, "grad_norm": 0.6004911065101624, "learning_rate": 7.600349753814434e-06, "loss": 0.0833, "step": 30600 }, { "epoch": 0.6743018944840161, "grad_norm": 0.7779572606086731, "learning_rate": 7.599418563781797e-06, "loss": 0.0651, "step": 30601 }, { "epoch": 0.6743239297735323, "grad_norm": 0.5709294676780701, "learning_rate": 7.598487411444277e-06, "loss": 0.0789, "step": 30602 }, { "epoch": 0.6743459650630484, "grad_norm": 0.8117812275886536, "learning_rate": 7.597556296806639e-06, "loss": 0.1056, "step": 30603 }, { "epoch": 0.6743680003525646, "grad_norm": 0.814935028553009, "learning_rate": 7.596625219873613e-06, "loss": 0.061, "step": 30604 }, { "epoch": 0.6743900356420808, "grad_norm": 1.1188174486160278, "learning_rate": 7.595694180649951e-06, "loss": 0.0939, "step": 30605 }, { "epoch": 0.6744120709315969, "grad_norm": 0.4083980619907379, "learning_rate": 7.594763179140382e-06, "loss": 0.0616, "step": 30606 }, { "epoch": 0.6744341062211131, "grad_norm": 0.6878079771995544, "learning_rate": 7.593832215349657e-06, "loss": 0.0641, "step": 30607 }, { "epoch": 0.6744561415106293, "grad_norm": 0.6701921820640564, "learning_rate": 7.59290128928252e-06, "loss": 0.0697, "step": 30608 }, { "epoch": 0.6744781768001454, "grad_norm": 0.6062540411949158, "learning_rate": 7.591970400943703e-06, "loss": 0.1279, "step": 30609 }, { "epoch": 0.6745002120896616, "grad_norm": 0.7081021070480347, "learning_rate": 7.591039550337953e-06, "loss": 0.0868, "step": 30610 }, { "epoch": 0.6745222473791778, "grad_norm": 1.006883978843689, "learning_rate": 7.590108737470012e-06, "loss": 0.084, "step": 30611 }, { "epoch": 0.6745442826686939, "grad_norm": 0.6022104620933533, "learning_rate": 7.589177962344628e-06, "loss": 0.0964, "step": 30612 }, { "epoch": 0.6745663179582101, "grad_norm": 0.9978528022766113, "learning_rate": 7.588247224966525e-06, "loss": 0.0706, "step": 30613 }, { "epoch": 0.6745883532477263, "grad_norm": 0.33024272322654724, "learning_rate": 7.587316525340457e-06, "loss": 0.0697, "step": 30614 }, { "epoch": 0.6746103885372424, "grad_norm": 0.8053652048110962, "learning_rate": 7.5863858634711654e-06, "loss": 0.102, "step": 30615 }, { "epoch": 0.6746324238267586, "grad_norm": 0.78566575050354, "learning_rate": 7.58545523936338e-06, "loss": 0.0891, "step": 30616 }, { "epoch": 0.6746544591162748, "grad_norm": 0.3822762072086334, "learning_rate": 7.584524653021848e-06, "loss": 0.0641, "step": 30617 }, { "epoch": 0.6746764944057909, "grad_norm": 0.4792311489582062, "learning_rate": 7.583594104451309e-06, "loss": 0.0425, "step": 30618 }, { "epoch": 0.6746985296953071, "grad_norm": 0.6560140252113342, "learning_rate": 7.582663593656506e-06, "loss": 0.0543, "step": 30619 }, { "epoch": 0.6747205649848232, "grad_norm": 0.7820227146148682, "learning_rate": 7.58173312064217e-06, "loss": 0.0704, "step": 30620 }, { "epoch": 0.6747426002743393, "grad_norm": 0.6541347503662109, "learning_rate": 7.580802685413045e-06, "loss": 0.0573, "step": 30621 }, { "epoch": 0.6747646355638555, "grad_norm": 0.517603874206543, "learning_rate": 7.579872287973877e-06, "loss": 0.054, "step": 30622 }, { "epoch": 0.6747866708533716, "grad_norm": 0.3951956033706665, "learning_rate": 7.578941928329391e-06, "loss": 0.0517, "step": 30623 }, { "epoch": 0.6748087061428878, "grad_norm": 0.45716139674186707, "learning_rate": 7.57801160648434e-06, "loss": 0.0424, "step": 30624 }, { "epoch": 0.674830741432404, "grad_norm": 0.1467437744140625, "learning_rate": 7.577081322443444e-06, "loss": 0.045, "step": 30625 }, { "epoch": 0.6748527767219201, "grad_norm": 0.7613699436187744, "learning_rate": 7.576151076211466e-06, "loss": 0.0444, "step": 30626 }, { "epoch": 0.6748748120114363, "grad_norm": 0.4427427351474762, "learning_rate": 7.575220867793125e-06, "loss": 0.0666, "step": 30627 }, { "epoch": 0.6748968473009525, "grad_norm": 0.6950542330741882, "learning_rate": 7.574290697193173e-06, "loss": 0.0675, "step": 30628 }, { "epoch": 0.6749188825904686, "grad_norm": 0.4503420889377594, "learning_rate": 7.573360564416331e-06, "loss": 0.0393, "step": 30629 }, { "epoch": 0.6749409178799848, "grad_norm": 0.7661133408546448, "learning_rate": 7.572430469467347e-06, "loss": 0.0789, "step": 30630 }, { "epoch": 0.674962953169501, "grad_norm": 0.9654818773269653, "learning_rate": 7.571500412350966e-06, "loss": 0.077, "step": 30631 }, { "epoch": 0.6749849884590171, "grad_norm": 0.5901614427566528, "learning_rate": 7.57057039307191e-06, "loss": 0.0586, "step": 30632 }, { "epoch": 0.6750070237485333, "grad_norm": 0.29057803750038147, "learning_rate": 7.569640411634922e-06, "loss": 0.0372, "step": 30633 }, { "epoch": 0.6750290590380494, "grad_norm": 0.5126234292984009, "learning_rate": 7.5687104680447415e-06, "loss": 0.0662, "step": 30634 }, { "epoch": 0.6750510943275656, "grad_norm": 0.5912513136863708, "learning_rate": 7.567780562306107e-06, "loss": 0.062, "step": 30635 }, { "epoch": 0.6750731296170818, "grad_norm": 0.6162722110748291, "learning_rate": 7.566850694423747e-06, "loss": 0.0534, "step": 30636 }, { "epoch": 0.6750951649065979, "grad_norm": 1.5203546285629272, "learning_rate": 7.565920864402403e-06, "loss": 0.0792, "step": 30637 }, { "epoch": 0.6751172001961141, "grad_norm": 0.6717169880867004, "learning_rate": 7.564991072246816e-06, "loss": 0.0763, "step": 30638 }, { "epoch": 0.6751392354856303, "grad_norm": 0.4821978509426117, "learning_rate": 7.564061317961711e-06, "loss": 0.071, "step": 30639 }, { "epoch": 0.6751612707751464, "grad_norm": 0.6979067921638489, "learning_rate": 7.563131601551828e-06, "loss": 0.0808, "step": 30640 }, { "epoch": 0.6751833060646626, "grad_norm": 0.43040719628334045, "learning_rate": 7.562201923021903e-06, "loss": 0.0506, "step": 30641 }, { "epoch": 0.6752053413541788, "grad_norm": 0.680759072303772, "learning_rate": 7.561272282376681e-06, "loss": 0.0444, "step": 30642 }, { "epoch": 0.6752273766436949, "grad_norm": 0.6544970273971558, "learning_rate": 7.56034267962088e-06, "loss": 0.0782, "step": 30643 }, { "epoch": 0.6752494119332111, "grad_norm": 0.35552725195884705, "learning_rate": 7.559413114759245e-06, "loss": 0.0582, "step": 30644 }, { "epoch": 0.6752714472227273, "grad_norm": 0.46721071004867554, "learning_rate": 7.558483587796512e-06, "loss": 0.061, "step": 30645 }, { "epoch": 0.6752934825122433, "grad_norm": 0.9506277441978455, "learning_rate": 7.557554098737409e-06, "loss": 0.0638, "step": 30646 }, { "epoch": 0.6753155178017595, "grad_norm": 0.5620800256729126, "learning_rate": 7.5566246475866795e-06, "loss": 0.0657, "step": 30647 }, { "epoch": 0.6753375530912756, "grad_norm": 0.6154341697692871, "learning_rate": 7.555695234349038e-06, "loss": 0.0589, "step": 30648 }, { "epoch": 0.6753595883807918, "grad_norm": 0.5285261273384094, "learning_rate": 7.554765859029247e-06, "loss": 0.0653, "step": 30649 }, { "epoch": 0.675381623670308, "grad_norm": 0.22914819419384003, "learning_rate": 7.55383652163202e-06, "loss": 0.0417, "step": 30650 }, { "epoch": 0.6754036589598241, "grad_norm": 0.4925936758518219, "learning_rate": 7.552907222162102e-06, "loss": 0.0676, "step": 30651 }, { "epoch": 0.6754256942493403, "grad_norm": 0.6766480207443237, "learning_rate": 7.551977960624214e-06, "loss": 0.0779, "step": 30652 }, { "epoch": 0.6754477295388565, "grad_norm": 0.7894779443740845, "learning_rate": 7.551048737023099e-06, "loss": 0.0874, "step": 30653 }, { "epoch": 0.6754697648283726, "grad_norm": 0.7132924795150757, "learning_rate": 7.55011955136349e-06, "loss": 0.0661, "step": 30654 }, { "epoch": 0.6754918001178888, "grad_norm": 0.46716248989105225, "learning_rate": 7.549190403650111e-06, "loss": 0.0779, "step": 30655 }, { "epoch": 0.675513835407405, "grad_norm": 0.6082170009613037, "learning_rate": 7.5482612938877035e-06, "loss": 0.0554, "step": 30656 }, { "epoch": 0.6755358706969211, "grad_norm": 0.45027461647987366, "learning_rate": 7.547332222080996e-06, "loss": 0.0555, "step": 30657 }, { "epoch": 0.6755579059864373, "grad_norm": 0.330417275428772, "learning_rate": 7.546403188234728e-06, "loss": 0.0472, "step": 30658 }, { "epoch": 0.6755799412759534, "grad_norm": 0.9808855056762695, "learning_rate": 7.545474192353619e-06, "loss": 0.1129, "step": 30659 }, { "epoch": 0.6756019765654696, "grad_norm": 0.3110058903694153, "learning_rate": 7.544545234442407e-06, "loss": 0.0479, "step": 30660 }, { "epoch": 0.6756240118549858, "grad_norm": 0.6807869076728821, "learning_rate": 7.543616314505831e-06, "loss": 0.0653, "step": 30661 }, { "epoch": 0.6756460471445019, "grad_norm": 0.5454083681106567, "learning_rate": 7.542687432548609e-06, "loss": 0.0436, "step": 30662 }, { "epoch": 0.6756680824340181, "grad_norm": 0.2811855971813202, "learning_rate": 7.541758588575478e-06, "loss": 0.0561, "step": 30663 }, { "epoch": 0.6756901177235343, "grad_norm": 0.6480656266212463, "learning_rate": 7.540829782591171e-06, "loss": 0.0615, "step": 30664 }, { "epoch": 0.6757121530130504, "grad_norm": 0.7508386969566345, "learning_rate": 7.539901014600425e-06, "loss": 0.091, "step": 30665 }, { "epoch": 0.6757341883025666, "grad_norm": 0.8133808374404907, "learning_rate": 7.5389722846079545e-06, "loss": 0.0661, "step": 30666 }, { "epoch": 0.6757562235920828, "grad_norm": 0.7704048156738281, "learning_rate": 7.538043592618501e-06, "loss": 0.0537, "step": 30667 }, { "epoch": 0.6757782588815989, "grad_norm": 0.5822490453720093, "learning_rate": 7.537114938636799e-06, "loss": 0.0503, "step": 30668 }, { "epoch": 0.6758002941711151, "grad_norm": 0.3340555429458618, "learning_rate": 7.536186322667568e-06, "loss": 0.0594, "step": 30669 }, { "epoch": 0.6758223294606313, "grad_norm": 0.3869938552379608, "learning_rate": 7.535257744715547e-06, "loss": 0.0521, "step": 30670 }, { "epoch": 0.6758443647501473, "grad_norm": 0.8612514734268188, "learning_rate": 7.534329204785448e-06, "loss": 0.0776, "step": 30671 }, { "epoch": 0.6758664000396635, "grad_norm": 0.5541349649429321, "learning_rate": 7.533400702882026e-06, "loss": 0.0897, "step": 30672 }, { "epoch": 0.6758884353291796, "grad_norm": 0.6394599676132202, "learning_rate": 7.532472239009993e-06, "loss": 0.0517, "step": 30673 }, { "epoch": 0.6759104706186958, "grad_norm": 0.458029180765152, "learning_rate": 7.531543813174089e-06, "loss": 0.0689, "step": 30674 }, { "epoch": 0.675932505908212, "grad_norm": 0.6381957530975342, "learning_rate": 7.530615425379027e-06, "loss": 0.0724, "step": 30675 }, { "epoch": 0.6759545411977281, "grad_norm": 0.5242938995361328, "learning_rate": 7.529687075629556e-06, "loss": 0.0774, "step": 30676 }, { "epoch": 0.6759765764872443, "grad_norm": 0.3718888759613037, "learning_rate": 7.52875876393039e-06, "loss": 0.0397, "step": 30677 }, { "epoch": 0.6759986117767605, "grad_norm": 1.0020352602005005, "learning_rate": 7.5278304902862645e-06, "loss": 0.0545, "step": 30678 }, { "epoch": 0.6760206470662766, "grad_norm": 0.6275784373283386, "learning_rate": 7.526902254701904e-06, "loss": 0.0628, "step": 30679 }, { "epoch": 0.6760426823557928, "grad_norm": 0.649020254611969, "learning_rate": 7.525974057182035e-06, "loss": 0.0669, "step": 30680 }, { "epoch": 0.676064717645309, "grad_norm": 0.5706954598426819, "learning_rate": 7.525045897731394e-06, "loss": 0.0585, "step": 30681 }, { "epoch": 0.6760867529348251, "grad_norm": 0.460843563079834, "learning_rate": 7.5241177763546914e-06, "loss": 0.0476, "step": 30682 }, { "epoch": 0.6761087882243413, "grad_norm": 0.5076699256896973, "learning_rate": 7.523189693056677e-06, "loss": 0.0607, "step": 30683 }, { "epoch": 0.6761308235138574, "grad_norm": 0.7813273668289185, "learning_rate": 7.522261647842064e-06, "loss": 0.0633, "step": 30684 }, { "epoch": 0.6761528588033736, "grad_norm": 0.6397774815559387, "learning_rate": 7.521333640715583e-06, "loss": 0.0923, "step": 30685 }, { "epoch": 0.6761748940928898, "grad_norm": 0.566012978553772, "learning_rate": 7.520405671681958e-06, "loss": 0.056, "step": 30686 }, { "epoch": 0.6761969293824059, "grad_norm": 0.5451117157936096, "learning_rate": 7.519477740745918e-06, "loss": 0.0575, "step": 30687 }, { "epoch": 0.6762189646719221, "grad_norm": 0.5028013586997986, "learning_rate": 7.518549847912193e-06, "loss": 0.0462, "step": 30688 }, { "epoch": 0.6762409999614383, "grad_norm": 0.6944432258605957, "learning_rate": 7.517621993185501e-06, "loss": 0.0672, "step": 30689 }, { "epoch": 0.6762630352509544, "grad_norm": 0.48571130633354187, "learning_rate": 7.516694176570571e-06, "loss": 0.0762, "step": 30690 }, { "epoch": 0.6762850705404706, "grad_norm": 0.6015542149543762, "learning_rate": 7.515766398072132e-06, "loss": 0.0789, "step": 30691 }, { "epoch": 0.6763071058299868, "grad_norm": 0.589418351650238, "learning_rate": 7.514838657694914e-06, "loss": 0.0627, "step": 30692 }, { "epoch": 0.6763291411195029, "grad_norm": 0.7764666080474854, "learning_rate": 7.51391095544363e-06, "loss": 0.0633, "step": 30693 }, { "epoch": 0.6763511764090191, "grad_norm": 0.8195322155952454, "learning_rate": 7.512983291323012e-06, "loss": 0.1069, "step": 30694 }, { "epoch": 0.6763732116985353, "grad_norm": 0.6599475145339966, "learning_rate": 7.512055665337792e-06, "loss": 0.08, "step": 30695 }, { "epoch": 0.6763952469880513, "grad_norm": 0.7555792927742004, "learning_rate": 7.51112807749268e-06, "loss": 0.0808, "step": 30696 }, { "epoch": 0.6764172822775675, "grad_norm": 0.703561007976532, "learning_rate": 7.510200527792415e-06, "loss": 0.0832, "step": 30697 }, { "epoch": 0.6764393175670836, "grad_norm": 0.5991389751434326, "learning_rate": 7.509273016241703e-06, "loss": 0.0766, "step": 30698 }, { "epoch": 0.6764613528565998, "grad_norm": 0.5838851928710938, "learning_rate": 7.508345542845293e-06, "loss": 0.0716, "step": 30699 }, { "epoch": 0.676483388146116, "grad_norm": 0.514181911945343, "learning_rate": 7.507418107607892e-06, "loss": 0.0509, "step": 30700 }, { "epoch": 0.6765054234356321, "grad_norm": 0.49093857407569885, "learning_rate": 7.506490710534226e-06, "loss": 0.0668, "step": 30701 }, { "epoch": 0.6765274587251483, "grad_norm": 0.6182683110237122, "learning_rate": 7.505563351629027e-06, "loss": 0.0437, "step": 30702 }, { "epoch": 0.6765494940146645, "grad_norm": 0.7351890802383423, "learning_rate": 7.504636030897008e-06, "loss": 0.0623, "step": 30703 }, { "epoch": 0.6765715293041806, "grad_norm": 1.085735559463501, "learning_rate": 7.503708748342902e-06, "loss": 0.0604, "step": 30704 }, { "epoch": 0.6765935645936968, "grad_norm": 0.6375647187232971, "learning_rate": 7.502781503971416e-06, "loss": 0.0484, "step": 30705 }, { "epoch": 0.676615599883213, "grad_norm": 0.6601436734199524, "learning_rate": 7.501854297787295e-06, "loss": 0.09, "step": 30706 }, { "epoch": 0.6766376351727291, "grad_norm": 0.6103399395942688, "learning_rate": 7.500927129795246e-06, "loss": 0.0534, "step": 30707 }, { "epoch": 0.6766596704622453, "grad_norm": 0.4347221851348877, "learning_rate": 7.500000000000004e-06, "loss": 0.0532, "step": 30708 }, { "epoch": 0.6766817057517615, "grad_norm": 0.5911068320274353, "learning_rate": 7.499072908406276e-06, "loss": 0.051, "step": 30709 }, { "epoch": 0.6767037410412776, "grad_norm": 0.9904583096504211, "learning_rate": 7.498145855018792e-06, "loss": 0.0787, "step": 30710 }, { "epoch": 0.6767257763307938, "grad_norm": 0.6322170495986938, "learning_rate": 7.49721883984228e-06, "loss": 0.0802, "step": 30711 }, { "epoch": 0.67674781162031, "grad_norm": 0.255542516708374, "learning_rate": 7.496291862881449e-06, "loss": 0.0565, "step": 30712 }, { "epoch": 0.6767698469098261, "grad_norm": 0.5151146650314331, "learning_rate": 7.495364924141028e-06, "loss": 0.0569, "step": 30713 }, { "epoch": 0.6767918821993423, "grad_norm": 0.5764487981796265, "learning_rate": 7.494438023625738e-06, "loss": 0.0636, "step": 30714 }, { "epoch": 0.6768139174888584, "grad_norm": 0.607268214225769, "learning_rate": 7.493511161340306e-06, "loss": 0.0471, "step": 30715 }, { "epoch": 0.6768359527783746, "grad_norm": 0.5579628944396973, "learning_rate": 7.492584337289442e-06, "loss": 0.0972, "step": 30716 }, { "epoch": 0.6768579880678908, "grad_norm": 0.5212162137031555, "learning_rate": 7.49165755147787e-06, "loss": 0.0619, "step": 30717 }, { "epoch": 0.6768800233574069, "grad_norm": 0.6666131615638733, "learning_rate": 7.490730803910318e-06, "loss": 0.0627, "step": 30718 }, { "epoch": 0.6769020586469231, "grad_norm": 0.6531388759613037, "learning_rate": 7.4898040945914955e-06, "loss": 0.0676, "step": 30719 }, { "epoch": 0.6769240939364392, "grad_norm": 0.5496861338615417, "learning_rate": 7.488877423526136e-06, "loss": 0.0662, "step": 30720 }, { "epoch": 0.6769461292259553, "grad_norm": 0.6757304072380066, "learning_rate": 7.487950790718937e-06, "loss": 0.0977, "step": 30721 }, { "epoch": 0.6769681645154715, "grad_norm": 0.4091986119747162, "learning_rate": 7.487024196174645e-06, "loss": 0.065, "step": 30722 }, { "epoch": 0.6769901998049876, "grad_norm": 0.8769323229789734, "learning_rate": 7.486097639897962e-06, "loss": 0.093, "step": 30723 }, { "epoch": 0.6770122350945038, "grad_norm": 0.9398553967475891, "learning_rate": 7.4851711218936135e-06, "loss": 0.086, "step": 30724 }, { "epoch": 0.67703427038402, "grad_norm": 0.3671261668205261, "learning_rate": 7.484244642166325e-06, "loss": 0.0402, "step": 30725 }, { "epoch": 0.6770563056735361, "grad_norm": 0.6823236346244812, "learning_rate": 7.483318200720801e-06, "loss": 0.0802, "step": 30726 }, { "epoch": 0.6770783409630523, "grad_norm": 0.5758857727050781, "learning_rate": 7.482391797561775e-06, "loss": 0.0764, "step": 30727 }, { "epoch": 0.6771003762525685, "grad_norm": 0.8835835456848145, "learning_rate": 7.481465432693948e-06, "loss": 0.102, "step": 30728 }, { "epoch": 0.6771224115420846, "grad_norm": 0.6936208009719849, "learning_rate": 7.48053910612206e-06, "loss": 0.0956, "step": 30729 }, { "epoch": 0.6771444468316008, "grad_norm": 0.7226532697677612, "learning_rate": 7.479612817850812e-06, "loss": 0.064, "step": 30730 }, { "epoch": 0.677166482121117, "grad_norm": 0.4273197054862976, "learning_rate": 7.478686567884935e-06, "loss": 0.0695, "step": 30731 }, { "epoch": 0.6771885174106331, "grad_norm": 0.8945379257202148, "learning_rate": 7.4777603562291355e-06, "loss": 0.0666, "step": 30732 }, { "epoch": 0.6772105527001493, "grad_norm": 0.8705317974090576, "learning_rate": 7.476834182888135e-06, "loss": 0.0712, "step": 30733 }, { "epoch": 0.6772325879896655, "grad_norm": 0.23511572182178497, "learning_rate": 7.475908047866659e-06, "loss": 0.0282, "step": 30734 }, { "epoch": 0.6772546232791816, "grad_norm": 0.7468450665473938, "learning_rate": 7.474981951169413e-06, "loss": 0.077, "step": 30735 }, { "epoch": 0.6772766585686978, "grad_norm": 0.828105628490448, "learning_rate": 7.474055892801117e-06, "loss": 0.0768, "step": 30736 }, { "epoch": 0.677298693858214, "grad_norm": 0.7070214748382568, "learning_rate": 7.473129872766492e-06, "loss": 0.0791, "step": 30737 }, { "epoch": 0.6773207291477301, "grad_norm": 0.43044501543045044, "learning_rate": 7.472203891070257e-06, "loss": 0.0555, "step": 30738 }, { "epoch": 0.6773427644372463, "grad_norm": 0.4584980309009552, "learning_rate": 7.471277947717121e-06, "loss": 0.0595, "step": 30739 }, { "epoch": 0.6773647997267624, "grad_norm": 0.9968425035476685, "learning_rate": 7.470352042711801e-06, "loss": 0.074, "step": 30740 }, { "epoch": 0.6773868350162786, "grad_norm": 0.9120177626609802, "learning_rate": 7.469426176059023e-06, "loss": 0.0934, "step": 30741 }, { "epoch": 0.6774088703057948, "grad_norm": 0.16865026950836182, "learning_rate": 7.46850034776349e-06, "loss": 0.0363, "step": 30742 }, { "epoch": 0.6774309055953109, "grad_norm": 0.9309584498405457, "learning_rate": 7.467574557829929e-06, "loss": 0.0738, "step": 30743 }, { "epoch": 0.6774529408848271, "grad_norm": 0.48358333110809326, "learning_rate": 7.466648806263038e-06, "loss": 0.0532, "step": 30744 }, { "epoch": 0.6774749761743432, "grad_norm": 0.5535678863525391, "learning_rate": 7.465723093067555e-06, "loss": 0.0777, "step": 30745 }, { "epoch": 0.6774970114638593, "grad_norm": 0.5074023604393005, "learning_rate": 7.4647974182481805e-06, "loss": 0.0596, "step": 30746 }, { "epoch": 0.6775190467533755, "grad_norm": 0.5228756666183472, "learning_rate": 7.463871781809633e-06, "loss": 0.0761, "step": 30747 }, { "epoch": 0.6775410820428917, "grad_norm": 0.47627782821655273, "learning_rate": 7.462946183756633e-06, "loss": 0.0355, "step": 30748 }, { "epoch": 0.6775631173324078, "grad_norm": 0.864904522895813, "learning_rate": 7.462020624093884e-06, "loss": 0.0991, "step": 30749 }, { "epoch": 0.677585152621924, "grad_norm": 0.5395222306251526, "learning_rate": 7.461095102826112e-06, "loss": 0.0603, "step": 30750 }, { "epoch": 0.6776071879114401, "grad_norm": 0.5640214085578918, "learning_rate": 7.460169619958014e-06, "loss": 0.0819, "step": 30751 }, { "epoch": 0.6776292232009563, "grad_norm": 0.8465692400932312, "learning_rate": 7.459244175494327e-06, "loss": 0.0445, "step": 30752 }, { "epoch": 0.6776512584904725, "grad_norm": 0.5656806230545044, "learning_rate": 7.458318769439746e-06, "loss": 0.0685, "step": 30753 }, { "epoch": 0.6776732937799886, "grad_norm": 0.4957082271575928, "learning_rate": 7.457393401798999e-06, "loss": 0.059, "step": 30754 }, { "epoch": 0.6776953290695048, "grad_norm": 0.5499937534332275, "learning_rate": 7.4564680725767844e-06, "loss": 0.06, "step": 30755 }, { "epoch": 0.677717364359021, "grad_norm": 0.531869113445282, "learning_rate": 7.455542781777825e-06, "loss": 0.032, "step": 30756 }, { "epoch": 0.6777393996485371, "grad_norm": 0.7733253836631775, "learning_rate": 7.454617529406837e-06, "loss": 0.0475, "step": 30757 }, { "epoch": 0.6777614349380533, "grad_norm": 0.6456130146980286, "learning_rate": 7.453692315468522e-06, "loss": 0.0904, "step": 30758 }, { "epoch": 0.6777834702275695, "grad_norm": 0.7941641211509705, "learning_rate": 7.452767139967601e-06, "loss": 0.0629, "step": 30759 }, { "epoch": 0.6778055055170856, "grad_norm": 0.6628998517990112, "learning_rate": 7.451842002908782e-06, "loss": 0.0697, "step": 30760 }, { "epoch": 0.6778275408066018, "grad_norm": 0.7218592762947083, "learning_rate": 7.450916904296784e-06, "loss": 0.0511, "step": 30761 }, { "epoch": 0.677849576096118, "grad_norm": 0.691850483417511, "learning_rate": 7.44999184413631e-06, "loss": 0.0783, "step": 30762 }, { "epoch": 0.6778716113856341, "grad_norm": 0.4993918836116791, "learning_rate": 7.449066822432077e-06, "loss": 0.0622, "step": 30763 }, { "epoch": 0.6778936466751503, "grad_norm": 1.0183156728744507, "learning_rate": 7.4481418391888e-06, "loss": 0.0719, "step": 30764 }, { "epoch": 0.6779156819646665, "grad_norm": 0.7582428455352783, "learning_rate": 7.44721689441118e-06, "loss": 0.0699, "step": 30765 }, { "epoch": 0.6779377172541826, "grad_norm": 0.7444719672203064, "learning_rate": 7.4462919881039414e-06, "loss": 0.0642, "step": 30766 }, { "epoch": 0.6779597525436988, "grad_norm": 0.366420179605484, "learning_rate": 7.445367120271777e-06, "loss": 0.0349, "step": 30767 }, { "epoch": 0.6779817878332149, "grad_norm": 0.5164027810096741, "learning_rate": 7.4444422909194205e-06, "loss": 0.068, "step": 30768 }, { "epoch": 0.6780038231227311, "grad_norm": 0.33044493198394775, "learning_rate": 7.443517500051566e-06, "loss": 0.0812, "step": 30769 }, { "epoch": 0.6780258584122472, "grad_norm": 0.7102934718132019, "learning_rate": 7.442592747672927e-06, "loss": 0.0696, "step": 30770 }, { "epoch": 0.6780478937017633, "grad_norm": 0.6750876307487488, "learning_rate": 7.441668033788222e-06, "loss": 0.0513, "step": 30771 }, { "epoch": 0.6780699289912795, "grad_norm": 0.7388840317726135, "learning_rate": 7.44074335840215e-06, "loss": 0.0731, "step": 30772 }, { "epoch": 0.6780919642807957, "grad_norm": 0.40426284074783325, "learning_rate": 7.439818721519432e-06, "loss": 0.0593, "step": 30773 }, { "epoch": 0.6781139995703118, "grad_norm": 0.5779864192008972, "learning_rate": 7.438894123144757e-06, "loss": 0.061, "step": 30774 }, { "epoch": 0.678136034859828, "grad_norm": 0.4509325623512268, "learning_rate": 7.437969563282864e-06, "loss": 0.0538, "step": 30775 }, { "epoch": 0.6781580701493441, "grad_norm": 0.7470638751983643, "learning_rate": 7.4370450419384375e-06, "loss": 0.0615, "step": 30776 }, { "epoch": 0.6781801054388603, "grad_norm": 0.43475034832954407, "learning_rate": 7.436120559116204e-06, "loss": 0.0673, "step": 30777 }, { "epoch": 0.6782021407283765, "grad_norm": 0.9268311262130737, "learning_rate": 7.435196114820853e-06, "loss": 0.0798, "step": 30778 }, { "epoch": 0.6782241760178926, "grad_norm": 0.5398364663124084, "learning_rate": 7.434271709057116e-06, "loss": 0.0617, "step": 30779 }, { "epoch": 0.6782462113074088, "grad_norm": 0.5445874333381653, "learning_rate": 7.433347341829691e-06, "loss": 0.077, "step": 30780 }, { "epoch": 0.678268246596925, "grad_norm": 0.5956364870071411, "learning_rate": 7.4324230131432796e-06, "loss": 0.0677, "step": 30781 }, { "epoch": 0.6782902818864411, "grad_norm": 1.221317172050476, "learning_rate": 7.431498723002595e-06, "loss": 0.0812, "step": 30782 }, { "epoch": 0.6783123171759573, "grad_norm": 0.7051728367805481, "learning_rate": 7.4305744714123465e-06, "loss": 0.0492, "step": 30783 }, { "epoch": 0.6783343524654735, "grad_norm": 0.9266459345817566, "learning_rate": 7.429650258377247e-06, "loss": 0.0687, "step": 30784 }, { "epoch": 0.6783563877549896, "grad_norm": 0.6760376691818237, "learning_rate": 7.428726083901993e-06, "loss": 0.0677, "step": 30785 }, { "epoch": 0.6783784230445058, "grad_norm": 0.37042176723480225, "learning_rate": 7.427801947991297e-06, "loss": 0.0594, "step": 30786 }, { "epoch": 0.678400458334022, "grad_norm": 0.42178991436958313, "learning_rate": 7.426877850649866e-06, "loss": 0.0542, "step": 30787 }, { "epoch": 0.6784224936235381, "grad_norm": 0.6798704862594604, "learning_rate": 7.425953791882411e-06, "loss": 0.0519, "step": 30788 }, { "epoch": 0.6784445289130543, "grad_norm": 0.3034341633319855, "learning_rate": 7.4250297716936325e-06, "loss": 0.0568, "step": 30789 }, { "epoch": 0.6784665642025705, "grad_norm": 0.7927853465080261, "learning_rate": 7.424105790088236e-06, "loss": 0.0803, "step": 30790 }, { "epoch": 0.6784885994920866, "grad_norm": 0.681839108467102, "learning_rate": 7.42318184707094e-06, "loss": 0.0494, "step": 30791 }, { "epoch": 0.6785106347816028, "grad_norm": 0.5480931401252747, "learning_rate": 7.4222579426464346e-06, "loss": 0.0765, "step": 30792 }, { "epoch": 0.678532670071119, "grad_norm": 0.34818631410598755, "learning_rate": 7.421334076819432e-06, "loss": 0.036, "step": 30793 }, { "epoch": 0.678554705360635, "grad_norm": 0.47652682662010193, "learning_rate": 7.420410249594641e-06, "loss": 0.0502, "step": 30794 }, { "epoch": 0.6785767406501512, "grad_norm": 1.150475263595581, "learning_rate": 7.41948646097677e-06, "loss": 0.0704, "step": 30795 }, { "epoch": 0.6785987759396673, "grad_norm": 0.6630313992500305, "learning_rate": 7.418562710970514e-06, "loss": 0.0881, "step": 30796 }, { "epoch": 0.6786208112291835, "grad_norm": 0.6177088618278503, "learning_rate": 7.417638999580583e-06, "loss": 0.0888, "step": 30797 }, { "epoch": 0.6786428465186997, "grad_norm": 0.5242114067077637, "learning_rate": 7.416715326811689e-06, "loss": 0.0571, "step": 30798 }, { "epoch": 0.6786648818082158, "grad_norm": 0.27052560448646545, "learning_rate": 7.4157916926685234e-06, "loss": 0.0633, "step": 30799 }, { "epoch": 0.678686917097732, "grad_norm": 0.5436729192733765, "learning_rate": 7.414868097155804e-06, "loss": 0.0797, "step": 30800 }, { "epoch": 0.6787089523872482, "grad_norm": 0.6068267226219177, "learning_rate": 7.413944540278218e-06, "loss": 0.0581, "step": 30801 }, { "epoch": 0.6787309876767643, "grad_norm": 0.8278897404670715, "learning_rate": 7.413021022040492e-06, "loss": 0.0627, "step": 30802 }, { "epoch": 0.6787530229662805, "grad_norm": 0.7398706674575806, "learning_rate": 7.412097542447311e-06, "loss": 0.0614, "step": 30803 }, { "epoch": 0.6787750582557966, "grad_norm": 0.41399282217025757, "learning_rate": 7.411174101503394e-06, "loss": 0.03, "step": 30804 }, { "epoch": 0.6787970935453128, "grad_norm": 0.5936059951782227, "learning_rate": 7.410250699213429e-06, "loss": 0.0724, "step": 30805 }, { "epoch": 0.678819128834829, "grad_norm": 0.8877393007278442, "learning_rate": 7.4093273355821286e-06, "loss": 0.069, "step": 30806 }, { "epoch": 0.6788411641243451, "grad_norm": 0.8780404925346375, "learning_rate": 7.4084040106142006e-06, "loss": 0.084, "step": 30807 }, { "epoch": 0.6788631994138613, "grad_norm": 0.5493069291114807, "learning_rate": 7.4074807243143345e-06, "loss": 0.0702, "step": 30808 }, { "epoch": 0.6788852347033775, "grad_norm": 0.839230477809906, "learning_rate": 7.406557476687242e-06, "loss": 0.0659, "step": 30809 }, { "epoch": 0.6789072699928936, "grad_norm": 0.6449072957038879, "learning_rate": 7.405634267737624e-06, "loss": 0.0606, "step": 30810 }, { "epoch": 0.6789293052824098, "grad_norm": 0.46626871824264526, "learning_rate": 7.404711097470188e-06, "loss": 0.0566, "step": 30811 }, { "epoch": 0.678951340571926, "grad_norm": 1.1375566720962524, "learning_rate": 7.4037879658896264e-06, "loss": 0.0374, "step": 30812 }, { "epoch": 0.6789733758614421, "grad_norm": 0.7509210705757141, "learning_rate": 7.4028648730006444e-06, "loss": 0.0604, "step": 30813 }, { "epoch": 0.6789954111509583, "grad_norm": 0.5437740683555603, "learning_rate": 7.4019418188079535e-06, "loss": 0.06, "step": 30814 }, { "epoch": 0.6790174464404745, "grad_norm": 0.613163411617279, "learning_rate": 7.40101880331624e-06, "loss": 0.0589, "step": 30815 }, { "epoch": 0.6790394817299906, "grad_norm": 0.4228181838989258, "learning_rate": 7.400095826530213e-06, "loss": 0.0708, "step": 30816 }, { "epoch": 0.6790615170195068, "grad_norm": 0.7093614935874939, "learning_rate": 7.399172888454574e-06, "loss": 0.0631, "step": 30817 }, { "epoch": 0.679083552309023, "grad_norm": 0.336940735578537, "learning_rate": 7.398249989094029e-06, "loss": 0.0471, "step": 30818 }, { "epoch": 0.679105587598539, "grad_norm": 0.687637448310852, "learning_rate": 7.397327128453267e-06, "loss": 0.0515, "step": 30819 }, { "epoch": 0.6791276228880552, "grad_norm": 0.608658492565155, "learning_rate": 7.3964043065369945e-06, "loss": 0.0514, "step": 30820 }, { "epoch": 0.6791496581775713, "grad_norm": 0.49693042039871216, "learning_rate": 7.395481523349918e-06, "loss": 0.0613, "step": 30821 }, { "epoch": 0.6791716934670875, "grad_norm": 0.21122775971889496, "learning_rate": 7.394558778896726e-06, "loss": 0.051, "step": 30822 }, { "epoch": 0.6791937287566037, "grad_norm": 0.609915554523468, "learning_rate": 7.393636073182132e-06, "loss": 0.0487, "step": 30823 }, { "epoch": 0.6792157640461198, "grad_norm": 0.5456258058547974, "learning_rate": 7.3927134062108164e-06, "loss": 0.0901, "step": 30824 }, { "epoch": 0.679237799335636, "grad_norm": 0.7504088282585144, "learning_rate": 7.391790777987502e-06, "loss": 0.0691, "step": 30825 }, { "epoch": 0.6792598346251522, "grad_norm": 0.8723317384719849, "learning_rate": 7.3908681885168706e-06, "loss": 0.1054, "step": 30826 }, { "epoch": 0.6792818699146683, "grad_norm": 0.6185360550880432, "learning_rate": 7.389945637803633e-06, "loss": 0.0671, "step": 30827 }, { "epoch": 0.6793039052041845, "grad_norm": 0.5975037813186646, "learning_rate": 7.389023125852478e-06, "loss": 0.0692, "step": 30828 }, { "epoch": 0.6793259404937007, "grad_norm": 0.49135822057724, "learning_rate": 7.388100652668111e-06, "loss": 0.0651, "step": 30829 }, { "epoch": 0.6793479757832168, "grad_norm": 0.9500200748443604, "learning_rate": 7.387178218255233e-06, "loss": 0.0954, "step": 30830 }, { "epoch": 0.679370011072733, "grad_norm": 0.771510660648346, "learning_rate": 7.386255822618533e-06, "loss": 0.0689, "step": 30831 }, { "epoch": 0.6793920463622491, "grad_norm": 0.5609620213508606, "learning_rate": 7.385333465762716e-06, "loss": 0.0871, "step": 30832 }, { "epoch": 0.6794140816517653, "grad_norm": 0.5328086614608765, "learning_rate": 7.38441114769248e-06, "loss": 0.0832, "step": 30833 }, { "epoch": 0.6794361169412815, "grad_norm": 0.5521127581596375, "learning_rate": 7.383488868412525e-06, "loss": 0.0791, "step": 30834 }, { "epoch": 0.6794581522307976, "grad_norm": 0.5461100339889526, "learning_rate": 7.382566627927541e-06, "loss": 0.0481, "step": 30835 }, { "epoch": 0.6794801875203138, "grad_norm": 5.475899696350098, "learning_rate": 7.381644426242231e-06, "loss": 0.0638, "step": 30836 }, { "epoch": 0.67950222280983, "grad_norm": 0.5726868510246277, "learning_rate": 7.380722263361295e-06, "loss": 0.0761, "step": 30837 }, { "epoch": 0.6795242580993461, "grad_norm": 0.7645475268363953, "learning_rate": 7.379800139289421e-06, "loss": 0.0916, "step": 30838 }, { "epoch": 0.6795462933888623, "grad_norm": 0.6941845417022705, "learning_rate": 7.378878054031312e-06, "loss": 0.0682, "step": 30839 }, { "epoch": 0.6795683286783785, "grad_norm": 0.8872025012969971, "learning_rate": 7.377956007591662e-06, "loss": 0.0713, "step": 30840 }, { "epoch": 0.6795903639678946, "grad_norm": 0.6279469132423401, "learning_rate": 7.377033999975177e-06, "loss": 0.0585, "step": 30841 }, { "epoch": 0.6796123992574108, "grad_norm": 0.8054506182670593, "learning_rate": 7.37611203118654e-06, "loss": 0.0736, "step": 30842 }, { "epoch": 0.679634434546927, "grad_norm": 0.6084304451942444, "learning_rate": 7.375190101230451e-06, "loss": 0.0824, "step": 30843 }, { "epoch": 0.679656469836443, "grad_norm": 0.6734308004379272, "learning_rate": 7.374268210111615e-06, "loss": 0.0449, "step": 30844 }, { "epoch": 0.6796785051259592, "grad_norm": 1.0030639171600342, "learning_rate": 7.373346357834713e-06, "loss": 0.0897, "step": 30845 }, { "epoch": 0.6797005404154753, "grad_norm": 0.6953274011611938, "learning_rate": 7.372424544404454e-06, "loss": 0.0743, "step": 30846 }, { "epoch": 0.6797225757049915, "grad_norm": 0.5825886726379395, "learning_rate": 7.3715027698255165e-06, "loss": 0.0717, "step": 30847 }, { "epoch": 0.6797446109945077, "grad_norm": 0.4785076975822449, "learning_rate": 7.370581034102616e-06, "loss": 0.072, "step": 30848 }, { "epoch": 0.6797666462840238, "grad_norm": 0.7564582824707031, "learning_rate": 7.369659337240433e-06, "loss": 0.0871, "step": 30849 }, { "epoch": 0.67978868157354, "grad_norm": 0.5066125988960266, "learning_rate": 7.368737679243673e-06, "loss": 0.0581, "step": 30850 }, { "epoch": 0.6798107168630562, "grad_norm": 0.6028587222099304, "learning_rate": 7.367816060117016e-06, "loss": 0.0877, "step": 30851 }, { "epoch": 0.6798327521525723, "grad_norm": 0.5168671607971191, "learning_rate": 7.366894479865167e-06, "loss": 0.0552, "step": 30852 }, { "epoch": 0.6798547874420885, "grad_norm": 0.6361046433448792, "learning_rate": 7.365972938492822e-06, "loss": 0.0733, "step": 30853 }, { "epoch": 0.6798768227316047, "grad_norm": 0.37947267293930054, "learning_rate": 7.365051436004666e-06, "loss": 0.0562, "step": 30854 }, { "epoch": 0.6798988580211208, "grad_norm": 0.8519837856292725, "learning_rate": 7.364129972405395e-06, "loss": 0.0973, "step": 30855 }, { "epoch": 0.679920893310637, "grad_norm": 0.37282413244247437, "learning_rate": 7.363208547699707e-06, "loss": 0.0496, "step": 30856 }, { "epoch": 0.6799429286001532, "grad_norm": 0.5065343379974365, "learning_rate": 7.362287161892297e-06, "loss": 0.06, "step": 30857 }, { "epoch": 0.6799649638896693, "grad_norm": 0.5535317063331604, "learning_rate": 7.361365814987851e-06, "loss": 0.0616, "step": 30858 }, { "epoch": 0.6799869991791855, "grad_norm": 0.6630868315696716, "learning_rate": 7.360444506991063e-06, "loss": 0.0712, "step": 30859 }, { "epoch": 0.6800090344687016, "grad_norm": 0.8496679663658142, "learning_rate": 7.359523237906633e-06, "loss": 0.0967, "step": 30860 }, { "epoch": 0.6800310697582178, "grad_norm": 0.6237088441848755, "learning_rate": 7.358602007739245e-06, "loss": 0.0873, "step": 30861 }, { "epoch": 0.680053105047734, "grad_norm": 0.4673647880554199, "learning_rate": 7.357680816493592e-06, "loss": 0.053, "step": 30862 }, { "epoch": 0.6800751403372501, "grad_norm": 0.7964484095573425, "learning_rate": 7.35675966417437e-06, "loss": 0.0728, "step": 30863 }, { "epoch": 0.6800971756267663, "grad_norm": 0.6079148054122925, "learning_rate": 7.355838550786275e-06, "loss": 0.0818, "step": 30864 }, { "epoch": 0.6801192109162825, "grad_norm": 0.9911772608757019, "learning_rate": 7.354917476333987e-06, "loss": 0.0587, "step": 30865 }, { "epoch": 0.6801412462057986, "grad_norm": 0.3868972361087799, "learning_rate": 7.353996440822206e-06, "loss": 0.0471, "step": 30866 }, { "epoch": 0.6801632814953148, "grad_norm": 0.7250844836235046, "learning_rate": 7.3530754442556255e-06, "loss": 0.086, "step": 30867 }, { "epoch": 0.6801853167848309, "grad_norm": 0.4653358459472656, "learning_rate": 7.352154486638926e-06, "loss": 0.0603, "step": 30868 }, { "epoch": 0.680207352074347, "grad_norm": 0.8851944208145142, "learning_rate": 7.351233567976813e-06, "loss": 0.0867, "step": 30869 }, { "epoch": 0.6802293873638632, "grad_norm": 0.5551479458808899, "learning_rate": 7.350312688273955e-06, "loss": 0.0521, "step": 30870 }, { "epoch": 0.6802514226533793, "grad_norm": 0.5270771980285645, "learning_rate": 7.34939184753507e-06, "loss": 0.0355, "step": 30871 }, { "epoch": 0.6802734579428955, "grad_norm": 1.108642816543579, "learning_rate": 7.3484710457648275e-06, "loss": 0.0677, "step": 30872 }, { "epoch": 0.6802954932324117, "grad_norm": 0.14595822989940643, "learning_rate": 7.347550282967931e-06, "loss": 0.0661, "step": 30873 }, { "epoch": 0.6803175285219278, "grad_norm": 0.6106114387512207, "learning_rate": 7.3466295591490594e-06, "loss": 0.0578, "step": 30874 }, { "epoch": 0.680339563811444, "grad_norm": 0.7671244740486145, "learning_rate": 7.345708874312908e-06, "loss": 0.0793, "step": 30875 }, { "epoch": 0.6803615991009602, "grad_norm": 0.5693373084068298, "learning_rate": 7.34478822846417e-06, "loss": 0.054, "step": 30876 }, { "epoch": 0.6803836343904763, "grad_norm": 0.6335747838020325, "learning_rate": 7.343867621607526e-06, "loss": 0.0413, "step": 30877 }, { "epoch": 0.6804056696799925, "grad_norm": 0.5268774032592773, "learning_rate": 7.34294705374767e-06, "loss": 0.0666, "step": 30878 }, { "epoch": 0.6804277049695087, "grad_norm": 0.8937661647796631, "learning_rate": 7.342026524889289e-06, "loss": 0.0632, "step": 30879 }, { "epoch": 0.6804497402590248, "grad_norm": 0.700435221195221, "learning_rate": 7.341106035037082e-06, "loss": 0.102, "step": 30880 }, { "epoch": 0.680471775548541, "grad_norm": 0.687817394733429, "learning_rate": 7.34018558419572e-06, "loss": 0.0852, "step": 30881 }, { "epoch": 0.6804938108380572, "grad_norm": 0.48894181847572327, "learning_rate": 7.339265172369903e-06, "loss": 0.0802, "step": 30882 }, { "epoch": 0.6805158461275733, "grad_norm": 0.7232568264007568, "learning_rate": 7.338344799564321e-06, "loss": 0.07, "step": 30883 }, { "epoch": 0.6805378814170895, "grad_norm": 0.7194907665252686, "learning_rate": 7.3374244657836515e-06, "loss": 0.099, "step": 30884 }, { "epoch": 0.6805599167066056, "grad_norm": 0.5375460386276245, "learning_rate": 7.3365041710325896e-06, "loss": 0.048, "step": 30885 }, { "epoch": 0.6805819519961218, "grad_norm": 0.5056405663490295, "learning_rate": 7.3355839153158195e-06, "loss": 0.068, "step": 30886 }, { "epoch": 0.680603987285638, "grad_norm": 0.6236123442649841, "learning_rate": 7.3346636986380365e-06, "loss": 0.0837, "step": 30887 }, { "epoch": 0.6806260225751541, "grad_norm": 0.7445897459983826, "learning_rate": 7.333743521003917e-06, "loss": 0.0793, "step": 30888 }, { "epoch": 0.6806480578646703, "grad_norm": 0.8008392453193665, "learning_rate": 7.332823382418152e-06, "loss": 0.0763, "step": 30889 }, { "epoch": 0.6806700931541865, "grad_norm": 0.769956648349762, "learning_rate": 7.33190328288543e-06, "loss": 0.0703, "step": 30890 }, { "epoch": 0.6806921284437026, "grad_norm": 0.4378412067890167, "learning_rate": 7.330983222410443e-06, "loss": 0.0749, "step": 30891 }, { "epoch": 0.6807141637332188, "grad_norm": 0.8347532749176025, "learning_rate": 7.330063200997869e-06, "loss": 0.0603, "step": 30892 }, { "epoch": 0.6807361990227349, "grad_norm": 0.5481219291687012, "learning_rate": 7.3291432186523845e-06, "loss": 0.0598, "step": 30893 }, { "epoch": 0.680758234312251, "grad_norm": 0.7290681004524231, "learning_rate": 7.3282232753787e-06, "loss": 0.071, "step": 30894 }, { "epoch": 0.6807802696017672, "grad_norm": 0.5778952836990356, "learning_rate": 7.327303371181482e-06, "loss": 0.0688, "step": 30895 }, { "epoch": 0.6808023048912833, "grad_norm": 0.6364575028419495, "learning_rate": 7.326383506065427e-06, "loss": 0.0827, "step": 30896 }, { "epoch": 0.6808243401807995, "grad_norm": 0.7416121959686279, "learning_rate": 7.325463680035205e-06, "loss": 0.0986, "step": 30897 }, { "epoch": 0.6808463754703157, "grad_norm": 0.49780064821243286, "learning_rate": 7.324543893095525e-06, "loss": 0.0713, "step": 30898 }, { "epoch": 0.6808684107598318, "grad_norm": 0.7528792023658752, "learning_rate": 7.323624145251051e-06, "loss": 0.0595, "step": 30899 }, { "epoch": 0.680890446049348, "grad_norm": 0.6779412627220154, "learning_rate": 7.3227044365064814e-06, "loss": 0.0865, "step": 30900 }, { "epoch": 0.6809124813388642, "grad_norm": 0.3467639088630676, "learning_rate": 7.321784766866489e-06, "loss": 0.0641, "step": 30901 }, { "epoch": 0.6809345166283803, "grad_norm": 0.359088659286499, "learning_rate": 7.320865136335766e-06, "loss": 0.0529, "step": 30902 }, { "epoch": 0.6809565519178965, "grad_norm": 0.7295616865158081, "learning_rate": 7.319945544918999e-06, "loss": 0.0578, "step": 30903 }, { "epoch": 0.6809785872074127, "grad_norm": 0.3810036778450012, "learning_rate": 7.319025992620857e-06, "loss": 0.04, "step": 30904 }, { "epoch": 0.6810006224969288, "grad_norm": 0.5601772665977478, "learning_rate": 7.318106479446046e-06, "loss": 0.0726, "step": 30905 }, { "epoch": 0.681022657786445, "grad_norm": 0.6393978595733643, "learning_rate": 7.317187005399231e-06, "loss": 0.0779, "step": 30906 }, { "epoch": 0.6810446930759612, "grad_norm": 0.6062089800834656, "learning_rate": 7.316267570485109e-06, "loss": 0.0575, "step": 30907 }, { "epoch": 0.6810667283654773, "grad_norm": 0.5152509212493896, "learning_rate": 7.315348174708351e-06, "loss": 0.0376, "step": 30908 }, { "epoch": 0.6810887636549935, "grad_norm": 0.5579620599746704, "learning_rate": 7.314428818073646e-06, "loss": 0.0601, "step": 30909 }, { "epoch": 0.6811107989445097, "grad_norm": 0.5587455630302429, "learning_rate": 7.313509500585682e-06, "loss": 0.0526, "step": 30910 }, { "epoch": 0.6811328342340258, "grad_norm": 0.5397785305976868, "learning_rate": 7.312590222249129e-06, "loss": 0.0736, "step": 30911 }, { "epoch": 0.681154869523542, "grad_norm": 0.9249057173728943, "learning_rate": 7.3116709830686775e-06, "loss": 0.0912, "step": 30912 }, { "epoch": 0.6811769048130581, "grad_norm": 0.7904263734817505, "learning_rate": 7.310751783049008e-06, "loss": 0.0759, "step": 30913 }, { "epoch": 0.6811989401025743, "grad_norm": 0.5535929203033447, "learning_rate": 7.309832622194809e-06, "loss": 0.0515, "step": 30914 }, { "epoch": 0.6812209753920905, "grad_norm": 0.6336537003517151, "learning_rate": 7.30891350051075e-06, "loss": 0.0522, "step": 30915 }, { "epoch": 0.6812430106816066, "grad_norm": 0.7459815740585327, "learning_rate": 7.30799441800152e-06, "loss": 0.0805, "step": 30916 }, { "epoch": 0.6812650459711228, "grad_norm": 0.8307624459266663, "learning_rate": 7.3070753746718036e-06, "loss": 0.087, "step": 30917 }, { "epoch": 0.6812870812606389, "grad_norm": 0.5255863666534424, "learning_rate": 7.306156370526272e-06, "loss": 0.0739, "step": 30918 }, { "epoch": 0.681309116550155, "grad_norm": 0.7493438124656677, "learning_rate": 7.305237405569618e-06, "loss": 0.0838, "step": 30919 }, { "epoch": 0.6813311518396712, "grad_norm": 0.8083210587501526, "learning_rate": 7.304318479806505e-06, "loss": 0.0444, "step": 30920 }, { "epoch": 0.6813531871291874, "grad_norm": 0.6000226736068726, "learning_rate": 7.3033995932416356e-06, "loss": 0.0597, "step": 30921 }, { "epoch": 0.6813752224187035, "grad_norm": 0.7079695463180542, "learning_rate": 7.302480745879674e-06, "loss": 0.0765, "step": 30922 }, { "epoch": 0.6813972577082197, "grad_norm": 0.6701220870018005, "learning_rate": 7.301561937725313e-06, "loss": 0.0856, "step": 30923 }, { "epoch": 0.6814192929977358, "grad_norm": 0.6470077037811279, "learning_rate": 7.300643168783216e-06, "loss": 0.0881, "step": 30924 }, { "epoch": 0.681441328287252, "grad_norm": 0.714572012424469, "learning_rate": 7.299724439058074e-06, "loss": 0.1011, "step": 30925 }, { "epoch": 0.6814633635767682, "grad_norm": 0.7889567613601685, "learning_rate": 7.29880574855457e-06, "loss": 0.0447, "step": 30926 }, { "epoch": 0.6814853988662843, "grad_norm": 0.8711462020874023, "learning_rate": 7.297887097277368e-06, "loss": 0.0768, "step": 30927 }, { "epoch": 0.6815074341558005, "grad_norm": 0.33922016620635986, "learning_rate": 7.296968485231166e-06, "loss": 0.053, "step": 30928 }, { "epoch": 0.6815294694453167, "grad_norm": 0.9082573056221008, "learning_rate": 7.2960499124206305e-06, "loss": 0.0509, "step": 30929 }, { "epoch": 0.6815515047348328, "grad_norm": 0.6382344365119934, "learning_rate": 7.29513137885045e-06, "loss": 0.0515, "step": 30930 }, { "epoch": 0.681573540024349, "grad_norm": 0.2865525186061859, "learning_rate": 7.294212884525289e-06, "loss": 0.04, "step": 30931 }, { "epoch": 0.6815955753138652, "grad_norm": 0.39010876417160034, "learning_rate": 7.293294429449837e-06, "loss": 0.0476, "step": 30932 }, { "epoch": 0.6816176106033813, "grad_norm": 0.8986411094665527, "learning_rate": 7.292376013628773e-06, "loss": 0.0744, "step": 30933 }, { "epoch": 0.6816396458928975, "grad_norm": 0.6320688128471375, "learning_rate": 7.291457637066765e-06, "loss": 0.0781, "step": 30934 }, { "epoch": 0.6816616811824137, "grad_norm": 0.6761692762374878, "learning_rate": 7.290539299768498e-06, "loss": 0.0654, "step": 30935 }, { "epoch": 0.6816837164719298, "grad_norm": 0.573136568069458, "learning_rate": 7.289621001738649e-06, "loss": 0.0497, "step": 30936 }, { "epoch": 0.681705751761446, "grad_norm": 0.5582543015480042, "learning_rate": 7.288702742981898e-06, "loss": 0.0911, "step": 30937 }, { "epoch": 0.6817277870509622, "grad_norm": 0.6869616508483887, "learning_rate": 7.2877845235029154e-06, "loss": 0.0936, "step": 30938 }, { "epoch": 0.6817498223404783, "grad_norm": 0.6023216843605042, "learning_rate": 7.286866343306383e-06, "loss": 0.0786, "step": 30939 }, { "epoch": 0.6817718576299945, "grad_norm": 0.30205875635147095, "learning_rate": 7.285948202396979e-06, "loss": 0.0304, "step": 30940 }, { "epoch": 0.6817938929195106, "grad_norm": 0.5651094913482666, "learning_rate": 7.285030100779373e-06, "loss": 0.0715, "step": 30941 }, { "epoch": 0.6818159282090268, "grad_norm": 0.3670869767665863, "learning_rate": 7.284112038458253e-06, "loss": 0.0635, "step": 30942 }, { "epoch": 0.6818379634985429, "grad_norm": 0.17494362592697144, "learning_rate": 7.283194015438274e-06, "loss": 0.0595, "step": 30943 }, { "epoch": 0.681859998788059, "grad_norm": 0.46499571204185486, "learning_rate": 7.282276031724139e-06, "loss": 0.0496, "step": 30944 }, { "epoch": 0.6818820340775752, "grad_norm": 0.8386003375053406, "learning_rate": 7.281358087320505e-06, "loss": 0.0935, "step": 30945 }, { "epoch": 0.6819040693670914, "grad_norm": 0.5426926016807556, "learning_rate": 7.280440182232054e-06, "loss": 0.0614, "step": 30946 }, { "epoch": 0.6819261046566075, "grad_norm": 0.9042472839355469, "learning_rate": 7.279522316463465e-06, "loss": 0.1049, "step": 30947 }, { "epoch": 0.6819481399461237, "grad_norm": 0.6518417596817017, "learning_rate": 7.278604490019403e-06, "loss": 0.0619, "step": 30948 }, { "epoch": 0.6819701752356399, "grad_norm": 0.5578314661979675, "learning_rate": 7.277686702904557e-06, "loss": 0.0482, "step": 30949 }, { "epoch": 0.681992210525156, "grad_norm": 0.9621934294700623, "learning_rate": 7.276768955123579e-06, "loss": 0.0862, "step": 30950 }, { "epoch": 0.6820142458146722, "grad_norm": 0.576242983341217, "learning_rate": 7.2758512466811725e-06, "loss": 0.0834, "step": 30951 }, { "epoch": 0.6820362811041883, "grad_norm": 0.36366209387779236, "learning_rate": 7.274933577581989e-06, "loss": 0.0805, "step": 30952 }, { "epoch": 0.6820583163937045, "grad_norm": 0.6138320565223694, "learning_rate": 7.27401594783072e-06, "loss": 0.0387, "step": 30953 }, { "epoch": 0.6820803516832207, "grad_norm": 0.663549542427063, "learning_rate": 7.273098357432023e-06, "loss": 0.0749, "step": 30954 }, { "epoch": 0.6821023869727368, "grad_norm": 0.46638092398643494, "learning_rate": 7.27218080639058e-06, "loss": 0.0533, "step": 30955 }, { "epoch": 0.682124422262253, "grad_norm": 0.27062469720840454, "learning_rate": 7.271263294711069e-06, "loss": 0.0569, "step": 30956 }, { "epoch": 0.6821464575517692, "grad_norm": 0.484005868434906, "learning_rate": 7.270345822398155e-06, "loss": 0.0882, "step": 30957 }, { "epoch": 0.6821684928412853, "grad_norm": 0.7711865901947021, "learning_rate": 7.269428389456514e-06, "loss": 0.0891, "step": 30958 }, { "epoch": 0.6821905281308015, "grad_norm": 1.2152470350265503, "learning_rate": 7.268510995890819e-06, "loss": 0.0853, "step": 30959 }, { "epoch": 0.6822125634203177, "grad_norm": 0.43973392248153687, "learning_rate": 7.2675936417057485e-06, "loss": 0.0409, "step": 30960 }, { "epoch": 0.6822345987098338, "grad_norm": 0.5955672860145569, "learning_rate": 7.266676326905965e-06, "loss": 0.0543, "step": 30961 }, { "epoch": 0.68225663399935, "grad_norm": 0.5366665124893188, "learning_rate": 7.265759051496147e-06, "loss": 0.0827, "step": 30962 }, { "epoch": 0.6822786692888662, "grad_norm": 0.5374795794487, "learning_rate": 7.264841815480969e-06, "loss": 0.0745, "step": 30963 }, { "epoch": 0.6823007045783823, "grad_norm": 0.2499140053987503, "learning_rate": 7.263924618865097e-06, "loss": 0.0482, "step": 30964 }, { "epoch": 0.6823227398678985, "grad_norm": 0.5162933468818665, "learning_rate": 7.2630074616532085e-06, "loss": 0.0653, "step": 30965 }, { "epoch": 0.6823447751574147, "grad_norm": 0.6081966161727905, "learning_rate": 7.262090343849961e-06, "loss": 0.0818, "step": 30966 }, { "epoch": 0.6823668104469307, "grad_norm": 0.5834333300590515, "learning_rate": 7.261173265460049e-06, "loss": 0.0798, "step": 30967 }, { "epoch": 0.6823888457364469, "grad_norm": 0.6355615258216858, "learning_rate": 7.260256226488125e-06, "loss": 0.0706, "step": 30968 }, { "epoch": 0.682410881025963, "grad_norm": 0.6658045053482056, "learning_rate": 7.259339226938866e-06, "loss": 0.0554, "step": 30969 }, { "epoch": 0.6824329163154792, "grad_norm": 0.6673107743263245, "learning_rate": 7.25842226681695e-06, "loss": 0.0746, "step": 30970 }, { "epoch": 0.6824549516049954, "grad_norm": 0.7748396396636963, "learning_rate": 7.257505346127035e-06, "loss": 0.0749, "step": 30971 }, { "epoch": 0.6824769868945115, "grad_norm": 0.5070906281471252, "learning_rate": 7.2565884648738015e-06, "loss": 0.0886, "step": 30972 }, { "epoch": 0.6824990221840277, "grad_norm": 1.1717989444732666, "learning_rate": 7.255671623061906e-06, "loss": 0.0741, "step": 30973 }, { "epoch": 0.6825210574735439, "grad_norm": 0.7764253616333008, "learning_rate": 7.254754820696038e-06, "loss": 0.0688, "step": 30974 }, { "epoch": 0.68254309276306, "grad_norm": 0.8134531378746033, "learning_rate": 7.253838057780851e-06, "loss": 0.0706, "step": 30975 }, { "epoch": 0.6825651280525762, "grad_norm": 0.6484913229942322, "learning_rate": 7.2529213343210265e-06, "loss": 0.0738, "step": 30976 }, { "epoch": 0.6825871633420924, "grad_norm": 0.6122310161590576, "learning_rate": 7.252004650321222e-06, "loss": 0.0501, "step": 30977 }, { "epoch": 0.6826091986316085, "grad_norm": 0.6274835467338562, "learning_rate": 7.251088005786113e-06, "loss": 0.0787, "step": 30978 }, { "epoch": 0.6826312339211247, "grad_norm": 0.8925508856773376, "learning_rate": 7.250171400720373e-06, "loss": 0.0672, "step": 30979 }, { "epoch": 0.6826532692106408, "grad_norm": 1.2618181705474854, "learning_rate": 7.249254835128662e-06, "loss": 0.1107, "step": 30980 }, { "epoch": 0.682675304500157, "grad_norm": 0.2149711400270462, "learning_rate": 7.248338309015651e-06, "loss": 0.0877, "step": 30981 }, { "epoch": 0.6826973397896732, "grad_norm": 0.5625845193862915, "learning_rate": 7.247421822386011e-06, "loss": 0.0477, "step": 30982 }, { "epoch": 0.6827193750791893, "grad_norm": 0.6580227613449097, "learning_rate": 7.246505375244413e-06, "loss": 0.0665, "step": 30983 }, { "epoch": 0.6827414103687055, "grad_norm": 1.1195223331451416, "learning_rate": 7.245588967595516e-06, "loss": 0.0875, "step": 30984 }, { "epoch": 0.6827634456582217, "grad_norm": 0.44735339283943176, "learning_rate": 7.244672599443992e-06, "loss": 0.056, "step": 30985 }, { "epoch": 0.6827854809477378, "grad_norm": 0.7525556087493896, "learning_rate": 7.243756270794515e-06, "loss": 0.0576, "step": 30986 }, { "epoch": 0.682807516237254, "grad_norm": 0.39885351061820984, "learning_rate": 7.2428399816517415e-06, "loss": 0.0944, "step": 30987 }, { "epoch": 0.6828295515267702, "grad_norm": 0.8424198627471924, "learning_rate": 7.2419237320203485e-06, "loss": 0.0838, "step": 30988 }, { "epoch": 0.6828515868162863, "grad_norm": 0.7288709282875061, "learning_rate": 7.241007521904987e-06, "loss": 0.0648, "step": 30989 }, { "epoch": 0.6828736221058025, "grad_norm": 0.5521694421768188, "learning_rate": 7.240091351310347e-06, "loss": 0.0577, "step": 30990 }, { "epoch": 0.6828956573953187, "grad_norm": 0.8297178149223328, "learning_rate": 7.239175220241077e-06, "loss": 0.0662, "step": 30991 }, { "epoch": 0.6829176926848347, "grad_norm": 0.6067399382591248, "learning_rate": 7.23825912870185e-06, "loss": 0.0606, "step": 30992 }, { "epoch": 0.6829397279743509, "grad_norm": 1.2232098579406738, "learning_rate": 7.237343076697335e-06, "loss": 0.0678, "step": 30993 }, { "epoch": 0.682961763263867, "grad_norm": 0.45346930623054504, "learning_rate": 7.236427064232191e-06, "loss": 0.0638, "step": 30994 }, { "epoch": 0.6829837985533832, "grad_norm": 0.6479440927505493, "learning_rate": 7.235511091311091e-06, "loss": 0.0515, "step": 30995 }, { "epoch": 0.6830058338428994, "grad_norm": 0.4075329899787903, "learning_rate": 7.234595157938686e-06, "loss": 0.0523, "step": 30996 }, { "epoch": 0.6830278691324155, "grad_norm": 0.3440828025341034, "learning_rate": 7.233679264119663e-06, "loss": 0.034, "step": 30997 }, { "epoch": 0.6830499044219317, "grad_norm": 0.7781459093093872, "learning_rate": 7.232763409858671e-06, "loss": 0.072, "step": 30998 }, { "epoch": 0.6830719397114479, "grad_norm": 0.5955060124397278, "learning_rate": 7.2318475951603864e-06, "loss": 0.0673, "step": 30999 }, { "epoch": 0.683093975000964, "grad_norm": 0.4646633267402649, "learning_rate": 7.230931820029456e-06, "loss": 0.0673, "step": 31000 }, { "epoch": 0.6831160102904802, "grad_norm": 0.565902590751648, "learning_rate": 7.230016084470567e-06, "loss": 0.0967, "step": 31001 }, { "epoch": 0.6831380455799964, "grad_norm": 0.6884875297546387, "learning_rate": 7.229100388488365e-06, "loss": 0.0462, "step": 31002 }, { "epoch": 0.6831600808695125, "grad_norm": 0.2897268533706665, "learning_rate": 7.2281847320875305e-06, "loss": 0.0668, "step": 31003 }, { "epoch": 0.6831821161590287, "grad_norm": 0.9275617003440857, "learning_rate": 7.227269115272712e-06, "loss": 0.0734, "step": 31004 }, { "epoch": 0.6832041514485448, "grad_norm": 0.6357849836349487, "learning_rate": 7.2263535380485805e-06, "loss": 0.0733, "step": 31005 }, { "epoch": 0.683226186738061, "grad_norm": 0.548274040222168, "learning_rate": 7.225438000419802e-06, "loss": 0.0446, "step": 31006 }, { "epoch": 0.6832482220275772, "grad_norm": 0.6752200126647949, "learning_rate": 7.224522502391034e-06, "loss": 0.0662, "step": 31007 }, { "epoch": 0.6832702573170933, "grad_norm": 0.6172910928726196, "learning_rate": 7.2236070439669415e-06, "loss": 0.0527, "step": 31008 }, { "epoch": 0.6832922926066095, "grad_norm": 0.4754309356212616, "learning_rate": 7.222691625152189e-06, "loss": 0.0519, "step": 31009 }, { "epoch": 0.6833143278961257, "grad_norm": 0.47443076968193054, "learning_rate": 7.221776245951443e-06, "loss": 0.0412, "step": 31010 }, { "epoch": 0.6833363631856418, "grad_norm": 0.5425402522087097, "learning_rate": 7.220860906369357e-06, "loss": 0.0487, "step": 31011 }, { "epoch": 0.683358398475158, "grad_norm": 0.7211005091667175, "learning_rate": 7.219945606410596e-06, "loss": 0.0744, "step": 31012 }, { "epoch": 0.6833804337646742, "grad_norm": 0.6229386925697327, "learning_rate": 7.219030346079832e-06, "loss": 0.0818, "step": 31013 }, { "epoch": 0.6834024690541903, "grad_norm": 0.4681510031223297, "learning_rate": 7.218115125381712e-06, "loss": 0.0701, "step": 31014 }, { "epoch": 0.6834245043437065, "grad_norm": 0.6647553443908691, "learning_rate": 7.217199944320906e-06, "loss": 0.0598, "step": 31015 }, { "epoch": 0.6834465396332227, "grad_norm": 0.6790470480918884, "learning_rate": 7.2162848029020735e-06, "loss": 0.0671, "step": 31016 }, { "epoch": 0.6834685749227387, "grad_norm": 0.7908619046211243, "learning_rate": 7.215369701129881e-06, "loss": 0.0788, "step": 31017 }, { "epoch": 0.6834906102122549, "grad_norm": 0.4175027310848236, "learning_rate": 7.2144546390089815e-06, "loss": 0.045, "step": 31018 }, { "epoch": 0.683512645501771, "grad_norm": 0.6375248432159424, "learning_rate": 7.213539616544037e-06, "loss": 0.0556, "step": 31019 }, { "epoch": 0.6835346807912872, "grad_norm": 0.6560407876968384, "learning_rate": 7.212624633739719e-06, "loss": 0.0417, "step": 31020 }, { "epoch": 0.6835567160808034, "grad_norm": 0.2687283754348755, "learning_rate": 7.211709690600672e-06, "loss": 0.0796, "step": 31021 }, { "epoch": 0.6835787513703195, "grad_norm": 0.5330265164375305, "learning_rate": 7.210794787131571e-06, "loss": 0.0533, "step": 31022 }, { "epoch": 0.6836007866598357, "grad_norm": 0.7956336736679077, "learning_rate": 7.209879923337056e-06, "loss": 0.0645, "step": 31023 }, { "epoch": 0.6836228219493519, "grad_norm": 0.5869985222816467, "learning_rate": 7.208965099221811e-06, "loss": 0.065, "step": 31024 }, { "epoch": 0.683644857238868, "grad_norm": 0.39915931224823, "learning_rate": 7.208050314790481e-06, "loss": 0.0485, "step": 31025 }, { "epoch": 0.6836668925283842, "grad_norm": 0.42637354135513306, "learning_rate": 7.207135570047734e-06, "loss": 0.0706, "step": 31026 }, { "epoch": 0.6836889278179004, "grad_norm": 0.4916577637195587, "learning_rate": 7.206220864998218e-06, "loss": 0.045, "step": 31027 }, { "epoch": 0.6837109631074165, "grad_norm": 0.5306881666183472, "learning_rate": 7.205306199646599e-06, "loss": 0.054, "step": 31028 }, { "epoch": 0.6837329983969327, "grad_norm": 0.8942685723304749, "learning_rate": 7.204391573997541e-06, "loss": 0.0708, "step": 31029 }, { "epoch": 0.6837550336864489, "grad_norm": 0.8720718026161194, "learning_rate": 7.203476988055693e-06, "loss": 0.0618, "step": 31030 }, { "epoch": 0.683777068975965, "grad_norm": 1.002326488494873, "learning_rate": 7.202562441825716e-06, "loss": 0.0492, "step": 31031 }, { "epoch": 0.6837991042654812, "grad_norm": 0.9520363807678223, "learning_rate": 7.20164793531227e-06, "loss": 0.0904, "step": 31032 }, { "epoch": 0.6838211395549973, "grad_norm": 0.6609165668487549, "learning_rate": 7.200733468520018e-06, "loss": 0.0573, "step": 31033 }, { "epoch": 0.6838431748445135, "grad_norm": 0.4339575171470642, "learning_rate": 7.199819041453607e-06, "loss": 0.0589, "step": 31034 }, { "epoch": 0.6838652101340297, "grad_norm": 0.6624125838279724, "learning_rate": 7.198904654117702e-06, "loss": 0.0613, "step": 31035 }, { "epoch": 0.6838872454235458, "grad_norm": 0.6140376329421997, "learning_rate": 7.197990306516964e-06, "loss": 0.0687, "step": 31036 }, { "epoch": 0.683909280713062, "grad_norm": 0.5080550312995911, "learning_rate": 7.1970759986560385e-06, "loss": 0.0656, "step": 31037 }, { "epoch": 0.6839313160025782, "grad_norm": 0.4762391746044159, "learning_rate": 7.196161730539589e-06, "loss": 0.0625, "step": 31038 }, { "epoch": 0.6839533512920943, "grad_norm": 0.5088039040565491, "learning_rate": 7.1952475021722745e-06, "loss": 0.0507, "step": 31039 }, { "epoch": 0.6839753865816105, "grad_norm": 0.46872586011886597, "learning_rate": 7.194333313558753e-06, "loss": 0.0608, "step": 31040 }, { "epoch": 0.6839974218711266, "grad_norm": 0.44287994503974915, "learning_rate": 7.193419164703673e-06, "loss": 0.0538, "step": 31041 }, { "epoch": 0.6840194571606427, "grad_norm": 0.667740523815155, "learning_rate": 7.192505055611695e-06, "loss": 0.045, "step": 31042 }, { "epoch": 0.6840414924501589, "grad_norm": 0.27285346388816833, "learning_rate": 7.191590986287482e-06, "loss": 0.0391, "step": 31043 }, { "epoch": 0.684063527739675, "grad_norm": 0.691137433052063, "learning_rate": 7.190676956735677e-06, "loss": 0.0815, "step": 31044 }, { "epoch": 0.6840855630291912, "grad_norm": 0.5081951022148132, "learning_rate": 7.189762966960948e-06, "loss": 0.071, "step": 31045 }, { "epoch": 0.6841075983187074, "grad_norm": 0.5907586216926575, "learning_rate": 7.188849016967933e-06, "loss": 0.0706, "step": 31046 }, { "epoch": 0.6841296336082235, "grad_norm": 0.7181195020675659, "learning_rate": 7.18793510676131e-06, "loss": 0.0731, "step": 31047 }, { "epoch": 0.6841516688977397, "grad_norm": 0.6269686818122864, "learning_rate": 7.187021236345717e-06, "loss": 0.0585, "step": 31048 }, { "epoch": 0.6841737041872559, "grad_norm": 0.7654528021812439, "learning_rate": 7.186107405725819e-06, "loss": 0.052, "step": 31049 }, { "epoch": 0.684195739476772, "grad_norm": 0.5492909550666809, "learning_rate": 7.185193614906263e-06, "loss": 0.068, "step": 31050 }, { "epoch": 0.6842177747662882, "grad_norm": 0.5414223074913025, "learning_rate": 7.184279863891705e-06, "loss": 0.05, "step": 31051 }, { "epoch": 0.6842398100558044, "grad_norm": 0.33428847789764404, "learning_rate": 7.183366152686807e-06, "loss": 0.0517, "step": 31052 }, { "epoch": 0.6842618453453205, "grad_norm": 0.6691286563873291, "learning_rate": 7.18245248129621e-06, "loss": 0.0611, "step": 31053 }, { "epoch": 0.6842838806348367, "grad_norm": 0.4487234055995941, "learning_rate": 7.181538849724576e-06, "loss": 0.0769, "step": 31054 }, { "epoch": 0.6843059159243529, "grad_norm": 0.6189378499984741, "learning_rate": 7.180625257976556e-06, "loss": 0.0746, "step": 31055 }, { "epoch": 0.684327951213869, "grad_norm": 0.7719473838806152, "learning_rate": 7.1797117060568105e-06, "loss": 0.1089, "step": 31056 }, { "epoch": 0.6843499865033852, "grad_norm": 0.6055682897567749, "learning_rate": 7.178798193969982e-06, "loss": 0.1016, "step": 31057 }, { "epoch": 0.6843720217929014, "grad_norm": 0.8943414092063904, "learning_rate": 7.177884721720726e-06, "loss": 0.0727, "step": 31058 }, { "epoch": 0.6843940570824175, "grad_norm": 0.7046418190002441, "learning_rate": 7.1769712893137075e-06, "loss": 0.0435, "step": 31059 }, { "epoch": 0.6844160923719337, "grad_norm": 0.5844194889068604, "learning_rate": 7.176057896753561e-06, "loss": 0.0625, "step": 31060 }, { "epoch": 0.6844381276614498, "grad_norm": 0.4831179678440094, "learning_rate": 7.175144544044946e-06, "loss": 0.0391, "step": 31061 }, { "epoch": 0.684460162950966, "grad_norm": 0.8060910105705261, "learning_rate": 7.174231231192518e-06, "loss": 0.0835, "step": 31062 }, { "epoch": 0.6844821982404822, "grad_norm": 0.24166302382946014, "learning_rate": 7.173317958200931e-06, "loss": 0.0543, "step": 31063 }, { "epoch": 0.6845042335299983, "grad_norm": 0.7246116995811462, "learning_rate": 7.172404725074829e-06, "loss": 0.0724, "step": 31064 }, { "epoch": 0.6845262688195145, "grad_norm": 0.6575214862823486, "learning_rate": 7.1714915318188655e-06, "loss": 0.0456, "step": 31065 }, { "epoch": 0.6845483041090306, "grad_norm": 0.8864882588386536, "learning_rate": 7.1705783784377e-06, "loss": 0.0682, "step": 31066 }, { "epoch": 0.6845703393985467, "grad_norm": 0.4032839834690094, "learning_rate": 7.169665264935971e-06, "loss": 0.0497, "step": 31067 }, { "epoch": 0.6845923746880629, "grad_norm": 0.6087478399276733, "learning_rate": 7.168752191318342e-06, "loss": 0.0687, "step": 31068 }, { "epoch": 0.684614409977579, "grad_norm": 0.4072495400905609, "learning_rate": 7.167839157589447e-06, "loss": 0.0377, "step": 31069 }, { "epoch": 0.6846364452670952, "grad_norm": 0.3970128297805786, "learning_rate": 7.166926163753956e-06, "loss": 0.069, "step": 31070 }, { "epoch": 0.6846584805566114, "grad_norm": 0.462856650352478, "learning_rate": 7.166013209816508e-06, "loss": 0.0726, "step": 31071 }, { "epoch": 0.6846805158461275, "grad_norm": 0.9259478449821472, "learning_rate": 7.165100295781758e-06, "loss": 0.0603, "step": 31072 }, { "epoch": 0.6847025511356437, "grad_norm": 0.6066498160362244, "learning_rate": 7.16418742165435e-06, "loss": 0.0763, "step": 31073 }, { "epoch": 0.6847245864251599, "grad_norm": 0.4514473080635071, "learning_rate": 7.1632745874389375e-06, "loss": 0.0586, "step": 31074 }, { "epoch": 0.684746621714676, "grad_norm": 0.6000154614448547, "learning_rate": 7.162361793140174e-06, "loss": 0.0692, "step": 31075 }, { "epoch": 0.6847686570041922, "grad_norm": 0.6329653263092041, "learning_rate": 7.161449038762701e-06, "loss": 0.0787, "step": 31076 }, { "epoch": 0.6847906922937084, "grad_norm": 0.7474121451377869, "learning_rate": 7.16053632431117e-06, "loss": 0.0625, "step": 31077 }, { "epoch": 0.6848127275832245, "grad_norm": 0.7849804759025574, "learning_rate": 7.1596236497902315e-06, "loss": 0.0714, "step": 31078 }, { "epoch": 0.6848347628727407, "grad_norm": 0.5564764738082886, "learning_rate": 7.158711015204541e-06, "loss": 0.0873, "step": 31079 }, { "epoch": 0.6848567981622569, "grad_norm": 0.7370741367340088, "learning_rate": 7.157798420558732e-06, "loss": 0.0408, "step": 31080 }, { "epoch": 0.684878833451773, "grad_norm": 0.47588974237442017, "learning_rate": 7.156885865857462e-06, "loss": 0.0404, "step": 31081 }, { "epoch": 0.6849008687412892, "grad_norm": 1.0043400526046753, "learning_rate": 7.1559733511053855e-06, "loss": 0.0754, "step": 31082 }, { "epoch": 0.6849229040308054, "grad_norm": 0.7073840498924255, "learning_rate": 7.155060876307137e-06, "loss": 0.0699, "step": 31083 }, { "epoch": 0.6849449393203215, "grad_norm": 0.7234665155410767, "learning_rate": 7.154148441467368e-06, "loss": 0.0839, "step": 31084 }, { "epoch": 0.6849669746098377, "grad_norm": 0.7547352910041809, "learning_rate": 7.15323604659073e-06, "loss": 0.0577, "step": 31085 }, { "epoch": 0.6849890098993539, "grad_norm": 0.48606160283088684, "learning_rate": 7.152323691681873e-06, "loss": 0.0564, "step": 31086 }, { "epoch": 0.68501104518887, "grad_norm": 0.45454636216163635, "learning_rate": 7.151411376745435e-06, "loss": 0.0557, "step": 31087 }, { "epoch": 0.6850330804783862, "grad_norm": 0.5312095880508423, "learning_rate": 7.150499101786069e-06, "loss": 0.055, "step": 31088 }, { "epoch": 0.6850551157679023, "grad_norm": 0.5790910720825195, "learning_rate": 7.149586866808423e-06, "loss": 0.0631, "step": 31089 }, { "epoch": 0.6850771510574185, "grad_norm": 0.8628585338592529, "learning_rate": 7.148674671817138e-06, "loss": 0.095, "step": 31090 }, { "epoch": 0.6850991863469346, "grad_norm": 0.34839242696762085, "learning_rate": 7.147762516816867e-06, "loss": 0.0351, "step": 31091 }, { "epoch": 0.6851212216364507, "grad_norm": 0.698123037815094, "learning_rate": 7.146850401812241e-06, "loss": 0.0845, "step": 31092 }, { "epoch": 0.6851432569259669, "grad_norm": 0.42791345715522766, "learning_rate": 7.14593832680793e-06, "loss": 0.099, "step": 31093 }, { "epoch": 0.6851652922154831, "grad_norm": 0.7128629684448242, "learning_rate": 7.14502629180856e-06, "loss": 0.0692, "step": 31094 }, { "epoch": 0.6851873275049992, "grad_norm": 0.3387201726436615, "learning_rate": 7.144114296818787e-06, "loss": 0.0574, "step": 31095 }, { "epoch": 0.6852093627945154, "grad_norm": 0.21414093673229218, "learning_rate": 7.1432023418432505e-06, "loss": 0.0357, "step": 31096 }, { "epoch": 0.6852313980840316, "grad_norm": 1.0315059423446655, "learning_rate": 7.1422904268865945e-06, "loss": 0.0976, "step": 31097 }, { "epoch": 0.6852534333735477, "grad_norm": 0.6114830374717712, "learning_rate": 7.141378551953473e-06, "loss": 0.0724, "step": 31098 }, { "epoch": 0.6852754686630639, "grad_norm": 0.5000482797622681, "learning_rate": 7.140466717048521e-06, "loss": 0.0617, "step": 31099 }, { "epoch": 0.68529750395258, "grad_norm": 0.5365231037139893, "learning_rate": 7.139554922176385e-06, "loss": 0.077, "step": 31100 }, { "epoch": 0.6853195392420962, "grad_norm": 0.6777381896972656, "learning_rate": 7.138643167341712e-06, "loss": 0.0841, "step": 31101 }, { "epoch": 0.6853415745316124, "grad_norm": 0.6538597345352173, "learning_rate": 7.13773145254915e-06, "loss": 0.0719, "step": 31102 }, { "epoch": 0.6853636098211285, "grad_norm": 0.5383036732673645, "learning_rate": 7.136819777803327e-06, "loss": 0.0742, "step": 31103 }, { "epoch": 0.6853856451106447, "grad_norm": 0.3250507712364197, "learning_rate": 7.135908143108908e-06, "loss": 0.0508, "step": 31104 }, { "epoch": 0.6854076804001609, "grad_norm": 0.6366339325904846, "learning_rate": 7.134996548470527e-06, "loss": 0.0585, "step": 31105 }, { "epoch": 0.685429715689677, "grad_norm": 0.612511932849884, "learning_rate": 7.134084993892819e-06, "loss": 0.0642, "step": 31106 }, { "epoch": 0.6854517509791932, "grad_norm": 0.5794296860694885, "learning_rate": 7.1331734793804345e-06, "loss": 0.0541, "step": 31107 }, { "epoch": 0.6854737862687094, "grad_norm": 0.7649654150009155, "learning_rate": 7.132262004938016e-06, "loss": 0.085, "step": 31108 }, { "epoch": 0.6854958215582255, "grad_norm": 0.3746389150619507, "learning_rate": 7.1313505705702125e-06, "loss": 0.0537, "step": 31109 }, { "epoch": 0.6855178568477417, "grad_norm": 0.5558607578277588, "learning_rate": 7.130439176281654e-06, "loss": 0.068, "step": 31110 }, { "epoch": 0.6855398921372579, "grad_norm": 0.5055269002914429, "learning_rate": 7.129527822076988e-06, "loss": 0.0716, "step": 31111 }, { "epoch": 0.685561927426774, "grad_norm": 0.7455052137374878, "learning_rate": 7.128616507960859e-06, "loss": 0.075, "step": 31112 }, { "epoch": 0.6855839627162902, "grad_norm": 0.6245713233947754, "learning_rate": 7.127705233937912e-06, "loss": 0.059, "step": 31113 }, { "epoch": 0.6856059980058063, "grad_norm": 0.7608966827392578, "learning_rate": 7.126794000012778e-06, "loss": 0.0697, "step": 31114 }, { "epoch": 0.6856280332953225, "grad_norm": 0.9496463537216187, "learning_rate": 7.125882806190103e-06, "loss": 0.0502, "step": 31115 }, { "epoch": 0.6856500685848386, "grad_norm": 0.49192187190055847, "learning_rate": 7.124971652474536e-06, "loss": 0.0552, "step": 31116 }, { "epoch": 0.6856721038743547, "grad_norm": 0.8918130397796631, "learning_rate": 7.1240605388707066e-06, "loss": 0.0917, "step": 31117 }, { "epoch": 0.6856941391638709, "grad_norm": 0.5841414332389832, "learning_rate": 7.1231494653832636e-06, "loss": 0.0661, "step": 31118 }, { "epoch": 0.6857161744533871, "grad_norm": 0.7138931751251221, "learning_rate": 7.1222384320168355e-06, "loss": 0.0981, "step": 31119 }, { "epoch": 0.6857382097429032, "grad_norm": 0.9081428050994873, "learning_rate": 7.12132743877608e-06, "loss": 0.0478, "step": 31120 }, { "epoch": 0.6857602450324194, "grad_norm": 0.6075642108917236, "learning_rate": 7.120416485665626e-06, "loss": 0.0809, "step": 31121 }, { "epoch": 0.6857822803219356, "grad_norm": 0.8623992800712585, "learning_rate": 7.11950557269012e-06, "loss": 0.064, "step": 31122 }, { "epoch": 0.6858043156114517, "grad_norm": 0.4479347765445709, "learning_rate": 7.118594699854192e-06, "loss": 0.0545, "step": 31123 }, { "epoch": 0.6858263509009679, "grad_norm": 0.5456404089927673, "learning_rate": 7.117683867162489e-06, "loss": 0.0521, "step": 31124 }, { "epoch": 0.685848386190484, "grad_norm": 0.6933543682098389, "learning_rate": 7.116773074619654e-06, "loss": 0.0813, "step": 31125 }, { "epoch": 0.6858704214800002, "grad_norm": 0.4873257875442505, "learning_rate": 7.115862322230309e-06, "loss": 0.0483, "step": 31126 }, { "epoch": 0.6858924567695164, "grad_norm": 0.5887467265129089, "learning_rate": 7.114951609999118e-06, "loss": 0.0651, "step": 31127 }, { "epoch": 0.6859144920590325, "grad_norm": 0.4836407005786896, "learning_rate": 7.1140409379307e-06, "loss": 0.0408, "step": 31128 }, { "epoch": 0.6859365273485487, "grad_norm": 0.6375092267990112, "learning_rate": 7.113130306029706e-06, "loss": 0.0598, "step": 31129 }, { "epoch": 0.6859585626380649, "grad_norm": 0.6281035542488098, "learning_rate": 7.112219714300763e-06, "loss": 0.0827, "step": 31130 }, { "epoch": 0.685980597927581, "grad_norm": 0.7666551470756531, "learning_rate": 7.111309162748514e-06, "loss": 0.0929, "step": 31131 }, { "epoch": 0.6860026332170972, "grad_norm": 0.47518953680992126, "learning_rate": 7.110398651377604e-06, "loss": 0.0579, "step": 31132 }, { "epoch": 0.6860246685066134, "grad_norm": 0.5353682041168213, "learning_rate": 7.109488180192659e-06, "loss": 0.0612, "step": 31133 }, { "epoch": 0.6860467037961295, "grad_norm": 0.7050542831420898, "learning_rate": 7.108577749198321e-06, "loss": 0.0489, "step": 31134 }, { "epoch": 0.6860687390856457, "grad_norm": 0.22659708559513092, "learning_rate": 7.107667358399229e-06, "loss": 0.0467, "step": 31135 }, { "epoch": 0.6860907743751619, "grad_norm": 0.5595171451568604, "learning_rate": 7.106757007800026e-06, "loss": 0.0679, "step": 31136 }, { "epoch": 0.686112809664678, "grad_norm": 0.6088038682937622, "learning_rate": 7.105846697405335e-06, "loss": 0.0844, "step": 31137 }, { "epoch": 0.6861348449541942, "grad_norm": 0.764397144317627, "learning_rate": 7.1049364272198004e-06, "loss": 0.0758, "step": 31138 }, { "epoch": 0.6861568802437104, "grad_norm": 0.4766726493835449, "learning_rate": 7.104026197248065e-06, "loss": 0.0577, "step": 31139 }, { "epoch": 0.6861789155332264, "grad_norm": 0.728160560131073, "learning_rate": 7.103116007494751e-06, "loss": 0.0545, "step": 31140 }, { "epoch": 0.6862009508227426, "grad_norm": 0.4181242287158966, "learning_rate": 7.102205857964509e-06, "loss": 0.0541, "step": 31141 }, { "epoch": 0.6862229861122587, "grad_norm": 0.9015403389930725, "learning_rate": 7.101295748661955e-06, "loss": 0.0715, "step": 31142 }, { "epoch": 0.6862450214017749, "grad_norm": 0.24850143492221832, "learning_rate": 7.100385679591749e-06, "loss": 0.0431, "step": 31143 }, { "epoch": 0.6862670566912911, "grad_norm": 1.0454299449920654, "learning_rate": 7.0994756507585095e-06, "loss": 0.0951, "step": 31144 }, { "epoch": 0.6862890919808072, "grad_norm": 0.9103071689605713, "learning_rate": 7.0985656621668835e-06, "loss": 0.0853, "step": 31145 }, { "epoch": 0.6863111272703234, "grad_norm": 0.5737603306770325, "learning_rate": 7.097655713821491e-06, "loss": 0.1022, "step": 31146 }, { "epoch": 0.6863331625598396, "grad_norm": 0.5350738763809204, "learning_rate": 7.096745805726979e-06, "loss": 0.0693, "step": 31147 }, { "epoch": 0.6863551978493557, "grad_norm": 0.6911018490791321, "learning_rate": 7.095835937887982e-06, "loss": 0.0666, "step": 31148 }, { "epoch": 0.6863772331388719, "grad_norm": 0.4761626422405243, "learning_rate": 7.094926110309123e-06, "loss": 0.0783, "step": 31149 }, { "epoch": 0.686399268428388, "grad_norm": 0.32494670152664185, "learning_rate": 7.094016322995052e-06, "loss": 0.0323, "step": 31150 }, { "epoch": 0.6864213037179042, "grad_norm": 0.2763456404209137, "learning_rate": 7.093106575950392e-06, "loss": 0.0335, "step": 31151 }, { "epoch": 0.6864433390074204, "grad_norm": 0.6773032546043396, "learning_rate": 7.092196869179785e-06, "loss": 0.0602, "step": 31152 }, { "epoch": 0.6864653742969365, "grad_norm": 0.49296829104423523, "learning_rate": 7.0912872026878536e-06, "loss": 0.0678, "step": 31153 }, { "epoch": 0.6864874095864527, "grad_norm": 0.47513341903686523, "learning_rate": 7.0903775764792376e-06, "loss": 0.057, "step": 31154 }, { "epoch": 0.6865094448759689, "grad_norm": 0.7265848517417908, "learning_rate": 7.089467990558578e-06, "loss": 0.0988, "step": 31155 }, { "epoch": 0.686531480165485, "grad_norm": 0.871627926826477, "learning_rate": 7.088558444930493e-06, "loss": 0.0684, "step": 31156 }, { "epoch": 0.6865535154550012, "grad_norm": 0.434648722410202, "learning_rate": 7.087648939599621e-06, "loss": 0.0542, "step": 31157 }, { "epoch": 0.6865755507445174, "grad_norm": 0.8903284072875977, "learning_rate": 7.086739474570596e-06, "loss": 0.0919, "step": 31158 }, { "epoch": 0.6865975860340335, "grad_norm": 0.8035737872123718, "learning_rate": 7.085830049848058e-06, "loss": 0.0621, "step": 31159 }, { "epoch": 0.6866196213235497, "grad_norm": 0.6196966767311096, "learning_rate": 7.084920665436624e-06, "loss": 0.0545, "step": 31160 }, { "epoch": 0.6866416566130659, "grad_norm": 0.46251559257507324, "learning_rate": 7.084011321340934e-06, "loss": 0.0425, "step": 31161 }, { "epoch": 0.686663691902582, "grad_norm": 0.41504740715026855, "learning_rate": 7.083102017565625e-06, "loss": 0.0678, "step": 31162 }, { "epoch": 0.6866857271920982, "grad_norm": 0.7488622069358826, "learning_rate": 7.082192754115317e-06, "loss": 0.0709, "step": 31163 }, { "epoch": 0.6867077624816144, "grad_norm": 0.5746570229530334, "learning_rate": 7.081283530994652e-06, "loss": 0.0608, "step": 31164 }, { "epoch": 0.6867297977711304, "grad_norm": 0.6809127330780029, "learning_rate": 7.080374348208247e-06, "loss": 0.0614, "step": 31165 }, { "epoch": 0.6867518330606466, "grad_norm": 0.37188994884490967, "learning_rate": 7.079465205760752e-06, "loss": 0.0592, "step": 31166 }, { "epoch": 0.6867738683501627, "grad_norm": 0.695410966873169, "learning_rate": 7.078556103656782e-06, "loss": 0.0629, "step": 31167 }, { "epoch": 0.6867959036396789, "grad_norm": 0.5753129720687866, "learning_rate": 7.07764704190098e-06, "loss": 0.0734, "step": 31168 }, { "epoch": 0.6868179389291951, "grad_norm": 0.4263650178909302, "learning_rate": 7.0767380204979645e-06, "loss": 0.0547, "step": 31169 }, { "epoch": 0.6868399742187112, "grad_norm": 0.8690373301506042, "learning_rate": 7.07582903945237e-06, "loss": 0.1055, "step": 31170 }, { "epoch": 0.6868620095082274, "grad_norm": 0.4138891398906708, "learning_rate": 7.074920098768834e-06, "loss": 0.0329, "step": 31171 }, { "epoch": 0.6868840447977436, "grad_norm": 0.3510177433490753, "learning_rate": 7.07401119845197e-06, "loss": 0.0482, "step": 31172 }, { "epoch": 0.6869060800872597, "grad_norm": 0.9890276193618774, "learning_rate": 7.0731023385064265e-06, "loss": 0.0754, "step": 31173 }, { "epoch": 0.6869281153767759, "grad_norm": 0.47238391637802124, "learning_rate": 7.072193518936818e-06, "loss": 0.054, "step": 31174 }, { "epoch": 0.6869501506662921, "grad_norm": 0.6079242825508118, "learning_rate": 7.071284739747785e-06, "loss": 0.0724, "step": 31175 }, { "epoch": 0.6869721859558082, "grad_norm": 0.5248685479164124, "learning_rate": 7.070376000943945e-06, "loss": 0.0684, "step": 31176 }, { "epoch": 0.6869942212453244, "grad_norm": 0.6132646799087524, "learning_rate": 7.069467302529931e-06, "loss": 0.084, "step": 31177 }, { "epoch": 0.6870162565348406, "grad_norm": 0.6802740097045898, "learning_rate": 7.06855864451038e-06, "loss": 0.1035, "step": 31178 }, { "epoch": 0.6870382918243567, "grad_norm": 0.40643104910850525, "learning_rate": 7.067650026889907e-06, "loss": 0.0814, "step": 31179 }, { "epoch": 0.6870603271138729, "grad_norm": 0.922292947769165, "learning_rate": 7.066741449673147e-06, "loss": 0.0932, "step": 31180 }, { "epoch": 0.687082362403389, "grad_norm": 0.47027111053466797, "learning_rate": 7.065832912864725e-06, "loss": 0.0531, "step": 31181 }, { "epoch": 0.6871043976929052, "grad_norm": 0.7262096405029297, "learning_rate": 7.064924416469277e-06, "loss": 0.0479, "step": 31182 }, { "epoch": 0.6871264329824214, "grad_norm": 0.4762721061706543, "learning_rate": 7.064015960491419e-06, "loss": 0.0521, "step": 31183 }, { "epoch": 0.6871484682719375, "grad_norm": 0.718636155128479, "learning_rate": 7.063107544935784e-06, "loss": 0.0453, "step": 31184 }, { "epoch": 0.6871705035614537, "grad_norm": 0.2655630111694336, "learning_rate": 7.062199169807002e-06, "loss": 0.0488, "step": 31185 }, { "epoch": 0.6871925388509699, "grad_norm": 0.4616703987121582, "learning_rate": 7.06129083510969e-06, "loss": 0.0511, "step": 31186 }, { "epoch": 0.687214574140486, "grad_norm": 0.8234648108482361, "learning_rate": 7.06038254084849e-06, "loss": 0.0524, "step": 31187 }, { "epoch": 0.6872366094300022, "grad_norm": 1.0211368799209595, "learning_rate": 7.059474287028006e-06, "loss": 0.1065, "step": 31188 }, { "epoch": 0.6872586447195184, "grad_norm": 0.9087245464324951, "learning_rate": 7.058566073652888e-06, "loss": 0.0542, "step": 31189 }, { "epoch": 0.6872806800090344, "grad_norm": 0.7844052314758301, "learning_rate": 7.057657900727747e-06, "loss": 0.0679, "step": 31190 }, { "epoch": 0.6873027152985506, "grad_norm": 0.43763023614883423, "learning_rate": 7.056749768257219e-06, "loss": 0.0854, "step": 31191 }, { "epoch": 0.6873247505880667, "grad_norm": 0.4483737051486969, "learning_rate": 7.055841676245917e-06, "loss": 0.0635, "step": 31192 }, { "epoch": 0.6873467858775829, "grad_norm": 0.3602924942970276, "learning_rate": 7.054933624698474e-06, "loss": 0.0575, "step": 31193 }, { "epoch": 0.6873688211670991, "grad_norm": 0.7007876634597778, "learning_rate": 7.054025613619521e-06, "loss": 0.0572, "step": 31194 }, { "epoch": 0.6873908564566152, "grad_norm": 0.7011466026306152, "learning_rate": 7.053117643013665e-06, "loss": 0.08, "step": 31195 }, { "epoch": 0.6874128917461314, "grad_norm": 0.6379483342170715, "learning_rate": 7.052209712885553e-06, "loss": 0.0695, "step": 31196 }, { "epoch": 0.6874349270356476, "grad_norm": 0.5340657830238342, "learning_rate": 7.051301823239794e-06, "loss": 0.0766, "step": 31197 }, { "epoch": 0.6874569623251637, "grad_norm": 0.7448093295097351, "learning_rate": 7.050393974081022e-06, "loss": 0.0412, "step": 31198 }, { "epoch": 0.6874789976146799, "grad_norm": 0.6148877143859863, "learning_rate": 7.0494861654138515e-06, "loss": 0.0759, "step": 31199 }, { "epoch": 0.6875010329041961, "grad_norm": 0.675053060054779, "learning_rate": 7.0485783972429105e-06, "loss": 0.0728, "step": 31200 }, { "epoch": 0.6875230681937122, "grad_norm": 0.6515694260597229, "learning_rate": 7.0476706695728294e-06, "loss": 0.0745, "step": 31201 }, { "epoch": 0.6875451034832284, "grad_norm": 0.7416191101074219, "learning_rate": 7.046762982408222e-06, "loss": 0.0897, "step": 31202 }, { "epoch": 0.6875671387727446, "grad_norm": 0.353882759809494, "learning_rate": 7.045855335753713e-06, "loss": 0.0597, "step": 31203 }, { "epoch": 0.6875891740622607, "grad_norm": 0.6296254992485046, "learning_rate": 7.044947729613929e-06, "loss": 0.0616, "step": 31204 }, { "epoch": 0.6876112093517769, "grad_norm": 0.632436990737915, "learning_rate": 7.0440401639935e-06, "loss": 0.0776, "step": 31205 }, { "epoch": 0.687633244641293, "grad_norm": 1.113034963607788, "learning_rate": 7.043132638897033e-06, "loss": 0.082, "step": 31206 }, { "epoch": 0.6876552799308092, "grad_norm": 0.4352814257144928, "learning_rate": 7.042225154329158e-06, "loss": 0.0581, "step": 31207 }, { "epoch": 0.6876773152203254, "grad_norm": 0.6768476366996765, "learning_rate": 7.041317710294504e-06, "loss": 0.0977, "step": 31208 }, { "epoch": 0.6876993505098415, "grad_norm": 0.5041972994804382, "learning_rate": 7.040410306797681e-06, "loss": 0.0638, "step": 31209 }, { "epoch": 0.6877213857993577, "grad_norm": 0.7839303016662598, "learning_rate": 7.0395029438433216e-06, "loss": 0.0705, "step": 31210 }, { "epoch": 0.6877434210888739, "grad_norm": 0.5483207106590271, "learning_rate": 7.038595621436032e-06, "loss": 0.0678, "step": 31211 }, { "epoch": 0.68776545637839, "grad_norm": 0.29857155680656433, "learning_rate": 7.0376883395804544e-06, "loss": 0.0492, "step": 31212 }, { "epoch": 0.6877874916679062, "grad_norm": 0.5141153931617737, "learning_rate": 7.0367810982811946e-06, "loss": 0.0451, "step": 31213 }, { "epoch": 0.6878095269574223, "grad_norm": 0.814469575881958, "learning_rate": 7.035873897542878e-06, "loss": 0.1099, "step": 31214 }, { "epoch": 0.6878315622469384, "grad_norm": 0.6891950368881226, "learning_rate": 7.034966737370129e-06, "loss": 0.0693, "step": 31215 }, { "epoch": 0.6878535975364546, "grad_norm": 0.7906405925750732, "learning_rate": 7.034059617767569e-06, "loss": 0.035, "step": 31216 }, { "epoch": 0.6878756328259708, "grad_norm": 0.7751676440238953, "learning_rate": 7.033152538739816e-06, "loss": 0.1024, "step": 31217 }, { "epoch": 0.6878976681154869, "grad_norm": 0.7869545221328735, "learning_rate": 7.032245500291477e-06, "loss": 0.0774, "step": 31218 }, { "epoch": 0.6879197034050031, "grad_norm": 0.5643525719642639, "learning_rate": 7.031338502427197e-06, "loss": 0.0395, "step": 31219 }, { "epoch": 0.6879417386945192, "grad_norm": 0.6837323904037476, "learning_rate": 7.0304315451515774e-06, "loss": 0.0704, "step": 31220 }, { "epoch": 0.6879637739840354, "grad_norm": 0.6929269433021545, "learning_rate": 7.029524628469248e-06, "loss": 0.0578, "step": 31221 }, { "epoch": 0.6879858092735516, "grad_norm": 0.6508055925369263, "learning_rate": 7.028617752384813e-06, "loss": 0.0603, "step": 31222 }, { "epoch": 0.6880078445630677, "grad_norm": 0.1823742687702179, "learning_rate": 7.027710916902913e-06, "loss": 0.0484, "step": 31223 }, { "epoch": 0.6880298798525839, "grad_norm": 0.5058918595314026, "learning_rate": 7.026804122028152e-06, "loss": 0.072, "step": 31224 }, { "epoch": 0.6880519151421001, "grad_norm": 0.5884968638420105, "learning_rate": 7.025897367765158e-06, "loss": 0.072, "step": 31225 }, { "epoch": 0.6880739504316162, "grad_norm": 0.613691508769989, "learning_rate": 7.02499065411854e-06, "loss": 0.1048, "step": 31226 }, { "epoch": 0.6880959857211324, "grad_norm": 0.5299010872840881, "learning_rate": 7.024083981092921e-06, "loss": 0.0888, "step": 31227 }, { "epoch": 0.6881180210106486, "grad_norm": 0.7725710868835449, "learning_rate": 7.023177348692924e-06, "loss": 0.0626, "step": 31228 }, { "epoch": 0.6881400563001647, "grad_norm": 0.6948776841163635, "learning_rate": 7.0222707569231565e-06, "loss": 0.1067, "step": 31229 }, { "epoch": 0.6881620915896809, "grad_norm": 0.6731216311454773, "learning_rate": 7.021364205788243e-06, "loss": 0.088, "step": 31230 }, { "epoch": 0.6881841268791971, "grad_norm": 0.5877755880355835, "learning_rate": 7.020457695292801e-06, "loss": 0.0625, "step": 31231 }, { "epoch": 0.6882061621687132, "grad_norm": 0.6919511556625366, "learning_rate": 7.01955122544145e-06, "loss": 0.0805, "step": 31232 }, { "epoch": 0.6882281974582294, "grad_norm": 0.5615858435630798, "learning_rate": 7.018644796238801e-06, "loss": 0.0869, "step": 31233 }, { "epoch": 0.6882502327477455, "grad_norm": 0.6644147038459778, "learning_rate": 7.017738407689472e-06, "loss": 0.099, "step": 31234 }, { "epoch": 0.6882722680372617, "grad_norm": 0.5124117136001587, "learning_rate": 7.016832059798089e-06, "loss": 0.0495, "step": 31235 }, { "epoch": 0.6882943033267779, "grad_norm": 0.5912358164787292, "learning_rate": 7.015925752569253e-06, "loss": 0.045, "step": 31236 }, { "epoch": 0.688316338616294, "grad_norm": 0.3724384307861328, "learning_rate": 7.0150194860075915e-06, "loss": 0.0817, "step": 31237 }, { "epoch": 0.6883383739058102, "grad_norm": 0.621480405330658, "learning_rate": 7.014113260117715e-06, "loss": 0.0494, "step": 31238 }, { "epoch": 0.6883604091953263, "grad_norm": 1.1497794389724731, "learning_rate": 7.013207074904249e-06, "loss": 0.1105, "step": 31239 }, { "epoch": 0.6883824444848424, "grad_norm": 0.7820994257926941, "learning_rate": 7.012300930371797e-06, "loss": 0.1223, "step": 31240 }, { "epoch": 0.6884044797743586, "grad_norm": 0.7608494162559509, "learning_rate": 7.01139482652498e-06, "loss": 0.0745, "step": 31241 }, { "epoch": 0.6884265150638748, "grad_norm": 0.62180495262146, "learning_rate": 7.010488763368417e-06, "loss": 0.0825, "step": 31242 }, { "epoch": 0.6884485503533909, "grad_norm": 0.8374338150024414, "learning_rate": 7.009582740906715e-06, "loss": 0.0729, "step": 31243 }, { "epoch": 0.6884705856429071, "grad_norm": 0.7219060063362122, "learning_rate": 7.0086767591444975e-06, "loss": 0.0538, "step": 31244 }, { "epoch": 0.6884926209324232, "grad_norm": 0.6569474339485168, "learning_rate": 7.007770818086364e-06, "loss": 0.0593, "step": 31245 }, { "epoch": 0.6885146562219394, "grad_norm": 0.7970805168151855, "learning_rate": 7.006864917736951e-06, "loss": 0.0912, "step": 31246 }, { "epoch": 0.6885366915114556, "grad_norm": 0.48200392723083496, "learning_rate": 7.005959058100855e-06, "loss": 0.0606, "step": 31247 }, { "epoch": 0.6885587268009717, "grad_norm": 0.6781254410743713, "learning_rate": 7.005053239182702e-06, "loss": 0.0659, "step": 31248 }, { "epoch": 0.6885807620904879, "grad_norm": 0.8353865742683411, "learning_rate": 7.004147460987094e-06, "loss": 0.0939, "step": 31249 }, { "epoch": 0.6886027973800041, "grad_norm": 0.6373729705810547, "learning_rate": 7.003241723518651e-06, "loss": 0.0628, "step": 31250 }, { "epoch": 0.6886248326695202, "grad_norm": 0.47293010354042053, "learning_rate": 7.002336026781992e-06, "loss": 0.057, "step": 31251 }, { "epoch": 0.6886468679590364, "grad_norm": 0.5128381252288818, "learning_rate": 7.001430370781719e-06, "loss": 0.0684, "step": 31252 }, { "epoch": 0.6886689032485526, "grad_norm": 1.0829510688781738, "learning_rate": 7.0005247555224485e-06, "loss": 0.1019, "step": 31253 }, { "epoch": 0.6886909385380687, "grad_norm": 0.571060836315155, "learning_rate": 6.999619181008797e-06, "loss": 0.0659, "step": 31254 }, { "epoch": 0.6887129738275849, "grad_norm": 0.7259832620620728, "learning_rate": 6.998713647245381e-06, "loss": 0.0633, "step": 31255 }, { "epoch": 0.6887350091171011, "grad_norm": 0.9270591139793396, "learning_rate": 6.9978081542368e-06, "loss": 0.0788, "step": 31256 }, { "epoch": 0.6887570444066172, "grad_norm": 0.3374420404434204, "learning_rate": 6.996902701987674e-06, "loss": 0.0371, "step": 31257 }, { "epoch": 0.6887790796961334, "grad_norm": 0.2285701036453247, "learning_rate": 6.995997290502619e-06, "loss": 0.0271, "step": 31258 }, { "epoch": 0.6888011149856496, "grad_norm": 0.6002838015556335, "learning_rate": 6.9950919197862365e-06, "loss": 0.0487, "step": 31259 }, { "epoch": 0.6888231502751657, "grad_norm": 0.31769421696662903, "learning_rate": 6.994186589843144e-06, "loss": 0.0263, "step": 31260 }, { "epoch": 0.6888451855646819, "grad_norm": 0.6624124050140381, "learning_rate": 6.993281300677952e-06, "loss": 0.0449, "step": 31261 }, { "epoch": 0.688867220854198, "grad_norm": 0.6116151213645935, "learning_rate": 6.9923760522952775e-06, "loss": 0.0522, "step": 31262 }, { "epoch": 0.6888892561437142, "grad_norm": 0.8731264472007751, "learning_rate": 6.991470844699721e-06, "loss": 0.1021, "step": 31263 }, { "epoch": 0.6889112914332303, "grad_norm": 0.6715019941329956, "learning_rate": 6.990565677895896e-06, "loss": 0.0609, "step": 31264 }, { "epoch": 0.6889333267227464, "grad_norm": 0.8158477544784546, "learning_rate": 6.989660551888422e-06, "loss": 0.059, "step": 31265 }, { "epoch": 0.6889553620122626, "grad_norm": 0.6289953589439392, "learning_rate": 6.988755466681897e-06, "loss": 0.0712, "step": 31266 }, { "epoch": 0.6889773973017788, "grad_norm": 0.4205055236816406, "learning_rate": 6.987850422280941e-06, "loss": 0.0648, "step": 31267 }, { "epoch": 0.6889994325912949, "grad_norm": 0.9105555415153503, "learning_rate": 6.986945418690148e-06, "loss": 0.0616, "step": 31268 }, { "epoch": 0.6890214678808111, "grad_norm": 0.5242117047309875, "learning_rate": 6.986040455914151e-06, "loss": 0.0694, "step": 31269 }, { "epoch": 0.6890435031703273, "grad_norm": 0.5705249309539795, "learning_rate": 6.9851355339575406e-06, "loss": 0.0557, "step": 31270 }, { "epoch": 0.6890655384598434, "grad_norm": 0.32178881764411926, "learning_rate": 6.98423065282494e-06, "loss": 0.0539, "step": 31271 }, { "epoch": 0.6890875737493596, "grad_norm": 0.5595095157623291, "learning_rate": 6.9833258125209445e-06, "loss": 0.0443, "step": 31272 }, { "epoch": 0.6891096090388757, "grad_norm": 0.2872827351093292, "learning_rate": 6.98242101305017e-06, "loss": 0.0712, "step": 31273 }, { "epoch": 0.6891316443283919, "grad_norm": 0.5336757898330688, "learning_rate": 6.98151625441723e-06, "loss": 0.0359, "step": 31274 }, { "epoch": 0.6891536796179081, "grad_norm": 0.9106126427650452, "learning_rate": 6.980611536626722e-06, "loss": 0.0831, "step": 31275 }, { "epoch": 0.6891757149074242, "grad_norm": 0.40980643033981323, "learning_rate": 6.979706859683261e-06, "loss": 0.0496, "step": 31276 }, { "epoch": 0.6891977501969404, "grad_norm": 1.034993052482605, "learning_rate": 6.97880222359145e-06, "loss": 0.1121, "step": 31277 }, { "epoch": 0.6892197854864566, "grad_norm": 0.6007599234580994, "learning_rate": 6.97789762835591e-06, "loss": 0.0803, "step": 31278 }, { "epoch": 0.6892418207759727, "grad_norm": 0.756583034992218, "learning_rate": 6.976993073981233e-06, "loss": 0.069, "step": 31279 }, { "epoch": 0.6892638560654889, "grad_norm": 0.629301130771637, "learning_rate": 6.97608856047203e-06, "loss": 0.0529, "step": 31280 }, { "epoch": 0.6892858913550051, "grad_norm": 0.5759776830673218, "learning_rate": 6.975184087832919e-06, "loss": 0.0809, "step": 31281 }, { "epoch": 0.6893079266445212, "grad_norm": 0.6490129828453064, "learning_rate": 6.974279656068492e-06, "loss": 0.0579, "step": 31282 }, { "epoch": 0.6893299619340374, "grad_norm": 0.6070737242698669, "learning_rate": 6.973375265183363e-06, "loss": 0.0481, "step": 31283 }, { "epoch": 0.6893519972235536, "grad_norm": 0.5044678449630737, "learning_rate": 6.972470915182138e-06, "loss": 0.0361, "step": 31284 }, { "epoch": 0.6893740325130697, "grad_norm": 0.549659013748169, "learning_rate": 6.971566606069428e-06, "loss": 0.0662, "step": 31285 }, { "epoch": 0.6893960678025859, "grad_norm": 0.5885997414588928, "learning_rate": 6.97066233784983e-06, "loss": 0.0841, "step": 31286 }, { "epoch": 0.689418103092102, "grad_norm": 0.7324398756027222, "learning_rate": 6.969758110527953e-06, "loss": 0.0739, "step": 31287 }, { "epoch": 0.6894401383816181, "grad_norm": 0.9581459164619446, "learning_rate": 6.9688539241084105e-06, "loss": 0.0681, "step": 31288 }, { "epoch": 0.6894621736711343, "grad_norm": 0.9221123456954956, "learning_rate": 6.967949778595797e-06, "loss": 0.1081, "step": 31289 }, { "epoch": 0.6894842089606504, "grad_norm": 0.40668293833732605, "learning_rate": 6.967045673994727e-06, "loss": 0.0611, "step": 31290 }, { "epoch": 0.6895062442501666, "grad_norm": 0.5905054807662964, "learning_rate": 6.966141610309789e-06, "loss": 0.0681, "step": 31291 }, { "epoch": 0.6895282795396828, "grad_norm": 0.6549076437950134, "learning_rate": 6.965237587545611e-06, "loss": 0.0649, "step": 31292 }, { "epoch": 0.6895503148291989, "grad_norm": 0.7397761940956116, "learning_rate": 6.964333605706784e-06, "loss": 0.0986, "step": 31293 }, { "epoch": 0.6895723501187151, "grad_norm": 0.545621931552887, "learning_rate": 6.963429664797917e-06, "loss": 0.0772, "step": 31294 }, { "epoch": 0.6895943854082313, "grad_norm": 0.6711089611053467, "learning_rate": 6.962525764823609e-06, "loss": 0.0955, "step": 31295 }, { "epoch": 0.6896164206977474, "grad_norm": 0.6493209004402161, "learning_rate": 6.961621905788467e-06, "loss": 0.0663, "step": 31296 }, { "epoch": 0.6896384559872636, "grad_norm": 0.2188836932182312, "learning_rate": 6.960718087697099e-06, "loss": 0.0782, "step": 31297 }, { "epoch": 0.6896604912767798, "grad_norm": 0.6270462870597839, "learning_rate": 6.9598143105541004e-06, "loss": 0.0641, "step": 31298 }, { "epoch": 0.6896825265662959, "grad_norm": 0.46503403782844543, "learning_rate": 6.95891057436408e-06, "loss": 0.0498, "step": 31299 }, { "epoch": 0.6897045618558121, "grad_norm": 0.8318802118301392, "learning_rate": 6.958006879131638e-06, "loss": 0.0965, "step": 31300 }, { "epoch": 0.6897265971453282, "grad_norm": 0.5500221252441406, "learning_rate": 6.957103224861387e-06, "loss": 0.065, "step": 31301 }, { "epoch": 0.6897486324348444, "grad_norm": 0.8443537354469299, "learning_rate": 6.956199611557914e-06, "loss": 0.0732, "step": 31302 }, { "epoch": 0.6897706677243606, "grad_norm": 0.6034025549888611, "learning_rate": 6.9552960392258315e-06, "loss": 0.0326, "step": 31303 }, { "epoch": 0.6897927030138767, "grad_norm": 0.7921522855758667, "learning_rate": 6.954392507869747e-06, "loss": 0.0616, "step": 31304 }, { "epoch": 0.6898147383033929, "grad_norm": 0.19539794325828552, "learning_rate": 6.953489017494249e-06, "loss": 0.0417, "step": 31305 }, { "epoch": 0.6898367735929091, "grad_norm": 0.36451655626296997, "learning_rate": 6.9525855681039465e-06, "loss": 0.0602, "step": 31306 }, { "epoch": 0.6898588088824252, "grad_norm": 0.4618748426437378, "learning_rate": 6.951682159703441e-06, "loss": 0.0473, "step": 31307 }, { "epoch": 0.6898808441719414, "grad_norm": 0.5214496850967407, "learning_rate": 6.950778792297339e-06, "loss": 0.0575, "step": 31308 }, { "epoch": 0.6899028794614576, "grad_norm": 0.8458903431892395, "learning_rate": 6.949875465890231e-06, "loss": 0.0809, "step": 31309 }, { "epoch": 0.6899249147509737, "grad_norm": 0.8001389503479004, "learning_rate": 6.9489721804867265e-06, "loss": 0.0797, "step": 31310 }, { "epoch": 0.6899469500404899, "grad_norm": 0.6147564053535461, "learning_rate": 6.948068936091428e-06, "loss": 0.0607, "step": 31311 }, { "epoch": 0.6899689853300061, "grad_norm": 0.5440802574157715, "learning_rate": 6.947165732708926e-06, "loss": 0.052, "step": 31312 }, { "epoch": 0.6899910206195221, "grad_norm": 0.49793410301208496, "learning_rate": 6.946262570343834e-06, "loss": 0.079, "step": 31313 }, { "epoch": 0.6900130559090383, "grad_norm": 0.5980937480926514, "learning_rate": 6.945359449000734e-06, "loss": 0.0707, "step": 31314 }, { "epoch": 0.6900350911985544, "grad_norm": 0.9966033697128296, "learning_rate": 6.944456368684248e-06, "loss": 0.067, "step": 31315 }, { "epoch": 0.6900571264880706, "grad_norm": 0.579699695110321, "learning_rate": 6.9435533293989605e-06, "loss": 0.0715, "step": 31316 }, { "epoch": 0.6900791617775868, "grad_norm": 0.5059203505516052, "learning_rate": 6.942650331149482e-06, "loss": 0.0765, "step": 31317 }, { "epoch": 0.6901011970671029, "grad_norm": 0.32346561551094055, "learning_rate": 6.9417473739404e-06, "loss": 0.0456, "step": 31318 }, { "epoch": 0.6901232323566191, "grad_norm": 0.6057569980621338, "learning_rate": 6.940844457776319e-06, "loss": 0.0581, "step": 31319 }, { "epoch": 0.6901452676461353, "grad_norm": 0.642245352268219, "learning_rate": 6.939941582661845e-06, "loss": 0.0726, "step": 31320 }, { "epoch": 0.6901673029356514, "grad_norm": 0.5218019485473633, "learning_rate": 6.939038748601566e-06, "loss": 0.054, "step": 31321 }, { "epoch": 0.6901893382251676, "grad_norm": 0.6674692630767822, "learning_rate": 6.938135955600085e-06, "loss": 0.0508, "step": 31322 }, { "epoch": 0.6902113735146838, "grad_norm": 0.6263133883476257, "learning_rate": 6.937233203662e-06, "loss": 0.0844, "step": 31323 }, { "epoch": 0.6902334088041999, "grad_norm": 0.732143759727478, "learning_rate": 6.936330492791916e-06, "loss": 0.0794, "step": 31324 }, { "epoch": 0.6902554440937161, "grad_norm": 0.8266646265983582, "learning_rate": 6.935427822994418e-06, "loss": 0.0724, "step": 31325 }, { "epoch": 0.6902774793832323, "grad_norm": 0.41480752825737, "learning_rate": 6.9345251942741125e-06, "loss": 0.0455, "step": 31326 }, { "epoch": 0.6902995146727484, "grad_norm": 0.7551382184028625, "learning_rate": 6.933622606635595e-06, "loss": 0.0538, "step": 31327 }, { "epoch": 0.6903215499622646, "grad_norm": 0.5064883828163147, "learning_rate": 6.932720060083468e-06, "loss": 0.0581, "step": 31328 }, { "epoch": 0.6903435852517807, "grad_norm": 0.47810399532318115, "learning_rate": 6.931817554622319e-06, "loss": 0.0698, "step": 31329 }, { "epoch": 0.6903656205412969, "grad_norm": 0.8564918041229248, "learning_rate": 6.930915090256748e-06, "loss": 0.0987, "step": 31330 }, { "epoch": 0.6903876558308131, "grad_norm": 0.532663106918335, "learning_rate": 6.93001266699136e-06, "loss": 0.0711, "step": 31331 }, { "epoch": 0.6904096911203292, "grad_norm": 1.0503973960876465, "learning_rate": 6.929110284830741e-06, "loss": 0.0892, "step": 31332 }, { "epoch": 0.6904317264098454, "grad_norm": 0.3372073769569397, "learning_rate": 6.9282079437794885e-06, "loss": 0.0696, "step": 31333 }, { "epoch": 0.6904537616993616, "grad_norm": 0.7144209742546082, "learning_rate": 6.927305643842202e-06, "loss": 0.0601, "step": 31334 }, { "epoch": 0.6904757969888777, "grad_norm": 0.8780449032783508, "learning_rate": 6.926403385023484e-06, "loss": 0.0633, "step": 31335 }, { "epoch": 0.6904978322783939, "grad_norm": 0.7822431325912476, "learning_rate": 6.925501167327916e-06, "loss": 0.0885, "step": 31336 }, { "epoch": 0.6905198675679101, "grad_norm": 0.6698812246322632, "learning_rate": 6.924598990760101e-06, "loss": 0.0702, "step": 31337 }, { "epoch": 0.6905419028574261, "grad_norm": 0.8053914904594421, "learning_rate": 6.92369685532464e-06, "loss": 0.0558, "step": 31338 }, { "epoch": 0.6905639381469423, "grad_norm": 0.6155014634132385, "learning_rate": 6.922794761026114e-06, "loss": 0.0573, "step": 31339 }, { "epoch": 0.6905859734364584, "grad_norm": 0.6423577666282654, "learning_rate": 6.9218927078691325e-06, "loss": 0.0822, "step": 31340 }, { "epoch": 0.6906080087259746, "grad_norm": 0.5941574573516846, "learning_rate": 6.920990695858272e-06, "loss": 0.0605, "step": 31341 }, { "epoch": 0.6906300440154908, "grad_norm": 0.46391594409942627, "learning_rate": 6.920088724998149e-06, "loss": 0.0505, "step": 31342 }, { "epoch": 0.6906520793050069, "grad_norm": 0.5667461156845093, "learning_rate": 6.919186795293342e-06, "loss": 0.0665, "step": 31343 }, { "epoch": 0.6906741145945231, "grad_norm": 0.6223177313804626, "learning_rate": 6.918284906748456e-06, "loss": 0.0849, "step": 31344 }, { "epoch": 0.6906961498840393, "grad_norm": 0.24871081113815308, "learning_rate": 6.917383059368073e-06, "loss": 0.057, "step": 31345 }, { "epoch": 0.6907181851735554, "grad_norm": 0.5070100426673889, "learning_rate": 6.916481253156792e-06, "loss": 0.0678, "step": 31346 }, { "epoch": 0.6907402204630716, "grad_norm": 0.6494457721710205, "learning_rate": 6.915579488119211e-06, "loss": 0.0545, "step": 31347 }, { "epoch": 0.6907622557525878, "grad_norm": 0.8685826659202576, "learning_rate": 6.914677764259908e-06, "loss": 0.0693, "step": 31348 }, { "epoch": 0.6907842910421039, "grad_norm": 0.8789642453193665, "learning_rate": 6.9137760815834995e-06, "loss": 0.0691, "step": 31349 }, { "epoch": 0.6908063263316201, "grad_norm": 0.5808504223823547, "learning_rate": 6.9128744400945585e-06, "loss": 0.0525, "step": 31350 }, { "epoch": 0.6908283616211363, "grad_norm": 0.6240456104278564, "learning_rate": 6.911972839797691e-06, "loss": 0.0761, "step": 31351 }, { "epoch": 0.6908503969106524, "grad_norm": 0.6792502999305725, "learning_rate": 6.911071280697478e-06, "loss": 0.0916, "step": 31352 }, { "epoch": 0.6908724322001686, "grad_norm": 0.4848511815071106, "learning_rate": 6.9101697627985155e-06, "loss": 0.0531, "step": 31353 }, { "epoch": 0.6908944674896847, "grad_norm": 0.4046099781990051, "learning_rate": 6.9092682861054035e-06, "loss": 0.0661, "step": 31354 }, { "epoch": 0.6909165027792009, "grad_norm": 0.44784629344940186, "learning_rate": 6.90836685062272e-06, "loss": 0.0684, "step": 31355 }, { "epoch": 0.6909385380687171, "grad_norm": 0.4120177626609802, "learning_rate": 6.907465456355063e-06, "loss": 0.0299, "step": 31356 }, { "epoch": 0.6909605733582332, "grad_norm": 0.41334763169288635, "learning_rate": 6.906564103307025e-06, "loss": 0.0589, "step": 31357 }, { "epoch": 0.6909826086477494, "grad_norm": 0.4453314244747162, "learning_rate": 6.905662791483201e-06, "loss": 0.0718, "step": 31358 }, { "epoch": 0.6910046439372656, "grad_norm": 0.6798377633094788, "learning_rate": 6.904761520888171e-06, "loss": 0.0911, "step": 31359 }, { "epoch": 0.6910266792267817, "grad_norm": 0.8361218571662903, "learning_rate": 6.903860291526533e-06, "loss": 0.0686, "step": 31360 }, { "epoch": 0.6910487145162979, "grad_norm": 0.5302117466926575, "learning_rate": 6.9029591034028804e-06, "loss": 0.0796, "step": 31361 }, { "epoch": 0.6910707498058141, "grad_norm": 0.6157001256942749, "learning_rate": 6.9020579565217925e-06, "loss": 0.0361, "step": 31362 }, { "epoch": 0.6910927850953301, "grad_norm": 0.5301441550254822, "learning_rate": 6.901156850887873e-06, "loss": 0.0849, "step": 31363 }, { "epoch": 0.6911148203848463, "grad_norm": 0.9209233522415161, "learning_rate": 6.900255786505692e-06, "loss": 0.0848, "step": 31364 }, { "epoch": 0.6911368556743624, "grad_norm": 0.6275431513786316, "learning_rate": 6.899354763379866e-06, "loss": 0.053, "step": 31365 }, { "epoch": 0.6911588909638786, "grad_norm": 0.6473509669303894, "learning_rate": 6.898453781514962e-06, "loss": 0.07, "step": 31366 }, { "epoch": 0.6911809262533948, "grad_norm": 0.5624041557312012, "learning_rate": 6.897552840915583e-06, "loss": 0.0742, "step": 31367 }, { "epoch": 0.6912029615429109, "grad_norm": 0.5363585352897644, "learning_rate": 6.896651941586307e-06, "loss": 0.048, "step": 31368 }, { "epoch": 0.6912249968324271, "grad_norm": 0.6730169057846069, "learning_rate": 6.895751083531728e-06, "loss": 0.0777, "step": 31369 }, { "epoch": 0.6912470321219433, "grad_norm": 0.5413048267364502, "learning_rate": 6.8948502667564416e-06, "loss": 0.0556, "step": 31370 }, { "epoch": 0.6912690674114594, "grad_norm": 0.34772801399230957, "learning_rate": 6.893949491265016e-06, "loss": 0.032, "step": 31371 }, { "epoch": 0.6912911027009756, "grad_norm": 0.6222640872001648, "learning_rate": 6.8930487570620634e-06, "loss": 0.0672, "step": 31372 }, { "epoch": 0.6913131379904918, "grad_norm": 0.6434184908866882, "learning_rate": 6.892148064152156e-06, "loss": 0.0653, "step": 31373 }, { "epoch": 0.6913351732800079, "grad_norm": 0.5477057695388794, "learning_rate": 6.891247412539891e-06, "loss": 0.0587, "step": 31374 }, { "epoch": 0.6913572085695241, "grad_norm": 0.41265833377838135, "learning_rate": 6.8903468022298475e-06, "loss": 0.0812, "step": 31375 }, { "epoch": 0.6913792438590403, "grad_norm": 0.47534096240997314, "learning_rate": 6.889446233226616e-06, "loss": 0.0599, "step": 31376 }, { "epoch": 0.6914012791485564, "grad_norm": 0.644980788230896, "learning_rate": 6.888545705534788e-06, "loss": 0.0635, "step": 31377 }, { "epoch": 0.6914233144380726, "grad_norm": 0.6509073972702026, "learning_rate": 6.887645219158942e-06, "loss": 0.0714, "step": 31378 }, { "epoch": 0.6914453497275888, "grad_norm": 0.6719129681587219, "learning_rate": 6.886744774103669e-06, "loss": 0.0821, "step": 31379 }, { "epoch": 0.6914673850171049, "grad_norm": 0.5952851176261902, "learning_rate": 6.885844370373555e-06, "loss": 0.062, "step": 31380 }, { "epoch": 0.6914894203066211, "grad_norm": 0.7973179817199707, "learning_rate": 6.884944007973193e-06, "loss": 0.094, "step": 31381 }, { "epoch": 0.6915114555961372, "grad_norm": 0.5148652195930481, "learning_rate": 6.884043686907158e-06, "loss": 0.0629, "step": 31382 }, { "epoch": 0.6915334908856534, "grad_norm": 0.5568665862083435, "learning_rate": 6.883143407180039e-06, "loss": 0.0779, "step": 31383 }, { "epoch": 0.6915555261751696, "grad_norm": 0.7375239729881287, "learning_rate": 6.88224316879643e-06, "loss": 0.0753, "step": 31384 }, { "epoch": 0.6915775614646857, "grad_norm": 0.7755690813064575, "learning_rate": 6.881342971760903e-06, "loss": 0.059, "step": 31385 }, { "epoch": 0.6915995967542019, "grad_norm": 0.6235577464103699, "learning_rate": 6.8804428160780555e-06, "loss": 0.0705, "step": 31386 }, { "epoch": 0.691621632043718, "grad_norm": 0.4958173334598541, "learning_rate": 6.879542701752457e-06, "loss": 0.0469, "step": 31387 }, { "epoch": 0.6916436673332341, "grad_norm": 0.5358705520629883, "learning_rate": 6.878642628788712e-06, "loss": 0.0743, "step": 31388 }, { "epoch": 0.6916657026227503, "grad_norm": 0.5339149832725525, "learning_rate": 6.877742597191389e-06, "loss": 0.055, "step": 31389 }, { "epoch": 0.6916877379122665, "grad_norm": 0.9256910681724548, "learning_rate": 6.876842606965085e-06, "loss": 0.0955, "step": 31390 }, { "epoch": 0.6917097732017826, "grad_norm": 0.7827112078666687, "learning_rate": 6.875942658114372e-06, "loss": 0.0771, "step": 31391 }, { "epoch": 0.6917318084912988, "grad_norm": 0.6627365946769714, "learning_rate": 6.8750427506438404e-06, "loss": 0.0531, "step": 31392 }, { "epoch": 0.691753843780815, "grad_norm": 0.5804594159126282, "learning_rate": 6.874142884558078e-06, "loss": 0.0824, "step": 31393 }, { "epoch": 0.6917758790703311, "grad_norm": 0.5198655128479004, "learning_rate": 6.87324305986165e-06, "loss": 0.0546, "step": 31394 }, { "epoch": 0.6917979143598473, "grad_norm": 0.6243919730186462, "learning_rate": 6.872343276559167e-06, "loss": 0.0612, "step": 31395 }, { "epoch": 0.6918199496493634, "grad_norm": 0.3808542490005493, "learning_rate": 6.8714435346551916e-06, "loss": 0.0969, "step": 31396 }, { "epoch": 0.6918419849388796, "grad_norm": 0.3179018199443817, "learning_rate": 6.870543834154318e-06, "loss": 0.0528, "step": 31397 }, { "epoch": 0.6918640202283958, "grad_norm": 0.47540387511253357, "learning_rate": 6.8696441750611195e-06, "loss": 0.0571, "step": 31398 }, { "epoch": 0.6918860555179119, "grad_norm": 0.8310069441795349, "learning_rate": 6.868744557380182e-06, "loss": 0.0608, "step": 31399 }, { "epoch": 0.6919080908074281, "grad_norm": 0.679829478263855, "learning_rate": 6.867844981116095e-06, "loss": 0.0734, "step": 31400 }, { "epoch": 0.6919301260969443, "grad_norm": 0.5916332602500916, "learning_rate": 6.866945446273429e-06, "loss": 0.0486, "step": 31401 }, { "epoch": 0.6919521613864604, "grad_norm": 0.8260795474052429, "learning_rate": 6.866045952856771e-06, "loss": 0.0713, "step": 31402 }, { "epoch": 0.6919741966759766, "grad_norm": 0.5794629454612732, "learning_rate": 6.865146500870702e-06, "loss": 0.0463, "step": 31403 }, { "epoch": 0.6919962319654928, "grad_norm": 0.5840276479721069, "learning_rate": 6.86424709031981e-06, "loss": 0.059, "step": 31404 }, { "epoch": 0.6920182672550089, "grad_norm": 0.47959989309310913, "learning_rate": 6.8633477212086655e-06, "loss": 0.0387, "step": 31405 }, { "epoch": 0.6920403025445251, "grad_norm": 0.4809265732765198, "learning_rate": 6.862448393541854e-06, "loss": 0.0632, "step": 31406 }, { "epoch": 0.6920623378340413, "grad_norm": 0.7524807453155518, "learning_rate": 6.861549107323961e-06, "loss": 0.0982, "step": 31407 }, { "epoch": 0.6920843731235574, "grad_norm": 0.6364306807518005, "learning_rate": 6.860649862559557e-06, "loss": 0.0695, "step": 31408 }, { "epoch": 0.6921064084130736, "grad_norm": 0.5075209736824036, "learning_rate": 6.859750659253236e-06, "loss": 0.0591, "step": 31409 }, { "epoch": 0.6921284437025897, "grad_norm": 0.7028062343597412, "learning_rate": 6.858851497409556e-06, "loss": 0.0656, "step": 31410 }, { "epoch": 0.6921504789921059, "grad_norm": 0.6123467683792114, "learning_rate": 6.857952377033125e-06, "loss": 0.0704, "step": 31411 }, { "epoch": 0.692172514281622, "grad_norm": 0.5190587639808655, "learning_rate": 6.8570532981285e-06, "loss": 0.0605, "step": 31412 }, { "epoch": 0.6921945495711381, "grad_norm": 0.41203179955482483, "learning_rate": 6.856154260700276e-06, "loss": 0.0369, "step": 31413 }, { "epoch": 0.6922165848606543, "grad_norm": 0.9445645213127136, "learning_rate": 6.855255264753021e-06, "loss": 0.0992, "step": 31414 }, { "epoch": 0.6922386201501705, "grad_norm": 0.5965949892997742, "learning_rate": 6.854356310291317e-06, "loss": 0.0794, "step": 31415 }, { "epoch": 0.6922606554396866, "grad_norm": 0.3849155604839325, "learning_rate": 6.853457397319752e-06, "loss": 0.0483, "step": 31416 }, { "epoch": 0.6922826907292028, "grad_norm": 0.5937339663505554, "learning_rate": 6.852558525842884e-06, "loss": 0.0544, "step": 31417 }, { "epoch": 0.692304726018719, "grad_norm": 0.44010162353515625, "learning_rate": 6.851659695865315e-06, "loss": 0.0746, "step": 31418 }, { "epoch": 0.6923267613082351, "grad_norm": 0.6959505677223206, "learning_rate": 6.850760907391608e-06, "loss": 0.0738, "step": 31419 }, { "epoch": 0.6923487965977513, "grad_norm": 0.7525438070297241, "learning_rate": 6.84986216042635e-06, "loss": 0.0492, "step": 31420 }, { "epoch": 0.6923708318872674, "grad_norm": 0.6728693246841431, "learning_rate": 6.848963454974111e-06, "loss": 0.0629, "step": 31421 }, { "epoch": 0.6923928671767836, "grad_norm": 0.536345362663269, "learning_rate": 6.8480647910394705e-06, "loss": 0.0558, "step": 31422 }, { "epoch": 0.6924149024662998, "grad_norm": 0.6437354683876038, "learning_rate": 6.847166168627014e-06, "loss": 0.0681, "step": 31423 }, { "epoch": 0.6924369377558159, "grad_norm": 0.5829907655715942, "learning_rate": 6.846267587741306e-06, "loss": 0.0668, "step": 31424 }, { "epoch": 0.6924589730453321, "grad_norm": 0.5482692122459412, "learning_rate": 6.845369048386929e-06, "loss": 0.0652, "step": 31425 }, { "epoch": 0.6924810083348483, "grad_norm": 0.8636858463287354, "learning_rate": 6.844470550568462e-06, "loss": 0.0746, "step": 31426 }, { "epoch": 0.6925030436243644, "grad_norm": 1.7523404359817505, "learning_rate": 6.843572094290485e-06, "loss": 0.0483, "step": 31427 }, { "epoch": 0.6925250789138806, "grad_norm": 0.7229277491569519, "learning_rate": 6.842673679557562e-06, "loss": 0.0926, "step": 31428 }, { "epoch": 0.6925471142033968, "grad_norm": 0.5773565769195557, "learning_rate": 6.841775306374277e-06, "loss": 0.0773, "step": 31429 }, { "epoch": 0.6925691494929129, "grad_norm": 0.5007243156433105, "learning_rate": 6.840876974745211e-06, "loss": 0.0422, "step": 31430 }, { "epoch": 0.6925911847824291, "grad_norm": 0.5611002445220947, "learning_rate": 6.839978684674928e-06, "loss": 0.0697, "step": 31431 }, { "epoch": 0.6926132200719453, "grad_norm": 0.3886193037033081, "learning_rate": 6.839080436168016e-06, "loss": 0.0306, "step": 31432 }, { "epoch": 0.6926352553614614, "grad_norm": 0.592361330986023, "learning_rate": 6.838182229229032e-06, "loss": 0.0547, "step": 31433 }, { "epoch": 0.6926572906509776, "grad_norm": 0.4524768590927124, "learning_rate": 6.837284063862572e-06, "loss": 0.047, "step": 31434 }, { "epoch": 0.6926793259404938, "grad_norm": 0.8174872994422913, "learning_rate": 6.836385940073198e-06, "loss": 0.0934, "step": 31435 }, { "epoch": 0.6927013612300099, "grad_norm": 1.0361437797546387, "learning_rate": 6.835487857865492e-06, "loss": 0.0947, "step": 31436 }, { "epoch": 0.692723396519526, "grad_norm": 0.9516077637672424, "learning_rate": 6.8345898172440145e-06, "loss": 0.0655, "step": 31437 }, { "epoch": 0.6927454318090421, "grad_norm": 0.3778505325317383, "learning_rate": 6.83369181821336e-06, "loss": 0.0513, "step": 31438 }, { "epoch": 0.6927674670985583, "grad_norm": 0.7866332530975342, "learning_rate": 6.832793860778088e-06, "loss": 0.0705, "step": 31439 }, { "epoch": 0.6927895023880745, "grad_norm": 0.8347243666648865, "learning_rate": 6.831895944942775e-06, "loss": 0.0452, "step": 31440 }, { "epoch": 0.6928115376775906, "grad_norm": 1.0803821086883545, "learning_rate": 6.830998070712003e-06, "loss": 0.0673, "step": 31441 }, { "epoch": 0.6928335729671068, "grad_norm": 0.563373863697052, "learning_rate": 6.830100238090332e-06, "loss": 0.0741, "step": 31442 }, { "epoch": 0.692855608256623, "grad_norm": 0.9161458015441895, "learning_rate": 6.8292024470823476e-06, "loss": 0.0739, "step": 31443 }, { "epoch": 0.6928776435461391, "grad_norm": 0.6762645840644836, "learning_rate": 6.828304697692604e-06, "loss": 0.0532, "step": 31444 }, { "epoch": 0.6928996788356553, "grad_norm": 0.5643614530563354, "learning_rate": 6.8274069899256985e-06, "loss": 0.0719, "step": 31445 }, { "epoch": 0.6929217141251715, "grad_norm": 0.5875223875045776, "learning_rate": 6.826509323786188e-06, "loss": 0.0551, "step": 31446 }, { "epoch": 0.6929437494146876, "grad_norm": 1.0220938920974731, "learning_rate": 6.825611699278651e-06, "loss": 0.092, "step": 31447 }, { "epoch": 0.6929657847042038, "grad_norm": 0.5940496325492859, "learning_rate": 6.824714116407654e-06, "loss": 0.0785, "step": 31448 }, { "epoch": 0.69298781999372, "grad_norm": 0.6307227611541748, "learning_rate": 6.8238165751777705e-06, "loss": 0.0589, "step": 31449 }, { "epoch": 0.6930098552832361, "grad_norm": 0.6906229257583618, "learning_rate": 6.822919075593579e-06, "loss": 0.0623, "step": 31450 }, { "epoch": 0.6930318905727523, "grad_norm": 0.8362995982170105, "learning_rate": 6.8220216176596405e-06, "loss": 0.0679, "step": 31451 }, { "epoch": 0.6930539258622684, "grad_norm": 0.5789732336997986, "learning_rate": 6.821124201380532e-06, "loss": 0.0571, "step": 31452 }, { "epoch": 0.6930759611517846, "grad_norm": 0.5466409921646118, "learning_rate": 6.820226826760823e-06, "loss": 0.0587, "step": 31453 }, { "epoch": 0.6930979964413008, "grad_norm": 0.3883163332939148, "learning_rate": 6.819329493805091e-06, "loss": 0.0527, "step": 31454 }, { "epoch": 0.6931200317308169, "grad_norm": 0.7239276766777039, "learning_rate": 6.818432202517895e-06, "loss": 0.0535, "step": 31455 }, { "epoch": 0.6931420670203331, "grad_norm": 0.48063093423843384, "learning_rate": 6.817534952903811e-06, "loss": 0.0445, "step": 31456 }, { "epoch": 0.6931641023098493, "grad_norm": 0.2992390990257263, "learning_rate": 6.816637744967415e-06, "loss": 0.0591, "step": 31457 }, { "epoch": 0.6931861375993654, "grad_norm": 0.7327326536178589, "learning_rate": 6.815740578713265e-06, "loss": 0.092, "step": 31458 }, { "epoch": 0.6932081728888816, "grad_norm": 0.7469552755355835, "learning_rate": 6.8148434541459445e-06, "loss": 0.0598, "step": 31459 }, { "epoch": 0.6932302081783978, "grad_norm": 0.6392358541488647, "learning_rate": 6.813946371270003e-06, "loss": 0.0251, "step": 31460 }, { "epoch": 0.6932522434679138, "grad_norm": 0.8480372428894043, "learning_rate": 6.813049330090032e-06, "loss": 0.0803, "step": 31461 }, { "epoch": 0.69327427875743, "grad_norm": 0.7648131251335144, "learning_rate": 6.812152330610588e-06, "loss": 0.0568, "step": 31462 }, { "epoch": 0.6932963140469461, "grad_norm": 0.5451143980026245, "learning_rate": 6.811255372836242e-06, "loss": 0.0568, "step": 31463 }, { "epoch": 0.6933183493364623, "grad_norm": 0.8615745902061462, "learning_rate": 6.810358456771568e-06, "loss": 0.094, "step": 31464 }, { "epoch": 0.6933403846259785, "grad_norm": 0.6886435747146606, "learning_rate": 6.809461582421125e-06, "loss": 0.0503, "step": 31465 }, { "epoch": 0.6933624199154946, "grad_norm": 0.7524282336235046, "learning_rate": 6.808564749789493e-06, "loss": 0.0599, "step": 31466 }, { "epoch": 0.6933844552050108, "grad_norm": 0.5796117782592773, "learning_rate": 6.807667958881221e-06, "loss": 0.0663, "step": 31467 }, { "epoch": 0.693406490494527, "grad_norm": 0.4656531810760498, "learning_rate": 6.8067712097009e-06, "loss": 0.0852, "step": 31468 }, { "epoch": 0.6934285257840431, "grad_norm": 0.6321163177490234, "learning_rate": 6.805874502253082e-06, "loss": 0.1009, "step": 31469 }, { "epoch": 0.6934505610735593, "grad_norm": 0.7373644113540649, "learning_rate": 6.804977836542344e-06, "loss": 0.0767, "step": 31470 }, { "epoch": 0.6934725963630755, "grad_norm": 0.7054437398910522, "learning_rate": 6.804081212573245e-06, "loss": 0.0539, "step": 31471 }, { "epoch": 0.6934946316525916, "grad_norm": 0.6092653870582581, "learning_rate": 6.803184630350354e-06, "loss": 0.0526, "step": 31472 }, { "epoch": 0.6935166669421078, "grad_norm": 0.8189066052436829, "learning_rate": 6.802288089878244e-06, "loss": 0.0772, "step": 31473 }, { "epoch": 0.693538702231624, "grad_norm": 0.6426398754119873, "learning_rate": 6.801391591161471e-06, "loss": 0.0819, "step": 31474 }, { "epoch": 0.6935607375211401, "grad_norm": 0.7806412577629089, "learning_rate": 6.8004951342046075e-06, "loss": 0.087, "step": 31475 }, { "epoch": 0.6935827728106563, "grad_norm": 0.41773727536201477, "learning_rate": 6.79959871901222e-06, "loss": 0.0494, "step": 31476 }, { "epoch": 0.6936048081001724, "grad_norm": 0.6241048574447632, "learning_rate": 6.798702345588879e-06, "loss": 0.0807, "step": 31477 }, { "epoch": 0.6936268433896886, "grad_norm": 0.5250904560089111, "learning_rate": 6.797806013939138e-06, "loss": 0.0462, "step": 31478 }, { "epoch": 0.6936488786792048, "grad_norm": 0.2449866533279419, "learning_rate": 6.796909724067571e-06, "loss": 0.0544, "step": 31479 }, { "epoch": 0.6936709139687209, "grad_norm": 0.6403770446777344, "learning_rate": 6.796013475978745e-06, "loss": 0.0485, "step": 31480 }, { "epoch": 0.6936929492582371, "grad_norm": 0.6017972826957703, "learning_rate": 6.795117269677218e-06, "loss": 0.0652, "step": 31481 }, { "epoch": 0.6937149845477533, "grad_norm": 0.8086325526237488, "learning_rate": 6.79422110516756e-06, "loss": 0.068, "step": 31482 }, { "epoch": 0.6937370198372694, "grad_norm": 0.4933874309062958, "learning_rate": 6.7933249824543316e-06, "loss": 0.0406, "step": 31483 }, { "epoch": 0.6937590551267856, "grad_norm": 0.90167236328125, "learning_rate": 6.792428901542105e-06, "loss": 0.0666, "step": 31484 }, { "epoch": 0.6937810904163018, "grad_norm": 0.6251069903373718, "learning_rate": 6.791532862435436e-06, "loss": 0.1085, "step": 31485 }, { "epoch": 0.6938031257058178, "grad_norm": 0.9253605008125305, "learning_rate": 6.790636865138891e-06, "loss": 0.0699, "step": 31486 }, { "epoch": 0.693825160995334, "grad_norm": 0.4904170036315918, "learning_rate": 6.78974090965704e-06, "loss": 0.0597, "step": 31487 }, { "epoch": 0.6938471962848501, "grad_norm": 0.48791277408599854, "learning_rate": 6.7888449959944344e-06, "loss": 0.0783, "step": 31488 }, { "epoch": 0.6938692315743663, "grad_norm": 0.34365251660346985, "learning_rate": 6.787949124155651e-06, "loss": 0.0985, "step": 31489 }, { "epoch": 0.6938912668638825, "grad_norm": 0.5637307167053223, "learning_rate": 6.787053294145236e-06, "loss": 0.0823, "step": 31490 }, { "epoch": 0.6939133021533986, "grad_norm": 0.21225494146347046, "learning_rate": 6.786157505967774e-06, "loss": 0.0525, "step": 31491 }, { "epoch": 0.6939353374429148, "grad_norm": 0.6331360936164856, "learning_rate": 6.785261759627809e-06, "loss": 0.0671, "step": 31492 }, { "epoch": 0.693957372732431, "grad_norm": 0.617477297782898, "learning_rate": 6.784366055129919e-06, "loss": 0.0628, "step": 31493 }, { "epoch": 0.6939794080219471, "grad_norm": 0.4429198205471039, "learning_rate": 6.7834703924786495e-06, "loss": 0.0541, "step": 31494 }, { "epoch": 0.6940014433114633, "grad_norm": 0.6902872323989868, "learning_rate": 6.782574771678573e-06, "loss": 0.0598, "step": 31495 }, { "epoch": 0.6940234786009795, "grad_norm": 0.7912737727165222, "learning_rate": 6.781679192734255e-06, "loss": 0.0678, "step": 31496 }, { "epoch": 0.6940455138904956, "grad_norm": 0.746441662311554, "learning_rate": 6.780783655650248e-06, "loss": 0.0902, "step": 31497 }, { "epoch": 0.6940675491800118, "grad_norm": 0.7050302624702454, "learning_rate": 6.7798881604311155e-06, "loss": 0.0905, "step": 31498 }, { "epoch": 0.694089584469528, "grad_norm": 0.38733989000320435, "learning_rate": 6.7789927070814225e-06, "loss": 0.0554, "step": 31499 }, { "epoch": 0.6941116197590441, "grad_norm": 0.4636411964893341, "learning_rate": 6.7780972956057315e-06, "loss": 0.0838, "step": 31500 }, { "epoch": 0.6941336550485603, "grad_norm": 0.5536785125732422, "learning_rate": 6.777201926008595e-06, "loss": 0.0527, "step": 31501 }, { "epoch": 0.6941556903380764, "grad_norm": 0.6493386626243591, "learning_rate": 6.77630659829458e-06, "loss": 0.0606, "step": 31502 }, { "epoch": 0.6941777256275926, "grad_norm": 0.40070226788520813, "learning_rate": 6.775411312468248e-06, "loss": 0.0645, "step": 31503 }, { "epoch": 0.6941997609171088, "grad_norm": 0.8057562112808228, "learning_rate": 6.774516068534154e-06, "loss": 0.0616, "step": 31504 }, { "epoch": 0.6942217962066249, "grad_norm": 0.8382648229598999, "learning_rate": 6.773620866496858e-06, "loss": 0.059, "step": 31505 }, { "epoch": 0.6942438314961411, "grad_norm": 0.5306257605552673, "learning_rate": 6.7727257063609235e-06, "loss": 0.0881, "step": 31506 }, { "epoch": 0.6942658667856573, "grad_norm": 0.2272368222475052, "learning_rate": 6.771830588130914e-06, "loss": 0.0421, "step": 31507 }, { "epoch": 0.6942879020751734, "grad_norm": 0.9987362623214722, "learning_rate": 6.770935511811378e-06, "loss": 0.0828, "step": 31508 }, { "epoch": 0.6943099373646896, "grad_norm": 0.4922446608543396, "learning_rate": 6.7700404774068805e-06, "loss": 0.0626, "step": 31509 }, { "epoch": 0.6943319726542058, "grad_norm": 0.659609317779541, "learning_rate": 6.7691454849219856e-06, "loss": 0.0912, "step": 31510 }, { "epoch": 0.6943540079437218, "grad_norm": 0.5376023054122925, "learning_rate": 6.768250534361241e-06, "loss": 0.057, "step": 31511 }, { "epoch": 0.694376043233238, "grad_norm": 0.463410347700119, "learning_rate": 6.767355625729216e-06, "loss": 0.0451, "step": 31512 }, { "epoch": 0.6943980785227541, "grad_norm": 0.49887827038764954, "learning_rate": 6.7664607590304525e-06, "loss": 0.0607, "step": 31513 }, { "epoch": 0.6944201138122703, "grad_norm": 0.575633704662323, "learning_rate": 6.765565934269532e-06, "loss": 0.0641, "step": 31514 }, { "epoch": 0.6944421491017865, "grad_norm": 0.5604740977287292, "learning_rate": 6.764671151450993e-06, "loss": 0.0568, "step": 31515 }, { "epoch": 0.6944641843913026, "grad_norm": 0.38638344407081604, "learning_rate": 6.7637764105794045e-06, "loss": 0.0257, "step": 31516 }, { "epoch": 0.6944862196808188, "grad_norm": 0.5475969314575195, "learning_rate": 6.7628817116593155e-06, "loss": 0.0465, "step": 31517 }, { "epoch": 0.694508254970335, "grad_norm": 0.6176003813743591, "learning_rate": 6.761987054695285e-06, "loss": 0.1209, "step": 31518 }, { "epoch": 0.6945302902598511, "grad_norm": 0.43424609303474426, "learning_rate": 6.761092439691879e-06, "loss": 0.0655, "step": 31519 }, { "epoch": 0.6945523255493673, "grad_norm": 0.5549329519271851, "learning_rate": 6.7601978666536415e-06, "loss": 0.0613, "step": 31520 }, { "epoch": 0.6945743608388835, "grad_norm": 0.6099535226821899, "learning_rate": 6.759303335585135e-06, "loss": 0.0737, "step": 31521 }, { "epoch": 0.6945963961283996, "grad_norm": 0.5255078077316284, "learning_rate": 6.758408846490916e-06, "loss": 0.0688, "step": 31522 }, { "epoch": 0.6946184314179158, "grad_norm": 0.29906564950942993, "learning_rate": 6.757514399375546e-06, "loss": 0.0684, "step": 31523 }, { "epoch": 0.694640466707432, "grad_norm": 0.6616004109382629, "learning_rate": 6.756619994243569e-06, "loss": 0.0723, "step": 31524 }, { "epoch": 0.6946625019969481, "grad_norm": 0.6636614799499512, "learning_rate": 6.7557256310995466e-06, "loss": 0.0631, "step": 31525 }, { "epoch": 0.6946845372864643, "grad_norm": 1.0266214609146118, "learning_rate": 6.75483130994804e-06, "loss": 0.0777, "step": 31526 }, { "epoch": 0.6947065725759805, "grad_norm": 0.5719205737113953, "learning_rate": 6.753937030793593e-06, "loss": 0.0534, "step": 31527 }, { "epoch": 0.6947286078654966, "grad_norm": 0.4600607454776764, "learning_rate": 6.753042793640767e-06, "loss": 0.0543, "step": 31528 }, { "epoch": 0.6947506431550128, "grad_norm": 0.6438581943511963, "learning_rate": 6.752148598494117e-06, "loss": 0.0751, "step": 31529 }, { "epoch": 0.694772678444529, "grad_norm": 0.6277874708175659, "learning_rate": 6.7512544453582025e-06, "loss": 0.0637, "step": 31530 }, { "epoch": 0.6947947137340451, "grad_norm": 0.3580406606197357, "learning_rate": 6.750360334237566e-06, "loss": 0.0643, "step": 31531 }, { "epoch": 0.6948167490235613, "grad_norm": 0.6605425477027893, "learning_rate": 6.749466265136768e-06, "loss": 0.0437, "step": 31532 }, { "epoch": 0.6948387843130774, "grad_norm": 0.7241694331169128, "learning_rate": 6.74857223806037e-06, "loss": 0.074, "step": 31533 }, { "epoch": 0.6948608196025936, "grad_norm": 0.6694587469100952, "learning_rate": 6.747678253012912e-06, "loss": 0.0752, "step": 31534 }, { "epoch": 0.6948828548921098, "grad_norm": 0.5659814476966858, "learning_rate": 6.7467843099989586e-06, "loss": 0.0638, "step": 31535 }, { "epoch": 0.6949048901816258, "grad_norm": 0.7333672642707825, "learning_rate": 6.745890409023046e-06, "loss": 0.0622, "step": 31536 }, { "epoch": 0.694926925471142, "grad_norm": 0.5780356526374817, "learning_rate": 6.744996550089754e-06, "loss": 0.0435, "step": 31537 }, { "epoch": 0.6949489607606582, "grad_norm": 0.6679885387420654, "learning_rate": 6.744102733203614e-06, "loss": 0.0904, "step": 31538 }, { "epoch": 0.6949709960501743, "grad_norm": 0.7451974749565125, "learning_rate": 6.7432089583691935e-06, "loss": 0.0646, "step": 31539 }, { "epoch": 0.6949930313396905, "grad_norm": 0.5885077714920044, "learning_rate": 6.7423152255910235e-06, "loss": 0.0587, "step": 31540 }, { "epoch": 0.6950150666292066, "grad_norm": 0.40058448910713196, "learning_rate": 6.741421534873682e-06, "loss": 0.0703, "step": 31541 }, { "epoch": 0.6950371019187228, "grad_norm": 0.6523475646972656, "learning_rate": 6.740527886221711e-06, "loss": 0.0713, "step": 31542 }, { "epoch": 0.695059137208239, "grad_norm": 0.6732547283172607, "learning_rate": 6.739634279639653e-06, "loss": 0.0477, "step": 31543 }, { "epoch": 0.6950811724977551, "grad_norm": 0.3559172451496124, "learning_rate": 6.738740715132068e-06, "loss": 0.0706, "step": 31544 }, { "epoch": 0.6951032077872713, "grad_norm": 0.7855676412582397, "learning_rate": 6.737847192703506e-06, "loss": 0.057, "step": 31545 }, { "epoch": 0.6951252430767875, "grad_norm": 0.5168651342391968, "learning_rate": 6.736953712358524e-06, "loss": 0.0316, "step": 31546 }, { "epoch": 0.6951472783663036, "grad_norm": 0.7392825484275818, "learning_rate": 6.736060274101664e-06, "loss": 0.0801, "step": 31547 }, { "epoch": 0.6951693136558198, "grad_norm": 0.9888644814491272, "learning_rate": 6.7351668779374795e-06, "loss": 0.0965, "step": 31548 }, { "epoch": 0.695191348945336, "grad_norm": 0.7591500282287598, "learning_rate": 6.734273523870521e-06, "loss": 0.064, "step": 31549 }, { "epoch": 0.6952133842348521, "grad_norm": 0.328184574842453, "learning_rate": 6.733380211905348e-06, "loss": 0.0527, "step": 31550 }, { "epoch": 0.6952354195243683, "grad_norm": 0.8666787147521973, "learning_rate": 6.732486942046494e-06, "loss": 0.0823, "step": 31551 }, { "epoch": 0.6952574548138845, "grad_norm": 0.7486506700515747, "learning_rate": 6.73159371429852e-06, "loss": 0.1014, "step": 31552 }, { "epoch": 0.6952794901034006, "grad_norm": 0.8536949157714844, "learning_rate": 6.730700528665977e-06, "loss": 0.0947, "step": 31553 }, { "epoch": 0.6953015253929168, "grad_norm": 0.5970146656036377, "learning_rate": 6.729807385153404e-06, "loss": 0.0646, "step": 31554 }, { "epoch": 0.695323560682433, "grad_norm": 0.6523281335830688, "learning_rate": 6.728914283765358e-06, "loss": 0.1052, "step": 31555 }, { "epoch": 0.6953455959719491, "grad_norm": 0.5225106477737427, "learning_rate": 6.728021224506387e-06, "loss": 0.0515, "step": 31556 }, { "epoch": 0.6953676312614653, "grad_norm": 0.9525690674781799, "learning_rate": 6.727128207381047e-06, "loss": 0.1024, "step": 31557 }, { "epoch": 0.6953896665509814, "grad_norm": 0.53550124168396, "learning_rate": 6.726235232393871e-06, "loss": 0.0681, "step": 31558 }, { "epoch": 0.6954117018404976, "grad_norm": 0.6973468661308289, "learning_rate": 6.725342299549418e-06, "loss": 0.0684, "step": 31559 }, { "epoch": 0.6954337371300137, "grad_norm": 0.6484355926513672, "learning_rate": 6.724449408852238e-06, "loss": 0.0693, "step": 31560 }, { "epoch": 0.6954557724195298, "grad_norm": 0.7097878456115723, "learning_rate": 6.7235565603068685e-06, "loss": 0.0387, "step": 31561 }, { "epoch": 0.695477807709046, "grad_norm": 0.5216615796089172, "learning_rate": 6.722663753917871e-06, "loss": 0.0481, "step": 31562 }, { "epoch": 0.6954998429985622, "grad_norm": 0.49898484349250793, "learning_rate": 6.721770989689774e-06, "loss": 0.0611, "step": 31563 }, { "epoch": 0.6955218782880783, "grad_norm": 0.7982268929481506, "learning_rate": 6.720878267627148e-06, "loss": 0.0593, "step": 31564 }, { "epoch": 0.6955439135775945, "grad_norm": 0.6225391626358032, "learning_rate": 6.719985587734522e-06, "loss": 0.0657, "step": 31565 }, { "epoch": 0.6955659488671107, "grad_norm": 0.6259384155273438, "learning_rate": 6.719092950016455e-06, "loss": 0.0829, "step": 31566 }, { "epoch": 0.6955879841566268, "grad_norm": 0.7001049518585205, "learning_rate": 6.718200354477483e-06, "loss": 0.076, "step": 31567 }, { "epoch": 0.695610019446143, "grad_norm": 0.6522597074508667, "learning_rate": 6.717307801122156e-06, "loss": 0.0632, "step": 31568 }, { "epoch": 0.6956320547356591, "grad_norm": 0.16687041521072388, "learning_rate": 6.716415289955031e-06, "loss": 0.0272, "step": 31569 }, { "epoch": 0.6956540900251753, "grad_norm": 0.3456957936286926, "learning_rate": 6.715522820980637e-06, "loss": 0.0338, "step": 31570 }, { "epoch": 0.6956761253146915, "grad_norm": 0.5215331315994263, "learning_rate": 6.7146303942035284e-06, "loss": 0.046, "step": 31571 }, { "epoch": 0.6956981606042076, "grad_norm": 0.41455984115600586, "learning_rate": 6.713738009628249e-06, "loss": 0.0631, "step": 31572 }, { "epoch": 0.6957201958937238, "grad_norm": 0.4377850294113159, "learning_rate": 6.712845667259353e-06, "loss": 0.0401, "step": 31573 }, { "epoch": 0.69574223118324, "grad_norm": 0.5724890232086182, "learning_rate": 6.711953367101371e-06, "loss": 0.0871, "step": 31574 }, { "epoch": 0.6957642664727561, "grad_norm": 0.39435017108917236, "learning_rate": 6.711061109158855e-06, "loss": 0.0763, "step": 31575 }, { "epoch": 0.6957863017622723, "grad_norm": 0.562484085559845, "learning_rate": 6.710168893436357e-06, "loss": 0.0463, "step": 31576 }, { "epoch": 0.6958083370517885, "grad_norm": 0.7392009496688843, "learning_rate": 6.709276719938406e-06, "loss": 0.0798, "step": 31577 }, { "epoch": 0.6958303723413046, "grad_norm": 0.3960777223110199, "learning_rate": 6.708384588669556e-06, "loss": 0.0645, "step": 31578 }, { "epoch": 0.6958524076308208, "grad_norm": 0.3344357907772064, "learning_rate": 6.707492499634349e-06, "loss": 0.0506, "step": 31579 }, { "epoch": 0.695874442920337, "grad_norm": 0.5125272274017334, "learning_rate": 6.706600452837336e-06, "loss": 0.0726, "step": 31580 }, { "epoch": 0.6958964782098531, "grad_norm": 0.704953134059906, "learning_rate": 6.705708448283047e-06, "loss": 0.0549, "step": 31581 }, { "epoch": 0.6959185134993693, "grad_norm": 0.6927117705345154, "learning_rate": 6.7048164859760345e-06, "loss": 0.0722, "step": 31582 }, { "epoch": 0.6959405487888854, "grad_norm": 0.5723907351493835, "learning_rate": 6.703924565920845e-06, "loss": 0.0529, "step": 31583 }, { "epoch": 0.6959625840784016, "grad_norm": 0.34423258900642395, "learning_rate": 6.703032688122009e-06, "loss": 0.067, "step": 31584 }, { "epoch": 0.6959846193679177, "grad_norm": 0.693626344203949, "learning_rate": 6.702140852584084e-06, "loss": 0.0942, "step": 31585 }, { "epoch": 0.6960066546574338, "grad_norm": 0.9966861009597778, "learning_rate": 6.701249059311594e-06, "loss": 0.0994, "step": 31586 }, { "epoch": 0.69602868994695, "grad_norm": 0.9389376044273376, "learning_rate": 6.7003573083091035e-06, "loss": 0.0547, "step": 31587 }, { "epoch": 0.6960507252364662, "grad_norm": 0.4348013997077942, "learning_rate": 6.6994655995811385e-06, "loss": 0.0526, "step": 31588 }, { "epoch": 0.6960727605259823, "grad_norm": 0.9887492656707764, "learning_rate": 6.698573933132251e-06, "loss": 0.0601, "step": 31589 }, { "epoch": 0.6960947958154985, "grad_norm": 0.933773934841156, "learning_rate": 6.697682308966973e-06, "loss": 0.0807, "step": 31590 }, { "epoch": 0.6961168311050147, "grad_norm": 1.0933996438980103, "learning_rate": 6.696790727089851e-06, "loss": 0.0706, "step": 31591 }, { "epoch": 0.6961388663945308, "grad_norm": 0.5535470247268677, "learning_rate": 6.695899187505433e-06, "loss": 0.0681, "step": 31592 }, { "epoch": 0.696160901684047, "grad_norm": 0.8329253196716309, "learning_rate": 6.6950076902182414e-06, "loss": 0.0935, "step": 31593 }, { "epoch": 0.6961829369735631, "grad_norm": 0.6101568341255188, "learning_rate": 6.69411623523284e-06, "loss": 0.066, "step": 31594 }, { "epoch": 0.6962049722630793, "grad_norm": 0.673041820526123, "learning_rate": 6.693224822553753e-06, "loss": 0.0564, "step": 31595 }, { "epoch": 0.6962270075525955, "grad_norm": 0.5507842302322388, "learning_rate": 6.6923334521855325e-06, "loss": 0.067, "step": 31596 }, { "epoch": 0.6962490428421116, "grad_norm": 0.8264877200126648, "learning_rate": 6.691442124132707e-06, "loss": 0.0644, "step": 31597 }, { "epoch": 0.6962710781316278, "grad_norm": 0.46472322940826416, "learning_rate": 6.690550838399823e-06, "loss": 0.0657, "step": 31598 }, { "epoch": 0.696293113421144, "grad_norm": 0.7417970895767212, "learning_rate": 6.6896595949914235e-06, "loss": 0.0585, "step": 31599 }, { "epoch": 0.6963151487106601, "grad_norm": 0.19925373792648315, "learning_rate": 6.68876839391204e-06, "loss": 0.0389, "step": 31600 }, { "epoch": 0.6963371840001763, "grad_norm": 0.5656139254570007, "learning_rate": 6.687877235166215e-06, "loss": 0.0871, "step": 31601 }, { "epoch": 0.6963592192896925, "grad_norm": 0.6472231149673462, "learning_rate": 6.686986118758487e-06, "loss": 0.0716, "step": 31602 }, { "epoch": 0.6963812545792086, "grad_norm": 0.447115421295166, "learning_rate": 6.686095044693404e-06, "loss": 0.0429, "step": 31603 }, { "epoch": 0.6964032898687248, "grad_norm": 0.8555667400360107, "learning_rate": 6.685204012975492e-06, "loss": 0.0448, "step": 31604 }, { "epoch": 0.696425325158241, "grad_norm": 0.7103302478790283, "learning_rate": 6.684313023609293e-06, "loss": 0.0593, "step": 31605 }, { "epoch": 0.6964473604477571, "grad_norm": 0.4478924572467804, "learning_rate": 6.683422076599353e-06, "loss": 0.0485, "step": 31606 }, { "epoch": 0.6964693957372733, "grad_norm": 0.5658189058303833, "learning_rate": 6.6825311719501995e-06, "loss": 0.071, "step": 31607 }, { "epoch": 0.6964914310267895, "grad_norm": 0.5483789443969727, "learning_rate": 6.681640309666381e-06, "loss": 0.0588, "step": 31608 }, { "epoch": 0.6965134663163056, "grad_norm": 1.1895588636398315, "learning_rate": 6.6807494897524165e-06, "loss": 0.067, "step": 31609 }, { "epoch": 0.6965355016058217, "grad_norm": 0.5417526960372925, "learning_rate": 6.679858712212867e-06, "loss": 0.0624, "step": 31610 }, { "epoch": 0.6965575368953378, "grad_norm": 0.7089594006538391, "learning_rate": 6.678967977052254e-06, "loss": 0.0835, "step": 31611 }, { "epoch": 0.696579572184854, "grad_norm": 0.7218774557113647, "learning_rate": 6.678077284275124e-06, "loss": 0.0723, "step": 31612 }, { "epoch": 0.6966016074743702, "grad_norm": 0.3610294461250305, "learning_rate": 6.677186633886005e-06, "loss": 0.0643, "step": 31613 }, { "epoch": 0.6966236427638863, "grad_norm": 0.6099391579627991, "learning_rate": 6.676296025889436e-06, "loss": 0.0824, "step": 31614 }, { "epoch": 0.6966456780534025, "grad_norm": 0.4480419456958771, "learning_rate": 6.675405460289961e-06, "loss": 0.0475, "step": 31615 }, { "epoch": 0.6966677133429187, "grad_norm": 0.5956529974937439, "learning_rate": 6.6745149370920976e-06, "loss": 0.0811, "step": 31616 }, { "epoch": 0.6966897486324348, "grad_norm": 0.4500795602798462, "learning_rate": 6.673624456300407e-06, "loss": 0.0494, "step": 31617 }, { "epoch": 0.696711783921951, "grad_norm": 0.47888287901878357, "learning_rate": 6.672734017919406e-06, "loss": 0.0585, "step": 31618 }, { "epoch": 0.6967338192114672, "grad_norm": 0.777249813079834, "learning_rate": 6.671843621953641e-06, "loss": 0.0896, "step": 31619 }, { "epoch": 0.6967558545009833, "grad_norm": 0.7562878131866455, "learning_rate": 6.670953268407638e-06, "loss": 0.0666, "step": 31620 }, { "epoch": 0.6967778897904995, "grad_norm": 0.5180929899215698, "learning_rate": 6.670062957285935e-06, "loss": 0.0501, "step": 31621 }, { "epoch": 0.6967999250800156, "grad_norm": 0.4740729331970215, "learning_rate": 6.6691726885930735e-06, "loss": 0.0355, "step": 31622 }, { "epoch": 0.6968219603695318, "grad_norm": 0.6123851537704468, "learning_rate": 6.668282462333578e-06, "loss": 0.0703, "step": 31623 }, { "epoch": 0.696843995659048, "grad_norm": 0.5605183243751526, "learning_rate": 6.667392278511987e-06, "loss": 0.0705, "step": 31624 }, { "epoch": 0.6968660309485641, "grad_norm": 0.41827264428138733, "learning_rate": 6.666502137132838e-06, "loss": 0.0681, "step": 31625 }, { "epoch": 0.6968880662380803, "grad_norm": 0.7039856314659119, "learning_rate": 6.6656120382006655e-06, "loss": 0.0574, "step": 31626 }, { "epoch": 0.6969101015275965, "grad_norm": 0.8082373738288879, "learning_rate": 6.664721981719995e-06, "loss": 0.0578, "step": 31627 }, { "epoch": 0.6969321368171126, "grad_norm": 0.9520025253295898, "learning_rate": 6.663831967695365e-06, "loss": 0.0829, "step": 31628 }, { "epoch": 0.6969541721066288, "grad_norm": 0.3050616681575775, "learning_rate": 6.662941996131316e-06, "loss": 0.0491, "step": 31629 }, { "epoch": 0.696976207396145, "grad_norm": 0.4697846472263336, "learning_rate": 6.662052067032367e-06, "loss": 0.0641, "step": 31630 }, { "epoch": 0.6969982426856611, "grad_norm": 0.4554535448551178, "learning_rate": 6.661162180403065e-06, "loss": 0.0927, "step": 31631 }, { "epoch": 0.6970202779751773, "grad_norm": 0.37398502230644226, "learning_rate": 6.660272336247922e-06, "loss": 0.0665, "step": 31632 }, { "epoch": 0.6970423132646935, "grad_norm": 0.8375331163406372, "learning_rate": 6.659382534571497e-06, "loss": 0.0636, "step": 31633 }, { "epoch": 0.6970643485542095, "grad_norm": 0.48946166038513184, "learning_rate": 6.658492775378304e-06, "loss": 0.0763, "step": 31634 }, { "epoch": 0.6970863838437257, "grad_norm": 0.8278186321258545, "learning_rate": 6.657603058672885e-06, "loss": 0.0623, "step": 31635 }, { "epoch": 0.6971084191332418, "grad_norm": 0.8014156222343445, "learning_rate": 6.656713384459763e-06, "loss": 0.0771, "step": 31636 }, { "epoch": 0.697130454422758, "grad_norm": 0.4028281569480896, "learning_rate": 6.655823752743474e-06, "loss": 0.0451, "step": 31637 }, { "epoch": 0.6971524897122742, "grad_norm": 0.8551614284515381, "learning_rate": 6.654934163528552e-06, "loss": 0.0615, "step": 31638 }, { "epoch": 0.6971745250017903, "grad_norm": 0.41994708776474, "learning_rate": 6.654044616819517e-06, "loss": 0.0594, "step": 31639 }, { "epoch": 0.6971965602913065, "grad_norm": 0.4402499198913574, "learning_rate": 6.653155112620917e-06, "loss": 0.0554, "step": 31640 }, { "epoch": 0.6972185955808227, "grad_norm": 0.8593316078186035, "learning_rate": 6.6522656509372694e-06, "loss": 0.0691, "step": 31641 }, { "epoch": 0.6972406308703388, "grad_norm": 0.4997159540653229, "learning_rate": 6.651376231773116e-06, "loss": 0.0553, "step": 31642 }, { "epoch": 0.697262666159855, "grad_norm": 0.36729708313941956, "learning_rate": 6.6504868551329735e-06, "loss": 0.0385, "step": 31643 }, { "epoch": 0.6972847014493712, "grad_norm": 0.32673659920692444, "learning_rate": 6.649597521021379e-06, "loss": 0.0432, "step": 31644 }, { "epoch": 0.6973067367388873, "grad_norm": 0.897422730922699, "learning_rate": 6.6487082294428674e-06, "loss": 0.0633, "step": 31645 }, { "epoch": 0.6973287720284035, "grad_norm": 0.584705650806427, "learning_rate": 6.647818980401958e-06, "loss": 0.0759, "step": 31646 }, { "epoch": 0.6973508073179197, "grad_norm": 0.6281449794769287, "learning_rate": 6.646929773903185e-06, "loss": 0.0783, "step": 31647 }, { "epoch": 0.6973728426074358, "grad_norm": 0.6801300048828125, "learning_rate": 6.646040609951078e-06, "loss": 0.0489, "step": 31648 }, { "epoch": 0.697394877896952, "grad_norm": 0.5982131361961365, "learning_rate": 6.645151488550172e-06, "loss": 0.0382, "step": 31649 }, { "epoch": 0.6974169131864681, "grad_norm": 0.6386008858680725, "learning_rate": 6.644262409704985e-06, "loss": 0.0674, "step": 31650 }, { "epoch": 0.6974389484759843, "grad_norm": 0.866991400718689, "learning_rate": 6.643373373420048e-06, "loss": 0.06, "step": 31651 }, { "epoch": 0.6974609837655005, "grad_norm": 0.832883358001709, "learning_rate": 6.642484379699893e-06, "loss": 0.0694, "step": 31652 }, { "epoch": 0.6974830190550166, "grad_norm": 0.6856433153152466, "learning_rate": 6.641595428549051e-06, "loss": 0.0831, "step": 31653 }, { "epoch": 0.6975050543445328, "grad_norm": 0.5451369285583496, "learning_rate": 6.640706519972047e-06, "loss": 0.0631, "step": 31654 }, { "epoch": 0.697527089634049, "grad_norm": 0.5590823888778687, "learning_rate": 6.639817653973395e-06, "loss": 0.0787, "step": 31655 }, { "epoch": 0.6975491249235651, "grad_norm": 0.5571610331535339, "learning_rate": 6.638928830557647e-06, "loss": 0.0517, "step": 31656 }, { "epoch": 0.6975711602130813, "grad_norm": 0.8838882446289062, "learning_rate": 6.638040049729313e-06, "loss": 0.1044, "step": 31657 }, { "epoch": 0.6975931955025975, "grad_norm": 0.5527793765068054, "learning_rate": 6.637151311492929e-06, "loss": 0.0658, "step": 31658 }, { "epoch": 0.6976152307921135, "grad_norm": 0.6230137944221497, "learning_rate": 6.636262615853008e-06, "loss": 0.069, "step": 31659 }, { "epoch": 0.6976372660816297, "grad_norm": 0.5224071145057678, "learning_rate": 6.6353739628140955e-06, "loss": 0.0869, "step": 31660 }, { "epoch": 0.6976593013711458, "grad_norm": 0.6230269074440002, "learning_rate": 6.6344853523807066e-06, "loss": 0.0604, "step": 31661 }, { "epoch": 0.697681336660662, "grad_norm": 0.43963879346847534, "learning_rate": 6.633596784557368e-06, "loss": 0.0689, "step": 31662 }, { "epoch": 0.6977033719501782, "grad_norm": 0.6113603711128235, "learning_rate": 6.632708259348612e-06, "loss": 0.0735, "step": 31663 }, { "epoch": 0.6977254072396943, "grad_norm": 0.797808051109314, "learning_rate": 6.631819776758955e-06, "loss": 0.0896, "step": 31664 }, { "epoch": 0.6977474425292105, "grad_norm": 0.6850062608718872, "learning_rate": 6.630931336792933e-06, "loss": 0.0527, "step": 31665 }, { "epoch": 0.6977694778187267, "grad_norm": 0.471224844455719, "learning_rate": 6.630042939455054e-06, "loss": 0.0467, "step": 31666 }, { "epoch": 0.6977915131082428, "grad_norm": 0.4611636698246002, "learning_rate": 6.629154584749867e-06, "loss": 0.0491, "step": 31667 }, { "epoch": 0.697813548397759, "grad_norm": 0.7788837552070618, "learning_rate": 6.628266272681879e-06, "loss": 0.1223, "step": 31668 }, { "epoch": 0.6978355836872752, "grad_norm": 0.5027675628662109, "learning_rate": 6.627378003255623e-06, "loss": 0.0753, "step": 31669 }, { "epoch": 0.6978576189767913, "grad_norm": 0.5832859873771667, "learning_rate": 6.626489776475618e-06, "loss": 0.0701, "step": 31670 }, { "epoch": 0.6978796542663075, "grad_norm": 0.2542109191417694, "learning_rate": 6.625601592346389e-06, "loss": 0.0464, "step": 31671 }, { "epoch": 0.6979016895558237, "grad_norm": 0.8424588441848755, "learning_rate": 6.624713450872467e-06, "loss": 0.0662, "step": 31672 }, { "epoch": 0.6979237248453398, "grad_norm": 0.8211095333099365, "learning_rate": 6.623825352058366e-06, "loss": 0.0668, "step": 31673 }, { "epoch": 0.697945760134856, "grad_norm": 0.4824634790420532, "learning_rate": 6.622937295908614e-06, "loss": 0.0484, "step": 31674 }, { "epoch": 0.6979677954243722, "grad_norm": 0.629230797290802, "learning_rate": 6.622049282427736e-06, "loss": 0.0595, "step": 31675 }, { "epoch": 0.6979898307138883, "grad_norm": 0.5439085960388184, "learning_rate": 6.621161311620257e-06, "loss": 0.0692, "step": 31676 }, { "epoch": 0.6980118660034045, "grad_norm": 0.5700076818466187, "learning_rate": 6.620273383490693e-06, "loss": 0.0553, "step": 31677 }, { "epoch": 0.6980339012929206, "grad_norm": 0.8106339573860168, "learning_rate": 6.619385498043567e-06, "loss": 0.0895, "step": 31678 }, { "epoch": 0.6980559365824368, "grad_norm": 0.5657702684402466, "learning_rate": 6.618497655283413e-06, "loss": 0.0596, "step": 31679 }, { "epoch": 0.698077971871953, "grad_norm": 0.441135048866272, "learning_rate": 6.617609855214739e-06, "loss": 0.0608, "step": 31680 }, { "epoch": 0.6981000071614691, "grad_norm": 0.6061432957649231, "learning_rate": 6.616722097842078e-06, "loss": 0.0408, "step": 31681 }, { "epoch": 0.6981220424509853, "grad_norm": 0.5782400369644165, "learning_rate": 6.615834383169936e-06, "loss": 0.0847, "step": 31682 }, { "epoch": 0.6981440777405015, "grad_norm": 0.798831582069397, "learning_rate": 6.6149467112028565e-06, "loss": 0.0686, "step": 31683 }, { "epoch": 0.6981661130300175, "grad_norm": 0.6712741851806641, "learning_rate": 6.614059081945344e-06, "loss": 0.0725, "step": 31684 }, { "epoch": 0.6981881483195337, "grad_norm": 0.5928139090538025, "learning_rate": 6.613171495401926e-06, "loss": 0.0852, "step": 31685 }, { "epoch": 0.6982101836090499, "grad_norm": 0.3571031987667084, "learning_rate": 6.612283951577128e-06, "loss": 0.0597, "step": 31686 }, { "epoch": 0.698232218898566, "grad_norm": 0.7766207456588745, "learning_rate": 6.611396450475461e-06, "loss": 0.0933, "step": 31687 }, { "epoch": 0.6982542541880822, "grad_norm": 0.43149346113204956, "learning_rate": 6.610508992101455e-06, "loss": 0.0789, "step": 31688 }, { "epoch": 0.6982762894775983, "grad_norm": 0.39565566182136536, "learning_rate": 6.609621576459615e-06, "loss": 0.0761, "step": 31689 }, { "epoch": 0.6982983247671145, "grad_norm": 0.7162171006202698, "learning_rate": 6.608734203554482e-06, "loss": 0.0934, "step": 31690 }, { "epoch": 0.6983203600566307, "grad_norm": 0.6352521777153015, "learning_rate": 6.60784687339056e-06, "loss": 0.0405, "step": 31691 }, { "epoch": 0.6983423953461468, "grad_norm": 1.0131667852401733, "learning_rate": 6.606959585972379e-06, "loss": 0.0854, "step": 31692 }, { "epoch": 0.698364430635663, "grad_norm": 0.6797551512718201, "learning_rate": 6.606072341304449e-06, "loss": 0.067, "step": 31693 }, { "epoch": 0.6983864659251792, "grad_norm": 0.37858885526657104, "learning_rate": 6.605185139391293e-06, "loss": 0.0705, "step": 31694 }, { "epoch": 0.6984085012146953, "grad_norm": 0.30354249477386475, "learning_rate": 6.604297980237437e-06, "loss": 0.0677, "step": 31695 }, { "epoch": 0.6984305365042115, "grad_norm": 0.6019755601882935, "learning_rate": 6.603410863847388e-06, "loss": 0.0588, "step": 31696 }, { "epoch": 0.6984525717937277, "grad_norm": 0.29795944690704346, "learning_rate": 6.60252379022567e-06, "loss": 0.0561, "step": 31697 }, { "epoch": 0.6984746070832438, "grad_norm": 0.6986798644065857, "learning_rate": 6.6016367593768006e-06, "loss": 0.0615, "step": 31698 }, { "epoch": 0.69849664237276, "grad_norm": 0.6449378132820129, "learning_rate": 6.600749771305306e-06, "loss": 0.0555, "step": 31699 }, { "epoch": 0.6985186776622762, "grad_norm": 0.6440557241439819, "learning_rate": 6.599862826015689e-06, "loss": 0.0815, "step": 31700 }, { "epoch": 0.6985407129517923, "grad_norm": 0.6154255867004395, "learning_rate": 6.598975923512478e-06, "loss": 0.0425, "step": 31701 }, { "epoch": 0.6985627482413085, "grad_norm": 0.7387394309043884, "learning_rate": 6.598089063800191e-06, "loss": 0.0717, "step": 31702 }, { "epoch": 0.6985847835308246, "grad_norm": 0.7100567817687988, "learning_rate": 6.597202246883337e-06, "loss": 0.0482, "step": 31703 }, { "epoch": 0.6986068188203408, "grad_norm": 0.5746628046035767, "learning_rate": 6.596315472766443e-06, "loss": 0.0484, "step": 31704 }, { "epoch": 0.698628854109857, "grad_norm": 0.6601393818855286, "learning_rate": 6.595428741454009e-06, "loss": 0.0649, "step": 31705 }, { "epoch": 0.6986508893993731, "grad_norm": 0.6290224194526672, "learning_rate": 6.594542052950577e-06, "loss": 0.0764, "step": 31706 }, { "epoch": 0.6986729246888893, "grad_norm": 0.5174217820167542, "learning_rate": 6.593655407260642e-06, "loss": 0.0607, "step": 31707 }, { "epoch": 0.6986949599784054, "grad_norm": 0.5284660458564758, "learning_rate": 6.592768804388728e-06, "loss": 0.088, "step": 31708 }, { "epoch": 0.6987169952679215, "grad_norm": 0.7085179686546326, "learning_rate": 6.591882244339356e-06, "loss": 0.1045, "step": 31709 }, { "epoch": 0.6987390305574377, "grad_norm": 0.7137941122055054, "learning_rate": 6.590995727117033e-06, "loss": 0.0783, "step": 31710 }, { "epoch": 0.6987610658469539, "grad_norm": 0.3108011782169342, "learning_rate": 6.5901092527262815e-06, "loss": 0.0306, "step": 31711 }, { "epoch": 0.69878310113647, "grad_norm": 0.4562077522277832, "learning_rate": 6.589222821171603e-06, "loss": 0.0489, "step": 31712 }, { "epoch": 0.6988051364259862, "grad_norm": 0.48867467045783997, "learning_rate": 6.5883364324575334e-06, "loss": 0.0586, "step": 31713 }, { "epoch": 0.6988271717155023, "grad_norm": 0.6519798636436462, "learning_rate": 6.5874500865885715e-06, "loss": 0.0485, "step": 31714 }, { "epoch": 0.6988492070050185, "grad_norm": 0.5892458558082581, "learning_rate": 6.586563783569244e-06, "loss": 0.0665, "step": 31715 }, { "epoch": 0.6988712422945347, "grad_norm": 0.7627581357955933, "learning_rate": 6.585677523404053e-06, "loss": 0.0594, "step": 31716 }, { "epoch": 0.6988932775840508, "grad_norm": 0.7650572061538696, "learning_rate": 6.5847913060975185e-06, "loss": 0.0584, "step": 31717 }, { "epoch": 0.698915312873567, "grad_norm": 0.6855986714363098, "learning_rate": 6.58390513165416e-06, "loss": 0.0481, "step": 31718 }, { "epoch": 0.6989373481630832, "grad_norm": 1.0722029209136963, "learning_rate": 6.58301900007848e-06, "loss": 0.0692, "step": 31719 }, { "epoch": 0.6989593834525993, "grad_norm": 0.681914210319519, "learning_rate": 6.582132911374997e-06, "loss": 0.0731, "step": 31720 }, { "epoch": 0.6989814187421155, "grad_norm": 0.496878981590271, "learning_rate": 6.581246865548227e-06, "loss": 0.0676, "step": 31721 }, { "epoch": 0.6990034540316317, "grad_norm": 0.6400668621063232, "learning_rate": 6.5803608626026845e-06, "loss": 0.0606, "step": 31722 }, { "epoch": 0.6990254893211478, "grad_norm": 0.7410255670547485, "learning_rate": 6.579474902542875e-06, "loss": 0.0802, "step": 31723 }, { "epoch": 0.699047524610664, "grad_norm": 0.7393596172332764, "learning_rate": 6.578588985373315e-06, "loss": 0.0707, "step": 31724 }, { "epoch": 0.6990695599001802, "grad_norm": 0.7923616170883179, "learning_rate": 6.577703111098522e-06, "loss": 0.0789, "step": 31725 }, { "epoch": 0.6990915951896963, "grad_norm": 1.0637468099594116, "learning_rate": 6.576817279722999e-06, "loss": 0.1099, "step": 31726 }, { "epoch": 0.6991136304792125, "grad_norm": 0.6412050724029541, "learning_rate": 6.575931491251263e-06, "loss": 0.0675, "step": 31727 }, { "epoch": 0.6991356657687287, "grad_norm": 0.8319854140281677, "learning_rate": 6.575045745687823e-06, "loss": 0.0854, "step": 31728 }, { "epoch": 0.6991577010582448, "grad_norm": 0.5710998773574829, "learning_rate": 6.5741600430372e-06, "loss": 0.0477, "step": 31729 }, { "epoch": 0.699179736347761, "grad_norm": 0.4356778562068939, "learning_rate": 6.5732743833038936e-06, "loss": 0.0556, "step": 31730 }, { "epoch": 0.6992017716372771, "grad_norm": 0.6105290651321411, "learning_rate": 6.572388766492418e-06, "loss": 0.0759, "step": 31731 }, { "epoch": 0.6992238069267933, "grad_norm": 0.8622562885284424, "learning_rate": 6.571503192607291e-06, "loss": 0.1058, "step": 31732 }, { "epoch": 0.6992458422163094, "grad_norm": 0.6612144708633423, "learning_rate": 6.570617661653013e-06, "loss": 0.0565, "step": 31733 }, { "epoch": 0.6992678775058255, "grad_norm": 0.7237197756767273, "learning_rate": 6.569732173634105e-06, "loss": 0.0848, "step": 31734 }, { "epoch": 0.6992899127953417, "grad_norm": 0.8900412917137146, "learning_rate": 6.5688467285550605e-06, "loss": 0.0872, "step": 31735 }, { "epoch": 0.6993119480848579, "grad_norm": 0.7578569054603577, "learning_rate": 6.5679613264204106e-06, "loss": 0.068, "step": 31736 }, { "epoch": 0.699333983374374, "grad_norm": 1.0697143077850342, "learning_rate": 6.567075967234651e-06, "loss": 0.0573, "step": 31737 }, { "epoch": 0.6993560186638902, "grad_norm": 0.7879821062088013, "learning_rate": 6.566190651002301e-06, "loss": 0.0488, "step": 31738 }, { "epoch": 0.6993780539534064, "grad_norm": 0.795092761516571, "learning_rate": 6.56530537772786e-06, "loss": 0.076, "step": 31739 }, { "epoch": 0.6994000892429225, "grad_norm": 0.7902830243110657, "learning_rate": 6.56442014741584e-06, "loss": 0.0718, "step": 31740 }, { "epoch": 0.6994221245324387, "grad_norm": 0.5485015511512756, "learning_rate": 6.563534960070758e-06, "loss": 0.0493, "step": 31741 }, { "epoch": 0.6994441598219548, "grad_norm": 0.6182588338851929, "learning_rate": 6.562649815697111e-06, "loss": 0.0801, "step": 31742 }, { "epoch": 0.699466195111471, "grad_norm": 0.37286028265953064, "learning_rate": 6.561764714299414e-06, "loss": 0.044, "step": 31743 }, { "epoch": 0.6994882304009872, "grad_norm": 0.9628639817237854, "learning_rate": 6.560879655882173e-06, "loss": 0.066, "step": 31744 }, { "epoch": 0.6995102656905033, "grad_norm": 0.4594266712665558, "learning_rate": 6.559994640449903e-06, "loss": 0.0664, "step": 31745 }, { "epoch": 0.6995323009800195, "grad_norm": 0.38061192631721497, "learning_rate": 6.559109668007101e-06, "loss": 0.0622, "step": 31746 }, { "epoch": 0.6995543362695357, "grad_norm": 0.5606160759925842, "learning_rate": 6.5582247385582785e-06, "loss": 0.0781, "step": 31747 }, { "epoch": 0.6995763715590518, "grad_norm": 0.6480575203895569, "learning_rate": 6.557339852107951e-06, "loss": 0.0657, "step": 31748 }, { "epoch": 0.699598406848568, "grad_norm": 0.7018253803253174, "learning_rate": 6.556455008660614e-06, "loss": 0.0587, "step": 31749 }, { "epoch": 0.6996204421380842, "grad_norm": 0.5577670931816101, "learning_rate": 6.555570208220778e-06, "loss": 0.0598, "step": 31750 }, { "epoch": 0.6996424774276003, "grad_norm": 0.506737470626831, "learning_rate": 6.554685450792953e-06, "loss": 0.0729, "step": 31751 }, { "epoch": 0.6996645127171165, "grad_norm": 0.646371603012085, "learning_rate": 6.553800736381647e-06, "loss": 0.0482, "step": 31752 }, { "epoch": 0.6996865480066327, "grad_norm": 0.6322748064994812, "learning_rate": 6.552916064991359e-06, "loss": 0.0877, "step": 31753 }, { "epoch": 0.6997085832961488, "grad_norm": 0.6261048316955566, "learning_rate": 6.5520314366266e-06, "loss": 0.047, "step": 31754 }, { "epoch": 0.699730618585665, "grad_norm": 0.7317070364952087, "learning_rate": 6.551146851291878e-06, "loss": 0.0753, "step": 31755 }, { "epoch": 0.6997526538751812, "grad_norm": 0.5935584306716919, "learning_rate": 6.5502623089916925e-06, "loss": 0.0431, "step": 31756 }, { "epoch": 0.6997746891646973, "grad_norm": 0.5375268459320068, "learning_rate": 6.549377809730557e-06, "loss": 0.0625, "step": 31757 }, { "epoch": 0.6997967244542134, "grad_norm": 0.42635440826416016, "learning_rate": 6.548493353512961e-06, "loss": 0.0705, "step": 31758 }, { "epoch": 0.6998187597437295, "grad_norm": 0.6331323981285095, "learning_rate": 6.547608940343432e-06, "loss": 0.0649, "step": 31759 }, { "epoch": 0.6998407950332457, "grad_norm": 0.4363097548484802, "learning_rate": 6.546724570226455e-06, "loss": 0.0617, "step": 31760 }, { "epoch": 0.6998628303227619, "grad_norm": 0.3902519643306732, "learning_rate": 6.54584024316655e-06, "loss": 0.0433, "step": 31761 }, { "epoch": 0.699884865612278, "grad_norm": 0.6439230442047119, "learning_rate": 6.544955959168204e-06, "loss": 0.0497, "step": 31762 }, { "epoch": 0.6999069009017942, "grad_norm": 0.6874012351036072, "learning_rate": 6.544071718235941e-06, "loss": 0.055, "step": 31763 }, { "epoch": 0.6999289361913104, "grad_norm": 0.5667092204093933, "learning_rate": 6.54318752037425e-06, "loss": 0.0423, "step": 31764 }, { "epoch": 0.6999509714808265, "grad_norm": 0.4422939717769623, "learning_rate": 6.542303365587646e-06, "loss": 0.0406, "step": 31765 }, { "epoch": 0.6999730067703427, "grad_norm": 0.5657420754432678, "learning_rate": 6.54141925388062e-06, "loss": 0.066, "step": 31766 }, { "epoch": 0.6999950420598589, "grad_norm": 0.49220746755599976, "learning_rate": 6.540535185257681e-06, "loss": 0.058, "step": 31767 }, { "epoch": 0.700017077349375, "grad_norm": 0.6488606333732605, "learning_rate": 6.53965115972334e-06, "loss": 0.0694, "step": 31768 }, { "epoch": 0.7000391126388912, "grad_norm": 0.42882391810417175, "learning_rate": 6.538767177282086e-06, "loss": 0.0643, "step": 31769 }, { "epoch": 0.7000611479284073, "grad_norm": 0.4640500843524933, "learning_rate": 6.537883237938428e-06, "loss": 0.0541, "step": 31770 }, { "epoch": 0.7000831832179235, "grad_norm": 0.7394757270812988, "learning_rate": 6.5369993416968696e-06, "loss": 0.0391, "step": 31771 }, { "epoch": 0.7001052185074397, "grad_norm": 0.6922963261604309, "learning_rate": 6.536115488561915e-06, "loss": 0.0616, "step": 31772 }, { "epoch": 0.7001272537969558, "grad_norm": 0.4940657317638397, "learning_rate": 6.535231678538059e-06, "loss": 0.0769, "step": 31773 }, { "epoch": 0.700149289086472, "grad_norm": 0.6888747811317444, "learning_rate": 6.5343479116298055e-06, "loss": 0.0728, "step": 31774 }, { "epoch": 0.7001713243759882, "grad_norm": 0.541901707649231, "learning_rate": 6.5334641878416645e-06, "loss": 0.0536, "step": 31775 }, { "epoch": 0.7001933596655043, "grad_norm": 0.4023199677467346, "learning_rate": 6.532580507178126e-06, "loss": 0.052, "step": 31776 }, { "epoch": 0.7002153949550205, "grad_norm": 0.7046397924423218, "learning_rate": 6.531696869643694e-06, "loss": 0.076, "step": 31777 }, { "epoch": 0.7002374302445367, "grad_norm": 0.7501463890075684, "learning_rate": 6.5308132752428725e-06, "loss": 0.0979, "step": 31778 }, { "epoch": 0.7002594655340528, "grad_norm": 0.36293017864227295, "learning_rate": 6.529929723980164e-06, "loss": 0.0417, "step": 31779 }, { "epoch": 0.700281500823569, "grad_norm": 0.6076620221138, "learning_rate": 6.529046215860061e-06, "loss": 0.058, "step": 31780 }, { "epoch": 0.7003035361130852, "grad_norm": 0.5543405413627625, "learning_rate": 6.528162750887068e-06, "loss": 0.0743, "step": 31781 }, { "epoch": 0.7003255714026013, "grad_norm": 0.4903382956981659, "learning_rate": 6.52727932906569e-06, "loss": 0.0501, "step": 31782 }, { "epoch": 0.7003476066921174, "grad_norm": 0.5423287749290466, "learning_rate": 6.526395950400418e-06, "loss": 0.0908, "step": 31783 }, { "epoch": 0.7003696419816335, "grad_norm": 0.8327243328094482, "learning_rate": 6.525512614895759e-06, "loss": 0.0651, "step": 31784 }, { "epoch": 0.7003916772711497, "grad_norm": 0.7238420248031616, "learning_rate": 6.524629322556198e-06, "loss": 0.0547, "step": 31785 }, { "epoch": 0.7004137125606659, "grad_norm": 0.5071867108345032, "learning_rate": 6.523746073386257e-06, "loss": 0.0627, "step": 31786 }, { "epoch": 0.700435747850182, "grad_norm": 0.36604005098342896, "learning_rate": 6.522862867390417e-06, "loss": 0.0561, "step": 31787 }, { "epoch": 0.7004577831396982, "grad_norm": 0.7980619668960571, "learning_rate": 6.521979704573185e-06, "loss": 0.084, "step": 31788 }, { "epoch": 0.7004798184292144, "grad_norm": 0.35637110471725464, "learning_rate": 6.521096584939055e-06, "loss": 0.0574, "step": 31789 }, { "epoch": 0.7005018537187305, "grad_norm": 0.8854597210884094, "learning_rate": 6.520213508492524e-06, "loss": 0.0598, "step": 31790 }, { "epoch": 0.7005238890082467, "grad_norm": 0.38503119349479675, "learning_rate": 6.5193304752380985e-06, "loss": 0.0647, "step": 31791 }, { "epoch": 0.7005459242977629, "grad_norm": 0.4146852493286133, "learning_rate": 6.518447485180266e-06, "loss": 0.0764, "step": 31792 }, { "epoch": 0.700567959587279, "grad_norm": 0.8289828896522522, "learning_rate": 6.5175645383235284e-06, "loss": 0.0538, "step": 31793 }, { "epoch": 0.7005899948767952, "grad_norm": 0.7928619384765625, "learning_rate": 6.516681634672383e-06, "loss": 0.0807, "step": 31794 }, { "epoch": 0.7006120301663114, "grad_norm": 0.922696053981781, "learning_rate": 6.515798774231334e-06, "loss": 0.0702, "step": 31795 }, { "epoch": 0.7006340654558275, "grad_norm": 0.7617148756980896, "learning_rate": 6.514915957004864e-06, "loss": 0.0612, "step": 31796 }, { "epoch": 0.7006561007453437, "grad_norm": 0.48154619336128235, "learning_rate": 6.514033182997479e-06, "loss": 0.0568, "step": 31797 }, { "epoch": 0.7006781360348598, "grad_norm": 0.5889765024185181, "learning_rate": 6.513150452213677e-06, "loss": 0.086, "step": 31798 }, { "epoch": 0.700700171324376, "grad_norm": 0.9912756681442261, "learning_rate": 6.512267764657947e-06, "loss": 0.1029, "step": 31799 }, { "epoch": 0.7007222066138922, "grad_norm": 0.44655951857566833, "learning_rate": 6.511385120334787e-06, "loss": 0.054, "step": 31800 }, { "epoch": 0.7007442419034083, "grad_norm": 0.5742701292037964, "learning_rate": 6.510502519248696e-06, "loss": 0.0568, "step": 31801 }, { "epoch": 0.7007662771929245, "grad_norm": 0.7180150747299194, "learning_rate": 6.509619961404173e-06, "loss": 0.0507, "step": 31802 }, { "epoch": 0.7007883124824407, "grad_norm": 0.711679995059967, "learning_rate": 6.508737446805703e-06, "loss": 0.0752, "step": 31803 }, { "epoch": 0.7008103477719568, "grad_norm": 0.7048376798629761, "learning_rate": 6.5078549754577855e-06, "loss": 0.0575, "step": 31804 }, { "epoch": 0.700832383061473, "grad_norm": 0.30674299597740173, "learning_rate": 6.506972547364925e-06, "loss": 0.0294, "step": 31805 }, { "epoch": 0.7008544183509892, "grad_norm": 0.6454781889915466, "learning_rate": 6.506090162531599e-06, "loss": 0.0465, "step": 31806 }, { "epoch": 0.7008764536405052, "grad_norm": 0.5940794944763184, "learning_rate": 6.5052078209623175e-06, "loss": 0.0496, "step": 31807 }, { "epoch": 0.7008984889300214, "grad_norm": 0.6924678087234497, "learning_rate": 6.504325522661556e-06, "loss": 0.0794, "step": 31808 }, { "epoch": 0.7009205242195375, "grad_norm": 0.7082812190055847, "learning_rate": 6.503443267633834e-06, "loss": 0.0557, "step": 31809 }, { "epoch": 0.7009425595090537, "grad_norm": 0.5177953243255615, "learning_rate": 6.502561055883622e-06, "loss": 0.0534, "step": 31810 }, { "epoch": 0.7009645947985699, "grad_norm": 0.44715091586112976, "learning_rate": 6.5016788874154325e-06, "loss": 0.0898, "step": 31811 }, { "epoch": 0.700986630088086, "grad_norm": 0.8827350735664368, "learning_rate": 6.5007967622337425e-06, "loss": 0.1114, "step": 31812 }, { "epoch": 0.7010086653776022, "grad_norm": 0.2673925459384918, "learning_rate": 6.499914680343051e-06, "loss": 0.0286, "step": 31813 }, { "epoch": 0.7010307006671184, "grad_norm": 1.386583924293518, "learning_rate": 6.499032641747858e-06, "loss": 0.0971, "step": 31814 }, { "epoch": 0.7010527359566345, "grad_norm": 0.5958681106567383, "learning_rate": 6.4981506464526466e-06, "loss": 0.1062, "step": 31815 }, { "epoch": 0.7010747712461507, "grad_norm": 0.6362640261650085, "learning_rate": 6.497268694461912e-06, "loss": 0.0786, "step": 31816 }, { "epoch": 0.7010968065356669, "grad_norm": 0.6585057377815247, "learning_rate": 6.496386785780149e-06, "loss": 0.0714, "step": 31817 }, { "epoch": 0.701118841825183, "grad_norm": 0.5471420288085938, "learning_rate": 6.49550492041185e-06, "loss": 0.0451, "step": 31818 }, { "epoch": 0.7011408771146992, "grad_norm": 0.7893087267875671, "learning_rate": 6.494623098361504e-06, "loss": 0.0513, "step": 31819 }, { "epoch": 0.7011629124042154, "grad_norm": 0.7310301065444946, "learning_rate": 6.493741319633601e-06, "loss": 0.0821, "step": 31820 }, { "epoch": 0.7011849476937315, "grad_norm": 0.5800217986106873, "learning_rate": 6.492859584232639e-06, "loss": 0.0562, "step": 31821 }, { "epoch": 0.7012069829832477, "grad_norm": 0.7955945134162903, "learning_rate": 6.491977892163102e-06, "loss": 0.0803, "step": 31822 }, { "epoch": 0.7012290182727638, "grad_norm": 0.45257940888404846, "learning_rate": 6.4910962434294826e-06, "loss": 0.0756, "step": 31823 }, { "epoch": 0.70125105356228, "grad_norm": 0.380813330411911, "learning_rate": 6.4902146380362726e-06, "loss": 0.0409, "step": 31824 }, { "epoch": 0.7012730888517962, "grad_norm": 0.5226616263389587, "learning_rate": 6.489333075987969e-06, "loss": 0.0749, "step": 31825 }, { "epoch": 0.7012951241413123, "grad_norm": 0.4158662259578705, "learning_rate": 6.488451557289051e-06, "loss": 0.0392, "step": 31826 }, { "epoch": 0.7013171594308285, "grad_norm": 0.2691473960876465, "learning_rate": 6.487570081944011e-06, "loss": 0.0596, "step": 31827 }, { "epoch": 0.7013391947203447, "grad_norm": 0.5464909076690674, "learning_rate": 6.486688649957349e-06, "loss": 0.0613, "step": 31828 }, { "epoch": 0.7013612300098608, "grad_norm": 0.45668354630470276, "learning_rate": 6.485807261333542e-06, "loss": 0.0635, "step": 31829 }, { "epoch": 0.701383265299377, "grad_norm": 0.6857801675796509, "learning_rate": 6.4849259160770876e-06, "loss": 0.0677, "step": 31830 }, { "epoch": 0.7014053005888932, "grad_norm": 0.8076857328414917, "learning_rate": 6.484044614192462e-06, "loss": 0.0419, "step": 31831 }, { "epoch": 0.7014273358784092, "grad_norm": 0.5309823155403137, "learning_rate": 6.483163355684173e-06, "loss": 0.0628, "step": 31832 }, { "epoch": 0.7014493711679254, "grad_norm": 0.3645336329936981, "learning_rate": 6.482282140556697e-06, "loss": 0.052, "step": 31833 }, { "epoch": 0.7014714064574415, "grad_norm": 0.7847979068756104, "learning_rate": 6.48140096881453e-06, "loss": 0.0765, "step": 31834 }, { "epoch": 0.7014934417469577, "grad_norm": 0.5817907452583313, "learning_rate": 6.4805198404621505e-06, "loss": 0.0442, "step": 31835 }, { "epoch": 0.7015154770364739, "grad_norm": 0.6854994297027588, "learning_rate": 6.479638755504052e-06, "loss": 0.0579, "step": 31836 }, { "epoch": 0.70153751232599, "grad_norm": 0.697839617729187, "learning_rate": 6.478757713944729e-06, "loss": 0.0459, "step": 31837 }, { "epoch": 0.7015595476155062, "grad_norm": 0.8629785776138306, "learning_rate": 6.477876715788655e-06, "loss": 0.0962, "step": 31838 }, { "epoch": 0.7015815829050224, "grad_norm": 0.38523435592651367, "learning_rate": 6.4769957610403266e-06, "loss": 0.0495, "step": 31839 }, { "epoch": 0.7016036181945385, "grad_norm": 0.2968381941318512, "learning_rate": 6.476114849704228e-06, "loss": 0.0323, "step": 31840 }, { "epoch": 0.7016256534840547, "grad_norm": 0.6457723379135132, "learning_rate": 6.475233981784852e-06, "loss": 0.0782, "step": 31841 }, { "epoch": 0.7016476887735709, "grad_norm": 0.5288549661636353, "learning_rate": 6.474353157286678e-06, "loss": 0.0653, "step": 31842 }, { "epoch": 0.701669724063087, "grad_norm": 0.6927791237831116, "learning_rate": 6.473472376214193e-06, "loss": 0.0594, "step": 31843 }, { "epoch": 0.7016917593526032, "grad_norm": 1.2805863618850708, "learning_rate": 6.472591638571891e-06, "loss": 0.096, "step": 31844 }, { "epoch": 0.7017137946421194, "grad_norm": 0.25186094641685486, "learning_rate": 6.471710944364248e-06, "loss": 0.0659, "step": 31845 }, { "epoch": 0.7017358299316355, "grad_norm": 0.531880259513855, "learning_rate": 6.470830293595753e-06, "loss": 0.0461, "step": 31846 }, { "epoch": 0.7017578652211517, "grad_norm": 0.6487832069396973, "learning_rate": 6.469949686270894e-06, "loss": 0.0605, "step": 31847 }, { "epoch": 0.7017799005106679, "grad_norm": 0.5520749688148499, "learning_rate": 6.469069122394161e-06, "loss": 0.0817, "step": 31848 }, { "epoch": 0.701801935800184, "grad_norm": 0.9754845499992371, "learning_rate": 6.468188601970027e-06, "loss": 0.0735, "step": 31849 }, { "epoch": 0.7018239710897002, "grad_norm": 0.7349224090576172, "learning_rate": 6.4673081250029846e-06, "loss": 0.0732, "step": 31850 }, { "epoch": 0.7018460063792163, "grad_norm": 0.33342328667640686, "learning_rate": 6.466427691497524e-06, "loss": 0.0617, "step": 31851 }, { "epoch": 0.7018680416687325, "grad_norm": 0.5812118649482727, "learning_rate": 6.465547301458117e-06, "loss": 0.0601, "step": 31852 }, { "epoch": 0.7018900769582487, "grad_norm": 0.9291537404060364, "learning_rate": 6.464666954889258e-06, "loss": 0.0634, "step": 31853 }, { "epoch": 0.7019121122477648, "grad_norm": 0.46255818009376526, "learning_rate": 6.463786651795418e-06, "loss": 0.064, "step": 31854 }, { "epoch": 0.701934147537281, "grad_norm": 1.046212911605835, "learning_rate": 6.4629063921811e-06, "loss": 0.0753, "step": 31855 }, { "epoch": 0.7019561828267972, "grad_norm": 0.6573694944381714, "learning_rate": 6.462026176050773e-06, "loss": 0.0689, "step": 31856 }, { "epoch": 0.7019782181163132, "grad_norm": 0.8110240697860718, "learning_rate": 6.461146003408929e-06, "loss": 0.0736, "step": 31857 }, { "epoch": 0.7020002534058294, "grad_norm": 0.754676878452301, "learning_rate": 6.4602658742600445e-06, "loss": 0.0713, "step": 31858 }, { "epoch": 0.7020222886953456, "grad_norm": 0.6999937295913696, "learning_rate": 6.459385788608604e-06, "loss": 0.062, "step": 31859 }, { "epoch": 0.7020443239848617, "grad_norm": 0.45138928294181824, "learning_rate": 6.4585057464590965e-06, "loss": 0.0467, "step": 31860 }, { "epoch": 0.7020663592743779, "grad_norm": 0.5260343551635742, "learning_rate": 6.4576257478159885e-06, "loss": 0.0495, "step": 31861 }, { "epoch": 0.702088394563894, "grad_norm": 0.6148493885993958, "learning_rate": 6.456745792683785e-06, "loss": 0.0727, "step": 31862 }, { "epoch": 0.7021104298534102, "grad_norm": 0.4260067939758301, "learning_rate": 6.455865881066951e-06, "loss": 0.027, "step": 31863 }, { "epoch": 0.7021324651429264, "grad_norm": 0.3599858283996582, "learning_rate": 6.454986012969978e-06, "loss": 0.0558, "step": 31864 }, { "epoch": 0.7021545004324425, "grad_norm": 0.4762296974658966, "learning_rate": 6.4541061883973344e-06, "loss": 0.0711, "step": 31865 }, { "epoch": 0.7021765357219587, "grad_norm": 0.957200825214386, "learning_rate": 6.453226407353521e-06, "loss": 0.0568, "step": 31866 }, { "epoch": 0.7021985710114749, "grad_norm": 0.74387127161026, "learning_rate": 6.452346669843008e-06, "loss": 0.0609, "step": 31867 }, { "epoch": 0.702220606300991, "grad_norm": 0.5226743817329407, "learning_rate": 6.4514669758702714e-06, "loss": 0.0351, "step": 31868 }, { "epoch": 0.7022426415905072, "grad_norm": 0.6896142959594727, "learning_rate": 6.450587325439798e-06, "loss": 0.0725, "step": 31869 }, { "epoch": 0.7022646768800234, "grad_norm": 0.3983813524246216, "learning_rate": 6.449707718556068e-06, "loss": 0.0839, "step": 31870 }, { "epoch": 0.7022867121695395, "grad_norm": 1.0185096263885498, "learning_rate": 6.448828155223567e-06, "loss": 0.0614, "step": 31871 }, { "epoch": 0.7023087474590557, "grad_norm": 0.584372878074646, "learning_rate": 6.447948635446763e-06, "loss": 0.0733, "step": 31872 }, { "epoch": 0.7023307827485719, "grad_norm": 0.6320415139198303, "learning_rate": 6.447069159230146e-06, "loss": 0.0881, "step": 31873 }, { "epoch": 0.702352818038088, "grad_norm": 0.6606029868125916, "learning_rate": 6.446189726578189e-06, "loss": 0.0692, "step": 31874 }, { "epoch": 0.7023748533276042, "grad_norm": 0.5280735492706299, "learning_rate": 6.445310337495382e-06, "loss": 0.0357, "step": 31875 }, { "epoch": 0.7023968886171204, "grad_norm": 0.9589534401893616, "learning_rate": 6.4444309919861945e-06, "loss": 0.0808, "step": 31876 }, { "epoch": 0.7024189239066365, "grad_norm": 0.6775733828544617, "learning_rate": 6.4435516900551e-06, "loss": 0.0669, "step": 31877 }, { "epoch": 0.7024409591961527, "grad_norm": 0.7857856154441833, "learning_rate": 6.442672431706593e-06, "loss": 0.061, "step": 31878 }, { "epoch": 0.7024629944856688, "grad_norm": 0.40014520287513733, "learning_rate": 6.441793216945141e-06, "loss": 0.039, "step": 31879 }, { "epoch": 0.702485029775185, "grad_norm": 0.9115869998931885, "learning_rate": 6.4409140457752304e-06, "loss": 0.1129, "step": 31880 }, { "epoch": 0.7025070650647011, "grad_norm": 0.6042677760124207, "learning_rate": 6.440034918201323e-06, "loss": 0.0756, "step": 31881 }, { "epoch": 0.7025291003542172, "grad_norm": 0.3465796709060669, "learning_rate": 6.4391558342279186e-06, "loss": 0.0915, "step": 31882 }, { "epoch": 0.7025511356437334, "grad_norm": 0.7042437791824341, "learning_rate": 6.438276793859478e-06, "loss": 0.0678, "step": 31883 }, { "epoch": 0.7025731709332496, "grad_norm": 0.5664697885513306, "learning_rate": 6.437397797100487e-06, "loss": 0.0878, "step": 31884 }, { "epoch": 0.7025952062227657, "grad_norm": 0.3530764877796173, "learning_rate": 6.436518843955425e-06, "loss": 0.0579, "step": 31885 }, { "epoch": 0.7026172415122819, "grad_norm": 0.4884653091430664, "learning_rate": 6.435639934428759e-06, "loss": 0.0448, "step": 31886 }, { "epoch": 0.702639276801798, "grad_norm": 0.5287913084030151, "learning_rate": 6.434761068524977e-06, "loss": 0.0986, "step": 31887 }, { "epoch": 0.7026613120913142, "grad_norm": 0.5684219002723694, "learning_rate": 6.433882246248538e-06, "loss": 0.076, "step": 31888 }, { "epoch": 0.7026833473808304, "grad_norm": 0.5066706538200378, "learning_rate": 6.433003467603943e-06, "loss": 0.0536, "step": 31889 }, { "epoch": 0.7027053826703465, "grad_norm": 0.433359295129776, "learning_rate": 6.432124732595648e-06, "loss": 0.0618, "step": 31890 }, { "epoch": 0.7027274179598627, "grad_norm": 0.569743812084198, "learning_rate": 6.431246041228142e-06, "loss": 0.0849, "step": 31891 }, { "epoch": 0.7027494532493789, "grad_norm": 0.4326615631580353, "learning_rate": 6.430367393505891e-06, "loss": 0.053, "step": 31892 }, { "epoch": 0.702771488538895, "grad_norm": 0.5604077577590942, "learning_rate": 6.429488789433373e-06, "loss": 0.1096, "step": 31893 }, { "epoch": 0.7027935238284112, "grad_norm": 0.7026798725128174, "learning_rate": 6.428610229015071e-06, "loss": 0.0847, "step": 31894 }, { "epoch": 0.7028155591179274, "grad_norm": 0.8550520539283752, "learning_rate": 6.427731712255448e-06, "loss": 0.0683, "step": 31895 }, { "epoch": 0.7028375944074435, "grad_norm": 0.5170603394508362, "learning_rate": 6.4268532391589836e-06, "loss": 0.053, "step": 31896 }, { "epoch": 0.7028596296969597, "grad_norm": 0.7692053914070129, "learning_rate": 6.425974809730152e-06, "loss": 0.0577, "step": 31897 }, { "epoch": 0.7028816649864759, "grad_norm": 0.7806403040885925, "learning_rate": 6.425096423973435e-06, "loss": 0.0771, "step": 31898 }, { "epoch": 0.702903700275992, "grad_norm": 0.6960549354553223, "learning_rate": 6.424218081893295e-06, "loss": 0.095, "step": 31899 }, { "epoch": 0.7029257355655082, "grad_norm": 0.6793379783630371, "learning_rate": 6.42333978349421e-06, "loss": 0.0645, "step": 31900 }, { "epoch": 0.7029477708550244, "grad_norm": 0.4770377278327942, "learning_rate": 6.42246152878066e-06, "loss": 0.0471, "step": 31901 }, { "epoch": 0.7029698061445405, "grad_norm": 0.346507728099823, "learning_rate": 6.421583317757108e-06, "loss": 0.0764, "step": 31902 }, { "epoch": 0.7029918414340567, "grad_norm": 0.6906335949897766, "learning_rate": 6.4207051504280376e-06, "loss": 0.0513, "step": 31903 }, { "epoch": 0.7030138767235729, "grad_norm": 0.7126102447509766, "learning_rate": 6.419827026797906e-06, "loss": 0.0814, "step": 31904 }, { "epoch": 0.703035912013089, "grad_norm": 0.8918881416320801, "learning_rate": 6.418948946871206e-06, "loss": 0.0577, "step": 31905 }, { "epoch": 0.7030579473026051, "grad_norm": 0.8817066550254822, "learning_rate": 6.418070910652398e-06, "loss": 0.0528, "step": 31906 }, { "epoch": 0.7030799825921212, "grad_norm": 0.7824772000312805, "learning_rate": 6.417192918145954e-06, "loss": 0.0806, "step": 31907 }, { "epoch": 0.7031020178816374, "grad_norm": 0.8772129416465759, "learning_rate": 6.416314969356356e-06, "loss": 0.0546, "step": 31908 }, { "epoch": 0.7031240531711536, "grad_norm": 0.6251717209815979, "learning_rate": 6.4154370642880626e-06, "loss": 0.0377, "step": 31909 }, { "epoch": 0.7031460884606697, "grad_norm": 0.5684980750083923, "learning_rate": 6.414559202945558e-06, "loss": 0.0394, "step": 31910 }, { "epoch": 0.7031681237501859, "grad_norm": 0.48140764236450195, "learning_rate": 6.4136813853332946e-06, "loss": 0.0629, "step": 31911 }, { "epoch": 0.7031901590397021, "grad_norm": 0.37939682602882385, "learning_rate": 6.4128036114557684e-06, "loss": 0.0523, "step": 31912 }, { "epoch": 0.7032121943292182, "grad_norm": 0.6691084504127502, "learning_rate": 6.411925881317432e-06, "loss": 0.0781, "step": 31913 }, { "epoch": 0.7032342296187344, "grad_norm": 1.1030352115631104, "learning_rate": 6.411048194922768e-06, "loss": 0.0851, "step": 31914 }, { "epoch": 0.7032562649082506, "grad_norm": 0.8739027976989746, "learning_rate": 6.410170552276237e-06, "loss": 0.0926, "step": 31915 }, { "epoch": 0.7032783001977667, "grad_norm": 0.5548573136329651, "learning_rate": 6.409292953382313e-06, "loss": 0.044, "step": 31916 }, { "epoch": 0.7033003354872829, "grad_norm": 0.43011733889579773, "learning_rate": 6.4084153982454715e-06, "loss": 0.0741, "step": 31917 }, { "epoch": 0.703322370776799, "grad_norm": 0.6214448809623718, "learning_rate": 6.4075378868701735e-06, "loss": 0.0598, "step": 31918 }, { "epoch": 0.7033444060663152, "grad_norm": 0.41608864068984985, "learning_rate": 6.406660419260891e-06, "loss": 0.0316, "step": 31919 }, { "epoch": 0.7033664413558314, "grad_norm": 0.5402963757514954, "learning_rate": 6.405782995422095e-06, "loss": 0.0531, "step": 31920 }, { "epoch": 0.7033884766453475, "grad_norm": 0.4424745440483093, "learning_rate": 6.404905615358262e-06, "loss": 0.059, "step": 31921 }, { "epoch": 0.7034105119348637, "grad_norm": 0.5375649929046631, "learning_rate": 6.404028279073847e-06, "loss": 0.0538, "step": 31922 }, { "epoch": 0.7034325472243799, "grad_norm": 0.8016886711120605, "learning_rate": 6.403150986573327e-06, "loss": 0.082, "step": 31923 }, { "epoch": 0.703454582513896, "grad_norm": 1.0715633630752563, "learning_rate": 6.402273737861173e-06, "loss": 0.0722, "step": 31924 }, { "epoch": 0.7034766178034122, "grad_norm": 0.9734093546867371, "learning_rate": 6.401396532941843e-06, "loss": 0.1017, "step": 31925 }, { "epoch": 0.7034986530929284, "grad_norm": 0.4545418918132782, "learning_rate": 6.400519371819818e-06, "loss": 0.0436, "step": 31926 }, { "epoch": 0.7035206883824445, "grad_norm": 0.34855470061302185, "learning_rate": 6.399642254499549e-06, "loss": 0.0734, "step": 31927 }, { "epoch": 0.7035427236719607, "grad_norm": 0.5027084350585938, "learning_rate": 6.398765180985524e-06, "loss": 0.0646, "step": 31928 }, { "epoch": 0.7035647589614769, "grad_norm": 0.23435258865356445, "learning_rate": 6.397888151282196e-06, "loss": 0.0523, "step": 31929 }, { "epoch": 0.703586794250993, "grad_norm": 0.4821641743183136, "learning_rate": 6.397011165394034e-06, "loss": 0.0658, "step": 31930 }, { "epoch": 0.7036088295405091, "grad_norm": 0.6275558471679688, "learning_rate": 6.396134223325516e-06, "loss": 0.0695, "step": 31931 }, { "epoch": 0.7036308648300252, "grad_norm": 0.19461043179035187, "learning_rate": 6.395257325081092e-06, "loss": 0.0406, "step": 31932 }, { "epoch": 0.7036529001195414, "grad_norm": 0.6819806694984436, "learning_rate": 6.394380470665243e-06, "loss": 0.0523, "step": 31933 }, { "epoch": 0.7036749354090576, "grad_norm": 0.5430244207382202, "learning_rate": 6.393503660082419e-06, "loss": 0.0496, "step": 31934 }, { "epoch": 0.7036969706985737, "grad_norm": 0.4353475272655487, "learning_rate": 6.392626893337105e-06, "loss": 0.0507, "step": 31935 }, { "epoch": 0.7037190059880899, "grad_norm": 0.6348600387573242, "learning_rate": 6.391750170433754e-06, "loss": 0.09, "step": 31936 }, { "epoch": 0.7037410412776061, "grad_norm": 0.6043045520782471, "learning_rate": 6.390873491376842e-06, "loss": 0.068, "step": 31937 }, { "epoch": 0.7037630765671222, "grad_norm": 1.02065110206604, "learning_rate": 6.3899968561708205e-06, "loss": 0.0706, "step": 31938 }, { "epoch": 0.7037851118566384, "grad_norm": 0.7348645925521851, "learning_rate": 6.389120264820162e-06, "loss": 0.075, "step": 31939 }, { "epoch": 0.7038071471461546, "grad_norm": 0.4459840953350067, "learning_rate": 6.388243717329338e-06, "loss": 0.0536, "step": 31940 }, { "epoch": 0.7038291824356707, "grad_norm": 0.32465800642967224, "learning_rate": 6.3873672137028e-06, "loss": 0.0487, "step": 31941 }, { "epoch": 0.7038512177251869, "grad_norm": 0.6035911440849304, "learning_rate": 6.3864907539450215e-06, "loss": 0.0605, "step": 31942 }, { "epoch": 0.703873253014703, "grad_norm": 0.5886651277542114, "learning_rate": 6.3856143380604625e-06, "loss": 0.0577, "step": 31943 }, { "epoch": 0.7038952883042192, "grad_norm": 0.36834144592285156, "learning_rate": 6.384737966053595e-06, "loss": 0.0619, "step": 31944 }, { "epoch": 0.7039173235937354, "grad_norm": 0.23574303090572357, "learning_rate": 6.383861637928871e-06, "loss": 0.0603, "step": 31945 }, { "epoch": 0.7039393588832515, "grad_norm": 0.6462244391441345, "learning_rate": 6.3829853536907616e-06, "loss": 0.0633, "step": 31946 }, { "epoch": 0.7039613941727677, "grad_norm": 0.7921675443649292, "learning_rate": 6.382109113343732e-06, "loss": 0.0756, "step": 31947 }, { "epoch": 0.7039834294622839, "grad_norm": 0.656862735748291, "learning_rate": 6.381232916892239e-06, "loss": 0.0635, "step": 31948 }, { "epoch": 0.7040054647518, "grad_norm": 0.9328097701072693, "learning_rate": 6.380356764340751e-06, "loss": 0.0707, "step": 31949 }, { "epoch": 0.7040275000413162, "grad_norm": 0.862993061542511, "learning_rate": 6.379480655693719e-06, "loss": 0.0666, "step": 31950 }, { "epoch": 0.7040495353308324, "grad_norm": 0.495916485786438, "learning_rate": 6.378604590955625e-06, "loss": 0.0572, "step": 31951 }, { "epoch": 0.7040715706203485, "grad_norm": 0.9975380897521973, "learning_rate": 6.377728570130917e-06, "loss": 0.0678, "step": 31952 }, { "epoch": 0.7040936059098647, "grad_norm": 0.38233110308647156, "learning_rate": 6.37685259322406e-06, "loss": 0.0449, "step": 31953 }, { "epoch": 0.7041156411993809, "grad_norm": 0.28536656498908997, "learning_rate": 6.3759766602395215e-06, "loss": 0.0513, "step": 31954 }, { "epoch": 0.7041376764888969, "grad_norm": 0.5810309648513794, "learning_rate": 6.3751007711817535e-06, "loss": 0.0646, "step": 31955 }, { "epoch": 0.7041597117784131, "grad_norm": 0.314348429441452, "learning_rate": 6.374224926055229e-06, "loss": 0.0364, "step": 31956 }, { "epoch": 0.7041817470679292, "grad_norm": 0.5456584692001343, "learning_rate": 6.373349124864389e-06, "loss": 0.0776, "step": 31957 }, { "epoch": 0.7042037823574454, "grad_norm": 0.7233904004096985, "learning_rate": 6.372473367613721e-06, "loss": 0.0862, "step": 31958 }, { "epoch": 0.7042258176469616, "grad_norm": 0.6240573525428772, "learning_rate": 6.371597654307666e-06, "loss": 0.053, "step": 31959 }, { "epoch": 0.7042478529364777, "grad_norm": 0.5667046904563904, "learning_rate": 6.370721984950698e-06, "loss": 0.0512, "step": 31960 }, { "epoch": 0.7042698882259939, "grad_norm": 0.8159976005554199, "learning_rate": 6.369846359547263e-06, "loss": 0.0661, "step": 31961 }, { "epoch": 0.7042919235155101, "grad_norm": 0.7003495097160339, "learning_rate": 6.36897077810183e-06, "loss": 0.0768, "step": 31962 }, { "epoch": 0.7043139588050262, "grad_norm": 0.1339678317308426, "learning_rate": 6.3680952406188615e-06, "loss": 0.0458, "step": 31963 }, { "epoch": 0.7043359940945424, "grad_norm": 0.5138013958930969, "learning_rate": 6.367219747102807e-06, "loss": 0.0645, "step": 31964 }, { "epoch": 0.7043580293840586, "grad_norm": 0.9163980484008789, "learning_rate": 6.3663442975581325e-06, "loss": 0.0786, "step": 31965 }, { "epoch": 0.7043800646735747, "grad_norm": 0.6604836583137512, "learning_rate": 6.3654688919892966e-06, "loss": 0.0673, "step": 31966 }, { "epoch": 0.7044020999630909, "grad_norm": 0.668289303779602, "learning_rate": 6.364593530400763e-06, "loss": 0.0842, "step": 31967 }, { "epoch": 0.704424135252607, "grad_norm": 0.939478874206543, "learning_rate": 6.363718212796979e-06, "loss": 0.0801, "step": 31968 }, { "epoch": 0.7044461705421232, "grad_norm": 0.394890159368515, "learning_rate": 6.362842939182409e-06, "loss": 0.0594, "step": 31969 }, { "epoch": 0.7044682058316394, "grad_norm": 0.38844969868659973, "learning_rate": 6.3619677095615175e-06, "loss": 0.053, "step": 31970 }, { "epoch": 0.7044902411211555, "grad_norm": 0.5178481936454773, "learning_rate": 6.361092523938751e-06, "loss": 0.0525, "step": 31971 }, { "epoch": 0.7045122764106717, "grad_norm": 0.3548566699028015, "learning_rate": 6.360217382318579e-06, "loss": 0.0682, "step": 31972 }, { "epoch": 0.7045343117001879, "grad_norm": 0.7122117877006531, "learning_rate": 6.359342284705442e-06, "loss": 0.0714, "step": 31973 }, { "epoch": 0.704556346989704, "grad_norm": 0.45776239037513733, "learning_rate": 6.358467231103818e-06, "loss": 0.0526, "step": 31974 }, { "epoch": 0.7045783822792202, "grad_norm": 0.35076335072517395, "learning_rate": 6.35759222151815e-06, "loss": 0.0182, "step": 31975 }, { "epoch": 0.7046004175687364, "grad_norm": 0.44821417331695557, "learning_rate": 6.3567172559528994e-06, "loss": 0.0625, "step": 31976 }, { "epoch": 0.7046224528582525, "grad_norm": 0.6309215426445007, "learning_rate": 6.35584233441252e-06, "loss": 0.0581, "step": 31977 }, { "epoch": 0.7046444881477687, "grad_norm": 0.8266361951828003, "learning_rate": 6.354967456901478e-06, "loss": 0.0897, "step": 31978 }, { "epoch": 0.7046665234372849, "grad_norm": 0.9908127784729004, "learning_rate": 6.354092623424225e-06, "loss": 0.0757, "step": 31979 }, { "epoch": 0.7046885587268009, "grad_norm": 0.4679476320743561, "learning_rate": 6.353217833985201e-06, "loss": 0.0687, "step": 31980 }, { "epoch": 0.7047105940163171, "grad_norm": 0.5719078183174133, "learning_rate": 6.3523430885888875e-06, "loss": 0.0735, "step": 31981 }, { "epoch": 0.7047326293058332, "grad_norm": 0.48804622888565063, "learning_rate": 6.351468387239722e-06, "loss": 0.086, "step": 31982 }, { "epoch": 0.7047546645953494, "grad_norm": 0.8298879861831665, "learning_rate": 6.350593729942171e-06, "loss": 0.0564, "step": 31983 }, { "epoch": 0.7047766998848656, "grad_norm": 0.40389829874038696, "learning_rate": 6.349719116700672e-06, "loss": 0.0589, "step": 31984 }, { "epoch": 0.7047987351743817, "grad_norm": 0.48177361488342285, "learning_rate": 6.3488445475197056e-06, "loss": 0.0489, "step": 31985 }, { "epoch": 0.7048207704638979, "grad_norm": 0.9522599577903748, "learning_rate": 6.347970022403707e-06, "loss": 0.0656, "step": 31986 }, { "epoch": 0.7048428057534141, "grad_norm": 0.598442792892456, "learning_rate": 6.347095541357142e-06, "loss": 0.0562, "step": 31987 }, { "epoch": 0.7048648410429302, "grad_norm": 0.7649146914482117, "learning_rate": 6.346221104384454e-06, "loss": 0.0757, "step": 31988 }, { "epoch": 0.7048868763324464, "grad_norm": 0.5397365689277649, "learning_rate": 6.345346711490101e-06, "loss": 0.0524, "step": 31989 }, { "epoch": 0.7049089116219626, "grad_norm": 0.7195330858230591, "learning_rate": 6.344472362678545e-06, "loss": 0.0618, "step": 31990 }, { "epoch": 0.7049309469114787, "grad_norm": 0.7190033793449402, "learning_rate": 6.343598057954227e-06, "loss": 0.0597, "step": 31991 }, { "epoch": 0.7049529822009949, "grad_norm": 0.6819698810577393, "learning_rate": 6.342723797321606e-06, "loss": 0.0721, "step": 31992 }, { "epoch": 0.7049750174905111, "grad_norm": 0.4328460693359375, "learning_rate": 6.341849580785136e-06, "loss": 0.0761, "step": 31993 }, { "epoch": 0.7049970527800272, "grad_norm": 0.4686206877231598, "learning_rate": 6.340975408349272e-06, "loss": 0.0673, "step": 31994 }, { "epoch": 0.7050190880695434, "grad_norm": 0.17319735884666443, "learning_rate": 6.34010128001846e-06, "loss": 0.0546, "step": 31995 }, { "epoch": 0.7050411233590596, "grad_norm": 0.7001094818115234, "learning_rate": 6.339227195797155e-06, "loss": 0.0707, "step": 31996 }, { "epoch": 0.7050631586485757, "grad_norm": 0.574240505695343, "learning_rate": 6.338353155689817e-06, "loss": 0.0714, "step": 31997 }, { "epoch": 0.7050851939380919, "grad_norm": 0.4328728914260864, "learning_rate": 6.337479159700885e-06, "loss": 0.0564, "step": 31998 }, { "epoch": 0.705107229227608, "grad_norm": 0.6318033337593079, "learning_rate": 6.3366052078348174e-06, "loss": 0.0653, "step": 31999 }, { "epoch": 0.7051292645171242, "grad_norm": 0.5508264303207397, "learning_rate": 6.335731300096065e-06, "loss": 0.0548, "step": 32000 }, { "epoch": 0.7051512998066404, "grad_norm": 0.32995808124542236, "learning_rate": 6.334857436489084e-06, "loss": 0.0695, "step": 32001 }, { "epoch": 0.7051733350961565, "grad_norm": 0.3417530059814453, "learning_rate": 6.333983617018315e-06, "loss": 0.0444, "step": 32002 }, { "epoch": 0.7051953703856727, "grad_norm": 0.1438048779964447, "learning_rate": 6.333109841688215e-06, "loss": 0.0583, "step": 32003 }, { "epoch": 0.7052174056751889, "grad_norm": 0.5477792024612427, "learning_rate": 6.332236110503239e-06, "loss": 0.0449, "step": 32004 }, { "epoch": 0.7052394409647049, "grad_norm": 0.34112438559532166, "learning_rate": 6.331362423467827e-06, "loss": 0.0545, "step": 32005 }, { "epoch": 0.7052614762542211, "grad_norm": 0.44317224621772766, "learning_rate": 6.330488780586443e-06, "loss": 0.0591, "step": 32006 }, { "epoch": 0.7052835115437373, "grad_norm": 0.7032195329666138, "learning_rate": 6.329615181863515e-06, "loss": 0.0703, "step": 32007 }, { "epoch": 0.7053055468332534, "grad_norm": 0.5642963647842407, "learning_rate": 6.328741627303517e-06, "loss": 0.0724, "step": 32008 }, { "epoch": 0.7053275821227696, "grad_norm": 0.525745153427124, "learning_rate": 6.327868116910883e-06, "loss": 0.0758, "step": 32009 }, { "epoch": 0.7053496174122857, "grad_norm": 0.6518242359161377, "learning_rate": 6.326994650690073e-06, "loss": 0.0616, "step": 32010 }, { "epoch": 0.7053716527018019, "grad_norm": 0.7074975967407227, "learning_rate": 6.326121228645524e-06, "loss": 0.0683, "step": 32011 }, { "epoch": 0.7053936879913181, "grad_norm": 0.5413176417350769, "learning_rate": 6.325247850781692e-06, "loss": 0.0548, "step": 32012 }, { "epoch": 0.7054157232808342, "grad_norm": 0.21129409968852997, "learning_rate": 6.324374517103028e-06, "loss": 0.077, "step": 32013 }, { "epoch": 0.7054377585703504, "grad_norm": 0.6332514882087708, "learning_rate": 6.323501227613972e-06, "loss": 0.0626, "step": 32014 }, { "epoch": 0.7054597938598666, "grad_norm": 0.46587279438972473, "learning_rate": 6.322627982318978e-06, "loss": 0.0385, "step": 32015 }, { "epoch": 0.7054818291493827, "grad_norm": 0.4682841897010803, "learning_rate": 6.321754781222491e-06, "loss": 0.0635, "step": 32016 }, { "epoch": 0.7055038644388989, "grad_norm": 0.5324788689613342, "learning_rate": 6.320881624328968e-06, "loss": 0.075, "step": 32017 }, { "epoch": 0.7055258997284151, "grad_norm": 0.410244345664978, "learning_rate": 6.320008511642843e-06, "loss": 0.0454, "step": 32018 }, { "epoch": 0.7055479350179312, "grad_norm": 0.5046284794807434, "learning_rate": 6.319135443168568e-06, "loss": 0.0578, "step": 32019 }, { "epoch": 0.7055699703074474, "grad_norm": 0.5779078602790833, "learning_rate": 6.3182624189105985e-06, "loss": 0.0912, "step": 32020 }, { "epoch": 0.7055920055969636, "grad_norm": 0.640718400478363, "learning_rate": 6.317389438873366e-06, "loss": 0.0746, "step": 32021 }, { "epoch": 0.7056140408864797, "grad_norm": 0.5746623277664185, "learning_rate": 6.316516503061328e-06, "loss": 0.0564, "step": 32022 }, { "epoch": 0.7056360761759959, "grad_norm": 0.4427527189254761, "learning_rate": 6.315643611478926e-06, "loss": 0.07, "step": 32023 }, { "epoch": 0.705658111465512, "grad_norm": 0.3981177806854248, "learning_rate": 6.314770764130614e-06, "loss": 0.0686, "step": 32024 }, { "epoch": 0.7056801467550282, "grad_norm": 0.40750056505203247, "learning_rate": 6.313897961020828e-06, "loss": 0.0497, "step": 32025 }, { "epoch": 0.7057021820445444, "grad_norm": 0.8528773188591003, "learning_rate": 6.313025202154014e-06, "loss": 0.0648, "step": 32026 }, { "epoch": 0.7057242173340605, "grad_norm": 0.5430695414543152, "learning_rate": 6.312152487534631e-06, "loss": 0.0438, "step": 32027 }, { "epoch": 0.7057462526235767, "grad_norm": 0.706416130065918, "learning_rate": 6.311279817167105e-06, "loss": 0.0527, "step": 32028 }, { "epoch": 0.7057682879130929, "grad_norm": 0.4200967252254486, "learning_rate": 6.310407191055897e-06, "loss": 0.0553, "step": 32029 }, { "epoch": 0.7057903232026089, "grad_norm": 0.40562766790390015, "learning_rate": 6.309534609205434e-06, "loss": 0.0525, "step": 32030 }, { "epoch": 0.7058123584921251, "grad_norm": 0.8490297198295593, "learning_rate": 6.308662071620183e-06, "loss": 0.0933, "step": 32031 }, { "epoch": 0.7058343937816413, "grad_norm": 0.5153964161872864, "learning_rate": 6.30778957830457e-06, "loss": 0.0487, "step": 32032 }, { "epoch": 0.7058564290711574, "grad_norm": 0.4795757532119751, "learning_rate": 6.306917129263051e-06, "loss": 0.0668, "step": 32033 }, { "epoch": 0.7058784643606736, "grad_norm": 0.6217731833457947, "learning_rate": 6.306044724500061e-06, "loss": 0.0779, "step": 32034 }, { "epoch": 0.7059004996501897, "grad_norm": 0.32408538460731506, "learning_rate": 6.305172364020045e-06, "loss": 0.0458, "step": 32035 }, { "epoch": 0.7059225349397059, "grad_norm": 0.35447269678115845, "learning_rate": 6.304300047827456e-06, "loss": 0.0331, "step": 32036 }, { "epoch": 0.7059445702292221, "grad_norm": 0.6801900863647461, "learning_rate": 6.303427775926724e-06, "loss": 0.0484, "step": 32037 }, { "epoch": 0.7059666055187382, "grad_norm": 0.726157009601593, "learning_rate": 6.3025555483222974e-06, "loss": 0.0622, "step": 32038 }, { "epoch": 0.7059886408082544, "grad_norm": 0.7250104546546936, "learning_rate": 6.301683365018619e-06, "loss": 0.0709, "step": 32039 }, { "epoch": 0.7060106760977706, "grad_norm": 0.46719327569007874, "learning_rate": 6.300811226020138e-06, "loss": 0.0513, "step": 32040 }, { "epoch": 0.7060327113872867, "grad_norm": 0.5517911314964294, "learning_rate": 6.299939131331284e-06, "loss": 0.0582, "step": 32041 }, { "epoch": 0.7060547466768029, "grad_norm": 0.5617838501930237, "learning_rate": 6.299067080956506e-06, "loss": 0.0488, "step": 32042 }, { "epoch": 0.7060767819663191, "grad_norm": 0.6434273719787598, "learning_rate": 6.298195074900251e-06, "loss": 0.0358, "step": 32043 }, { "epoch": 0.7060988172558352, "grad_norm": 0.8785885572433472, "learning_rate": 6.297323113166949e-06, "loss": 0.0611, "step": 32044 }, { "epoch": 0.7061208525453514, "grad_norm": 0.6106991171836853, "learning_rate": 6.296451195761046e-06, "loss": 0.1125, "step": 32045 }, { "epoch": 0.7061428878348676, "grad_norm": 0.5012333989143372, "learning_rate": 6.295579322686985e-06, "loss": 0.0616, "step": 32046 }, { "epoch": 0.7061649231243837, "grad_norm": 0.687968373298645, "learning_rate": 6.294707493949212e-06, "loss": 0.0685, "step": 32047 }, { "epoch": 0.7061869584138999, "grad_norm": 0.7134890556335449, "learning_rate": 6.293835709552155e-06, "loss": 0.0567, "step": 32048 }, { "epoch": 0.706208993703416, "grad_norm": 0.6637066602706909, "learning_rate": 6.292963969500264e-06, "loss": 0.0618, "step": 32049 }, { "epoch": 0.7062310289929322, "grad_norm": 0.8723232746124268, "learning_rate": 6.292092273797981e-06, "loss": 0.0785, "step": 32050 }, { "epoch": 0.7062530642824484, "grad_norm": 0.5471175312995911, "learning_rate": 6.291220622449734e-06, "loss": 0.0439, "step": 32051 }, { "epoch": 0.7062750995719645, "grad_norm": 0.6643904447555542, "learning_rate": 6.290349015459978e-06, "loss": 0.0769, "step": 32052 }, { "epoch": 0.7062971348614807, "grad_norm": 1.1068698167800903, "learning_rate": 6.289477452833134e-06, "loss": 0.0901, "step": 32053 }, { "epoch": 0.7063191701509968, "grad_norm": 0.39400216937065125, "learning_rate": 6.288605934573663e-06, "loss": 0.0658, "step": 32054 }, { "epoch": 0.7063412054405129, "grad_norm": 0.41048476099967957, "learning_rate": 6.287734460685988e-06, "loss": 0.063, "step": 32055 }, { "epoch": 0.7063632407300291, "grad_norm": 0.44831976294517517, "learning_rate": 6.286863031174559e-06, "loss": 0.0453, "step": 32056 }, { "epoch": 0.7063852760195453, "grad_norm": 0.6216501593589783, "learning_rate": 6.285991646043802e-06, "loss": 0.0725, "step": 32057 }, { "epoch": 0.7064073113090614, "grad_norm": 0.577785313129425, "learning_rate": 6.2851203052981645e-06, "loss": 0.0717, "step": 32058 }, { "epoch": 0.7064293465985776, "grad_norm": 0.687228262424469, "learning_rate": 6.284249008942085e-06, "loss": 0.0676, "step": 32059 }, { "epoch": 0.7064513818880938, "grad_norm": 0.5596563220024109, "learning_rate": 6.283377756979996e-06, "loss": 0.1042, "step": 32060 }, { "epoch": 0.7064734171776099, "grad_norm": 0.5675053596496582, "learning_rate": 6.282506549416338e-06, "loss": 0.0919, "step": 32061 }, { "epoch": 0.7064954524671261, "grad_norm": 0.5121950507164001, "learning_rate": 6.281635386255549e-06, "loss": 0.0566, "step": 32062 }, { "epoch": 0.7065174877566422, "grad_norm": 0.5045830607414246, "learning_rate": 6.280764267502071e-06, "loss": 0.0692, "step": 32063 }, { "epoch": 0.7065395230461584, "grad_norm": 0.6013408899307251, "learning_rate": 6.279893193160331e-06, "loss": 0.0745, "step": 32064 }, { "epoch": 0.7065615583356746, "grad_norm": 0.8124533295631409, "learning_rate": 6.279022163234771e-06, "loss": 0.0753, "step": 32065 }, { "epoch": 0.7065835936251907, "grad_norm": 0.7584360241889954, "learning_rate": 6.278151177729833e-06, "loss": 0.0529, "step": 32066 }, { "epoch": 0.7066056289147069, "grad_norm": 0.5065404772758484, "learning_rate": 6.277280236649942e-06, "loss": 0.0588, "step": 32067 }, { "epoch": 0.7066276642042231, "grad_norm": 0.6678820848464966, "learning_rate": 6.276409339999541e-06, "loss": 0.071, "step": 32068 }, { "epoch": 0.7066496994937392, "grad_norm": 0.5235602259635925, "learning_rate": 6.275538487783066e-06, "loss": 0.0797, "step": 32069 }, { "epoch": 0.7066717347832554, "grad_norm": 0.4977562725543976, "learning_rate": 6.274667680004956e-06, "loss": 0.0382, "step": 32070 }, { "epoch": 0.7066937700727716, "grad_norm": 0.41294577717781067, "learning_rate": 6.273796916669638e-06, "loss": 0.0591, "step": 32071 }, { "epoch": 0.7067158053622877, "grad_norm": 0.6048794388771057, "learning_rate": 6.27292619778155e-06, "loss": 0.0642, "step": 32072 }, { "epoch": 0.7067378406518039, "grad_norm": 0.6889777183532715, "learning_rate": 6.272055523345137e-06, "loss": 0.0494, "step": 32073 }, { "epoch": 0.7067598759413201, "grad_norm": 0.9059519171714783, "learning_rate": 6.2711848933648174e-06, "loss": 0.103, "step": 32074 }, { "epoch": 0.7067819112308362, "grad_norm": 0.34744417667388916, "learning_rate": 6.270314307845042e-06, "loss": 0.0357, "step": 32075 }, { "epoch": 0.7068039465203524, "grad_norm": 1.2409164905548096, "learning_rate": 6.2694437667902235e-06, "loss": 0.093, "step": 32076 }, { "epoch": 0.7068259818098686, "grad_norm": 0.41746804118156433, "learning_rate": 6.268573270204821e-06, "loss": 0.0544, "step": 32077 }, { "epoch": 0.7068480170993847, "grad_norm": 0.8343795537948608, "learning_rate": 6.267702818093252e-06, "loss": 0.0768, "step": 32078 }, { "epoch": 0.7068700523889008, "grad_norm": 0.3310718834400177, "learning_rate": 6.26683241045996e-06, "loss": 0.0441, "step": 32079 }, { "epoch": 0.7068920876784169, "grad_norm": 0.3791433870792389, "learning_rate": 6.265962047309369e-06, "loss": 0.0397, "step": 32080 }, { "epoch": 0.7069141229679331, "grad_norm": 0.7522153258323669, "learning_rate": 6.265091728645917e-06, "loss": 0.0697, "step": 32081 }, { "epoch": 0.7069361582574493, "grad_norm": 0.7260427474975586, "learning_rate": 6.264221454474041e-06, "loss": 0.1118, "step": 32082 }, { "epoch": 0.7069581935469654, "grad_norm": 0.9418724179267883, "learning_rate": 6.263351224798167e-06, "loss": 0.0614, "step": 32083 }, { "epoch": 0.7069802288364816, "grad_norm": 0.3298906087875366, "learning_rate": 6.2624810396227286e-06, "loss": 0.0499, "step": 32084 }, { "epoch": 0.7070022641259978, "grad_norm": 0.8211404085159302, "learning_rate": 6.261610898952158e-06, "loss": 0.0869, "step": 32085 }, { "epoch": 0.7070242994155139, "grad_norm": 0.7058747410774231, "learning_rate": 6.260740802790897e-06, "loss": 0.0765, "step": 32086 }, { "epoch": 0.7070463347050301, "grad_norm": 0.5619930028915405, "learning_rate": 6.259870751143358e-06, "loss": 0.063, "step": 32087 }, { "epoch": 0.7070683699945463, "grad_norm": 0.5063340663909912, "learning_rate": 6.259000744013994e-06, "loss": 0.0563, "step": 32088 }, { "epoch": 0.7070904052840624, "grad_norm": 0.42692244052886963, "learning_rate": 6.2581307814072255e-06, "loss": 0.0428, "step": 32089 }, { "epoch": 0.7071124405735786, "grad_norm": 0.50992351770401, "learning_rate": 6.257260863327481e-06, "loss": 0.0696, "step": 32090 }, { "epoch": 0.7071344758630947, "grad_norm": 0.6668018698692322, "learning_rate": 6.256390989779195e-06, "loss": 0.0416, "step": 32091 }, { "epoch": 0.7071565111526109, "grad_norm": 0.7847784757614136, "learning_rate": 6.2555211607668e-06, "loss": 0.0887, "step": 32092 }, { "epoch": 0.7071785464421271, "grad_norm": 0.3606123626232147, "learning_rate": 6.254651376294726e-06, "loss": 0.0367, "step": 32093 }, { "epoch": 0.7072005817316432, "grad_norm": 0.6182240843772888, "learning_rate": 6.2537816363674e-06, "loss": 0.0714, "step": 32094 }, { "epoch": 0.7072226170211594, "grad_norm": 0.9079274535179138, "learning_rate": 6.2529119409892544e-06, "loss": 0.0767, "step": 32095 }, { "epoch": 0.7072446523106756, "grad_norm": 0.6419005393981934, "learning_rate": 6.252042290164718e-06, "loss": 0.0521, "step": 32096 }, { "epoch": 0.7072666876001917, "grad_norm": 0.612494707107544, "learning_rate": 6.251172683898227e-06, "loss": 0.0509, "step": 32097 }, { "epoch": 0.7072887228897079, "grad_norm": 0.6207000613212585, "learning_rate": 6.250303122194198e-06, "loss": 0.0523, "step": 32098 }, { "epoch": 0.7073107581792241, "grad_norm": 0.5938254594802856, "learning_rate": 6.249433605057069e-06, "loss": 0.0457, "step": 32099 }, { "epoch": 0.7073327934687402, "grad_norm": 0.5552873015403748, "learning_rate": 6.248564132491272e-06, "loss": 0.0865, "step": 32100 }, { "epoch": 0.7073548287582564, "grad_norm": 0.7208353281021118, "learning_rate": 6.247694704501225e-06, "loss": 0.1026, "step": 32101 }, { "epoch": 0.7073768640477726, "grad_norm": 0.691620945930481, "learning_rate": 6.246825321091368e-06, "loss": 0.0711, "step": 32102 }, { "epoch": 0.7073988993372887, "grad_norm": 0.5541574954986572, "learning_rate": 6.245955982266112e-06, "loss": 0.0854, "step": 32103 }, { "epoch": 0.7074209346268048, "grad_norm": 0.32968994975090027, "learning_rate": 6.245086688029908e-06, "loss": 0.054, "step": 32104 }, { "epoch": 0.7074429699163209, "grad_norm": 0.5667563080787659, "learning_rate": 6.244217438387167e-06, "loss": 0.0619, "step": 32105 }, { "epoch": 0.7074650052058371, "grad_norm": 0.7052839994430542, "learning_rate": 6.2433482333423275e-06, "loss": 0.058, "step": 32106 }, { "epoch": 0.7074870404953533, "grad_norm": 0.8252784609794617, "learning_rate": 6.242479072899804e-06, "loss": 0.0688, "step": 32107 }, { "epoch": 0.7075090757848694, "grad_norm": 0.5128394365310669, "learning_rate": 6.241609957064032e-06, "loss": 0.0853, "step": 32108 }, { "epoch": 0.7075311110743856, "grad_norm": 0.7106438875198364, "learning_rate": 6.240740885839443e-06, "loss": 0.0727, "step": 32109 }, { "epoch": 0.7075531463639018, "grad_norm": 0.5023247003555298, "learning_rate": 6.239871859230445e-06, "loss": 0.0591, "step": 32110 }, { "epoch": 0.7075751816534179, "grad_norm": 0.7437278032302856, "learning_rate": 6.239002877241489e-06, "loss": 0.0679, "step": 32111 }, { "epoch": 0.7075972169429341, "grad_norm": 0.3840668499469757, "learning_rate": 6.238133939876986e-06, "loss": 0.0458, "step": 32112 }, { "epoch": 0.7076192522324503, "grad_norm": 0.6078743934631348, "learning_rate": 6.237265047141369e-06, "loss": 0.0676, "step": 32113 }, { "epoch": 0.7076412875219664, "grad_norm": 0.578258216381073, "learning_rate": 6.2363961990390546e-06, "loss": 0.0705, "step": 32114 }, { "epoch": 0.7076633228114826, "grad_norm": 0.7251619100570679, "learning_rate": 6.235527395574473e-06, "loss": 0.0575, "step": 32115 }, { "epoch": 0.7076853581009988, "grad_norm": 0.3405904471874237, "learning_rate": 6.234658636752056e-06, "loss": 0.0551, "step": 32116 }, { "epoch": 0.7077073933905149, "grad_norm": 0.47265011072158813, "learning_rate": 6.233789922576218e-06, "loss": 0.0504, "step": 32117 }, { "epoch": 0.7077294286800311, "grad_norm": 0.34951406717300415, "learning_rate": 6.2329212530513895e-06, "loss": 0.058, "step": 32118 }, { "epoch": 0.7077514639695472, "grad_norm": 0.4856199324131012, "learning_rate": 6.232052628181994e-06, "loss": 0.0348, "step": 32119 }, { "epoch": 0.7077734992590634, "grad_norm": 0.47911936044692993, "learning_rate": 6.231184047972461e-06, "loss": 0.0595, "step": 32120 }, { "epoch": 0.7077955345485796, "grad_norm": 0.4977281391620636, "learning_rate": 6.2303155124272034e-06, "loss": 0.0638, "step": 32121 }, { "epoch": 0.7078175698380957, "grad_norm": 0.8269813656806946, "learning_rate": 6.229447021550653e-06, "loss": 0.0807, "step": 32122 }, { "epoch": 0.7078396051276119, "grad_norm": 0.6527584791183472, "learning_rate": 6.228578575347238e-06, "loss": 0.0676, "step": 32123 }, { "epoch": 0.7078616404171281, "grad_norm": 0.3390878736972809, "learning_rate": 6.227710173821369e-06, "loss": 0.0781, "step": 32124 }, { "epoch": 0.7078836757066442, "grad_norm": 0.5012994408607483, "learning_rate": 6.226841816977483e-06, "loss": 0.0421, "step": 32125 }, { "epoch": 0.7079057109961604, "grad_norm": 0.37059611082077026, "learning_rate": 6.225973504819984e-06, "loss": 0.0668, "step": 32126 }, { "epoch": 0.7079277462856766, "grad_norm": 0.7495594024658203, "learning_rate": 6.225105237353319e-06, "loss": 0.069, "step": 32127 }, { "epoch": 0.7079497815751926, "grad_norm": 0.5783165693283081, "learning_rate": 6.224237014581892e-06, "loss": 0.0618, "step": 32128 }, { "epoch": 0.7079718168647088, "grad_norm": 0.42834344506263733, "learning_rate": 6.223368836510133e-06, "loss": 0.0736, "step": 32129 }, { "epoch": 0.707993852154225, "grad_norm": 0.8939513564109802, "learning_rate": 6.2225007031424665e-06, "loss": 0.0628, "step": 32130 }, { "epoch": 0.7080158874437411, "grad_norm": 0.21840716898441315, "learning_rate": 6.2216326144833075e-06, "loss": 0.0518, "step": 32131 }, { "epoch": 0.7080379227332573, "grad_norm": 0.7221729159355164, "learning_rate": 6.220764570537085e-06, "loss": 0.0536, "step": 32132 }, { "epoch": 0.7080599580227734, "grad_norm": 0.4249660074710846, "learning_rate": 6.219896571308206e-06, "loss": 0.0547, "step": 32133 }, { "epoch": 0.7080819933122896, "grad_norm": 0.30390024185180664, "learning_rate": 6.219028616801113e-06, "loss": 0.0513, "step": 32134 }, { "epoch": 0.7081040286018058, "grad_norm": 0.340500146150589, "learning_rate": 6.218160707020211e-06, "loss": 0.0581, "step": 32135 }, { "epoch": 0.7081260638913219, "grad_norm": 0.6827547550201416, "learning_rate": 6.21729284196993e-06, "loss": 0.0886, "step": 32136 }, { "epoch": 0.7081480991808381, "grad_norm": 0.6758310794830322, "learning_rate": 6.216425021654681e-06, "loss": 0.0763, "step": 32137 }, { "epoch": 0.7081701344703543, "grad_norm": 0.6391688585281372, "learning_rate": 6.2155572460788905e-06, "loss": 0.0843, "step": 32138 }, { "epoch": 0.7081921697598704, "grad_norm": 0.7890176177024841, "learning_rate": 6.214689515246982e-06, "loss": 0.0711, "step": 32139 }, { "epoch": 0.7082142050493866, "grad_norm": 0.8353466391563416, "learning_rate": 6.2138218291633656e-06, "loss": 0.0666, "step": 32140 }, { "epoch": 0.7082362403389028, "grad_norm": 0.7328169345855713, "learning_rate": 6.212954187832466e-06, "loss": 0.0833, "step": 32141 }, { "epoch": 0.7082582756284189, "grad_norm": 0.67009037733078, "learning_rate": 6.212086591258702e-06, "loss": 0.0738, "step": 32142 }, { "epoch": 0.7082803109179351, "grad_norm": 0.6640802621841431, "learning_rate": 6.211219039446502e-06, "loss": 0.0782, "step": 32143 }, { "epoch": 0.7083023462074512, "grad_norm": 0.822373628616333, "learning_rate": 6.210351532400268e-06, "loss": 0.096, "step": 32144 }, { "epoch": 0.7083243814969674, "grad_norm": 0.32548701763153076, "learning_rate": 6.209484070124427e-06, "loss": 0.0626, "step": 32145 }, { "epoch": 0.7083464167864836, "grad_norm": 0.48990434408187866, "learning_rate": 6.2086166526234035e-06, "loss": 0.0585, "step": 32146 }, { "epoch": 0.7083684520759997, "grad_norm": 0.5311849117279053, "learning_rate": 6.207749279901605e-06, "loss": 0.0839, "step": 32147 }, { "epoch": 0.7083904873655159, "grad_norm": 0.3941833972930908, "learning_rate": 6.2068819519634586e-06, "loss": 0.0557, "step": 32148 }, { "epoch": 0.7084125226550321, "grad_norm": 0.6904701590538025, "learning_rate": 6.206014668813367e-06, "loss": 0.0636, "step": 32149 }, { "epoch": 0.7084345579445482, "grad_norm": 0.800259530544281, "learning_rate": 6.20514743045577e-06, "loss": 0.0713, "step": 32150 }, { "epoch": 0.7084565932340644, "grad_norm": 0.5719924569129944, "learning_rate": 6.204280236895067e-06, "loss": 0.0816, "step": 32151 }, { "epoch": 0.7084786285235806, "grad_norm": 0.40449830889701843, "learning_rate": 6.203413088135683e-06, "loss": 0.0436, "step": 32152 }, { "epoch": 0.7085006638130966, "grad_norm": 0.6012399196624756, "learning_rate": 6.202545984182037e-06, "loss": 0.0564, "step": 32153 }, { "epoch": 0.7085226991026128, "grad_norm": 1.2079658508300781, "learning_rate": 6.2016789250385366e-06, "loss": 0.073, "step": 32154 }, { "epoch": 0.708544734392129, "grad_norm": 0.48893383145332336, "learning_rate": 6.200811910709609e-06, "loss": 0.0544, "step": 32155 }, { "epoch": 0.7085667696816451, "grad_norm": 0.4929184317588806, "learning_rate": 6.199944941199654e-06, "loss": 0.084, "step": 32156 }, { "epoch": 0.7085888049711613, "grad_norm": 0.3972216248512268, "learning_rate": 6.19907801651311e-06, "loss": 0.0665, "step": 32157 }, { "epoch": 0.7086108402606774, "grad_norm": 0.6109843254089355, "learning_rate": 6.198211136654375e-06, "loss": 0.0939, "step": 32158 }, { "epoch": 0.7086328755501936, "grad_norm": 0.6140643358230591, "learning_rate": 6.1973443016278755e-06, "loss": 0.0637, "step": 32159 }, { "epoch": 0.7086549108397098, "grad_norm": 0.894823431968689, "learning_rate": 6.196477511438018e-06, "loss": 0.0639, "step": 32160 }, { "epoch": 0.7086769461292259, "grad_norm": 0.5656302571296692, "learning_rate": 6.19561076608922e-06, "loss": 0.0433, "step": 32161 }, { "epoch": 0.7086989814187421, "grad_norm": 0.3177308142185211, "learning_rate": 6.194744065585902e-06, "loss": 0.0616, "step": 32162 }, { "epoch": 0.7087210167082583, "grad_norm": 0.5239449739456177, "learning_rate": 6.193877409932472e-06, "loss": 0.0801, "step": 32163 }, { "epoch": 0.7087430519977744, "grad_norm": 0.7113091349601746, "learning_rate": 6.193010799133343e-06, "loss": 0.0633, "step": 32164 }, { "epoch": 0.7087650872872906, "grad_norm": 1.6694071292877197, "learning_rate": 6.192144233192935e-06, "loss": 0.07, "step": 32165 }, { "epoch": 0.7087871225768068, "grad_norm": 0.8345043063163757, "learning_rate": 6.191277712115665e-06, "loss": 0.0616, "step": 32166 }, { "epoch": 0.7088091578663229, "grad_norm": 0.6415901780128479, "learning_rate": 6.190411235905934e-06, "loss": 0.0542, "step": 32167 }, { "epoch": 0.7088311931558391, "grad_norm": 0.642033040523529, "learning_rate": 6.189544804568165e-06, "loss": 0.0907, "step": 32168 }, { "epoch": 0.7088532284453553, "grad_norm": 0.5497536659240723, "learning_rate": 6.1886784181067736e-06, "loss": 0.0563, "step": 32169 }, { "epoch": 0.7088752637348714, "grad_norm": 0.7244348526000977, "learning_rate": 6.187812076526163e-06, "loss": 0.0711, "step": 32170 }, { "epoch": 0.7088972990243876, "grad_norm": 0.5776336789131165, "learning_rate": 6.186945779830754e-06, "loss": 0.0624, "step": 32171 }, { "epoch": 0.7089193343139037, "grad_norm": 0.4974317252635956, "learning_rate": 6.186079528024947e-06, "loss": 0.0595, "step": 32172 }, { "epoch": 0.7089413696034199, "grad_norm": 0.2978824973106384, "learning_rate": 6.185213321113174e-06, "loss": 0.0652, "step": 32173 }, { "epoch": 0.7089634048929361, "grad_norm": 0.7239217758178711, "learning_rate": 6.18434715909983e-06, "loss": 0.0624, "step": 32174 }, { "epoch": 0.7089854401824522, "grad_norm": 0.7583296895027161, "learning_rate": 6.183481041989336e-06, "loss": 0.0586, "step": 32175 }, { "epoch": 0.7090074754719684, "grad_norm": 0.43192604184150696, "learning_rate": 6.182614969786104e-06, "loss": 0.0605, "step": 32176 }, { "epoch": 0.7090295107614846, "grad_norm": 0.5825005173683167, "learning_rate": 6.181748942494537e-06, "loss": 0.0476, "step": 32177 }, { "epoch": 0.7090515460510006, "grad_norm": 0.4392746090888977, "learning_rate": 6.180882960119057e-06, "loss": 0.0629, "step": 32178 }, { "epoch": 0.7090735813405168, "grad_norm": 0.45032116770744324, "learning_rate": 6.180017022664059e-06, "loss": 0.0436, "step": 32179 }, { "epoch": 0.709095616630033, "grad_norm": 0.7640122771263123, "learning_rate": 6.179151130133975e-06, "loss": 0.068, "step": 32180 }, { "epoch": 0.7091176519195491, "grad_norm": 0.4738283157348633, "learning_rate": 6.1782852825331975e-06, "loss": 0.0796, "step": 32181 }, { "epoch": 0.7091396872090653, "grad_norm": 0.5030784010887146, "learning_rate": 6.17741947986615e-06, "loss": 0.0305, "step": 32182 }, { "epoch": 0.7091617224985814, "grad_norm": 0.6177066564559937, "learning_rate": 6.176553722137229e-06, "loss": 0.071, "step": 32183 }, { "epoch": 0.7091837577880976, "grad_norm": 0.37501126527786255, "learning_rate": 6.175688009350854e-06, "loss": 0.0494, "step": 32184 }, { "epoch": 0.7092057930776138, "grad_norm": 0.6516925692558289, "learning_rate": 6.174822341511438e-06, "loss": 0.0756, "step": 32185 }, { "epoch": 0.7092278283671299, "grad_norm": 0.45146849751472473, "learning_rate": 6.173956718623375e-06, "loss": 0.0435, "step": 32186 }, { "epoch": 0.7092498636566461, "grad_norm": 0.8850827813148499, "learning_rate": 6.173091140691087e-06, "loss": 0.0674, "step": 32187 }, { "epoch": 0.7092718989461623, "grad_norm": 0.7218366861343384, "learning_rate": 6.172225607718976e-06, "loss": 0.0689, "step": 32188 }, { "epoch": 0.7092939342356784, "grad_norm": 0.4474802315235138, "learning_rate": 6.1713601197114625e-06, "loss": 0.0532, "step": 32189 }, { "epoch": 0.7093159695251946, "grad_norm": 0.5090801119804382, "learning_rate": 6.170494676672939e-06, "loss": 0.0657, "step": 32190 }, { "epoch": 0.7093380048147108, "grad_norm": 0.4770425260066986, "learning_rate": 6.1696292786078206e-06, "loss": 0.0383, "step": 32191 }, { "epoch": 0.7093600401042269, "grad_norm": 0.42201003432273865, "learning_rate": 6.168763925520522e-06, "loss": 0.0532, "step": 32192 }, { "epoch": 0.7093820753937431, "grad_norm": 0.6363666653633118, "learning_rate": 6.167898617415438e-06, "loss": 0.0903, "step": 32193 }, { "epoch": 0.7094041106832593, "grad_norm": 0.5398832559585571, "learning_rate": 6.1670333542969885e-06, "loss": 0.0422, "step": 32194 }, { "epoch": 0.7094261459727754, "grad_norm": 0.7630429863929749, "learning_rate": 6.166168136169564e-06, "loss": 0.1033, "step": 32195 }, { "epoch": 0.7094481812622916, "grad_norm": 0.7050232887268066, "learning_rate": 6.165302963037593e-06, "loss": 0.0655, "step": 32196 }, { "epoch": 0.7094702165518078, "grad_norm": 0.45346635580062866, "learning_rate": 6.164437834905465e-06, "loss": 0.0658, "step": 32197 }, { "epoch": 0.7094922518413239, "grad_norm": 0.5280793309211731, "learning_rate": 6.1635727517775946e-06, "loss": 0.0723, "step": 32198 }, { "epoch": 0.7095142871308401, "grad_norm": 0.3932362496852875, "learning_rate": 6.162707713658386e-06, "loss": 0.0701, "step": 32199 }, { "epoch": 0.7095363224203562, "grad_norm": 0.4055323004722595, "learning_rate": 6.16184272055225e-06, "loss": 0.0572, "step": 32200 }, { "epoch": 0.7095583577098724, "grad_norm": 0.8003425598144531, "learning_rate": 6.1609777724635915e-06, "loss": 0.0753, "step": 32201 }, { "epoch": 0.7095803929993886, "grad_norm": 0.6797165870666504, "learning_rate": 6.1601128693968e-06, "loss": 0.0573, "step": 32202 }, { "epoch": 0.7096024282889046, "grad_norm": 0.7118901014328003, "learning_rate": 6.159248011356305e-06, "loss": 0.0645, "step": 32203 }, { "epoch": 0.7096244635784208, "grad_norm": 0.4857076108455658, "learning_rate": 6.158383198346495e-06, "loss": 0.0417, "step": 32204 }, { "epoch": 0.709646498867937, "grad_norm": 0.49432551860809326, "learning_rate": 6.157518430371788e-06, "loss": 0.0471, "step": 32205 }, { "epoch": 0.7096685341574531, "grad_norm": 0.6450387239456177, "learning_rate": 6.156653707436569e-06, "loss": 0.0541, "step": 32206 }, { "epoch": 0.7096905694469693, "grad_norm": 0.277089923620224, "learning_rate": 6.155789029545266e-06, "loss": 0.061, "step": 32207 }, { "epoch": 0.7097126047364855, "grad_norm": 0.37430351972579956, "learning_rate": 6.154924396702266e-06, "loss": 0.0637, "step": 32208 }, { "epoch": 0.7097346400260016, "grad_norm": 0.5033947229385376, "learning_rate": 6.154059808911985e-06, "loss": 0.0455, "step": 32209 }, { "epoch": 0.7097566753155178, "grad_norm": 0.7177748680114746, "learning_rate": 6.1531952661788165e-06, "loss": 0.0749, "step": 32210 }, { "epoch": 0.709778710605034, "grad_norm": 0.4566417634487152, "learning_rate": 6.152330768507168e-06, "loss": 0.0374, "step": 32211 }, { "epoch": 0.7098007458945501, "grad_norm": 0.8800777792930603, "learning_rate": 6.151466315901448e-06, "loss": 0.0557, "step": 32212 }, { "epoch": 0.7098227811840663, "grad_norm": 0.528611421585083, "learning_rate": 6.15060190836605e-06, "loss": 0.0558, "step": 32213 }, { "epoch": 0.7098448164735824, "grad_norm": 0.9143022894859314, "learning_rate": 6.1497375459053816e-06, "loss": 0.0545, "step": 32214 }, { "epoch": 0.7098668517630986, "grad_norm": 0.505013108253479, "learning_rate": 6.148873228523847e-06, "loss": 0.068, "step": 32215 }, { "epoch": 0.7098888870526148, "grad_norm": 0.37621766328811646, "learning_rate": 6.148008956225852e-06, "loss": 0.0835, "step": 32216 }, { "epoch": 0.7099109223421309, "grad_norm": 0.774192214012146, "learning_rate": 6.1471447290157895e-06, "loss": 0.0609, "step": 32217 }, { "epoch": 0.7099329576316471, "grad_norm": 0.7071180939674377, "learning_rate": 6.146280546898065e-06, "loss": 0.073, "step": 32218 }, { "epoch": 0.7099549929211633, "grad_norm": 0.976147472858429, "learning_rate": 6.1454164098770876e-06, "loss": 0.0959, "step": 32219 }, { "epoch": 0.7099770282106794, "grad_norm": 0.5961635708808899, "learning_rate": 6.144552317957246e-06, "loss": 0.0764, "step": 32220 }, { "epoch": 0.7099990635001956, "grad_norm": 0.38453710079193115, "learning_rate": 6.143688271142949e-06, "loss": 0.0864, "step": 32221 }, { "epoch": 0.7100210987897118, "grad_norm": 0.4236963391304016, "learning_rate": 6.142824269438598e-06, "loss": 0.0456, "step": 32222 }, { "epoch": 0.7100431340792279, "grad_norm": 0.823208212852478, "learning_rate": 6.141960312848596e-06, "loss": 0.0666, "step": 32223 }, { "epoch": 0.7100651693687441, "grad_norm": 0.41478341817855835, "learning_rate": 6.141096401377336e-06, "loss": 0.0513, "step": 32224 }, { "epoch": 0.7100872046582603, "grad_norm": 0.7206525206565857, "learning_rate": 6.140232535029222e-06, "loss": 0.0571, "step": 32225 }, { "epoch": 0.7101092399477764, "grad_norm": 0.5946204662322998, "learning_rate": 6.13936871380866e-06, "loss": 0.0507, "step": 32226 }, { "epoch": 0.7101312752372925, "grad_norm": 0.7357391119003296, "learning_rate": 6.13850493772004e-06, "loss": 0.0604, "step": 32227 }, { "epoch": 0.7101533105268086, "grad_norm": 0.7522971034049988, "learning_rate": 6.13764120676777e-06, "loss": 0.0612, "step": 32228 }, { "epoch": 0.7101753458163248, "grad_norm": 0.30964958667755127, "learning_rate": 6.136777520956236e-06, "loss": 0.0462, "step": 32229 }, { "epoch": 0.710197381105841, "grad_norm": 0.6444304585456848, "learning_rate": 6.1359138802898576e-06, "loss": 0.0446, "step": 32230 }, { "epoch": 0.7102194163953571, "grad_norm": 0.5328295826911926, "learning_rate": 6.135050284773018e-06, "loss": 0.0418, "step": 32231 }, { "epoch": 0.7102414516848733, "grad_norm": 0.5877109169960022, "learning_rate": 6.1341867344101274e-06, "loss": 0.0789, "step": 32232 }, { "epoch": 0.7102634869743895, "grad_norm": 0.44582822918891907, "learning_rate": 6.13332322920557e-06, "loss": 0.0536, "step": 32233 }, { "epoch": 0.7102855222639056, "grad_norm": 0.5406816005706787, "learning_rate": 6.132459769163754e-06, "loss": 0.0468, "step": 32234 }, { "epoch": 0.7103075575534218, "grad_norm": 0.7281002402305603, "learning_rate": 6.1315963542890805e-06, "loss": 0.0634, "step": 32235 }, { "epoch": 0.710329592842938, "grad_norm": 0.6134690046310425, "learning_rate": 6.1307329845859365e-06, "loss": 0.085, "step": 32236 }, { "epoch": 0.7103516281324541, "grad_norm": 0.8004950881004333, "learning_rate": 6.1298696600587265e-06, "loss": 0.0864, "step": 32237 }, { "epoch": 0.7103736634219703, "grad_norm": 0.8432602286338806, "learning_rate": 6.1290063807118464e-06, "loss": 0.0807, "step": 32238 }, { "epoch": 0.7103956987114864, "grad_norm": 0.5574097633361816, "learning_rate": 6.128143146549698e-06, "loss": 0.0662, "step": 32239 }, { "epoch": 0.7104177340010026, "grad_norm": 0.4390295147895813, "learning_rate": 6.127279957576671e-06, "loss": 0.0424, "step": 32240 }, { "epoch": 0.7104397692905188, "grad_norm": 0.6225897073745728, "learning_rate": 6.126416813797164e-06, "loss": 0.08, "step": 32241 }, { "epoch": 0.7104618045800349, "grad_norm": 0.47813281416893005, "learning_rate": 6.1255537152155785e-06, "loss": 0.0771, "step": 32242 }, { "epoch": 0.7104838398695511, "grad_norm": 0.6476444602012634, "learning_rate": 6.124690661836304e-06, "loss": 0.0539, "step": 32243 }, { "epoch": 0.7105058751590673, "grad_norm": 0.4769984483718872, "learning_rate": 6.1238276536637375e-06, "loss": 0.0494, "step": 32244 }, { "epoch": 0.7105279104485834, "grad_norm": 0.39225703477859497, "learning_rate": 6.122964690702278e-06, "loss": 0.0689, "step": 32245 }, { "epoch": 0.7105499457380996, "grad_norm": 0.5807698965072632, "learning_rate": 6.1221017729563225e-06, "loss": 0.0536, "step": 32246 }, { "epoch": 0.7105719810276158, "grad_norm": 0.7970324754714966, "learning_rate": 6.1212389004302606e-06, "loss": 0.0621, "step": 32247 }, { "epoch": 0.7105940163171319, "grad_norm": 0.5218576788902283, "learning_rate": 6.120376073128489e-06, "loss": 0.0566, "step": 32248 }, { "epoch": 0.7106160516066481, "grad_norm": 0.8162497282028198, "learning_rate": 6.119513291055408e-06, "loss": 0.0665, "step": 32249 }, { "epoch": 0.7106380868961643, "grad_norm": 1.0452229976654053, "learning_rate": 6.118650554215403e-06, "loss": 0.0743, "step": 32250 }, { "epoch": 0.7106601221856804, "grad_norm": 0.5648587942123413, "learning_rate": 6.117787862612881e-06, "loss": 0.075, "step": 32251 }, { "epoch": 0.7106821574751965, "grad_norm": 0.8000114560127258, "learning_rate": 6.116925216252215e-06, "loss": 0.0887, "step": 32252 }, { "epoch": 0.7107041927647126, "grad_norm": 0.6365768909454346, "learning_rate": 6.116062615137824e-06, "loss": 0.0736, "step": 32253 }, { "epoch": 0.7107262280542288, "grad_norm": 0.5747005939483643, "learning_rate": 6.1152000592740844e-06, "loss": 0.0599, "step": 32254 }, { "epoch": 0.710748263343745, "grad_norm": 0.6642954349517822, "learning_rate": 6.1143375486654014e-06, "loss": 0.0794, "step": 32255 }, { "epoch": 0.7107702986332611, "grad_norm": 0.3863343596458435, "learning_rate": 6.113475083316157e-06, "loss": 0.0568, "step": 32256 }, { "epoch": 0.7107923339227773, "grad_norm": 0.5874086022377014, "learning_rate": 6.112612663230749e-06, "loss": 0.0499, "step": 32257 }, { "epoch": 0.7108143692122935, "grad_norm": 0.6374387145042419, "learning_rate": 6.111750288413575e-06, "loss": 0.0633, "step": 32258 }, { "epoch": 0.7108364045018096, "grad_norm": 0.41433537006378174, "learning_rate": 6.1108879588690185e-06, "loss": 0.045, "step": 32259 }, { "epoch": 0.7108584397913258, "grad_norm": 0.44081032276153564, "learning_rate": 6.110025674601477e-06, "loss": 0.0493, "step": 32260 }, { "epoch": 0.710880475080842, "grad_norm": 0.9780749082565308, "learning_rate": 6.10916343561534e-06, "loss": 0.0896, "step": 32261 }, { "epoch": 0.7109025103703581, "grad_norm": 0.7983665466308594, "learning_rate": 6.108301241915008e-06, "loss": 0.0677, "step": 32262 }, { "epoch": 0.7109245456598743, "grad_norm": 0.6122141480445862, "learning_rate": 6.10743909350486e-06, "loss": 0.0571, "step": 32263 }, { "epoch": 0.7109465809493904, "grad_norm": 0.48211669921875, "learning_rate": 6.106576990389293e-06, "loss": 0.0603, "step": 32264 }, { "epoch": 0.7109686162389066, "grad_norm": 1.056546926498413, "learning_rate": 6.1057149325727054e-06, "loss": 0.0794, "step": 32265 }, { "epoch": 0.7109906515284228, "grad_norm": 0.3457351624965668, "learning_rate": 6.104852920059474e-06, "loss": 0.0637, "step": 32266 }, { "epoch": 0.7110126868179389, "grad_norm": 0.24655568599700928, "learning_rate": 6.103990952853997e-06, "loss": 0.0413, "step": 32267 }, { "epoch": 0.7110347221074551, "grad_norm": 0.4542987644672394, "learning_rate": 6.1031290309606655e-06, "loss": 0.0499, "step": 32268 }, { "epoch": 0.7110567573969713, "grad_norm": 0.4629174470901489, "learning_rate": 6.102267154383873e-06, "loss": 0.0961, "step": 32269 }, { "epoch": 0.7110787926864874, "grad_norm": 1.0252798795700073, "learning_rate": 6.101405323128001e-06, "loss": 0.0692, "step": 32270 }, { "epoch": 0.7111008279760036, "grad_norm": 0.9535079598426819, "learning_rate": 6.100543537197442e-06, "loss": 0.0717, "step": 32271 }, { "epoch": 0.7111228632655198, "grad_norm": 0.6326978206634521, "learning_rate": 6.099681796596593e-06, "loss": 0.041, "step": 32272 }, { "epoch": 0.7111448985550359, "grad_norm": 0.5654743909835815, "learning_rate": 6.098820101329832e-06, "loss": 0.0857, "step": 32273 }, { "epoch": 0.7111669338445521, "grad_norm": 1.2066751718521118, "learning_rate": 6.097958451401558e-06, "loss": 0.0813, "step": 32274 }, { "epoch": 0.7111889691340683, "grad_norm": 0.8334665894508362, "learning_rate": 6.097096846816146e-06, "loss": 0.0573, "step": 32275 }, { "epoch": 0.7112110044235844, "grad_norm": 0.7196062207221985, "learning_rate": 6.096235287578004e-06, "loss": 0.0754, "step": 32276 }, { "epoch": 0.7112330397131005, "grad_norm": 0.9010668396949768, "learning_rate": 6.095373773691505e-06, "loss": 0.0379, "step": 32277 }, { "epoch": 0.7112550750026166, "grad_norm": 0.5097053050994873, "learning_rate": 6.094512305161047e-06, "loss": 0.0824, "step": 32278 }, { "epoch": 0.7112771102921328, "grad_norm": 0.5528029799461365, "learning_rate": 6.093650881991008e-06, "loss": 0.0464, "step": 32279 }, { "epoch": 0.711299145581649, "grad_norm": 0.6282830834388733, "learning_rate": 6.092789504185783e-06, "loss": 0.0752, "step": 32280 }, { "epoch": 0.7113211808711651, "grad_norm": 0.4546118676662445, "learning_rate": 6.091928171749761e-06, "loss": 0.0777, "step": 32281 }, { "epoch": 0.7113432161606813, "grad_norm": 0.5635356903076172, "learning_rate": 6.09106688468732e-06, "loss": 0.0563, "step": 32282 }, { "epoch": 0.7113652514501975, "grad_norm": 0.6115794777870178, "learning_rate": 6.090205643002854e-06, "loss": 0.0501, "step": 32283 }, { "epoch": 0.7113872867397136, "grad_norm": 1.0927132368087769, "learning_rate": 6.089344446700748e-06, "loss": 0.0754, "step": 32284 }, { "epoch": 0.7114093220292298, "grad_norm": 0.497457891702652, "learning_rate": 6.088483295785394e-06, "loss": 0.0708, "step": 32285 }, { "epoch": 0.711431357318746, "grad_norm": 0.6984765529632568, "learning_rate": 6.087622190261168e-06, "loss": 0.0554, "step": 32286 }, { "epoch": 0.7114533926082621, "grad_norm": 0.5227257609367371, "learning_rate": 6.086761130132463e-06, "loss": 0.0561, "step": 32287 }, { "epoch": 0.7114754278977783, "grad_norm": 1.0250256061553955, "learning_rate": 6.085900115403667e-06, "loss": 0.0827, "step": 32288 }, { "epoch": 0.7114974631872945, "grad_norm": 0.4943258464336395, "learning_rate": 6.085039146079156e-06, "loss": 0.0444, "step": 32289 }, { "epoch": 0.7115194984768106, "grad_norm": 0.7784757018089294, "learning_rate": 6.084178222163322e-06, "loss": 0.0781, "step": 32290 }, { "epoch": 0.7115415337663268, "grad_norm": 0.6578741669654846, "learning_rate": 6.083317343660549e-06, "loss": 0.0903, "step": 32291 }, { "epoch": 0.711563569055843, "grad_norm": 0.42988693714141846, "learning_rate": 6.082456510575228e-06, "loss": 0.0585, "step": 32292 }, { "epoch": 0.7115856043453591, "grad_norm": 0.5828585624694824, "learning_rate": 6.0815957229117334e-06, "loss": 0.0629, "step": 32293 }, { "epoch": 0.7116076396348753, "grad_norm": 0.4995911121368408, "learning_rate": 6.080734980674454e-06, "loss": 0.0489, "step": 32294 }, { "epoch": 0.7116296749243914, "grad_norm": 0.5284138321876526, "learning_rate": 6.079874283867778e-06, "loss": 0.068, "step": 32295 }, { "epoch": 0.7116517102139076, "grad_norm": 0.8348769545555115, "learning_rate": 6.07901363249608e-06, "loss": 0.0742, "step": 32296 }, { "epoch": 0.7116737455034238, "grad_norm": 0.6019879579544067, "learning_rate": 6.078153026563757e-06, "loss": 0.0875, "step": 32297 }, { "epoch": 0.7116957807929399, "grad_norm": 0.5856955647468567, "learning_rate": 6.077292466075173e-06, "loss": 0.0604, "step": 32298 }, { "epoch": 0.7117178160824561, "grad_norm": 0.6620035171508789, "learning_rate": 6.076431951034735e-06, "loss": 0.0645, "step": 32299 }, { "epoch": 0.7117398513719723, "grad_norm": 0.7738699913024902, "learning_rate": 6.0755714814468074e-06, "loss": 0.0601, "step": 32300 }, { "epoch": 0.7117618866614883, "grad_norm": 0.8846418261528015, "learning_rate": 6.074711057315787e-06, "loss": 0.0714, "step": 32301 }, { "epoch": 0.7117839219510045, "grad_norm": 0.6348223090171814, "learning_rate": 6.073850678646036e-06, "loss": 0.0728, "step": 32302 }, { "epoch": 0.7118059572405206, "grad_norm": 0.4423251748085022, "learning_rate": 6.0729903454419625e-06, "loss": 0.1147, "step": 32303 }, { "epoch": 0.7118279925300368, "grad_norm": 0.4271933436393738, "learning_rate": 6.072130057707936e-06, "loss": 0.0543, "step": 32304 }, { "epoch": 0.711850027819553, "grad_norm": 0.4717308580875397, "learning_rate": 6.071269815448333e-06, "loss": 0.0679, "step": 32305 }, { "epoch": 0.7118720631090691, "grad_norm": 0.7446146011352539, "learning_rate": 6.070409618667541e-06, "loss": 0.0463, "step": 32306 }, { "epoch": 0.7118940983985853, "grad_norm": 0.6566962599754333, "learning_rate": 6.069549467369942e-06, "loss": 0.0536, "step": 32307 }, { "epoch": 0.7119161336881015, "grad_norm": 0.296387255191803, "learning_rate": 6.06868936155992e-06, "loss": 0.0339, "step": 32308 }, { "epoch": 0.7119381689776176, "grad_norm": 0.6221507787704468, "learning_rate": 6.067829301241843e-06, "loss": 0.0545, "step": 32309 }, { "epoch": 0.7119602042671338, "grad_norm": 0.4560861885547638, "learning_rate": 6.066969286420112e-06, "loss": 0.0751, "step": 32310 }, { "epoch": 0.71198223955665, "grad_norm": 0.629769504070282, "learning_rate": 6.066109317099091e-06, "loss": 0.0746, "step": 32311 }, { "epoch": 0.7120042748461661, "grad_norm": 0.7510277628898621, "learning_rate": 6.0652493932831715e-06, "loss": 0.0561, "step": 32312 }, { "epoch": 0.7120263101356823, "grad_norm": 0.4103156626224518, "learning_rate": 6.064389514976721e-06, "loss": 0.0606, "step": 32313 }, { "epoch": 0.7120483454251985, "grad_norm": 0.8767039775848389, "learning_rate": 6.063529682184129e-06, "loss": 0.0921, "step": 32314 }, { "epoch": 0.7120703807147146, "grad_norm": 0.4720144271850586, "learning_rate": 6.062669894909776e-06, "loss": 0.06, "step": 32315 }, { "epoch": 0.7120924160042308, "grad_norm": 0.39514341950416565, "learning_rate": 6.0618101531580335e-06, "loss": 0.0372, "step": 32316 }, { "epoch": 0.712114451293747, "grad_norm": 0.2857550084590912, "learning_rate": 6.0609504569332845e-06, "loss": 0.043, "step": 32317 }, { "epoch": 0.7121364865832631, "grad_norm": 0.6099652647972107, "learning_rate": 6.060090806239908e-06, "loss": 0.0683, "step": 32318 }, { "epoch": 0.7121585218727793, "grad_norm": 0.3148517310619354, "learning_rate": 6.059231201082288e-06, "loss": 0.0738, "step": 32319 }, { "epoch": 0.7121805571622954, "grad_norm": 0.6729894876480103, "learning_rate": 6.0583716414647935e-06, "loss": 0.0834, "step": 32320 }, { "epoch": 0.7122025924518116, "grad_norm": 0.4317004084587097, "learning_rate": 6.057512127391807e-06, "loss": 0.0409, "step": 32321 }, { "epoch": 0.7122246277413278, "grad_norm": 0.470215767621994, "learning_rate": 6.056652658867713e-06, "loss": 0.087, "step": 32322 }, { "epoch": 0.7122466630308439, "grad_norm": 0.5971536636352539, "learning_rate": 6.0557932358968756e-06, "loss": 0.0533, "step": 32323 }, { "epoch": 0.7122686983203601, "grad_norm": 0.4346751272678375, "learning_rate": 6.0549338584836855e-06, "loss": 0.046, "step": 32324 }, { "epoch": 0.7122907336098763, "grad_norm": 0.31124043464660645, "learning_rate": 6.0540745266325035e-06, "loss": 0.0534, "step": 32325 }, { "epoch": 0.7123127688993923, "grad_norm": 0.5288628935813904, "learning_rate": 6.053215240347727e-06, "loss": 0.0447, "step": 32326 }, { "epoch": 0.7123348041889085, "grad_norm": 0.5060555338859558, "learning_rate": 6.052355999633719e-06, "loss": 0.0739, "step": 32327 }, { "epoch": 0.7123568394784247, "grad_norm": 1.0478240251541138, "learning_rate": 6.051496804494864e-06, "loss": 0.0741, "step": 32328 }, { "epoch": 0.7123788747679408, "grad_norm": 0.8581157326698303, "learning_rate": 6.05063765493553e-06, "loss": 0.0627, "step": 32329 }, { "epoch": 0.712400910057457, "grad_norm": 0.5666395425796509, "learning_rate": 6.049778550960096e-06, "loss": 0.0541, "step": 32330 }, { "epoch": 0.7124229453469731, "grad_norm": 0.49020129442214966, "learning_rate": 6.048919492572947e-06, "loss": 0.0575, "step": 32331 }, { "epoch": 0.7124449806364893, "grad_norm": 0.6448142528533936, "learning_rate": 6.048060479778437e-06, "loss": 0.0671, "step": 32332 }, { "epoch": 0.7124670159260055, "grad_norm": 0.6643781661987305, "learning_rate": 6.047201512580969e-06, "loss": 0.0733, "step": 32333 }, { "epoch": 0.7124890512155216, "grad_norm": 0.437703937292099, "learning_rate": 6.046342590984898e-06, "loss": 0.0486, "step": 32334 }, { "epoch": 0.7125110865050378, "grad_norm": 0.6142997741699219, "learning_rate": 6.0454837149946115e-06, "loss": 0.0668, "step": 32335 }, { "epoch": 0.712533121794554, "grad_norm": 0.4459995925426483, "learning_rate": 6.044624884614472e-06, "loss": 0.063, "step": 32336 }, { "epoch": 0.7125551570840701, "grad_norm": 0.742871880531311, "learning_rate": 6.043766099848861e-06, "loss": 0.0812, "step": 32337 }, { "epoch": 0.7125771923735863, "grad_norm": 0.7821649312973022, "learning_rate": 6.042907360702156e-06, "loss": 0.0948, "step": 32338 }, { "epoch": 0.7125992276631025, "grad_norm": 0.2681121528148651, "learning_rate": 6.042048667178723e-06, "loss": 0.0591, "step": 32339 }, { "epoch": 0.7126212629526186, "grad_norm": 0.6765632629394531, "learning_rate": 6.04119001928294e-06, "loss": 0.0605, "step": 32340 }, { "epoch": 0.7126432982421348, "grad_norm": 0.9540229439735413, "learning_rate": 6.040331417019179e-06, "loss": 0.0616, "step": 32341 }, { "epoch": 0.712665333531651, "grad_norm": 0.48279869556427, "learning_rate": 6.039472860391822e-06, "loss": 0.0515, "step": 32342 }, { "epoch": 0.7126873688211671, "grad_norm": 0.5659441947937012, "learning_rate": 6.038614349405228e-06, "loss": 0.0656, "step": 32343 }, { "epoch": 0.7127094041106833, "grad_norm": 0.7069953083992004, "learning_rate": 6.037755884063776e-06, "loss": 0.0781, "step": 32344 }, { "epoch": 0.7127314394001995, "grad_norm": 0.3321860134601593, "learning_rate": 6.036897464371845e-06, "loss": 0.0617, "step": 32345 }, { "epoch": 0.7127534746897156, "grad_norm": 0.3706090450286865, "learning_rate": 6.036039090333797e-06, "loss": 0.0643, "step": 32346 }, { "epoch": 0.7127755099792318, "grad_norm": 0.5312888026237488, "learning_rate": 6.0351807619540134e-06, "loss": 0.0596, "step": 32347 }, { "epoch": 0.712797545268748, "grad_norm": 0.6659585237503052, "learning_rate": 6.034322479236852e-06, "loss": 0.0465, "step": 32348 }, { "epoch": 0.7128195805582641, "grad_norm": 0.40998756885528564, "learning_rate": 6.0334642421867034e-06, "loss": 0.0553, "step": 32349 }, { "epoch": 0.7128416158477803, "grad_norm": 0.8032415509223938, "learning_rate": 6.032606050807924e-06, "loss": 0.0638, "step": 32350 }, { "epoch": 0.7128636511372963, "grad_norm": 0.6264257431030273, "learning_rate": 6.031747905104898e-06, "loss": 0.0594, "step": 32351 }, { "epoch": 0.7128856864268125, "grad_norm": 0.6355358958244324, "learning_rate": 6.030889805081981e-06, "loss": 0.0509, "step": 32352 }, { "epoch": 0.7129077217163287, "grad_norm": 0.34680163860321045, "learning_rate": 6.0300317507435535e-06, "loss": 0.0344, "step": 32353 }, { "epoch": 0.7129297570058448, "grad_norm": 0.7545162439346313, "learning_rate": 6.029173742093991e-06, "loss": 0.0567, "step": 32354 }, { "epoch": 0.712951792295361, "grad_norm": 0.5544970035552979, "learning_rate": 6.028315779137644e-06, "loss": 0.0598, "step": 32355 }, { "epoch": 0.7129738275848772, "grad_norm": 0.7229779958724976, "learning_rate": 6.027457861878907e-06, "loss": 0.0709, "step": 32356 }, { "epoch": 0.7129958628743933, "grad_norm": 0.29964232444763184, "learning_rate": 6.0265999903221345e-06, "loss": 0.0654, "step": 32357 }, { "epoch": 0.7130178981639095, "grad_norm": 0.43762436509132385, "learning_rate": 6.025742164471705e-06, "loss": 0.077, "step": 32358 }, { "epoch": 0.7130399334534256, "grad_norm": 0.6565340161323547, "learning_rate": 6.0248843843319766e-06, "loss": 0.0493, "step": 32359 }, { "epoch": 0.7130619687429418, "grad_norm": 0.6211746335029602, "learning_rate": 6.0240266499073254e-06, "loss": 0.0673, "step": 32360 }, { "epoch": 0.713084004032458, "grad_norm": 0.7828367352485657, "learning_rate": 6.0231689612021255e-06, "loss": 0.1045, "step": 32361 }, { "epoch": 0.7131060393219741, "grad_norm": 0.5824344158172607, "learning_rate": 6.022311318220734e-06, "loss": 0.0622, "step": 32362 }, { "epoch": 0.7131280746114903, "grad_norm": 0.6952016949653625, "learning_rate": 6.021453720967526e-06, "loss": 0.0727, "step": 32363 }, { "epoch": 0.7131501099010065, "grad_norm": 0.7809101343154907, "learning_rate": 6.020596169446868e-06, "loss": 0.0615, "step": 32364 }, { "epoch": 0.7131721451905226, "grad_norm": 0.7788877487182617, "learning_rate": 6.019738663663134e-06, "loss": 0.1069, "step": 32365 }, { "epoch": 0.7131941804800388, "grad_norm": 0.6501630544662476, "learning_rate": 6.018881203620682e-06, "loss": 0.0565, "step": 32366 }, { "epoch": 0.713216215769555, "grad_norm": 0.5658702850341797, "learning_rate": 6.018023789323885e-06, "loss": 0.0664, "step": 32367 }, { "epoch": 0.7132382510590711, "grad_norm": 0.6198230385780334, "learning_rate": 6.017166420777112e-06, "loss": 0.068, "step": 32368 }, { "epoch": 0.7132602863485873, "grad_norm": 0.5629708766937256, "learning_rate": 6.016309097984725e-06, "loss": 0.0667, "step": 32369 }, { "epoch": 0.7132823216381035, "grad_norm": 0.47145888209342957, "learning_rate": 6.0154518209510964e-06, "loss": 0.0661, "step": 32370 }, { "epoch": 0.7133043569276196, "grad_norm": 0.3666825294494629, "learning_rate": 6.0145945896805805e-06, "loss": 0.0692, "step": 32371 }, { "epoch": 0.7133263922171358, "grad_norm": 0.7479097247123718, "learning_rate": 6.013737404177562e-06, "loss": 0.0585, "step": 32372 }, { "epoch": 0.713348427506652, "grad_norm": 0.6078861355781555, "learning_rate": 6.012880264446392e-06, "loss": 0.0735, "step": 32373 }, { "epoch": 0.7133704627961681, "grad_norm": 0.9437399506568909, "learning_rate": 6.012023170491443e-06, "loss": 0.0681, "step": 32374 }, { "epoch": 0.7133924980856842, "grad_norm": 0.8257173895835876, "learning_rate": 6.011166122317085e-06, "loss": 0.0649, "step": 32375 }, { "epoch": 0.7134145333752003, "grad_norm": 0.5588417649269104, "learning_rate": 6.010309119927674e-06, "loss": 0.0882, "step": 32376 }, { "epoch": 0.7134365686647165, "grad_norm": 0.49655696749687195, "learning_rate": 6.009452163327583e-06, "loss": 0.0646, "step": 32377 }, { "epoch": 0.7134586039542327, "grad_norm": 0.5741414427757263, "learning_rate": 6.008595252521164e-06, "loss": 0.0727, "step": 32378 }, { "epoch": 0.7134806392437488, "grad_norm": 0.3789643943309784, "learning_rate": 6.007738387512799e-06, "loss": 0.0633, "step": 32379 }, { "epoch": 0.713502674533265, "grad_norm": 0.7376970052719116, "learning_rate": 6.006881568306842e-06, "loss": 0.0744, "step": 32380 }, { "epoch": 0.7135247098227812, "grad_norm": 0.6285573244094849, "learning_rate": 6.006024794907663e-06, "loss": 0.0804, "step": 32381 }, { "epoch": 0.7135467451122973, "grad_norm": 0.5107317566871643, "learning_rate": 6.005168067319618e-06, "loss": 0.0586, "step": 32382 }, { "epoch": 0.7135687804018135, "grad_norm": 0.9476863741874695, "learning_rate": 6.004311385547075e-06, "loss": 0.0611, "step": 32383 }, { "epoch": 0.7135908156913296, "grad_norm": 0.6770551204681396, "learning_rate": 6.003454749594406e-06, "loss": 0.0855, "step": 32384 }, { "epoch": 0.7136128509808458, "grad_norm": 0.6715530753135681, "learning_rate": 6.002598159465958e-06, "loss": 0.0676, "step": 32385 }, { "epoch": 0.713634886270362, "grad_norm": 1.0448905229568481, "learning_rate": 6.001741615166103e-06, "loss": 0.0699, "step": 32386 }, { "epoch": 0.7136569215598781, "grad_norm": 0.6202532649040222, "learning_rate": 6.000885116699203e-06, "loss": 0.0733, "step": 32387 }, { "epoch": 0.7136789568493943, "grad_norm": 0.5449388027191162, "learning_rate": 6.000028664069627e-06, "loss": 0.053, "step": 32388 }, { "epoch": 0.7137009921389105, "grad_norm": 0.5431755185127258, "learning_rate": 5.999172257281726e-06, "loss": 0.0497, "step": 32389 }, { "epoch": 0.7137230274284266, "grad_norm": 0.5904863476753235, "learning_rate": 5.998315896339867e-06, "loss": 0.0657, "step": 32390 }, { "epoch": 0.7137450627179428, "grad_norm": 0.5056356191635132, "learning_rate": 5.9974595812484165e-06, "loss": 0.0543, "step": 32391 }, { "epoch": 0.713767098007459, "grad_norm": 0.7817263603210449, "learning_rate": 5.996603312011728e-06, "loss": 0.0875, "step": 32392 }, { "epoch": 0.7137891332969751, "grad_norm": 0.5754271745681763, "learning_rate": 5.995747088634171e-06, "loss": 0.0607, "step": 32393 }, { "epoch": 0.7138111685864913, "grad_norm": 0.7386481165885925, "learning_rate": 5.994890911120094e-06, "loss": 0.0708, "step": 32394 }, { "epoch": 0.7138332038760075, "grad_norm": 0.7961533069610596, "learning_rate": 5.994034779473874e-06, "loss": 0.0668, "step": 32395 }, { "epoch": 0.7138552391655236, "grad_norm": 0.5080776214599609, "learning_rate": 5.99317869369986e-06, "loss": 0.0575, "step": 32396 }, { "epoch": 0.7138772744550398, "grad_norm": 0.4892381727695465, "learning_rate": 5.992322653802417e-06, "loss": 0.0444, "step": 32397 }, { "epoch": 0.713899309744556, "grad_norm": 0.615219235420227, "learning_rate": 5.9914666597859115e-06, "loss": 0.0575, "step": 32398 }, { "epoch": 0.7139213450340721, "grad_norm": 0.5296348333358765, "learning_rate": 5.990610711654691e-06, "loss": 0.0662, "step": 32399 }, { "epoch": 0.7139433803235882, "grad_norm": 0.5486775040626526, "learning_rate": 5.9897548094131264e-06, "loss": 0.0589, "step": 32400 }, { "epoch": 0.7139654156131043, "grad_norm": 0.778026282787323, "learning_rate": 5.9888989530655615e-06, "loss": 0.083, "step": 32401 }, { "epoch": 0.7139874509026205, "grad_norm": 0.7459715008735657, "learning_rate": 5.9880431426163775e-06, "loss": 0.0605, "step": 32402 }, { "epoch": 0.7140094861921367, "grad_norm": 0.7500383257865906, "learning_rate": 5.987187378069917e-06, "loss": 0.0454, "step": 32403 }, { "epoch": 0.7140315214816528, "grad_norm": 0.44622501730918884, "learning_rate": 5.98633165943055e-06, "loss": 0.0599, "step": 32404 }, { "epoch": 0.714053556771169, "grad_norm": 0.8405413031578064, "learning_rate": 5.985475986702624e-06, "loss": 0.0645, "step": 32405 }, { "epoch": 0.7140755920606852, "grad_norm": 0.39460229873657227, "learning_rate": 5.984620359890501e-06, "loss": 0.0362, "step": 32406 }, { "epoch": 0.7140976273502013, "grad_norm": 0.4153822660446167, "learning_rate": 5.983764778998547e-06, "loss": 0.0506, "step": 32407 }, { "epoch": 0.7141196626397175, "grad_norm": 0.410989910364151, "learning_rate": 5.982909244031109e-06, "loss": 0.0464, "step": 32408 }, { "epoch": 0.7141416979292337, "grad_norm": 1.0885802507400513, "learning_rate": 5.9820537549925494e-06, "loss": 0.0633, "step": 32409 }, { "epoch": 0.7141637332187498, "grad_norm": 0.4395718574523926, "learning_rate": 5.981198311887226e-06, "loss": 0.053, "step": 32410 }, { "epoch": 0.714185768508266, "grad_norm": 0.6231846213340759, "learning_rate": 5.980342914719501e-06, "loss": 0.0442, "step": 32411 }, { "epoch": 0.7142078037977821, "grad_norm": 0.4883738160133362, "learning_rate": 5.979487563493721e-06, "loss": 0.0532, "step": 32412 }, { "epoch": 0.7142298390872983, "grad_norm": 0.7842045426368713, "learning_rate": 5.978632258214248e-06, "loss": 0.0578, "step": 32413 }, { "epoch": 0.7142518743768145, "grad_norm": 0.42887842655181885, "learning_rate": 5.977776998885444e-06, "loss": 0.0635, "step": 32414 }, { "epoch": 0.7142739096663306, "grad_norm": 0.3817618787288666, "learning_rate": 5.976921785511655e-06, "loss": 0.0663, "step": 32415 }, { "epoch": 0.7142959449558468, "grad_norm": 0.5696590542793274, "learning_rate": 5.976066618097247e-06, "loss": 0.0536, "step": 32416 }, { "epoch": 0.714317980245363, "grad_norm": 0.9296544790267944, "learning_rate": 5.975211496646559e-06, "loss": 0.0702, "step": 32417 }, { "epoch": 0.7143400155348791, "grad_norm": 0.5738217830657959, "learning_rate": 5.97435642116397e-06, "loss": 0.0475, "step": 32418 }, { "epoch": 0.7143620508243953, "grad_norm": 0.7182453870773315, "learning_rate": 5.973501391653819e-06, "loss": 0.0595, "step": 32419 }, { "epoch": 0.7143840861139115, "grad_norm": 0.8899269700050354, "learning_rate": 5.972646408120464e-06, "loss": 0.0511, "step": 32420 }, { "epoch": 0.7144061214034276, "grad_norm": 0.6061573028564453, "learning_rate": 5.971791470568263e-06, "loss": 0.0776, "step": 32421 }, { "epoch": 0.7144281566929438, "grad_norm": 0.44578737020492554, "learning_rate": 5.970936579001574e-06, "loss": 0.0683, "step": 32422 }, { "epoch": 0.71445019198246, "grad_norm": 0.7136213183403015, "learning_rate": 5.970081733424742e-06, "loss": 0.071, "step": 32423 }, { "epoch": 0.7144722272719761, "grad_norm": 0.6565456390380859, "learning_rate": 5.969226933842125e-06, "loss": 0.0515, "step": 32424 }, { "epoch": 0.7144942625614922, "grad_norm": 0.47123420238494873, "learning_rate": 5.968372180258085e-06, "loss": 0.0683, "step": 32425 }, { "epoch": 0.7145162978510083, "grad_norm": 0.3595578372478485, "learning_rate": 5.967517472676963e-06, "loss": 0.0831, "step": 32426 }, { "epoch": 0.7145383331405245, "grad_norm": 0.6088308691978455, "learning_rate": 5.966662811103122e-06, "loss": 0.1005, "step": 32427 }, { "epoch": 0.7145603684300407, "grad_norm": 0.5785951018333435, "learning_rate": 5.965808195540901e-06, "loss": 0.0614, "step": 32428 }, { "epoch": 0.7145824037195568, "grad_norm": 0.6010776162147522, "learning_rate": 5.964953625994675e-06, "loss": 0.0601, "step": 32429 }, { "epoch": 0.714604439009073, "grad_norm": 0.6418412923812866, "learning_rate": 5.964099102468779e-06, "loss": 0.0513, "step": 32430 }, { "epoch": 0.7146264742985892, "grad_norm": 0.29980966448783875, "learning_rate": 5.963244624967577e-06, "loss": 0.0777, "step": 32431 }, { "epoch": 0.7146485095881053, "grad_norm": 0.6479931473731995, "learning_rate": 5.96239019349541e-06, "loss": 0.0716, "step": 32432 }, { "epoch": 0.7146705448776215, "grad_norm": 0.8318076133728027, "learning_rate": 5.961535808056638e-06, "loss": 0.0754, "step": 32433 }, { "epoch": 0.7146925801671377, "grad_norm": 0.47760409116744995, "learning_rate": 5.960681468655616e-06, "loss": 0.0704, "step": 32434 }, { "epoch": 0.7147146154566538, "grad_norm": 0.7341898679733276, "learning_rate": 5.959827175296685e-06, "loss": 0.0783, "step": 32435 }, { "epoch": 0.71473665074617, "grad_norm": 0.8475378751754761, "learning_rate": 5.958972927984201e-06, "loss": 0.098, "step": 32436 }, { "epoch": 0.7147586860356862, "grad_norm": 0.5754307508468628, "learning_rate": 5.958118726722517e-06, "loss": 0.0684, "step": 32437 }, { "epoch": 0.7147807213252023, "grad_norm": 0.6894654035568237, "learning_rate": 5.957264571515987e-06, "loss": 0.0762, "step": 32438 }, { "epoch": 0.7148027566147185, "grad_norm": 0.7012741565704346, "learning_rate": 5.956410462368953e-06, "loss": 0.065, "step": 32439 }, { "epoch": 0.7148247919042346, "grad_norm": 0.6694132089614868, "learning_rate": 5.95555639928577e-06, "loss": 0.1003, "step": 32440 }, { "epoch": 0.7148468271937508, "grad_norm": 0.5595981478691101, "learning_rate": 5.954702382270794e-06, "loss": 0.0789, "step": 32441 }, { "epoch": 0.714868862483267, "grad_norm": 0.4803621768951416, "learning_rate": 5.953848411328363e-06, "loss": 0.083, "step": 32442 }, { "epoch": 0.7148908977727831, "grad_norm": 0.68455970287323, "learning_rate": 5.952994486462834e-06, "loss": 0.0612, "step": 32443 }, { "epoch": 0.7149129330622993, "grad_norm": 0.5398143529891968, "learning_rate": 5.952140607678556e-06, "loss": 0.0687, "step": 32444 }, { "epoch": 0.7149349683518155, "grad_norm": 0.5110316872596741, "learning_rate": 5.951286774979882e-06, "loss": 0.0599, "step": 32445 }, { "epoch": 0.7149570036413316, "grad_norm": 0.5619086623191833, "learning_rate": 5.950432988371152e-06, "loss": 0.084, "step": 32446 }, { "epoch": 0.7149790389308478, "grad_norm": 0.6963137984275818, "learning_rate": 5.94957924785672e-06, "loss": 0.065, "step": 32447 }, { "epoch": 0.715001074220364, "grad_norm": 0.5820996761322021, "learning_rate": 5.9487255534409376e-06, "loss": 0.0428, "step": 32448 }, { "epoch": 0.7150231095098801, "grad_norm": 0.8518420457839966, "learning_rate": 5.9478719051281445e-06, "loss": 0.0494, "step": 32449 }, { "epoch": 0.7150451447993962, "grad_norm": 0.918606698513031, "learning_rate": 5.947018302922701e-06, "loss": 0.0817, "step": 32450 }, { "epoch": 0.7150671800889123, "grad_norm": 0.6903185248374939, "learning_rate": 5.946164746828937e-06, "loss": 0.0623, "step": 32451 }, { "epoch": 0.7150892153784285, "grad_norm": 0.5963813066482544, "learning_rate": 5.94531123685122e-06, "loss": 0.0923, "step": 32452 }, { "epoch": 0.7151112506679447, "grad_norm": 0.5869271159172058, "learning_rate": 5.944457772993885e-06, "loss": 0.0513, "step": 32453 }, { "epoch": 0.7151332859574608, "grad_norm": 0.20689673721790314, "learning_rate": 5.943604355261286e-06, "loss": 0.0539, "step": 32454 }, { "epoch": 0.715155321246977, "grad_norm": 0.3961932361125946, "learning_rate": 5.9427509836577624e-06, "loss": 0.0573, "step": 32455 }, { "epoch": 0.7151773565364932, "grad_norm": 1.0101863145828247, "learning_rate": 5.941897658187664e-06, "loss": 0.0701, "step": 32456 }, { "epoch": 0.7151993918260093, "grad_norm": 0.6288471817970276, "learning_rate": 5.941044378855344e-06, "loss": 0.0765, "step": 32457 }, { "epoch": 0.7152214271155255, "grad_norm": 0.42382192611694336, "learning_rate": 5.940191145665138e-06, "loss": 0.0747, "step": 32458 }, { "epoch": 0.7152434624050417, "grad_norm": 0.40263253450393677, "learning_rate": 5.939337958621397e-06, "loss": 0.0382, "step": 32459 }, { "epoch": 0.7152654976945578, "grad_norm": 0.40007156133651733, "learning_rate": 5.9384848177284656e-06, "loss": 0.047, "step": 32460 }, { "epoch": 0.715287532984074, "grad_norm": 0.6431078910827637, "learning_rate": 5.937631722990696e-06, "loss": 0.0742, "step": 32461 }, { "epoch": 0.7153095682735902, "grad_norm": 0.5899732112884521, "learning_rate": 5.93677867441242e-06, "loss": 0.0568, "step": 32462 }, { "epoch": 0.7153316035631063, "grad_norm": 0.9130367040634155, "learning_rate": 5.935925671997993e-06, "loss": 0.0789, "step": 32463 }, { "epoch": 0.7153536388526225, "grad_norm": 0.6510242223739624, "learning_rate": 5.9350727157517624e-06, "loss": 0.0548, "step": 32464 }, { "epoch": 0.7153756741421387, "grad_norm": 0.6759477853775024, "learning_rate": 5.934219805678061e-06, "loss": 0.0522, "step": 32465 }, { "epoch": 0.7153977094316548, "grad_norm": 0.5456510186195374, "learning_rate": 5.93336694178124e-06, "loss": 0.0515, "step": 32466 }, { "epoch": 0.715419744721171, "grad_norm": 0.3031967282295227, "learning_rate": 5.932514124065644e-06, "loss": 0.0682, "step": 32467 }, { "epoch": 0.7154417800106871, "grad_norm": 0.40458157658576965, "learning_rate": 5.931661352535619e-06, "loss": 0.0559, "step": 32468 }, { "epoch": 0.7154638153002033, "grad_norm": 0.3747851848602295, "learning_rate": 5.930808627195501e-06, "loss": 0.0541, "step": 32469 }, { "epoch": 0.7154858505897195, "grad_norm": 0.9812238216400146, "learning_rate": 5.929955948049639e-06, "loss": 0.0708, "step": 32470 }, { "epoch": 0.7155078858792356, "grad_norm": 0.528093159198761, "learning_rate": 5.92910331510238e-06, "loss": 0.0587, "step": 32471 }, { "epoch": 0.7155299211687518, "grad_norm": 0.4554150402545929, "learning_rate": 5.928250728358057e-06, "loss": 0.0842, "step": 32472 }, { "epoch": 0.715551956458268, "grad_norm": 0.5253582000732422, "learning_rate": 5.927398187821023e-06, "loss": 0.0518, "step": 32473 }, { "epoch": 0.715573991747784, "grad_norm": 0.9207959175109863, "learning_rate": 5.9265456934956045e-06, "loss": 0.0772, "step": 32474 }, { "epoch": 0.7155960270373002, "grad_norm": 0.504948079586029, "learning_rate": 5.925693245386165e-06, "loss": 0.0707, "step": 32475 }, { "epoch": 0.7156180623268164, "grad_norm": 0.48969435691833496, "learning_rate": 5.92484084349703e-06, "loss": 0.0721, "step": 32476 }, { "epoch": 0.7156400976163325, "grad_norm": 0.46342697739601135, "learning_rate": 5.923988487832553e-06, "loss": 0.0787, "step": 32477 }, { "epoch": 0.7156621329058487, "grad_norm": 0.65180903673172, "learning_rate": 5.923136178397063e-06, "loss": 0.0492, "step": 32478 }, { "epoch": 0.7156841681953648, "grad_norm": 0.5988327860832214, "learning_rate": 5.9222839151949084e-06, "loss": 0.0572, "step": 32479 }, { "epoch": 0.715706203484881, "grad_norm": 0.5331946611404419, "learning_rate": 5.9214316982304355e-06, "loss": 0.037, "step": 32480 }, { "epoch": 0.7157282387743972, "grad_norm": 0.5852738618850708, "learning_rate": 5.920579527507973e-06, "loss": 0.0487, "step": 32481 }, { "epoch": 0.7157502740639133, "grad_norm": 0.48053401708602905, "learning_rate": 5.9197274030318694e-06, "loss": 0.0718, "step": 32482 }, { "epoch": 0.7157723093534295, "grad_norm": 0.49802204966545105, "learning_rate": 5.918875324806462e-06, "loss": 0.0544, "step": 32483 }, { "epoch": 0.7157943446429457, "grad_norm": 0.5617063641548157, "learning_rate": 5.918023292836098e-06, "loss": 0.0782, "step": 32484 }, { "epoch": 0.7158163799324618, "grad_norm": 0.39639195799827576, "learning_rate": 5.917171307125107e-06, "loss": 0.0799, "step": 32485 }, { "epoch": 0.715838415221978, "grad_norm": 1.0464951992034912, "learning_rate": 5.916319367677831e-06, "loss": 0.0896, "step": 32486 }, { "epoch": 0.7158604505114942, "grad_norm": 0.49980127811431885, "learning_rate": 5.915467474498619e-06, "loss": 0.086, "step": 32487 }, { "epoch": 0.7158824858010103, "grad_norm": 0.9125207662582397, "learning_rate": 5.914615627591796e-06, "loss": 0.0687, "step": 32488 }, { "epoch": 0.7159045210905265, "grad_norm": 0.5334997773170471, "learning_rate": 5.913763826961709e-06, "loss": 0.0748, "step": 32489 }, { "epoch": 0.7159265563800427, "grad_norm": 0.7553195953369141, "learning_rate": 5.912912072612694e-06, "loss": 0.0544, "step": 32490 }, { "epoch": 0.7159485916695588, "grad_norm": 0.5290897488594055, "learning_rate": 5.912060364549096e-06, "loss": 0.0693, "step": 32491 }, { "epoch": 0.715970626959075, "grad_norm": 0.6805670261383057, "learning_rate": 5.911208702775243e-06, "loss": 0.0641, "step": 32492 }, { "epoch": 0.7159926622485911, "grad_norm": 0.7326207756996155, "learning_rate": 5.910357087295478e-06, "loss": 0.081, "step": 32493 }, { "epoch": 0.7160146975381073, "grad_norm": 0.45762184262275696, "learning_rate": 5.909505518114144e-06, "loss": 0.0766, "step": 32494 }, { "epoch": 0.7160367328276235, "grad_norm": 0.7125076651573181, "learning_rate": 5.9086539952355674e-06, "loss": 0.081, "step": 32495 }, { "epoch": 0.7160587681171396, "grad_norm": 0.5475557446479797, "learning_rate": 5.907802518664096e-06, "loss": 0.0608, "step": 32496 }, { "epoch": 0.7160808034066558, "grad_norm": 0.5479331016540527, "learning_rate": 5.9069510884040525e-06, "loss": 0.0748, "step": 32497 }, { "epoch": 0.716102838696172, "grad_norm": 0.5561971068382263, "learning_rate": 5.906099704459792e-06, "loss": 0.0896, "step": 32498 }, { "epoch": 0.716124873985688, "grad_norm": 0.420806348323822, "learning_rate": 5.905248366835639e-06, "loss": 0.0398, "step": 32499 }, { "epoch": 0.7161469092752042, "grad_norm": 0.9900542497634888, "learning_rate": 5.9043970755359375e-06, "loss": 0.0571, "step": 32500 }, { "epoch": 0.7161689445647204, "grad_norm": 0.8661037087440491, "learning_rate": 5.903545830565014e-06, "loss": 0.0611, "step": 32501 }, { "epoch": 0.7161909798542365, "grad_norm": 0.4195013642311096, "learning_rate": 5.90269463192721e-06, "loss": 0.0352, "step": 32502 }, { "epoch": 0.7162130151437527, "grad_norm": 0.5096072554588318, "learning_rate": 5.901843479626865e-06, "loss": 0.0377, "step": 32503 }, { "epoch": 0.7162350504332688, "grad_norm": 0.5382771492004395, "learning_rate": 5.900992373668305e-06, "loss": 0.0903, "step": 32504 }, { "epoch": 0.716257085722785, "grad_norm": 0.9429720640182495, "learning_rate": 5.9001413140558705e-06, "loss": 0.0771, "step": 32505 }, { "epoch": 0.7162791210123012, "grad_norm": 0.7401857972145081, "learning_rate": 5.8992903007938956e-06, "loss": 0.0412, "step": 32506 }, { "epoch": 0.7163011563018173, "grad_norm": 0.9468065500259399, "learning_rate": 5.89843933388672e-06, "loss": 0.0766, "step": 32507 }, { "epoch": 0.7163231915913335, "grad_norm": 0.5763218998908997, "learning_rate": 5.89758841333867e-06, "loss": 0.061, "step": 32508 }, { "epoch": 0.7163452268808497, "grad_norm": 0.9403342604637146, "learning_rate": 5.896737539154082e-06, "loss": 0.0916, "step": 32509 }, { "epoch": 0.7163672621703658, "grad_norm": 0.25274139642715454, "learning_rate": 5.895886711337296e-06, "loss": 0.0814, "step": 32510 }, { "epoch": 0.716389297459882, "grad_norm": 1.9586492776870728, "learning_rate": 5.895035929892637e-06, "loss": 0.0859, "step": 32511 }, { "epoch": 0.7164113327493982, "grad_norm": 0.4474751949310303, "learning_rate": 5.894185194824443e-06, "loss": 0.0496, "step": 32512 }, { "epoch": 0.7164333680389143, "grad_norm": 0.6187837719917297, "learning_rate": 5.893334506137045e-06, "loss": 0.0529, "step": 32513 }, { "epoch": 0.7164554033284305, "grad_norm": 0.6730852127075195, "learning_rate": 5.8924838638347835e-06, "loss": 0.0866, "step": 32514 }, { "epoch": 0.7164774386179467, "grad_norm": 0.4655141532421112, "learning_rate": 5.89163326792198e-06, "loss": 0.0532, "step": 32515 }, { "epoch": 0.7164994739074628, "grad_norm": 0.6039941310882568, "learning_rate": 5.890782718402975e-06, "loss": 0.0376, "step": 32516 }, { "epoch": 0.716521509196979, "grad_norm": 0.5619644522666931, "learning_rate": 5.889932215282101e-06, "loss": 0.0443, "step": 32517 }, { "epoch": 0.7165435444864952, "grad_norm": 0.6881287097930908, "learning_rate": 5.889081758563682e-06, "loss": 0.0611, "step": 32518 }, { "epoch": 0.7165655797760113, "grad_norm": 0.4926583170890808, "learning_rate": 5.888231348252061e-06, "loss": 0.0544, "step": 32519 }, { "epoch": 0.7165876150655275, "grad_norm": 0.5871102809906006, "learning_rate": 5.8873809843515545e-06, "loss": 0.0505, "step": 32520 }, { "epoch": 0.7166096503550436, "grad_norm": 0.5269095301628113, "learning_rate": 5.8865306668665125e-06, "loss": 0.0572, "step": 32521 }, { "epoch": 0.7166316856445598, "grad_norm": 0.5148981809616089, "learning_rate": 5.885680395801252e-06, "loss": 0.0789, "step": 32522 }, { "epoch": 0.716653720934076, "grad_norm": 0.7219670414924622, "learning_rate": 5.884830171160114e-06, "loss": 0.0933, "step": 32523 }, { "epoch": 0.716675756223592, "grad_norm": 0.591131329536438, "learning_rate": 5.883979992947413e-06, "loss": 0.0507, "step": 32524 }, { "epoch": 0.7166977915131082, "grad_norm": 0.5589318871498108, "learning_rate": 5.883129861167503e-06, "loss": 0.0805, "step": 32525 }, { "epoch": 0.7167198268026244, "grad_norm": 0.6559667587280273, "learning_rate": 5.8822797758247e-06, "loss": 0.0639, "step": 32526 }, { "epoch": 0.7167418620921405, "grad_norm": 0.5453293323516846, "learning_rate": 5.881429736923328e-06, "loss": 0.0799, "step": 32527 }, { "epoch": 0.7167638973816567, "grad_norm": 0.7297589778900146, "learning_rate": 5.880579744467726e-06, "loss": 0.1008, "step": 32528 }, { "epoch": 0.7167859326711729, "grad_norm": 0.45241349935531616, "learning_rate": 5.879729798462223e-06, "loss": 0.0648, "step": 32529 }, { "epoch": 0.716807967960689, "grad_norm": 0.5532498955726624, "learning_rate": 5.878879898911149e-06, "loss": 0.0404, "step": 32530 }, { "epoch": 0.7168300032502052, "grad_norm": 0.693254828453064, "learning_rate": 5.878030045818822e-06, "loss": 0.0639, "step": 32531 }, { "epoch": 0.7168520385397213, "grad_norm": 0.6737905144691467, "learning_rate": 5.877180239189591e-06, "loss": 0.0456, "step": 32532 }, { "epoch": 0.7168740738292375, "grad_norm": 1.0731399059295654, "learning_rate": 5.876330479027766e-06, "loss": 0.0905, "step": 32533 }, { "epoch": 0.7168961091187537, "grad_norm": 0.907045304775238, "learning_rate": 5.875480765337688e-06, "loss": 0.0592, "step": 32534 }, { "epoch": 0.7169181444082698, "grad_norm": 0.6183074116706848, "learning_rate": 5.8746310981236765e-06, "loss": 0.0598, "step": 32535 }, { "epoch": 0.716940179697786, "grad_norm": 0.6783631443977356, "learning_rate": 5.873781477390059e-06, "loss": 0.08, "step": 32536 }, { "epoch": 0.7169622149873022, "grad_norm": 0.5069039463996887, "learning_rate": 5.872931903141173e-06, "loss": 0.0731, "step": 32537 }, { "epoch": 0.7169842502768183, "grad_norm": 0.4637456238269806, "learning_rate": 5.8720823753813344e-06, "loss": 0.0567, "step": 32538 }, { "epoch": 0.7170062855663345, "grad_norm": 0.7627527713775635, "learning_rate": 5.871232894114873e-06, "loss": 0.0499, "step": 32539 }, { "epoch": 0.7170283208558507, "grad_norm": 0.8685789108276367, "learning_rate": 5.870383459346119e-06, "loss": 0.0582, "step": 32540 }, { "epoch": 0.7170503561453668, "grad_norm": 0.5258068442344666, "learning_rate": 5.869534071079404e-06, "loss": 0.0758, "step": 32541 }, { "epoch": 0.717072391434883, "grad_norm": 0.6877066493034363, "learning_rate": 5.868684729319042e-06, "loss": 0.0533, "step": 32542 }, { "epoch": 0.7170944267243992, "grad_norm": 0.5392400622367859, "learning_rate": 5.867835434069366e-06, "loss": 0.0648, "step": 32543 }, { "epoch": 0.7171164620139153, "grad_norm": 1.2815155982971191, "learning_rate": 5.866986185334705e-06, "loss": 0.0703, "step": 32544 }, { "epoch": 0.7171384973034315, "grad_norm": 0.6656734943389893, "learning_rate": 5.866136983119376e-06, "loss": 0.0782, "step": 32545 }, { "epoch": 0.7171605325929477, "grad_norm": 0.4054485559463501, "learning_rate": 5.865287827427716e-06, "loss": 0.0654, "step": 32546 }, { "epoch": 0.7171825678824638, "grad_norm": 0.6032242774963379, "learning_rate": 5.864438718264031e-06, "loss": 0.0562, "step": 32547 }, { "epoch": 0.7172046031719799, "grad_norm": 0.7237911224365234, "learning_rate": 5.8635896556326704e-06, "loss": 0.0705, "step": 32548 }, { "epoch": 0.717226638461496, "grad_norm": 0.7161562442779541, "learning_rate": 5.86274063953794e-06, "loss": 0.0643, "step": 32549 }, { "epoch": 0.7172486737510122, "grad_norm": 0.8729084134101868, "learning_rate": 5.861891669984178e-06, "loss": 0.0604, "step": 32550 }, { "epoch": 0.7172707090405284, "grad_norm": 0.9534624814987183, "learning_rate": 5.8610427469756965e-06, "loss": 0.0774, "step": 32551 }, { "epoch": 0.7172927443300445, "grad_norm": 0.5201634168624878, "learning_rate": 5.860193870516824e-06, "loss": 0.0737, "step": 32552 }, { "epoch": 0.7173147796195607, "grad_norm": 0.5974709987640381, "learning_rate": 5.859345040611893e-06, "loss": 0.0528, "step": 32553 }, { "epoch": 0.7173368149090769, "grad_norm": 0.6913374662399292, "learning_rate": 5.858496257265207e-06, "loss": 0.0587, "step": 32554 }, { "epoch": 0.717358850198593, "grad_norm": 0.6083298921585083, "learning_rate": 5.857647520481112e-06, "loss": 0.0549, "step": 32555 }, { "epoch": 0.7173808854881092, "grad_norm": 0.6889892220497131, "learning_rate": 5.856798830263917e-06, "loss": 0.0952, "step": 32556 }, { "epoch": 0.7174029207776254, "grad_norm": 0.34612491726875305, "learning_rate": 5.855950186617952e-06, "loss": 0.0616, "step": 32557 }, { "epoch": 0.7174249560671415, "grad_norm": 0.8096463680267334, "learning_rate": 5.85510158954753e-06, "loss": 0.0627, "step": 32558 }, { "epoch": 0.7174469913566577, "grad_norm": 0.8771361708641052, "learning_rate": 5.854253039056983e-06, "loss": 0.0572, "step": 32559 }, { "epoch": 0.7174690266461738, "grad_norm": 0.9245675206184387, "learning_rate": 5.853404535150633e-06, "loss": 0.0837, "step": 32560 }, { "epoch": 0.71749106193569, "grad_norm": 1.0228155851364136, "learning_rate": 5.852556077832792e-06, "loss": 0.0622, "step": 32561 }, { "epoch": 0.7175130972252062, "grad_norm": 0.6072882413864136, "learning_rate": 5.85170766710779e-06, "loss": 0.081, "step": 32562 }, { "epoch": 0.7175351325147223, "grad_norm": 0.4441973567008972, "learning_rate": 5.850859302979945e-06, "loss": 0.0647, "step": 32563 }, { "epoch": 0.7175571678042385, "grad_norm": 0.33808085322380066, "learning_rate": 5.850010985453587e-06, "loss": 0.0702, "step": 32564 }, { "epoch": 0.7175792030937547, "grad_norm": 0.5563927888870239, "learning_rate": 5.8491627145330225e-06, "loss": 0.0752, "step": 32565 }, { "epoch": 0.7176012383832708, "grad_norm": 0.6443985104560852, "learning_rate": 5.848314490222581e-06, "loss": 0.0553, "step": 32566 }, { "epoch": 0.717623273672787, "grad_norm": 0.5286203622817993, "learning_rate": 5.847466312526584e-06, "loss": 0.0766, "step": 32567 }, { "epoch": 0.7176453089623032, "grad_norm": 0.47647759318351746, "learning_rate": 5.846618181449346e-06, "loss": 0.0727, "step": 32568 }, { "epoch": 0.7176673442518193, "grad_norm": 0.5739223957061768, "learning_rate": 5.845770096995196e-06, "loss": 0.0464, "step": 32569 }, { "epoch": 0.7176893795413355, "grad_norm": 0.572185754776001, "learning_rate": 5.844922059168435e-06, "loss": 0.081, "step": 32570 }, { "epoch": 0.7177114148308517, "grad_norm": 0.8070334196090698, "learning_rate": 5.8440740679734074e-06, "loss": 0.085, "step": 32571 }, { "epoch": 0.7177334501203678, "grad_norm": 0.6300365924835205, "learning_rate": 5.8432261234144145e-06, "loss": 0.0441, "step": 32572 }, { "epoch": 0.7177554854098839, "grad_norm": 0.7907922267913818, "learning_rate": 5.842378225495786e-06, "loss": 0.0649, "step": 32573 }, { "epoch": 0.7177775206994, "grad_norm": 0.5989628434181213, "learning_rate": 5.8415303742218305e-06, "loss": 0.0571, "step": 32574 }, { "epoch": 0.7177995559889162, "grad_norm": 0.597648024559021, "learning_rate": 5.840682569596873e-06, "loss": 0.0557, "step": 32575 }, { "epoch": 0.7178215912784324, "grad_norm": 0.7048812508583069, "learning_rate": 5.839834811625236e-06, "loss": 0.0875, "step": 32576 }, { "epoch": 0.7178436265679485, "grad_norm": 0.6061822175979614, "learning_rate": 5.838987100311221e-06, "loss": 0.0743, "step": 32577 }, { "epoch": 0.7178656618574647, "grad_norm": 0.39701196551322937, "learning_rate": 5.838139435659169e-06, "loss": 0.079, "step": 32578 }, { "epoch": 0.7178876971469809, "grad_norm": 0.4075475037097931, "learning_rate": 5.837291817673379e-06, "loss": 0.0442, "step": 32579 }, { "epoch": 0.717909732436497, "grad_norm": 0.34913206100463867, "learning_rate": 5.836444246358182e-06, "loss": 0.051, "step": 32580 }, { "epoch": 0.7179317677260132, "grad_norm": 0.4638741612434387, "learning_rate": 5.8355967217178815e-06, "loss": 0.0451, "step": 32581 }, { "epoch": 0.7179538030155294, "grad_norm": 0.5222797989845276, "learning_rate": 5.834749243756802e-06, "loss": 0.0671, "step": 32582 }, { "epoch": 0.7179758383050455, "grad_norm": 0.6556347608566284, "learning_rate": 5.833901812479267e-06, "loss": 0.0585, "step": 32583 }, { "epoch": 0.7179978735945617, "grad_norm": 0.3806668817996979, "learning_rate": 5.833054427889576e-06, "loss": 0.0546, "step": 32584 }, { "epoch": 0.7180199088840779, "grad_norm": 0.5819418430328369, "learning_rate": 5.832207089992057e-06, "loss": 0.0671, "step": 32585 }, { "epoch": 0.718041944173594, "grad_norm": 0.6188651919364929, "learning_rate": 5.831359798791024e-06, "loss": 0.0529, "step": 32586 }, { "epoch": 0.7180639794631102, "grad_norm": 0.6715471744537354, "learning_rate": 5.830512554290795e-06, "loss": 0.0618, "step": 32587 }, { "epoch": 0.7180860147526263, "grad_norm": 0.5959937572479248, "learning_rate": 5.829665356495679e-06, "loss": 0.0657, "step": 32588 }, { "epoch": 0.7181080500421425, "grad_norm": 1.0992683172225952, "learning_rate": 5.828818205409994e-06, "loss": 0.0705, "step": 32589 }, { "epoch": 0.7181300853316587, "grad_norm": 0.7851758599281311, "learning_rate": 5.827971101038061e-06, "loss": 0.0761, "step": 32590 }, { "epoch": 0.7181521206211748, "grad_norm": 0.6682327389717102, "learning_rate": 5.827124043384185e-06, "loss": 0.0717, "step": 32591 }, { "epoch": 0.718174155910691, "grad_norm": 0.2823197841644287, "learning_rate": 5.826277032452691e-06, "loss": 0.0527, "step": 32592 }, { "epoch": 0.7181961912002072, "grad_norm": 0.7767840623855591, "learning_rate": 5.825430068247875e-06, "loss": 0.0646, "step": 32593 }, { "epoch": 0.7182182264897233, "grad_norm": 0.7997138500213623, "learning_rate": 5.824583150774076e-06, "loss": 0.0558, "step": 32594 }, { "epoch": 0.7182402617792395, "grad_norm": 0.8951309323310852, "learning_rate": 5.823736280035589e-06, "loss": 0.0876, "step": 32595 }, { "epoch": 0.7182622970687557, "grad_norm": 0.3269646167755127, "learning_rate": 5.8228894560367405e-06, "loss": 0.0727, "step": 32596 }, { "epoch": 0.7182843323582718, "grad_norm": 0.6370134353637695, "learning_rate": 5.82204267878183e-06, "loss": 0.0509, "step": 32597 }, { "epoch": 0.7183063676477879, "grad_norm": 0.543736457824707, "learning_rate": 5.821195948275178e-06, "loss": 0.0739, "step": 32598 }, { "epoch": 0.718328402937304, "grad_norm": 0.6197916865348816, "learning_rate": 5.820349264521103e-06, "loss": 0.0687, "step": 32599 }, { "epoch": 0.7183504382268202, "grad_norm": 0.77727210521698, "learning_rate": 5.8195026275239e-06, "loss": 0.0701, "step": 32600 }, { "epoch": 0.7183724735163364, "grad_norm": 0.636279284954071, "learning_rate": 5.818656037287906e-06, "loss": 0.0703, "step": 32601 }, { "epoch": 0.7183945088058525, "grad_norm": 0.5716966986656189, "learning_rate": 5.817809493817413e-06, "loss": 0.07, "step": 32602 }, { "epoch": 0.7184165440953687, "grad_norm": 0.7461841106414795, "learning_rate": 5.8169629971167464e-06, "loss": 0.0668, "step": 32603 }, { "epoch": 0.7184385793848849, "grad_norm": 0.4791897237300873, "learning_rate": 5.816116547190206e-06, "loss": 0.0558, "step": 32604 }, { "epoch": 0.718460614674401, "grad_norm": 0.5273618102073669, "learning_rate": 5.815270144042109e-06, "loss": 0.0761, "step": 32605 }, { "epoch": 0.7184826499639172, "grad_norm": 0.5860985517501831, "learning_rate": 5.814423787676771e-06, "loss": 0.0488, "step": 32606 }, { "epoch": 0.7185046852534334, "grad_norm": 0.7750458717346191, "learning_rate": 5.8135774780984925e-06, "loss": 0.0895, "step": 32607 }, { "epoch": 0.7185267205429495, "grad_norm": 1.1244465112686157, "learning_rate": 5.812731215311592e-06, "loss": 0.086, "step": 32608 }, { "epoch": 0.7185487558324657, "grad_norm": 0.6916686296463013, "learning_rate": 5.811884999320377e-06, "loss": 0.0772, "step": 32609 }, { "epoch": 0.7185707911219819, "grad_norm": 0.841688871383667, "learning_rate": 5.811038830129164e-06, "loss": 0.0826, "step": 32610 }, { "epoch": 0.718592826411498, "grad_norm": 0.6543009877204895, "learning_rate": 5.810192707742251e-06, "loss": 0.081, "step": 32611 }, { "epoch": 0.7186148617010142, "grad_norm": 0.5662955045700073, "learning_rate": 5.809346632163957e-06, "loss": 0.0614, "step": 32612 }, { "epoch": 0.7186368969905303, "grad_norm": 0.7212055325508118, "learning_rate": 5.808500603398593e-06, "loss": 0.0781, "step": 32613 }, { "epoch": 0.7186589322800465, "grad_norm": 0.45830678939819336, "learning_rate": 5.80765462145046e-06, "loss": 0.0425, "step": 32614 }, { "epoch": 0.7186809675695627, "grad_norm": 0.6922544836997986, "learning_rate": 5.806808686323875e-06, "loss": 0.0706, "step": 32615 }, { "epoch": 0.7187030028590788, "grad_norm": 0.6911364793777466, "learning_rate": 5.805962798023134e-06, "loss": 0.0792, "step": 32616 }, { "epoch": 0.718725038148595, "grad_norm": 0.5498039722442627, "learning_rate": 5.805116956552564e-06, "loss": 0.1048, "step": 32617 }, { "epoch": 0.7187470734381112, "grad_norm": 0.6774948239326477, "learning_rate": 5.804271161916461e-06, "loss": 0.0598, "step": 32618 }, { "epoch": 0.7187691087276273, "grad_norm": 0.607047438621521, "learning_rate": 5.803425414119139e-06, "loss": 0.0485, "step": 32619 }, { "epoch": 0.7187911440171435, "grad_norm": 0.5309339761734009, "learning_rate": 5.802579713164899e-06, "loss": 0.0818, "step": 32620 }, { "epoch": 0.7188131793066597, "grad_norm": 0.4846877157688141, "learning_rate": 5.801734059058053e-06, "loss": 0.0855, "step": 32621 }, { "epoch": 0.7188352145961757, "grad_norm": 0.630829393863678, "learning_rate": 5.800888451802911e-06, "loss": 0.0714, "step": 32622 }, { "epoch": 0.7188572498856919, "grad_norm": 0.6807238459587097, "learning_rate": 5.800042891403768e-06, "loss": 0.0777, "step": 32623 }, { "epoch": 0.718879285175208, "grad_norm": 0.5629692077636719, "learning_rate": 5.7991973778649515e-06, "loss": 0.0659, "step": 32624 }, { "epoch": 0.7189013204647242, "grad_norm": 0.9690841436386108, "learning_rate": 5.798351911190751e-06, "loss": 0.0861, "step": 32625 }, { "epoch": 0.7189233557542404, "grad_norm": 0.46198368072509766, "learning_rate": 5.797506491385482e-06, "loss": 0.0584, "step": 32626 }, { "epoch": 0.7189453910437565, "grad_norm": 0.5493968725204468, "learning_rate": 5.7966611184534384e-06, "loss": 0.0685, "step": 32627 }, { "epoch": 0.7189674263332727, "grad_norm": 0.38126543164253235, "learning_rate": 5.795815792398943e-06, "loss": 0.0475, "step": 32628 }, { "epoch": 0.7189894616227889, "grad_norm": 0.6151584386825562, "learning_rate": 5.794970513226297e-06, "loss": 0.0687, "step": 32629 }, { "epoch": 0.719011496912305, "grad_norm": 0.3497696816921234, "learning_rate": 5.794125280939795e-06, "loss": 0.0302, "step": 32630 }, { "epoch": 0.7190335322018212, "grad_norm": 0.4776056706905365, "learning_rate": 5.793280095543751e-06, "loss": 0.0638, "step": 32631 }, { "epoch": 0.7190555674913374, "grad_norm": 0.7432328462600708, "learning_rate": 5.792434957042467e-06, "loss": 0.0818, "step": 32632 }, { "epoch": 0.7190776027808535, "grad_norm": 0.547805666923523, "learning_rate": 5.791589865440254e-06, "loss": 0.0625, "step": 32633 }, { "epoch": 0.7190996380703697, "grad_norm": 0.5069623589515686, "learning_rate": 5.790744820741407e-06, "loss": 0.0577, "step": 32634 }, { "epoch": 0.7191216733598859, "grad_norm": 0.3073863983154297, "learning_rate": 5.789899822950234e-06, "loss": 0.0829, "step": 32635 }, { "epoch": 0.719143708649402, "grad_norm": 0.42790722846984863, "learning_rate": 5.789054872071042e-06, "loss": 0.0863, "step": 32636 }, { "epoch": 0.7191657439389182, "grad_norm": 0.5151271820068359, "learning_rate": 5.788209968108139e-06, "loss": 0.0688, "step": 32637 }, { "epoch": 0.7191877792284344, "grad_norm": 0.6196897029876709, "learning_rate": 5.7873651110658196e-06, "loss": 0.0857, "step": 32638 }, { "epoch": 0.7192098145179505, "grad_norm": 0.5032604932785034, "learning_rate": 5.786520300948381e-06, "loss": 0.0347, "step": 32639 }, { "epoch": 0.7192318498074667, "grad_norm": 0.5639533400535583, "learning_rate": 5.785675537760147e-06, "loss": 0.0634, "step": 32640 }, { "epoch": 0.7192538850969828, "grad_norm": 0.40408584475517273, "learning_rate": 5.784830821505401e-06, "loss": 0.0707, "step": 32641 }, { "epoch": 0.719275920386499, "grad_norm": 0.570530354976654, "learning_rate": 5.783986152188454e-06, "loss": 0.0616, "step": 32642 }, { "epoch": 0.7192979556760152, "grad_norm": 0.38601192831993103, "learning_rate": 5.783141529813606e-06, "loss": 0.0695, "step": 32643 }, { "epoch": 0.7193199909655313, "grad_norm": 0.7556045055389404, "learning_rate": 5.782296954385168e-06, "loss": 0.055, "step": 32644 }, { "epoch": 0.7193420262550475, "grad_norm": 0.8062257766723633, "learning_rate": 5.781452425907429e-06, "loss": 0.0627, "step": 32645 }, { "epoch": 0.7193640615445637, "grad_norm": 0.6271836161613464, "learning_rate": 5.7806079443846955e-06, "loss": 0.0731, "step": 32646 }, { "epoch": 0.7193860968340797, "grad_norm": 0.9074220061302185, "learning_rate": 5.7797635098212745e-06, "loss": 0.0601, "step": 32647 }, { "epoch": 0.7194081321235959, "grad_norm": 0.470167875289917, "learning_rate": 5.778919122221459e-06, "loss": 0.051, "step": 32648 }, { "epoch": 0.719430167413112, "grad_norm": 0.5974845886230469, "learning_rate": 5.7780747815895575e-06, "loss": 0.0604, "step": 32649 }, { "epoch": 0.7194522027026282, "grad_norm": 0.9005594253540039, "learning_rate": 5.777230487929854e-06, "loss": 0.0707, "step": 32650 }, { "epoch": 0.7194742379921444, "grad_norm": 1.0604907274246216, "learning_rate": 5.776386241246673e-06, "loss": 0.0875, "step": 32651 }, { "epoch": 0.7194962732816605, "grad_norm": 0.725118100643158, "learning_rate": 5.775542041544298e-06, "loss": 0.0926, "step": 32652 }, { "epoch": 0.7195183085711767, "grad_norm": 1.0791829824447632, "learning_rate": 5.77469788882704e-06, "loss": 0.076, "step": 32653 }, { "epoch": 0.7195403438606929, "grad_norm": 0.7659370303153992, "learning_rate": 5.773853783099186e-06, "loss": 0.0509, "step": 32654 }, { "epoch": 0.719562379150209, "grad_norm": 0.5057954788208008, "learning_rate": 5.773009724365042e-06, "loss": 0.0661, "step": 32655 }, { "epoch": 0.7195844144397252, "grad_norm": 0.5472001433372498, "learning_rate": 5.772165712628913e-06, "loss": 0.0587, "step": 32656 }, { "epoch": 0.7196064497292414, "grad_norm": 0.8168977499008179, "learning_rate": 5.771321747895087e-06, "loss": 0.0558, "step": 32657 }, { "epoch": 0.7196284850187575, "grad_norm": 0.5237447619438171, "learning_rate": 5.770477830167869e-06, "loss": 0.0503, "step": 32658 }, { "epoch": 0.7196505203082737, "grad_norm": 0.6227894425392151, "learning_rate": 5.769633959451556e-06, "loss": 0.0707, "step": 32659 }, { "epoch": 0.7196725555977899, "grad_norm": 0.5376084446907043, "learning_rate": 5.768790135750453e-06, "loss": 0.0771, "step": 32660 }, { "epoch": 0.719694590887306, "grad_norm": 0.5687538981437683, "learning_rate": 5.7679463590688456e-06, "loss": 0.0621, "step": 32661 }, { "epoch": 0.7197166261768222, "grad_norm": 0.7578575611114502, "learning_rate": 5.767102629411039e-06, "loss": 0.0853, "step": 32662 }, { "epoch": 0.7197386614663384, "grad_norm": 0.7182037234306335, "learning_rate": 5.766258946781334e-06, "loss": 0.0578, "step": 32663 }, { "epoch": 0.7197606967558545, "grad_norm": 0.7940026521682739, "learning_rate": 5.76541531118402e-06, "loss": 0.0438, "step": 32664 }, { "epoch": 0.7197827320453707, "grad_norm": 0.4468037486076355, "learning_rate": 5.764571722623397e-06, "loss": 0.0633, "step": 32665 }, { "epoch": 0.7198047673348869, "grad_norm": 0.5292645692825317, "learning_rate": 5.7637281811037616e-06, "loss": 0.0595, "step": 32666 }, { "epoch": 0.719826802624403, "grad_norm": 0.5315828919410706, "learning_rate": 5.7628846866294175e-06, "loss": 0.0612, "step": 32667 }, { "epoch": 0.7198488379139192, "grad_norm": 0.7920107841491699, "learning_rate": 5.762041239204649e-06, "loss": 0.0782, "step": 32668 }, { "epoch": 0.7198708732034353, "grad_norm": 0.47128045558929443, "learning_rate": 5.761197838833761e-06, "loss": 0.0499, "step": 32669 }, { "epoch": 0.7198929084929515, "grad_norm": 0.41992947459220886, "learning_rate": 5.760354485521049e-06, "loss": 0.0636, "step": 32670 }, { "epoch": 0.7199149437824677, "grad_norm": 0.9406858086585999, "learning_rate": 5.759511179270801e-06, "loss": 0.1018, "step": 32671 }, { "epoch": 0.7199369790719837, "grad_norm": 0.4830644726753235, "learning_rate": 5.758667920087326e-06, "loss": 0.051, "step": 32672 }, { "epoch": 0.7199590143614999, "grad_norm": 0.5375494360923767, "learning_rate": 5.757824707974897e-06, "loss": 0.0678, "step": 32673 }, { "epoch": 0.7199810496510161, "grad_norm": 0.7363389134407043, "learning_rate": 5.756981542937835e-06, "loss": 0.0676, "step": 32674 }, { "epoch": 0.7200030849405322, "grad_norm": 0.5482721924781799, "learning_rate": 5.756138424980417e-06, "loss": 0.0752, "step": 32675 }, { "epoch": 0.7200251202300484, "grad_norm": 1.060031533241272, "learning_rate": 5.755295354106948e-06, "loss": 0.0785, "step": 32676 }, { "epoch": 0.7200471555195646, "grad_norm": 0.6368879675865173, "learning_rate": 5.754452330321711e-06, "loss": 0.0401, "step": 32677 }, { "epoch": 0.7200691908090807, "grad_norm": 0.4368032217025757, "learning_rate": 5.753609353629008e-06, "loss": 0.0502, "step": 32678 }, { "epoch": 0.7200912260985969, "grad_norm": 0.6542349457740784, "learning_rate": 5.752766424033135e-06, "loss": 0.0659, "step": 32679 }, { "epoch": 0.720113261388113, "grad_norm": 0.49634233117103577, "learning_rate": 5.751923541538377e-06, "loss": 0.0987, "step": 32680 }, { "epoch": 0.7201352966776292, "grad_norm": 0.4794483780860901, "learning_rate": 5.75108070614903e-06, "loss": 0.0615, "step": 32681 }, { "epoch": 0.7201573319671454, "grad_norm": 0.42282891273498535, "learning_rate": 5.75023791786939e-06, "loss": 0.0526, "step": 32682 }, { "epoch": 0.7201793672566615, "grad_norm": 0.712821364402771, "learning_rate": 5.749395176703754e-06, "loss": 0.0754, "step": 32683 }, { "epoch": 0.7202014025461777, "grad_norm": 0.5480215549468994, "learning_rate": 5.748552482656402e-06, "loss": 0.068, "step": 32684 }, { "epoch": 0.7202234378356939, "grad_norm": 0.564630389213562, "learning_rate": 5.747709835731635e-06, "loss": 0.0502, "step": 32685 }, { "epoch": 0.72024547312521, "grad_norm": 0.6296217441558838, "learning_rate": 5.746867235933748e-06, "loss": 0.0946, "step": 32686 }, { "epoch": 0.7202675084147262, "grad_norm": 0.4730641841888428, "learning_rate": 5.7460246832670225e-06, "loss": 0.0516, "step": 32687 }, { "epoch": 0.7202895437042424, "grad_norm": 0.6023616194725037, "learning_rate": 5.745182177735755e-06, "loss": 0.0907, "step": 32688 }, { "epoch": 0.7203115789937585, "grad_norm": 0.3641282021999359, "learning_rate": 5.744339719344239e-06, "loss": 0.057, "step": 32689 }, { "epoch": 0.7203336142832747, "grad_norm": 0.9166306853294373, "learning_rate": 5.7434973080967686e-06, "loss": 0.0663, "step": 32690 }, { "epoch": 0.7203556495727909, "grad_norm": 0.5159674882888794, "learning_rate": 5.742654943997624e-06, "loss": 0.0701, "step": 32691 }, { "epoch": 0.720377684862307, "grad_norm": 0.457268089056015, "learning_rate": 5.741812627051104e-06, "loss": 0.036, "step": 32692 }, { "epoch": 0.7203997201518232, "grad_norm": 0.5343358516693115, "learning_rate": 5.7409703572615e-06, "loss": 0.0453, "step": 32693 }, { "epoch": 0.7204217554413394, "grad_norm": 0.48919257521629333, "learning_rate": 5.7401281346330945e-06, "loss": 0.0535, "step": 32694 }, { "epoch": 0.7204437907308555, "grad_norm": 0.4383648931980133, "learning_rate": 5.739285959170187e-06, "loss": 0.0573, "step": 32695 }, { "epoch": 0.7204658260203717, "grad_norm": 0.576166570186615, "learning_rate": 5.7384438308770534e-06, "loss": 0.0612, "step": 32696 }, { "epoch": 0.7204878613098877, "grad_norm": 0.35337093472480774, "learning_rate": 5.737601749758e-06, "loss": 0.0615, "step": 32697 }, { "epoch": 0.7205098965994039, "grad_norm": 0.5486935973167419, "learning_rate": 5.736759715817303e-06, "loss": 0.0518, "step": 32698 }, { "epoch": 0.7205319318889201, "grad_norm": 0.3387666344642639, "learning_rate": 5.735917729059263e-06, "loss": 0.0679, "step": 32699 }, { "epoch": 0.7205539671784362, "grad_norm": 0.43250808119773865, "learning_rate": 5.735075789488155e-06, "loss": 0.0747, "step": 32700 }, { "epoch": 0.7205760024679524, "grad_norm": 1.399588942527771, "learning_rate": 5.734233897108275e-06, "loss": 0.0518, "step": 32701 }, { "epoch": 0.7205980377574686, "grad_norm": 0.41292449831962585, "learning_rate": 5.733392051923915e-06, "loss": 0.0699, "step": 32702 }, { "epoch": 0.7206200730469847, "grad_norm": 0.5007683038711548, "learning_rate": 5.732550253939355e-06, "loss": 0.0798, "step": 32703 }, { "epoch": 0.7206421083365009, "grad_norm": 0.9423652291297913, "learning_rate": 5.731708503158884e-06, "loss": 0.0876, "step": 32704 }, { "epoch": 0.720664143626017, "grad_norm": 0.47679826617240906, "learning_rate": 5.730866799586792e-06, "loss": 0.0623, "step": 32705 }, { "epoch": 0.7206861789155332, "grad_norm": 0.5337563753128052, "learning_rate": 5.730025143227372e-06, "loss": 0.0591, "step": 32706 }, { "epoch": 0.7207082142050494, "grad_norm": 0.45036476850509644, "learning_rate": 5.729183534084899e-06, "loss": 0.0582, "step": 32707 }, { "epoch": 0.7207302494945655, "grad_norm": 0.8436532616615295, "learning_rate": 5.7283419721636655e-06, "loss": 0.0683, "step": 32708 }, { "epoch": 0.7207522847840817, "grad_norm": 0.5589092969894409, "learning_rate": 5.727500457467964e-06, "loss": 0.0263, "step": 32709 }, { "epoch": 0.7207743200735979, "grad_norm": 0.490504652261734, "learning_rate": 5.7266589900020696e-06, "loss": 0.0641, "step": 32710 }, { "epoch": 0.720796355363114, "grad_norm": 0.5628527998924255, "learning_rate": 5.725817569770273e-06, "loss": 0.0362, "step": 32711 }, { "epoch": 0.7208183906526302, "grad_norm": 0.40979617834091187, "learning_rate": 5.724976196776861e-06, "loss": 0.0427, "step": 32712 }, { "epoch": 0.7208404259421464, "grad_norm": 0.4034874737262726, "learning_rate": 5.724134871026125e-06, "loss": 0.036, "step": 32713 }, { "epoch": 0.7208624612316625, "grad_norm": 0.679668664932251, "learning_rate": 5.723293592522339e-06, "loss": 0.0972, "step": 32714 }, { "epoch": 0.7208844965211787, "grad_norm": 0.9017102122306824, "learning_rate": 5.722452361269792e-06, "loss": 0.0513, "step": 32715 }, { "epoch": 0.7209065318106949, "grad_norm": 0.5310065746307373, "learning_rate": 5.721611177272777e-06, "loss": 0.0628, "step": 32716 }, { "epoch": 0.720928567100211, "grad_norm": 0.6103478074073792, "learning_rate": 5.7207700405355655e-06, "loss": 0.0615, "step": 32717 }, { "epoch": 0.7209506023897272, "grad_norm": 0.6911736130714417, "learning_rate": 5.719928951062453e-06, "loss": 0.0707, "step": 32718 }, { "epoch": 0.7209726376792434, "grad_norm": 0.23339463770389557, "learning_rate": 5.7190879088577096e-06, "loss": 0.0785, "step": 32719 }, { "epoch": 0.7209946729687595, "grad_norm": 0.7332813143730164, "learning_rate": 5.718246913925637e-06, "loss": 0.0464, "step": 32720 }, { "epoch": 0.7210167082582756, "grad_norm": 0.2890316843986511, "learning_rate": 5.717405966270507e-06, "loss": 0.0355, "step": 32721 }, { "epoch": 0.7210387435477917, "grad_norm": 0.6134551763534546, "learning_rate": 5.716565065896609e-06, "loss": 0.0513, "step": 32722 }, { "epoch": 0.7210607788373079, "grad_norm": 0.3543857932090759, "learning_rate": 5.71572421280822e-06, "loss": 0.0556, "step": 32723 }, { "epoch": 0.7210828141268241, "grad_norm": 0.866802453994751, "learning_rate": 5.714883407009625e-06, "loss": 0.071, "step": 32724 }, { "epoch": 0.7211048494163402, "grad_norm": 0.5963408350944519, "learning_rate": 5.714042648505114e-06, "loss": 0.0738, "step": 32725 }, { "epoch": 0.7211268847058564, "grad_norm": 0.39586982131004333, "learning_rate": 5.713201937298958e-06, "loss": 0.08, "step": 32726 }, { "epoch": 0.7211489199953726, "grad_norm": 0.751594066619873, "learning_rate": 5.7123612733954434e-06, "loss": 0.0581, "step": 32727 }, { "epoch": 0.7211709552848887, "grad_norm": 0.526609480381012, "learning_rate": 5.7115206567988535e-06, "loss": 0.0436, "step": 32728 }, { "epoch": 0.7211929905744049, "grad_norm": 0.26326438784599304, "learning_rate": 5.710680087513475e-06, "loss": 0.0408, "step": 32729 }, { "epoch": 0.721215025863921, "grad_norm": 0.6107556223869324, "learning_rate": 5.709839565543581e-06, "loss": 0.0799, "step": 32730 }, { "epoch": 0.7212370611534372, "grad_norm": 0.8705054521560669, "learning_rate": 5.708999090893454e-06, "loss": 0.0532, "step": 32731 }, { "epoch": 0.7212590964429534, "grad_norm": 0.7952500581741333, "learning_rate": 5.708158663567382e-06, "loss": 0.0568, "step": 32732 }, { "epoch": 0.7212811317324695, "grad_norm": 0.6369749903678894, "learning_rate": 5.707318283569636e-06, "loss": 0.0784, "step": 32733 }, { "epoch": 0.7213031670219857, "grad_norm": 0.8336036205291748, "learning_rate": 5.706477950904502e-06, "loss": 0.0899, "step": 32734 }, { "epoch": 0.7213252023115019, "grad_norm": 0.45362815260887146, "learning_rate": 5.705637665576257e-06, "loss": 0.0598, "step": 32735 }, { "epoch": 0.721347237601018, "grad_norm": 0.8180487751960754, "learning_rate": 5.70479742758919e-06, "loss": 0.0693, "step": 32736 }, { "epoch": 0.7213692728905342, "grad_norm": 0.5492898225784302, "learning_rate": 5.703957236947569e-06, "loss": 0.0627, "step": 32737 }, { "epoch": 0.7213913081800504, "grad_norm": 0.6905496716499329, "learning_rate": 5.703117093655678e-06, "loss": 0.0866, "step": 32738 }, { "epoch": 0.7214133434695665, "grad_norm": 0.45514100790023804, "learning_rate": 5.702276997717803e-06, "loss": 0.0627, "step": 32739 }, { "epoch": 0.7214353787590827, "grad_norm": 0.7822771072387695, "learning_rate": 5.7014369491382115e-06, "loss": 0.0885, "step": 32740 }, { "epoch": 0.7214574140485989, "grad_norm": 0.49779176712036133, "learning_rate": 5.700596947921194e-06, "loss": 0.063, "step": 32741 }, { "epoch": 0.721479449338115, "grad_norm": 0.4833725690841675, "learning_rate": 5.699756994071012e-06, "loss": 0.0604, "step": 32742 }, { "epoch": 0.7215014846276312, "grad_norm": 0.8755772113800049, "learning_rate": 5.698917087591965e-06, "loss": 0.0742, "step": 32743 }, { "epoch": 0.7215235199171474, "grad_norm": 0.5514485239982605, "learning_rate": 5.698077228488317e-06, "loss": 0.0591, "step": 32744 }, { "epoch": 0.7215455552066635, "grad_norm": 0.4684208333492279, "learning_rate": 5.697237416764352e-06, "loss": 0.0528, "step": 32745 }, { "epoch": 0.7215675904961796, "grad_norm": 0.7150427103042603, "learning_rate": 5.696397652424338e-06, "loss": 0.0561, "step": 32746 }, { "epoch": 0.7215896257856957, "grad_norm": 0.7104083895683289, "learning_rate": 5.695557935472569e-06, "loss": 0.065, "step": 32747 }, { "epoch": 0.7216116610752119, "grad_norm": 0.5746617317199707, "learning_rate": 5.694718265913308e-06, "loss": 0.0468, "step": 32748 }, { "epoch": 0.7216336963647281, "grad_norm": 0.3896122872829437, "learning_rate": 5.693878643750839e-06, "loss": 0.0536, "step": 32749 }, { "epoch": 0.7216557316542442, "grad_norm": 0.4078862965106964, "learning_rate": 5.693039068989435e-06, "loss": 0.0321, "step": 32750 }, { "epoch": 0.7216777669437604, "grad_norm": 0.46035048365592957, "learning_rate": 5.6921995416333725e-06, "loss": 0.0428, "step": 32751 }, { "epoch": 0.7216998022332766, "grad_norm": 0.6830947399139404, "learning_rate": 5.691360061686934e-06, "loss": 0.0513, "step": 32752 }, { "epoch": 0.7217218375227927, "grad_norm": 0.8894323706626892, "learning_rate": 5.690520629154386e-06, "loss": 0.0782, "step": 32753 }, { "epoch": 0.7217438728123089, "grad_norm": 0.72303307056427, "learning_rate": 5.689681244040008e-06, "loss": 0.0603, "step": 32754 }, { "epoch": 0.7217659081018251, "grad_norm": 0.2045939713716507, "learning_rate": 5.688841906348075e-06, "loss": 0.0444, "step": 32755 }, { "epoch": 0.7217879433913412, "grad_norm": 0.8231521248817444, "learning_rate": 5.68800261608287e-06, "loss": 0.057, "step": 32756 }, { "epoch": 0.7218099786808574, "grad_norm": 0.6817630529403687, "learning_rate": 5.687163373248656e-06, "loss": 0.05, "step": 32757 }, { "epoch": 0.7218320139703736, "grad_norm": 0.4906069040298462, "learning_rate": 5.686324177849711e-06, "loss": 0.0751, "step": 32758 }, { "epoch": 0.7218540492598897, "grad_norm": 0.624506413936615, "learning_rate": 5.685485029890318e-06, "loss": 0.078, "step": 32759 }, { "epoch": 0.7218760845494059, "grad_norm": 0.7099190950393677, "learning_rate": 5.684645929374739e-06, "loss": 0.0608, "step": 32760 }, { "epoch": 0.721898119838922, "grad_norm": 0.3919784426689148, "learning_rate": 5.683806876307254e-06, "loss": 0.0591, "step": 32761 }, { "epoch": 0.7219201551284382, "grad_norm": 0.6535589098930359, "learning_rate": 5.682967870692136e-06, "loss": 0.0613, "step": 32762 }, { "epoch": 0.7219421904179544, "grad_norm": 0.5208935737609863, "learning_rate": 5.682128912533665e-06, "loss": 0.0592, "step": 32763 }, { "epoch": 0.7219642257074705, "grad_norm": 0.6747545003890991, "learning_rate": 5.681290001836101e-06, "loss": 0.0614, "step": 32764 }, { "epoch": 0.7219862609969867, "grad_norm": 0.4735548794269562, "learning_rate": 5.680451138603725e-06, "loss": 0.0447, "step": 32765 }, { "epoch": 0.7220082962865029, "grad_norm": 0.6399965286254883, "learning_rate": 5.679612322840814e-06, "loss": 0.0509, "step": 32766 }, { "epoch": 0.722030331576019, "grad_norm": 1.044928789138794, "learning_rate": 5.678773554551632e-06, "loss": 0.0929, "step": 32767 }, { "epoch": 0.7220523668655352, "grad_norm": 0.5525678992271423, "learning_rate": 5.6779348337404575e-06, "loss": 0.0553, "step": 32768 }, { "epoch": 0.7220744021550514, "grad_norm": 0.4784049987792969, "learning_rate": 5.6770961604115495e-06, "loss": 0.0707, "step": 32769 }, { "epoch": 0.7220964374445675, "grad_norm": 0.45941320061683655, "learning_rate": 5.6762575345692016e-06, "loss": 0.0844, "step": 32770 }, { "epoch": 0.7221184727340836, "grad_norm": 0.6324614882469177, "learning_rate": 5.675418956217668e-06, "loss": 0.0883, "step": 32771 }, { "epoch": 0.7221405080235997, "grad_norm": 0.9998370409011841, "learning_rate": 5.674580425361232e-06, "loss": 0.0472, "step": 32772 }, { "epoch": 0.7221625433131159, "grad_norm": 0.6835512518882751, "learning_rate": 5.6737419420041525e-06, "loss": 0.0425, "step": 32773 }, { "epoch": 0.7221845786026321, "grad_norm": 0.4685395061969757, "learning_rate": 5.672903506150708e-06, "loss": 0.0549, "step": 32774 }, { "epoch": 0.7222066138921482, "grad_norm": 0.49947217106819153, "learning_rate": 5.67206511780517e-06, "loss": 0.0838, "step": 32775 }, { "epoch": 0.7222286491816644, "grad_norm": 0.4098436236381531, "learning_rate": 5.671226776971798e-06, "loss": 0.0548, "step": 32776 }, { "epoch": 0.7222506844711806, "grad_norm": 0.7567852735519409, "learning_rate": 5.67038848365488e-06, "loss": 0.082, "step": 32777 }, { "epoch": 0.7222727197606967, "grad_norm": 0.43232834339141846, "learning_rate": 5.669550237858672e-06, "loss": 0.0601, "step": 32778 }, { "epoch": 0.7222947550502129, "grad_norm": 0.24237966537475586, "learning_rate": 5.668712039587452e-06, "loss": 0.0613, "step": 32779 }, { "epoch": 0.7223167903397291, "grad_norm": 0.7617054581642151, "learning_rate": 5.667873888845481e-06, "loss": 0.0563, "step": 32780 }, { "epoch": 0.7223388256292452, "grad_norm": 0.39900892972946167, "learning_rate": 5.667035785637032e-06, "loss": 0.0428, "step": 32781 }, { "epoch": 0.7223608609187614, "grad_norm": 0.3805603086948395, "learning_rate": 5.6661977299663805e-06, "loss": 0.0793, "step": 32782 }, { "epoch": 0.7223828962082776, "grad_norm": 0.5371000170707703, "learning_rate": 5.665359721837783e-06, "loss": 0.0759, "step": 32783 }, { "epoch": 0.7224049314977937, "grad_norm": 0.7210797667503357, "learning_rate": 5.664521761255515e-06, "loss": 0.0683, "step": 32784 }, { "epoch": 0.7224269667873099, "grad_norm": 0.8029316067695618, "learning_rate": 5.663683848223844e-06, "loss": 0.0786, "step": 32785 }, { "epoch": 0.722449002076826, "grad_norm": 0.6582812666893005, "learning_rate": 5.662845982747041e-06, "loss": 0.0772, "step": 32786 }, { "epoch": 0.7224710373663422, "grad_norm": 0.27497315406799316, "learning_rate": 5.662008164829367e-06, "loss": 0.0592, "step": 32787 }, { "epoch": 0.7224930726558584, "grad_norm": 0.6436781883239746, "learning_rate": 5.661170394475092e-06, "loss": 0.0842, "step": 32788 }, { "epoch": 0.7225151079453745, "grad_norm": 0.561592698097229, "learning_rate": 5.66033267168849e-06, "loss": 0.0569, "step": 32789 }, { "epoch": 0.7225371432348907, "grad_norm": 0.8710971474647522, "learning_rate": 5.6594949964738165e-06, "loss": 0.0878, "step": 32790 }, { "epoch": 0.7225591785244069, "grad_norm": 0.7160682082176208, "learning_rate": 5.658657368835349e-06, "loss": 0.0725, "step": 32791 }, { "epoch": 0.722581213813923, "grad_norm": 0.5880023241043091, "learning_rate": 5.657819788777339e-06, "loss": 0.0501, "step": 32792 }, { "epoch": 0.7226032491034392, "grad_norm": 0.5284339189529419, "learning_rate": 5.656982256304071e-06, "loss": 0.0492, "step": 32793 }, { "epoch": 0.7226252843929554, "grad_norm": 0.3988763689994812, "learning_rate": 5.656144771419799e-06, "loss": 0.0755, "step": 32794 }, { "epoch": 0.7226473196824714, "grad_norm": 0.597751259803772, "learning_rate": 5.655307334128796e-06, "loss": 0.1003, "step": 32795 }, { "epoch": 0.7226693549719876, "grad_norm": 0.7196089029312134, "learning_rate": 5.65446994443532e-06, "loss": 0.0453, "step": 32796 }, { "epoch": 0.7226913902615038, "grad_norm": 0.7756450772285461, "learning_rate": 5.6536326023436394e-06, "loss": 0.0529, "step": 32797 }, { "epoch": 0.7227134255510199, "grad_norm": 0.602691650390625, "learning_rate": 5.652795307858026e-06, "loss": 0.0671, "step": 32798 }, { "epoch": 0.7227354608405361, "grad_norm": 0.5662665367126465, "learning_rate": 5.651958060982726e-06, "loss": 0.067, "step": 32799 }, { "epoch": 0.7227574961300522, "grad_norm": 0.6041393280029297, "learning_rate": 5.651120861722028e-06, "loss": 0.0553, "step": 32800 }, { "epoch": 0.7227795314195684, "grad_norm": 0.6663466095924377, "learning_rate": 5.65028371008018e-06, "loss": 0.0743, "step": 32801 }, { "epoch": 0.7228015667090846, "grad_norm": 0.38874828815460205, "learning_rate": 5.649446606061455e-06, "loss": 0.0707, "step": 32802 }, { "epoch": 0.7228236019986007, "grad_norm": 0.809917151927948, "learning_rate": 5.6486095496701075e-06, "loss": 0.0724, "step": 32803 }, { "epoch": 0.7228456372881169, "grad_norm": 0.8281177878379822, "learning_rate": 5.647772540910407e-06, "loss": 0.0806, "step": 32804 }, { "epoch": 0.7228676725776331, "grad_norm": 0.6299722790718079, "learning_rate": 5.646935579786622e-06, "loss": 0.0503, "step": 32805 }, { "epoch": 0.7228897078671492, "grad_norm": 0.7004687786102295, "learning_rate": 5.646098666303002e-06, "loss": 0.045, "step": 32806 }, { "epoch": 0.7229117431566654, "grad_norm": 0.39890190958976746, "learning_rate": 5.645261800463819e-06, "loss": 0.0495, "step": 32807 }, { "epoch": 0.7229337784461816, "grad_norm": 0.7844890356063843, "learning_rate": 5.644424982273334e-06, "loss": 0.0646, "step": 32808 }, { "epoch": 0.7229558137356977, "grad_norm": 0.9919145703315735, "learning_rate": 5.643588211735814e-06, "loss": 0.0866, "step": 32809 }, { "epoch": 0.7229778490252139, "grad_norm": 0.5875716805458069, "learning_rate": 5.642751488855512e-06, "loss": 0.0814, "step": 32810 }, { "epoch": 0.7229998843147301, "grad_norm": 1.0170421600341797, "learning_rate": 5.641914813636695e-06, "loss": 0.0726, "step": 32811 }, { "epoch": 0.7230219196042462, "grad_norm": 0.7055527567863464, "learning_rate": 5.641078186083627e-06, "loss": 0.043, "step": 32812 }, { "epoch": 0.7230439548937624, "grad_norm": 0.5759168267250061, "learning_rate": 5.640241606200563e-06, "loss": 0.0398, "step": 32813 }, { "epoch": 0.7230659901832786, "grad_norm": 0.5638960599899292, "learning_rate": 5.639405073991773e-06, "loss": 0.0628, "step": 32814 }, { "epoch": 0.7230880254727947, "grad_norm": 0.42637184262275696, "learning_rate": 5.638568589461501e-06, "loss": 0.0677, "step": 32815 }, { "epoch": 0.7231100607623109, "grad_norm": 0.6860341429710388, "learning_rate": 5.637732152614029e-06, "loss": 0.0574, "step": 32816 }, { "epoch": 0.723132096051827, "grad_norm": 0.5135979056358337, "learning_rate": 5.636895763453604e-06, "loss": 0.0393, "step": 32817 }, { "epoch": 0.7231541313413432, "grad_norm": 0.5149953365325928, "learning_rate": 5.636059421984496e-06, "loss": 0.0768, "step": 32818 }, { "epoch": 0.7231761666308594, "grad_norm": 0.7450852990150452, "learning_rate": 5.635223128210952e-06, "loss": 0.0553, "step": 32819 }, { "epoch": 0.7231982019203754, "grad_norm": 0.7140519618988037, "learning_rate": 5.634386882137237e-06, "loss": 0.0789, "step": 32820 }, { "epoch": 0.7232202372098916, "grad_norm": 0.586144745349884, "learning_rate": 5.633550683767619e-06, "loss": 0.0833, "step": 32821 }, { "epoch": 0.7232422724994078, "grad_norm": 0.5661383867263794, "learning_rate": 5.632714533106339e-06, "loss": 0.0699, "step": 32822 }, { "epoch": 0.7232643077889239, "grad_norm": 0.4332941174507141, "learning_rate": 5.631878430157678e-06, "loss": 0.0546, "step": 32823 }, { "epoch": 0.7232863430784401, "grad_norm": 1.2637052536010742, "learning_rate": 5.631042374925878e-06, "loss": 0.1154, "step": 32824 }, { "epoch": 0.7233083783679563, "grad_norm": 0.7354233860969543, "learning_rate": 5.630206367415208e-06, "loss": 0.062, "step": 32825 }, { "epoch": 0.7233304136574724, "grad_norm": 0.3711676299571991, "learning_rate": 5.629370407629916e-06, "loss": 0.0735, "step": 32826 }, { "epoch": 0.7233524489469886, "grad_norm": 0.576485812664032, "learning_rate": 5.628534495574267e-06, "loss": 0.0428, "step": 32827 }, { "epoch": 0.7233744842365047, "grad_norm": 0.4601288139820099, "learning_rate": 5.627698631252523e-06, "loss": 0.0871, "step": 32828 }, { "epoch": 0.7233965195260209, "grad_norm": 0.5299569964408875, "learning_rate": 5.6268628146689285e-06, "loss": 0.0551, "step": 32829 }, { "epoch": 0.7234185548155371, "grad_norm": 0.8044765591621399, "learning_rate": 5.6260270458277475e-06, "loss": 0.0606, "step": 32830 }, { "epoch": 0.7234405901050532, "grad_norm": 0.5131374597549438, "learning_rate": 5.6251913247332385e-06, "loss": 0.0445, "step": 32831 }, { "epoch": 0.7234626253945694, "grad_norm": 0.6537173986434937, "learning_rate": 5.624355651389662e-06, "loss": 0.0883, "step": 32832 }, { "epoch": 0.7234846606840856, "grad_norm": 0.7580084800720215, "learning_rate": 5.623520025801266e-06, "loss": 0.075, "step": 32833 }, { "epoch": 0.7235066959736017, "grad_norm": 0.881279706954956, "learning_rate": 5.6226844479723095e-06, "loss": 0.0503, "step": 32834 }, { "epoch": 0.7235287312631179, "grad_norm": 0.5961933135986328, "learning_rate": 5.621848917907054e-06, "loss": 0.0476, "step": 32835 }, { "epoch": 0.7235507665526341, "grad_norm": 0.8222039341926575, "learning_rate": 5.621013435609747e-06, "loss": 0.0748, "step": 32836 }, { "epoch": 0.7235728018421502, "grad_norm": 0.7098660469055176, "learning_rate": 5.620178001084653e-06, "loss": 0.0564, "step": 32837 }, { "epoch": 0.7235948371316664, "grad_norm": 0.7686583995819092, "learning_rate": 5.619342614336012e-06, "loss": 0.0699, "step": 32838 }, { "epoch": 0.7236168724211826, "grad_norm": 0.519612193107605, "learning_rate": 5.618507275368099e-06, "loss": 0.0591, "step": 32839 }, { "epoch": 0.7236389077106987, "grad_norm": 0.5887577533721924, "learning_rate": 5.617671984185154e-06, "loss": 0.0859, "step": 32840 }, { "epoch": 0.7236609430002149, "grad_norm": 1.129065752029419, "learning_rate": 5.616836740791444e-06, "loss": 0.0801, "step": 32841 }, { "epoch": 0.723682978289731, "grad_norm": 0.6597026586532593, "learning_rate": 5.616001545191209e-06, "loss": 0.07, "step": 32842 }, { "epoch": 0.7237050135792472, "grad_norm": 0.6252852082252502, "learning_rate": 5.615166397388711e-06, "loss": 0.0806, "step": 32843 }, { "epoch": 0.7237270488687634, "grad_norm": 0.7100567817687988, "learning_rate": 5.614331297388208e-06, "loss": 0.0584, "step": 32844 }, { "epoch": 0.7237490841582794, "grad_norm": 0.3060145080089569, "learning_rate": 5.613496245193937e-06, "loss": 0.0736, "step": 32845 }, { "epoch": 0.7237711194477956, "grad_norm": 0.6582393050193787, "learning_rate": 5.612661240810175e-06, "loss": 0.0871, "step": 32846 }, { "epoch": 0.7237931547373118, "grad_norm": 0.6288372278213501, "learning_rate": 5.611826284241159e-06, "loss": 0.074, "step": 32847 }, { "epoch": 0.7238151900268279, "grad_norm": 0.4621100127696991, "learning_rate": 5.610991375491149e-06, "loss": 0.0751, "step": 32848 }, { "epoch": 0.7238372253163441, "grad_norm": 0.8968663215637207, "learning_rate": 5.610156514564385e-06, "loss": 0.0613, "step": 32849 }, { "epoch": 0.7238592606058603, "grad_norm": 0.3938048779964447, "learning_rate": 5.60932170146514e-06, "loss": 0.0695, "step": 32850 }, { "epoch": 0.7238812958953764, "grad_norm": 0.7781201601028442, "learning_rate": 5.608486936197656e-06, "loss": 0.072, "step": 32851 }, { "epoch": 0.7239033311848926, "grad_norm": 0.4548987150192261, "learning_rate": 5.6076522187661775e-06, "loss": 0.0591, "step": 32852 }, { "epoch": 0.7239253664744087, "grad_norm": 0.518211305141449, "learning_rate": 5.6068175491749626e-06, "loss": 0.0875, "step": 32853 }, { "epoch": 0.7239474017639249, "grad_norm": 0.3811931610107422, "learning_rate": 5.605982927428264e-06, "loss": 0.037, "step": 32854 }, { "epoch": 0.7239694370534411, "grad_norm": 0.6730385422706604, "learning_rate": 5.605148353530336e-06, "loss": 0.0768, "step": 32855 }, { "epoch": 0.7239914723429572, "grad_norm": 0.5790318846702576, "learning_rate": 5.604313827485421e-06, "loss": 0.0748, "step": 32856 }, { "epoch": 0.7240135076324734, "grad_norm": 0.44665586948394775, "learning_rate": 5.603479349297775e-06, "loss": 0.0536, "step": 32857 }, { "epoch": 0.7240355429219896, "grad_norm": 0.5804625153541565, "learning_rate": 5.602644918971646e-06, "loss": 0.0738, "step": 32858 }, { "epoch": 0.7240575782115057, "grad_norm": 0.5731931328773499, "learning_rate": 5.6018105365112915e-06, "loss": 0.0655, "step": 32859 }, { "epoch": 0.7240796135010219, "grad_norm": 0.9010996222496033, "learning_rate": 5.600976201920951e-06, "loss": 0.0612, "step": 32860 }, { "epoch": 0.7241016487905381, "grad_norm": 0.5926405191421509, "learning_rate": 5.6001419152048774e-06, "loss": 0.0678, "step": 32861 }, { "epoch": 0.7241236840800542, "grad_norm": 0.7840623259544373, "learning_rate": 5.599307676367327e-06, "loss": 0.0839, "step": 32862 }, { "epoch": 0.7241457193695704, "grad_norm": 0.7888352870941162, "learning_rate": 5.598473485412539e-06, "loss": 0.0777, "step": 32863 }, { "epoch": 0.7241677546590866, "grad_norm": 0.7070381045341492, "learning_rate": 5.597639342344772e-06, "loss": 0.0845, "step": 32864 }, { "epoch": 0.7241897899486027, "grad_norm": 0.2852453589439392, "learning_rate": 5.59680524716826e-06, "loss": 0.0588, "step": 32865 }, { "epoch": 0.7242118252381189, "grad_norm": 0.45504534244537354, "learning_rate": 5.59597119988727e-06, "loss": 0.0732, "step": 32866 }, { "epoch": 0.724233860527635, "grad_norm": 0.3855953514575958, "learning_rate": 5.595137200506038e-06, "loss": 0.0477, "step": 32867 }, { "epoch": 0.7242558958171512, "grad_norm": 0.5205078125, "learning_rate": 5.594303249028816e-06, "loss": 0.0925, "step": 32868 }, { "epoch": 0.7242779311066674, "grad_norm": 0.8044105768203735, "learning_rate": 5.593469345459856e-06, "loss": 0.0406, "step": 32869 }, { "epoch": 0.7242999663961834, "grad_norm": 0.8074728846549988, "learning_rate": 5.592635489803394e-06, "loss": 0.0566, "step": 32870 }, { "epoch": 0.7243220016856996, "grad_norm": 0.2670273780822754, "learning_rate": 5.5918016820636905e-06, "loss": 0.098, "step": 32871 }, { "epoch": 0.7243440369752158, "grad_norm": 0.6325774788856506, "learning_rate": 5.590967922244977e-06, "loss": 0.058, "step": 32872 }, { "epoch": 0.7243660722647319, "grad_norm": 0.6800546646118164, "learning_rate": 5.590134210351518e-06, "loss": 0.0805, "step": 32873 }, { "epoch": 0.7243881075542481, "grad_norm": 0.8382187485694885, "learning_rate": 5.5893005463875464e-06, "loss": 0.0683, "step": 32874 }, { "epoch": 0.7244101428437643, "grad_norm": 0.7654580473899841, "learning_rate": 5.588466930357318e-06, "loss": 0.0629, "step": 32875 }, { "epoch": 0.7244321781332804, "grad_norm": 0.5221835374832153, "learning_rate": 5.587633362265071e-06, "loss": 0.073, "step": 32876 }, { "epoch": 0.7244542134227966, "grad_norm": 0.49122318625450134, "learning_rate": 5.586799842115055e-06, "loss": 0.0918, "step": 32877 }, { "epoch": 0.7244762487123128, "grad_norm": 0.5975300073623657, "learning_rate": 5.585966369911519e-06, "loss": 0.0838, "step": 32878 }, { "epoch": 0.7244982840018289, "grad_norm": 0.3750893473625183, "learning_rate": 5.585132945658698e-06, "loss": 0.0568, "step": 32879 }, { "epoch": 0.7245203192913451, "grad_norm": 0.39917293190956116, "learning_rate": 5.584299569360846e-06, "loss": 0.0472, "step": 32880 }, { "epoch": 0.7245423545808612, "grad_norm": 0.6052538156509399, "learning_rate": 5.583466241022205e-06, "loss": 0.0623, "step": 32881 }, { "epoch": 0.7245643898703774, "grad_norm": 0.6179911494255066, "learning_rate": 5.582632960647024e-06, "loss": 0.0524, "step": 32882 }, { "epoch": 0.7245864251598936, "grad_norm": 0.5936802625656128, "learning_rate": 5.581799728239538e-06, "loss": 0.0527, "step": 32883 }, { "epoch": 0.7246084604494097, "grad_norm": 0.5515012145042419, "learning_rate": 5.5809665438039985e-06, "loss": 0.0427, "step": 32884 }, { "epoch": 0.7246304957389259, "grad_norm": 0.5357697010040283, "learning_rate": 5.58013340734465e-06, "loss": 0.0524, "step": 32885 }, { "epoch": 0.7246525310284421, "grad_norm": 0.4980470538139343, "learning_rate": 5.579300318865729e-06, "loss": 0.0714, "step": 32886 }, { "epoch": 0.7246745663179582, "grad_norm": 1.034812092781067, "learning_rate": 5.5784672783714885e-06, "loss": 0.097, "step": 32887 }, { "epoch": 0.7246966016074744, "grad_norm": 0.8032640218734741, "learning_rate": 5.577634285866158e-06, "loss": 0.0677, "step": 32888 }, { "epoch": 0.7247186368969906, "grad_norm": 0.41897067427635193, "learning_rate": 5.576801341353997e-06, "loss": 0.0491, "step": 32889 }, { "epoch": 0.7247406721865067, "grad_norm": 0.5526645183563232, "learning_rate": 5.575968444839234e-06, "loss": 0.0488, "step": 32890 }, { "epoch": 0.7247627074760229, "grad_norm": 0.4889410138130188, "learning_rate": 5.57513559632612e-06, "loss": 0.0458, "step": 32891 }, { "epoch": 0.7247847427655391, "grad_norm": 0.3086574971675873, "learning_rate": 5.574302795818897e-06, "loss": 0.0508, "step": 32892 }, { "epoch": 0.7248067780550552, "grad_norm": 0.6286623477935791, "learning_rate": 5.5734700433218e-06, "loss": 0.0492, "step": 32893 }, { "epoch": 0.7248288133445713, "grad_norm": 0.8673102259635925, "learning_rate": 5.5726373388390805e-06, "loss": 0.0568, "step": 32894 }, { "epoch": 0.7248508486340874, "grad_norm": 0.29387256503105164, "learning_rate": 5.571804682374964e-06, "loss": 0.0473, "step": 32895 }, { "epoch": 0.7248728839236036, "grad_norm": 0.5905231237411499, "learning_rate": 5.570972073933712e-06, "loss": 0.0543, "step": 32896 }, { "epoch": 0.7248949192131198, "grad_norm": 0.8125423789024353, "learning_rate": 5.57013951351955e-06, "loss": 0.0839, "step": 32897 }, { "epoch": 0.7249169545026359, "grad_norm": 0.5792007446289062, "learning_rate": 5.56930700113673e-06, "loss": 0.0576, "step": 32898 }, { "epoch": 0.7249389897921521, "grad_norm": 0.809673547744751, "learning_rate": 5.568474536789481e-06, "loss": 0.0615, "step": 32899 }, { "epoch": 0.7249610250816683, "grad_norm": 0.5047701597213745, "learning_rate": 5.567642120482049e-06, "loss": 0.0534, "step": 32900 }, { "epoch": 0.7249830603711844, "grad_norm": 0.99505215883255, "learning_rate": 5.56680975221868e-06, "loss": 0.0728, "step": 32901 }, { "epoch": 0.7250050956607006, "grad_norm": 0.2866145372390747, "learning_rate": 5.5659774320036e-06, "loss": 0.0453, "step": 32902 }, { "epoch": 0.7250271309502168, "grad_norm": 0.45752328634262085, "learning_rate": 5.5651451598410574e-06, "loss": 0.023, "step": 32903 }, { "epoch": 0.7250491662397329, "grad_norm": 0.5090444087982178, "learning_rate": 5.56431293573529e-06, "loss": 0.0677, "step": 32904 }, { "epoch": 0.7250712015292491, "grad_norm": 0.6541065573692322, "learning_rate": 5.5634807596905414e-06, "loss": 0.0396, "step": 32905 }, { "epoch": 0.7250932368187653, "grad_norm": 0.6165850758552551, "learning_rate": 5.562648631711039e-06, "loss": 0.0853, "step": 32906 }, { "epoch": 0.7251152721082814, "grad_norm": 1.2200069427490234, "learning_rate": 5.561816551801029e-06, "loss": 0.05, "step": 32907 }, { "epoch": 0.7251373073977976, "grad_norm": 0.8598611354827881, "learning_rate": 5.560984519964754e-06, "loss": 0.0781, "step": 32908 }, { "epoch": 0.7251593426873137, "grad_norm": 0.6344372630119324, "learning_rate": 5.560152536206442e-06, "loss": 0.0634, "step": 32909 }, { "epoch": 0.7251813779768299, "grad_norm": 0.6427833437919617, "learning_rate": 5.559320600530333e-06, "loss": 0.0447, "step": 32910 }, { "epoch": 0.7252034132663461, "grad_norm": 0.8316037654876709, "learning_rate": 5.558488712940669e-06, "loss": 0.1046, "step": 32911 }, { "epoch": 0.7252254485558622, "grad_norm": 0.3722883462905884, "learning_rate": 5.557656873441688e-06, "loss": 0.0636, "step": 32912 }, { "epoch": 0.7252474838453784, "grad_norm": 0.6056919097900391, "learning_rate": 5.55682508203762e-06, "loss": 0.0797, "step": 32913 }, { "epoch": 0.7252695191348946, "grad_norm": 0.6587696075439453, "learning_rate": 5.555993338732705e-06, "loss": 0.0473, "step": 32914 }, { "epoch": 0.7252915544244107, "grad_norm": 0.25781702995300293, "learning_rate": 5.555161643531187e-06, "loss": 0.0561, "step": 32915 }, { "epoch": 0.7253135897139269, "grad_norm": 0.6480947732925415, "learning_rate": 5.554329996437287e-06, "loss": 0.0522, "step": 32916 }, { "epoch": 0.7253356250034431, "grad_norm": 0.5012407898902893, "learning_rate": 5.553498397455258e-06, "loss": 0.0453, "step": 32917 }, { "epoch": 0.7253576602929592, "grad_norm": 0.6679484248161316, "learning_rate": 5.552666846589316e-06, "loss": 0.0738, "step": 32918 }, { "epoch": 0.7253796955824753, "grad_norm": 1.14540433883667, "learning_rate": 5.551835343843717e-06, "loss": 0.0822, "step": 32919 }, { "epoch": 0.7254017308719914, "grad_norm": 0.8511123657226562, "learning_rate": 5.551003889222682e-06, "loss": 0.075, "step": 32920 }, { "epoch": 0.7254237661615076, "grad_norm": 0.6265912055969238, "learning_rate": 5.550172482730458e-06, "loss": 0.0602, "step": 32921 }, { "epoch": 0.7254458014510238, "grad_norm": 0.8397718667984009, "learning_rate": 5.549341124371267e-06, "loss": 0.0839, "step": 32922 }, { "epoch": 0.7254678367405399, "grad_norm": 0.3747606575489044, "learning_rate": 5.5485098141493486e-06, "loss": 0.0779, "step": 32923 }, { "epoch": 0.7254898720300561, "grad_norm": 0.5393986701965332, "learning_rate": 5.547678552068944e-06, "loss": 0.0557, "step": 32924 }, { "epoch": 0.7255119073195723, "grad_norm": 0.9298136234283447, "learning_rate": 5.546847338134276e-06, "loss": 0.0805, "step": 32925 }, { "epoch": 0.7255339426090884, "grad_norm": 0.812065064907074, "learning_rate": 5.546016172349585e-06, "loss": 0.0581, "step": 32926 }, { "epoch": 0.7255559778986046, "grad_norm": 0.885857343673706, "learning_rate": 5.5451850547191005e-06, "loss": 0.0644, "step": 32927 }, { "epoch": 0.7255780131881208, "grad_norm": 0.7097352147102356, "learning_rate": 5.544353985247065e-06, "loss": 0.0565, "step": 32928 }, { "epoch": 0.7256000484776369, "grad_norm": 0.3896319270133972, "learning_rate": 5.5435229639377e-06, "loss": 0.0581, "step": 32929 }, { "epoch": 0.7256220837671531, "grad_norm": 0.4680370092391968, "learning_rate": 5.542691990795245e-06, "loss": 0.0702, "step": 32930 }, { "epoch": 0.7256441190566693, "grad_norm": 0.6200881600379944, "learning_rate": 5.541861065823935e-06, "loss": 0.0561, "step": 32931 }, { "epoch": 0.7256661543461854, "grad_norm": 0.6218874454498291, "learning_rate": 5.541030189027994e-06, "loss": 0.0562, "step": 32932 }, { "epoch": 0.7256881896357016, "grad_norm": 0.5178576111793518, "learning_rate": 5.5401993604116565e-06, "loss": 0.0263, "step": 32933 }, { "epoch": 0.7257102249252178, "grad_norm": 0.7107000946998596, "learning_rate": 5.5393685799791586e-06, "loss": 0.0738, "step": 32934 }, { "epoch": 0.7257322602147339, "grad_norm": 0.4486347436904907, "learning_rate": 5.538537847734734e-06, "loss": 0.0564, "step": 32935 }, { "epoch": 0.7257542955042501, "grad_norm": 0.39015647768974304, "learning_rate": 5.5377071636826046e-06, "loss": 0.0501, "step": 32936 }, { "epoch": 0.7257763307937662, "grad_norm": 0.7650524377822876, "learning_rate": 5.536876527827007e-06, "loss": 0.072, "step": 32937 }, { "epoch": 0.7257983660832824, "grad_norm": 0.37969771027565, "learning_rate": 5.536045940172177e-06, "loss": 0.0516, "step": 32938 }, { "epoch": 0.7258204013727986, "grad_norm": 0.8127231001853943, "learning_rate": 5.5352154007223335e-06, "loss": 0.0504, "step": 32939 }, { "epoch": 0.7258424366623147, "grad_norm": 0.705990195274353, "learning_rate": 5.53438490948172e-06, "loss": 0.0912, "step": 32940 }, { "epoch": 0.7258644719518309, "grad_norm": 0.7250486612319946, "learning_rate": 5.533554466454549e-06, "loss": 0.0637, "step": 32941 }, { "epoch": 0.7258865072413471, "grad_norm": 0.7038873434066772, "learning_rate": 5.5327240716450725e-06, "loss": 0.0705, "step": 32942 }, { "epoch": 0.7259085425308632, "grad_norm": 0.46255606412887573, "learning_rate": 5.531893725057503e-06, "loss": 0.0535, "step": 32943 }, { "epoch": 0.7259305778203793, "grad_norm": 0.4580189287662506, "learning_rate": 5.531063426696081e-06, "loss": 0.056, "step": 32944 }, { "epoch": 0.7259526131098955, "grad_norm": 0.38915643095970154, "learning_rate": 5.530233176565027e-06, "loss": 0.0656, "step": 32945 }, { "epoch": 0.7259746483994116, "grad_norm": 0.4785204827785492, "learning_rate": 5.5294029746685715e-06, "loss": 0.0413, "step": 32946 }, { "epoch": 0.7259966836889278, "grad_norm": 0.3958934247493744, "learning_rate": 5.5285728210109524e-06, "loss": 0.0327, "step": 32947 }, { "epoch": 0.7260187189784439, "grad_norm": 0.3655741512775421, "learning_rate": 5.527742715596384e-06, "loss": 0.0681, "step": 32948 }, { "epoch": 0.7260407542679601, "grad_norm": 0.406072735786438, "learning_rate": 5.526912658429102e-06, "loss": 0.0787, "step": 32949 }, { "epoch": 0.7260627895574763, "grad_norm": 0.44871437549591064, "learning_rate": 5.5260826495133345e-06, "loss": 0.0807, "step": 32950 }, { "epoch": 0.7260848248469924, "grad_norm": 0.5461111664772034, "learning_rate": 5.525252688853313e-06, "loss": 0.0646, "step": 32951 }, { "epoch": 0.7261068601365086, "grad_norm": 0.28689196705818176, "learning_rate": 5.524422776453254e-06, "loss": 0.0526, "step": 32952 }, { "epoch": 0.7261288954260248, "grad_norm": 0.5524795055389404, "learning_rate": 5.523592912317393e-06, "loss": 0.0621, "step": 32953 }, { "epoch": 0.7261509307155409, "grad_norm": 0.3804614841938019, "learning_rate": 5.522763096449957e-06, "loss": 0.0533, "step": 32954 }, { "epoch": 0.7261729660050571, "grad_norm": 0.6908335089683533, "learning_rate": 5.521933328855168e-06, "loss": 0.0478, "step": 32955 }, { "epoch": 0.7261950012945733, "grad_norm": 0.44108501076698303, "learning_rate": 5.521103609537255e-06, "loss": 0.0612, "step": 32956 }, { "epoch": 0.7262170365840894, "grad_norm": 0.7701371908187866, "learning_rate": 5.520273938500444e-06, "loss": 0.0656, "step": 32957 }, { "epoch": 0.7262390718736056, "grad_norm": 0.771037220954895, "learning_rate": 5.519444315748967e-06, "loss": 0.0612, "step": 32958 }, { "epoch": 0.7262611071631218, "grad_norm": 0.5762956142425537, "learning_rate": 5.518614741287038e-06, "loss": 0.0484, "step": 32959 }, { "epoch": 0.7262831424526379, "grad_norm": 0.4416463077068329, "learning_rate": 5.517785215118888e-06, "loss": 0.056, "step": 32960 }, { "epoch": 0.7263051777421541, "grad_norm": 0.8183837532997131, "learning_rate": 5.5169557372487445e-06, "loss": 0.0604, "step": 32961 }, { "epoch": 0.7263272130316702, "grad_norm": 0.6534127593040466, "learning_rate": 5.5161263076808335e-06, "loss": 0.0549, "step": 32962 }, { "epoch": 0.7263492483211864, "grad_norm": 0.6112404465675354, "learning_rate": 5.515296926419379e-06, "loss": 0.0634, "step": 32963 }, { "epoch": 0.7263712836107026, "grad_norm": 0.5767067670822144, "learning_rate": 5.514467593468592e-06, "loss": 0.0665, "step": 32964 }, { "epoch": 0.7263933189002187, "grad_norm": 0.7225420475006104, "learning_rate": 5.513638308832718e-06, "loss": 0.0822, "step": 32965 }, { "epoch": 0.7264153541897349, "grad_norm": 0.7685237526893616, "learning_rate": 5.512809072515967e-06, "loss": 0.0727, "step": 32966 }, { "epoch": 0.7264373894792511, "grad_norm": 0.579450249671936, "learning_rate": 5.51197988452257e-06, "loss": 0.0562, "step": 32967 }, { "epoch": 0.7264594247687671, "grad_norm": 0.9910711646080017, "learning_rate": 5.511150744856738e-06, "loss": 0.0352, "step": 32968 }, { "epoch": 0.7264814600582833, "grad_norm": 0.5221773386001587, "learning_rate": 5.510321653522714e-06, "loss": 0.0822, "step": 32969 }, { "epoch": 0.7265034953477995, "grad_norm": 0.5620000958442688, "learning_rate": 5.509492610524706e-06, "loss": 0.0757, "step": 32970 }, { "epoch": 0.7265255306373156, "grad_norm": 0.6251093745231628, "learning_rate": 5.508663615866948e-06, "loss": 0.0756, "step": 32971 }, { "epoch": 0.7265475659268318, "grad_norm": 0.36657774448394775, "learning_rate": 5.507834669553648e-06, "loss": 0.0569, "step": 32972 }, { "epoch": 0.726569601216348, "grad_norm": 0.5355701446533203, "learning_rate": 5.507005771589037e-06, "loss": 0.0709, "step": 32973 }, { "epoch": 0.7265916365058641, "grad_norm": 0.9028723239898682, "learning_rate": 5.506176921977342e-06, "loss": 0.0729, "step": 32974 }, { "epoch": 0.7266136717953803, "grad_norm": 0.4812251925468445, "learning_rate": 5.505348120722772e-06, "loss": 0.0815, "step": 32975 }, { "epoch": 0.7266357070848964, "grad_norm": 1.287284016609192, "learning_rate": 5.504519367829557e-06, "loss": 0.0766, "step": 32976 }, { "epoch": 0.7266577423744126, "grad_norm": 0.9169391393661499, "learning_rate": 5.5036906633019165e-06, "loss": 0.0883, "step": 32977 }, { "epoch": 0.7266797776639288, "grad_norm": 0.7831847071647644, "learning_rate": 5.502862007144076e-06, "loss": 0.0417, "step": 32978 }, { "epoch": 0.7267018129534449, "grad_norm": 0.4026831090450287, "learning_rate": 5.502033399360248e-06, "loss": 0.0454, "step": 32979 }, { "epoch": 0.7267238482429611, "grad_norm": 0.49390244483947754, "learning_rate": 5.501204839954657e-06, "loss": 0.0438, "step": 32980 }, { "epoch": 0.7267458835324773, "grad_norm": 0.5033997297286987, "learning_rate": 5.500376328931528e-06, "loss": 0.0682, "step": 32981 }, { "epoch": 0.7267679188219934, "grad_norm": 0.4203488528728485, "learning_rate": 5.499547866295072e-06, "loss": 0.0633, "step": 32982 }, { "epoch": 0.7267899541115096, "grad_norm": 0.8115926384925842, "learning_rate": 5.4987194520495135e-06, "loss": 0.0896, "step": 32983 }, { "epoch": 0.7268119894010258, "grad_norm": 0.3777249753475189, "learning_rate": 5.497891086199071e-06, "loss": 0.0454, "step": 32984 }, { "epoch": 0.7268340246905419, "grad_norm": 0.6166483759880066, "learning_rate": 5.49706276874797e-06, "loss": 0.0558, "step": 32985 }, { "epoch": 0.7268560599800581, "grad_norm": 0.43775415420532227, "learning_rate": 5.496234499700419e-06, "loss": 0.0498, "step": 32986 }, { "epoch": 0.7268780952695743, "grad_norm": 0.7112564444541931, "learning_rate": 5.495406279060642e-06, "loss": 0.0642, "step": 32987 }, { "epoch": 0.7269001305590904, "grad_norm": 0.42288973927497864, "learning_rate": 5.494578106832863e-06, "loss": 0.0377, "step": 32988 }, { "epoch": 0.7269221658486066, "grad_norm": 0.5453571081161499, "learning_rate": 5.4937499830212895e-06, "loss": 0.0649, "step": 32989 }, { "epoch": 0.7269442011381227, "grad_norm": 0.3840637505054474, "learning_rate": 5.492921907630149e-06, "loss": 0.0515, "step": 32990 }, { "epoch": 0.7269662364276389, "grad_norm": 0.6008361577987671, "learning_rate": 5.492093880663648e-06, "loss": 0.0537, "step": 32991 }, { "epoch": 0.7269882717171551, "grad_norm": 0.5876221656799316, "learning_rate": 5.4912659021260195e-06, "loss": 0.0703, "step": 32992 }, { "epoch": 0.7270103070066711, "grad_norm": 0.38053247332572937, "learning_rate": 5.490437972021468e-06, "loss": 0.0603, "step": 32993 }, { "epoch": 0.7270323422961873, "grad_norm": 0.4601455628871918, "learning_rate": 5.48961009035422e-06, "loss": 0.0641, "step": 32994 }, { "epoch": 0.7270543775857035, "grad_norm": 0.778843343257904, "learning_rate": 5.488782257128483e-06, "loss": 0.0786, "step": 32995 }, { "epoch": 0.7270764128752196, "grad_norm": 0.5873610377311707, "learning_rate": 5.4879544723484785e-06, "loss": 0.0608, "step": 32996 }, { "epoch": 0.7270984481647358, "grad_norm": 0.5386521816253662, "learning_rate": 5.487126736018427e-06, "loss": 0.0517, "step": 32997 }, { "epoch": 0.727120483454252, "grad_norm": 0.5283834338188171, "learning_rate": 5.486299048142536e-06, "loss": 0.0473, "step": 32998 }, { "epoch": 0.7271425187437681, "grad_norm": 1.0379788875579834, "learning_rate": 5.485471408725026e-06, "loss": 0.0844, "step": 32999 }, { "epoch": 0.7271645540332843, "grad_norm": 0.34929001331329346, "learning_rate": 5.484643817770112e-06, "loss": 0.0483, "step": 33000 }, { "epoch": 0.7271865893228004, "grad_norm": 0.5570009350776672, "learning_rate": 5.4838162752820135e-06, "loss": 0.0623, "step": 33001 }, { "epoch": 0.7272086246123166, "grad_norm": 0.3659330904483795, "learning_rate": 5.482988781264936e-06, "loss": 0.0473, "step": 33002 }, { "epoch": 0.7272306599018328, "grad_norm": 0.8082411885261536, "learning_rate": 5.482161335723102e-06, "loss": 0.0501, "step": 33003 }, { "epoch": 0.7272526951913489, "grad_norm": 0.561363160610199, "learning_rate": 5.481333938660729e-06, "loss": 0.0807, "step": 33004 }, { "epoch": 0.7272747304808651, "grad_norm": 0.6393573880195618, "learning_rate": 5.4805065900820214e-06, "loss": 0.0821, "step": 33005 }, { "epoch": 0.7272967657703813, "grad_norm": 0.5119249820709229, "learning_rate": 5.479679289991198e-06, "loss": 0.0567, "step": 33006 }, { "epoch": 0.7273188010598974, "grad_norm": 1.003749966621399, "learning_rate": 5.478852038392474e-06, "loss": 0.0677, "step": 33007 }, { "epoch": 0.7273408363494136, "grad_norm": 0.3352479338645935, "learning_rate": 5.478024835290066e-06, "loss": 0.0515, "step": 33008 }, { "epoch": 0.7273628716389298, "grad_norm": 0.3938937783241272, "learning_rate": 5.47719768068818e-06, "loss": 0.0646, "step": 33009 }, { "epoch": 0.7273849069284459, "grad_norm": 0.32848167419433594, "learning_rate": 5.476370574591032e-06, "loss": 0.0447, "step": 33010 }, { "epoch": 0.7274069422179621, "grad_norm": 0.7921886444091797, "learning_rate": 5.47554351700284e-06, "loss": 0.0917, "step": 33011 }, { "epoch": 0.7274289775074783, "grad_norm": 0.5455636382102966, "learning_rate": 5.474716507927808e-06, "loss": 0.0486, "step": 33012 }, { "epoch": 0.7274510127969944, "grad_norm": 0.3936130404472351, "learning_rate": 5.473889547370158e-06, "loss": 0.0434, "step": 33013 }, { "epoch": 0.7274730480865106, "grad_norm": 0.6415764093399048, "learning_rate": 5.473062635334086e-06, "loss": 0.0558, "step": 33014 }, { "epoch": 0.7274950833760268, "grad_norm": 0.3905811905860901, "learning_rate": 5.472235771823824e-06, "loss": 0.0505, "step": 33015 }, { "epoch": 0.7275171186655429, "grad_norm": 0.7762379050254822, "learning_rate": 5.471408956843572e-06, "loss": 0.0871, "step": 33016 }, { "epoch": 0.7275391539550591, "grad_norm": 0.9010246396064758, "learning_rate": 5.470582190397547e-06, "loss": 0.0694, "step": 33017 }, { "epoch": 0.7275611892445751, "grad_norm": 0.8841543793678284, "learning_rate": 5.4697554724899535e-06, "loss": 0.1007, "step": 33018 }, { "epoch": 0.7275832245340913, "grad_norm": 0.4467606246471405, "learning_rate": 5.468928803125003e-06, "loss": 0.0522, "step": 33019 }, { "epoch": 0.7276052598236075, "grad_norm": 0.5834057927131653, "learning_rate": 5.468102182306916e-06, "loss": 0.0736, "step": 33020 }, { "epoch": 0.7276272951131236, "grad_norm": 0.49813514947891235, "learning_rate": 5.467275610039886e-06, "loss": 0.0459, "step": 33021 }, { "epoch": 0.7276493304026398, "grad_norm": 0.5386376976966858, "learning_rate": 5.466449086328144e-06, "loss": 0.042, "step": 33022 }, { "epoch": 0.727671365692156, "grad_norm": 0.39741769433021545, "learning_rate": 5.465622611175884e-06, "loss": 0.0468, "step": 33023 }, { "epoch": 0.7276934009816721, "grad_norm": 0.6620110869407654, "learning_rate": 5.464796184587324e-06, "loss": 0.0721, "step": 33024 }, { "epoch": 0.7277154362711883, "grad_norm": 0.4452302157878876, "learning_rate": 5.463969806566666e-06, "loss": 0.0487, "step": 33025 }, { "epoch": 0.7277374715607045, "grad_norm": 0.7565487027168274, "learning_rate": 5.463143477118124e-06, "loss": 0.1005, "step": 33026 }, { "epoch": 0.7277595068502206, "grad_norm": 0.6152786016464233, "learning_rate": 5.462317196245911e-06, "loss": 0.0612, "step": 33027 }, { "epoch": 0.7277815421397368, "grad_norm": 0.5341556668281555, "learning_rate": 5.461490963954225e-06, "loss": 0.0631, "step": 33028 }, { "epoch": 0.727803577429253, "grad_norm": 0.4454163610935211, "learning_rate": 5.460664780247282e-06, "loss": 0.0443, "step": 33029 }, { "epoch": 0.7278256127187691, "grad_norm": 0.710273802280426, "learning_rate": 5.459838645129287e-06, "loss": 0.1004, "step": 33030 }, { "epoch": 0.7278476480082853, "grad_norm": 0.43205565214157104, "learning_rate": 5.459012558604457e-06, "loss": 0.0834, "step": 33031 }, { "epoch": 0.7278696832978014, "grad_norm": 0.7363676428794861, "learning_rate": 5.458186520676985e-06, "loss": 0.0568, "step": 33032 }, { "epoch": 0.7278917185873176, "grad_norm": 1.1955244541168213, "learning_rate": 5.457360531351087e-06, "loss": 0.0618, "step": 33033 }, { "epoch": 0.7279137538768338, "grad_norm": 0.48661720752716064, "learning_rate": 5.456534590630973e-06, "loss": 0.0634, "step": 33034 }, { "epoch": 0.7279357891663499, "grad_norm": 0.554138720035553, "learning_rate": 5.455708698520842e-06, "loss": 0.0527, "step": 33035 }, { "epoch": 0.7279578244558661, "grad_norm": 0.7201664447784424, "learning_rate": 5.454882855024911e-06, "loss": 0.0717, "step": 33036 }, { "epoch": 0.7279798597453823, "grad_norm": 0.680090069770813, "learning_rate": 5.454057060147367e-06, "loss": 0.0577, "step": 33037 }, { "epoch": 0.7280018950348984, "grad_norm": 0.5252309441566467, "learning_rate": 5.453231313892441e-06, "loss": 0.0596, "step": 33038 }, { "epoch": 0.7280239303244146, "grad_norm": 0.5800451040267944, "learning_rate": 5.452405616264322e-06, "loss": 0.0565, "step": 33039 }, { "epoch": 0.7280459656139308, "grad_norm": 0.4826999306678772, "learning_rate": 5.451579967267227e-06, "loss": 0.0823, "step": 33040 }, { "epoch": 0.7280680009034469, "grad_norm": 0.502440869808197, "learning_rate": 5.450754366905348e-06, "loss": 0.0634, "step": 33041 }, { "epoch": 0.728090036192963, "grad_norm": 0.6551823019981384, "learning_rate": 5.4499288151829e-06, "loss": 0.0692, "step": 33042 }, { "epoch": 0.7281120714824791, "grad_norm": 0.8241705298423767, "learning_rate": 5.449103312104091e-06, "loss": 0.0646, "step": 33043 }, { "epoch": 0.7281341067719953, "grad_norm": 0.5774510502815247, "learning_rate": 5.44827785767311e-06, "loss": 0.0386, "step": 33044 }, { "epoch": 0.7281561420615115, "grad_norm": 0.961912214756012, "learning_rate": 5.44745245189418e-06, "loss": 0.0743, "step": 33045 }, { "epoch": 0.7281781773510276, "grad_norm": 0.5727797150611877, "learning_rate": 5.446627094771494e-06, "loss": 0.0732, "step": 33046 }, { "epoch": 0.7282002126405438, "grad_norm": 0.5829084515571594, "learning_rate": 5.445801786309264e-06, "loss": 0.0702, "step": 33047 }, { "epoch": 0.72822224793006, "grad_norm": 0.5144366025924683, "learning_rate": 5.4449765265116834e-06, "loss": 0.0647, "step": 33048 }, { "epoch": 0.7282442832195761, "grad_norm": 0.4731123447418213, "learning_rate": 5.4441513153829605e-06, "loss": 0.0606, "step": 33049 }, { "epoch": 0.7282663185090923, "grad_norm": 0.6331143379211426, "learning_rate": 5.443326152927306e-06, "loss": 0.0897, "step": 33050 }, { "epoch": 0.7282883537986085, "grad_norm": 0.3679918646812439, "learning_rate": 5.4425010391489084e-06, "loss": 0.0396, "step": 33051 }, { "epoch": 0.7283103890881246, "grad_norm": 0.6360217332839966, "learning_rate": 5.441675974051981e-06, "loss": 0.0709, "step": 33052 }, { "epoch": 0.7283324243776408, "grad_norm": 0.766086757183075, "learning_rate": 5.440850957640721e-06, "loss": 0.0502, "step": 33053 }, { "epoch": 0.728354459667157, "grad_norm": 0.6987030506134033, "learning_rate": 5.4400259899193405e-06, "loss": 0.0643, "step": 33054 }, { "epoch": 0.7283764949566731, "grad_norm": 0.816947340965271, "learning_rate": 5.439201070892028e-06, "loss": 0.0367, "step": 33055 }, { "epoch": 0.7283985302461893, "grad_norm": 0.6102135181427002, "learning_rate": 5.43837620056299e-06, "loss": 0.0389, "step": 33056 }, { "epoch": 0.7284205655357054, "grad_norm": 0.7627894878387451, "learning_rate": 5.437551378936436e-06, "loss": 0.0602, "step": 33057 }, { "epoch": 0.7284426008252216, "grad_norm": 0.47582489252090454, "learning_rate": 5.436726606016555e-06, "loss": 0.046, "step": 33058 }, { "epoch": 0.7284646361147378, "grad_norm": 0.9216248989105225, "learning_rate": 5.43590188180756e-06, "loss": 0.1008, "step": 33059 }, { "epoch": 0.7284866714042539, "grad_norm": 0.6603890657424927, "learning_rate": 5.435077206313635e-06, "loss": 0.0763, "step": 33060 }, { "epoch": 0.7285087066937701, "grad_norm": 0.46351566910743713, "learning_rate": 5.434252579539e-06, "loss": 0.0656, "step": 33061 }, { "epoch": 0.7285307419832863, "grad_norm": 0.8209950923919678, "learning_rate": 5.433428001487841e-06, "loss": 0.0773, "step": 33062 }, { "epoch": 0.7285527772728024, "grad_norm": 0.6855449676513672, "learning_rate": 5.432603472164368e-06, "loss": 0.0935, "step": 33063 }, { "epoch": 0.7285748125623186, "grad_norm": 0.7602229118347168, "learning_rate": 5.431778991572772e-06, "loss": 0.0609, "step": 33064 }, { "epoch": 0.7285968478518348, "grad_norm": 1.0372220277786255, "learning_rate": 5.430954559717257e-06, "loss": 0.0762, "step": 33065 }, { "epoch": 0.7286188831413509, "grad_norm": 0.5756118893623352, "learning_rate": 5.4301301766020245e-06, "loss": 0.0616, "step": 33066 }, { "epoch": 0.728640918430867, "grad_norm": 0.5762602686882019, "learning_rate": 5.4293058422312625e-06, "loss": 0.0697, "step": 33067 }, { "epoch": 0.7286629537203831, "grad_norm": 0.4177914261817932, "learning_rate": 5.428481556609187e-06, "loss": 0.0638, "step": 33068 }, { "epoch": 0.7286849890098993, "grad_norm": 0.8012778759002686, "learning_rate": 5.427657319739984e-06, "loss": 0.0793, "step": 33069 }, { "epoch": 0.7287070242994155, "grad_norm": 0.4912084639072418, "learning_rate": 5.42683313162786e-06, "loss": 0.0793, "step": 33070 }, { "epoch": 0.7287290595889316, "grad_norm": 0.4605003893375397, "learning_rate": 5.4260089922769965e-06, "loss": 0.0566, "step": 33071 }, { "epoch": 0.7287510948784478, "grad_norm": 0.769008457660675, "learning_rate": 5.425184901691615e-06, "loss": 0.0709, "step": 33072 }, { "epoch": 0.728773130167964, "grad_norm": 0.9708783030509949, "learning_rate": 5.424360859875896e-06, "loss": 0.0748, "step": 33073 }, { "epoch": 0.7287951654574801, "grad_norm": 0.6723892092704773, "learning_rate": 5.423536866834048e-06, "loss": 0.0573, "step": 33074 }, { "epoch": 0.7288172007469963, "grad_norm": 0.4830663502216339, "learning_rate": 5.422712922570256e-06, "loss": 0.0595, "step": 33075 }, { "epoch": 0.7288392360365125, "grad_norm": 0.6802923679351807, "learning_rate": 5.421889027088722e-06, "loss": 0.0731, "step": 33076 }, { "epoch": 0.7288612713260286, "grad_norm": 0.6330434679985046, "learning_rate": 5.421065180393648e-06, "loss": 0.0608, "step": 33077 }, { "epoch": 0.7288833066155448, "grad_norm": 0.5826939940452576, "learning_rate": 5.420241382489223e-06, "loss": 0.0589, "step": 33078 }, { "epoch": 0.728905341905061, "grad_norm": 0.8270740509033203, "learning_rate": 5.419417633379645e-06, "loss": 0.0768, "step": 33079 }, { "epoch": 0.7289273771945771, "grad_norm": 0.6315096616744995, "learning_rate": 5.418593933069109e-06, "loss": 0.0584, "step": 33080 }, { "epoch": 0.7289494124840933, "grad_norm": 0.6241579055786133, "learning_rate": 5.417770281561818e-06, "loss": 0.0662, "step": 33081 }, { "epoch": 0.7289714477736094, "grad_norm": 0.4290454685688019, "learning_rate": 5.416946678861957e-06, "loss": 0.0586, "step": 33082 }, { "epoch": 0.7289934830631256, "grad_norm": 0.6645209789276123, "learning_rate": 5.416123124973725e-06, "loss": 0.0647, "step": 33083 }, { "epoch": 0.7290155183526418, "grad_norm": 0.5333254337310791, "learning_rate": 5.415299619901323e-06, "loss": 0.055, "step": 33084 }, { "epoch": 0.7290375536421579, "grad_norm": 0.5382959842681885, "learning_rate": 5.414476163648932e-06, "loss": 0.0607, "step": 33085 }, { "epoch": 0.7290595889316741, "grad_norm": 0.5354949235916138, "learning_rate": 5.413652756220761e-06, "loss": 0.0975, "step": 33086 }, { "epoch": 0.7290816242211903, "grad_norm": 0.557875394821167, "learning_rate": 5.412829397620988e-06, "loss": 0.0724, "step": 33087 }, { "epoch": 0.7291036595107064, "grad_norm": 0.480983704328537, "learning_rate": 5.412006087853825e-06, "loss": 0.0469, "step": 33088 }, { "epoch": 0.7291256948002226, "grad_norm": 0.5921252369880676, "learning_rate": 5.411182826923451e-06, "loss": 0.0773, "step": 33089 }, { "epoch": 0.7291477300897388, "grad_norm": 0.6734164357185364, "learning_rate": 5.410359614834065e-06, "loss": 0.0773, "step": 33090 }, { "epoch": 0.7291697653792549, "grad_norm": 0.8615555763244629, "learning_rate": 5.409536451589865e-06, "loss": 0.0556, "step": 33091 }, { "epoch": 0.729191800668771, "grad_norm": 0.35163864493370056, "learning_rate": 5.4087133371950336e-06, "loss": 0.0445, "step": 33092 }, { "epoch": 0.7292138359582871, "grad_norm": 0.4220582842826843, "learning_rate": 5.407890271653775e-06, "loss": 0.0614, "step": 33093 }, { "epoch": 0.7292358712478033, "grad_norm": 1.0053622722625732, "learning_rate": 5.407067254970262e-06, "loss": 0.0748, "step": 33094 }, { "epoch": 0.7292579065373195, "grad_norm": 0.5520122647285461, "learning_rate": 5.406244287148712e-06, "loss": 0.0467, "step": 33095 }, { "epoch": 0.7292799418268356, "grad_norm": 0.6708216667175293, "learning_rate": 5.405421368193298e-06, "loss": 0.0579, "step": 33096 }, { "epoch": 0.7293019771163518, "grad_norm": 1.0729875564575195, "learning_rate": 5.404598498108224e-06, "loss": 0.0783, "step": 33097 }, { "epoch": 0.729324012405868, "grad_norm": 0.6210831999778748, "learning_rate": 5.4037756768976695e-06, "loss": 0.0493, "step": 33098 }, { "epoch": 0.7293460476953841, "grad_norm": 0.656242847442627, "learning_rate": 5.4029529045658324e-06, "loss": 0.0672, "step": 33099 }, { "epoch": 0.7293680829849003, "grad_norm": 0.38734161853790283, "learning_rate": 5.402130181116906e-06, "loss": 0.0499, "step": 33100 }, { "epoch": 0.7293901182744165, "grad_norm": 0.37591245770454407, "learning_rate": 5.401307506555073e-06, "loss": 0.0477, "step": 33101 }, { "epoch": 0.7294121535639326, "grad_norm": 0.5901097655296326, "learning_rate": 5.400484880884529e-06, "loss": 0.069, "step": 33102 }, { "epoch": 0.7294341888534488, "grad_norm": 0.8015340566635132, "learning_rate": 5.399662304109462e-06, "loss": 0.0991, "step": 33103 }, { "epoch": 0.729456224142965, "grad_norm": 0.6222577691078186, "learning_rate": 5.398839776234068e-06, "loss": 0.0574, "step": 33104 }, { "epoch": 0.7294782594324811, "grad_norm": 0.8740555047988892, "learning_rate": 5.398017297262527e-06, "loss": 0.0667, "step": 33105 }, { "epoch": 0.7295002947219973, "grad_norm": 0.4595831334590912, "learning_rate": 5.397194867199034e-06, "loss": 0.04, "step": 33106 }, { "epoch": 0.7295223300115135, "grad_norm": 0.33147528767585754, "learning_rate": 5.3963724860477795e-06, "loss": 0.0474, "step": 33107 }, { "epoch": 0.7295443653010296, "grad_norm": 0.6478270292282104, "learning_rate": 5.395550153812946e-06, "loss": 0.0621, "step": 33108 }, { "epoch": 0.7295664005905458, "grad_norm": 0.3150472342967987, "learning_rate": 5.394727870498732e-06, "loss": 0.0499, "step": 33109 }, { "epoch": 0.729588435880062, "grad_norm": 0.5911135673522949, "learning_rate": 5.3939056361093094e-06, "loss": 0.0781, "step": 33110 }, { "epoch": 0.7296104711695781, "grad_norm": 0.4895835518836975, "learning_rate": 5.393083450648885e-06, "loss": 0.0682, "step": 33111 }, { "epoch": 0.7296325064590943, "grad_norm": 0.88873690366745, "learning_rate": 5.392261314121636e-06, "loss": 0.098, "step": 33112 }, { "epoch": 0.7296545417486104, "grad_norm": 0.4506480395793915, "learning_rate": 5.39143922653175e-06, "loss": 0.0473, "step": 33113 }, { "epoch": 0.7296765770381266, "grad_norm": 0.7005180716514587, "learning_rate": 5.390617187883422e-06, "loss": 0.0566, "step": 33114 }, { "epoch": 0.7296986123276428, "grad_norm": 0.6474327445030212, "learning_rate": 5.389795198180827e-06, "loss": 0.0663, "step": 33115 }, { "epoch": 0.7297206476171589, "grad_norm": 0.4420247972011566, "learning_rate": 5.388973257428166e-06, "loss": 0.0492, "step": 33116 }, { "epoch": 0.729742682906675, "grad_norm": 0.5290629863739014, "learning_rate": 5.388151365629606e-06, "loss": 0.0678, "step": 33117 }, { "epoch": 0.7297647181961912, "grad_norm": 0.5685185790061951, "learning_rate": 5.387329522789356e-06, "loss": 0.0625, "step": 33118 }, { "epoch": 0.7297867534857073, "grad_norm": 0.4716132879257202, "learning_rate": 5.386507728911589e-06, "loss": 0.0617, "step": 33119 }, { "epoch": 0.7298087887752235, "grad_norm": 0.8338180184364319, "learning_rate": 5.3856859840004954e-06, "loss": 0.0769, "step": 33120 }, { "epoch": 0.7298308240647396, "grad_norm": 0.6943657994270325, "learning_rate": 5.384864288060254e-06, "loss": 0.0589, "step": 33121 }, { "epoch": 0.7298528593542558, "grad_norm": 0.7599793076515198, "learning_rate": 5.384042641095055e-06, "loss": 0.0682, "step": 33122 }, { "epoch": 0.729874894643772, "grad_norm": 0.6416719555854797, "learning_rate": 5.38322104310909e-06, "loss": 0.0436, "step": 33123 }, { "epoch": 0.7298969299332881, "grad_norm": 0.5584006309509277, "learning_rate": 5.382399494106531e-06, "loss": 0.0799, "step": 33124 }, { "epoch": 0.7299189652228043, "grad_norm": 0.5514885783195496, "learning_rate": 5.3815779940915675e-06, "loss": 0.0667, "step": 33125 }, { "epoch": 0.7299410005123205, "grad_norm": 0.6293257474899292, "learning_rate": 5.380756543068386e-06, "loss": 0.059, "step": 33126 }, { "epoch": 0.7299630358018366, "grad_norm": 0.8373354077339172, "learning_rate": 5.379935141041175e-06, "loss": 0.0799, "step": 33127 }, { "epoch": 0.7299850710913528, "grad_norm": 0.4182587265968323, "learning_rate": 5.3791137880141085e-06, "loss": 0.0446, "step": 33128 }, { "epoch": 0.730007106380869, "grad_norm": 0.765636146068573, "learning_rate": 5.378292483991374e-06, "loss": 0.0655, "step": 33129 }, { "epoch": 0.7300291416703851, "grad_norm": 0.6488004326820374, "learning_rate": 5.37747122897716e-06, "loss": 0.044, "step": 33130 }, { "epoch": 0.7300511769599013, "grad_norm": 0.39406150579452515, "learning_rate": 5.3766500229756395e-06, "loss": 0.0431, "step": 33131 }, { "epoch": 0.7300732122494175, "grad_norm": 0.49332571029663086, "learning_rate": 5.375828865991007e-06, "loss": 0.0499, "step": 33132 }, { "epoch": 0.7300952475389336, "grad_norm": 0.2618725597858429, "learning_rate": 5.375007758027428e-06, "loss": 0.0529, "step": 33133 }, { "epoch": 0.7301172828284498, "grad_norm": 0.4288795292377472, "learning_rate": 5.374186699089108e-06, "loss": 0.0533, "step": 33134 }, { "epoch": 0.730139318117966, "grad_norm": 0.5102954506874084, "learning_rate": 5.37336568918021e-06, "loss": 0.0466, "step": 33135 }, { "epoch": 0.7301613534074821, "grad_norm": 0.28660890460014343, "learning_rate": 5.372544728304923e-06, "loss": 0.0935, "step": 33136 }, { "epoch": 0.7301833886969983, "grad_norm": 0.38701626658439636, "learning_rate": 5.371723816467434e-06, "loss": 0.0523, "step": 33137 }, { "epoch": 0.7302054239865144, "grad_norm": 0.5335754752159119, "learning_rate": 5.370902953671913e-06, "loss": 0.063, "step": 33138 }, { "epoch": 0.7302274592760306, "grad_norm": 0.4563673138618469, "learning_rate": 5.370082139922552e-06, "loss": 0.0634, "step": 33139 }, { "epoch": 0.7302494945655468, "grad_norm": 0.641273558139801, "learning_rate": 5.3692613752235165e-06, "loss": 0.0679, "step": 33140 }, { "epoch": 0.7302715298550628, "grad_norm": 0.6630446314811707, "learning_rate": 5.368440659579008e-06, "loss": 0.0709, "step": 33141 }, { "epoch": 0.730293565144579, "grad_norm": 0.5760415196418762, "learning_rate": 5.36761999299319e-06, "loss": 0.0657, "step": 33142 }, { "epoch": 0.7303156004340952, "grad_norm": 0.5353786945343018, "learning_rate": 5.366799375470254e-06, "loss": 0.0528, "step": 33143 }, { "epoch": 0.7303376357236113, "grad_norm": 0.3006889820098877, "learning_rate": 5.365978807014371e-06, "loss": 0.0478, "step": 33144 }, { "epoch": 0.7303596710131275, "grad_norm": 0.5488409399986267, "learning_rate": 5.365158287629722e-06, "loss": 0.0804, "step": 33145 }, { "epoch": 0.7303817063026437, "grad_norm": 0.5798323154449463, "learning_rate": 5.364337817320494e-06, "loss": 0.0826, "step": 33146 }, { "epoch": 0.7304037415921598, "grad_norm": 0.6991639137268066, "learning_rate": 5.3635173960908565e-06, "loss": 0.0639, "step": 33147 }, { "epoch": 0.730425776881676, "grad_norm": 0.6240541934967041, "learning_rate": 5.362697023944992e-06, "loss": 0.0515, "step": 33148 }, { "epoch": 0.7304478121711921, "grad_norm": 0.701618492603302, "learning_rate": 5.361876700887078e-06, "loss": 0.0895, "step": 33149 }, { "epoch": 0.7304698474607083, "grad_norm": 0.28705865144729614, "learning_rate": 5.361056426921301e-06, "loss": 0.0486, "step": 33150 }, { "epoch": 0.7304918827502245, "grad_norm": 0.728935718536377, "learning_rate": 5.360236202051827e-06, "loss": 0.0728, "step": 33151 }, { "epoch": 0.7305139180397406, "grad_norm": 0.6482149362564087, "learning_rate": 5.35941602628284e-06, "loss": 0.0621, "step": 33152 }, { "epoch": 0.7305359533292568, "grad_norm": 0.8927769064903259, "learning_rate": 5.358595899618521e-06, "loss": 0.0592, "step": 33153 }, { "epoch": 0.730557988618773, "grad_norm": 0.46732887625694275, "learning_rate": 5.357775822063037e-06, "loss": 0.0553, "step": 33154 }, { "epoch": 0.7305800239082891, "grad_norm": 0.6607043147087097, "learning_rate": 5.3569557936205725e-06, "loss": 0.0534, "step": 33155 }, { "epoch": 0.7306020591978053, "grad_norm": 0.472262978553772, "learning_rate": 5.356135814295304e-06, "loss": 0.0518, "step": 33156 }, { "epoch": 0.7306240944873215, "grad_norm": 0.4628105163574219, "learning_rate": 5.355315884091411e-06, "loss": 0.0293, "step": 33157 }, { "epoch": 0.7306461297768376, "grad_norm": 0.6581069231033325, "learning_rate": 5.354496003013062e-06, "loss": 0.0899, "step": 33158 }, { "epoch": 0.7306681650663538, "grad_norm": 0.5248386263847351, "learning_rate": 5.3536761710644365e-06, "loss": 0.037, "step": 33159 }, { "epoch": 0.73069020035587, "grad_norm": 0.6301301717758179, "learning_rate": 5.352856388249717e-06, "loss": 0.0473, "step": 33160 }, { "epoch": 0.7307122356453861, "grad_norm": 0.6088128685951233, "learning_rate": 5.352036654573067e-06, "loss": 0.0524, "step": 33161 }, { "epoch": 0.7307342709349023, "grad_norm": 0.6701053380966187, "learning_rate": 5.351216970038674e-06, "loss": 0.0842, "step": 33162 }, { "epoch": 0.7307563062244185, "grad_norm": 0.43910545110702515, "learning_rate": 5.350397334650695e-06, "loss": 0.0681, "step": 33163 }, { "epoch": 0.7307783415139346, "grad_norm": 0.42679959535598755, "learning_rate": 5.34957774841333e-06, "loss": 0.0501, "step": 33164 }, { "epoch": 0.7308003768034508, "grad_norm": 0.47458359599113464, "learning_rate": 5.348758211330733e-06, "loss": 0.0907, "step": 33165 }, { "epoch": 0.7308224120929668, "grad_norm": 0.5573321580886841, "learning_rate": 5.3479387234070925e-06, "loss": 0.0755, "step": 33166 }, { "epoch": 0.730844447382483, "grad_norm": 0.45464712381362915, "learning_rate": 5.347119284646569e-06, "loss": 0.0409, "step": 33167 }, { "epoch": 0.7308664826719992, "grad_norm": 0.4934206008911133, "learning_rate": 5.3462998950533445e-06, "loss": 0.055, "step": 33168 }, { "epoch": 0.7308885179615153, "grad_norm": 0.6190052628517151, "learning_rate": 5.345480554631597e-06, "loss": 0.0687, "step": 33169 }, { "epoch": 0.7309105532510315, "grad_norm": 0.5355744957923889, "learning_rate": 5.344661263385488e-06, "loss": 0.0561, "step": 33170 }, { "epoch": 0.7309325885405477, "grad_norm": 0.649061918258667, "learning_rate": 5.343842021319197e-06, "loss": 0.0533, "step": 33171 }, { "epoch": 0.7309546238300638, "grad_norm": 0.23505215346813202, "learning_rate": 5.343022828436897e-06, "loss": 0.0587, "step": 33172 }, { "epoch": 0.73097665911958, "grad_norm": 0.6063539385795593, "learning_rate": 5.342203684742765e-06, "loss": 0.0651, "step": 33173 }, { "epoch": 0.7309986944090962, "grad_norm": 0.7255345582962036, "learning_rate": 5.3413845902409635e-06, "loss": 0.0731, "step": 33174 }, { "epoch": 0.7310207296986123, "grad_norm": 0.7363991737365723, "learning_rate": 5.34056554493567e-06, "loss": 0.0689, "step": 33175 }, { "epoch": 0.7310427649881285, "grad_norm": 0.7201659083366394, "learning_rate": 5.339746548831061e-06, "loss": 0.0629, "step": 33176 }, { "epoch": 0.7310648002776446, "grad_norm": 0.40720582008361816, "learning_rate": 5.338927601931299e-06, "loss": 0.0455, "step": 33177 }, { "epoch": 0.7310868355671608, "grad_norm": 0.47152459621429443, "learning_rate": 5.338108704240557e-06, "loss": 0.0478, "step": 33178 }, { "epoch": 0.731108870856677, "grad_norm": 0.34826895594596863, "learning_rate": 5.33728985576301e-06, "loss": 0.0539, "step": 33179 }, { "epoch": 0.7311309061461931, "grad_norm": 0.456533282995224, "learning_rate": 5.3364710565028315e-06, "loss": 0.0505, "step": 33180 }, { "epoch": 0.7311529414357093, "grad_norm": 0.43453294038772583, "learning_rate": 5.335652306464184e-06, "loss": 0.0646, "step": 33181 }, { "epoch": 0.7311749767252255, "grad_norm": 0.5247361063957214, "learning_rate": 5.334833605651241e-06, "loss": 0.0733, "step": 33182 }, { "epoch": 0.7311970120147416, "grad_norm": 0.5384187698364258, "learning_rate": 5.3340149540681725e-06, "loss": 0.0904, "step": 33183 }, { "epoch": 0.7312190473042578, "grad_norm": 0.4900949001312256, "learning_rate": 5.333196351719156e-06, "loss": 0.0716, "step": 33184 }, { "epoch": 0.731241082593774, "grad_norm": 0.5458875894546509, "learning_rate": 5.3323777986083485e-06, "loss": 0.0779, "step": 33185 }, { "epoch": 0.7312631178832901, "grad_norm": 0.499819815158844, "learning_rate": 5.3315592947399255e-06, "loss": 0.0556, "step": 33186 }, { "epoch": 0.7312851531728063, "grad_norm": 1.0544835329055786, "learning_rate": 5.33074084011806e-06, "loss": 0.0687, "step": 33187 }, { "epoch": 0.7313071884623225, "grad_norm": 0.607527494430542, "learning_rate": 5.3299224347469105e-06, "loss": 0.0745, "step": 33188 }, { "epoch": 0.7313292237518386, "grad_norm": 0.520823061466217, "learning_rate": 5.3291040786306585e-06, "loss": 0.0557, "step": 33189 }, { "epoch": 0.7313512590413548, "grad_norm": 0.7609160542488098, "learning_rate": 5.328285771773454e-06, "loss": 0.0706, "step": 33190 }, { "epoch": 0.7313732943308708, "grad_norm": 0.7302474975585938, "learning_rate": 5.327467514179488e-06, "loss": 0.0899, "step": 33191 }, { "epoch": 0.731395329620387, "grad_norm": 0.4288116693496704, "learning_rate": 5.326649305852909e-06, "loss": 0.0475, "step": 33192 }, { "epoch": 0.7314173649099032, "grad_norm": 0.530863344669342, "learning_rate": 5.325831146797899e-06, "loss": 0.0683, "step": 33193 }, { "epoch": 0.7314394001994193, "grad_norm": 0.8008325099945068, "learning_rate": 5.325013037018615e-06, "loss": 0.0833, "step": 33194 }, { "epoch": 0.7314614354889355, "grad_norm": 0.661507785320282, "learning_rate": 5.324194976519226e-06, "loss": 0.0458, "step": 33195 }, { "epoch": 0.7314834707784517, "grad_norm": 0.4980536103248596, "learning_rate": 5.323376965303905e-06, "loss": 0.0509, "step": 33196 }, { "epoch": 0.7315055060679678, "grad_norm": 0.6358488202095032, "learning_rate": 5.322559003376811e-06, "loss": 0.0735, "step": 33197 }, { "epoch": 0.731527541357484, "grad_norm": 0.8533080220222473, "learning_rate": 5.321741090742112e-06, "loss": 0.0457, "step": 33198 }, { "epoch": 0.7315495766470002, "grad_norm": 0.48504728078842163, "learning_rate": 5.320923227403977e-06, "loss": 0.0514, "step": 33199 }, { "epoch": 0.7315716119365163, "grad_norm": 0.48781198263168335, "learning_rate": 5.320105413366574e-06, "loss": 0.0537, "step": 33200 }, { "epoch": 0.7315936472260325, "grad_norm": 0.3664551377296448, "learning_rate": 5.319287648634061e-06, "loss": 0.0572, "step": 33201 }, { "epoch": 0.7316156825155486, "grad_norm": 0.801406741142273, "learning_rate": 5.3184699332106055e-06, "loss": 0.0814, "step": 33202 }, { "epoch": 0.7316377178050648, "grad_norm": 0.6340805888175964, "learning_rate": 5.317652267100381e-06, "loss": 0.0518, "step": 33203 }, { "epoch": 0.731659753094581, "grad_norm": 0.8266752362251282, "learning_rate": 5.31683465030754e-06, "loss": 0.0937, "step": 33204 }, { "epoch": 0.7316817883840971, "grad_norm": 0.826896607875824, "learning_rate": 5.316017082836253e-06, "loss": 0.0827, "step": 33205 }, { "epoch": 0.7317038236736133, "grad_norm": 0.5163163542747498, "learning_rate": 5.3151995646906834e-06, "loss": 0.0695, "step": 33206 }, { "epoch": 0.7317258589631295, "grad_norm": 0.8037980198860168, "learning_rate": 5.314382095875001e-06, "loss": 0.0456, "step": 33207 }, { "epoch": 0.7317478942526456, "grad_norm": 0.5279937982559204, "learning_rate": 5.313564676393359e-06, "loss": 0.0528, "step": 33208 }, { "epoch": 0.7317699295421618, "grad_norm": 0.5027453899383545, "learning_rate": 5.312747306249927e-06, "loss": 0.0823, "step": 33209 }, { "epoch": 0.731791964831678, "grad_norm": 0.44478777050971985, "learning_rate": 5.311929985448873e-06, "loss": 0.0439, "step": 33210 }, { "epoch": 0.7318140001211941, "grad_norm": 0.36991390585899353, "learning_rate": 5.311112713994348e-06, "loss": 0.0593, "step": 33211 }, { "epoch": 0.7318360354107103, "grad_norm": 0.39236316084861755, "learning_rate": 5.310295491890528e-06, "loss": 0.0378, "step": 33212 }, { "epoch": 0.7318580707002265, "grad_norm": 0.4013265073299408, "learning_rate": 5.30947831914156e-06, "loss": 0.0793, "step": 33213 }, { "epoch": 0.7318801059897426, "grad_norm": 0.8467921614646912, "learning_rate": 5.308661195751624e-06, "loss": 0.0903, "step": 33214 }, { "epoch": 0.7319021412792587, "grad_norm": 0.6491982936859131, "learning_rate": 5.307844121724868e-06, "loss": 0.0461, "step": 33215 }, { "epoch": 0.7319241765687748, "grad_norm": 0.4206574559211731, "learning_rate": 5.307027097065465e-06, "loss": 0.0756, "step": 33216 }, { "epoch": 0.731946211858291, "grad_norm": 0.8641307950019836, "learning_rate": 5.306210121777566e-06, "loss": 0.0717, "step": 33217 }, { "epoch": 0.7319682471478072, "grad_norm": 0.7019457817077637, "learning_rate": 5.305393195865336e-06, "loss": 0.069, "step": 33218 }, { "epoch": 0.7319902824373233, "grad_norm": 0.40744420886039734, "learning_rate": 5.304576319332944e-06, "loss": 0.0421, "step": 33219 }, { "epoch": 0.7320123177268395, "grad_norm": 0.9785420298576355, "learning_rate": 5.303759492184537e-06, "loss": 0.0819, "step": 33220 }, { "epoch": 0.7320343530163557, "grad_norm": 0.5155667662620544, "learning_rate": 5.3029427144242824e-06, "loss": 0.0629, "step": 33221 }, { "epoch": 0.7320563883058718, "grad_norm": 0.5398008823394775, "learning_rate": 5.302125986056342e-06, "loss": 0.0699, "step": 33222 }, { "epoch": 0.732078423595388, "grad_norm": 0.7225516438484192, "learning_rate": 5.301309307084879e-06, "loss": 0.081, "step": 33223 }, { "epoch": 0.7321004588849042, "grad_norm": 1.2438498735427856, "learning_rate": 5.300492677514043e-06, "loss": 0.0634, "step": 33224 }, { "epoch": 0.7321224941744203, "grad_norm": 0.46353477239608765, "learning_rate": 5.2996760973479995e-06, "loss": 0.0882, "step": 33225 }, { "epoch": 0.7321445294639365, "grad_norm": 0.557802140712738, "learning_rate": 5.298859566590911e-06, "loss": 0.0517, "step": 33226 }, { "epoch": 0.7321665647534527, "grad_norm": 0.5498997569084167, "learning_rate": 5.29804308524693e-06, "loss": 0.0736, "step": 33227 }, { "epoch": 0.7321886000429688, "grad_norm": 0.33742859959602356, "learning_rate": 5.2972266533202165e-06, "loss": 0.0568, "step": 33228 }, { "epoch": 0.732210635332485, "grad_norm": 0.7518672943115234, "learning_rate": 5.296410270814933e-06, "loss": 0.0703, "step": 33229 }, { "epoch": 0.7322326706220011, "grad_norm": 0.4134596586227417, "learning_rate": 5.295593937735238e-06, "loss": 0.0492, "step": 33230 }, { "epoch": 0.7322547059115173, "grad_norm": 0.7308891415596008, "learning_rate": 5.294777654085284e-06, "loss": 0.105, "step": 33231 }, { "epoch": 0.7322767412010335, "grad_norm": 0.6436583995819092, "learning_rate": 5.29396141986923e-06, "loss": 0.0324, "step": 33232 }, { "epoch": 0.7322987764905496, "grad_norm": 0.6721432209014893, "learning_rate": 5.29314523509124e-06, "loss": 0.0807, "step": 33233 }, { "epoch": 0.7323208117800658, "grad_norm": 0.4208957552909851, "learning_rate": 5.292329099755463e-06, "loss": 0.0624, "step": 33234 }, { "epoch": 0.732342847069582, "grad_norm": 0.5913547277450562, "learning_rate": 5.291513013866065e-06, "loss": 0.0606, "step": 33235 }, { "epoch": 0.7323648823590981, "grad_norm": 0.6536169052124023, "learning_rate": 5.290696977427186e-06, "loss": 0.0504, "step": 33236 }, { "epoch": 0.7323869176486143, "grad_norm": 0.7983101010322571, "learning_rate": 5.289880990443005e-06, "loss": 0.069, "step": 33237 }, { "epoch": 0.7324089529381305, "grad_norm": 0.6875640749931335, "learning_rate": 5.28906505291766e-06, "loss": 0.0838, "step": 33238 }, { "epoch": 0.7324309882276466, "grad_norm": 0.6999116539955139, "learning_rate": 5.288249164855321e-06, "loss": 0.0853, "step": 33239 }, { "epoch": 0.7324530235171627, "grad_norm": 1.095928430557251, "learning_rate": 5.287433326260132e-06, "loss": 0.0715, "step": 33240 }, { "epoch": 0.7324750588066788, "grad_norm": 0.6469718813896179, "learning_rate": 5.286617537136251e-06, "loss": 0.0637, "step": 33241 }, { "epoch": 0.732497094096195, "grad_norm": 0.7298267483711243, "learning_rate": 5.2858017974878435e-06, "loss": 0.0719, "step": 33242 }, { "epoch": 0.7325191293857112, "grad_norm": 0.4459395408630371, "learning_rate": 5.28498610731905e-06, "loss": 0.0598, "step": 33243 }, { "epoch": 0.7325411646752273, "grad_norm": 0.5755753517150879, "learning_rate": 5.284170466634032e-06, "loss": 0.055, "step": 33244 }, { "epoch": 0.7325631999647435, "grad_norm": 0.7151206731796265, "learning_rate": 5.2833548754369445e-06, "loss": 0.0699, "step": 33245 }, { "epoch": 0.7325852352542597, "grad_norm": 0.7582699656486511, "learning_rate": 5.282539333731944e-06, "loss": 0.1016, "step": 33246 }, { "epoch": 0.7326072705437758, "grad_norm": 0.6821169853210449, "learning_rate": 5.28172384152318e-06, "loss": 0.0575, "step": 33247 }, { "epoch": 0.732629305833292, "grad_norm": 0.8641880750656128, "learning_rate": 5.280908398814806e-06, "loss": 0.0468, "step": 33248 }, { "epoch": 0.7326513411228082, "grad_norm": 0.4591706693172455, "learning_rate": 5.280093005610982e-06, "loss": 0.0548, "step": 33249 }, { "epoch": 0.7326733764123243, "grad_norm": 0.8472082614898682, "learning_rate": 5.279277661915851e-06, "loss": 0.0651, "step": 33250 }, { "epoch": 0.7326954117018405, "grad_norm": 0.8210006952285767, "learning_rate": 5.278462367733574e-06, "loss": 0.0622, "step": 33251 }, { "epoch": 0.7327174469913567, "grad_norm": 0.6457569599151611, "learning_rate": 5.277647123068298e-06, "loss": 0.0645, "step": 33252 }, { "epoch": 0.7327394822808728, "grad_norm": 0.47204938530921936, "learning_rate": 5.276831927924187e-06, "loss": 0.046, "step": 33253 }, { "epoch": 0.732761517570389, "grad_norm": 0.6097857356071472, "learning_rate": 5.2760167823053775e-06, "loss": 0.0443, "step": 33254 }, { "epoch": 0.7327835528599052, "grad_norm": 0.7612226605415344, "learning_rate": 5.27520168621603e-06, "loss": 0.0538, "step": 33255 }, { "epoch": 0.7328055881494213, "grad_norm": 0.29240110516548157, "learning_rate": 5.2743866396603005e-06, "loss": 0.0487, "step": 33256 }, { "epoch": 0.7328276234389375, "grad_norm": 0.6731173396110535, "learning_rate": 5.273571642642329e-06, "loss": 0.0804, "step": 33257 }, { "epoch": 0.7328496587284536, "grad_norm": 0.471050888299942, "learning_rate": 5.272756695166278e-06, "loss": 0.0544, "step": 33258 }, { "epoch": 0.7328716940179698, "grad_norm": 0.6506868600845337, "learning_rate": 5.271941797236285e-06, "loss": 0.0503, "step": 33259 }, { "epoch": 0.732893729307486, "grad_norm": 0.4102669656276703, "learning_rate": 5.271126948856518e-06, "loss": 0.0712, "step": 33260 }, { "epoch": 0.7329157645970021, "grad_norm": 0.5979223847389221, "learning_rate": 5.270312150031113e-06, "loss": 0.087, "step": 33261 }, { "epoch": 0.7329377998865183, "grad_norm": 0.5881449580192566, "learning_rate": 5.269497400764231e-06, "loss": 0.0532, "step": 33262 }, { "epoch": 0.7329598351760345, "grad_norm": 0.4116327464580536, "learning_rate": 5.268682701060012e-06, "loss": 0.0455, "step": 33263 }, { "epoch": 0.7329818704655506, "grad_norm": 0.7892898321151733, "learning_rate": 5.26786805092261e-06, "loss": 0.055, "step": 33264 }, { "epoch": 0.7330039057550667, "grad_norm": 0.5035779476165771, "learning_rate": 5.267053450356181e-06, "loss": 0.0537, "step": 33265 }, { "epoch": 0.7330259410445829, "grad_norm": 0.7059043049812317, "learning_rate": 5.2662388993648605e-06, "loss": 0.0815, "step": 33266 }, { "epoch": 0.733047976334099, "grad_norm": 0.8183173537254333, "learning_rate": 5.265424397952807e-06, "loss": 0.0968, "step": 33267 }, { "epoch": 0.7330700116236152, "grad_norm": 0.4182855784893036, "learning_rate": 5.264609946124166e-06, "loss": 0.0559, "step": 33268 }, { "epoch": 0.7330920469131313, "grad_norm": 0.6839936375617981, "learning_rate": 5.263795543883093e-06, "loss": 0.0637, "step": 33269 }, { "epoch": 0.7331140822026475, "grad_norm": 0.4276989698410034, "learning_rate": 5.262981191233724e-06, "loss": 0.0585, "step": 33270 }, { "epoch": 0.7331361174921637, "grad_norm": 0.5085976719856262, "learning_rate": 5.262166888180213e-06, "loss": 0.0666, "step": 33271 }, { "epoch": 0.7331581527816798, "grad_norm": 0.4821094870567322, "learning_rate": 5.2613526347267145e-06, "loss": 0.0646, "step": 33272 }, { "epoch": 0.733180188071196, "grad_norm": 0.6774910688400269, "learning_rate": 5.260538430877364e-06, "loss": 0.0625, "step": 33273 }, { "epoch": 0.7332022233607122, "grad_norm": 0.8972213268280029, "learning_rate": 5.2597242766363125e-06, "loss": 0.0814, "step": 33274 }, { "epoch": 0.7332242586502283, "grad_norm": 0.5963144898414612, "learning_rate": 5.258910172007711e-06, "loss": 0.0736, "step": 33275 }, { "epoch": 0.7332462939397445, "grad_norm": 0.5886210203170776, "learning_rate": 5.258096116995706e-06, "loss": 0.077, "step": 33276 }, { "epoch": 0.7332683292292607, "grad_norm": 0.5486893653869629, "learning_rate": 5.257282111604438e-06, "loss": 0.067, "step": 33277 }, { "epoch": 0.7332903645187768, "grad_norm": 0.2741142213344574, "learning_rate": 5.256468155838057e-06, "loss": 0.0458, "step": 33278 }, { "epoch": 0.733312399808293, "grad_norm": 0.9360200762748718, "learning_rate": 5.255654249700713e-06, "loss": 0.0893, "step": 33279 }, { "epoch": 0.7333344350978092, "grad_norm": 0.5805999040603638, "learning_rate": 5.2548403931965425e-06, "loss": 0.0725, "step": 33280 }, { "epoch": 0.7333564703873253, "grad_norm": 1.3987210988998413, "learning_rate": 5.254026586329699e-06, "loss": 0.0844, "step": 33281 }, { "epoch": 0.7333785056768415, "grad_norm": 0.576129138469696, "learning_rate": 5.253212829104316e-06, "loss": 0.0688, "step": 33282 }, { "epoch": 0.7334005409663577, "grad_norm": 0.3393961489200592, "learning_rate": 5.252399121524557e-06, "loss": 0.0608, "step": 33283 }, { "epoch": 0.7334225762558738, "grad_norm": 0.5073741674423218, "learning_rate": 5.2515854635945495e-06, "loss": 0.0514, "step": 33284 }, { "epoch": 0.73344461154539, "grad_norm": 0.6596755981445312, "learning_rate": 5.250771855318451e-06, "loss": 0.1145, "step": 33285 }, { "epoch": 0.7334666468349061, "grad_norm": 0.47155487537384033, "learning_rate": 5.2499582967003896e-06, "loss": 0.0472, "step": 33286 }, { "epoch": 0.7334886821244223, "grad_norm": 0.7495080828666687, "learning_rate": 5.249144787744529e-06, "loss": 0.0459, "step": 33287 }, { "epoch": 0.7335107174139385, "grad_norm": 1.2320317029953003, "learning_rate": 5.248331328455002e-06, "loss": 0.0897, "step": 33288 }, { "epoch": 0.7335327527034546, "grad_norm": 0.6577823162078857, "learning_rate": 5.247517918835944e-06, "loss": 0.0588, "step": 33289 }, { "epoch": 0.7335547879929707, "grad_norm": 0.573008120059967, "learning_rate": 5.246704558891516e-06, "loss": 0.0663, "step": 33290 }, { "epoch": 0.7335768232824869, "grad_norm": 0.6041306257247925, "learning_rate": 5.245891248625846e-06, "loss": 0.0587, "step": 33291 }, { "epoch": 0.733598858572003, "grad_norm": 0.6489421129226685, "learning_rate": 5.245077988043089e-06, "loss": 0.0462, "step": 33292 }, { "epoch": 0.7336208938615192, "grad_norm": 0.7550349235534668, "learning_rate": 5.24426477714737e-06, "loss": 0.0669, "step": 33293 }, { "epoch": 0.7336429291510354, "grad_norm": 0.51810222864151, "learning_rate": 5.243451615942853e-06, "loss": 0.0507, "step": 33294 }, { "epoch": 0.7336649644405515, "grad_norm": 0.39918604493141174, "learning_rate": 5.2426385044336626e-06, "loss": 0.05, "step": 33295 }, { "epoch": 0.7336869997300677, "grad_norm": 0.47863632440567017, "learning_rate": 5.241825442623952e-06, "loss": 0.0429, "step": 33296 }, { "epoch": 0.7337090350195838, "grad_norm": 0.5294227600097656, "learning_rate": 5.241012430517852e-06, "loss": 0.0421, "step": 33297 }, { "epoch": 0.7337310703091, "grad_norm": 0.8732107877731323, "learning_rate": 5.240199468119509e-06, "loss": 0.0703, "step": 33298 }, { "epoch": 0.7337531055986162, "grad_norm": 0.5621417760848999, "learning_rate": 5.239386555433069e-06, "loss": 0.0711, "step": 33299 }, { "epoch": 0.7337751408881323, "grad_norm": 0.519034743309021, "learning_rate": 5.238573692462661e-06, "loss": 0.0532, "step": 33300 }, { "epoch": 0.7337971761776485, "grad_norm": 0.7895236015319824, "learning_rate": 5.2377608792124335e-06, "loss": 0.0832, "step": 33301 }, { "epoch": 0.7338192114671647, "grad_norm": 0.4179922044277191, "learning_rate": 5.236948115686524e-06, "loss": 0.0486, "step": 33302 }, { "epoch": 0.7338412467566808, "grad_norm": 0.7881187796592712, "learning_rate": 5.236135401889077e-06, "loss": 0.0718, "step": 33303 }, { "epoch": 0.733863282046197, "grad_norm": 0.7584068775177002, "learning_rate": 5.235322737824225e-06, "loss": 0.0416, "step": 33304 }, { "epoch": 0.7338853173357132, "grad_norm": 0.6489136815071106, "learning_rate": 5.23451012349611e-06, "loss": 0.0661, "step": 33305 }, { "epoch": 0.7339073526252293, "grad_norm": 0.6330029368400574, "learning_rate": 5.233697558908877e-06, "loss": 0.0763, "step": 33306 }, { "epoch": 0.7339293879147455, "grad_norm": 0.46680954098701477, "learning_rate": 5.232885044066655e-06, "loss": 0.0456, "step": 33307 }, { "epoch": 0.7339514232042617, "grad_norm": 0.727153480052948, "learning_rate": 5.2320725789735895e-06, "loss": 0.0495, "step": 33308 }, { "epoch": 0.7339734584937778, "grad_norm": 0.6858564019203186, "learning_rate": 5.231260163633808e-06, "loss": 0.042, "step": 33309 }, { "epoch": 0.733995493783294, "grad_norm": 0.8701875805854797, "learning_rate": 5.2304477980514665e-06, "loss": 0.088, "step": 33310 }, { "epoch": 0.7340175290728101, "grad_norm": 0.6220049262046814, "learning_rate": 5.229635482230688e-06, "loss": 0.0499, "step": 33311 }, { "epoch": 0.7340395643623263, "grad_norm": 0.5803167223930359, "learning_rate": 5.228823216175616e-06, "loss": 0.0445, "step": 33312 }, { "epoch": 0.7340615996518425, "grad_norm": 0.5257735848426819, "learning_rate": 5.228010999890392e-06, "loss": 0.0665, "step": 33313 }, { "epoch": 0.7340836349413585, "grad_norm": 0.7140001058578491, "learning_rate": 5.227198833379142e-06, "loss": 0.0759, "step": 33314 }, { "epoch": 0.7341056702308747, "grad_norm": 1.0152721405029297, "learning_rate": 5.226386716646015e-06, "loss": 0.0764, "step": 33315 }, { "epoch": 0.7341277055203909, "grad_norm": 0.27463769912719727, "learning_rate": 5.225574649695131e-06, "loss": 0.0931, "step": 33316 }, { "epoch": 0.734149740809907, "grad_norm": 0.7458061575889587, "learning_rate": 5.224762632530648e-06, "loss": 0.0535, "step": 33317 }, { "epoch": 0.7341717760994232, "grad_norm": 0.6077356934547424, "learning_rate": 5.223950665156685e-06, "loss": 0.0716, "step": 33318 }, { "epoch": 0.7341938113889394, "grad_norm": 0.6524510383605957, "learning_rate": 5.2231387475773885e-06, "loss": 0.057, "step": 33319 }, { "epoch": 0.7342158466784555, "grad_norm": 0.4393473267555237, "learning_rate": 5.222326879796883e-06, "loss": 0.0409, "step": 33320 }, { "epoch": 0.7342378819679717, "grad_norm": 0.37110090255737305, "learning_rate": 5.2215150618193115e-06, "loss": 0.0505, "step": 33321 }, { "epoch": 0.7342599172574878, "grad_norm": 0.6168078780174255, "learning_rate": 5.220703293648811e-06, "loss": 0.0658, "step": 33322 }, { "epoch": 0.734281952547004, "grad_norm": 0.7148057818412781, "learning_rate": 5.21989157528951e-06, "loss": 0.0559, "step": 33323 }, { "epoch": 0.7343039878365202, "grad_norm": 0.7456590533256531, "learning_rate": 5.2190799067455435e-06, "loss": 0.0859, "step": 33324 }, { "epoch": 0.7343260231260363, "grad_norm": 0.5669301748275757, "learning_rate": 5.2182682880210494e-06, "loss": 0.0482, "step": 33325 }, { "epoch": 0.7343480584155525, "grad_norm": 0.7271072864532471, "learning_rate": 5.217456719120165e-06, "loss": 0.0911, "step": 33326 }, { "epoch": 0.7343700937050687, "grad_norm": 0.5740702748298645, "learning_rate": 5.216645200047014e-06, "loss": 0.0576, "step": 33327 }, { "epoch": 0.7343921289945848, "grad_norm": 0.9598361849784851, "learning_rate": 5.215833730805736e-06, "loss": 0.0705, "step": 33328 }, { "epoch": 0.734414164284101, "grad_norm": 0.8483201265335083, "learning_rate": 5.215022311400467e-06, "loss": 0.0526, "step": 33329 }, { "epoch": 0.7344361995736172, "grad_norm": 0.5621424913406372, "learning_rate": 5.214210941835333e-06, "loss": 0.0622, "step": 33330 }, { "epoch": 0.7344582348631333, "grad_norm": 0.8282097578048706, "learning_rate": 5.2133996221144736e-06, "loss": 0.0722, "step": 33331 }, { "epoch": 0.7344802701526495, "grad_norm": 1.106980800628662, "learning_rate": 5.212588352242008e-06, "loss": 0.0659, "step": 33332 }, { "epoch": 0.7345023054421657, "grad_norm": 0.5398094654083252, "learning_rate": 5.21177713222209e-06, "loss": 0.061, "step": 33333 }, { "epoch": 0.7345243407316818, "grad_norm": 0.6689298152923584, "learning_rate": 5.210965962058834e-06, "loss": 0.0555, "step": 33334 }, { "epoch": 0.734546376021198, "grad_norm": 0.488855242729187, "learning_rate": 5.210154841756378e-06, "loss": 0.0526, "step": 33335 }, { "epoch": 0.7345684113107142, "grad_norm": 0.4377956986427307, "learning_rate": 5.209343771318858e-06, "loss": 0.0737, "step": 33336 }, { "epoch": 0.7345904466002303, "grad_norm": 0.43759801983833313, "learning_rate": 5.208532750750395e-06, "loss": 0.0609, "step": 33337 }, { "epoch": 0.7346124818897465, "grad_norm": 0.4984804689884186, "learning_rate": 5.20772178005513e-06, "loss": 0.0641, "step": 33338 }, { "epoch": 0.7346345171792625, "grad_norm": 0.7789897322654724, "learning_rate": 5.20691085923718e-06, "loss": 0.0742, "step": 33339 }, { "epoch": 0.7346565524687787, "grad_norm": 0.5749115943908691, "learning_rate": 5.206099988300694e-06, "loss": 0.0478, "step": 33340 }, { "epoch": 0.7346785877582949, "grad_norm": 0.3446367681026459, "learning_rate": 5.2052891672497885e-06, "loss": 0.0514, "step": 33341 }, { "epoch": 0.734700623047811, "grad_norm": 0.5834825038909912, "learning_rate": 5.204478396088602e-06, "loss": 0.0543, "step": 33342 }, { "epoch": 0.7347226583373272, "grad_norm": 0.660775899887085, "learning_rate": 5.2036676748212544e-06, "loss": 0.0734, "step": 33343 }, { "epoch": 0.7347446936268434, "grad_norm": 0.5256858468055725, "learning_rate": 5.202857003451881e-06, "loss": 0.0579, "step": 33344 }, { "epoch": 0.7347667289163595, "grad_norm": 0.6355939507484436, "learning_rate": 5.202046381984615e-06, "loss": 0.0371, "step": 33345 }, { "epoch": 0.7347887642058757, "grad_norm": 0.4425218403339386, "learning_rate": 5.2012358104235744e-06, "loss": 0.0548, "step": 33346 }, { "epoch": 0.7348107994953919, "grad_norm": 0.5102203488349915, "learning_rate": 5.200425288772897e-06, "loss": 0.0724, "step": 33347 }, { "epoch": 0.734832834784908, "grad_norm": 1.0971702337265015, "learning_rate": 5.199614817036707e-06, "loss": 0.0691, "step": 33348 }, { "epoch": 0.7348548700744242, "grad_norm": 0.6935027241706848, "learning_rate": 5.198804395219141e-06, "loss": 0.0695, "step": 33349 }, { "epoch": 0.7348769053639403, "grad_norm": 0.7991840839385986, "learning_rate": 5.197994023324313e-06, "loss": 0.0646, "step": 33350 }, { "epoch": 0.7348989406534565, "grad_norm": 0.5353816151618958, "learning_rate": 5.197183701356359e-06, "loss": 0.0388, "step": 33351 }, { "epoch": 0.7349209759429727, "grad_norm": 0.8092816472053528, "learning_rate": 5.196373429319409e-06, "loss": 0.0721, "step": 33352 }, { "epoch": 0.7349430112324888, "grad_norm": 0.6459806561470032, "learning_rate": 5.195563207217582e-06, "loss": 0.0755, "step": 33353 }, { "epoch": 0.734965046522005, "grad_norm": 0.5028053522109985, "learning_rate": 5.194753035055013e-06, "loss": 0.0912, "step": 33354 }, { "epoch": 0.7349870818115212, "grad_norm": 0.8569929003715515, "learning_rate": 5.193942912835817e-06, "loss": 0.0658, "step": 33355 }, { "epoch": 0.7350091171010373, "grad_norm": 0.9626278877258301, "learning_rate": 5.193132840564137e-06, "loss": 0.0779, "step": 33356 }, { "epoch": 0.7350311523905535, "grad_norm": 0.5815375447273254, "learning_rate": 5.192322818244084e-06, "loss": 0.0611, "step": 33357 }, { "epoch": 0.7350531876800697, "grad_norm": 0.6598232388496399, "learning_rate": 5.191512845879791e-06, "loss": 0.0504, "step": 33358 }, { "epoch": 0.7350752229695858, "grad_norm": 0.9217917919158936, "learning_rate": 5.1907029234753894e-06, "loss": 0.0841, "step": 33359 }, { "epoch": 0.735097258259102, "grad_norm": 0.8818389773368835, "learning_rate": 5.1898930510349905e-06, "loss": 0.0561, "step": 33360 }, { "epoch": 0.7351192935486182, "grad_norm": 0.8065425157546997, "learning_rate": 5.189083228562733e-06, "loss": 0.084, "step": 33361 }, { "epoch": 0.7351413288381343, "grad_norm": 0.4005672037601471, "learning_rate": 5.188273456062727e-06, "loss": 0.0473, "step": 33362 }, { "epoch": 0.7351633641276505, "grad_norm": 0.43560218811035156, "learning_rate": 5.187463733539114e-06, "loss": 0.0658, "step": 33363 }, { "epoch": 0.7351853994171665, "grad_norm": 0.4934154748916626, "learning_rate": 5.186654060996005e-06, "loss": 0.0601, "step": 33364 }, { "epoch": 0.7352074347066827, "grad_norm": 0.5232334733009338, "learning_rate": 5.185844438437535e-06, "loss": 0.0607, "step": 33365 }, { "epoch": 0.7352294699961989, "grad_norm": 0.9645100831985474, "learning_rate": 5.185034865867818e-06, "loss": 0.0635, "step": 33366 }, { "epoch": 0.735251505285715, "grad_norm": 0.8516688346862793, "learning_rate": 5.184225343290979e-06, "loss": 0.0694, "step": 33367 }, { "epoch": 0.7352735405752312, "grad_norm": 0.6919474005699158, "learning_rate": 5.1834158707111515e-06, "loss": 0.0617, "step": 33368 }, { "epoch": 0.7352955758647474, "grad_norm": 0.3846064805984497, "learning_rate": 5.1826064481324444e-06, "loss": 0.0587, "step": 33369 }, { "epoch": 0.7353176111542635, "grad_norm": 0.6523535251617432, "learning_rate": 5.181797075558988e-06, "loss": 0.0757, "step": 33370 }, { "epoch": 0.7353396464437797, "grad_norm": 0.9342163801193237, "learning_rate": 5.180987752994904e-06, "loss": 0.0832, "step": 33371 }, { "epoch": 0.7353616817332959, "grad_norm": 0.5783548951148987, "learning_rate": 5.1801784804443194e-06, "loss": 0.0541, "step": 33372 }, { "epoch": 0.735383717022812, "grad_norm": 0.6965543627738953, "learning_rate": 5.179369257911347e-06, "loss": 0.0726, "step": 33373 }, { "epoch": 0.7354057523123282, "grad_norm": 0.5720388293266296, "learning_rate": 5.178560085400113e-06, "loss": 0.0649, "step": 33374 }, { "epoch": 0.7354277876018444, "grad_norm": 0.46101975440979004, "learning_rate": 5.177750962914744e-06, "loss": 0.059, "step": 33375 }, { "epoch": 0.7354498228913605, "grad_norm": 0.5495277643203735, "learning_rate": 5.1769418904593515e-06, "loss": 0.0578, "step": 33376 }, { "epoch": 0.7354718581808767, "grad_norm": 0.3886464834213257, "learning_rate": 5.176132868038066e-06, "loss": 0.0642, "step": 33377 }, { "epoch": 0.7354938934703928, "grad_norm": 0.9422851204872131, "learning_rate": 5.175323895654995e-06, "loss": 0.0685, "step": 33378 }, { "epoch": 0.735515928759909, "grad_norm": 0.5044578909873962, "learning_rate": 5.1745149733142765e-06, "loss": 0.0699, "step": 33379 }, { "epoch": 0.7355379640494252, "grad_norm": 0.6565951108932495, "learning_rate": 5.173706101020019e-06, "loss": 0.0692, "step": 33380 }, { "epoch": 0.7355599993389413, "grad_norm": 0.7991358637809753, "learning_rate": 5.172897278776343e-06, "loss": 0.0909, "step": 33381 }, { "epoch": 0.7355820346284575, "grad_norm": 0.5779423117637634, "learning_rate": 5.172088506587377e-06, "loss": 0.0577, "step": 33382 }, { "epoch": 0.7356040699179737, "grad_norm": 0.29763880372047424, "learning_rate": 5.17127978445723e-06, "loss": 0.0504, "step": 33383 }, { "epoch": 0.7356261052074898, "grad_norm": 0.2545636296272278, "learning_rate": 5.170471112390031e-06, "loss": 0.0743, "step": 33384 }, { "epoch": 0.735648140497006, "grad_norm": 0.44787508249282837, "learning_rate": 5.169662490389882e-06, "loss": 0.0563, "step": 33385 }, { "epoch": 0.7356701757865222, "grad_norm": 0.755000114440918, "learning_rate": 5.168853918460925e-06, "loss": 0.06, "step": 33386 }, { "epoch": 0.7356922110760383, "grad_norm": 0.4704633355140686, "learning_rate": 5.16804539660726e-06, "loss": 0.088, "step": 33387 }, { "epoch": 0.7357142463655544, "grad_norm": 0.8491116762161255, "learning_rate": 5.16723692483302e-06, "loss": 0.0651, "step": 33388 }, { "epoch": 0.7357362816550705, "grad_norm": 0.6632117033004761, "learning_rate": 5.166428503142308e-06, "loss": 0.0581, "step": 33389 }, { "epoch": 0.7357583169445867, "grad_norm": 0.45311251282691956, "learning_rate": 5.165620131539249e-06, "loss": 0.0648, "step": 33390 }, { "epoch": 0.7357803522341029, "grad_norm": 0.4808995723724365, "learning_rate": 5.164811810027965e-06, "loss": 0.0454, "step": 33391 }, { "epoch": 0.735802387523619, "grad_norm": 0.5242549777030945, "learning_rate": 5.164003538612564e-06, "loss": 0.0515, "step": 33392 }, { "epoch": 0.7358244228131352, "grad_norm": 0.4672453701496124, "learning_rate": 5.163195317297167e-06, "loss": 0.0481, "step": 33393 }, { "epoch": 0.7358464581026514, "grad_norm": 0.5188172459602356, "learning_rate": 5.162387146085892e-06, "loss": 0.0847, "step": 33394 }, { "epoch": 0.7358684933921675, "grad_norm": 0.8108285069465637, "learning_rate": 5.161579024982859e-06, "loss": 0.0605, "step": 33395 }, { "epoch": 0.7358905286816837, "grad_norm": 0.5858182311058044, "learning_rate": 5.160770953992174e-06, "loss": 0.0532, "step": 33396 }, { "epoch": 0.7359125639711999, "grad_norm": 0.7478014826774597, "learning_rate": 5.159962933117961e-06, "loss": 0.0745, "step": 33397 }, { "epoch": 0.735934599260716, "grad_norm": 0.602287769317627, "learning_rate": 5.159154962364332e-06, "loss": 0.0793, "step": 33398 }, { "epoch": 0.7359566345502322, "grad_norm": 0.409157931804657, "learning_rate": 5.158347041735409e-06, "loss": 0.0365, "step": 33399 }, { "epoch": 0.7359786698397484, "grad_norm": 0.7580019235610962, "learning_rate": 5.1575391712353024e-06, "loss": 0.0654, "step": 33400 }, { "epoch": 0.7360007051292645, "grad_norm": 0.5040662884712219, "learning_rate": 5.1567313508681144e-06, "loss": 0.088, "step": 33401 }, { "epoch": 0.7360227404187807, "grad_norm": 0.4523771405220032, "learning_rate": 5.155923580637985e-06, "loss": 0.0497, "step": 33402 }, { "epoch": 0.7360447757082969, "grad_norm": 0.6299099326133728, "learning_rate": 5.15511586054901e-06, "loss": 0.0489, "step": 33403 }, { "epoch": 0.736066810997813, "grad_norm": 0.9639614224433899, "learning_rate": 5.154308190605308e-06, "loss": 0.0991, "step": 33404 }, { "epoch": 0.7360888462873292, "grad_norm": 0.6411569118499756, "learning_rate": 5.153500570810996e-06, "loss": 0.0661, "step": 33405 }, { "epoch": 0.7361108815768453, "grad_norm": 0.584587037563324, "learning_rate": 5.152693001170188e-06, "loss": 0.0766, "step": 33406 }, { "epoch": 0.7361329168663615, "grad_norm": 0.47850412130355835, "learning_rate": 5.151885481686991e-06, "loss": 0.0541, "step": 33407 }, { "epoch": 0.7361549521558777, "grad_norm": 0.7621214985847473, "learning_rate": 5.1510780123655235e-06, "loss": 0.0923, "step": 33408 }, { "epoch": 0.7361769874453938, "grad_norm": 0.45030081272125244, "learning_rate": 5.1502705932099015e-06, "loss": 0.0529, "step": 33409 }, { "epoch": 0.73619902273491, "grad_norm": 0.8714841604232788, "learning_rate": 5.149463224224228e-06, "loss": 0.0487, "step": 33410 }, { "epoch": 0.7362210580244262, "grad_norm": 0.6873534917831421, "learning_rate": 5.148655905412626e-06, "loss": 0.0608, "step": 33411 }, { "epoch": 0.7362430933139423, "grad_norm": 0.5339464545249939, "learning_rate": 5.147848636779192e-06, "loss": 0.0536, "step": 33412 }, { "epoch": 0.7362651286034584, "grad_norm": 0.5137342214584351, "learning_rate": 5.1470414183280585e-06, "loss": 0.055, "step": 33413 }, { "epoch": 0.7362871638929745, "grad_norm": 0.721943736076355, "learning_rate": 5.146234250063319e-06, "loss": 0.0743, "step": 33414 }, { "epoch": 0.7363091991824907, "grad_norm": 0.7199335694313049, "learning_rate": 5.145427131989101e-06, "loss": 0.0386, "step": 33415 }, { "epoch": 0.7363312344720069, "grad_norm": 0.9415558576583862, "learning_rate": 5.144620064109501e-06, "loss": 0.0704, "step": 33416 }, { "epoch": 0.736353269761523, "grad_norm": 0.7273808717727661, "learning_rate": 5.143813046428634e-06, "loss": 0.0772, "step": 33417 }, { "epoch": 0.7363753050510392, "grad_norm": 0.48745882511138916, "learning_rate": 5.143006078950619e-06, "loss": 0.0684, "step": 33418 }, { "epoch": 0.7363973403405554, "grad_norm": 0.5856011509895325, "learning_rate": 5.142199161679555e-06, "loss": 0.0604, "step": 33419 }, { "epoch": 0.7364193756300715, "grad_norm": 0.7073507905006409, "learning_rate": 5.141392294619558e-06, "loss": 0.0586, "step": 33420 }, { "epoch": 0.7364414109195877, "grad_norm": 0.6149210929870605, "learning_rate": 5.140585477774734e-06, "loss": 0.0589, "step": 33421 }, { "epoch": 0.7364634462091039, "grad_norm": 0.3666577935218811, "learning_rate": 5.139778711149201e-06, "loss": 0.0532, "step": 33422 }, { "epoch": 0.73648548149862, "grad_norm": 0.7023029327392578, "learning_rate": 5.138971994747059e-06, "loss": 0.0708, "step": 33423 }, { "epoch": 0.7365075167881362, "grad_norm": 0.6475362181663513, "learning_rate": 5.1381653285724196e-06, "loss": 0.0664, "step": 33424 }, { "epoch": 0.7365295520776524, "grad_norm": 0.6459877490997314, "learning_rate": 5.1373587126293974e-06, "loss": 0.0457, "step": 33425 }, { "epoch": 0.7365515873671685, "grad_norm": 0.6410525441169739, "learning_rate": 5.136552146922091e-06, "loss": 0.0702, "step": 33426 }, { "epoch": 0.7365736226566847, "grad_norm": 0.7491334676742554, "learning_rate": 5.1357456314546134e-06, "loss": 0.0589, "step": 33427 }, { "epoch": 0.7365956579462009, "grad_norm": 0.7774807810783386, "learning_rate": 5.134939166231074e-06, "loss": 0.0711, "step": 33428 }, { "epoch": 0.736617693235717, "grad_norm": 0.25122934579849243, "learning_rate": 5.1341327512555825e-06, "loss": 0.0438, "step": 33429 }, { "epoch": 0.7366397285252332, "grad_norm": 0.8870503902435303, "learning_rate": 5.13332638653224e-06, "loss": 0.0654, "step": 33430 }, { "epoch": 0.7366617638147493, "grad_norm": 0.7330922484397888, "learning_rate": 5.132520072065156e-06, "loss": 0.065, "step": 33431 }, { "epoch": 0.7366837991042655, "grad_norm": 0.7364000082015991, "learning_rate": 5.131713807858444e-06, "loss": 0.1007, "step": 33432 }, { "epoch": 0.7367058343937817, "grad_norm": 1.0754015445709229, "learning_rate": 5.130907593916202e-06, "loss": 0.0839, "step": 33433 }, { "epoch": 0.7367278696832978, "grad_norm": 0.4404895305633545, "learning_rate": 5.130101430242543e-06, "loss": 0.043, "step": 33434 }, { "epoch": 0.736749904972814, "grad_norm": 0.4899119436740875, "learning_rate": 5.129295316841561e-06, "loss": 0.0639, "step": 33435 }, { "epoch": 0.7367719402623302, "grad_norm": 0.5620849132537842, "learning_rate": 5.128489253717381e-06, "loss": 0.0708, "step": 33436 }, { "epoch": 0.7367939755518463, "grad_norm": 0.5840622782707214, "learning_rate": 5.1276832408740925e-06, "loss": 0.0768, "step": 33437 }, { "epoch": 0.7368160108413624, "grad_norm": 0.46654626727104187, "learning_rate": 5.126877278315814e-06, "loss": 0.0573, "step": 33438 }, { "epoch": 0.7368380461308786, "grad_norm": 0.4876028299331665, "learning_rate": 5.126071366046637e-06, "loss": 0.0674, "step": 33439 }, { "epoch": 0.7368600814203947, "grad_norm": 0.7098669409751892, "learning_rate": 5.125265504070676e-06, "loss": 0.0403, "step": 33440 }, { "epoch": 0.7368821167099109, "grad_norm": 0.4159441888332367, "learning_rate": 5.124459692392035e-06, "loss": 0.0576, "step": 33441 }, { "epoch": 0.736904151999427, "grad_norm": 0.3788582384586334, "learning_rate": 5.123653931014813e-06, "loss": 0.0789, "step": 33442 }, { "epoch": 0.7369261872889432, "grad_norm": 0.7754336595535278, "learning_rate": 5.1228482199431195e-06, "loss": 0.0569, "step": 33443 }, { "epoch": 0.7369482225784594, "grad_norm": 0.5513843894004822, "learning_rate": 5.1220425591810535e-06, "loss": 0.0767, "step": 33444 }, { "epoch": 0.7369702578679755, "grad_norm": 0.5026208758354187, "learning_rate": 5.121236948732728e-06, "loss": 0.0494, "step": 33445 }, { "epoch": 0.7369922931574917, "grad_norm": 0.97376549243927, "learning_rate": 5.120431388602235e-06, "loss": 0.0808, "step": 33446 }, { "epoch": 0.7370143284470079, "grad_norm": 0.576545000076294, "learning_rate": 5.119625878793683e-06, "loss": 0.0487, "step": 33447 }, { "epoch": 0.737036363736524, "grad_norm": 0.5632939338684082, "learning_rate": 5.11882041931118e-06, "loss": 0.0746, "step": 33448 }, { "epoch": 0.7370583990260402, "grad_norm": 0.849682629108429, "learning_rate": 5.118015010158817e-06, "loss": 0.0758, "step": 33449 }, { "epoch": 0.7370804343155564, "grad_norm": 1.2198498249053955, "learning_rate": 5.117209651340704e-06, "loss": 0.0882, "step": 33450 }, { "epoch": 0.7371024696050725, "grad_norm": 0.949770450592041, "learning_rate": 5.116404342860942e-06, "loss": 0.0809, "step": 33451 }, { "epoch": 0.7371245048945887, "grad_norm": 0.2812678813934326, "learning_rate": 5.115599084723636e-06, "loss": 0.0519, "step": 33452 }, { "epoch": 0.7371465401841049, "grad_norm": 0.6968176960945129, "learning_rate": 5.11479387693288e-06, "loss": 0.0815, "step": 33453 }, { "epoch": 0.737168575473621, "grad_norm": 0.7018604874610901, "learning_rate": 5.11398871949278e-06, "loss": 0.0667, "step": 33454 }, { "epoch": 0.7371906107631372, "grad_norm": 0.6144158840179443, "learning_rate": 5.113183612407441e-06, "loss": 0.0619, "step": 33455 }, { "epoch": 0.7372126460526534, "grad_norm": 0.46530231833457947, "learning_rate": 5.1123785556809575e-06, "loss": 0.0468, "step": 33456 }, { "epoch": 0.7372346813421695, "grad_norm": 0.6022226214408875, "learning_rate": 5.111573549317433e-06, "loss": 0.0661, "step": 33457 }, { "epoch": 0.7372567166316857, "grad_norm": 0.6917552351951599, "learning_rate": 5.1107685933209596e-06, "loss": 0.0696, "step": 33458 }, { "epoch": 0.7372787519212018, "grad_norm": 0.5703604221343994, "learning_rate": 5.109963687695655e-06, "loss": 0.0831, "step": 33459 }, { "epoch": 0.737300787210718, "grad_norm": 0.7451227307319641, "learning_rate": 5.109158832445603e-06, "loss": 0.0664, "step": 33460 }, { "epoch": 0.7373228225002342, "grad_norm": 0.36127400398254395, "learning_rate": 5.108354027574913e-06, "loss": 0.0652, "step": 33461 }, { "epoch": 0.7373448577897502, "grad_norm": 0.568995475769043, "learning_rate": 5.107549273087677e-06, "loss": 0.0738, "step": 33462 }, { "epoch": 0.7373668930792664, "grad_norm": 0.5815040469169617, "learning_rate": 5.106744568987996e-06, "loss": 0.0742, "step": 33463 }, { "epoch": 0.7373889283687826, "grad_norm": 0.9202132225036621, "learning_rate": 5.105939915279976e-06, "loss": 0.096, "step": 33464 }, { "epoch": 0.7374109636582987, "grad_norm": 0.735821545124054, "learning_rate": 5.105135311967705e-06, "loss": 0.0614, "step": 33465 }, { "epoch": 0.7374329989478149, "grad_norm": 0.6669020652770996, "learning_rate": 5.104330759055284e-06, "loss": 0.0576, "step": 33466 }, { "epoch": 0.737455034237331, "grad_norm": 0.4905705749988556, "learning_rate": 5.103526256546815e-06, "loss": 0.0637, "step": 33467 }, { "epoch": 0.7374770695268472, "grad_norm": 0.4125168025493622, "learning_rate": 5.1027218044463976e-06, "loss": 0.0386, "step": 33468 }, { "epoch": 0.7374991048163634, "grad_norm": 0.6753233075141907, "learning_rate": 5.10191740275812e-06, "loss": 0.0702, "step": 33469 }, { "epoch": 0.7375211401058795, "grad_norm": 0.31325763463974, "learning_rate": 5.101113051486085e-06, "loss": 0.0403, "step": 33470 }, { "epoch": 0.7375431753953957, "grad_norm": 0.8636093735694885, "learning_rate": 5.100308750634395e-06, "loss": 0.0654, "step": 33471 }, { "epoch": 0.7375652106849119, "grad_norm": 0.7046788334846497, "learning_rate": 5.099504500207135e-06, "loss": 0.053, "step": 33472 }, { "epoch": 0.737587245974428, "grad_norm": 0.9111437201499939, "learning_rate": 5.098700300208409e-06, "loss": 0.0731, "step": 33473 }, { "epoch": 0.7376092812639442, "grad_norm": 0.4639087915420532, "learning_rate": 5.097896150642312e-06, "loss": 0.0532, "step": 33474 }, { "epoch": 0.7376313165534604, "grad_norm": 0.2781970500946045, "learning_rate": 5.097092051512944e-06, "loss": 0.0341, "step": 33475 }, { "epoch": 0.7376533518429765, "grad_norm": 0.6103026866912842, "learning_rate": 5.096288002824393e-06, "loss": 0.0724, "step": 33476 }, { "epoch": 0.7376753871324927, "grad_norm": 0.3076574206352234, "learning_rate": 5.095484004580756e-06, "loss": 0.0544, "step": 33477 }, { "epoch": 0.7376974224220089, "grad_norm": 0.6874740123748779, "learning_rate": 5.0946800567861365e-06, "loss": 0.071, "step": 33478 }, { "epoch": 0.737719457711525, "grad_norm": 0.3152591586112976, "learning_rate": 5.093876159444616e-06, "loss": 0.054, "step": 33479 }, { "epoch": 0.7377414930010412, "grad_norm": 0.7171990871429443, "learning_rate": 5.093072312560304e-06, "loss": 0.0674, "step": 33480 }, { "epoch": 0.7377635282905574, "grad_norm": 0.5781530141830444, "learning_rate": 5.092268516137275e-06, "loss": 0.0684, "step": 33481 }, { "epoch": 0.7377855635800735, "grad_norm": 0.7770044207572937, "learning_rate": 5.091464770179648e-06, "loss": 0.0707, "step": 33482 }, { "epoch": 0.7378075988695897, "grad_norm": 0.7637043595314026, "learning_rate": 5.090661074691497e-06, "loss": 0.082, "step": 33483 }, { "epoch": 0.7378296341591059, "grad_norm": 0.6396543383598328, "learning_rate": 5.08985742967693e-06, "loss": 0.0592, "step": 33484 }, { "epoch": 0.737851669448622, "grad_norm": 0.5997832417488098, "learning_rate": 5.089053835140027e-06, "loss": 0.0503, "step": 33485 }, { "epoch": 0.7378737047381382, "grad_norm": 0.7231014966964722, "learning_rate": 5.088250291084887e-06, "loss": 0.0595, "step": 33486 }, { "epoch": 0.7378957400276542, "grad_norm": 0.629055917263031, "learning_rate": 5.087446797515609e-06, "loss": 0.0592, "step": 33487 }, { "epoch": 0.7379177753171704, "grad_norm": 0.5009335279464722, "learning_rate": 5.086643354436276e-06, "loss": 0.0648, "step": 33488 }, { "epoch": 0.7379398106066866, "grad_norm": 0.46291324496269226, "learning_rate": 5.085839961850984e-06, "loss": 0.0615, "step": 33489 }, { "epoch": 0.7379618458962027, "grad_norm": 0.31164297461509705, "learning_rate": 5.085036619763824e-06, "loss": 0.0335, "step": 33490 }, { "epoch": 0.7379838811857189, "grad_norm": 0.6738730669021606, "learning_rate": 5.084233328178894e-06, "loss": 0.0885, "step": 33491 }, { "epoch": 0.7380059164752351, "grad_norm": 0.946364164352417, "learning_rate": 5.083430087100278e-06, "loss": 0.0485, "step": 33492 }, { "epoch": 0.7380279517647512, "grad_norm": 0.515652596950531, "learning_rate": 5.082626896532068e-06, "loss": 0.0386, "step": 33493 }, { "epoch": 0.7380499870542674, "grad_norm": 0.5629586577415466, "learning_rate": 5.081823756478365e-06, "loss": 0.0384, "step": 33494 }, { "epoch": 0.7380720223437836, "grad_norm": 0.5495250225067139, "learning_rate": 5.081020666943245e-06, "loss": 0.0573, "step": 33495 }, { "epoch": 0.7380940576332997, "grad_norm": 0.5400803685188293, "learning_rate": 5.0802176279308085e-06, "loss": 0.0425, "step": 33496 }, { "epoch": 0.7381160929228159, "grad_norm": 0.6207651495933533, "learning_rate": 5.079414639445141e-06, "loss": 0.061, "step": 33497 }, { "epoch": 0.738138128212332, "grad_norm": 0.559287965297699, "learning_rate": 5.07861170149034e-06, "loss": 0.0433, "step": 33498 }, { "epoch": 0.7381601635018482, "grad_norm": 0.5211800932884216, "learning_rate": 5.077808814070482e-06, "loss": 0.0664, "step": 33499 }, { "epoch": 0.7381821987913644, "grad_norm": 0.6502419710159302, "learning_rate": 5.077005977189669e-06, "loss": 0.0521, "step": 33500 }, { "epoch": 0.7382042340808805, "grad_norm": 0.4650515019893646, "learning_rate": 5.076203190851988e-06, "loss": 0.0504, "step": 33501 }, { "epoch": 0.7382262693703967, "grad_norm": 1.0576375722885132, "learning_rate": 5.075400455061522e-06, "loss": 0.0974, "step": 33502 }, { "epoch": 0.7382483046599129, "grad_norm": 0.7504174709320068, "learning_rate": 5.074597769822367e-06, "loss": 0.053, "step": 33503 }, { "epoch": 0.738270339949429, "grad_norm": 0.4861263036727905, "learning_rate": 5.0737951351385994e-06, "loss": 0.0873, "step": 33504 }, { "epoch": 0.7382923752389452, "grad_norm": 0.33428850769996643, "learning_rate": 5.072992551014324e-06, "loss": 0.0631, "step": 33505 }, { "epoch": 0.7383144105284614, "grad_norm": 0.9294875264167786, "learning_rate": 5.0721900174536175e-06, "loss": 0.0771, "step": 33506 }, { "epoch": 0.7383364458179775, "grad_norm": 0.5949575901031494, "learning_rate": 5.071387534460577e-06, "loss": 0.0672, "step": 33507 }, { "epoch": 0.7383584811074937, "grad_norm": 0.5967565178871155, "learning_rate": 5.0705851020392716e-06, "loss": 0.0587, "step": 33508 }, { "epoch": 0.7383805163970099, "grad_norm": 0.994289219379425, "learning_rate": 5.069782720193812e-06, "loss": 0.0832, "step": 33509 }, { "epoch": 0.738402551686526, "grad_norm": 0.4210908114910126, "learning_rate": 5.06898038892827e-06, "loss": 0.0794, "step": 33510 }, { "epoch": 0.7384245869760422, "grad_norm": 0.8486480712890625, "learning_rate": 5.06817810824674e-06, "loss": 0.0754, "step": 33511 }, { "epoch": 0.7384466222655582, "grad_norm": 0.619455873966217, "learning_rate": 5.067375878153299e-06, "loss": 0.0465, "step": 33512 }, { "epoch": 0.7384686575550744, "grad_norm": 0.6307885646820068, "learning_rate": 5.066573698652041e-06, "loss": 0.0456, "step": 33513 }, { "epoch": 0.7384906928445906, "grad_norm": 0.5105963945388794, "learning_rate": 5.065771569747054e-06, "loss": 0.0581, "step": 33514 }, { "epoch": 0.7385127281341067, "grad_norm": 0.8449806571006775, "learning_rate": 5.064969491442408e-06, "loss": 0.0871, "step": 33515 }, { "epoch": 0.7385347634236229, "grad_norm": 0.5712602138519287, "learning_rate": 5.064167463742212e-06, "loss": 0.0789, "step": 33516 }, { "epoch": 0.7385567987131391, "grad_norm": 0.7859227061271667, "learning_rate": 5.063365486650533e-06, "loss": 0.0667, "step": 33517 }, { "epoch": 0.7385788340026552, "grad_norm": 0.7001278400421143, "learning_rate": 5.062563560171468e-06, "loss": 0.0584, "step": 33518 }, { "epoch": 0.7386008692921714, "grad_norm": 0.4096755087375641, "learning_rate": 5.061761684309091e-06, "loss": 0.0459, "step": 33519 }, { "epoch": 0.7386229045816876, "grad_norm": 0.262408047914505, "learning_rate": 5.0609598590674896e-06, "loss": 0.0546, "step": 33520 }, { "epoch": 0.7386449398712037, "grad_norm": 0.5390611290931702, "learning_rate": 5.060158084450756e-06, "loss": 0.0479, "step": 33521 }, { "epoch": 0.7386669751607199, "grad_norm": 0.5682088136672974, "learning_rate": 5.05935636046296e-06, "loss": 0.0715, "step": 33522 }, { "epoch": 0.738689010450236, "grad_norm": 0.42114222049713135, "learning_rate": 5.058554687108195e-06, "loss": 0.0512, "step": 33523 }, { "epoch": 0.7387110457397522, "grad_norm": 0.4954730272293091, "learning_rate": 5.057753064390541e-06, "loss": 0.0687, "step": 33524 }, { "epoch": 0.7387330810292684, "grad_norm": 0.5604771375656128, "learning_rate": 5.056951492314086e-06, "loss": 0.0556, "step": 33525 }, { "epoch": 0.7387551163187845, "grad_norm": 0.4916173219680786, "learning_rate": 5.056149970882905e-06, "loss": 0.0635, "step": 33526 }, { "epoch": 0.7387771516083007, "grad_norm": 0.6529445648193359, "learning_rate": 5.055348500101085e-06, "loss": 0.0443, "step": 33527 }, { "epoch": 0.7387991868978169, "grad_norm": 0.38821864128112793, "learning_rate": 5.054547079972713e-06, "loss": 0.0549, "step": 33528 }, { "epoch": 0.738821222187333, "grad_norm": 0.430369108915329, "learning_rate": 5.05374571050186e-06, "loss": 0.0654, "step": 33529 }, { "epoch": 0.7388432574768492, "grad_norm": 0.7354964017868042, "learning_rate": 5.052944391692619e-06, "loss": 0.0924, "step": 33530 }, { "epoch": 0.7388652927663654, "grad_norm": 0.5761250853538513, "learning_rate": 5.052143123549057e-06, "loss": 0.0415, "step": 33531 }, { "epoch": 0.7388873280558815, "grad_norm": 0.6930521130561829, "learning_rate": 5.051341906075274e-06, "loss": 0.0569, "step": 33532 }, { "epoch": 0.7389093633453977, "grad_norm": 0.8942204713821411, "learning_rate": 5.050540739275335e-06, "loss": 0.0607, "step": 33533 }, { "epoch": 0.7389313986349139, "grad_norm": 0.5980067253112793, "learning_rate": 5.0497396231533345e-06, "loss": 0.0366, "step": 33534 }, { "epoch": 0.73895343392443, "grad_norm": 0.7194321155548096, "learning_rate": 5.04893855771334e-06, "loss": 0.0659, "step": 33535 }, { "epoch": 0.7389754692139462, "grad_norm": 0.7276974320411682, "learning_rate": 5.048137542959438e-06, "loss": 0.0628, "step": 33536 }, { "epoch": 0.7389975045034622, "grad_norm": 0.5561928153038025, "learning_rate": 5.047336578895713e-06, "loss": 0.0849, "step": 33537 }, { "epoch": 0.7390195397929784, "grad_norm": 0.7140436172485352, "learning_rate": 5.046535665526229e-06, "loss": 0.0587, "step": 33538 }, { "epoch": 0.7390415750824946, "grad_norm": 0.6004961133003235, "learning_rate": 5.045734802855086e-06, "loss": 0.0719, "step": 33539 }, { "epoch": 0.7390636103720107, "grad_norm": 0.7760924696922302, "learning_rate": 5.044933990886351e-06, "loss": 0.0993, "step": 33540 }, { "epoch": 0.7390856456615269, "grad_norm": 0.9217644929885864, "learning_rate": 5.044133229624109e-06, "loss": 0.0855, "step": 33541 }, { "epoch": 0.7391076809510431, "grad_norm": 0.5365381836891174, "learning_rate": 5.043332519072429e-06, "loss": 0.0769, "step": 33542 }, { "epoch": 0.7391297162405592, "grad_norm": 0.7179238796234131, "learning_rate": 5.042531859235397e-06, "loss": 0.0484, "step": 33543 }, { "epoch": 0.7391517515300754, "grad_norm": 0.45369887351989746, "learning_rate": 5.041731250117096e-06, "loss": 0.0397, "step": 33544 }, { "epoch": 0.7391737868195916, "grad_norm": 0.6498338580131531, "learning_rate": 5.0409306917215915e-06, "loss": 0.0687, "step": 33545 }, { "epoch": 0.7391958221091077, "grad_norm": 0.3058509826660156, "learning_rate": 5.040130184052967e-06, "loss": 0.047, "step": 33546 }, { "epoch": 0.7392178573986239, "grad_norm": 0.7472463250160217, "learning_rate": 5.039329727115301e-06, "loss": 0.0793, "step": 33547 }, { "epoch": 0.73923989268814, "grad_norm": 0.8605637550354004, "learning_rate": 5.038529320912676e-06, "loss": 0.0774, "step": 33548 }, { "epoch": 0.7392619279776562, "grad_norm": 0.5832164287567139, "learning_rate": 5.037728965449155e-06, "loss": 0.0716, "step": 33549 }, { "epoch": 0.7392839632671724, "grad_norm": 0.4758276641368866, "learning_rate": 5.036928660728826e-06, "loss": 0.035, "step": 33550 }, { "epoch": 0.7393059985566885, "grad_norm": 0.7922165393829346, "learning_rate": 5.036128406755765e-06, "loss": 0.086, "step": 33551 }, { "epoch": 0.7393280338462047, "grad_norm": 0.6102539896965027, "learning_rate": 5.0353282035340405e-06, "loss": 0.0552, "step": 33552 }, { "epoch": 0.7393500691357209, "grad_norm": 0.45535972714424133, "learning_rate": 5.034528051067737e-06, "loss": 0.0428, "step": 33553 }, { "epoch": 0.739372104425237, "grad_norm": 0.4750058352947235, "learning_rate": 5.033727949360918e-06, "loss": 0.0739, "step": 33554 }, { "epoch": 0.7393941397147532, "grad_norm": 0.43268007040023804, "learning_rate": 5.032927898417676e-06, "loss": 0.0503, "step": 33555 }, { "epoch": 0.7394161750042694, "grad_norm": 0.7526571750640869, "learning_rate": 5.032127898242071e-06, "loss": 0.0756, "step": 33556 }, { "epoch": 0.7394382102937855, "grad_norm": 0.31346365809440613, "learning_rate": 5.031327948838185e-06, "loss": 0.0366, "step": 33557 }, { "epoch": 0.7394602455833017, "grad_norm": 0.4402133822441101, "learning_rate": 5.030528050210097e-06, "loss": 0.0675, "step": 33558 }, { "epoch": 0.7394822808728179, "grad_norm": 0.8099681735038757, "learning_rate": 5.029728202361869e-06, "loss": 0.0666, "step": 33559 }, { "epoch": 0.739504316162334, "grad_norm": 0.754412055015564, "learning_rate": 5.028928405297588e-06, "loss": 0.0588, "step": 33560 }, { "epoch": 0.7395263514518501, "grad_norm": 0.5255905389785767, "learning_rate": 5.028128659021311e-06, "loss": 0.0601, "step": 33561 }, { "epoch": 0.7395483867413662, "grad_norm": 0.1078692302107811, "learning_rate": 5.027328963537132e-06, "loss": 0.0641, "step": 33562 }, { "epoch": 0.7395704220308824, "grad_norm": 0.8102102875709534, "learning_rate": 5.02652931884911e-06, "loss": 0.0588, "step": 33563 }, { "epoch": 0.7395924573203986, "grad_norm": 0.9104576706886292, "learning_rate": 5.025729724961326e-06, "loss": 0.0796, "step": 33564 }, { "epoch": 0.7396144926099147, "grad_norm": 0.9595080018043518, "learning_rate": 5.024930181877845e-06, "loss": 0.0837, "step": 33565 }, { "epoch": 0.7396365278994309, "grad_norm": 0.3230021893978119, "learning_rate": 5.0241306896027424e-06, "loss": 0.0571, "step": 33566 }, { "epoch": 0.7396585631889471, "grad_norm": 0.5159747004508972, "learning_rate": 5.023331248140099e-06, "loss": 0.0485, "step": 33567 }, { "epoch": 0.7396805984784632, "grad_norm": 0.7125446200370789, "learning_rate": 5.022531857493972e-06, "loss": 0.0849, "step": 33568 }, { "epoch": 0.7397026337679794, "grad_norm": 0.7534168362617493, "learning_rate": 5.021732517668443e-06, "loss": 0.0846, "step": 33569 }, { "epoch": 0.7397246690574956, "grad_norm": 0.5111914873123169, "learning_rate": 5.02093322866758e-06, "loss": 0.0315, "step": 33570 }, { "epoch": 0.7397467043470117, "grad_norm": 0.46393871307373047, "learning_rate": 5.02013399049546e-06, "loss": 0.0641, "step": 33571 }, { "epoch": 0.7397687396365279, "grad_norm": 0.5768125057220459, "learning_rate": 5.019334803156146e-06, "loss": 0.0602, "step": 33572 }, { "epoch": 0.7397907749260441, "grad_norm": 0.8048369884490967, "learning_rate": 5.018535666653709e-06, "loss": 0.1064, "step": 33573 }, { "epoch": 0.7398128102155602, "grad_norm": 0.17669770121574402, "learning_rate": 5.017736580992229e-06, "loss": 0.0642, "step": 33574 }, { "epoch": 0.7398348455050764, "grad_norm": 0.9365371465682983, "learning_rate": 5.016937546175765e-06, "loss": 0.0718, "step": 33575 }, { "epoch": 0.7398568807945926, "grad_norm": 0.9085856676101685, "learning_rate": 5.016138562208396e-06, "loss": 0.0734, "step": 33576 }, { "epoch": 0.7398789160841087, "grad_norm": 0.3031107187271118, "learning_rate": 5.015339629094175e-06, "loss": 0.0595, "step": 33577 }, { "epoch": 0.7399009513736249, "grad_norm": 0.7298261523246765, "learning_rate": 5.014540746837197e-06, "loss": 0.0472, "step": 33578 }, { "epoch": 0.739922986663141, "grad_norm": 0.37241214513778687, "learning_rate": 5.01374191544151e-06, "loss": 0.0471, "step": 33579 }, { "epoch": 0.7399450219526572, "grad_norm": 0.5451434254646301, "learning_rate": 5.012943134911191e-06, "loss": 0.0616, "step": 33580 }, { "epoch": 0.7399670572421734, "grad_norm": 0.5004953145980835, "learning_rate": 5.012144405250313e-06, "loss": 0.0506, "step": 33581 }, { "epoch": 0.7399890925316895, "grad_norm": 0.3986909091472626, "learning_rate": 5.0113457264629355e-06, "loss": 0.0475, "step": 33582 }, { "epoch": 0.7400111278212057, "grad_norm": 0.5665118098258972, "learning_rate": 5.0105470985531355e-06, "loss": 0.0585, "step": 33583 }, { "epoch": 0.7400331631107219, "grad_norm": 0.5569437742233276, "learning_rate": 5.009748521524965e-06, "loss": 0.0451, "step": 33584 }, { "epoch": 0.740055198400238, "grad_norm": 0.5196760892868042, "learning_rate": 5.008949995382513e-06, "loss": 0.0511, "step": 33585 }, { "epoch": 0.7400772336897541, "grad_norm": 0.6515592336654663, "learning_rate": 5.008151520129831e-06, "loss": 0.0676, "step": 33586 }, { "epoch": 0.7400992689792703, "grad_norm": 0.5741866827011108, "learning_rate": 5.007353095770997e-06, "loss": 0.0576, "step": 33587 }, { "epoch": 0.7401213042687864, "grad_norm": 0.8988909721374512, "learning_rate": 5.006554722310067e-06, "loss": 0.0945, "step": 33588 }, { "epoch": 0.7401433395583026, "grad_norm": 0.8413577079772949, "learning_rate": 5.005756399751111e-06, "loss": 0.0701, "step": 33589 }, { "epoch": 0.7401653748478187, "grad_norm": 0.5368834137916565, "learning_rate": 5.004958128098204e-06, "loss": 0.0728, "step": 33590 }, { "epoch": 0.7401874101373349, "grad_norm": 0.6217118501663208, "learning_rate": 5.004159907355399e-06, "loss": 0.0529, "step": 33591 }, { "epoch": 0.7402094454268511, "grad_norm": 0.4308238923549652, "learning_rate": 5.003361737526768e-06, "loss": 0.0485, "step": 33592 }, { "epoch": 0.7402314807163672, "grad_norm": 0.6872395277023315, "learning_rate": 5.002563618616377e-06, "loss": 0.0807, "step": 33593 }, { "epoch": 0.7402535160058834, "grad_norm": 0.5922868847846985, "learning_rate": 5.001765550628294e-06, "loss": 0.0691, "step": 33594 }, { "epoch": 0.7402755512953996, "grad_norm": 0.5156309604644775, "learning_rate": 5.000967533566577e-06, "loss": 0.065, "step": 33595 }, { "epoch": 0.7402975865849157, "grad_norm": 0.3899196982383728, "learning_rate": 5.000169567435294e-06, "loss": 0.0538, "step": 33596 }, { "epoch": 0.7403196218744319, "grad_norm": 0.5896218419075012, "learning_rate": 4.9993716522385135e-06, "loss": 0.0383, "step": 33597 }, { "epoch": 0.7403416571639481, "grad_norm": 0.4044354557991028, "learning_rate": 4.998573787980291e-06, "loss": 0.0356, "step": 33598 }, { "epoch": 0.7403636924534642, "grad_norm": 0.6963140368461609, "learning_rate": 4.997775974664703e-06, "loss": 0.0518, "step": 33599 }, { "epoch": 0.7403857277429804, "grad_norm": 0.4960790276527405, "learning_rate": 4.996978212295793e-06, "loss": 0.0403, "step": 33600 }, { "epoch": 0.7404077630324966, "grad_norm": 0.5673540234565735, "learning_rate": 4.996180500877649e-06, "loss": 0.0649, "step": 33601 }, { "epoch": 0.7404297983220127, "grad_norm": 0.7633129954338074, "learning_rate": 4.995382840414316e-06, "loss": 0.0599, "step": 33602 }, { "epoch": 0.7404518336115289, "grad_norm": 0.805720865726471, "learning_rate": 4.994585230909863e-06, "loss": 0.0865, "step": 33603 }, { "epoch": 0.740473868901045, "grad_norm": 0.593948483467102, "learning_rate": 4.993787672368358e-06, "loss": 0.0541, "step": 33604 }, { "epoch": 0.7404959041905612, "grad_norm": 0.5570946335792542, "learning_rate": 4.9929901647938535e-06, "loss": 0.0371, "step": 33605 }, { "epoch": 0.7405179394800774, "grad_norm": 0.40208959579467773, "learning_rate": 4.99219270819042e-06, "loss": 0.0524, "step": 33606 }, { "epoch": 0.7405399747695935, "grad_norm": 0.6336370706558228, "learning_rate": 4.9913953025621075e-06, "loss": 0.0614, "step": 33607 }, { "epoch": 0.7405620100591097, "grad_norm": 0.6433587074279785, "learning_rate": 4.9905979479129935e-06, "loss": 0.0558, "step": 33608 }, { "epoch": 0.7405840453486259, "grad_norm": 0.560309648513794, "learning_rate": 4.989800644247128e-06, "loss": 0.0734, "step": 33609 }, { "epoch": 0.740606080638142, "grad_norm": 0.28736579418182373, "learning_rate": 4.989003391568581e-06, "loss": 0.0469, "step": 33610 }, { "epoch": 0.7406281159276581, "grad_norm": 0.36504238843917847, "learning_rate": 4.988206189881397e-06, "loss": 0.0514, "step": 33611 }, { "epoch": 0.7406501512171743, "grad_norm": 0.5100148916244507, "learning_rate": 4.9874090391896585e-06, "loss": 0.0416, "step": 33612 }, { "epoch": 0.7406721865066904, "grad_norm": 0.880773663520813, "learning_rate": 4.986611939497415e-06, "loss": 0.0464, "step": 33613 }, { "epoch": 0.7406942217962066, "grad_norm": 0.48714178800582886, "learning_rate": 4.98581489080872e-06, "loss": 0.0874, "step": 33614 }, { "epoch": 0.7407162570857228, "grad_norm": 0.42108532786369324, "learning_rate": 4.985017893127642e-06, "loss": 0.0442, "step": 33615 }, { "epoch": 0.7407382923752389, "grad_norm": 0.638553261756897, "learning_rate": 4.984220946458236e-06, "loss": 0.0567, "step": 33616 }, { "epoch": 0.7407603276647551, "grad_norm": 0.6008275747299194, "learning_rate": 4.983424050804571e-06, "loss": 0.0735, "step": 33617 }, { "epoch": 0.7407823629542712, "grad_norm": 0.4499002993106842, "learning_rate": 4.9826272061706926e-06, "loss": 0.068, "step": 33618 }, { "epoch": 0.7408043982437874, "grad_norm": 0.6178157329559326, "learning_rate": 4.9818304125606664e-06, "loss": 0.0562, "step": 33619 }, { "epoch": 0.7408264335333036, "grad_norm": 0.6439237594604492, "learning_rate": 4.981033669978548e-06, "loss": 0.0881, "step": 33620 }, { "epoch": 0.7408484688228197, "grad_norm": 0.4868532419204712, "learning_rate": 4.980236978428405e-06, "loss": 0.0482, "step": 33621 }, { "epoch": 0.7408705041123359, "grad_norm": 0.19231650233268738, "learning_rate": 4.979440337914281e-06, "loss": 0.0888, "step": 33622 }, { "epoch": 0.7408925394018521, "grad_norm": 0.7075595855712891, "learning_rate": 4.978643748440243e-06, "loss": 0.0676, "step": 33623 }, { "epoch": 0.7409145746913682, "grad_norm": 0.3993929326534271, "learning_rate": 4.97784721001035e-06, "loss": 0.0467, "step": 33624 }, { "epoch": 0.7409366099808844, "grad_norm": 0.31102728843688965, "learning_rate": 4.977050722628651e-06, "loss": 0.0905, "step": 33625 }, { "epoch": 0.7409586452704006, "grad_norm": 0.7547758221626282, "learning_rate": 4.976254286299206e-06, "loss": 0.0556, "step": 33626 }, { "epoch": 0.7409806805599167, "grad_norm": 0.7303404211997986, "learning_rate": 4.975457901026073e-06, "loss": 0.0755, "step": 33627 }, { "epoch": 0.7410027158494329, "grad_norm": 0.15402106940746307, "learning_rate": 4.974661566813315e-06, "loss": 0.0274, "step": 33628 }, { "epoch": 0.7410247511389491, "grad_norm": 0.9332618117332458, "learning_rate": 4.973865283664976e-06, "loss": 0.0686, "step": 33629 }, { "epoch": 0.7410467864284652, "grad_norm": 0.9041937589645386, "learning_rate": 4.973069051585119e-06, "loss": 0.0885, "step": 33630 }, { "epoch": 0.7410688217179814, "grad_norm": 0.35286271572113037, "learning_rate": 4.972272870577802e-06, "loss": 0.0488, "step": 33631 }, { "epoch": 0.7410908570074975, "grad_norm": 0.5722730755805969, "learning_rate": 4.97147674064707e-06, "loss": 0.0781, "step": 33632 }, { "epoch": 0.7411128922970137, "grad_norm": 0.8593286275863647, "learning_rate": 4.970680661796992e-06, "loss": 0.0647, "step": 33633 }, { "epoch": 0.7411349275865299, "grad_norm": 0.7639630436897278, "learning_rate": 4.969884634031604e-06, "loss": 0.0892, "step": 33634 }, { "epoch": 0.7411569628760459, "grad_norm": 0.20970991253852844, "learning_rate": 4.969088657354983e-06, "loss": 0.0565, "step": 33635 }, { "epoch": 0.7411789981655621, "grad_norm": 0.5118655562400818, "learning_rate": 4.968292731771167e-06, "loss": 0.0573, "step": 33636 }, { "epoch": 0.7412010334550783, "grad_norm": 0.6182146072387695, "learning_rate": 4.967496857284221e-06, "loss": 0.0535, "step": 33637 }, { "epoch": 0.7412230687445944, "grad_norm": 0.5230509638786316, "learning_rate": 4.9667010338981896e-06, "loss": 0.0561, "step": 33638 }, { "epoch": 0.7412451040341106, "grad_norm": 0.8742294907569885, "learning_rate": 4.9659052616171275e-06, "loss": 0.0773, "step": 33639 }, { "epoch": 0.7412671393236268, "grad_norm": 0.4902589023113251, "learning_rate": 4.965109540445098e-06, "loss": 0.056, "step": 33640 }, { "epoch": 0.7412891746131429, "grad_norm": 0.7618261575698853, "learning_rate": 4.964313870386139e-06, "loss": 0.0778, "step": 33641 }, { "epoch": 0.7413112099026591, "grad_norm": 0.39873382449150085, "learning_rate": 4.963518251444314e-06, "loss": 0.0467, "step": 33642 }, { "epoch": 0.7413332451921752, "grad_norm": 0.32407867908477783, "learning_rate": 4.9627226836236705e-06, "loss": 0.0637, "step": 33643 }, { "epoch": 0.7413552804816914, "grad_norm": 0.46908602118492126, "learning_rate": 4.961927166928268e-06, "loss": 0.0592, "step": 33644 }, { "epoch": 0.7413773157712076, "grad_norm": 0.6518688797950745, "learning_rate": 4.961131701362149e-06, "loss": 0.059, "step": 33645 }, { "epoch": 0.7413993510607237, "grad_norm": 0.5443639755249023, "learning_rate": 4.96033628692937e-06, "loss": 0.0496, "step": 33646 }, { "epoch": 0.7414213863502399, "grad_norm": 0.7382034659385681, "learning_rate": 4.959540923633985e-06, "loss": 0.0703, "step": 33647 }, { "epoch": 0.7414434216397561, "grad_norm": 0.6201143860816956, "learning_rate": 4.95874561148004e-06, "loss": 0.0451, "step": 33648 }, { "epoch": 0.7414654569292722, "grad_norm": 0.6411921977996826, "learning_rate": 4.957950350471585e-06, "loss": 0.0701, "step": 33649 }, { "epoch": 0.7414874922187884, "grad_norm": 0.7393618226051331, "learning_rate": 4.957155140612675e-06, "loss": 0.0644, "step": 33650 }, { "epoch": 0.7415095275083046, "grad_norm": 0.3142610788345337, "learning_rate": 4.9563599819073655e-06, "loss": 0.0717, "step": 33651 }, { "epoch": 0.7415315627978207, "grad_norm": 0.46949100494384766, "learning_rate": 4.955564874359695e-06, "loss": 0.0635, "step": 33652 }, { "epoch": 0.7415535980873369, "grad_norm": 0.6215627193450928, "learning_rate": 4.954769817973717e-06, "loss": 0.0593, "step": 33653 }, { "epoch": 0.7415756333768531, "grad_norm": 0.4384118914604187, "learning_rate": 4.953974812753491e-06, "loss": 0.0507, "step": 33654 }, { "epoch": 0.7415976686663692, "grad_norm": 0.30422234535217285, "learning_rate": 4.953179858703052e-06, "loss": 0.0603, "step": 33655 }, { "epoch": 0.7416197039558854, "grad_norm": 0.547227144241333, "learning_rate": 4.95238495582646e-06, "loss": 0.0655, "step": 33656 }, { "epoch": 0.7416417392454016, "grad_norm": 0.6689955592155457, "learning_rate": 4.95159010412775e-06, "loss": 0.0817, "step": 33657 }, { "epoch": 0.7416637745349177, "grad_norm": 0.5099367499351501, "learning_rate": 4.950795303610989e-06, "loss": 0.0445, "step": 33658 }, { "epoch": 0.7416858098244339, "grad_norm": 0.6029869318008423, "learning_rate": 4.950000554280211e-06, "loss": 0.0648, "step": 33659 }, { "epoch": 0.7417078451139499, "grad_norm": 0.5675944089889526, "learning_rate": 4.949205856139476e-06, "loss": 0.0485, "step": 33660 }, { "epoch": 0.7417298804034661, "grad_norm": 0.551857590675354, "learning_rate": 4.94841120919282e-06, "loss": 0.0371, "step": 33661 }, { "epoch": 0.7417519156929823, "grad_norm": 0.6982070207595825, "learning_rate": 4.947616613444296e-06, "loss": 0.0646, "step": 33662 }, { "epoch": 0.7417739509824984, "grad_norm": 0.6623705625534058, "learning_rate": 4.946822068897955e-06, "loss": 0.0463, "step": 33663 }, { "epoch": 0.7417959862720146, "grad_norm": 0.5786981582641602, "learning_rate": 4.946027575557835e-06, "loss": 0.0664, "step": 33664 }, { "epoch": 0.7418180215615308, "grad_norm": 0.8268603682518005, "learning_rate": 4.945233133427989e-06, "loss": 0.053, "step": 33665 }, { "epoch": 0.7418400568510469, "grad_norm": 0.5749466419219971, "learning_rate": 4.944438742512462e-06, "loss": 0.0625, "step": 33666 }, { "epoch": 0.7418620921405631, "grad_norm": 0.6389812231063843, "learning_rate": 4.943644402815307e-06, "loss": 0.0521, "step": 33667 }, { "epoch": 0.7418841274300793, "grad_norm": 1.0176550149917603, "learning_rate": 4.942850114340556e-06, "loss": 0.0672, "step": 33668 }, { "epoch": 0.7419061627195954, "grad_norm": 0.48310911655426025, "learning_rate": 4.942055877092265e-06, "loss": 0.0614, "step": 33669 }, { "epoch": 0.7419281980091116, "grad_norm": 0.4916258156299591, "learning_rate": 4.941261691074481e-06, "loss": 0.0469, "step": 33670 }, { "epoch": 0.7419502332986277, "grad_norm": 0.7903904914855957, "learning_rate": 4.94046755629124e-06, "loss": 0.0486, "step": 33671 }, { "epoch": 0.7419722685881439, "grad_norm": 0.4534071981906891, "learning_rate": 4.939673472746592e-06, "loss": 0.0646, "step": 33672 }, { "epoch": 0.7419943038776601, "grad_norm": 0.6991276144981384, "learning_rate": 4.9388794404445816e-06, "loss": 0.0545, "step": 33673 }, { "epoch": 0.7420163391671762, "grad_norm": 0.5941031575202942, "learning_rate": 4.938085459389259e-06, "loss": 0.0621, "step": 33674 }, { "epoch": 0.7420383744566924, "grad_norm": 1.0297365188598633, "learning_rate": 4.937291529584657e-06, "loss": 0.0948, "step": 33675 }, { "epoch": 0.7420604097462086, "grad_norm": 0.7087058424949646, "learning_rate": 4.936497651034825e-06, "loss": 0.048, "step": 33676 }, { "epoch": 0.7420824450357247, "grad_norm": 0.6700248122215271, "learning_rate": 4.935703823743814e-06, "loss": 0.0537, "step": 33677 }, { "epoch": 0.7421044803252409, "grad_norm": 0.5752222537994385, "learning_rate": 4.9349100477156536e-06, "loss": 0.031, "step": 33678 }, { "epoch": 0.7421265156147571, "grad_norm": 0.8204594254493713, "learning_rate": 4.934116322954398e-06, "loss": 0.0752, "step": 33679 }, { "epoch": 0.7421485509042732, "grad_norm": 0.5559899806976318, "learning_rate": 4.933322649464078e-06, "loss": 0.0645, "step": 33680 }, { "epoch": 0.7421705861937894, "grad_norm": 0.8153085708618164, "learning_rate": 4.9325290272487526e-06, "loss": 0.0594, "step": 33681 }, { "epoch": 0.7421926214833056, "grad_norm": 0.5630114078521729, "learning_rate": 4.9317354563124506e-06, "loss": 0.0655, "step": 33682 }, { "epoch": 0.7422146567728217, "grad_norm": 0.6018143892288208, "learning_rate": 4.930941936659223e-06, "loss": 0.0511, "step": 33683 }, { "epoch": 0.7422366920623379, "grad_norm": 0.5034214854240417, "learning_rate": 4.930148468293104e-06, "loss": 0.0707, "step": 33684 }, { "epoch": 0.7422587273518539, "grad_norm": 0.45911386609077454, "learning_rate": 4.9293550512181375e-06, "loss": 0.052, "step": 33685 }, { "epoch": 0.7422807626413701, "grad_norm": 0.5342603921890259, "learning_rate": 4.928561685438372e-06, "loss": 0.0467, "step": 33686 }, { "epoch": 0.7423027979308863, "grad_norm": 0.7196674942970276, "learning_rate": 4.927768370957838e-06, "loss": 0.0611, "step": 33687 }, { "epoch": 0.7423248332204024, "grad_norm": 0.3565351963043213, "learning_rate": 4.92697510778058e-06, "loss": 0.038, "step": 33688 }, { "epoch": 0.7423468685099186, "grad_norm": 0.9757276177406311, "learning_rate": 4.926181895910639e-06, "loss": 0.062, "step": 33689 }, { "epoch": 0.7423689037994348, "grad_norm": 0.5915318727493286, "learning_rate": 4.925388735352062e-06, "loss": 0.0553, "step": 33690 }, { "epoch": 0.7423909390889509, "grad_norm": 0.343635618686676, "learning_rate": 4.924595626108877e-06, "loss": 0.0641, "step": 33691 }, { "epoch": 0.7424129743784671, "grad_norm": 0.6417864561080933, "learning_rate": 4.9238025681851286e-06, "loss": 0.059, "step": 33692 }, { "epoch": 0.7424350096679833, "grad_norm": 0.7371377348899841, "learning_rate": 4.923009561584863e-06, "loss": 0.0595, "step": 33693 }, { "epoch": 0.7424570449574994, "grad_norm": 0.5630049109458923, "learning_rate": 4.9222166063121075e-06, "loss": 0.0702, "step": 33694 }, { "epoch": 0.7424790802470156, "grad_norm": 0.44195884466171265, "learning_rate": 4.921423702370908e-06, "loss": 0.041, "step": 33695 }, { "epoch": 0.7425011155365318, "grad_norm": 0.6836602687835693, "learning_rate": 4.9206308497653006e-06, "loss": 0.0597, "step": 33696 }, { "epoch": 0.7425231508260479, "grad_norm": 0.6066451668739319, "learning_rate": 4.919838048499331e-06, "loss": 0.068, "step": 33697 }, { "epoch": 0.7425451861155641, "grad_norm": 0.4276498556137085, "learning_rate": 4.919045298577027e-06, "loss": 0.0456, "step": 33698 }, { "epoch": 0.7425672214050802, "grad_norm": 0.5084959864616394, "learning_rate": 4.918252600002431e-06, "loss": 0.0488, "step": 33699 }, { "epoch": 0.7425892566945964, "grad_norm": 0.7082292437553406, "learning_rate": 4.917459952779587e-06, "loss": 0.0408, "step": 33700 }, { "epoch": 0.7426112919841126, "grad_norm": 0.5072237849235535, "learning_rate": 4.91666735691252e-06, "loss": 0.0531, "step": 33701 }, { "epoch": 0.7426333272736287, "grad_norm": 0.9559771418571472, "learning_rate": 4.915874812405278e-06, "loss": 0.0504, "step": 33702 }, { "epoch": 0.7426553625631449, "grad_norm": 0.42748022079467773, "learning_rate": 4.915082319261882e-06, "loss": 0.0717, "step": 33703 }, { "epoch": 0.7426773978526611, "grad_norm": 0.3451981246471405, "learning_rate": 4.914289877486392e-06, "loss": 0.0613, "step": 33704 }, { "epoch": 0.7426994331421772, "grad_norm": 0.8380248546600342, "learning_rate": 4.913497487082826e-06, "loss": 0.0683, "step": 33705 }, { "epoch": 0.7427214684316934, "grad_norm": 0.4688389301300049, "learning_rate": 4.912705148055231e-06, "loss": 0.044, "step": 33706 }, { "epoch": 0.7427435037212096, "grad_norm": 0.6385434865951538, "learning_rate": 4.911912860407632e-06, "loss": 0.0431, "step": 33707 }, { "epoch": 0.7427655390107257, "grad_norm": 0.3034321367740631, "learning_rate": 4.911120624144072e-06, "loss": 0.0734, "step": 33708 }, { "epoch": 0.7427875743002418, "grad_norm": 0.5442022681236267, "learning_rate": 4.91032843926859e-06, "loss": 0.0395, "step": 33709 }, { "epoch": 0.742809609589758, "grad_norm": 1.0954973697662354, "learning_rate": 4.909536305785209e-06, "loss": 0.0728, "step": 33710 }, { "epoch": 0.7428316448792741, "grad_norm": 0.4505407512187958, "learning_rate": 4.908744223697972e-06, "loss": 0.0842, "step": 33711 }, { "epoch": 0.7428536801687903, "grad_norm": 1.0122267007827759, "learning_rate": 4.9079521930109114e-06, "loss": 0.0829, "step": 33712 }, { "epoch": 0.7428757154583064, "grad_norm": 0.3059362769126892, "learning_rate": 4.907160213728067e-06, "loss": 0.0519, "step": 33713 }, { "epoch": 0.7428977507478226, "grad_norm": 0.38285794854164124, "learning_rate": 4.906368285853463e-06, "loss": 0.0676, "step": 33714 }, { "epoch": 0.7429197860373388, "grad_norm": 0.3098142743110657, "learning_rate": 4.905576409391139e-06, "loss": 0.0679, "step": 33715 }, { "epoch": 0.7429418213268549, "grad_norm": 0.3589680790901184, "learning_rate": 4.90478458434513e-06, "loss": 0.0523, "step": 33716 }, { "epoch": 0.7429638566163711, "grad_norm": 0.9419198036193848, "learning_rate": 4.903992810719464e-06, "loss": 0.0774, "step": 33717 }, { "epoch": 0.7429858919058873, "grad_norm": 0.7242596745491028, "learning_rate": 4.9032010885181746e-06, "loss": 0.0536, "step": 33718 }, { "epoch": 0.7430079271954034, "grad_norm": 0.7767217755317688, "learning_rate": 4.902409417745298e-06, "loss": 0.0555, "step": 33719 }, { "epoch": 0.7430299624849196, "grad_norm": 0.3478870689868927, "learning_rate": 4.901617798404868e-06, "loss": 0.0659, "step": 33720 }, { "epoch": 0.7430519977744358, "grad_norm": 0.37892764806747437, "learning_rate": 4.900826230500912e-06, "loss": 0.0641, "step": 33721 }, { "epoch": 0.7430740330639519, "grad_norm": 0.5532506704330444, "learning_rate": 4.900034714037461e-06, "loss": 0.0711, "step": 33722 }, { "epoch": 0.7430960683534681, "grad_norm": 0.4423776865005493, "learning_rate": 4.899243249018549e-06, "loss": 0.0587, "step": 33723 }, { "epoch": 0.7431181036429843, "grad_norm": 0.7248038053512573, "learning_rate": 4.898451835448215e-06, "loss": 0.093, "step": 33724 }, { "epoch": 0.7431401389325004, "grad_norm": 0.8059253096580505, "learning_rate": 4.897660473330481e-06, "loss": 0.083, "step": 33725 }, { "epoch": 0.7431621742220166, "grad_norm": 0.5700528025627136, "learning_rate": 4.896869162669369e-06, "loss": 0.0962, "step": 33726 }, { "epoch": 0.7431842095115327, "grad_norm": 0.40272530913352966, "learning_rate": 4.8960779034689314e-06, "loss": 0.0692, "step": 33727 }, { "epoch": 0.7432062448010489, "grad_norm": 0.5577307343482971, "learning_rate": 4.895286695733181e-06, "loss": 0.0544, "step": 33728 }, { "epoch": 0.7432282800905651, "grad_norm": 0.5020532608032227, "learning_rate": 4.89449553946616e-06, "loss": 0.0807, "step": 33729 }, { "epoch": 0.7432503153800812, "grad_norm": 0.5832090377807617, "learning_rate": 4.893704434671882e-06, "loss": 0.0445, "step": 33730 }, { "epoch": 0.7432723506695974, "grad_norm": 0.8886818885803223, "learning_rate": 4.892913381354396e-06, "loss": 0.0592, "step": 33731 }, { "epoch": 0.7432943859591136, "grad_norm": 0.643307626247406, "learning_rate": 4.8921223795177185e-06, "loss": 0.0801, "step": 33732 }, { "epoch": 0.7433164212486297, "grad_norm": 0.6272056698799133, "learning_rate": 4.891331429165886e-06, "loss": 0.0789, "step": 33733 }, { "epoch": 0.7433384565381458, "grad_norm": 0.11999083310365677, "learning_rate": 4.890540530302918e-06, "loss": 0.0531, "step": 33734 }, { "epoch": 0.743360491827662, "grad_norm": 0.6157465577125549, "learning_rate": 4.889749682932849e-06, "loss": 0.087, "step": 33735 }, { "epoch": 0.7433825271171781, "grad_norm": 0.6647385954856873, "learning_rate": 4.8889588870597105e-06, "loss": 0.0596, "step": 33736 }, { "epoch": 0.7434045624066943, "grad_norm": 0.7674902677536011, "learning_rate": 4.888168142687516e-06, "loss": 0.0718, "step": 33737 }, { "epoch": 0.7434265976962104, "grad_norm": 0.5773390531539917, "learning_rate": 4.887377449820316e-06, "loss": 0.0776, "step": 33738 }, { "epoch": 0.7434486329857266, "grad_norm": 0.6776533126831055, "learning_rate": 4.886586808462119e-06, "loss": 0.0524, "step": 33739 }, { "epoch": 0.7434706682752428, "grad_norm": 0.7522815465927124, "learning_rate": 4.885796218616962e-06, "loss": 0.0563, "step": 33740 }, { "epoch": 0.7434927035647589, "grad_norm": 0.7061267495155334, "learning_rate": 4.885005680288866e-06, "loss": 0.065, "step": 33741 }, { "epoch": 0.7435147388542751, "grad_norm": 0.3974255323410034, "learning_rate": 4.884215193481859e-06, "loss": 0.057, "step": 33742 }, { "epoch": 0.7435367741437913, "grad_norm": 0.7913306355476379, "learning_rate": 4.883424758199973e-06, "loss": 0.0688, "step": 33743 }, { "epoch": 0.7435588094333074, "grad_norm": 0.4115007519721985, "learning_rate": 4.882634374447225e-06, "loss": 0.0386, "step": 33744 }, { "epoch": 0.7435808447228236, "grad_norm": 0.3098762035369873, "learning_rate": 4.881844042227646e-06, "loss": 0.0261, "step": 33745 }, { "epoch": 0.7436028800123398, "grad_norm": 0.7536119818687439, "learning_rate": 4.881053761545261e-06, "loss": 0.0658, "step": 33746 }, { "epoch": 0.7436249153018559, "grad_norm": 0.3354238271713257, "learning_rate": 4.8802635324041e-06, "loss": 0.0408, "step": 33747 }, { "epoch": 0.7436469505913721, "grad_norm": 0.7144671678543091, "learning_rate": 4.87947335480818e-06, "loss": 0.0736, "step": 33748 }, { "epoch": 0.7436689858808883, "grad_norm": 0.41122472286224365, "learning_rate": 4.878683228761528e-06, "loss": 0.0523, "step": 33749 }, { "epoch": 0.7436910211704044, "grad_norm": 0.5500991344451904, "learning_rate": 4.877893154268174e-06, "loss": 0.0686, "step": 33750 }, { "epoch": 0.7437130564599206, "grad_norm": 0.32873836159706116, "learning_rate": 4.877103131332133e-06, "loss": 0.0665, "step": 33751 }, { "epoch": 0.7437350917494367, "grad_norm": 1.072593092918396, "learning_rate": 4.8763131599574405e-06, "loss": 0.0878, "step": 33752 }, { "epoch": 0.7437571270389529, "grad_norm": 0.6577169299125671, "learning_rate": 4.8755232401481035e-06, "loss": 0.0591, "step": 33753 }, { "epoch": 0.7437791623284691, "grad_norm": 0.4566030204296112, "learning_rate": 4.874733371908164e-06, "loss": 0.0513, "step": 33754 }, { "epoch": 0.7438011976179852, "grad_norm": 0.6041979789733887, "learning_rate": 4.873943555241634e-06, "loss": 0.0357, "step": 33755 }, { "epoch": 0.7438232329075014, "grad_norm": 0.7250882387161255, "learning_rate": 4.873153790152544e-06, "loss": 0.0903, "step": 33756 }, { "epoch": 0.7438452681970176, "grad_norm": 0.5204320549964905, "learning_rate": 4.872364076644909e-06, "loss": 0.0634, "step": 33757 }, { "epoch": 0.7438673034865337, "grad_norm": 0.5155338644981384, "learning_rate": 4.871574414722752e-06, "loss": 0.032, "step": 33758 }, { "epoch": 0.7438893387760498, "grad_norm": 0.5357574224472046, "learning_rate": 4.870784804390103e-06, "loss": 0.0582, "step": 33759 }, { "epoch": 0.743911374065566, "grad_norm": 0.7420111298561096, "learning_rate": 4.869995245650971e-06, "loss": 0.0668, "step": 33760 }, { "epoch": 0.7439334093550821, "grad_norm": 0.5597658753395081, "learning_rate": 4.869205738509392e-06, "loss": 0.0543, "step": 33761 }, { "epoch": 0.7439554446445983, "grad_norm": 0.6185532808303833, "learning_rate": 4.868416282969377e-06, "loss": 0.0612, "step": 33762 }, { "epoch": 0.7439774799341144, "grad_norm": 0.47426164150238037, "learning_rate": 4.8676268790349575e-06, "loss": 0.0516, "step": 33763 }, { "epoch": 0.7439995152236306, "grad_norm": 0.23594284057617188, "learning_rate": 4.866837526710142e-06, "loss": 0.0347, "step": 33764 }, { "epoch": 0.7440215505131468, "grad_norm": 1.4102320671081543, "learning_rate": 4.866048225998956e-06, "loss": 0.0383, "step": 33765 }, { "epoch": 0.7440435858026629, "grad_norm": 0.4758202135562897, "learning_rate": 4.865258976905426e-06, "loss": 0.0661, "step": 33766 }, { "epoch": 0.7440656210921791, "grad_norm": 0.49509233236312866, "learning_rate": 4.864469779433561e-06, "loss": 0.0451, "step": 33767 }, { "epoch": 0.7440876563816953, "grad_norm": 0.34791743755340576, "learning_rate": 4.863680633587389e-06, "loss": 0.0616, "step": 33768 }, { "epoch": 0.7441096916712114, "grad_norm": 0.9441866874694824, "learning_rate": 4.862891539370925e-06, "loss": 0.0792, "step": 33769 }, { "epoch": 0.7441317269607276, "grad_norm": 0.4464987516403198, "learning_rate": 4.862102496788195e-06, "loss": 0.0537, "step": 33770 }, { "epoch": 0.7441537622502438, "grad_norm": 0.5313007831573486, "learning_rate": 4.861313505843208e-06, "loss": 0.0688, "step": 33771 }, { "epoch": 0.7441757975397599, "grad_norm": 0.9542199969291687, "learning_rate": 4.86052456653999e-06, "loss": 0.0657, "step": 33772 }, { "epoch": 0.7441978328292761, "grad_norm": 0.6886938214302063, "learning_rate": 4.859735678882561e-06, "loss": 0.0572, "step": 33773 }, { "epoch": 0.7442198681187923, "grad_norm": 0.37792325019836426, "learning_rate": 4.858946842874933e-06, "loss": 0.0478, "step": 33774 }, { "epoch": 0.7442419034083084, "grad_norm": 0.62434321641922, "learning_rate": 4.858158058521129e-06, "loss": 0.0888, "step": 33775 }, { "epoch": 0.7442639386978246, "grad_norm": 0.9278123378753662, "learning_rate": 4.857369325825156e-06, "loss": 0.086, "step": 33776 }, { "epoch": 0.7442859739873408, "grad_norm": 0.5333245992660522, "learning_rate": 4.85658064479105e-06, "loss": 0.0679, "step": 33777 }, { "epoch": 0.7443080092768569, "grad_norm": 0.5814509987831116, "learning_rate": 4.855792015422812e-06, "loss": 0.0512, "step": 33778 }, { "epoch": 0.7443300445663731, "grad_norm": 0.7397534847259521, "learning_rate": 4.855003437724472e-06, "loss": 0.0643, "step": 33779 }, { "epoch": 0.7443520798558892, "grad_norm": 0.5423712134361267, "learning_rate": 4.8542149117000325e-06, "loss": 0.0558, "step": 33780 }, { "epoch": 0.7443741151454054, "grad_norm": 0.7417100667953491, "learning_rate": 4.8534264373535194e-06, "loss": 0.078, "step": 33781 }, { "epoch": 0.7443961504349216, "grad_norm": 0.6033948063850403, "learning_rate": 4.8526380146889495e-06, "loss": 0.0655, "step": 33782 }, { "epoch": 0.7444181857244377, "grad_norm": 0.5637994408607483, "learning_rate": 4.8518496437103265e-06, "loss": 0.0509, "step": 33783 }, { "epoch": 0.7444402210139538, "grad_norm": 0.48405224084854126, "learning_rate": 4.851061324421684e-06, "loss": 0.0531, "step": 33784 }, { "epoch": 0.74446225630347, "grad_norm": 0.2758215665817261, "learning_rate": 4.850273056827025e-06, "loss": 0.0376, "step": 33785 }, { "epoch": 0.7444842915929861, "grad_norm": 0.5042831897735596, "learning_rate": 4.8494848409303725e-06, "loss": 0.0723, "step": 33786 }, { "epoch": 0.7445063268825023, "grad_norm": 0.4749544858932495, "learning_rate": 4.848696676735731e-06, "loss": 0.0328, "step": 33787 }, { "epoch": 0.7445283621720185, "grad_norm": 0.5247415900230408, "learning_rate": 4.8479085642471235e-06, "loss": 0.0512, "step": 33788 }, { "epoch": 0.7445503974615346, "grad_norm": 0.456354022026062, "learning_rate": 4.847120503468564e-06, "loss": 0.0433, "step": 33789 }, { "epoch": 0.7445724327510508, "grad_norm": 0.21799179911613464, "learning_rate": 4.84633249440406e-06, "loss": 0.0506, "step": 33790 }, { "epoch": 0.744594468040567, "grad_norm": 0.25968271493911743, "learning_rate": 4.845544537057629e-06, "loss": 0.044, "step": 33791 }, { "epoch": 0.7446165033300831, "grad_norm": 0.5445778369903564, "learning_rate": 4.844756631433287e-06, "loss": 0.0505, "step": 33792 }, { "epoch": 0.7446385386195993, "grad_norm": 0.523879885673523, "learning_rate": 4.843968777535048e-06, "loss": 0.0478, "step": 33793 }, { "epoch": 0.7446605739091154, "grad_norm": 0.3724551200866699, "learning_rate": 4.843180975366917e-06, "loss": 0.0661, "step": 33794 }, { "epoch": 0.7446826091986316, "grad_norm": 0.4749012589454651, "learning_rate": 4.842393224932913e-06, "loss": 0.0384, "step": 33795 }, { "epoch": 0.7447046444881478, "grad_norm": 0.4090210795402527, "learning_rate": 4.841605526237052e-06, "loss": 0.0373, "step": 33796 }, { "epoch": 0.7447266797776639, "grad_norm": 0.634941816329956, "learning_rate": 4.8408178792833355e-06, "loss": 0.0682, "step": 33797 }, { "epoch": 0.7447487150671801, "grad_norm": 0.365497887134552, "learning_rate": 4.840030284075787e-06, "loss": 0.0527, "step": 33798 }, { "epoch": 0.7447707503566963, "grad_norm": 0.7238827347755432, "learning_rate": 4.8392427406184025e-06, "loss": 0.0723, "step": 33799 }, { "epoch": 0.7447927856462124, "grad_norm": 0.8891956210136414, "learning_rate": 4.838455248915214e-06, "loss": 0.0721, "step": 33800 }, { "epoch": 0.7448148209357286, "grad_norm": 0.5288158655166626, "learning_rate": 4.8376678089702165e-06, "loss": 0.0693, "step": 33801 }, { "epoch": 0.7448368562252448, "grad_norm": 0.4758703112602234, "learning_rate": 4.8368804207874255e-06, "loss": 0.0598, "step": 33802 }, { "epoch": 0.7448588915147609, "grad_norm": 0.5262326002120972, "learning_rate": 4.836093084370858e-06, "loss": 0.0537, "step": 33803 }, { "epoch": 0.7448809268042771, "grad_norm": 0.6544094681739807, "learning_rate": 4.8353057997245135e-06, "loss": 0.0404, "step": 33804 }, { "epoch": 0.7449029620937933, "grad_norm": 0.48280951380729675, "learning_rate": 4.834518566852413e-06, "loss": 0.0543, "step": 33805 }, { "epoch": 0.7449249973833094, "grad_norm": 0.5163049101829529, "learning_rate": 4.833731385758549e-06, "loss": 0.07, "step": 33806 }, { "epoch": 0.7449470326728256, "grad_norm": 0.8510307669639587, "learning_rate": 4.832944256446954e-06, "loss": 0.093, "step": 33807 }, { "epoch": 0.7449690679623416, "grad_norm": 0.7491469383239746, "learning_rate": 4.8321571789216205e-06, "loss": 0.0468, "step": 33808 }, { "epoch": 0.7449911032518578, "grad_norm": 0.5434448719024658, "learning_rate": 4.83137015318657e-06, "loss": 0.0879, "step": 33809 }, { "epoch": 0.745013138541374, "grad_norm": 0.466327965259552, "learning_rate": 4.8305831792457954e-06, "loss": 0.0631, "step": 33810 }, { "epoch": 0.7450351738308901, "grad_norm": 0.816275417804718, "learning_rate": 4.829796257103317e-06, "loss": 0.0808, "step": 33811 }, { "epoch": 0.7450572091204063, "grad_norm": 0.5287842154502869, "learning_rate": 4.829009386763143e-06, "loss": 0.1023, "step": 33812 }, { "epoch": 0.7450792444099225, "grad_norm": 0.45851269364356995, "learning_rate": 4.828222568229274e-06, "loss": 0.0518, "step": 33813 }, { "epoch": 0.7451012796994386, "grad_norm": 0.6108199954032898, "learning_rate": 4.827435801505722e-06, "loss": 0.0836, "step": 33814 }, { "epoch": 0.7451233149889548, "grad_norm": 0.5403786897659302, "learning_rate": 4.826649086596494e-06, "loss": 0.0428, "step": 33815 }, { "epoch": 0.745145350278471, "grad_norm": 0.6284749507904053, "learning_rate": 4.8258624235056045e-06, "loss": 0.0694, "step": 33816 }, { "epoch": 0.7451673855679871, "grad_norm": 0.671263575553894, "learning_rate": 4.825075812237046e-06, "loss": 0.0539, "step": 33817 }, { "epoch": 0.7451894208575033, "grad_norm": 0.7174276113510132, "learning_rate": 4.824289252794835e-06, "loss": 0.0537, "step": 33818 }, { "epoch": 0.7452114561470194, "grad_norm": 0.29168999195098877, "learning_rate": 4.8235027451829775e-06, "loss": 0.0507, "step": 33819 }, { "epoch": 0.7452334914365356, "grad_norm": 0.9630463719367981, "learning_rate": 4.822716289405474e-06, "loss": 0.0718, "step": 33820 }, { "epoch": 0.7452555267260518, "grad_norm": 0.6112134456634521, "learning_rate": 4.821929885466339e-06, "loss": 0.0703, "step": 33821 }, { "epoch": 0.7452775620155679, "grad_norm": 0.4159037470817566, "learning_rate": 4.821143533369565e-06, "loss": 0.0566, "step": 33822 }, { "epoch": 0.7452995973050841, "grad_norm": 0.5233514308929443, "learning_rate": 4.820357233119173e-06, "loss": 0.0633, "step": 33823 }, { "epoch": 0.7453216325946003, "grad_norm": 0.6568130254745483, "learning_rate": 4.8195709847191566e-06, "loss": 0.0822, "step": 33824 }, { "epoch": 0.7453436678841164, "grad_norm": 0.9552932977676392, "learning_rate": 4.818784788173522e-06, "loss": 0.0741, "step": 33825 }, { "epoch": 0.7453657031736326, "grad_norm": 0.633738100528717, "learning_rate": 4.817998643486285e-06, "loss": 0.0587, "step": 33826 }, { "epoch": 0.7453877384631488, "grad_norm": 0.6781614422798157, "learning_rate": 4.817212550661432e-06, "loss": 0.0569, "step": 33827 }, { "epoch": 0.7454097737526649, "grad_norm": 0.6288866400718689, "learning_rate": 4.816426509702983e-06, "loss": 0.0754, "step": 33828 }, { "epoch": 0.7454318090421811, "grad_norm": 0.46982595324516296, "learning_rate": 4.815640520614924e-06, "loss": 0.0401, "step": 33829 }, { "epoch": 0.7454538443316973, "grad_norm": 0.47034308314323425, "learning_rate": 4.8148545834012805e-06, "loss": 0.0443, "step": 33830 }, { "epoch": 0.7454758796212134, "grad_norm": 1.010362982749939, "learning_rate": 4.814068698066037e-06, "loss": 0.0859, "step": 33831 }, { "epoch": 0.7454979149107296, "grad_norm": 0.4976465702056885, "learning_rate": 4.81328286461321e-06, "loss": 0.0823, "step": 33832 }, { "epoch": 0.7455199502002456, "grad_norm": 0.5527675747871399, "learning_rate": 4.812497083046786e-06, "loss": 0.0555, "step": 33833 }, { "epoch": 0.7455419854897618, "grad_norm": 0.6294236183166504, "learning_rate": 4.811711353370786e-06, "loss": 0.0578, "step": 33834 }, { "epoch": 0.745564020779278, "grad_norm": 0.6778412461280823, "learning_rate": 4.8109256755892e-06, "loss": 0.0745, "step": 33835 }, { "epoch": 0.7455860560687941, "grad_norm": 0.6087890267372131, "learning_rate": 4.810140049706039e-06, "loss": 0.0916, "step": 33836 }, { "epoch": 0.7456080913583103, "grad_norm": 0.5655747652053833, "learning_rate": 4.8093544757252925e-06, "loss": 0.0439, "step": 33837 }, { "epoch": 0.7456301266478265, "grad_norm": 0.39289191365242004, "learning_rate": 4.808568953650969e-06, "loss": 0.0644, "step": 33838 }, { "epoch": 0.7456521619373426, "grad_norm": 0.7960986495018005, "learning_rate": 4.807783483487073e-06, "loss": 0.0977, "step": 33839 }, { "epoch": 0.7456741972268588, "grad_norm": 0.48224711418151855, "learning_rate": 4.806998065237597e-06, "loss": 0.044, "step": 33840 }, { "epoch": 0.745696232516375, "grad_norm": 0.30276259779930115, "learning_rate": 4.806212698906546e-06, "loss": 0.0558, "step": 33841 }, { "epoch": 0.7457182678058911, "grad_norm": 0.7499122023582458, "learning_rate": 4.805427384497919e-06, "loss": 0.0471, "step": 33842 }, { "epoch": 0.7457403030954073, "grad_norm": 0.720748245716095, "learning_rate": 4.804642122015723e-06, "loss": 0.0646, "step": 33843 }, { "epoch": 0.7457623383849235, "grad_norm": 0.7679233551025391, "learning_rate": 4.803856911463948e-06, "loss": 0.0562, "step": 33844 }, { "epoch": 0.7457843736744396, "grad_norm": 0.9889833927154541, "learning_rate": 4.803071752846595e-06, "loss": 0.1103, "step": 33845 }, { "epoch": 0.7458064089639558, "grad_norm": 0.8682860136032104, "learning_rate": 4.802286646167671e-06, "loss": 0.0564, "step": 33846 }, { "epoch": 0.745828444253472, "grad_norm": 0.7285010814666748, "learning_rate": 4.801501591431166e-06, "loss": 0.0459, "step": 33847 }, { "epoch": 0.7458504795429881, "grad_norm": 0.48294776678085327, "learning_rate": 4.800716588641081e-06, "loss": 0.0659, "step": 33848 }, { "epoch": 0.7458725148325043, "grad_norm": 0.6552186012268066, "learning_rate": 4.799931637801416e-06, "loss": 0.0639, "step": 33849 }, { "epoch": 0.7458945501220204, "grad_norm": 0.701433539390564, "learning_rate": 4.799146738916174e-06, "loss": 0.0724, "step": 33850 }, { "epoch": 0.7459165854115366, "grad_norm": 0.6494911313056946, "learning_rate": 4.798361891989343e-06, "loss": 0.1022, "step": 33851 }, { "epoch": 0.7459386207010528, "grad_norm": 0.4220859110355377, "learning_rate": 4.7975770970249255e-06, "loss": 0.056, "step": 33852 }, { "epoch": 0.7459606559905689, "grad_norm": 0.343041330575943, "learning_rate": 4.796792354026923e-06, "loss": 0.0752, "step": 33853 }, { "epoch": 0.7459826912800851, "grad_norm": 0.4486684799194336, "learning_rate": 4.796007662999325e-06, "loss": 0.0425, "step": 33854 }, { "epoch": 0.7460047265696013, "grad_norm": 1.0097228288650513, "learning_rate": 4.795223023946136e-06, "loss": 0.0543, "step": 33855 }, { "epoch": 0.7460267618591174, "grad_norm": 0.57023024559021, "learning_rate": 4.794438436871337e-06, "loss": 0.0634, "step": 33856 }, { "epoch": 0.7460487971486336, "grad_norm": 0.6948100924491882, "learning_rate": 4.7936539017789475e-06, "loss": 0.0465, "step": 33857 }, { "epoch": 0.7460708324381496, "grad_norm": 0.26652729511260986, "learning_rate": 4.792869418672946e-06, "loss": 0.0454, "step": 33858 }, { "epoch": 0.7460928677276658, "grad_norm": 0.5092015862464905, "learning_rate": 4.7920849875573385e-06, "loss": 0.0489, "step": 33859 }, { "epoch": 0.746114903017182, "grad_norm": 0.41536834836006165, "learning_rate": 4.7913006084361126e-06, "loss": 0.0726, "step": 33860 }, { "epoch": 0.7461369383066981, "grad_norm": 0.7968032956123352, "learning_rate": 4.790516281313266e-06, "loss": 0.0647, "step": 33861 }, { "epoch": 0.7461589735962143, "grad_norm": 0.6361615657806396, "learning_rate": 4.7897320061927994e-06, "loss": 0.036, "step": 33862 }, { "epoch": 0.7461810088857305, "grad_norm": 0.4606812596321106, "learning_rate": 4.788947783078699e-06, "loss": 0.0652, "step": 33863 }, { "epoch": 0.7462030441752466, "grad_norm": 0.5975398421287537, "learning_rate": 4.7881636119749624e-06, "loss": 0.0489, "step": 33864 }, { "epoch": 0.7462250794647628, "grad_norm": 0.4385427236557007, "learning_rate": 4.7873794928855855e-06, "loss": 0.064, "step": 33865 }, { "epoch": 0.746247114754279, "grad_norm": 0.604825496673584, "learning_rate": 4.786595425814564e-06, "loss": 0.0666, "step": 33866 }, { "epoch": 0.7462691500437951, "grad_norm": 0.3063974380493164, "learning_rate": 4.785811410765885e-06, "loss": 0.0588, "step": 33867 }, { "epoch": 0.7462911853333113, "grad_norm": 0.5024047493934631, "learning_rate": 4.785027447743548e-06, "loss": 0.0624, "step": 33868 }, { "epoch": 0.7463132206228275, "grad_norm": 0.609562873840332, "learning_rate": 4.784243536751546e-06, "loss": 0.0687, "step": 33869 }, { "epoch": 0.7463352559123436, "grad_norm": 0.720154881477356, "learning_rate": 4.783459677793866e-06, "loss": 0.0795, "step": 33870 }, { "epoch": 0.7463572912018598, "grad_norm": 0.7599461078643799, "learning_rate": 4.782675870874503e-06, "loss": 0.064, "step": 33871 }, { "epoch": 0.746379326491376, "grad_norm": 0.37071552872657776, "learning_rate": 4.7818921159974535e-06, "loss": 0.0395, "step": 33872 }, { "epoch": 0.7464013617808921, "grad_norm": 0.9122820496559143, "learning_rate": 4.7811084131667086e-06, "loss": 0.0488, "step": 33873 }, { "epoch": 0.7464233970704083, "grad_norm": 0.8695318102836609, "learning_rate": 4.780324762386255e-06, "loss": 0.0722, "step": 33874 }, { "epoch": 0.7464454323599244, "grad_norm": 0.4735814332962036, "learning_rate": 4.779541163660089e-06, "loss": 0.0649, "step": 33875 }, { "epoch": 0.7464674676494406, "grad_norm": 0.6667495965957642, "learning_rate": 4.778757616992204e-06, "loss": 0.0417, "step": 33876 }, { "epoch": 0.7464895029389568, "grad_norm": 0.564545214176178, "learning_rate": 4.777974122386583e-06, "loss": 0.0473, "step": 33877 }, { "epoch": 0.7465115382284729, "grad_norm": 0.3718337416648865, "learning_rate": 4.777190679847226e-06, "loss": 0.0538, "step": 33878 }, { "epoch": 0.7465335735179891, "grad_norm": 0.5721443891525269, "learning_rate": 4.776407289378107e-06, "loss": 0.0622, "step": 33879 }, { "epoch": 0.7465556088075053, "grad_norm": 0.5345238447189331, "learning_rate": 4.775623950983241e-06, "loss": 0.0707, "step": 33880 }, { "epoch": 0.7465776440970214, "grad_norm": 0.5553885698318481, "learning_rate": 4.7748406646665985e-06, "loss": 0.0462, "step": 33881 }, { "epoch": 0.7465996793865375, "grad_norm": 0.3527267277240753, "learning_rate": 4.7740574304321805e-06, "loss": 0.044, "step": 33882 }, { "epoch": 0.7466217146760536, "grad_norm": 0.6196553111076355, "learning_rate": 4.773274248283966e-06, "loss": 0.0611, "step": 33883 }, { "epoch": 0.7466437499655698, "grad_norm": 0.5527160167694092, "learning_rate": 4.772491118225951e-06, "loss": 0.0771, "step": 33884 }, { "epoch": 0.746665785255086, "grad_norm": 0.6531015038490295, "learning_rate": 4.771708040262129e-06, "loss": 0.0505, "step": 33885 }, { "epoch": 0.7466878205446021, "grad_norm": 0.6423985362052917, "learning_rate": 4.7709250143964765e-06, "loss": 0.0748, "step": 33886 }, { "epoch": 0.7467098558341183, "grad_norm": 0.5752610564231873, "learning_rate": 4.770142040632988e-06, "loss": 0.059, "step": 33887 }, { "epoch": 0.7467318911236345, "grad_norm": 0.6848328113555908, "learning_rate": 4.769359118975651e-06, "loss": 0.0678, "step": 33888 }, { "epoch": 0.7467539264131506, "grad_norm": 0.6713677048683167, "learning_rate": 4.768576249428461e-06, "loss": 0.0751, "step": 33889 }, { "epoch": 0.7467759617026668, "grad_norm": 0.87846440076828, "learning_rate": 4.767793431995393e-06, "loss": 0.0705, "step": 33890 }, { "epoch": 0.746797996992183, "grad_norm": 0.42755696177482605, "learning_rate": 4.76701066668044e-06, "loss": 0.0811, "step": 33891 }, { "epoch": 0.7468200322816991, "grad_norm": 0.7375349402427673, "learning_rate": 4.766227953487594e-06, "loss": 0.0649, "step": 33892 }, { "epoch": 0.7468420675712153, "grad_norm": 0.5310359001159668, "learning_rate": 4.765445292420831e-06, "loss": 0.046, "step": 33893 }, { "epoch": 0.7468641028607315, "grad_norm": 0.6154734492301941, "learning_rate": 4.764662683484144e-06, "loss": 0.0582, "step": 33894 }, { "epoch": 0.7468861381502476, "grad_norm": 0.5826106071472168, "learning_rate": 4.763880126681519e-06, "loss": 0.0528, "step": 33895 }, { "epoch": 0.7469081734397638, "grad_norm": 0.565202534198761, "learning_rate": 4.763097622016947e-06, "loss": 0.0891, "step": 33896 }, { "epoch": 0.74693020872928, "grad_norm": 0.9177977442741394, "learning_rate": 4.762315169494401e-06, "loss": 0.0591, "step": 33897 }, { "epoch": 0.7469522440187961, "grad_norm": 0.5896950364112854, "learning_rate": 4.761532769117876e-06, "loss": 0.0607, "step": 33898 }, { "epoch": 0.7469742793083123, "grad_norm": 0.41678422689437866, "learning_rate": 4.7607504208913586e-06, "loss": 0.0601, "step": 33899 }, { "epoch": 0.7469963145978284, "grad_norm": 0.6581169962882996, "learning_rate": 4.759968124818826e-06, "loss": 0.0621, "step": 33900 }, { "epoch": 0.7470183498873446, "grad_norm": 0.884482741355896, "learning_rate": 4.759185880904271e-06, "loss": 0.0693, "step": 33901 }, { "epoch": 0.7470403851768608, "grad_norm": 0.48102110624313354, "learning_rate": 4.758403689151664e-06, "loss": 0.0549, "step": 33902 }, { "epoch": 0.7470624204663769, "grad_norm": 0.7303460836410522, "learning_rate": 4.757621549565011e-06, "loss": 0.0706, "step": 33903 }, { "epoch": 0.7470844557558931, "grad_norm": 0.5318807363510132, "learning_rate": 4.756839462148277e-06, "loss": 0.0619, "step": 33904 }, { "epoch": 0.7471064910454093, "grad_norm": 0.6849748492240906, "learning_rate": 4.756057426905457e-06, "loss": 0.0578, "step": 33905 }, { "epoch": 0.7471285263349254, "grad_norm": 0.5109624266624451, "learning_rate": 4.755275443840525e-06, "loss": 0.0473, "step": 33906 }, { "epoch": 0.7471505616244415, "grad_norm": 0.6757268905639648, "learning_rate": 4.754493512957469e-06, "loss": 0.0847, "step": 33907 }, { "epoch": 0.7471725969139577, "grad_norm": 0.4688674509525299, "learning_rate": 4.753711634260276e-06, "loss": 0.0544, "step": 33908 }, { "epoch": 0.7471946322034738, "grad_norm": 0.5257495045661926, "learning_rate": 4.752929807752922e-06, "loss": 0.0453, "step": 33909 }, { "epoch": 0.74721666749299, "grad_norm": 0.9655523896217346, "learning_rate": 4.752148033439388e-06, "loss": 0.078, "step": 33910 }, { "epoch": 0.7472387027825061, "grad_norm": 0.6812536120414734, "learning_rate": 4.751366311323661e-06, "loss": 0.0369, "step": 33911 }, { "epoch": 0.7472607380720223, "grad_norm": 0.6347153186798096, "learning_rate": 4.750584641409726e-06, "loss": 0.0799, "step": 33912 }, { "epoch": 0.7472827733615385, "grad_norm": 0.7717636823654175, "learning_rate": 4.7498030237015545e-06, "loss": 0.0462, "step": 33913 }, { "epoch": 0.7473048086510546, "grad_norm": 0.6259672045707703, "learning_rate": 4.749021458203132e-06, "loss": 0.0476, "step": 33914 }, { "epoch": 0.7473268439405708, "grad_norm": 0.8458226323127747, "learning_rate": 4.748239944918446e-06, "loss": 0.0856, "step": 33915 }, { "epoch": 0.747348879230087, "grad_norm": 0.6533597111701965, "learning_rate": 4.7474584838514665e-06, "loss": 0.0604, "step": 33916 }, { "epoch": 0.7473709145196031, "grad_norm": 1.197872519493103, "learning_rate": 4.746677075006178e-06, "loss": 0.0964, "step": 33917 }, { "epoch": 0.7473929498091193, "grad_norm": 0.8941083550453186, "learning_rate": 4.745895718386563e-06, "loss": 0.088, "step": 33918 }, { "epoch": 0.7474149850986355, "grad_norm": 0.4943094849586487, "learning_rate": 4.745114413996604e-06, "loss": 0.0704, "step": 33919 }, { "epoch": 0.7474370203881516, "grad_norm": 0.5906917452812195, "learning_rate": 4.744333161840271e-06, "loss": 0.0683, "step": 33920 }, { "epoch": 0.7474590556776678, "grad_norm": 0.8100200891494751, "learning_rate": 4.743551961921548e-06, "loss": 0.0799, "step": 33921 }, { "epoch": 0.747481090967184, "grad_norm": 0.772258996963501, "learning_rate": 4.742770814244422e-06, "loss": 0.0638, "step": 33922 }, { "epoch": 0.7475031262567001, "grad_norm": 0.40857481956481934, "learning_rate": 4.7419897188128576e-06, "loss": 0.0559, "step": 33923 }, { "epoch": 0.7475251615462163, "grad_norm": 0.42694342136383057, "learning_rate": 4.741208675630846e-06, "loss": 0.0516, "step": 33924 }, { "epoch": 0.7475471968357325, "grad_norm": 0.8425940871238708, "learning_rate": 4.740427684702351e-06, "loss": 0.0966, "step": 33925 }, { "epoch": 0.7475692321252486, "grad_norm": 0.5971265435218811, "learning_rate": 4.739646746031368e-06, "loss": 0.0881, "step": 33926 }, { "epoch": 0.7475912674147648, "grad_norm": 0.7501863241195679, "learning_rate": 4.738865859621861e-06, "loss": 0.0802, "step": 33927 }, { "epoch": 0.747613302704281, "grad_norm": 0.4618919789791107, "learning_rate": 4.738085025477818e-06, "loss": 0.0435, "step": 33928 }, { "epoch": 0.7476353379937971, "grad_norm": 0.45957329869270325, "learning_rate": 4.737304243603206e-06, "loss": 0.0915, "step": 33929 }, { "epoch": 0.7476573732833133, "grad_norm": 0.552594780921936, "learning_rate": 4.736523514002007e-06, "loss": 0.0649, "step": 33930 }, { "epoch": 0.7476794085728294, "grad_norm": 0.9893130660057068, "learning_rate": 4.7357428366782025e-06, "loss": 0.0786, "step": 33931 }, { "epoch": 0.7477014438623455, "grad_norm": 0.517731785774231, "learning_rate": 4.734962211635759e-06, "loss": 0.0575, "step": 33932 }, { "epoch": 0.7477234791518617, "grad_norm": 0.491000235080719, "learning_rate": 4.734181638878656e-06, "loss": 0.0597, "step": 33933 }, { "epoch": 0.7477455144413778, "grad_norm": 0.795681893825531, "learning_rate": 4.7334011184108714e-06, "loss": 0.049, "step": 33934 }, { "epoch": 0.747767549730894, "grad_norm": 0.6782742142677307, "learning_rate": 4.732620650236387e-06, "loss": 0.05, "step": 33935 }, { "epoch": 0.7477895850204102, "grad_norm": 0.6437174677848816, "learning_rate": 4.73184023435916e-06, "loss": 0.0641, "step": 33936 }, { "epoch": 0.7478116203099263, "grad_norm": 0.6616621613502502, "learning_rate": 4.731059870783187e-06, "loss": 0.0703, "step": 33937 }, { "epoch": 0.7478336555994425, "grad_norm": 0.29551219940185547, "learning_rate": 4.730279559512433e-06, "loss": 0.0529, "step": 33938 }, { "epoch": 0.7478556908889586, "grad_norm": 0.6318879127502441, "learning_rate": 4.729499300550867e-06, "loss": 0.0637, "step": 33939 }, { "epoch": 0.7478777261784748, "grad_norm": 0.720528781414032, "learning_rate": 4.728719093902469e-06, "loss": 0.0598, "step": 33940 }, { "epoch": 0.747899761467991, "grad_norm": 0.5847758650779724, "learning_rate": 4.727938939571213e-06, "loss": 0.0592, "step": 33941 }, { "epoch": 0.7479217967575071, "grad_norm": 0.5802377462387085, "learning_rate": 4.727158837561076e-06, "loss": 0.0396, "step": 33942 }, { "epoch": 0.7479438320470233, "grad_norm": 0.571138322353363, "learning_rate": 4.726378787876023e-06, "loss": 0.0305, "step": 33943 }, { "epoch": 0.7479658673365395, "grad_norm": 0.5001584887504578, "learning_rate": 4.725598790520032e-06, "loss": 0.0577, "step": 33944 }, { "epoch": 0.7479879026260556, "grad_norm": 0.33284324407577515, "learning_rate": 4.724818845497078e-06, "loss": 0.0437, "step": 33945 }, { "epoch": 0.7480099379155718, "grad_norm": 0.45095160603523254, "learning_rate": 4.724038952811133e-06, "loss": 0.0436, "step": 33946 }, { "epoch": 0.748031973205088, "grad_norm": 0.407959908246994, "learning_rate": 4.723259112466166e-06, "loss": 0.0429, "step": 33947 }, { "epoch": 0.7480540084946041, "grad_norm": 0.19154657423496246, "learning_rate": 4.722479324466149e-06, "loss": 0.0481, "step": 33948 }, { "epoch": 0.7480760437841203, "grad_norm": 0.43990084528923035, "learning_rate": 4.721699588815063e-06, "loss": 0.0537, "step": 33949 }, { "epoch": 0.7480980790736365, "grad_norm": 0.6187105774879456, "learning_rate": 4.720919905516866e-06, "loss": 0.0621, "step": 33950 }, { "epoch": 0.7481201143631526, "grad_norm": 0.6026324033737183, "learning_rate": 4.720140274575542e-06, "loss": 0.0559, "step": 33951 }, { "epoch": 0.7481421496526688, "grad_norm": 0.8067536950111389, "learning_rate": 4.7193606959950455e-06, "loss": 0.0697, "step": 33952 }, { "epoch": 0.748164184942185, "grad_norm": 0.6143959760665894, "learning_rate": 4.7185811697793675e-06, "loss": 0.0694, "step": 33953 }, { "epoch": 0.7481862202317011, "grad_norm": 0.6425555348396301, "learning_rate": 4.717801695932466e-06, "loss": 0.06, "step": 33954 }, { "epoch": 0.7482082555212173, "grad_norm": 0.36638227105140686, "learning_rate": 4.717022274458318e-06, "loss": 0.0695, "step": 33955 }, { "epoch": 0.7482302908107334, "grad_norm": 0.6871395111083984, "learning_rate": 4.716242905360884e-06, "loss": 0.0613, "step": 33956 }, { "epoch": 0.7482523261002495, "grad_norm": 0.3708110451698303, "learning_rate": 4.71546358864414e-06, "loss": 0.046, "step": 33957 }, { "epoch": 0.7482743613897657, "grad_norm": 0.4497731029987335, "learning_rate": 4.714684324312061e-06, "loss": 0.0521, "step": 33958 }, { "epoch": 0.7482963966792818, "grad_norm": 0.5492181777954102, "learning_rate": 4.713905112368598e-06, "loss": 0.0621, "step": 33959 }, { "epoch": 0.748318431968798, "grad_norm": 0.4909510314464569, "learning_rate": 4.713125952817744e-06, "loss": 0.0482, "step": 33960 }, { "epoch": 0.7483404672583142, "grad_norm": 0.6529558300971985, "learning_rate": 4.71234684566345e-06, "loss": 0.0703, "step": 33961 }, { "epoch": 0.7483625025478303, "grad_norm": 0.6446971893310547, "learning_rate": 4.711567790909695e-06, "loss": 0.0513, "step": 33962 }, { "epoch": 0.7483845378373465, "grad_norm": 0.4979020953178406, "learning_rate": 4.710788788560438e-06, "loss": 0.0512, "step": 33963 }, { "epoch": 0.7484065731268627, "grad_norm": 0.70570307970047, "learning_rate": 4.71000983861965e-06, "loss": 0.0851, "step": 33964 }, { "epoch": 0.7484286084163788, "grad_norm": 0.517125129699707, "learning_rate": 4.709230941091307e-06, "loss": 0.0709, "step": 33965 }, { "epoch": 0.748450643705895, "grad_norm": 0.45285630226135254, "learning_rate": 4.708452095979363e-06, "loss": 0.0507, "step": 33966 }, { "epoch": 0.7484726789954111, "grad_norm": 0.33805787563323975, "learning_rate": 4.70767330328779e-06, "loss": 0.0524, "step": 33967 }, { "epoch": 0.7484947142849273, "grad_norm": 0.3949710428714752, "learning_rate": 4.706894563020557e-06, "loss": 0.0543, "step": 33968 }, { "epoch": 0.7485167495744435, "grad_norm": 0.5968036651611328, "learning_rate": 4.706115875181635e-06, "loss": 0.059, "step": 33969 }, { "epoch": 0.7485387848639596, "grad_norm": 0.7090871334075928, "learning_rate": 4.705337239774979e-06, "loss": 0.0507, "step": 33970 }, { "epoch": 0.7485608201534758, "grad_norm": 0.5559574365615845, "learning_rate": 4.704558656804562e-06, "loss": 0.0506, "step": 33971 }, { "epoch": 0.748582855442992, "grad_norm": 0.6895521283149719, "learning_rate": 4.703780126274354e-06, "loss": 0.0676, "step": 33972 }, { "epoch": 0.7486048907325081, "grad_norm": 0.4457235634326935, "learning_rate": 4.703001648188308e-06, "loss": 0.0426, "step": 33973 }, { "epoch": 0.7486269260220243, "grad_norm": 0.5771230459213257, "learning_rate": 4.702223222550402e-06, "loss": 0.054, "step": 33974 }, { "epoch": 0.7486489613115405, "grad_norm": 0.24949172139167786, "learning_rate": 4.701444849364587e-06, "loss": 0.0643, "step": 33975 }, { "epoch": 0.7486709966010566, "grad_norm": 0.4965411126613617, "learning_rate": 4.700666528634844e-06, "loss": 0.0619, "step": 33976 }, { "epoch": 0.7486930318905728, "grad_norm": 0.44632381200790405, "learning_rate": 4.699888260365124e-06, "loss": 0.051, "step": 33977 }, { "epoch": 0.748715067180089, "grad_norm": 0.4623664319515228, "learning_rate": 4.699110044559402e-06, "loss": 0.0554, "step": 33978 }, { "epoch": 0.7487371024696051, "grad_norm": 0.42015329003334045, "learning_rate": 4.6983318812216316e-06, "loss": 0.0642, "step": 33979 }, { "epoch": 0.7487591377591213, "grad_norm": 0.5791313648223877, "learning_rate": 4.697553770355782e-06, "loss": 0.0571, "step": 33980 }, { "epoch": 0.7487811730486373, "grad_norm": 0.3683721721172333, "learning_rate": 4.69677571196582e-06, "loss": 0.0608, "step": 33981 }, { "epoch": 0.7488032083381535, "grad_norm": 0.8151391744613647, "learning_rate": 4.6959977060556945e-06, "loss": 0.0991, "step": 33982 }, { "epoch": 0.7488252436276697, "grad_norm": 0.4856034815311432, "learning_rate": 4.695219752629388e-06, "loss": 0.0465, "step": 33983 }, { "epoch": 0.7488472789171858, "grad_norm": 0.5336710810661316, "learning_rate": 4.694441851690847e-06, "loss": 0.0657, "step": 33984 }, { "epoch": 0.748869314206702, "grad_norm": 0.758074939250946, "learning_rate": 4.693664003244047e-06, "loss": 0.0584, "step": 33985 }, { "epoch": 0.7488913494962182, "grad_norm": 0.9422067403793335, "learning_rate": 4.692886207292935e-06, "loss": 0.0658, "step": 33986 }, { "epoch": 0.7489133847857343, "grad_norm": 0.7586117386817932, "learning_rate": 4.692108463841483e-06, "loss": 0.0753, "step": 33987 }, { "epoch": 0.7489354200752505, "grad_norm": 0.4542126953601837, "learning_rate": 4.691330772893655e-06, "loss": 0.0577, "step": 33988 }, { "epoch": 0.7489574553647667, "grad_norm": 0.7024549841880798, "learning_rate": 4.6905531344534016e-06, "loss": 0.0626, "step": 33989 }, { "epoch": 0.7489794906542828, "grad_norm": 0.4352813959121704, "learning_rate": 4.6897755485246905e-06, "loss": 0.0458, "step": 33990 }, { "epoch": 0.749001525943799, "grad_norm": 0.8618690967559814, "learning_rate": 4.68899801511148e-06, "loss": 0.0727, "step": 33991 }, { "epoch": 0.7490235612333151, "grad_norm": 0.8325990438461304, "learning_rate": 4.688220534217738e-06, "loss": 0.067, "step": 33992 }, { "epoch": 0.7490455965228313, "grad_norm": 0.6873361468315125, "learning_rate": 4.687443105847413e-06, "loss": 0.0577, "step": 33993 }, { "epoch": 0.7490676318123475, "grad_norm": 0.675838053226471, "learning_rate": 4.6866657300044685e-06, "loss": 0.0909, "step": 33994 }, { "epoch": 0.7490896671018636, "grad_norm": 0.33576175570487976, "learning_rate": 4.685888406692874e-06, "loss": 0.0456, "step": 33995 }, { "epoch": 0.7491117023913798, "grad_norm": 0.6493175625801086, "learning_rate": 4.685111135916573e-06, "loss": 0.0711, "step": 33996 }, { "epoch": 0.749133737680896, "grad_norm": 0.6485851407051086, "learning_rate": 4.684333917679539e-06, "loss": 0.0824, "step": 33997 }, { "epoch": 0.7491557729704121, "grad_norm": 0.28091514110565186, "learning_rate": 4.683556751985713e-06, "loss": 0.0832, "step": 33998 }, { "epoch": 0.7491778082599283, "grad_norm": 0.516976535320282, "learning_rate": 4.682779638839075e-06, "loss": 0.0562, "step": 33999 }, { "epoch": 0.7491998435494445, "grad_norm": 0.5292040109634399, "learning_rate": 4.682002578243568e-06, "loss": 0.0553, "step": 34000 }, { "epoch": 0.7492218788389606, "grad_norm": 0.6117521524429321, "learning_rate": 4.68122557020316e-06, "loss": 0.0681, "step": 34001 }, { "epoch": 0.7492439141284768, "grad_norm": 0.6065378189086914, "learning_rate": 4.680448614721799e-06, "loss": 0.0626, "step": 34002 }, { "epoch": 0.749265949417993, "grad_norm": 0.614893913269043, "learning_rate": 4.679671711803445e-06, "loss": 0.0485, "step": 34003 }, { "epoch": 0.7492879847075091, "grad_norm": 0.7857567071914673, "learning_rate": 4.678894861452063e-06, "loss": 0.0725, "step": 34004 }, { "epoch": 0.7493100199970253, "grad_norm": 0.47999337315559387, "learning_rate": 4.678118063671596e-06, "loss": 0.0372, "step": 34005 }, { "epoch": 0.7493320552865413, "grad_norm": 0.5538679361343384, "learning_rate": 4.677341318466017e-06, "loss": 0.0542, "step": 34006 }, { "epoch": 0.7493540905760575, "grad_norm": 0.4364825487136841, "learning_rate": 4.67656462583927e-06, "loss": 0.0326, "step": 34007 }, { "epoch": 0.7493761258655737, "grad_norm": 0.6416772603988647, "learning_rate": 4.675787985795319e-06, "loss": 0.0655, "step": 34008 }, { "epoch": 0.7493981611550898, "grad_norm": 0.8054705858230591, "learning_rate": 4.675011398338111e-06, "loss": 0.0318, "step": 34009 }, { "epoch": 0.749420196444606, "grad_norm": 0.7218928933143616, "learning_rate": 4.674234863471608e-06, "loss": 0.0625, "step": 34010 }, { "epoch": 0.7494422317341222, "grad_norm": 0.2633395493030548, "learning_rate": 4.6734583811997676e-06, "loss": 0.036, "step": 34011 }, { "epoch": 0.7494642670236383, "grad_norm": 0.5725075602531433, "learning_rate": 4.672681951526537e-06, "loss": 0.073, "step": 34012 }, { "epoch": 0.7494863023131545, "grad_norm": 0.5016105771064758, "learning_rate": 4.671905574455875e-06, "loss": 0.0462, "step": 34013 }, { "epoch": 0.7495083376026707, "grad_norm": 0.37039563059806824, "learning_rate": 4.671129249991736e-06, "loss": 0.0569, "step": 34014 }, { "epoch": 0.7495303728921868, "grad_norm": 0.7232820391654968, "learning_rate": 4.6703529781380805e-06, "loss": 0.0798, "step": 34015 }, { "epoch": 0.749552408181703, "grad_norm": 0.37613269686698914, "learning_rate": 4.669576758898851e-06, "loss": 0.0836, "step": 34016 }, { "epoch": 0.7495744434712192, "grad_norm": 0.6907175183296204, "learning_rate": 4.668800592278006e-06, "loss": 0.106, "step": 34017 }, { "epoch": 0.7495964787607353, "grad_norm": 0.6300926208496094, "learning_rate": 4.668024478279506e-06, "loss": 0.0444, "step": 34018 }, { "epoch": 0.7496185140502515, "grad_norm": 0.8902015089988708, "learning_rate": 4.66724841690729e-06, "loss": 0.089, "step": 34019 }, { "epoch": 0.7496405493397676, "grad_norm": 0.5687638521194458, "learning_rate": 4.666472408165327e-06, "loss": 0.0472, "step": 34020 }, { "epoch": 0.7496625846292838, "grad_norm": 0.7546749711036682, "learning_rate": 4.6656964520575494e-06, "loss": 0.0741, "step": 34021 }, { "epoch": 0.7496846199188, "grad_norm": 1.0163049697875977, "learning_rate": 4.664920548587931e-06, "loss": 0.1147, "step": 34022 }, { "epoch": 0.7497066552083161, "grad_norm": 0.5561322569847107, "learning_rate": 4.664144697760411e-06, "loss": 0.0621, "step": 34023 }, { "epoch": 0.7497286904978323, "grad_norm": 0.601266086101532, "learning_rate": 4.663368899578949e-06, "loss": 0.0574, "step": 34024 }, { "epoch": 0.7497507257873485, "grad_norm": 0.2823268473148346, "learning_rate": 4.662593154047487e-06, "loss": 0.0541, "step": 34025 }, { "epoch": 0.7497727610768646, "grad_norm": 0.31079164147377014, "learning_rate": 4.661817461169981e-06, "loss": 0.0609, "step": 34026 }, { "epoch": 0.7497947963663808, "grad_norm": 0.7268170714378357, "learning_rate": 4.6610418209503875e-06, "loss": 0.0772, "step": 34027 }, { "epoch": 0.749816831655897, "grad_norm": 0.7652584910392761, "learning_rate": 4.660266233392643e-06, "loss": 0.0871, "step": 34028 }, { "epoch": 0.7498388669454131, "grad_norm": 0.7137349843978882, "learning_rate": 4.6594906985007176e-06, "loss": 0.0616, "step": 34029 }, { "epoch": 0.7498609022349293, "grad_norm": 0.6924242973327637, "learning_rate": 4.658715216278545e-06, "loss": 0.0672, "step": 34030 }, { "epoch": 0.7498829375244453, "grad_norm": 1.005918264389038, "learning_rate": 4.657939786730085e-06, "loss": 0.077, "step": 34031 }, { "epoch": 0.7499049728139615, "grad_norm": 0.8283720016479492, "learning_rate": 4.65716440985928e-06, "loss": 0.066, "step": 34032 }, { "epoch": 0.7499270081034777, "grad_norm": 0.6240222454071045, "learning_rate": 4.656389085670083e-06, "loss": 0.0789, "step": 34033 }, { "epoch": 0.7499490433929938, "grad_norm": 0.37258002161979675, "learning_rate": 4.655613814166448e-06, "loss": 0.0478, "step": 34034 }, { "epoch": 0.74997107868251, "grad_norm": 0.5211034417152405, "learning_rate": 4.654838595352315e-06, "loss": 0.0664, "step": 34035 }, { "epoch": 0.7499931139720262, "grad_norm": 0.4243376553058624, "learning_rate": 4.654063429231634e-06, "loss": 0.0488, "step": 34036 }, { "epoch": 0.7500151492615423, "grad_norm": 0.32477468252182007, "learning_rate": 4.653288315808358e-06, "loss": 0.055, "step": 34037 }, { "epoch": 0.7500371845510585, "grad_norm": 0.6389496326446533, "learning_rate": 4.6525132550864374e-06, "loss": 0.0589, "step": 34038 }, { "epoch": 0.7500592198405747, "grad_norm": 0.8740328550338745, "learning_rate": 4.65173824706981e-06, "loss": 0.0482, "step": 34039 }, { "epoch": 0.7500812551300908, "grad_norm": 0.8723414540290833, "learning_rate": 4.6509632917624296e-06, "loss": 0.0589, "step": 34040 }, { "epoch": 0.750103290419607, "grad_norm": 0.5139848589897156, "learning_rate": 4.650188389168248e-06, "loss": 0.0409, "step": 34041 }, { "epoch": 0.7501253257091232, "grad_norm": 0.5155493021011353, "learning_rate": 4.6494135392912015e-06, "loss": 0.0582, "step": 34042 }, { "epoch": 0.7501473609986393, "grad_norm": 0.42890268564224243, "learning_rate": 4.648638742135247e-06, "loss": 0.0546, "step": 34043 }, { "epoch": 0.7501693962881555, "grad_norm": 0.5884897708892822, "learning_rate": 4.647863997704316e-06, "loss": 0.0612, "step": 34044 }, { "epoch": 0.7501914315776717, "grad_norm": 0.5539000630378723, "learning_rate": 4.647089306002376e-06, "loss": 0.0516, "step": 34045 }, { "epoch": 0.7502134668671878, "grad_norm": 0.40689361095428467, "learning_rate": 4.646314667033356e-06, "loss": 0.0723, "step": 34046 }, { "epoch": 0.750235502156704, "grad_norm": 0.5297887325286865, "learning_rate": 4.645540080801213e-06, "loss": 0.0762, "step": 34047 }, { "epoch": 0.7502575374462201, "grad_norm": 0.8560956120491028, "learning_rate": 4.644765547309876e-06, "loss": 0.0849, "step": 34048 }, { "epoch": 0.7502795727357363, "grad_norm": 0.6595809459686279, "learning_rate": 4.643991066563311e-06, "loss": 0.0836, "step": 34049 }, { "epoch": 0.7503016080252525, "grad_norm": 0.7625510096549988, "learning_rate": 4.643216638565453e-06, "loss": 0.0635, "step": 34050 }, { "epoch": 0.7503236433147686, "grad_norm": 0.5344886779785156, "learning_rate": 4.642442263320238e-06, "loss": 0.0527, "step": 34051 }, { "epoch": 0.7503456786042848, "grad_norm": 0.5979042053222656, "learning_rate": 4.641667940831626e-06, "loss": 0.055, "step": 34052 }, { "epoch": 0.750367713893801, "grad_norm": 0.8332766890525818, "learning_rate": 4.640893671103551e-06, "loss": 0.074, "step": 34053 }, { "epoch": 0.7503897491833171, "grad_norm": 0.3739597797393799, "learning_rate": 4.640119454139963e-06, "loss": 0.051, "step": 34054 }, { "epoch": 0.7504117844728332, "grad_norm": 0.39092668890953064, "learning_rate": 4.639345289944793e-06, "loss": 0.059, "step": 34055 }, { "epoch": 0.7504338197623494, "grad_norm": 0.47433599829673767, "learning_rate": 4.638571178522004e-06, "loss": 0.0597, "step": 34056 }, { "epoch": 0.7504558550518655, "grad_norm": 0.49304378032684326, "learning_rate": 4.637797119875522e-06, "loss": 0.0499, "step": 34057 }, { "epoch": 0.7504778903413817, "grad_norm": 0.3580322861671448, "learning_rate": 4.637023114009301e-06, "loss": 0.0609, "step": 34058 }, { "epoch": 0.7504999256308978, "grad_norm": 0.878001868724823, "learning_rate": 4.636249160927273e-06, "loss": 0.0959, "step": 34059 }, { "epoch": 0.750521960920414, "grad_norm": 0.29436689615249634, "learning_rate": 4.635475260633387e-06, "loss": 0.028, "step": 34060 }, { "epoch": 0.7505439962099302, "grad_norm": 0.3934405744075775, "learning_rate": 4.634701413131587e-06, "loss": 0.0597, "step": 34061 }, { "epoch": 0.7505660314994463, "grad_norm": 0.5628271102905273, "learning_rate": 4.633927618425807e-06, "loss": 0.0573, "step": 34062 }, { "epoch": 0.7505880667889625, "grad_norm": 0.5274453163146973, "learning_rate": 4.633153876519992e-06, "loss": 0.0553, "step": 34063 }, { "epoch": 0.7506101020784787, "grad_norm": 0.40125876665115356, "learning_rate": 4.632380187418082e-06, "loss": 0.0901, "step": 34064 }, { "epoch": 0.7506321373679948, "grad_norm": 0.7137560248374939, "learning_rate": 4.631606551124027e-06, "loss": 0.0636, "step": 34065 }, { "epoch": 0.750654172657511, "grad_norm": 0.6707228422164917, "learning_rate": 4.630832967641753e-06, "loss": 0.067, "step": 34066 }, { "epoch": 0.7506762079470272, "grad_norm": 0.4223201870918274, "learning_rate": 4.6300594369752066e-06, "loss": 0.0405, "step": 34067 }, { "epoch": 0.7506982432365433, "grad_norm": 0.882064700126648, "learning_rate": 4.629285959128335e-06, "loss": 0.0781, "step": 34068 }, { "epoch": 0.7507202785260595, "grad_norm": 0.9211209416389465, "learning_rate": 4.628512534105066e-06, "loss": 0.0562, "step": 34069 }, { "epoch": 0.7507423138155757, "grad_norm": 0.7200397849082947, "learning_rate": 4.627739161909341e-06, "loss": 0.049, "step": 34070 }, { "epoch": 0.7507643491050918, "grad_norm": 0.45131734013557434, "learning_rate": 4.6269658425451075e-06, "loss": 0.0929, "step": 34071 }, { "epoch": 0.750786384394608, "grad_norm": 0.15170665085315704, "learning_rate": 4.6261925760163e-06, "loss": 0.0431, "step": 34072 }, { "epoch": 0.7508084196841242, "grad_norm": 0.582453191280365, "learning_rate": 4.625419362326855e-06, "loss": 0.0316, "step": 34073 }, { "epoch": 0.7508304549736403, "grad_norm": 0.5777446031570435, "learning_rate": 4.624646201480711e-06, "loss": 0.0527, "step": 34074 }, { "epoch": 0.7508524902631565, "grad_norm": 0.4914163053035736, "learning_rate": 4.623873093481813e-06, "loss": 0.0603, "step": 34075 }, { "epoch": 0.7508745255526726, "grad_norm": 0.8439517021179199, "learning_rate": 4.623100038334087e-06, "loss": 0.0785, "step": 34076 }, { "epoch": 0.7508965608421888, "grad_norm": 0.4857080280780792, "learning_rate": 4.6223270360414835e-06, "loss": 0.0537, "step": 34077 }, { "epoch": 0.750918596131705, "grad_norm": 0.6372601389884949, "learning_rate": 4.621554086607923e-06, "loss": 0.0795, "step": 34078 }, { "epoch": 0.7509406314212211, "grad_norm": 0.5241970419883728, "learning_rate": 4.620781190037364e-06, "loss": 0.0658, "step": 34079 }, { "epoch": 0.7509626667107372, "grad_norm": 0.6984455585479736, "learning_rate": 4.620008346333725e-06, "loss": 0.0799, "step": 34080 }, { "epoch": 0.7509847020002534, "grad_norm": 0.6501219868659973, "learning_rate": 4.619235555500956e-06, "loss": 0.0668, "step": 34081 }, { "epoch": 0.7510067372897695, "grad_norm": 0.5741058588027954, "learning_rate": 4.618462817542983e-06, "loss": 0.0741, "step": 34082 }, { "epoch": 0.7510287725792857, "grad_norm": 0.4973349869251251, "learning_rate": 4.617690132463745e-06, "loss": 0.0326, "step": 34083 }, { "epoch": 0.7510508078688019, "grad_norm": 0.44236746430397034, "learning_rate": 4.616917500267182e-06, "loss": 0.0579, "step": 34084 }, { "epoch": 0.751072843158318, "grad_norm": 0.6742479801177979, "learning_rate": 4.616144920957223e-06, "loss": 0.0644, "step": 34085 }, { "epoch": 0.7510948784478342, "grad_norm": 0.5387376546859741, "learning_rate": 4.615372394537804e-06, "loss": 0.0453, "step": 34086 }, { "epoch": 0.7511169137373503, "grad_norm": 0.6405068635940552, "learning_rate": 4.6145999210128654e-06, "loss": 0.058, "step": 34087 }, { "epoch": 0.7511389490268665, "grad_norm": 0.5328285098075867, "learning_rate": 4.61382750038634e-06, "loss": 0.0535, "step": 34088 }, { "epoch": 0.7511609843163827, "grad_norm": 0.7263683676719666, "learning_rate": 4.613055132662157e-06, "loss": 0.0716, "step": 34089 }, { "epoch": 0.7511830196058988, "grad_norm": 0.5074722766876221, "learning_rate": 4.612282817844255e-06, "loss": 0.0481, "step": 34090 }, { "epoch": 0.751205054895415, "grad_norm": 0.523360550403595, "learning_rate": 4.611510555936572e-06, "loss": 0.0654, "step": 34091 }, { "epoch": 0.7512270901849312, "grad_norm": 0.3115614950656891, "learning_rate": 4.610738346943031e-06, "loss": 0.0752, "step": 34092 }, { "epoch": 0.7512491254744473, "grad_norm": 0.505850613117218, "learning_rate": 4.60996619086757e-06, "loss": 0.0377, "step": 34093 }, { "epoch": 0.7512711607639635, "grad_norm": 0.40622127056121826, "learning_rate": 4.609194087714124e-06, "loss": 0.0519, "step": 34094 }, { "epoch": 0.7512931960534797, "grad_norm": 0.45391416549682617, "learning_rate": 4.608422037486629e-06, "loss": 0.0453, "step": 34095 }, { "epoch": 0.7513152313429958, "grad_norm": 0.7927805781364441, "learning_rate": 4.607650040189007e-06, "loss": 0.059, "step": 34096 }, { "epoch": 0.751337266632512, "grad_norm": 0.34268543124198914, "learning_rate": 4.606878095825198e-06, "loss": 0.054, "step": 34097 }, { "epoch": 0.7513593019220282, "grad_norm": 0.47534772753715515, "learning_rate": 4.606106204399136e-06, "loss": 0.0411, "step": 34098 }, { "epoch": 0.7513813372115443, "grad_norm": 0.7194904088973999, "learning_rate": 4.605334365914744e-06, "loss": 0.0415, "step": 34099 }, { "epoch": 0.7514033725010605, "grad_norm": 0.6949189901351929, "learning_rate": 4.6045625803759625e-06, "loss": 0.0568, "step": 34100 }, { "epoch": 0.7514254077905766, "grad_norm": 0.6165667176246643, "learning_rate": 4.603790847786709e-06, "loss": 0.055, "step": 34101 }, { "epoch": 0.7514474430800928, "grad_norm": 0.7525473237037659, "learning_rate": 4.603019168150933e-06, "loss": 0.0746, "step": 34102 }, { "epoch": 0.751469478369609, "grad_norm": 0.9837695956230164, "learning_rate": 4.602247541472551e-06, "loss": 0.0612, "step": 34103 }, { "epoch": 0.7514915136591251, "grad_norm": 1.0069791078567505, "learning_rate": 4.601475967755504e-06, "loss": 0.1026, "step": 34104 }, { "epoch": 0.7515135489486412, "grad_norm": 0.7476807236671448, "learning_rate": 4.600704447003708e-06, "loss": 0.0611, "step": 34105 }, { "epoch": 0.7515355842381574, "grad_norm": 0.5720727443695068, "learning_rate": 4.599932979221103e-06, "loss": 0.0693, "step": 34106 }, { "epoch": 0.7515576195276735, "grad_norm": 0.4504445791244507, "learning_rate": 4.599161564411621e-06, "loss": 0.0444, "step": 34107 }, { "epoch": 0.7515796548171897, "grad_norm": 0.367860347032547, "learning_rate": 4.598390202579183e-06, "loss": 0.0485, "step": 34108 }, { "epoch": 0.7516016901067059, "grad_norm": 0.7984578609466553, "learning_rate": 4.5976188937277195e-06, "loss": 0.0872, "step": 34109 }, { "epoch": 0.751623725396222, "grad_norm": 0.5833072662353516, "learning_rate": 4.596847637861161e-06, "loss": 0.0638, "step": 34110 }, { "epoch": 0.7516457606857382, "grad_norm": 0.5503571629524231, "learning_rate": 4.596076434983443e-06, "loss": 0.0485, "step": 34111 }, { "epoch": 0.7516677959752543, "grad_norm": 0.5115289092063904, "learning_rate": 4.5953052850984814e-06, "loss": 0.0455, "step": 34112 }, { "epoch": 0.7516898312647705, "grad_norm": 0.5227195024490356, "learning_rate": 4.594534188210208e-06, "loss": 0.0496, "step": 34113 }, { "epoch": 0.7517118665542867, "grad_norm": 0.2528168857097626, "learning_rate": 4.593763144322558e-06, "loss": 0.0463, "step": 34114 }, { "epoch": 0.7517339018438028, "grad_norm": 0.8278957009315491, "learning_rate": 4.5929921534394455e-06, "loss": 0.0625, "step": 34115 }, { "epoch": 0.751755937133319, "grad_norm": 0.5115376710891724, "learning_rate": 4.592221215564807e-06, "loss": 0.0508, "step": 34116 }, { "epoch": 0.7517779724228352, "grad_norm": 0.9666011929512024, "learning_rate": 4.591450330702566e-06, "loss": 0.103, "step": 34117 }, { "epoch": 0.7518000077123513, "grad_norm": 0.8303733468055725, "learning_rate": 4.590679498856656e-06, "loss": 0.0835, "step": 34118 }, { "epoch": 0.7518220430018675, "grad_norm": 0.4765213131904602, "learning_rate": 4.5899087200309906e-06, "loss": 0.0598, "step": 34119 }, { "epoch": 0.7518440782913837, "grad_norm": 0.6660280823707581, "learning_rate": 4.589137994229503e-06, "loss": 0.0664, "step": 34120 }, { "epoch": 0.7518661135808998, "grad_norm": 0.6713484525680542, "learning_rate": 4.5883673214561225e-06, "loss": 0.0744, "step": 34121 }, { "epoch": 0.751888148870416, "grad_norm": 0.9441041350364685, "learning_rate": 4.587596701714767e-06, "loss": 0.0825, "step": 34122 }, { "epoch": 0.7519101841599322, "grad_norm": 0.7345244288444519, "learning_rate": 4.58682613500937e-06, "loss": 0.0889, "step": 34123 }, { "epoch": 0.7519322194494483, "grad_norm": 0.6795092225074768, "learning_rate": 4.58605562134384e-06, "loss": 0.0734, "step": 34124 }, { "epoch": 0.7519542547389645, "grad_norm": 0.5409985184669495, "learning_rate": 4.5852851607221256e-06, "loss": 0.0695, "step": 34125 }, { "epoch": 0.7519762900284807, "grad_norm": 0.7460671067237854, "learning_rate": 4.584514753148131e-06, "loss": 0.0842, "step": 34126 }, { "epoch": 0.7519983253179968, "grad_norm": 0.5960351228713989, "learning_rate": 4.583744398625796e-06, "loss": 0.0609, "step": 34127 }, { "epoch": 0.752020360607513, "grad_norm": 0.49165138602256775, "learning_rate": 4.582974097159027e-06, "loss": 0.0584, "step": 34128 }, { "epoch": 0.752042395897029, "grad_norm": 0.3755405843257904, "learning_rate": 4.582203848751761e-06, "loss": 0.05, "step": 34129 }, { "epoch": 0.7520644311865452, "grad_norm": 0.7746033072471619, "learning_rate": 4.58143365340792e-06, "loss": 0.0526, "step": 34130 }, { "epoch": 0.7520864664760614, "grad_norm": 0.2613677382469177, "learning_rate": 4.580663511131422e-06, "loss": 0.0404, "step": 34131 }, { "epoch": 0.7521085017655775, "grad_norm": 0.8505741357803345, "learning_rate": 4.579893421926189e-06, "loss": 0.0471, "step": 34132 }, { "epoch": 0.7521305370550937, "grad_norm": 0.5438916087150574, "learning_rate": 4.579123385796147e-06, "loss": 0.0732, "step": 34133 }, { "epoch": 0.7521525723446099, "grad_norm": 0.37924960255622864, "learning_rate": 4.578353402745223e-06, "loss": 0.0704, "step": 34134 }, { "epoch": 0.752174607634126, "grad_norm": 0.736665666103363, "learning_rate": 4.577583472777331e-06, "loss": 0.0663, "step": 34135 }, { "epoch": 0.7521966429236422, "grad_norm": 0.7320238947868347, "learning_rate": 4.576813595896393e-06, "loss": 0.0865, "step": 34136 }, { "epoch": 0.7522186782131584, "grad_norm": 0.6940996050834656, "learning_rate": 4.576043772106337e-06, "loss": 0.0573, "step": 34137 }, { "epoch": 0.7522407135026745, "grad_norm": 0.5342769026756287, "learning_rate": 4.575274001411076e-06, "loss": 0.0694, "step": 34138 }, { "epoch": 0.7522627487921907, "grad_norm": 0.7266249060630798, "learning_rate": 4.574504283814536e-06, "loss": 0.0765, "step": 34139 }, { "epoch": 0.7522847840817068, "grad_norm": 0.6234167814254761, "learning_rate": 4.573734619320634e-06, "loss": 0.0779, "step": 34140 }, { "epoch": 0.752306819371223, "grad_norm": 0.4396570324897766, "learning_rate": 4.572965007933299e-06, "loss": 0.0468, "step": 34141 }, { "epoch": 0.7523288546607392, "grad_norm": 0.5844394564628601, "learning_rate": 4.572195449656439e-06, "loss": 0.0742, "step": 34142 }, { "epoch": 0.7523508899502553, "grad_norm": 0.5307756662368774, "learning_rate": 4.571425944493979e-06, "loss": 0.0603, "step": 34143 }, { "epoch": 0.7523729252397715, "grad_norm": 0.7247220873832703, "learning_rate": 4.570656492449843e-06, "loss": 0.0563, "step": 34144 }, { "epoch": 0.7523949605292877, "grad_norm": 0.5507100224494934, "learning_rate": 4.569887093527943e-06, "loss": 0.0622, "step": 34145 }, { "epoch": 0.7524169958188038, "grad_norm": 0.49151158332824707, "learning_rate": 4.569117747732206e-06, "loss": 0.0544, "step": 34146 }, { "epoch": 0.75243903110832, "grad_norm": 0.5420087575912476, "learning_rate": 4.568348455066536e-06, "loss": 0.0481, "step": 34147 }, { "epoch": 0.7524610663978362, "grad_norm": 0.48764947056770325, "learning_rate": 4.56757921553487e-06, "loss": 0.0598, "step": 34148 }, { "epoch": 0.7524831016873523, "grad_norm": 0.47829562425613403, "learning_rate": 4.566810029141113e-06, "loss": 0.0547, "step": 34149 }, { "epoch": 0.7525051369768685, "grad_norm": 0.36383509635925293, "learning_rate": 4.566040895889192e-06, "loss": 0.0506, "step": 34150 }, { "epoch": 0.7525271722663847, "grad_norm": 0.9062052369117737, "learning_rate": 4.565271815783015e-06, "loss": 0.0815, "step": 34151 }, { "epoch": 0.7525492075559008, "grad_norm": 0.49821412563323975, "learning_rate": 4.564502788826505e-06, "loss": 0.0563, "step": 34152 }, { "epoch": 0.752571242845417, "grad_norm": 0.5882003903388977, "learning_rate": 4.563733815023583e-06, "loss": 0.0865, "step": 34153 }, { "epoch": 0.752593278134933, "grad_norm": 0.45929786562919617, "learning_rate": 4.562964894378155e-06, "loss": 0.0657, "step": 34154 }, { "epoch": 0.7526153134244492, "grad_norm": 0.7575928568840027, "learning_rate": 4.562196026894144e-06, "loss": 0.0721, "step": 34155 }, { "epoch": 0.7526373487139654, "grad_norm": 0.8716760873794556, "learning_rate": 4.561427212575467e-06, "loss": 0.0939, "step": 34156 }, { "epoch": 0.7526593840034815, "grad_norm": 0.5239524841308594, "learning_rate": 4.560658451426043e-06, "loss": 0.0709, "step": 34157 }, { "epoch": 0.7526814192929977, "grad_norm": 0.22191520035266876, "learning_rate": 4.559889743449778e-06, "loss": 0.0674, "step": 34158 }, { "epoch": 0.7527034545825139, "grad_norm": 0.9419711828231812, "learning_rate": 4.559121088650593e-06, "loss": 0.0803, "step": 34159 }, { "epoch": 0.75272548987203, "grad_norm": 0.5547184944152832, "learning_rate": 4.558352487032404e-06, "loss": 0.0771, "step": 34160 }, { "epoch": 0.7527475251615462, "grad_norm": 0.6902160048484802, "learning_rate": 4.557583938599128e-06, "loss": 0.0866, "step": 34161 }, { "epoch": 0.7527695604510624, "grad_norm": 0.6319743990898132, "learning_rate": 4.556815443354673e-06, "loss": 0.0375, "step": 34162 }, { "epoch": 0.7527915957405785, "grad_norm": 0.5309950113296509, "learning_rate": 4.556047001302957e-06, "loss": 0.0368, "step": 34163 }, { "epoch": 0.7528136310300947, "grad_norm": 0.5499676465988159, "learning_rate": 4.555278612447899e-06, "loss": 0.0754, "step": 34164 }, { "epoch": 0.7528356663196109, "grad_norm": 0.45470476150512695, "learning_rate": 4.5545102767934045e-06, "loss": 0.0397, "step": 34165 }, { "epoch": 0.752857701609127, "grad_norm": 0.5711163878440857, "learning_rate": 4.553741994343389e-06, "loss": 0.046, "step": 34166 }, { "epoch": 0.7528797368986432, "grad_norm": 0.7926573753356934, "learning_rate": 4.5529737651017675e-06, "loss": 0.071, "step": 34167 }, { "epoch": 0.7529017721881593, "grad_norm": 0.9364494681358337, "learning_rate": 4.552205589072458e-06, "loss": 0.0984, "step": 34168 }, { "epoch": 0.7529238074776755, "grad_norm": 0.7262024879455566, "learning_rate": 4.551437466259363e-06, "loss": 0.0546, "step": 34169 }, { "epoch": 0.7529458427671917, "grad_norm": 0.3431278169155121, "learning_rate": 4.5506693966664e-06, "loss": 0.0629, "step": 34170 }, { "epoch": 0.7529678780567078, "grad_norm": 0.9560613632202148, "learning_rate": 4.5499013802974845e-06, "loss": 0.1008, "step": 34171 }, { "epoch": 0.752989913346224, "grad_norm": 0.7552398443222046, "learning_rate": 4.549133417156521e-06, "loss": 0.0898, "step": 34172 }, { "epoch": 0.7530119486357402, "grad_norm": 0.3912501335144043, "learning_rate": 4.548365507247431e-06, "loss": 0.0605, "step": 34173 }, { "epoch": 0.7530339839252563, "grad_norm": 0.5014187097549438, "learning_rate": 4.547597650574109e-06, "loss": 0.0681, "step": 34174 }, { "epoch": 0.7530560192147725, "grad_norm": 0.8141343593597412, "learning_rate": 4.546829847140489e-06, "loss": 0.0479, "step": 34175 }, { "epoch": 0.7530780545042887, "grad_norm": 0.6897950172424316, "learning_rate": 4.5460620969504626e-06, "loss": 0.0683, "step": 34176 }, { "epoch": 0.7531000897938048, "grad_norm": 0.6048806309700012, "learning_rate": 4.545294400007954e-06, "loss": 0.0769, "step": 34177 }, { "epoch": 0.753122125083321, "grad_norm": 0.31342899799346924, "learning_rate": 4.544526756316863e-06, "loss": 0.0631, "step": 34178 }, { "epoch": 0.753144160372837, "grad_norm": 0.7417230010032654, "learning_rate": 4.543759165881104e-06, "loss": 0.0557, "step": 34179 }, { "epoch": 0.7531661956623532, "grad_norm": 0.7427940964698792, "learning_rate": 4.542991628704589e-06, "loss": 0.0589, "step": 34180 }, { "epoch": 0.7531882309518694, "grad_norm": 0.5702651739120483, "learning_rate": 4.542224144791222e-06, "loss": 0.0562, "step": 34181 }, { "epoch": 0.7532102662413855, "grad_norm": 0.44251298904418945, "learning_rate": 4.541456714144916e-06, "loss": 0.0489, "step": 34182 }, { "epoch": 0.7532323015309017, "grad_norm": 0.5592424273490906, "learning_rate": 4.540689336769579e-06, "loss": 0.0585, "step": 34183 }, { "epoch": 0.7532543368204179, "grad_norm": 0.5183098316192627, "learning_rate": 4.539922012669123e-06, "loss": 0.0423, "step": 34184 }, { "epoch": 0.753276372109934, "grad_norm": 0.9710028767585754, "learning_rate": 4.53915474184745e-06, "loss": 0.0585, "step": 34185 }, { "epoch": 0.7532984073994502, "grad_norm": 0.5675062537193298, "learning_rate": 4.538387524308473e-06, "loss": 0.066, "step": 34186 }, { "epoch": 0.7533204426889664, "grad_norm": 0.5690184831619263, "learning_rate": 4.5376203600561e-06, "loss": 0.0858, "step": 34187 }, { "epoch": 0.7533424779784825, "grad_norm": 0.606326699256897, "learning_rate": 4.536853249094235e-06, "loss": 0.0526, "step": 34188 }, { "epoch": 0.7533645132679987, "grad_norm": 0.359940767288208, "learning_rate": 4.536086191426784e-06, "loss": 0.0566, "step": 34189 }, { "epoch": 0.7533865485575149, "grad_norm": 0.5497743487358093, "learning_rate": 4.53531918705766e-06, "loss": 0.0517, "step": 34190 }, { "epoch": 0.753408583847031, "grad_norm": 0.6872967481613159, "learning_rate": 4.53455223599077e-06, "loss": 0.0591, "step": 34191 }, { "epoch": 0.7534306191365472, "grad_norm": 0.7919753193855286, "learning_rate": 4.533785338230014e-06, "loss": 0.064, "step": 34192 }, { "epoch": 0.7534526544260634, "grad_norm": 0.6447528004646301, "learning_rate": 4.533018493779302e-06, "loss": 0.0615, "step": 34193 }, { "epoch": 0.7534746897155795, "grad_norm": 0.5512693524360657, "learning_rate": 4.532251702642542e-06, "loss": 0.0594, "step": 34194 }, { "epoch": 0.7534967250050957, "grad_norm": 0.6809200644493103, "learning_rate": 4.531484964823635e-06, "loss": 0.0694, "step": 34195 }, { "epoch": 0.7535187602946118, "grad_norm": 0.2296963632106781, "learning_rate": 4.530718280326493e-06, "loss": 0.061, "step": 34196 }, { "epoch": 0.753540795584128, "grad_norm": 0.402404248714447, "learning_rate": 4.529951649155006e-06, "loss": 0.0767, "step": 34197 }, { "epoch": 0.7535628308736442, "grad_norm": 0.45508837699890137, "learning_rate": 4.5291850713131e-06, "loss": 0.0668, "step": 34198 }, { "epoch": 0.7535848661631603, "grad_norm": 0.3023299276828766, "learning_rate": 4.528418546804666e-06, "loss": 0.0623, "step": 34199 }, { "epoch": 0.7536069014526765, "grad_norm": 0.45671412348747253, "learning_rate": 4.5276520756336134e-06, "loss": 0.0413, "step": 34200 }, { "epoch": 0.7536289367421927, "grad_norm": 0.27184295654296875, "learning_rate": 4.526885657803843e-06, "loss": 0.0518, "step": 34201 }, { "epoch": 0.7536509720317088, "grad_norm": 0.6159172058105469, "learning_rate": 4.526119293319257e-06, "loss": 0.0695, "step": 34202 }, { "epoch": 0.753673007321225, "grad_norm": 0.8190770745277405, "learning_rate": 4.525352982183769e-06, "loss": 0.0848, "step": 34203 }, { "epoch": 0.753695042610741, "grad_norm": 0.5768319964408875, "learning_rate": 4.524586724401265e-06, "loss": 0.0551, "step": 34204 }, { "epoch": 0.7537170779002572, "grad_norm": 0.4646309018135071, "learning_rate": 4.523820519975667e-06, "loss": 0.0457, "step": 34205 }, { "epoch": 0.7537391131897734, "grad_norm": 0.6302626132965088, "learning_rate": 4.523054368910865e-06, "loss": 0.0835, "step": 34206 }, { "epoch": 0.7537611484792895, "grad_norm": 0.7635809779167175, "learning_rate": 4.522288271210771e-06, "loss": 0.0923, "step": 34207 }, { "epoch": 0.7537831837688057, "grad_norm": 0.8640017509460449, "learning_rate": 4.521522226879276e-06, "loss": 0.0592, "step": 34208 }, { "epoch": 0.7538052190583219, "grad_norm": 0.620184600353241, "learning_rate": 4.520756235920285e-06, "loss": 0.0642, "step": 34209 }, { "epoch": 0.753827254347838, "grad_norm": 0.9792392253875732, "learning_rate": 4.5199902983377104e-06, "loss": 0.0896, "step": 34210 }, { "epoch": 0.7538492896373542, "grad_norm": 0.5813378691673279, "learning_rate": 4.519224414135437e-06, "loss": 0.0718, "step": 34211 }, { "epoch": 0.7538713249268704, "grad_norm": 0.5704658627510071, "learning_rate": 4.518458583317375e-06, "loss": 0.0699, "step": 34212 }, { "epoch": 0.7538933602163865, "grad_norm": 0.7345489859580994, "learning_rate": 4.5176928058874246e-06, "loss": 0.0674, "step": 34213 }, { "epoch": 0.7539153955059027, "grad_norm": 0.3521798849105835, "learning_rate": 4.516927081849491e-06, "loss": 0.0656, "step": 34214 }, { "epoch": 0.7539374307954189, "grad_norm": 0.4678862392902374, "learning_rate": 4.516161411207464e-06, "loss": 0.0709, "step": 34215 }, { "epoch": 0.753959466084935, "grad_norm": 0.4679665267467499, "learning_rate": 4.515395793965248e-06, "loss": 0.035, "step": 34216 }, { "epoch": 0.7539815013744512, "grad_norm": 0.5360772013664246, "learning_rate": 4.514630230126748e-06, "loss": 0.0353, "step": 34217 }, { "epoch": 0.7540035366639674, "grad_norm": 0.6021727919578552, "learning_rate": 4.513864719695855e-06, "loss": 0.0555, "step": 34218 }, { "epoch": 0.7540255719534835, "grad_norm": 0.5239103436470032, "learning_rate": 4.513099262676478e-06, "loss": 0.0682, "step": 34219 }, { "epoch": 0.7540476072429997, "grad_norm": 0.9001886248588562, "learning_rate": 4.5123338590724985e-06, "loss": 0.0766, "step": 34220 }, { "epoch": 0.7540696425325158, "grad_norm": 0.30709436535835266, "learning_rate": 4.511568508887837e-06, "loss": 0.0517, "step": 34221 }, { "epoch": 0.754091677822032, "grad_norm": 0.8270117044448853, "learning_rate": 4.5108032121263785e-06, "loss": 0.0507, "step": 34222 }, { "epoch": 0.7541137131115482, "grad_norm": 0.7169835567474365, "learning_rate": 4.5100379687920255e-06, "loss": 0.0755, "step": 34223 }, { "epoch": 0.7541357484010643, "grad_norm": 0.5360126495361328, "learning_rate": 4.509272778888672e-06, "loss": 0.0443, "step": 34224 }, { "epoch": 0.7541577836905805, "grad_norm": 0.7590744495391846, "learning_rate": 4.508507642420216e-06, "loss": 0.0639, "step": 34225 }, { "epoch": 0.7541798189800967, "grad_norm": 0.6463221311569214, "learning_rate": 4.507742559390561e-06, "loss": 0.0671, "step": 34226 }, { "epoch": 0.7542018542696128, "grad_norm": 0.6969289779663086, "learning_rate": 4.5069775298035915e-06, "loss": 0.0555, "step": 34227 }, { "epoch": 0.7542238895591289, "grad_norm": 0.5591971278190613, "learning_rate": 4.506212553663222e-06, "loss": 0.0632, "step": 34228 }, { "epoch": 0.754245924848645, "grad_norm": 0.5991675853729248, "learning_rate": 4.5054476309733335e-06, "loss": 0.072, "step": 34229 }, { "epoch": 0.7542679601381612, "grad_norm": 0.5765640735626221, "learning_rate": 4.504682761737834e-06, "loss": 0.0555, "step": 34230 }, { "epoch": 0.7542899954276774, "grad_norm": 0.6588971018791199, "learning_rate": 4.503917945960607e-06, "loss": 0.0802, "step": 34231 }, { "epoch": 0.7543120307171935, "grad_norm": 0.687817394733429, "learning_rate": 4.503153183645554e-06, "loss": 0.0895, "step": 34232 }, { "epoch": 0.7543340660067097, "grad_norm": 0.6534672975540161, "learning_rate": 4.502388474796577e-06, "loss": 0.0566, "step": 34233 }, { "epoch": 0.7543561012962259, "grad_norm": 1.0275453329086304, "learning_rate": 4.501623819417558e-06, "loss": 0.0542, "step": 34234 }, { "epoch": 0.754378136585742, "grad_norm": 0.6321313977241516, "learning_rate": 4.5008592175124e-06, "loss": 0.0609, "step": 34235 }, { "epoch": 0.7544001718752582, "grad_norm": 0.6542023420333862, "learning_rate": 4.500094669084995e-06, "loss": 0.0616, "step": 34236 }, { "epoch": 0.7544222071647744, "grad_norm": 0.7250924110412598, "learning_rate": 4.499330174139245e-06, "loss": 0.053, "step": 34237 }, { "epoch": 0.7544442424542905, "grad_norm": 0.5547693967819214, "learning_rate": 4.498565732679031e-06, "loss": 0.0407, "step": 34238 }, { "epoch": 0.7544662777438067, "grad_norm": 0.644526481628418, "learning_rate": 4.497801344708254e-06, "loss": 0.0713, "step": 34239 }, { "epoch": 0.7544883130333229, "grad_norm": 0.7019640207290649, "learning_rate": 4.497037010230811e-06, "loss": 0.0563, "step": 34240 }, { "epoch": 0.754510348322839, "grad_norm": 0.6357091665267944, "learning_rate": 4.496272729250585e-06, "loss": 0.0721, "step": 34241 }, { "epoch": 0.7545323836123552, "grad_norm": 0.6053047776222229, "learning_rate": 4.49550850177148e-06, "loss": 0.0694, "step": 34242 }, { "epoch": 0.7545544189018714, "grad_norm": 0.5386391282081604, "learning_rate": 4.494744327797374e-06, "loss": 0.084, "step": 34243 }, { "epoch": 0.7545764541913875, "grad_norm": 0.6090055704116821, "learning_rate": 4.493980207332177e-06, "loss": 0.0415, "step": 34244 }, { "epoch": 0.7545984894809037, "grad_norm": 1.175471305847168, "learning_rate": 4.493216140379769e-06, "loss": 0.0936, "step": 34245 }, { "epoch": 0.7546205247704199, "grad_norm": 0.6834486126899719, "learning_rate": 4.492452126944049e-06, "loss": 0.0546, "step": 34246 }, { "epoch": 0.754642560059936, "grad_norm": 0.6014745235443115, "learning_rate": 4.4916881670289e-06, "loss": 0.0669, "step": 34247 }, { "epoch": 0.7546645953494522, "grad_norm": 0.7337460517883301, "learning_rate": 4.49092426063822e-06, "loss": 0.0655, "step": 34248 }, { "epoch": 0.7546866306389683, "grad_norm": 0.3883218467235565, "learning_rate": 4.490160407775902e-06, "loss": 0.0529, "step": 34249 }, { "epoch": 0.7547086659284845, "grad_norm": 0.4837409257888794, "learning_rate": 4.489396608445822e-06, "loss": 0.0423, "step": 34250 }, { "epoch": 0.7547307012180007, "grad_norm": 0.6139019131660461, "learning_rate": 4.488632862651892e-06, "loss": 0.0695, "step": 34251 }, { "epoch": 0.7547527365075168, "grad_norm": 0.9799789190292358, "learning_rate": 4.4878691703979865e-06, "loss": 0.0544, "step": 34252 }, { "epoch": 0.7547747717970329, "grad_norm": 0.8392353653907776, "learning_rate": 4.4871055316880055e-06, "loss": 0.0671, "step": 34253 }, { "epoch": 0.7547968070865491, "grad_norm": 0.5721737146377563, "learning_rate": 4.486341946525829e-06, "loss": 0.054, "step": 34254 }, { "epoch": 0.7548188423760652, "grad_norm": 0.7594188451766968, "learning_rate": 4.48557841491535e-06, "loss": 0.0844, "step": 34255 }, { "epoch": 0.7548408776655814, "grad_norm": 0.5256098508834839, "learning_rate": 4.484814936860465e-06, "loss": 0.048, "step": 34256 }, { "epoch": 0.7548629129550976, "grad_norm": 0.5577238202095032, "learning_rate": 4.4840515123650494e-06, "loss": 0.0513, "step": 34257 }, { "epoch": 0.7548849482446137, "grad_norm": 0.47391605377197266, "learning_rate": 4.483288141432998e-06, "loss": 0.0488, "step": 34258 }, { "epoch": 0.7549069835341299, "grad_norm": 0.6658950448036194, "learning_rate": 4.482524824068202e-06, "loss": 0.0541, "step": 34259 }, { "epoch": 0.754929018823646, "grad_norm": 0.41750746965408325, "learning_rate": 4.481761560274551e-06, "loss": 0.0573, "step": 34260 }, { "epoch": 0.7549510541131622, "grad_norm": 0.4264678955078125, "learning_rate": 4.480998350055921e-06, "loss": 0.0443, "step": 34261 }, { "epoch": 0.7549730894026784, "grad_norm": 0.5458483695983887, "learning_rate": 4.480235193416211e-06, "loss": 0.0473, "step": 34262 }, { "epoch": 0.7549951246921945, "grad_norm": 0.5869609713554382, "learning_rate": 4.479472090359308e-06, "loss": 0.06, "step": 34263 }, { "epoch": 0.7550171599817107, "grad_norm": 0.8523595333099365, "learning_rate": 4.4787090408890904e-06, "loss": 0.0515, "step": 34264 }, { "epoch": 0.7550391952712269, "grad_norm": 0.5355958342552185, "learning_rate": 4.477946045009453e-06, "loss": 0.0515, "step": 34265 }, { "epoch": 0.755061230560743, "grad_norm": 0.5369623899459839, "learning_rate": 4.477183102724271e-06, "loss": 0.0484, "step": 34266 }, { "epoch": 0.7550832658502592, "grad_norm": 0.29115593433380127, "learning_rate": 4.476420214037446e-06, "loss": 0.0405, "step": 34267 }, { "epoch": 0.7551053011397754, "grad_norm": 0.2762019634246826, "learning_rate": 4.475657378952853e-06, "loss": 0.0645, "step": 34268 }, { "epoch": 0.7551273364292915, "grad_norm": 0.5373167395591736, "learning_rate": 4.474894597474384e-06, "loss": 0.0651, "step": 34269 }, { "epoch": 0.7551493717188077, "grad_norm": 0.3323734700679779, "learning_rate": 4.474131869605912e-06, "loss": 0.0735, "step": 34270 }, { "epoch": 0.7551714070083239, "grad_norm": 0.8802348375320435, "learning_rate": 4.473369195351341e-06, "loss": 0.0773, "step": 34271 }, { "epoch": 0.75519344229784, "grad_norm": 0.5848481059074402, "learning_rate": 4.472606574714539e-06, "loss": 0.0627, "step": 34272 }, { "epoch": 0.7552154775873562, "grad_norm": 0.6120537519454956, "learning_rate": 4.471844007699399e-06, "loss": 0.0605, "step": 34273 }, { "epoch": 0.7552375128768724, "grad_norm": 0.7184247970581055, "learning_rate": 4.471081494309806e-06, "loss": 0.0539, "step": 34274 }, { "epoch": 0.7552595481663885, "grad_norm": 0.7107143402099609, "learning_rate": 4.4703190345496376e-06, "loss": 0.0578, "step": 34275 }, { "epoch": 0.7552815834559047, "grad_norm": 0.32671424746513367, "learning_rate": 4.469556628422785e-06, "loss": 0.0819, "step": 34276 }, { "epoch": 0.7553036187454208, "grad_norm": 0.6279906630516052, "learning_rate": 4.468794275933118e-06, "loss": 0.0651, "step": 34277 }, { "epoch": 0.7553256540349369, "grad_norm": 0.43839913606643677, "learning_rate": 4.468031977084538e-06, "loss": 0.0375, "step": 34278 }, { "epoch": 0.7553476893244531, "grad_norm": 0.4097228944301605, "learning_rate": 4.467269731880915e-06, "loss": 0.0375, "step": 34279 }, { "epoch": 0.7553697246139692, "grad_norm": 0.6541405320167542, "learning_rate": 4.466507540326139e-06, "loss": 0.0572, "step": 34280 }, { "epoch": 0.7553917599034854, "grad_norm": 0.4660004675388336, "learning_rate": 4.4657454024240844e-06, "loss": 0.0716, "step": 34281 }, { "epoch": 0.7554137951930016, "grad_norm": 0.41584762930870056, "learning_rate": 4.464983318178639e-06, "loss": 0.0443, "step": 34282 }, { "epoch": 0.7554358304825177, "grad_norm": 0.3974701464176178, "learning_rate": 4.464221287593686e-06, "loss": 0.0572, "step": 34283 }, { "epoch": 0.7554578657720339, "grad_norm": 0.5606102347373962, "learning_rate": 4.463459310673099e-06, "loss": 0.0738, "step": 34284 }, { "epoch": 0.75547990106155, "grad_norm": 0.8388063907623291, "learning_rate": 4.462697387420763e-06, "loss": 0.0698, "step": 34285 }, { "epoch": 0.7555019363510662, "grad_norm": 0.6763433814048767, "learning_rate": 4.461935517840561e-06, "loss": 0.0925, "step": 34286 }, { "epoch": 0.7555239716405824, "grad_norm": 1.1612730026245117, "learning_rate": 4.461173701936378e-06, "loss": 0.0849, "step": 34287 }, { "epoch": 0.7555460069300985, "grad_norm": 1.0423178672790527, "learning_rate": 4.460411939712082e-06, "loss": 0.077, "step": 34288 }, { "epoch": 0.7555680422196147, "grad_norm": 0.6906265616416931, "learning_rate": 4.45965023117156e-06, "loss": 0.0488, "step": 34289 }, { "epoch": 0.7555900775091309, "grad_norm": 0.7793113589286804, "learning_rate": 4.458888576318697e-06, "loss": 0.0587, "step": 34290 }, { "epoch": 0.755612112798647, "grad_norm": 0.5698248744010925, "learning_rate": 4.4581269751573625e-06, "loss": 0.059, "step": 34291 }, { "epoch": 0.7556341480881632, "grad_norm": 0.6198626160621643, "learning_rate": 4.457365427691446e-06, "loss": 0.0822, "step": 34292 }, { "epoch": 0.7556561833776794, "grad_norm": 0.9807233214378357, "learning_rate": 4.456603933924809e-06, "loss": 0.0559, "step": 34293 }, { "epoch": 0.7556782186671955, "grad_norm": 0.5501568913459778, "learning_rate": 4.4558424938613525e-06, "loss": 0.0475, "step": 34294 }, { "epoch": 0.7557002539567117, "grad_norm": 0.32223010063171387, "learning_rate": 4.455081107504939e-06, "loss": 0.0706, "step": 34295 }, { "epoch": 0.7557222892462279, "grad_norm": 0.6285380125045776, "learning_rate": 4.454319774859453e-06, "loss": 0.0954, "step": 34296 }, { "epoch": 0.755744324535744, "grad_norm": 0.6165630221366882, "learning_rate": 4.453558495928777e-06, "loss": 0.0849, "step": 34297 }, { "epoch": 0.7557663598252602, "grad_norm": 0.884212076663971, "learning_rate": 4.452797270716777e-06, "loss": 0.083, "step": 34298 }, { "epoch": 0.7557883951147764, "grad_norm": 0.5324769020080566, "learning_rate": 4.45203609922734e-06, "loss": 0.0738, "step": 34299 }, { "epoch": 0.7558104304042925, "grad_norm": 0.5896562933921814, "learning_rate": 4.4512749814643314e-06, "loss": 0.0521, "step": 34300 }, { "epoch": 0.7558324656938087, "grad_norm": 0.5333959460258484, "learning_rate": 4.450513917431645e-06, "loss": 0.0714, "step": 34301 }, { "epoch": 0.7558545009833247, "grad_norm": 0.6600450277328491, "learning_rate": 4.449752907133144e-06, "loss": 0.0557, "step": 34302 }, { "epoch": 0.7558765362728409, "grad_norm": 0.5083625316619873, "learning_rate": 4.448991950572714e-06, "loss": 0.0479, "step": 34303 }, { "epoch": 0.7558985715623571, "grad_norm": 0.5090032815933228, "learning_rate": 4.448231047754221e-06, "loss": 0.0536, "step": 34304 }, { "epoch": 0.7559206068518732, "grad_norm": 0.5932968258857727, "learning_rate": 4.4474701986815455e-06, "loss": 0.0632, "step": 34305 }, { "epoch": 0.7559426421413894, "grad_norm": 0.48086369037628174, "learning_rate": 4.4467094033585665e-06, "loss": 0.0707, "step": 34306 }, { "epoch": 0.7559646774309056, "grad_norm": 0.5577461123466492, "learning_rate": 4.445948661789153e-06, "loss": 0.0692, "step": 34307 }, { "epoch": 0.7559867127204217, "grad_norm": 0.29946473240852356, "learning_rate": 4.445187973977181e-06, "loss": 0.0676, "step": 34308 }, { "epoch": 0.7560087480099379, "grad_norm": 0.5180684328079224, "learning_rate": 4.444427339926527e-06, "loss": 0.0453, "step": 34309 }, { "epoch": 0.7560307832994541, "grad_norm": 0.8056768774986267, "learning_rate": 4.44366675964107e-06, "loss": 0.0974, "step": 34310 }, { "epoch": 0.7560528185889702, "grad_norm": 0.4408164918422699, "learning_rate": 4.4429062331246726e-06, "loss": 0.0472, "step": 34311 }, { "epoch": 0.7560748538784864, "grad_norm": 0.5559694170951843, "learning_rate": 4.4421457603812175e-06, "loss": 0.0604, "step": 34312 }, { "epoch": 0.7560968891680026, "grad_norm": 0.8563210964202881, "learning_rate": 4.441385341414577e-06, "loss": 0.0823, "step": 34313 }, { "epoch": 0.7561189244575187, "grad_norm": 0.46875736117362976, "learning_rate": 4.440624976228621e-06, "loss": 0.0462, "step": 34314 }, { "epoch": 0.7561409597470349, "grad_norm": 0.5109974145889282, "learning_rate": 4.439864664827224e-06, "loss": 0.0535, "step": 34315 }, { "epoch": 0.756162995036551, "grad_norm": 0.510762631893158, "learning_rate": 4.439104407214259e-06, "loss": 0.0562, "step": 34316 }, { "epoch": 0.7561850303260672, "grad_norm": 0.6144523620605469, "learning_rate": 4.438344203393603e-06, "loss": 0.0467, "step": 34317 }, { "epoch": 0.7562070656155834, "grad_norm": 0.5145596861839294, "learning_rate": 4.437584053369118e-06, "loss": 0.0398, "step": 34318 }, { "epoch": 0.7562291009050995, "grad_norm": 0.6135131120681763, "learning_rate": 4.4368239571446825e-06, "loss": 0.0613, "step": 34319 }, { "epoch": 0.7562511361946157, "grad_norm": 0.6461144685745239, "learning_rate": 4.436063914724171e-06, "loss": 0.066, "step": 34320 }, { "epoch": 0.7562731714841319, "grad_norm": 0.7145212292671204, "learning_rate": 4.435303926111447e-06, "loss": 0.0546, "step": 34321 }, { "epoch": 0.756295206773648, "grad_norm": 0.8098433613777161, "learning_rate": 4.434543991310389e-06, "loss": 0.0556, "step": 34322 }, { "epoch": 0.7563172420631642, "grad_norm": 0.571766197681427, "learning_rate": 4.433784110324856e-06, "loss": 0.0328, "step": 34323 }, { "epoch": 0.7563392773526804, "grad_norm": 0.6318618655204773, "learning_rate": 4.4330242831587355e-06, "loss": 0.0682, "step": 34324 }, { "epoch": 0.7563613126421965, "grad_norm": 0.501361072063446, "learning_rate": 4.432264509815882e-06, "loss": 0.0588, "step": 34325 }, { "epoch": 0.7563833479317127, "grad_norm": 0.6476224660873413, "learning_rate": 4.43150479030018e-06, "loss": 0.0682, "step": 34326 }, { "epoch": 0.7564053832212287, "grad_norm": 0.24429598450660706, "learning_rate": 4.430745124615486e-06, "loss": 0.0445, "step": 34327 }, { "epoch": 0.7564274185107449, "grad_norm": 0.3774668574333191, "learning_rate": 4.429985512765673e-06, "loss": 0.0601, "step": 34328 }, { "epoch": 0.7564494538002611, "grad_norm": 0.5953855514526367, "learning_rate": 4.429225954754617e-06, "loss": 0.06, "step": 34329 }, { "epoch": 0.7564714890897772, "grad_norm": 0.5041013956069946, "learning_rate": 4.428466450586178e-06, "loss": 0.0563, "step": 34330 }, { "epoch": 0.7564935243792934, "grad_norm": 0.5724736452102661, "learning_rate": 4.4277070002642285e-06, "loss": 0.0832, "step": 34331 }, { "epoch": 0.7565155596688096, "grad_norm": 0.5978306531906128, "learning_rate": 4.4269476037926356e-06, "loss": 0.0965, "step": 34332 }, { "epoch": 0.7565375949583257, "grad_norm": 0.9776435494422913, "learning_rate": 4.426188261175273e-06, "loss": 0.0631, "step": 34333 }, { "epoch": 0.7565596302478419, "grad_norm": 0.6551445722579956, "learning_rate": 4.4254289724159996e-06, "loss": 0.0663, "step": 34334 }, { "epoch": 0.7565816655373581, "grad_norm": 0.8791419863700867, "learning_rate": 4.424669737518688e-06, "loss": 0.0843, "step": 34335 }, { "epoch": 0.7566037008268742, "grad_norm": 0.3858891725540161, "learning_rate": 4.423910556487208e-06, "loss": 0.0552, "step": 34336 }, { "epoch": 0.7566257361163904, "grad_norm": 1.1773366928100586, "learning_rate": 4.423151429325417e-06, "loss": 0.0891, "step": 34337 }, { "epoch": 0.7566477714059066, "grad_norm": 0.702325701713562, "learning_rate": 4.422392356037189e-06, "loss": 0.0788, "step": 34338 }, { "epoch": 0.7566698066954227, "grad_norm": 0.6623531579971313, "learning_rate": 4.421633336626388e-06, "loss": 0.047, "step": 34339 }, { "epoch": 0.7566918419849389, "grad_norm": 0.622549831867218, "learning_rate": 4.420874371096886e-06, "loss": 0.0706, "step": 34340 }, { "epoch": 0.756713877274455, "grad_norm": 0.41911187767982483, "learning_rate": 4.42011545945254e-06, "loss": 0.049, "step": 34341 }, { "epoch": 0.7567359125639712, "grad_norm": 0.9085838794708252, "learning_rate": 4.419356601697217e-06, "loss": 0.0709, "step": 34342 }, { "epoch": 0.7567579478534874, "grad_norm": 0.6090498566627502, "learning_rate": 4.4185977978347915e-06, "loss": 0.0778, "step": 34343 }, { "epoch": 0.7567799831430035, "grad_norm": 0.5532763004302979, "learning_rate": 4.417839047869117e-06, "loss": 0.0665, "step": 34344 }, { "epoch": 0.7568020184325197, "grad_norm": 0.8227800130844116, "learning_rate": 4.417080351804067e-06, "loss": 0.0834, "step": 34345 }, { "epoch": 0.7568240537220359, "grad_norm": 0.575904905796051, "learning_rate": 4.416321709643491e-06, "loss": 0.0563, "step": 34346 }, { "epoch": 0.756846089011552, "grad_norm": 0.7365079522132874, "learning_rate": 4.415563121391276e-06, "loss": 0.0822, "step": 34347 }, { "epoch": 0.7568681243010682, "grad_norm": 0.6464969515800476, "learning_rate": 4.414804587051268e-06, "loss": 0.0703, "step": 34348 }, { "epoch": 0.7568901595905844, "grad_norm": 0.5659552216529846, "learning_rate": 4.414046106627341e-06, "loss": 0.073, "step": 34349 }, { "epoch": 0.7569121948801005, "grad_norm": 0.7469918131828308, "learning_rate": 4.413287680123348e-06, "loss": 0.0749, "step": 34350 }, { "epoch": 0.7569342301696167, "grad_norm": 0.5543291568756104, "learning_rate": 4.412529307543159e-06, "loss": 0.0486, "step": 34351 }, { "epoch": 0.7569562654591327, "grad_norm": 0.6187534928321838, "learning_rate": 4.411770988890641e-06, "loss": 0.0729, "step": 34352 }, { "epoch": 0.7569783007486489, "grad_norm": 0.6421672701835632, "learning_rate": 4.411012724169645e-06, "loss": 0.0586, "step": 34353 }, { "epoch": 0.7570003360381651, "grad_norm": 0.6516799926757812, "learning_rate": 4.410254513384041e-06, "loss": 0.0712, "step": 34354 }, { "epoch": 0.7570223713276812, "grad_norm": 0.4885849356651306, "learning_rate": 4.409496356537689e-06, "loss": 0.0504, "step": 34355 }, { "epoch": 0.7570444066171974, "grad_norm": 0.42654678225517273, "learning_rate": 4.408738253634455e-06, "loss": 0.0623, "step": 34356 }, { "epoch": 0.7570664419067136, "grad_norm": 0.48647865653038025, "learning_rate": 4.407980204678193e-06, "loss": 0.0469, "step": 34357 }, { "epoch": 0.7570884771962297, "grad_norm": 0.5502539277076721, "learning_rate": 4.407222209672766e-06, "loss": 0.0533, "step": 34358 }, { "epoch": 0.7571105124857459, "grad_norm": 0.30966946482658386, "learning_rate": 4.406464268622043e-06, "loss": 0.0514, "step": 34359 }, { "epoch": 0.7571325477752621, "grad_norm": 0.8856409788131714, "learning_rate": 4.4057063815298735e-06, "loss": 0.0827, "step": 34360 }, { "epoch": 0.7571545830647782, "grad_norm": 0.6487370729446411, "learning_rate": 4.4049485484001226e-06, "loss": 0.0588, "step": 34361 }, { "epoch": 0.7571766183542944, "grad_norm": 0.3570699691772461, "learning_rate": 4.404190769236649e-06, "loss": 0.0438, "step": 34362 }, { "epoch": 0.7571986536438106, "grad_norm": 0.4599376618862152, "learning_rate": 4.403433044043319e-06, "loss": 0.0627, "step": 34363 }, { "epoch": 0.7572206889333267, "grad_norm": 0.6148172616958618, "learning_rate": 4.402675372823981e-06, "loss": 0.0524, "step": 34364 }, { "epoch": 0.7572427242228429, "grad_norm": 0.4187820851802826, "learning_rate": 4.4019177555825015e-06, "loss": 0.0584, "step": 34365 }, { "epoch": 0.757264759512359, "grad_norm": 0.36001360416412354, "learning_rate": 4.401160192322743e-06, "loss": 0.0501, "step": 34366 }, { "epoch": 0.7572867948018752, "grad_norm": 0.714611828327179, "learning_rate": 4.400402683048554e-06, "loss": 0.0772, "step": 34367 }, { "epoch": 0.7573088300913914, "grad_norm": 0.5986682176589966, "learning_rate": 4.3996452277638035e-06, "loss": 0.0785, "step": 34368 }, { "epoch": 0.7573308653809075, "grad_norm": 0.6739855408668518, "learning_rate": 4.3988878264723335e-06, "loss": 0.0867, "step": 34369 }, { "epoch": 0.7573529006704237, "grad_norm": 0.48311150074005127, "learning_rate": 4.398130479178022e-06, "loss": 0.0533, "step": 34370 }, { "epoch": 0.7573749359599399, "grad_norm": 0.3847799301147461, "learning_rate": 4.397373185884713e-06, "loss": 0.0708, "step": 34371 }, { "epoch": 0.757396971249456, "grad_norm": 0.8490183353424072, "learning_rate": 4.396615946596273e-06, "loss": 0.0826, "step": 34372 }, { "epoch": 0.7574190065389722, "grad_norm": 0.8601558208465576, "learning_rate": 4.395858761316543e-06, "loss": 0.0569, "step": 34373 }, { "epoch": 0.7574410418284884, "grad_norm": 0.4988044500350952, "learning_rate": 4.395101630049402e-06, "loss": 0.0519, "step": 34374 }, { "epoch": 0.7574630771180045, "grad_norm": 0.5692751407623291, "learning_rate": 4.394344552798695e-06, "loss": 0.0493, "step": 34375 }, { "epoch": 0.7574851124075206, "grad_norm": 0.9045118093490601, "learning_rate": 4.3935875295682724e-06, "loss": 0.0834, "step": 34376 }, { "epoch": 0.7575071476970368, "grad_norm": 0.467537522315979, "learning_rate": 4.392830560361997e-06, "loss": 0.072, "step": 34377 }, { "epoch": 0.7575291829865529, "grad_norm": 0.9004090428352356, "learning_rate": 4.392073645183724e-06, "loss": 0.0655, "step": 34378 }, { "epoch": 0.7575512182760691, "grad_norm": 0.49731260538101196, "learning_rate": 4.3913167840373115e-06, "loss": 0.0576, "step": 34379 }, { "epoch": 0.7575732535655852, "grad_norm": 0.8480799794197083, "learning_rate": 4.390559976926608e-06, "loss": 0.0802, "step": 34380 }, { "epoch": 0.7575952888551014, "grad_norm": 0.6062888503074646, "learning_rate": 4.389803223855472e-06, "loss": 0.077, "step": 34381 }, { "epoch": 0.7576173241446176, "grad_norm": 0.5596257448196411, "learning_rate": 4.389046524827757e-06, "loss": 0.0698, "step": 34382 }, { "epoch": 0.7576393594341337, "grad_norm": 0.6310461759567261, "learning_rate": 4.388289879847323e-06, "loss": 0.0697, "step": 34383 }, { "epoch": 0.7576613947236499, "grad_norm": 0.40962037444114685, "learning_rate": 4.3875332889180155e-06, "loss": 0.0573, "step": 34384 }, { "epoch": 0.7576834300131661, "grad_norm": 0.7112342119216919, "learning_rate": 4.38677675204369e-06, "loss": 0.0531, "step": 34385 }, { "epoch": 0.7577054653026822, "grad_norm": 0.6861449480056763, "learning_rate": 4.386020269228207e-06, "loss": 0.0699, "step": 34386 }, { "epoch": 0.7577275005921984, "grad_norm": 0.564303457736969, "learning_rate": 4.385263840475411e-06, "loss": 0.0481, "step": 34387 }, { "epoch": 0.7577495358817146, "grad_norm": 0.3733145296573639, "learning_rate": 4.384507465789157e-06, "loss": 0.0343, "step": 34388 }, { "epoch": 0.7577715711712307, "grad_norm": 0.5579960942268372, "learning_rate": 4.3837511451733e-06, "loss": 0.0757, "step": 34389 }, { "epoch": 0.7577936064607469, "grad_norm": 0.5152192115783691, "learning_rate": 4.382994878631694e-06, "loss": 0.0724, "step": 34390 }, { "epoch": 0.7578156417502631, "grad_norm": 0.6924278736114502, "learning_rate": 4.382238666168186e-06, "loss": 0.0457, "step": 34391 }, { "epoch": 0.7578376770397792, "grad_norm": 0.5646365284919739, "learning_rate": 4.381482507786629e-06, "loss": 0.0697, "step": 34392 }, { "epoch": 0.7578597123292954, "grad_norm": 0.7084037661552429, "learning_rate": 4.380726403490881e-06, "loss": 0.091, "step": 34393 }, { "epoch": 0.7578817476188116, "grad_norm": 0.38224679231643677, "learning_rate": 4.379970353284781e-06, "loss": 0.0413, "step": 34394 }, { "epoch": 0.7579037829083277, "grad_norm": 0.3678559362888336, "learning_rate": 4.379214357172195e-06, "loss": 0.0668, "step": 34395 }, { "epoch": 0.7579258181978439, "grad_norm": 0.6226396560668945, "learning_rate": 4.378458415156954e-06, "loss": 0.0615, "step": 34396 }, { "epoch": 0.75794785348736, "grad_norm": 0.4259420931339264, "learning_rate": 4.377702527242931e-06, "loss": 0.06, "step": 34397 }, { "epoch": 0.7579698887768762, "grad_norm": 0.6905208826065063, "learning_rate": 4.376946693433958e-06, "loss": 0.0608, "step": 34398 }, { "epoch": 0.7579919240663924, "grad_norm": 0.5924425721168518, "learning_rate": 4.376190913733898e-06, "loss": 0.0549, "step": 34399 }, { "epoch": 0.7580139593559085, "grad_norm": 0.24420896172523499, "learning_rate": 4.375435188146591e-06, "loss": 0.0806, "step": 34400 }, { "epoch": 0.7580359946454246, "grad_norm": 0.5619858503341675, "learning_rate": 4.374679516675889e-06, "loss": 0.0575, "step": 34401 }, { "epoch": 0.7580580299349408, "grad_norm": 0.2885383367538452, "learning_rate": 4.373923899325646e-06, "loss": 0.0348, "step": 34402 }, { "epoch": 0.7580800652244569, "grad_norm": 0.22759050130844116, "learning_rate": 4.373168336099703e-06, "loss": 0.0395, "step": 34403 }, { "epoch": 0.7581021005139731, "grad_norm": 0.42758530378341675, "learning_rate": 4.372412827001912e-06, "loss": 0.0661, "step": 34404 }, { "epoch": 0.7581241358034893, "grad_norm": 0.8478033542633057, "learning_rate": 4.371657372036121e-06, "loss": 0.0898, "step": 34405 }, { "epoch": 0.7581461710930054, "grad_norm": 0.7583177089691162, "learning_rate": 4.370901971206184e-06, "loss": 0.061, "step": 34406 }, { "epoch": 0.7581682063825216, "grad_norm": 0.3640385866165161, "learning_rate": 4.3701466245159375e-06, "loss": 0.0741, "step": 34407 }, { "epoch": 0.7581902416720377, "grad_norm": 0.8791671991348267, "learning_rate": 4.3693913319692344e-06, "loss": 0.0824, "step": 34408 }, { "epoch": 0.7582122769615539, "grad_norm": 0.6808306574821472, "learning_rate": 4.368636093569927e-06, "loss": 0.0641, "step": 34409 }, { "epoch": 0.7582343122510701, "grad_norm": 0.6391271948814392, "learning_rate": 4.367880909321853e-06, "loss": 0.0725, "step": 34410 }, { "epoch": 0.7582563475405862, "grad_norm": 0.7032857537269592, "learning_rate": 4.367125779228861e-06, "loss": 0.0833, "step": 34411 }, { "epoch": 0.7582783828301024, "grad_norm": 0.3981788158416748, "learning_rate": 4.366370703294802e-06, "loss": 0.0745, "step": 34412 }, { "epoch": 0.7583004181196186, "grad_norm": 0.39784926176071167, "learning_rate": 4.365615681523521e-06, "loss": 0.0507, "step": 34413 }, { "epoch": 0.7583224534091347, "grad_norm": 0.582832932472229, "learning_rate": 4.364860713918858e-06, "loss": 0.0931, "step": 34414 }, { "epoch": 0.7583444886986509, "grad_norm": 0.43668824434280396, "learning_rate": 4.364105800484662e-06, "loss": 0.07, "step": 34415 }, { "epoch": 0.7583665239881671, "grad_norm": 0.32333487272262573, "learning_rate": 4.363350941224785e-06, "loss": 0.0483, "step": 34416 }, { "epoch": 0.7583885592776832, "grad_norm": 0.622748076915741, "learning_rate": 4.36259613614306e-06, "loss": 0.0567, "step": 34417 }, { "epoch": 0.7584105945671994, "grad_norm": 0.5055369734764099, "learning_rate": 4.361841385243342e-06, "loss": 0.0602, "step": 34418 }, { "epoch": 0.7584326298567156, "grad_norm": 0.7335903644561768, "learning_rate": 4.361086688529461e-06, "loss": 0.0741, "step": 34419 }, { "epoch": 0.7584546651462317, "grad_norm": 0.4588213264942169, "learning_rate": 4.3603320460052795e-06, "loss": 0.0439, "step": 34420 }, { "epoch": 0.7584767004357479, "grad_norm": 1.0504860877990723, "learning_rate": 4.3595774576746274e-06, "loss": 0.0862, "step": 34421 }, { "epoch": 0.758498735725264, "grad_norm": 0.639555037021637, "learning_rate": 4.358822923541358e-06, "loss": 0.0465, "step": 34422 }, { "epoch": 0.7585207710147802, "grad_norm": 0.6370387673377991, "learning_rate": 4.3580684436093045e-06, "loss": 0.0429, "step": 34423 }, { "epoch": 0.7585428063042964, "grad_norm": 0.4780363440513611, "learning_rate": 4.357314017882317e-06, "loss": 0.055, "step": 34424 }, { "epoch": 0.7585648415938125, "grad_norm": 0.666144847869873, "learning_rate": 4.35655964636424e-06, "loss": 0.0671, "step": 34425 }, { "epoch": 0.7585868768833286, "grad_norm": 0.68755704164505, "learning_rate": 4.355805329058906e-06, "loss": 0.0581, "step": 34426 }, { "epoch": 0.7586089121728448, "grad_norm": 0.5001732110977173, "learning_rate": 4.355051065970166e-06, "loss": 0.0573, "step": 34427 }, { "epoch": 0.7586309474623609, "grad_norm": 0.7281419634819031, "learning_rate": 4.354296857101857e-06, "loss": 0.06, "step": 34428 }, { "epoch": 0.7586529827518771, "grad_norm": 0.5644711852073669, "learning_rate": 4.353542702457828e-06, "loss": 0.0726, "step": 34429 }, { "epoch": 0.7586750180413933, "grad_norm": 0.4772859215736389, "learning_rate": 4.352788602041912e-06, "loss": 0.0354, "step": 34430 }, { "epoch": 0.7586970533309094, "grad_norm": 0.6906876564025879, "learning_rate": 4.3520345558579505e-06, "loss": 0.0558, "step": 34431 }, { "epoch": 0.7587190886204256, "grad_norm": 0.5962116122245789, "learning_rate": 4.351280563909794e-06, "loss": 0.0421, "step": 34432 }, { "epoch": 0.7587411239099418, "grad_norm": 1.1567193269729614, "learning_rate": 4.350526626201269e-06, "loss": 0.0691, "step": 34433 }, { "epoch": 0.7587631591994579, "grad_norm": 0.6750131845474243, "learning_rate": 4.349772742736225e-06, "loss": 0.0528, "step": 34434 }, { "epoch": 0.7587851944889741, "grad_norm": 0.1903391182422638, "learning_rate": 4.349018913518497e-06, "loss": 0.0553, "step": 34435 }, { "epoch": 0.7588072297784902, "grad_norm": 0.5063775181770325, "learning_rate": 4.3482651385519325e-06, "loss": 0.0622, "step": 34436 }, { "epoch": 0.7588292650680064, "grad_norm": 0.2806068956851959, "learning_rate": 4.347511417840362e-06, "loss": 0.0575, "step": 34437 }, { "epoch": 0.7588513003575226, "grad_norm": 0.4653323292732239, "learning_rate": 4.346757751387628e-06, "loss": 0.0636, "step": 34438 }, { "epoch": 0.7588733356470387, "grad_norm": 0.44880175590515137, "learning_rate": 4.346004139197574e-06, "loss": 0.0568, "step": 34439 }, { "epoch": 0.7588953709365549, "grad_norm": 0.7505001425743103, "learning_rate": 4.345250581274029e-06, "loss": 0.0894, "step": 34440 }, { "epoch": 0.7589174062260711, "grad_norm": 0.3770471215248108, "learning_rate": 4.344497077620841e-06, "loss": 0.0648, "step": 34441 }, { "epoch": 0.7589394415155872, "grad_norm": 0.6041288375854492, "learning_rate": 4.343743628241834e-06, "loss": 0.0485, "step": 34442 }, { "epoch": 0.7589614768051034, "grad_norm": 0.8211307525634766, "learning_rate": 4.342990233140865e-06, "loss": 0.0759, "step": 34443 }, { "epoch": 0.7589835120946196, "grad_norm": 1.5622105598449707, "learning_rate": 4.342236892321756e-06, "loss": 0.0993, "step": 34444 }, { "epoch": 0.7590055473841357, "grad_norm": 0.7252174615859985, "learning_rate": 4.341483605788355e-06, "loss": 0.0521, "step": 34445 }, { "epoch": 0.7590275826736519, "grad_norm": 0.1699369251728058, "learning_rate": 4.340730373544488e-06, "loss": 0.0658, "step": 34446 }, { "epoch": 0.759049617963168, "grad_norm": 0.6919301152229309, "learning_rate": 4.339977195593998e-06, "loss": 0.0814, "step": 34447 }, { "epoch": 0.7590716532526842, "grad_norm": 0.5552132725715637, "learning_rate": 4.3392240719407254e-06, "loss": 0.0734, "step": 34448 }, { "epoch": 0.7590936885422004, "grad_norm": 0.7894052267074585, "learning_rate": 4.338471002588492e-06, "loss": 0.0606, "step": 34449 }, { "epoch": 0.7591157238317165, "grad_norm": 0.47556689381599426, "learning_rate": 4.337717987541152e-06, "loss": 0.0677, "step": 34450 }, { "epoch": 0.7591377591212326, "grad_norm": 0.746299684047699, "learning_rate": 4.336965026802526e-06, "loss": 0.0915, "step": 34451 }, { "epoch": 0.7591597944107488, "grad_norm": 0.25495439767837524, "learning_rate": 4.336212120376461e-06, "loss": 0.0595, "step": 34452 }, { "epoch": 0.7591818297002649, "grad_norm": 0.6687781810760498, "learning_rate": 4.335459268266781e-06, "loss": 0.084, "step": 34453 }, { "epoch": 0.7592038649897811, "grad_norm": 0.632019579410553, "learning_rate": 4.334706470477326e-06, "loss": 0.0549, "step": 34454 }, { "epoch": 0.7592259002792973, "grad_norm": 0.8776763081550598, "learning_rate": 4.333953727011935e-06, "loss": 0.0568, "step": 34455 }, { "epoch": 0.7592479355688134, "grad_norm": 0.41207757592201233, "learning_rate": 4.333201037874432e-06, "loss": 0.0467, "step": 34456 }, { "epoch": 0.7592699708583296, "grad_norm": 0.6168073415756226, "learning_rate": 4.332448403068655e-06, "loss": 0.0511, "step": 34457 }, { "epoch": 0.7592920061478458, "grad_norm": 0.4319254159927368, "learning_rate": 4.331695822598441e-06, "loss": 0.0497, "step": 34458 }, { "epoch": 0.7593140414373619, "grad_norm": 0.9845623970031738, "learning_rate": 4.330943296467625e-06, "loss": 0.0936, "step": 34459 }, { "epoch": 0.7593360767268781, "grad_norm": 0.3708072304725647, "learning_rate": 4.3301908246800305e-06, "loss": 0.0539, "step": 34460 }, { "epoch": 0.7593581120163942, "grad_norm": 0.4529569745063782, "learning_rate": 4.329438407239496e-06, "loss": 0.0366, "step": 34461 }, { "epoch": 0.7593801473059104, "grad_norm": 0.3562944829463959, "learning_rate": 4.328686044149859e-06, "loss": 0.0642, "step": 34462 }, { "epoch": 0.7594021825954266, "grad_norm": 0.5789563655853271, "learning_rate": 4.327933735414941e-06, "loss": 0.0673, "step": 34463 }, { "epoch": 0.7594242178849427, "grad_norm": 0.7904791235923767, "learning_rate": 4.327181481038583e-06, "loss": 0.0609, "step": 34464 }, { "epoch": 0.7594462531744589, "grad_norm": 1.228055477142334, "learning_rate": 4.326429281024606e-06, "loss": 0.0833, "step": 34465 }, { "epoch": 0.7594682884639751, "grad_norm": 0.5995906591415405, "learning_rate": 4.3256771353768565e-06, "loss": 0.0647, "step": 34466 }, { "epoch": 0.7594903237534912, "grad_norm": 0.4510434865951538, "learning_rate": 4.3249250440991525e-06, "loss": 0.0706, "step": 34467 }, { "epoch": 0.7595123590430074, "grad_norm": 0.5632002353668213, "learning_rate": 4.324173007195335e-06, "loss": 0.044, "step": 34468 }, { "epoch": 0.7595343943325236, "grad_norm": 0.6594470739364624, "learning_rate": 4.323421024669224e-06, "loss": 0.068, "step": 34469 }, { "epoch": 0.7595564296220397, "grad_norm": 0.7255129218101501, "learning_rate": 4.322669096524655e-06, "loss": 0.0817, "step": 34470 }, { "epoch": 0.7595784649115559, "grad_norm": 0.8764404654502869, "learning_rate": 4.321917222765462e-06, "loss": 0.0658, "step": 34471 }, { "epoch": 0.7596005002010721, "grad_norm": 0.5686691999435425, "learning_rate": 4.321165403395462e-06, "loss": 0.0564, "step": 34472 }, { "epoch": 0.7596225354905882, "grad_norm": 0.6520390510559082, "learning_rate": 4.320413638418503e-06, "loss": 0.0691, "step": 34473 }, { "epoch": 0.7596445707801044, "grad_norm": 0.8720150589942932, "learning_rate": 4.319661927838398e-06, "loss": 0.0511, "step": 34474 }, { "epoch": 0.7596666060696204, "grad_norm": 0.42190229892730713, "learning_rate": 4.318910271658988e-06, "loss": 0.0515, "step": 34475 }, { "epoch": 0.7596886413591366, "grad_norm": 0.7218167185783386, "learning_rate": 4.318158669884091e-06, "loss": 0.0758, "step": 34476 }, { "epoch": 0.7597106766486528, "grad_norm": 0.5332818031311035, "learning_rate": 4.317407122517539e-06, "loss": 0.0778, "step": 34477 }, { "epoch": 0.7597327119381689, "grad_norm": 0.6041039824485779, "learning_rate": 4.316655629563166e-06, "loss": 0.0821, "step": 34478 }, { "epoch": 0.7597547472276851, "grad_norm": 0.6362594366073608, "learning_rate": 4.315904191024791e-06, "loss": 0.0584, "step": 34479 }, { "epoch": 0.7597767825172013, "grad_norm": 0.6125777363777161, "learning_rate": 4.315152806906245e-06, "loss": 0.0457, "step": 34480 }, { "epoch": 0.7597988178067174, "grad_norm": 0.4927225410938263, "learning_rate": 4.314401477211354e-06, "loss": 0.0686, "step": 34481 }, { "epoch": 0.7598208530962336, "grad_norm": 0.32615625858306885, "learning_rate": 4.313650201943952e-06, "loss": 0.0434, "step": 34482 }, { "epoch": 0.7598428883857498, "grad_norm": 0.35980838537216187, "learning_rate": 4.312898981107855e-06, "loss": 0.0422, "step": 34483 }, { "epoch": 0.7598649236752659, "grad_norm": 0.5691125392913818, "learning_rate": 4.3121478147068945e-06, "loss": 0.0698, "step": 34484 }, { "epoch": 0.7598869589647821, "grad_norm": 0.5456891059875488, "learning_rate": 4.311396702744897e-06, "loss": 0.0734, "step": 34485 }, { "epoch": 0.7599089942542983, "grad_norm": 0.6555920839309692, "learning_rate": 4.310645645225691e-06, "loss": 0.0609, "step": 34486 }, { "epoch": 0.7599310295438144, "grad_norm": 0.5087719559669495, "learning_rate": 4.3098946421531e-06, "loss": 0.0421, "step": 34487 }, { "epoch": 0.7599530648333306, "grad_norm": 0.4808070659637451, "learning_rate": 4.309143693530939e-06, "loss": 0.0523, "step": 34488 }, { "epoch": 0.7599751001228467, "grad_norm": 0.7147126793861389, "learning_rate": 4.308392799363051e-06, "loss": 0.1059, "step": 34489 }, { "epoch": 0.7599971354123629, "grad_norm": 0.46245574951171875, "learning_rate": 4.3076419596532465e-06, "loss": 0.0442, "step": 34490 }, { "epoch": 0.7600191707018791, "grad_norm": 0.48965534567832947, "learning_rate": 4.306891174405361e-06, "loss": 0.0594, "step": 34491 }, { "epoch": 0.7600412059913952, "grad_norm": 0.45194491744041443, "learning_rate": 4.306140443623203e-06, "loss": 0.0378, "step": 34492 }, { "epoch": 0.7600632412809114, "grad_norm": 0.3886886239051819, "learning_rate": 4.305389767310616e-06, "loss": 0.0533, "step": 34493 }, { "epoch": 0.7600852765704276, "grad_norm": 0.4537181556224823, "learning_rate": 4.304639145471407e-06, "loss": 0.0421, "step": 34494 }, { "epoch": 0.7601073118599437, "grad_norm": 0.5292421579360962, "learning_rate": 4.303888578109408e-06, "loss": 0.061, "step": 34495 }, { "epoch": 0.7601293471494599, "grad_norm": 0.9158676862716675, "learning_rate": 4.303138065228444e-06, "loss": 0.0901, "step": 34496 }, { "epoch": 0.7601513824389761, "grad_norm": 0.5611627101898193, "learning_rate": 4.302387606832332e-06, "loss": 0.0468, "step": 34497 }, { "epoch": 0.7601734177284922, "grad_norm": 0.9095615744590759, "learning_rate": 4.301637202924897e-06, "loss": 0.0651, "step": 34498 }, { "epoch": 0.7601954530180084, "grad_norm": 0.6437177658081055, "learning_rate": 4.300886853509954e-06, "loss": 0.0759, "step": 34499 }, { "epoch": 0.7602174883075244, "grad_norm": 0.3714863955974579, "learning_rate": 4.300136558591341e-06, "loss": 0.0425, "step": 34500 }, { "epoch": 0.7602395235970406, "grad_norm": 1.181660532951355, "learning_rate": 4.299386318172865e-06, "loss": 0.1216, "step": 34501 }, { "epoch": 0.7602615588865568, "grad_norm": 0.6655150651931763, "learning_rate": 4.298636132258355e-06, "loss": 0.0486, "step": 34502 }, { "epoch": 0.7602835941760729, "grad_norm": 0.6097142696380615, "learning_rate": 4.297886000851629e-06, "loss": 0.0545, "step": 34503 }, { "epoch": 0.7603056294655891, "grad_norm": 0.3518134355545044, "learning_rate": 4.297135923956505e-06, "loss": 0.0575, "step": 34504 }, { "epoch": 0.7603276647551053, "grad_norm": 0.5133174657821655, "learning_rate": 4.296385901576814e-06, "loss": 0.0488, "step": 34505 }, { "epoch": 0.7603497000446214, "grad_norm": 0.3226068913936615, "learning_rate": 4.295635933716364e-06, "loss": 0.0487, "step": 34506 }, { "epoch": 0.7603717353341376, "grad_norm": 1.2145541906356812, "learning_rate": 4.29488602037898e-06, "loss": 0.0859, "step": 34507 }, { "epoch": 0.7603937706236538, "grad_norm": 0.7549628615379333, "learning_rate": 4.294136161568482e-06, "loss": 0.0734, "step": 34508 }, { "epoch": 0.7604158059131699, "grad_norm": 0.5684276223182678, "learning_rate": 4.293386357288696e-06, "loss": 0.0537, "step": 34509 }, { "epoch": 0.7604378412026861, "grad_norm": 0.5167282223701477, "learning_rate": 4.292636607543427e-06, "loss": 0.0716, "step": 34510 }, { "epoch": 0.7604598764922023, "grad_norm": 0.541479229927063, "learning_rate": 4.291886912336504e-06, "loss": 0.0647, "step": 34511 }, { "epoch": 0.7604819117817184, "grad_norm": 0.5812724232673645, "learning_rate": 4.291137271671747e-06, "loss": 0.074, "step": 34512 }, { "epoch": 0.7605039470712346, "grad_norm": 0.7486585378646851, "learning_rate": 4.290387685552965e-06, "loss": 0.0545, "step": 34513 }, { "epoch": 0.7605259823607508, "grad_norm": 0.41786593198776245, "learning_rate": 4.2896381539839865e-06, "loss": 0.0508, "step": 34514 }, { "epoch": 0.7605480176502669, "grad_norm": 0.5180377960205078, "learning_rate": 4.288888676968614e-06, "loss": 0.049, "step": 34515 }, { "epoch": 0.7605700529397831, "grad_norm": 0.5129451155662537, "learning_rate": 4.288139254510687e-06, "loss": 0.0809, "step": 34516 }, { "epoch": 0.7605920882292992, "grad_norm": 0.4885697662830353, "learning_rate": 4.287389886614005e-06, "loss": 0.0493, "step": 34517 }, { "epoch": 0.7606141235188154, "grad_norm": 0.48072493076324463, "learning_rate": 4.28664057328239e-06, "loss": 0.06, "step": 34518 }, { "epoch": 0.7606361588083316, "grad_norm": 0.45072224736213684, "learning_rate": 4.285891314519665e-06, "loss": 0.0636, "step": 34519 }, { "epoch": 0.7606581940978477, "grad_norm": 1.1666946411132812, "learning_rate": 4.285142110329637e-06, "loss": 0.0643, "step": 34520 }, { "epoch": 0.7606802293873639, "grad_norm": 0.6530531048774719, "learning_rate": 4.284392960716128e-06, "loss": 0.0587, "step": 34521 }, { "epoch": 0.7607022646768801, "grad_norm": 0.6343236565589905, "learning_rate": 4.283643865682945e-06, "loss": 0.0605, "step": 34522 }, { "epoch": 0.7607242999663962, "grad_norm": 0.6070383191108704, "learning_rate": 4.282894825233919e-06, "loss": 0.0363, "step": 34523 }, { "epoch": 0.7607463352559124, "grad_norm": 0.39356544613838196, "learning_rate": 4.282145839372851e-06, "loss": 0.0484, "step": 34524 }, { "epoch": 0.7607683705454285, "grad_norm": 0.5916635990142822, "learning_rate": 4.281396908103569e-06, "loss": 0.0827, "step": 34525 }, { "epoch": 0.7607904058349446, "grad_norm": 0.3741285502910614, "learning_rate": 4.280648031429872e-06, "loss": 0.0466, "step": 34526 }, { "epoch": 0.7608124411244608, "grad_norm": 0.663131058216095, "learning_rate": 4.279899209355586e-06, "loss": 0.0545, "step": 34527 }, { "epoch": 0.760834476413977, "grad_norm": 0.30826741456985474, "learning_rate": 4.279150441884525e-06, "loss": 0.0429, "step": 34528 }, { "epoch": 0.7608565117034931, "grad_norm": 0.5408077239990234, "learning_rate": 4.278401729020495e-06, "loss": 0.0902, "step": 34529 }, { "epoch": 0.7608785469930093, "grad_norm": 0.4260394275188446, "learning_rate": 4.277653070767315e-06, "loss": 0.072, "step": 34530 }, { "epoch": 0.7609005822825254, "grad_norm": 0.6107338070869446, "learning_rate": 4.276904467128796e-06, "loss": 0.0543, "step": 34531 }, { "epoch": 0.7609226175720416, "grad_norm": 0.34759148955345154, "learning_rate": 4.276155918108759e-06, "loss": 0.0825, "step": 34532 }, { "epoch": 0.7609446528615578, "grad_norm": 0.693891167640686, "learning_rate": 4.2754074237110055e-06, "loss": 0.0673, "step": 34533 }, { "epoch": 0.7609666881510739, "grad_norm": 0.6181079149246216, "learning_rate": 4.274658983939354e-06, "loss": 0.0859, "step": 34534 }, { "epoch": 0.7609887234405901, "grad_norm": 0.6492457389831543, "learning_rate": 4.27391059879762e-06, "loss": 0.0534, "step": 34535 }, { "epoch": 0.7610107587301063, "grad_norm": 0.38143855333328247, "learning_rate": 4.273162268289605e-06, "loss": 0.0559, "step": 34536 }, { "epoch": 0.7610327940196224, "grad_norm": 0.5394360423088074, "learning_rate": 4.272413992419132e-06, "loss": 0.0696, "step": 34537 }, { "epoch": 0.7610548293091386, "grad_norm": 0.5757223963737488, "learning_rate": 4.271665771189998e-06, "loss": 0.0752, "step": 34538 }, { "epoch": 0.7610768645986548, "grad_norm": 0.6165467500686646, "learning_rate": 4.270917604606033e-06, "loss": 0.054, "step": 34539 }, { "epoch": 0.7610988998881709, "grad_norm": 0.9097257852554321, "learning_rate": 4.270169492671033e-06, "loss": 0.0621, "step": 34540 }, { "epoch": 0.7611209351776871, "grad_norm": 0.48103490471839905, "learning_rate": 4.269421435388813e-06, "loss": 0.0451, "step": 34541 }, { "epoch": 0.7611429704672033, "grad_norm": 0.6451464891433716, "learning_rate": 4.268673432763189e-06, "loss": 0.05, "step": 34542 }, { "epoch": 0.7611650057567194, "grad_norm": 0.5801284909248352, "learning_rate": 4.267925484797961e-06, "loss": 0.0518, "step": 34543 }, { "epoch": 0.7611870410462356, "grad_norm": 0.2672126293182373, "learning_rate": 4.2671775914969476e-06, "loss": 0.0375, "step": 34544 }, { "epoch": 0.7612090763357517, "grad_norm": 0.5452079176902771, "learning_rate": 4.266429752863946e-06, "loss": 0.0646, "step": 34545 }, { "epoch": 0.7612311116252679, "grad_norm": 0.5184955596923828, "learning_rate": 4.2656819689027824e-06, "loss": 0.0463, "step": 34546 }, { "epoch": 0.7612531469147841, "grad_norm": 0.7332383990287781, "learning_rate": 4.264934239617252e-06, "loss": 0.0731, "step": 34547 }, { "epoch": 0.7612751822043002, "grad_norm": 0.6336397528648376, "learning_rate": 4.264186565011173e-06, "loss": 0.0487, "step": 34548 }, { "epoch": 0.7612972174938163, "grad_norm": 0.5618482232093811, "learning_rate": 4.263438945088344e-06, "loss": 0.0597, "step": 34549 }, { "epoch": 0.7613192527833325, "grad_norm": 0.7673595547676086, "learning_rate": 4.262691379852578e-06, "loss": 0.079, "step": 34550 }, { "epoch": 0.7613412880728486, "grad_norm": 0.7573367953300476, "learning_rate": 4.261943869307686e-06, "loss": 0.0502, "step": 34551 }, { "epoch": 0.7613633233623648, "grad_norm": 0.5684394836425781, "learning_rate": 4.2611964134574685e-06, "loss": 0.0573, "step": 34552 }, { "epoch": 0.761385358651881, "grad_norm": 0.6358349323272705, "learning_rate": 4.260449012305737e-06, "loss": 0.0572, "step": 34553 }, { "epoch": 0.7614073939413971, "grad_norm": 0.5963165163993835, "learning_rate": 4.259701665856297e-06, "loss": 0.0675, "step": 34554 }, { "epoch": 0.7614294292309133, "grad_norm": 0.5734086036682129, "learning_rate": 4.258954374112959e-06, "loss": 0.0774, "step": 34555 }, { "epoch": 0.7614514645204294, "grad_norm": 0.9421215653419495, "learning_rate": 4.258207137079523e-06, "loss": 0.0695, "step": 34556 }, { "epoch": 0.7614734998099456, "grad_norm": 0.5145715475082397, "learning_rate": 4.257459954759798e-06, "loss": 0.0502, "step": 34557 }, { "epoch": 0.7614955350994618, "grad_norm": 0.6661852598190308, "learning_rate": 4.256712827157595e-06, "loss": 0.0489, "step": 34558 }, { "epoch": 0.7615175703889779, "grad_norm": 0.9415755867958069, "learning_rate": 4.255965754276711e-06, "loss": 0.0806, "step": 34559 }, { "epoch": 0.7615396056784941, "grad_norm": 0.7995728850364685, "learning_rate": 4.255218736120957e-06, "loss": 0.0773, "step": 34560 }, { "epoch": 0.7615616409680103, "grad_norm": 0.453157901763916, "learning_rate": 4.2544717726941284e-06, "loss": 0.0399, "step": 34561 }, { "epoch": 0.7615836762575264, "grad_norm": 0.7550511956214905, "learning_rate": 4.253724864000045e-06, "loss": 0.0754, "step": 34562 }, { "epoch": 0.7616057115470426, "grad_norm": 0.43512648344039917, "learning_rate": 4.2529780100425e-06, "loss": 0.0546, "step": 34563 }, { "epoch": 0.7616277468365588, "grad_norm": 0.7065545916557312, "learning_rate": 4.252231210825301e-06, "loss": 0.0698, "step": 34564 }, { "epoch": 0.7616497821260749, "grad_norm": 0.5150766968727112, "learning_rate": 4.2514844663522565e-06, "loss": 0.0579, "step": 34565 }, { "epoch": 0.7616718174155911, "grad_norm": 0.4513907730579376, "learning_rate": 4.250737776627161e-06, "loss": 0.0493, "step": 34566 }, { "epoch": 0.7616938527051073, "grad_norm": 0.9551697969436646, "learning_rate": 4.2499911416538255e-06, "loss": 0.0825, "step": 34567 }, { "epoch": 0.7617158879946234, "grad_norm": 0.7654746770858765, "learning_rate": 4.249244561436041e-06, "loss": 0.0438, "step": 34568 }, { "epoch": 0.7617379232841396, "grad_norm": 0.5135681629180908, "learning_rate": 4.248498035977629e-06, "loss": 0.0478, "step": 34569 }, { "epoch": 0.7617599585736557, "grad_norm": 0.5970317125320435, "learning_rate": 4.247751565282378e-06, "loss": 0.0653, "step": 34570 }, { "epoch": 0.7617819938631719, "grad_norm": 0.788109302520752, "learning_rate": 4.247005149354097e-06, "loss": 0.0831, "step": 34571 }, { "epoch": 0.7618040291526881, "grad_norm": 0.5491868853569031, "learning_rate": 4.246258788196581e-06, "loss": 0.0564, "step": 34572 }, { "epoch": 0.7618260644422042, "grad_norm": 0.48598626255989075, "learning_rate": 4.2455124818136346e-06, "loss": 0.064, "step": 34573 }, { "epoch": 0.7618480997317203, "grad_norm": 0.3505716025829315, "learning_rate": 4.244766230209066e-06, "loss": 0.0516, "step": 34574 }, { "epoch": 0.7618701350212365, "grad_norm": 0.5838049650192261, "learning_rate": 4.244020033386665e-06, "loss": 0.062, "step": 34575 }, { "epoch": 0.7618921703107526, "grad_norm": 0.5424227118492126, "learning_rate": 4.243273891350239e-06, "loss": 0.0683, "step": 34576 }, { "epoch": 0.7619142056002688, "grad_norm": 0.6279916167259216, "learning_rate": 4.2425278041035855e-06, "loss": 0.062, "step": 34577 }, { "epoch": 0.761936240889785, "grad_norm": 0.5778070688247681, "learning_rate": 4.241781771650513e-06, "loss": 0.0596, "step": 34578 }, { "epoch": 0.7619582761793011, "grad_norm": 0.3796685039997101, "learning_rate": 4.241035793994809e-06, "loss": 0.0479, "step": 34579 }, { "epoch": 0.7619803114688173, "grad_norm": 0.3888213038444519, "learning_rate": 4.240289871140281e-06, "loss": 0.059, "step": 34580 }, { "epoch": 0.7620023467583334, "grad_norm": 0.6507706046104431, "learning_rate": 4.239544003090729e-06, "loss": 0.0439, "step": 34581 }, { "epoch": 0.7620243820478496, "grad_norm": 0.6964208483695984, "learning_rate": 4.238798189849945e-06, "loss": 0.1017, "step": 34582 }, { "epoch": 0.7620464173373658, "grad_norm": 0.6494075655937195, "learning_rate": 4.2380524314217335e-06, "loss": 0.0596, "step": 34583 }, { "epoch": 0.7620684526268819, "grad_norm": 0.5691060423851013, "learning_rate": 4.237306727809891e-06, "loss": 0.0537, "step": 34584 }, { "epoch": 0.7620904879163981, "grad_norm": 0.4672265648841858, "learning_rate": 4.236561079018223e-06, "loss": 0.0515, "step": 34585 }, { "epoch": 0.7621125232059143, "grad_norm": 0.5459420680999756, "learning_rate": 4.235815485050516e-06, "loss": 0.0611, "step": 34586 }, { "epoch": 0.7621345584954304, "grad_norm": 0.46914413571357727, "learning_rate": 4.2350699459105715e-06, "loss": 0.0395, "step": 34587 }, { "epoch": 0.7621565937849466, "grad_norm": 0.8242844939231873, "learning_rate": 4.234324461602194e-06, "loss": 0.0833, "step": 34588 }, { "epoch": 0.7621786290744628, "grad_norm": 0.3495963513851166, "learning_rate": 4.2335790321291714e-06, "loss": 0.0563, "step": 34589 }, { "epoch": 0.7622006643639789, "grad_norm": 0.4512118697166443, "learning_rate": 4.232833657495307e-06, "loss": 0.0659, "step": 34590 }, { "epoch": 0.7622226996534951, "grad_norm": 0.781158447265625, "learning_rate": 4.232088337704387e-06, "loss": 0.0763, "step": 34591 }, { "epoch": 0.7622447349430113, "grad_norm": 0.6289854049682617, "learning_rate": 4.231343072760222e-06, "loss": 0.0891, "step": 34592 }, { "epoch": 0.7622667702325274, "grad_norm": 0.9323899745941162, "learning_rate": 4.2305978626665995e-06, "loss": 0.0694, "step": 34593 }, { "epoch": 0.7622888055220436, "grad_norm": 0.49428051710128784, "learning_rate": 4.22985270742732e-06, "loss": 0.0447, "step": 34594 }, { "epoch": 0.7623108408115598, "grad_norm": 0.6880952715873718, "learning_rate": 4.229107607046168e-06, "loss": 0.0537, "step": 34595 }, { "epoch": 0.7623328761010759, "grad_norm": 0.364161878824234, "learning_rate": 4.228362561526955e-06, "loss": 0.0709, "step": 34596 }, { "epoch": 0.7623549113905921, "grad_norm": 0.6357756853103638, "learning_rate": 4.227617570873465e-06, "loss": 0.0783, "step": 34597 }, { "epoch": 0.7623769466801082, "grad_norm": 0.5947507619857788, "learning_rate": 4.226872635089498e-06, "loss": 0.0508, "step": 34598 }, { "epoch": 0.7623989819696243, "grad_norm": 0.9059913158416748, "learning_rate": 4.2261277541788414e-06, "loss": 0.0753, "step": 34599 }, { "epoch": 0.7624210172591405, "grad_norm": 0.6082642078399658, "learning_rate": 4.225382928145293e-06, "loss": 0.0715, "step": 34600 }, { "epoch": 0.7624430525486566, "grad_norm": 0.6178641319274902, "learning_rate": 4.224638156992654e-06, "loss": 0.0677, "step": 34601 }, { "epoch": 0.7624650878381728, "grad_norm": 0.47708648443222046, "learning_rate": 4.223893440724705e-06, "loss": 0.0534, "step": 34602 }, { "epoch": 0.762487123127689, "grad_norm": 0.5690930485725403, "learning_rate": 4.223148779345245e-06, "loss": 0.0576, "step": 34603 }, { "epoch": 0.7625091584172051, "grad_norm": 0.7993469834327698, "learning_rate": 4.222404172858068e-06, "loss": 0.0799, "step": 34604 }, { "epoch": 0.7625311937067213, "grad_norm": 0.7588285207748413, "learning_rate": 4.221659621266969e-06, "loss": 0.065, "step": 34605 }, { "epoch": 0.7625532289962375, "grad_norm": 0.8673726320266724, "learning_rate": 4.220915124575734e-06, "loss": 0.0746, "step": 34606 }, { "epoch": 0.7625752642857536, "grad_norm": 0.5414838194847107, "learning_rate": 4.220170682788158e-06, "loss": 0.0705, "step": 34607 }, { "epoch": 0.7625972995752698, "grad_norm": 0.7701026797294617, "learning_rate": 4.219426295908039e-06, "loss": 0.049, "step": 34608 }, { "epoch": 0.762619334864786, "grad_norm": 0.8900671005249023, "learning_rate": 4.218681963939159e-06, "loss": 0.0935, "step": 34609 }, { "epoch": 0.7626413701543021, "grad_norm": 0.6484471559524536, "learning_rate": 4.21793768688531e-06, "loss": 0.0779, "step": 34610 }, { "epoch": 0.7626634054438183, "grad_norm": 0.6892627477645874, "learning_rate": 4.217193464750288e-06, "loss": 0.0675, "step": 34611 }, { "epoch": 0.7626854407333344, "grad_norm": 0.4957442283630371, "learning_rate": 4.216449297537887e-06, "loss": 0.0355, "step": 34612 }, { "epoch": 0.7627074760228506, "grad_norm": 0.7339182496070862, "learning_rate": 4.215705185251886e-06, "loss": 0.0612, "step": 34613 }, { "epoch": 0.7627295113123668, "grad_norm": 0.7201154232025146, "learning_rate": 4.214961127896083e-06, "loss": 0.036, "step": 34614 }, { "epoch": 0.7627515466018829, "grad_norm": 0.6241240501403809, "learning_rate": 4.21421712547427e-06, "loss": 0.0755, "step": 34615 }, { "epoch": 0.7627735818913991, "grad_norm": 0.9796543121337891, "learning_rate": 4.213473177990231e-06, "loss": 0.0858, "step": 34616 }, { "epoch": 0.7627956171809153, "grad_norm": 1.023274302482605, "learning_rate": 4.212729285447759e-06, "loss": 0.0723, "step": 34617 }, { "epoch": 0.7628176524704314, "grad_norm": 0.7729890942573547, "learning_rate": 4.211985447850632e-06, "loss": 0.0568, "step": 34618 }, { "epoch": 0.7628396877599476, "grad_norm": 0.6656433343887329, "learning_rate": 4.211241665202659e-06, "loss": 0.056, "step": 34619 }, { "epoch": 0.7628617230494638, "grad_norm": 0.872512936592102, "learning_rate": 4.210497937507613e-06, "loss": 0.0534, "step": 34620 }, { "epoch": 0.7628837583389799, "grad_norm": 0.41091322898864746, "learning_rate": 4.209754264769291e-06, "loss": 0.039, "step": 34621 }, { "epoch": 0.7629057936284961, "grad_norm": 0.6065670251846313, "learning_rate": 4.209010646991471e-06, "loss": 0.0561, "step": 34622 }, { "epoch": 0.7629278289180123, "grad_norm": 0.7823020219802856, "learning_rate": 4.208267084177947e-06, "loss": 0.0522, "step": 34623 }, { "epoch": 0.7629498642075283, "grad_norm": 0.5560621619224548, "learning_rate": 4.207523576332511e-06, "loss": 0.0869, "step": 34624 }, { "epoch": 0.7629718994970445, "grad_norm": 0.730347216129303, "learning_rate": 4.206780123458939e-06, "loss": 0.0787, "step": 34625 }, { "epoch": 0.7629939347865606, "grad_norm": 0.5525475144386292, "learning_rate": 4.206036725561024e-06, "loss": 0.0644, "step": 34626 }, { "epoch": 0.7630159700760768, "grad_norm": 0.23147864639759064, "learning_rate": 4.205293382642553e-06, "loss": 0.0536, "step": 34627 }, { "epoch": 0.763038005365593, "grad_norm": 0.40670427680015564, "learning_rate": 4.204550094707316e-06, "loss": 0.0484, "step": 34628 }, { "epoch": 0.7630600406551091, "grad_norm": 0.7917059659957886, "learning_rate": 4.203806861759088e-06, "loss": 0.0742, "step": 34629 }, { "epoch": 0.7630820759446253, "grad_norm": 0.455915629863739, "learning_rate": 4.203063683801663e-06, "loss": 0.0763, "step": 34630 }, { "epoch": 0.7631041112341415, "grad_norm": 0.7753784656524658, "learning_rate": 4.202320560838829e-06, "loss": 0.0684, "step": 34631 }, { "epoch": 0.7631261465236576, "grad_norm": 0.6431394219398499, "learning_rate": 4.20157749287436e-06, "loss": 0.0682, "step": 34632 }, { "epoch": 0.7631481818131738, "grad_norm": 0.7477507591247559, "learning_rate": 4.200834479912048e-06, "loss": 0.0658, "step": 34633 }, { "epoch": 0.76317021710269, "grad_norm": 0.8617362380027771, "learning_rate": 4.2000915219556775e-06, "loss": 0.0363, "step": 34634 }, { "epoch": 0.7631922523922061, "grad_norm": 0.7448948621749878, "learning_rate": 4.199348619009038e-06, "loss": 0.1083, "step": 34635 }, { "epoch": 0.7632142876817223, "grad_norm": 0.5052403807640076, "learning_rate": 4.1986057710759016e-06, "loss": 0.0635, "step": 34636 }, { "epoch": 0.7632363229712384, "grad_norm": 0.7850229740142822, "learning_rate": 4.197862978160058e-06, "loss": 0.0482, "step": 34637 }, { "epoch": 0.7632583582607546, "grad_norm": 0.5174820423126221, "learning_rate": 4.197120240265297e-06, "loss": 0.0501, "step": 34638 }, { "epoch": 0.7632803935502708, "grad_norm": 0.622277021408081, "learning_rate": 4.196377557395391e-06, "loss": 0.0556, "step": 34639 }, { "epoch": 0.7633024288397869, "grad_norm": 0.6462804675102234, "learning_rate": 4.195634929554131e-06, "loss": 0.0708, "step": 34640 }, { "epoch": 0.7633244641293031, "grad_norm": 0.3618297576904297, "learning_rate": 4.1948923567452866e-06, "loss": 0.0463, "step": 34641 }, { "epoch": 0.7633464994188193, "grad_norm": 0.5997033715248108, "learning_rate": 4.194149838972659e-06, "loss": 0.0811, "step": 34642 }, { "epoch": 0.7633685347083354, "grad_norm": 0.7679364085197449, "learning_rate": 4.193407376240016e-06, "loss": 0.0408, "step": 34643 }, { "epoch": 0.7633905699978516, "grad_norm": 0.46968433260917664, "learning_rate": 4.19266496855115e-06, "loss": 0.0395, "step": 34644 }, { "epoch": 0.7634126052873678, "grad_norm": 0.7112284302711487, "learning_rate": 4.19192261590983e-06, "loss": 0.058, "step": 34645 }, { "epoch": 0.7634346405768839, "grad_norm": 0.7139723896980286, "learning_rate": 4.191180318319845e-06, "loss": 0.0636, "step": 34646 }, { "epoch": 0.7634566758664001, "grad_norm": 0.4403202533721924, "learning_rate": 4.190438075784979e-06, "loss": 0.0467, "step": 34647 }, { "epoch": 0.7634787111559161, "grad_norm": 0.47252708673477173, "learning_rate": 4.1896958883090055e-06, "loss": 0.0682, "step": 34648 }, { "epoch": 0.7635007464454323, "grad_norm": 0.5560274720191956, "learning_rate": 4.188953755895706e-06, "loss": 0.063, "step": 34649 }, { "epoch": 0.7635227817349485, "grad_norm": 0.4984440207481384, "learning_rate": 4.188211678548863e-06, "loss": 0.0791, "step": 34650 }, { "epoch": 0.7635448170244646, "grad_norm": 0.5423575639724731, "learning_rate": 4.1874696562722594e-06, "loss": 0.071, "step": 34651 }, { "epoch": 0.7635668523139808, "grad_norm": 0.7131584882736206, "learning_rate": 4.1867276890696685e-06, "loss": 0.0565, "step": 34652 }, { "epoch": 0.763588887603497, "grad_norm": 0.7691559791564941, "learning_rate": 4.18598577694487e-06, "loss": 0.079, "step": 34653 }, { "epoch": 0.7636109228930131, "grad_norm": 0.4573397934436798, "learning_rate": 4.18524391990165e-06, "loss": 0.066, "step": 34654 }, { "epoch": 0.7636329581825293, "grad_norm": 1.1277744770050049, "learning_rate": 4.18450211794378e-06, "loss": 0.0722, "step": 34655 }, { "epoch": 0.7636549934720455, "grad_norm": 0.3515779674053192, "learning_rate": 4.183760371075036e-06, "loss": 0.0467, "step": 34656 }, { "epoch": 0.7636770287615616, "grad_norm": 0.6348041296005249, "learning_rate": 4.1830186792992035e-06, "loss": 0.0737, "step": 34657 }, { "epoch": 0.7636990640510778, "grad_norm": 0.5793940424919128, "learning_rate": 4.18227704262006e-06, "loss": 0.0548, "step": 34658 }, { "epoch": 0.763721099340594, "grad_norm": 0.4496570825576782, "learning_rate": 4.181535461041378e-06, "loss": 0.0434, "step": 34659 }, { "epoch": 0.7637431346301101, "grad_norm": 0.4764832556247711, "learning_rate": 4.180793934566935e-06, "loss": 0.0717, "step": 34660 }, { "epoch": 0.7637651699196263, "grad_norm": 0.8996019959449768, "learning_rate": 4.1800524632005165e-06, "loss": 0.045, "step": 34661 }, { "epoch": 0.7637872052091425, "grad_norm": 0.4897209703922272, "learning_rate": 4.179311046945888e-06, "loss": 0.0562, "step": 34662 }, { "epoch": 0.7638092404986586, "grad_norm": 0.5026097893714905, "learning_rate": 4.178569685806836e-06, "loss": 0.0484, "step": 34663 }, { "epoch": 0.7638312757881748, "grad_norm": 0.38175106048583984, "learning_rate": 4.177828379787122e-06, "loss": 0.0393, "step": 34664 }, { "epoch": 0.7638533110776909, "grad_norm": 0.38282129168510437, "learning_rate": 4.177087128890539e-06, "loss": 0.0385, "step": 34665 }, { "epoch": 0.7638753463672071, "grad_norm": 0.8414236307144165, "learning_rate": 4.176345933120852e-06, "loss": 0.0784, "step": 34666 }, { "epoch": 0.7638973816567233, "grad_norm": 1.1598231792449951, "learning_rate": 4.175604792481844e-06, "loss": 0.0801, "step": 34667 }, { "epoch": 0.7639194169462394, "grad_norm": 0.7855952382087708, "learning_rate": 4.1748637069772785e-06, "loss": 0.0645, "step": 34668 }, { "epoch": 0.7639414522357556, "grad_norm": 0.8425152897834778, "learning_rate": 4.1741226766109395e-06, "loss": 0.0706, "step": 34669 }, { "epoch": 0.7639634875252718, "grad_norm": 0.5302008986473083, "learning_rate": 4.173381701386603e-06, "loss": 0.0633, "step": 34670 }, { "epoch": 0.7639855228147879, "grad_norm": 0.5709744691848755, "learning_rate": 4.172640781308035e-06, "loss": 0.0799, "step": 34671 }, { "epoch": 0.7640075581043041, "grad_norm": 0.6574401259422302, "learning_rate": 4.171899916379011e-06, "loss": 0.0441, "step": 34672 }, { "epoch": 0.7640295933938202, "grad_norm": 0.6724685430526733, "learning_rate": 4.17115910660331e-06, "loss": 0.0578, "step": 34673 }, { "epoch": 0.7640516286833363, "grad_norm": 0.49800458550453186, "learning_rate": 4.170418351984706e-06, "loss": 0.0358, "step": 34674 }, { "epoch": 0.7640736639728525, "grad_norm": 0.7002178430557251, "learning_rate": 4.1696776525269645e-06, "loss": 0.0552, "step": 34675 }, { "epoch": 0.7640956992623686, "grad_norm": 0.5185425281524658, "learning_rate": 4.168937008233862e-06, "loss": 0.043, "step": 34676 }, { "epoch": 0.7641177345518848, "grad_norm": 0.4190766513347626, "learning_rate": 4.168196419109177e-06, "loss": 0.0727, "step": 34677 }, { "epoch": 0.764139769841401, "grad_norm": 0.759626030921936, "learning_rate": 4.167455885156671e-06, "loss": 0.0639, "step": 34678 }, { "epoch": 0.7641618051309171, "grad_norm": 0.5943652391433716, "learning_rate": 4.166715406380121e-06, "loss": 0.0684, "step": 34679 }, { "epoch": 0.7641838404204333, "grad_norm": 0.6899100542068481, "learning_rate": 4.165974982783298e-06, "loss": 0.0586, "step": 34680 }, { "epoch": 0.7642058757099495, "grad_norm": 0.4996703565120697, "learning_rate": 4.165234614369978e-06, "loss": 0.0548, "step": 34681 }, { "epoch": 0.7642279109994656, "grad_norm": 0.6457403302192688, "learning_rate": 4.164494301143925e-06, "loss": 0.0715, "step": 34682 }, { "epoch": 0.7642499462889818, "grad_norm": 0.5517728924751282, "learning_rate": 4.1637540431089124e-06, "loss": 0.0591, "step": 34683 }, { "epoch": 0.764271981578498, "grad_norm": 0.5994491577148438, "learning_rate": 4.163013840268715e-06, "loss": 0.0572, "step": 34684 }, { "epoch": 0.7642940168680141, "grad_norm": 0.7168170213699341, "learning_rate": 4.162273692627096e-06, "loss": 0.0683, "step": 34685 }, { "epoch": 0.7643160521575303, "grad_norm": 0.3509339690208435, "learning_rate": 4.161533600187835e-06, "loss": 0.0427, "step": 34686 }, { "epoch": 0.7643380874470465, "grad_norm": 0.7224376201629639, "learning_rate": 4.160793562954683e-06, "loss": 0.0842, "step": 34687 }, { "epoch": 0.7643601227365626, "grad_norm": 0.749406635761261, "learning_rate": 4.160053580931433e-06, "loss": 0.0593, "step": 34688 }, { "epoch": 0.7643821580260788, "grad_norm": 0.5074909925460815, "learning_rate": 4.159313654121839e-06, "loss": 0.0613, "step": 34689 }, { "epoch": 0.764404193315595, "grad_norm": 0.8373918533325195, "learning_rate": 4.158573782529676e-06, "loss": 0.0693, "step": 34690 }, { "epoch": 0.7644262286051111, "grad_norm": 0.7016659379005432, "learning_rate": 4.157833966158708e-06, "loss": 0.0726, "step": 34691 }, { "epoch": 0.7644482638946273, "grad_norm": 0.6231411695480347, "learning_rate": 4.157094205012704e-06, "loss": 0.0713, "step": 34692 }, { "epoch": 0.7644702991841434, "grad_norm": 0.5939180254936218, "learning_rate": 4.1563544990954385e-06, "loss": 0.07, "step": 34693 }, { "epoch": 0.7644923344736596, "grad_norm": 0.6816079616546631, "learning_rate": 4.1556148484106705e-06, "loss": 0.0612, "step": 34694 }, { "epoch": 0.7645143697631758, "grad_norm": 0.9983474612236023, "learning_rate": 4.154875252962171e-06, "loss": 0.0815, "step": 34695 }, { "epoch": 0.7645364050526919, "grad_norm": 0.5714042782783508, "learning_rate": 4.154135712753707e-06, "loss": 0.0728, "step": 34696 }, { "epoch": 0.7645584403422081, "grad_norm": 0.6570569276809692, "learning_rate": 4.153396227789052e-06, "loss": 0.0886, "step": 34697 }, { "epoch": 0.7645804756317242, "grad_norm": 0.7045367360115051, "learning_rate": 4.152656798071956e-06, "loss": 0.0533, "step": 34698 }, { "epoch": 0.7646025109212403, "grad_norm": 0.479703426361084, "learning_rate": 4.151917423606205e-06, "loss": 0.0469, "step": 34699 }, { "epoch": 0.7646245462107565, "grad_norm": 0.5972020030021667, "learning_rate": 4.151178104395556e-06, "loss": 0.077, "step": 34700 }, { "epoch": 0.7646465815002726, "grad_norm": 0.293802946805954, "learning_rate": 4.15043884044377e-06, "loss": 0.046, "step": 34701 }, { "epoch": 0.7646686167897888, "grad_norm": 0.4909391701221466, "learning_rate": 4.149699631754617e-06, "loss": 0.033, "step": 34702 }, { "epoch": 0.764690652079305, "grad_norm": 0.3372582495212555, "learning_rate": 4.148960478331861e-06, "loss": 0.0416, "step": 34703 }, { "epoch": 0.7647126873688211, "grad_norm": 0.6224482655525208, "learning_rate": 4.148221380179273e-06, "loss": 0.0767, "step": 34704 }, { "epoch": 0.7647347226583373, "grad_norm": 0.6105795502662659, "learning_rate": 4.147482337300608e-06, "loss": 0.0704, "step": 34705 }, { "epoch": 0.7647567579478535, "grad_norm": 0.7856269478797913, "learning_rate": 4.146743349699636e-06, "loss": 0.0742, "step": 34706 }, { "epoch": 0.7647787932373696, "grad_norm": 0.9107332229614258, "learning_rate": 4.14600441738012e-06, "loss": 0.0684, "step": 34707 }, { "epoch": 0.7648008285268858, "grad_norm": 0.7174109816551208, "learning_rate": 4.145265540345828e-06, "loss": 0.0551, "step": 34708 }, { "epoch": 0.764822863816402, "grad_norm": 0.9463405609130859, "learning_rate": 4.1445267186005205e-06, "loss": 0.0775, "step": 34709 }, { "epoch": 0.7648448991059181, "grad_norm": 0.6344690918922424, "learning_rate": 4.143787952147949e-06, "loss": 0.0795, "step": 34710 }, { "epoch": 0.7648669343954343, "grad_norm": 0.9307821393013, "learning_rate": 4.1430492409918975e-06, "loss": 0.0577, "step": 34711 }, { "epoch": 0.7648889696849505, "grad_norm": 0.7185866236686707, "learning_rate": 4.142310585136111e-06, "loss": 0.1059, "step": 34712 }, { "epoch": 0.7649110049744666, "grad_norm": 0.7064363956451416, "learning_rate": 4.141571984584367e-06, "loss": 0.0754, "step": 34713 }, { "epoch": 0.7649330402639828, "grad_norm": 0.34558966755867004, "learning_rate": 4.140833439340409e-06, "loss": 0.0504, "step": 34714 }, { "epoch": 0.764955075553499, "grad_norm": 0.9472832083702087, "learning_rate": 4.140094949408018e-06, "loss": 0.0701, "step": 34715 }, { "epoch": 0.7649771108430151, "grad_norm": 0.8034924268722534, "learning_rate": 4.1393565147909426e-06, "loss": 0.0533, "step": 34716 }, { "epoch": 0.7649991461325313, "grad_norm": 0.7168905735015869, "learning_rate": 4.1386181354929485e-06, "loss": 0.0736, "step": 34717 }, { "epoch": 0.7650211814220474, "grad_norm": 0.6271438002586365, "learning_rate": 4.137879811517801e-06, "loss": 0.0572, "step": 34718 }, { "epoch": 0.7650432167115636, "grad_norm": 0.57634037733078, "learning_rate": 4.137141542869253e-06, "loss": 0.0637, "step": 34719 }, { "epoch": 0.7650652520010798, "grad_norm": 0.28270113468170166, "learning_rate": 4.136403329551072e-06, "loss": 0.0619, "step": 34720 }, { "epoch": 0.7650872872905959, "grad_norm": 0.6776915788650513, "learning_rate": 4.135665171567005e-06, "loss": 0.0489, "step": 34721 }, { "epoch": 0.765109322580112, "grad_norm": 0.6132705807685852, "learning_rate": 4.13492706892083e-06, "loss": 0.0479, "step": 34722 }, { "epoch": 0.7651313578696282, "grad_norm": 0.7755762338638306, "learning_rate": 4.134189021616294e-06, "loss": 0.057, "step": 34723 }, { "epoch": 0.7651533931591443, "grad_norm": 0.5492238402366638, "learning_rate": 4.133451029657163e-06, "loss": 0.0516, "step": 34724 }, { "epoch": 0.7651754284486605, "grad_norm": 0.47992900013923645, "learning_rate": 4.132713093047191e-06, "loss": 0.0558, "step": 34725 }, { "epoch": 0.7651974637381767, "grad_norm": 0.6427847743034363, "learning_rate": 4.1319752117901364e-06, "loss": 0.0639, "step": 34726 }, { "epoch": 0.7652194990276928, "grad_norm": 0.4992954730987549, "learning_rate": 4.1312373858897665e-06, "loss": 0.055, "step": 34727 }, { "epoch": 0.765241534317209, "grad_norm": 0.4949670433998108, "learning_rate": 4.130499615349827e-06, "loss": 0.06, "step": 34728 }, { "epoch": 0.7652635696067251, "grad_norm": 0.6224982738494873, "learning_rate": 4.129761900174082e-06, "loss": 0.0558, "step": 34729 }, { "epoch": 0.7652856048962413, "grad_norm": 0.4808826446533203, "learning_rate": 4.129024240366289e-06, "loss": 0.04, "step": 34730 }, { "epoch": 0.7653076401857575, "grad_norm": 0.503798246383667, "learning_rate": 4.12828663593021e-06, "loss": 0.0507, "step": 34731 }, { "epoch": 0.7653296754752736, "grad_norm": 0.5359629392623901, "learning_rate": 4.1275490868695905e-06, "loss": 0.0607, "step": 34732 }, { "epoch": 0.7653517107647898, "grad_norm": 0.7877559065818787, "learning_rate": 4.126811593188195e-06, "loss": 0.0695, "step": 34733 }, { "epoch": 0.765373746054306, "grad_norm": 0.8033422231674194, "learning_rate": 4.126074154889784e-06, "loss": 0.0655, "step": 34734 }, { "epoch": 0.7653957813438221, "grad_norm": 0.6355683207511902, "learning_rate": 4.1253367719781035e-06, "loss": 0.0553, "step": 34735 }, { "epoch": 0.7654178166333383, "grad_norm": 0.8403398394584656, "learning_rate": 4.124599444456918e-06, "loss": 0.087, "step": 34736 }, { "epoch": 0.7654398519228545, "grad_norm": 0.871326744556427, "learning_rate": 4.12386217232997e-06, "loss": 0.0607, "step": 34737 }, { "epoch": 0.7654618872123706, "grad_norm": 0.5886257290840149, "learning_rate": 4.1231249556010355e-06, "loss": 0.067, "step": 34738 }, { "epoch": 0.7654839225018868, "grad_norm": 0.7053425312042236, "learning_rate": 4.122387794273852e-06, "loss": 0.0561, "step": 34739 }, { "epoch": 0.765505957791403, "grad_norm": 1.2226985692977905, "learning_rate": 4.121650688352181e-06, "loss": 0.075, "step": 34740 }, { "epoch": 0.7655279930809191, "grad_norm": 0.8086503744125366, "learning_rate": 4.120913637839781e-06, "loss": 0.0718, "step": 34741 }, { "epoch": 0.7655500283704353, "grad_norm": 0.34499916434288025, "learning_rate": 4.1201766427403955e-06, "loss": 0.0797, "step": 34742 }, { "epoch": 0.7655720636599515, "grad_norm": 0.38391560316085815, "learning_rate": 4.119439703057792e-06, "loss": 0.0727, "step": 34743 }, { "epoch": 0.7655940989494676, "grad_norm": 0.7276254296302795, "learning_rate": 4.118702818795706e-06, "loss": 0.0791, "step": 34744 }, { "epoch": 0.7656161342389838, "grad_norm": 0.3256664276123047, "learning_rate": 4.11796598995791e-06, "loss": 0.0363, "step": 34745 }, { "epoch": 0.7656381695285, "grad_norm": 0.5587249994277954, "learning_rate": 4.117229216548145e-06, "loss": 0.0893, "step": 34746 }, { "epoch": 0.765660204818016, "grad_norm": 0.7153099775314331, "learning_rate": 4.116492498570173e-06, "loss": 0.066, "step": 34747 }, { "epoch": 0.7656822401075322, "grad_norm": 0.45922204852104187, "learning_rate": 4.115755836027734e-06, "loss": 0.0471, "step": 34748 }, { "epoch": 0.7657042753970483, "grad_norm": 0.4168050289154053, "learning_rate": 4.115019228924589e-06, "loss": 0.0496, "step": 34749 }, { "epoch": 0.7657263106865645, "grad_norm": 0.4123864769935608, "learning_rate": 4.114282677264491e-06, "loss": 0.0738, "step": 34750 }, { "epoch": 0.7657483459760807, "grad_norm": 0.7108303308486938, "learning_rate": 4.113546181051185e-06, "loss": 0.0808, "step": 34751 }, { "epoch": 0.7657703812655968, "grad_norm": 0.616027295589447, "learning_rate": 4.112809740288427e-06, "loss": 0.0514, "step": 34752 }, { "epoch": 0.765792416555113, "grad_norm": 0.38224413990974426, "learning_rate": 4.112073354979965e-06, "loss": 0.0595, "step": 34753 }, { "epoch": 0.7658144518446292, "grad_norm": 0.593533456325531, "learning_rate": 4.1113370251295595e-06, "loss": 0.0716, "step": 34754 }, { "epoch": 0.7658364871341453, "grad_norm": 0.9613227248191833, "learning_rate": 4.110600750740947e-06, "loss": 0.0758, "step": 34755 }, { "epoch": 0.7658585224236615, "grad_norm": 0.7140771746635437, "learning_rate": 4.109864531817885e-06, "loss": 0.0865, "step": 34756 }, { "epoch": 0.7658805577131776, "grad_norm": 0.6192865967750549, "learning_rate": 4.109128368364126e-06, "loss": 0.0552, "step": 34757 }, { "epoch": 0.7659025930026938, "grad_norm": 0.5363601446151733, "learning_rate": 4.108392260383413e-06, "loss": 0.0442, "step": 34758 }, { "epoch": 0.76592462829221, "grad_norm": 0.8281183838844299, "learning_rate": 4.107656207879502e-06, "loss": 0.0576, "step": 34759 }, { "epoch": 0.7659466635817261, "grad_norm": 0.7404115796089172, "learning_rate": 4.1069202108561325e-06, "loss": 0.0577, "step": 34760 }, { "epoch": 0.7659686988712423, "grad_norm": 0.4011014401912689, "learning_rate": 4.106184269317066e-06, "loss": 0.0558, "step": 34761 }, { "epoch": 0.7659907341607585, "grad_norm": 0.8777315616607666, "learning_rate": 4.105448383266041e-06, "loss": 0.0605, "step": 34762 }, { "epoch": 0.7660127694502746, "grad_norm": 0.6074503660202026, "learning_rate": 4.10471255270681e-06, "loss": 0.0522, "step": 34763 }, { "epoch": 0.7660348047397908, "grad_norm": 0.623897135257721, "learning_rate": 4.103976777643125e-06, "loss": 0.0551, "step": 34764 }, { "epoch": 0.766056840029307, "grad_norm": 0.7469966411590576, "learning_rate": 4.1032410580787245e-06, "loss": 0.1048, "step": 34765 }, { "epoch": 0.7660788753188231, "grad_norm": 0.3473590016365051, "learning_rate": 4.102505394017364e-06, "loss": 0.0387, "step": 34766 }, { "epoch": 0.7661009106083393, "grad_norm": 0.74634850025177, "learning_rate": 4.101769785462779e-06, "loss": 0.0788, "step": 34767 }, { "epoch": 0.7661229458978555, "grad_norm": 0.3518511652946472, "learning_rate": 4.101034232418735e-06, "loss": 0.0803, "step": 34768 }, { "epoch": 0.7661449811873716, "grad_norm": 0.5849215388298035, "learning_rate": 4.100298734888962e-06, "loss": 0.0455, "step": 34769 }, { "epoch": 0.7661670164768878, "grad_norm": 0.8438650965690613, "learning_rate": 4.099563292877218e-06, "loss": 0.0693, "step": 34770 }, { "epoch": 0.766189051766404, "grad_norm": 0.5496269464492798, "learning_rate": 4.098827906387238e-06, "loss": 0.0492, "step": 34771 }, { "epoch": 0.76621108705592, "grad_norm": 0.4425073266029358, "learning_rate": 4.098092575422772e-06, "loss": 0.0476, "step": 34772 }, { "epoch": 0.7662331223454362, "grad_norm": 0.45016610622406006, "learning_rate": 4.097357299987574e-06, "loss": 0.0611, "step": 34773 }, { "epoch": 0.7662551576349523, "grad_norm": 0.6578131914138794, "learning_rate": 4.0966220800853745e-06, "loss": 0.0398, "step": 34774 }, { "epoch": 0.7662771929244685, "grad_norm": 0.49326348304748535, "learning_rate": 4.095886915719927e-06, "loss": 0.0578, "step": 34775 }, { "epoch": 0.7662992282139847, "grad_norm": 0.6361463665962219, "learning_rate": 4.095151806894975e-06, "loss": 0.0927, "step": 34776 }, { "epoch": 0.7663212635035008, "grad_norm": 0.42013999819755554, "learning_rate": 4.094416753614266e-06, "loss": 0.0529, "step": 34777 }, { "epoch": 0.766343298793017, "grad_norm": 0.6267979145050049, "learning_rate": 4.093681755881538e-06, "loss": 0.0808, "step": 34778 }, { "epoch": 0.7663653340825332, "grad_norm": 0.7428650259971619, "learning_rate": 4.092946813700534e-06, "loss": 0.0449, "step": 34779 }, { "epoch": 0.7663873693720493, "grad_norm": 0.7281020879745483, "learning_rate": 4.092211927075008e-06, "loss": 0.0612, "step": 34780 }, { "epoch": 0.7664094046615655, "grad_norm": 0.5484663844108582, "learning_rate": 4.09147709600869e-06, "loss": 0.061, "step": 34781 }, { "epoch": 0.7664314399510817, "grad_norm": 0.4556455612182617, "learning_rate": 4.090742320505334e-06, "loss": 0.0373, "step": 34782 }, { "epoch": 0.7664534752405978, "grad_norm": 0.5603684186935425, "learning_rate": 4.090007600568667e-06, "loss": 0.0489, "step": 34783 }, { "epoch": 0.766475510530114, "grad_norm": 0.47935906052589417, "learning_rate": 4.089272936202452e-06, "loss": 0.0599, "step": 34784 }, { "epoch": 0.7664975458196301, "grad_norm": 0.680847704410553, "learning_rate": 4.088538327410417e-06, "loss": 0.0738, "step": 34785 }, { "epoch": 0.7665195811091463, "grad_norm": 0.33734264969825745, "learning_rate": 4.087803774196305e-06, "loss": 0.0549, "step": 34786 }, { "epoch": 0.7665416163986625, "grad_norm": 0.5394148230552673, "learning_rate": 4.087069276563867e-06, "loss": 0.0813, "step": 34787 }, { "epoch": 0.7665636516881786, "grad_norm": 0.5270814895629883, "learning_rate": 4.086334834516831e-06, "loss": 0.0857, "step": 34788 }, { "epoch": 0.7665856869776948, "grad_norm": 0.47157788276672363, "learning_rate": 4.085600448058949e-06, "loss": 0.0455, "step": 34789 }, { "epoch": 0.766607722267211, "grad_norm": 0.7135828733444214, "learning_rate": 4.084866117193948e-06, "loss": 0.0511, "step": 34790 }, { "epoch": 0.7666297575567271, "grad_norm": 0.5612131357192993, "learning_rate": 4.084131841925585e-06, "loss": 0.0405, "step": 34791 }, { "epoch": 0.7666517928462433, "grad_norm": 0.47118228673934937, "learning_rate": 4.08339762225759e-06, "loss": 0.0364, "step": 34792 }, { "epoch": 0.7666738281357595, "grad_norm": 0.8370749354362488, "learning_rate": 4.082663458193707e-06, "loss": 0.0665, "step": 34793 }, { "epoch": 0.7666958634252756, "grad_norm": 0.3176737129688263, "learning_rate": 4.0819293497376694e-06, "loss": 0.0594, "step": 34794 }, { "epoch": 0.7667178987147918, "grad_norm": 0.4223926365375519, "learning_rate": 4.081195296893221e-06, "loss": 0.0481, "step": 34795 }, { "epoch": 0.7667399340043078, "grad_norm": 0.7654116749763489, "learning_rate": 4.080461299664103e-06, "loss": 0.07, "step": 34796 }, { "epoch": 0.766761969293824, "grad_norm": 0.6521093249320984, "learning_rate": 4.079727358054049e-06, "loss": 0.08, "step": 34797 }, { "epoch": 0.7667840045833402, "grad_norm": 0.720020055770874, "learning_rate": 4.078993472066798e-06, "loss": 0.0601, "step": 34798 }, { "epoch": 0.7668060398728563, "grad_norm": 0.7048527002334595, "learning_rate": 4.078259641706089e-06, "loss": 0.0669, "step": 34799 }, { "epoch": 0.7668280751623725, "grad_norm": 0.5020968914031982, "learning_rate": 4.077525866975665e-06, "loss": 0.0399, "step": 34800 }, { "epoch": 0.7668501104518887, "grad_norm": 0.7890338897705078, "learning_rate": 4.076792147879256e-06, "loss": 0.0602, "step": 34801 }, { "epoch": 0.7668721457414048, "grad_norm": 0.04805168882012367, "learning_rate": 4.0760584844206006e-06, "loss": 0.0617, "step": 34802 }, { "epoch": 0.766894181030921, "grad_norm": 0.5410879254341125, "learning_rate": 4.075324876603442e-06, "loss": 0.0533, "step": 34803 }, { "epoch": 0.7669162163204372, "grad_norm": 0.5021680593490601, "learning_rate": 4.0745913244315084e-06, "loss": 0.0485, "step": 34804 }, { "epoch": 0.7669382516099533, "grad_norm": 0.7452365159988403, "learning_rate": 4.073857827908543e-06, "loss": 0.0791, "step": 34805 }, { "epoch": 0.7669602868994695, "grad_norm": 0.45149531960487366, "learning_rate": 4.07312438703827e-06, "loss": 0.0664, "step": 34806 }, { "epoch": 0.7669823221889857, "grad_norm": 0.8238431811332703, "learning_rate": 4.072391001824444e-06, "loss": 0.0812, "step": 34807 }, { "epoch": 0.7670043574785018, "grad_norm": 0.42280837893486023, "learning_rate": 4.071657672270786e-06, "loss": 0.053, "step": 34808 }, { "epoch": 0.767026392768018, "grad_norm": 0.45828545093536377, "learning_rate": 4.0709243983810344e-06, "loss": 0.0418, "step": 34809 }, { "epoch": 0.7670484280575341, "grad_norm": 0.7061437368392944, "learning_rate": 4.070191180158927e-06, "loss": 0.0605, "step": 34810 }, { "epoch": 0.7670704633470503, "grad_norm": 0.7967785596847534, "learning_rate": 4.0694580176082e-06, "loss": 0.0732, "step": 34811 }, { "epoch": 0.7670924986365665, "grad_norm": 0.6841381788253784, "learning_rate": 4.068724910732585e-06, "loss": 0.0715, "step": 34812 }, { "epoch": 0.7671145339260826, "grad_norm": 0.40659448504447937, "learning_rate": 4.067991859535808e-06, "loss": 0.0679, "step": 34813 }, { "epoch": 0.7671365692155988, "grad_norm": 0.5973163843154907, "learning_rate": 4.067258864021619e-06, "loss": 0.0496, "step": 34814 }, { "epoch": 0.767158604505115, "grad_norm": 0.46695974469184875, "learning_rate": 4.066525924193739e-06, "loss": 0.0433, "step": 34815 }, { "epoch": 0.7671806397946311, "grad_norm": 0.6095143556594849, "learning_rate": 4.065793040055908e-06, "loss": 0.0558, "step": 34816 }, { "epoch": 0.7672026750841473, "grad_norm": 0.5248620510101318, "learning_rate": 4.065060211611848e-06, "loss": 0.0594, "step": 34817 }, { "epoch": 0.7672247103736635, "grad_norm": 0.8395602107048035, "learning_rate": 4.064327438865311e-06, "loss": 0.0745, "step": 34818 }, { "epoch": 0.7672467456631796, "grad_norm": 0.5369084477424622, "learning_rate": 4.063594721820013e-06, "loss": 0.0553, "step": 34819 }, { "epoch": 0.7672687809526958, "grad_norm": 0.7241367101669312, "learning_rate": 4.0628620604796965e-06, "loss": 0.0817, "step": 34820 }, { "epoch": 0.7672908162422118, "grad_norm": 0.747149646282196, "learning_rate": 4.062129454848083e-06, "loss": 0.0559, "step": 34821 }, { "epoch": 0.767312851531728, "grad_norm": 0.4786388576030731, "learning_rate": 4.061396904928911e-06, "loss": 0.0643, "step": 34822 }, { "epoch": 0.7673348868212442, "grad_norm": 0.9289112091064453, "learning_rate": 4.060664410725914e-06, "loss": 0.0761, "step": 34823 }, { "epoch": 0.7673569221107603, "grad_norm": 0.6529898643493652, "learning_rate": 4.059931972242815e-06, "loss": 0.0925, "step": 34824 }, { "epoch": 0.7673789574002765, "grad_norm": 0.521591067314148, "learning_rate": 4.0591995894833485e-06, "loss": 0.0468, "step": 34825 }, { "epoch": 0.7674009926897927, "grad_norm": 0.6789368391036987, "learning_rate": 4.058467262451246e-06, "loss": 0.0778, "step": 34826 }, { "epoch": 0.7674230279793088, "grad_norm": 0.5443954467773438, "learning_rate": 4.057734991150241e-06, "loss": 0.0482, "step": 34827 }, { "epoch": 0.767445063268825, "grad_norm": 0.386353999376297, "learning_rate": 4.057002775584055e-06, "loss": 0.0622, "step": 34828 }, { "epoch": 0.7674670985583412, "grad_norm": 0.5173357725143433, "learning_rate": 4.0562706157564236e-06, "loss": 0.0508, "step": 34829 }, { "epoch": 0.7674891338478573, "grad_norm": 0.5592862963676453, "learning_rate": 4.055538511671077e-06, "loss": 0.0777, "step": 34830 }, { "epoch": 0.7675111691373735, "grad_norm": 0.30460307002067566, "learning_rate": 4.054806463331739e-06, "loss": 0.0482, "step": 34831 }, { "epoch": 0.7675332044268897, "grad_norm": 0.43555784225463867, "learning_rate": 4.054074470742138e-06, "loss": 0.0371, "step": 34832 }, { "epoch": 0.7675552397164058, "grad_norm": 0.7209202647209167, "learning_rate": 4.0533425339060074e-06, "loss": 0.0912, "step": 34833 }, { "epoch": 0.767577275005922, "grad_norm": 0.46337562799453735, "learning_rate": 4.052610652827078e-06, "loss": 0.0501, "step": 34834 }, { "epoch": 0.7675993102954382, "grad_norm": 0.7121704816818237, "learning_rate": 4.051878827509067e-06, "loss": 0.0432, "step": 34835 }, { "epoch": 0.7676213455849543, "grad_norm": 0.5264573097229004, "learning_rate": 4.05114705795571e-06, "loss": 0.0699, "step": 34836 }, { "epoch": 0.7676433808744705, "grad_norm": 0.2557089030742645, "learning_rate": 4.0504153441707356e-06, "loss": 0.0463, "step": 34837 }, { "epoch": 0.7676654161639866, "grad_norm": 0.3992028534412384, "learning_rate": 4.049683686157863e-06, "loss": 0.0319, "step": 34838 }, { "epoch": 0.7676874514535028, "grad_norm": 0.5451621413230896, "learning_rate": 4.048952083920828e-06, "loss": 0.051, "step": 34839 }, { "epoch": 0.767709486743019, "grad_norm": 0.4876989722251892, "learning_rate": 4.048220537463344e-06, "loss": 0.085, "step": 34840 }, { "epoch": 0.7677315220325351, "grad_norm": 0.2958080768585205, "learning_rate": 4.047489046789154e-06, "loss": 0.054, "step": 34841 }, { "epoch": 0.7677535573220513, "grad_norm": 0.5629515647888184, "learning_rate": 4.046757611901969e-06, "loss": 0.0619, "step": 34842 }, { "epoch": 0.7677755926115675, "grad_norm": 0.7094959020614624, "learning_rate": 4.0460262328055285e-06, "loss": 0.0513, "step": 34843 }, { "epoch": 0.7677976279010836, "grad_norm": 0.6200932264328003, "learning_rate": 4.045294909503545e-06, "loss": 0.0797, "step": 34844 }, { "epoch": 0.7678196631905998, "grad_norm": 0.8393722772598267, "learning_rate": 4.0445636419997495e-06, "loss": 0.0504, "step": 34845 }, { "epoch": 0.7678416984801159, "grad_norm": 0.703221321105957, "learning_rate": 4.04383243029787e-06, "loss": 0.0626, "step": 34846 }, { "epoch": 0.767863733769632, "grad_norm": 0.5646182894706726, "learning_rate": 4.043101274401623e-06, "loss": 0.083, "step": 34847 }, { "epoch": 0.7678857690591482, "grad_norm": 1.112295150756836, "learning_rate": 4.0423701743147355e-06, "loss": 0.0552, "step": 34848 }, { "epoch": 0.7679078043486643, "grad_norm": 0.5836954116821289, "learning_rate": 4.041639130040934e-06, "loss": 0.0632, "step": 34849 }, { "epoch": 0.7679298396381805, "grad_norm": 0.7607689499855042, "learning_rate": 4.040908141583944e-06, "loss": 0.0596, "step": 34850 }, { "epoch": 0.7679518749276967, "grad_norm": 0.3769860565662384, "learning_rate": 4.040177208947483e-06, "loss": 0.0588, "step": 34851 }, { "epoch": 0.7679739102172128, "grad_norm": 0.7462542653083801, "learning_rate": 4.039446332135276e-06, "loss": 0.0394, "step": 34852 }, { "epoch": 0.767995945506729, "grad_norm": 0.4272352457046509, "learning_rate": 4.03871551115105e-06, "loss": 0.0639, "step": 34853 }, { "epoch": 0.7680179807962452, "grad_norm": 0.5531303286552429, "learning_rate": 4.03798474599852e-06, "loss": 0.073, "step": 34854 }, { "epoch": 0.7680400160857613, "grad_norm": 0.645119309425354, "learning_rate": 4.037254036681411e-06, "loss": 0.0509, "step": 34855 }, { "epoch": 0.7680620513752775, "grad_norm": 0.6315440535545349, "learning_rate": 4.036523383203447e-06, "loss": 0.0637, "step": 34856 }, { "epoch": 0.7680840866647937, "grad_norm": 0.6823550462722778, "learning_rate": 4.0357927855683514e-06, "loss": 0.0548, "step": 34857 }, { "epoch": 0.7681061219543098, "grad_norm": 0.6858317255973816, "learning_rate": 4.0350622437798404e-06, "loss": 0.0607, "step": 34858 }, { "epoch": 0.768128157243826, "grad_norm": 0.6703923940658569, "learning_rate": 4.034331757841635e-06, "loss": 0.053, "step": 34859 }, { "epoch": 0.7681501925333422, "grad_norm": 0.3956560790538788, "learning_rate": 4.033601327757465e-06, "loss": 0.0479, "step": 34860 }, { "epoch": 0.7681722278228583, "grad_norm": 0.4689841866493225, "learning_rate": 4.032870953531038e-06, "loss": 0.0396, "step": 34861 }, { "epoch": 0.7681942631123745, "grad_norm": 0.355278879404068, "learning_rate": 4.032140635166085e-06, "loss": 0.0699, "step": 34862 }, { "epoch": 0.7682162984018907, "grad_norm": 0.7417128682136536, "learning_rate": 4.0314103726663125e-06, "loss": 0.075, "step": 34863 }, { "epoch": 0.7682383336914068, "grad_norm": 0.831978440284729, "learning_rate": 4.030680166035458e-06, "loss": 0.0857, "step": 34864 }, { "epoch": 0.768260368980923, "grad_norm": 0.8779630064964294, "learning_rate": 4.029950015277227e-06, "loss": 0.0638, "step": 34865 }, { "epoch": 0.7682824042704391, "grad_norm": 0.6825568675994873, "learning_rate": 4.029219920395347e-06, "loss": 0.0657, "step": 34866 }, { "epoch": 0.7683044395599553, "grad_norm": 0.44405698776245117, "learning_rate": 4.028489881393529e-06, "loss": 0.0468, "step": 34867 }, { "epoch": 0.7683264748494715, "grad_norm": 0.8402925133705139, "learning_rate": 4.0277598982754956e-06, "loss": 0.083, "step": 34868 }, { "epoch": 0.7683485101389876, "grad_norm": 0.34158673882484436, "learning_rate": 4.027029971044969e-06, "loss": 0.0452, "step": 34869 }, { "epoch": 0.7683705454285038, "grad_norm": 0.7595710158348083, "learning_rate": 4.026300099705658e-06, "loss": 0.0451, "step": 34870 }, { "epoch": 0.7683925807180199, "grad_norm": 0.7754313349723816, "learning_rate": 4.025570284261285e-06, "loss": 0.0685, "step": 34871 }, { "epoch": 0.768414616007536, "grad_norm": 0.7125498056411743, "learning_rate": 4.024840524715568e-06, "loss": 0.0753, "step": 34872 }, { "epoch": 0.7684366512970522, "grad_norm": 0.41818317770957947, "learning_rate": 4.0241108210722285e-06, "loss": 0.0558, "step": 34873 }, { "epoch": 0.7684586865865684, "grad_norm": 0.5962541699409485, "learning_rate": 4.023381173334974e-06, "loss": 0.0491, "step": 34874 }, { "epoch": 0.7684807218760845, "grad_norm": 0.89858078956604, "learning_rate": 4.022651581507524e-06, "loss": 0.0624, "step": 34875 }, { "epoch": 0.7685027571656007, "grad_norm": 0.9042015075683594, "learning_rate": 4.021922045593602e-06, "loss": 0.0654, "step": 34876 }, { "epoch": 0.7685247924551168, "grad_norm": 0.725358784198761, "learning_rate": 4.021192565596913e-06, "loss": 0.0699, "step": 34877 }, { "epoch": 0.768546827744633, "grad_norm": 0.4364475905895233, "learning_rate": 4.0204631415211776e-06, "loss": 0.0565, "step": 34878 }, { "epoch": 0.7685688630341492, "grad_norm": 0.8540897965431213, "learning_rate": 4.0197337733701104e-06, "loss": 0.0635, "step": 34879 }, { "epoch": 0.7685908983236653, "grad_norm": 0.7796037793159485, "learning_rate": 4.019004461147432e-06, "loss": 0.0549, "step": 34880 }, { "epoch": 0.7686129336131815, "grad_norm": 0.8043809533119202, "learning_rate": 4.018275204856849e-06, "loss": 0.0702, "step": 34881 }, { "epoch": 0.7686349689026977, "grad_norm": 0.578273355960846, "learning_rate": 4.017546004502077e-06, "loss": 0.0639, "step": 34882 }, { "epoch": 0.7686570041922138, "grad_norm": 0.7820932865142822, "learning_rate": 4.0168168600868395e-06, "loss": 0.0667, "step": 34883 }, { "epoch": 0.76867903948173, "grad_norm": 0.29072946310043335, "learning_rate": 4.016087771614837e-06, "loss": 0.0756, "step": 34884 }, { "epoch": 0.7687010747712462, "grad_norm": 1.043086051940918, "learning_rate": 4.015358739089795e-06, "loss": 0.08, "step": 34885 }, { "epoch": 0.7687231100607623, "grad_norm": 0.7277075052261353, "learning_rate": 4.014629762515412e-06, "loss": 0.0843, "step": 34886 }, { "epoch": 0.7687451453502785, "grad_norm": 0.34930792450904846, "learning_rate": 4.013900841895419e-06, "loss": 0.0535, "step": 34887 }, { "epoch": 0.7687671806397947, "grad_norm": 1.042097806930542, "learning_rate": 4.013171977233517e-06, "loss": 0.1203, "step": 34888 }, { "epoch": 0.7687892159293108, "grad_norm": 0.4555010199546814, "learning_rate": 4.012443168533425e-06, "loss": 0.0615, "step": 34889 }, { "epoch": 0.768811251218827, "grad_norm": 0.6940981149673462, "learning_rate": 4.011714415798847e-06, "loss": 0.0535, "step": 34890 }, { "epoch": 0.7688332865083431, "grad_norm": 0.43756556510925293, "learning_rate": 4.0109857190335e-06, "loss": 0.0572, "step": 34891 }, { "epoch": 0.7688553217978593, "grad_norm": 0.5889981985092163, "learning_rate": 4.010257078241101e-06, "loss": 0.1079, "step": 34892 }, { "epoch": 0.7688773570873755, "grad_norm": 0.7184678912162781, "learning_rate": 4.009528493425352e-06, "loss": 0.062, "step": 34893 }, { "epoch": 0.7688993923768916, "grad_norm": 0.462636262178421, "learning_rate": 4.008799964589967e-06, "loss": 0.0567, "step": 34894 }, { "epoch": 0.7689214276664077, "grad_norm": 0.9830296039581299, "learning_rate": 4.0080714917386574e-06, "loss": 0.0803, "step": 34895 }, { "epoch": 0.7689434629559239, "grad_norm": 0.6858449578285217, "learning_rate": 4.007343074875139e-06, "loss": 0.0713, "step": 34896 }, { "epoch": 0.76896549824544, "grad_norm": 0.6255911588668823, "learning_rate": 4.006614714003112e-06, "loss": 0.0553, "step": 34897 }, { "epoch": 0.7689875335349562, "grad_norm": 0.5476953983306885, "learning_rate": 4.005886409126291e-06, "loss": 0.0596, "step": 34898 }, { "epoch": 0.7690095688244724, "grad_norm": 0.5820612907409668, "learning_rate": 4.005158160248392e-06, "loss": 0.0532, "step": 34899 }, { "epoch": 0.7690316041139885, "grad_norm": 0.8052920699119568, "learning_rate": 4.004429967373113e-06, "loss": 0.0559, "step": 34900 }, { "epoch": 0.7690536394035047, "grad_norm": 0.6388850212097168, "learning_rate": 4.003701830504169e-06, "loss": 0.0412, "step": 34901 }, { "epoch": 0.7690756746930208, "grad_norm": 0.7137666940689087, "learning_rate": 4.002973749645267e-06, "loss": 0.1185, "step": 34902 }, { "epoch": 0.769097709982537, "grad_norm": 0.5894778370857239, "learning_rate": 4.002245724800123e-06, "loss": 0.0642, "step": 34903 }, { "epoch": 0.7691197452720532, "grad_norm": 0.7850286364555359, "learning_rate": 4.001517755972434e-06, "loss": 0.0668, "step": 34904 }, { "epoch": 0.7691417805615693, "grad_norm": 0.35656672716140747, "learning_rate": 4.000789843165912e-06, "loss": 0.0805, "step": 34905 }, { "epoch": 0.7691638158510855, "grad_norm": 0.6177018880844116, "learning_rate": 4.000061986384272e-06, "loss": 0.0732, "step": 34906 }, { "epoch": 0.7691858511406017, "grad_norm": 0.4522322714328766, "learning_rate": 3.999334185631209e-06, "loss": 0.0491, "step": 34907 }, { "epoch": 0.7692078864301178, "grad_norm": 0.40124425292015076, "learning_rate": 3.99860644091044e-06, "loss": 0.0506, "step": 34908 }, { "epoch": 0.769229921719634, "grad_norm": 0.5585353374481201, "learning_rate": 3.997878752225659e-06, "loss": 0.0487, "step": 34909 }, { "epoch": 0.7692519570091502, "grad_norm": 0.4406373202800751, "learning_rate": 3.997151119580592e-06, "loss": 0.0633, "step": 34910 }, { "epoch": 0.7692739922986663, "grad_norm": 0.5913204550743103, "learning_rate": 3.996423542978928e-06, "loss": 0.0609, "step": 34911 }, { "epoch": 0.7692960275881825, "grad_norm": 0.6072272658348083, "learning_rate": 3.995696022424385e-06, "loss": 0.0911, "step": 34912 }, { "epoch": 0.7693180628776987, "grad_norm": 0.5788396000862122, "learning_rate": 3.994968557920658e-06, "loss": 0.067, "step": 34913 }, { "epoch": 0.7693400981672148, "grad_norm": 0.46348515152931213, "learning_rate": 3.9942411494714584e-06, "loss": 0.0486, "step": 34914 }, { "epoch": 0.769362133456731, "grad_norm": 0.6549223065376282, "learning_rate": 3.993513797080494e-06, "loss": 0.0657, "step": 34915 }, { "epoch": 0.7693841687462472, "grad_norm": 1.0013474225997925, "learning_rate": 3.992786500751463e-06, "loss": 0.0764, "step": 34916 }, { "epoch": 0.7694062040357633, "grad_norm": 0.6147497892379761, "learning_rate": 3.992059260488072e-06, "loss": 0.0343, "step": 34917 }, { "epoch": 0.7694282393252795, "grad_norm": 0.43085843324661255, "learning_rate": 3.991332076294025e-06, "loss": 0.061, "step": 34918 }, { "epoch": 0.7694502746147956, "grad_norm": 0.5145573019981384, "learning_rate": 3.990604948173032e-06, "loss": 0.0649, "step": 34919 }, { "epoch": 0.7694723099043117, "grad_norm": 0.8066678643226624, "learning_rate": 3.989877876128783e-06, "loss": 0.0761, "step": 34920 }, { "epoch": 0.7694943451938279, "grad_norm": 0.45186522603034973, "learning_rate": 3.989150860164999e-06, "loss": 0.0504, "step": 34921 }, { "epoch": 0.769516380483344, "grad_norm": 0.7370628714561462, "learning_rate": 3.988423900285371e-06, "loss": 0.0852, "step": 34922 }, { "epoch": 0.7695384157728602, "grad_norm": 0.715807318687439, "learning_rate": 3.987696996493608e-06, "loss": 0.0623, "step": 34923 }, { "epoch": 0.7695604510623764, "grad_norm": 0.4689163267612457, "learning_rate": 3.986970148793405e-06, "loss": 0.0633, "step": 34924 }, { "epoch": 0.7695824863518925, "grad_norm": 0.5441117286682129, "learning_rate": 3.98624335718847e-06, "loss": 0.0467, "step": 34925 }, { "epoch": 0.7696045216414087, "grad_norm": 0.6744492053985596, "learning_rate": 3.985516621682506e-06, "loss": 0.0533, "step": 34926 }, { "epoch": 0.7696265569309249, "grad_norm": 0.6603535413742065, "learning_rate": 3.984789942279208e-06, "loss": 0.0481, "step": 34927 }, { "epoch": 0.769648592220441, "grad_norm": 0.7796748280525208, "learning_rate": 3.984063318982284e-06, "loss": 0.0643, "step": 34928 }, { "epoch": 0.7696706275099572, "grad_norm": 0.5095611214637756, "learning_rate": 3.983336751795431e-06, "loss": 0.0558, "step": 34929 }, { "epoch": 0.7696926627994733, "grad_norm": 0.4080391824245453, "learning_rate": 3.982610240722357e-06, "loss": 0.069, "step": 34930 }, { "epoch": 0.7697146980889895, "grad_norm": 0.6859480738639832, "learning_rate": 3.981883785766751e-06, "loss": 0.0684, "step": 34931 }, { "epoch": 0.7697367333785057, "grad_norm": 0.6187580227851868, "learning_rate": 3.98115738693232e-06, "loss": 0.052, "step": 34932 }, { "epoch": 0.7697587686680218, "grad_norm": 0.47160863876342773, "learning_rate": 3.980431044222768e-06, "loss": 0.0343, "step": 34933 }, { "epoch": 0.769780803957538, "grad_norm": 0.5738353133201599, "learning_rate": 3.9797047576417846e-06, "loss": 0.0469, "step": 34934 }, { "epoch": 0.7698028392470542, "grad_norm": 0.4693959653377533, "learning_rate": 3.97897852719308e-06, "loss": 0.0507, "step": 34935 }, { "epoch": 0.7698248745365703, "grad_norm": 0.5026964545249939, "learning_rate": 3.978252352880339e-06, "loss": 0.0513, "step": 34936 }, { "epoch": 0.7698469098260865, "grad_norm": 0.5236015915870667, "learning_rate": 3.9775262347072764e-06, "loss": 0.0549, "step": 34937 }, { "epoch": 0.7698689451156027, "grad_norm": 0.4414735734462738, "learning_rate": 3.97680017267758e-06, "loss": 0.0836, "step": 34938 }, { "epoch": 0.7698909804051188, "grad_norm": 0.7680847644805908, "learning_rate": 3.976074166794956e-06, "loss": 0.0734, "step": 34939 }, { "epoch": 0.769913015694635, "grad_norm": 0.5277242064476013, "learning_rate": 3.975348217063093e-06, "loss": 0.0586, "step": 34940 }, { "epoch": 0.7699350509841512, "grad_norm": 0.7676474452018738, "learning_rate": 3.974622323485693e-06, "loss": 0.0399, "step": 34941 }, { "epoch": 0.7699570862736673, "grad_norm": 0.643595278263092, "learning_rate": 3.9738964860664586e-06, "loss": 0.0611, "step": 34942 }, { "epoch": 0.7699791215631835, "grad_norm": 0.5656818747520447, "learning_rate": 3.973170704809072e-06, "loss": 0.0631, "step": 34943 }, { "epoch": 0.7700011568526997, "grad_norm": 0.16626283526420593, "learning_rate": 3.972444979717251e-06, "loss": 0.0751, "step": 34944 }, { "epoch": 0.7700231921422157, "grad_norm": 0.4283639192581177, "learning_rate": 3.971719310794676e-06, "loss": 0.0547, "step": 34945 }, { "epoch": 0.7700452274317319, "grad_norm": 0.4816087484359741, "learning_rate": 3.970993698045053e-06, "loss": 0.0738, "step": 34946 }, { "epoch": 0.770067262721248, "grad_norm": 0.4580899178981781, "learning_rate": 3.970268141472069e-06, "loss": 0.0421, "step": 34947 }, { "epoch": 0.7700892980107642, "grad_norm": 0.5709457397460938, "learning_rate": 3.969542641079424e-06, "loss": 0.0786, "step": 34948 }, { "epoch": 0.7701113333002804, "grad_norm": 0.8523239493370056, "learning_rate": 3.968817196870815e-06, "loss": 0.0593, "step": 34949 }, { "epoch": 0.7701333685897965, "grad_norm": 0.4554213881492615, "learning_rate": 3.968091808849933e-06, "loss": 0.0834, "step": 34950 }, { "epoch": 0.7701554038793127, "grad_norm": 0.7286773324012756, "learning_rate": 3.967366477020477e-06, "loss": 0.0604, "step": 34951 }, { "epoch": 0.7701774391688289, "grad_norm": 0.6885765194892883, "learning_rate": 3.966641201386138e-06, "loss": 0.0467, "step": 34952 }, { "epoch": 0.770199474458345, "grad_norm": 0.7531316876411438, "learning_rate": 3.965915981950617e-06, "loss": 0.0763, "step": 34953 }, { "epoch": 0.7702215097478612, "grad_norm": 0.49476322531700134, "learning_rate": 3.965190818717597e-06, "loss": 0.0577, "step": 34954 }, { "epoch": 0.7702435450373774, "grad_norm": 1.0773661136627197, "learning_rate": 3.964465711690778e-06, "loss": 0.0705, "step": 34955 }, { "epoch": 0.7702655803268935, "grad_norm": 1.0424518585205078, "learning_rate": 3.963740660873859e-06, "loss": 0.1049, "step": 34956 }, { "epoch": 0.7702876156164097, "grad_norm": 0.6792858242988586, "learning_rate": 3.963015666270521e-06, "loss": 0.0511, "step": 34957 }, { "epoch": 0.7703096509059258, "grad_norm": 0.4564332962036133, "learning_rate": 3.962290727884467e-06, "loss": 0.0433, "step": 34958 }, { "epoch": 0.770331686195442, "grad_norm": 0.8677347302436829, "learning_rate": 3.9615658457193774e-06, "loss": 0.0632, "step": 34959 }, { "epoch": 0.7703537214849582, "grad_norm": 0.9690619111061096, "learning_rate": 3.960841019778959e-06, "loss": 0.0735, "step": 34960 }, { "epoch": 0.7703757567744743, "grad_norm": 0.6274316310882568, "learning_rate": 3.960116250066894e-06, "loss": 0.0825, "step": 34961 }, { "epoch": 0.7703977920639905, "grad_norm": 0.47230055928230286, "learning_rate": 3.959391536586877e-06, "loss": 0.0665, "step": 34962 }, { "epoch": 0.7704198273535067, "grad_norm": 0.6500368714332581, "learning_rate": 3.958666879342603e-06, "loss": 0.0529, "step": 34963 }, { "epoch": 0.7704418626430228, "grad_norm": 0.5629237294197083, "learning_rate": 3.957942278337757e-06, "loss": 0.0695, "step": 34964 }, { "epoch": 0.770463897932539, "grad_norm": 0.9651867747306824, "learning_rate": 3.957217733576035e-06, "loss": 0.0892, "step": 34965 }, { "epoch": 0.7704859332220552, "grad_norm": 0.4385148584842682, "learning_rate": 3.956493245061116e-06, "loss": 0.0601, "step": 34966 }, { "epoch": 0.7705079685115713, "grad_norm": 0.6412734389305115, "learning_rate": 3.955768812796708e-06, "loss": 0.0823, "step": 34967 }, { "epoch": 0.7705300038010875, "grad_norm": 0.7227815985679626, "learning_rate": 3.955044436786488e-06, "loss": 0.0679, "step": 34968 }, { "epoch": 0.7705520390906035, "grad_norm": 0.25001969933509827, "learning_rate": 3.954320117034153e-06, "loss": 0.0487, "step": 34969 }, { "epoch": 0.7705740743801197, "grad_norm": 0.7740346789360046, "learning_rate": 3.953595853543383e-06, "loss": 0.0625, "step": 34970 }, { "epoch": 0.7705961096696359, "grad_norm": 0.355278342962265, "learning_rate": 3.952871646317875e-06, "loss": 0.054, "step": 34971 }, { "epoch": 0.770618144959152, "grad_norm": 0.8414288759231567, "learning_rate": 3.95214749536132e-06, "loss": 0.0684, "step": 34972 }, { "epoch": 0.7706401802486682, "grad_norm": 0.6460065841674805, "learning_rate": 3.951423400677396e-06, "loss": 0.0775, "step": 34973 }, { "epoch": 0.7706622155381844, "grad_norm": 0.5800428986549377, "learning_rate": 3.950699362269799e-06, "loss": 0.0701, "step": 34974 }, { "epoch": 0.7706842508277005, "grad_norm": 0.4100724458694458, "learning_rate": 3.9499753801422155e-06, "loss": 0.0277, "step": 34975 }, { "epoch": 0.7707062861172167, "grad_norm": 0.47374626994132996, "learning_rate": 3.949251454298337e-06, "loss": 0.0576, "step": 34976 }, { "epoch": 0.7707283214067329, "grad_norm": 0.666026771068573, "learning_rate": 3.9485275847418404e-06, "loss": 0.0823, "step": 34977 }, { "epoch": 0.770750356696249, "grad_norm": 0.8007179498672485, "learning_rate": 3.947803771476421e-06, "loss": 0.0798, "step": 34978 }, { "epoch": 0.7707723919857652, "grad_norm": 0.697789192199707, "learning_rate": 3.947080014505768e-06, "loss": 0.086, "step": 34979 }, { "epoch": 0.7707944272752814, "grad_norm": 0.5505248308181763, "learning_rate": 3.946356313833559e-06, "loss": 0.0422, "step": 34980 }, { "epoch": 0.7708164625647975, "grad_norm": 0.6847206354141235, "learning_rate": 3.945632669463489e-06, "loss": 0.0626, "step": 34981 }, { "epoch": 0.7708384978543137, "grad_norm": 0.4314188063144684, "learning_rate": 3.944909081399231e-06, "loss": 0.0584, "step": 34982 }, { "epoch": 0.7708605331438299, "grad_norm": 0.6813061237335205, "learning_rate": 3.944185549644488e-06, "loss": 0.0688, "step": 34983 }, { "epoch": 0.770882568433346, "grad_norm": 0.45376506447792053, "learning_rate": 3.943462074202931e-06, "loss": 0.0777, "step": 34984 }, { "epoch": 0.7709046037228622, "grad_norm": 0.5460718870162964, "learning_rate": 3.942738655078252e-06, "loss": 0.0619, "step": 34985 }, { "epoch": 0.7709266390123783, "grad_norm": 0.6716099977493286, "learning_rate": 3.942015292274138e-06, "loss": 0.0506, "step": 34986 }, { "epoch": 0.7709486743018945, "grad_norm": 0.5420807003974915, "learning_rate": 3.941291985794267e-06, "loss": 0.0777, "step": 34987 }, { "epoch": 0.7709707095914107, "grad_norm": 0.5320285558700562, "learning_rate": 3.940568735642329e-06, "loss": 0.0439, "step": 34988 }, { "epoch": 0.7709927448809268, "grad_norm": 0.5658324360847473, "learning_rate": 3.939845541821996e-06, "loss": 0.0447, "step": 34989 }, { "epoch": 0.771014780170443, "grad_norm": 0.624942421913147, "learning_rate": 3.9391224043369705e-06, "loss": 0.0561, "step": 34990 }, { "epoch": 0.7710368154599592, "grad_norm": 0.6382513642311096, "learning_rate": 3.93839932319092e-06, "loss": 0.0451, "step": 34991 }, { "epoch": 0.7710588507494753, "grad_norm": 0.6217374205589294, "learning_rate": 3.937676298387539e-06, "loss": 0.0697, "step": 34992 }, { "epoch": 0.7710808860389915, "grad_norm": 0.7708893418312073, "learning_rate": 3.936953329930501e-06, "loss": 0.0622, "step": 34993 }, { "epoch": 0.7711029213285076, "grad_norm": 0.3854902982711792, "learning_rate": 3.936230417823491e-06, "loss": 0.0357, "step": 34994 }, { "epoch": 0.7711249566180237, "grad_norm": 0.5219860076904297, "learning_rate": 3.935507562070196e-06, "loss": 0.0741, "step": 34995 }, { "epoch": 0.7711469919075399, "grad_norm": 0.6118441820144653, "learning_rate": 3.93478476267429e-06, "loss": 0.0475, "step": 34996 }, { "epoch": 0.771169027197056, "grad_norm": 0.5287240147590637, "learning_rate": 3.9340620196394605e-06, "loss": 0.0593, "step": 34997 }, { "epoch": 0.7711910624865722, "grad_norm": 0.5973634123802185, "learning_rate": 3.933339332969387e-06, "loss": 0.0553, "step": 34998 }, { "epoch": 0.7712130977760884, "grad_norm": 0.3069908022880554, "learning_rate": 3.932616702667754e-06, "loss": 0.074, "step": 34999 }, { "epoch": 0.7712351330656045, "grad_norm": 0.3701760470867157, "learning_rate": 3.931894128738234e-06, "loss": 0.0794, "step": 35000 }, { "epoch": 0.7712571683551207, "grad_norm": 0.6836367845535278, "learning_rate": 3.9311716111845145e-06, "loss": 0.0568, "step": 35001 }, { "epoch": 0.7712792036446369, "grad_norm": 0.9024111032485962, "learning_rate": 3.9304491500102765e-06, "loss": 0.0556, "step": 35002 }, { "epoch": 0.771301238934153, "grad_norm": 0.45016059279441833, "learning_rate": 3.929726745219192e-06, "loss": 0.0638, "step": 35003 }, { "epoch": 0.7713232742236692, "grad_norm": 0.6674304008483887, "learning_rate": 3.929004396814953e-06, "loss": 0.0521, "step": 35004 }, { "epoch": 0.7713453095131854, "grad_norm": 0.7945013046264648, "learning_rate": 3.92828210480122e-06, "loss": 0.0882, "step": 35005 }, { "epoch": 0.7713673448027015, "grad_norm": 0.610016942024231, "learning_rate": 3.9275598691816914e-06, "loss": 0.0498, "step": 35006 }, { "epoch": 0.7713893800922177, "grad_norm": 0.5415175557136536, "learning_rate": 3.926837689960036e-06, "loss": 0.0459, "step": 35007 }, { "epoch": 0.7714114153817339, "grad_norm": 0.7674533128738403, "learning_rate": 3.926115567139932e-06, "loss": 0.076, "step": 35008 }, { "epoch": 0.77143345067125, "grad_norm": 0.7909455299377441, "learning_rate": 3.9253935007250656e-06, "loss": 0.0486, "step": 35009 }, { "epoch": 0.7714554859607662, "grad_norm": 0.7884684801101685, "learning_rate": 3.924671490719103e-06, "loss": 0.0639, "step": 35010 }, { "epoch": 0.7714775212502823, "grad_norm": 0.48671433329582214, "learning_rate": 3.923949537125731e-06, "loss": 0.0588, "step": 35011 }, { "epoch": 0.7714995565397985, "grad_norm": 0.5032820701599121, "learning_rate": 3.923227639948618e-06, "loss": 0.0484, "step": 35012 }, { "epoch": 0.7715215918293147, "grad_norm": 0.5390880703926086, "learning_rate": 3.922505799191453e-06, "loss": 0.0743, "step": 35013 }, { "epoch": 0.7715436271188308, "grad_norm": 0.4427608251571655, "learning_rate": 3.921784014857901e-06, "loss": 0.0344, "step": 35014 }, { "epoch": 0.771565662408347, "grad_norm": 0.28894147276878357, "learning_rate": 3.921062286951651e-06, "loss": 0.0423, "step": 35015 }, { "epoch": 0.7715876976978632, "grad_norm": 0.5617964267730713, "learning_rate": 3.9203406154763654e-06, "loss": 0.0476, "step": 35016 }, { "epoch": 0.7716097329873793, "grad_norm": 0.6634063124656677, "learning_rate": 3.9196190004357265e-06, "loss": 0.0599, "step": 35017 }, { "epoch": 0.7716317682768955, "grad_norm": 0.3655892610549927, "learning_rate": 3.918897441833414e-06, "loss": 0.0405, "step": 35018 }, { "epoch": 0.7716538035664116, "grad_norm": 0.9674933552742004, "learning_rate": 3.918175939673096e-06, "loss": 0.0663, "step": 35019 }, { "epoch": 0.7716758388559277, "grad_norm": 0.4332018792629242, "learning_rate": 3.9174544939584484e-06, "loss": 0.0518, "step": 35020 }, { "epoch": 0.7716978741454439, "grad_norm": 0.5262995362281799, "learning_rate": 3.91673310469315e-06, "loss": 0.0678, "step": 35021 }, { "epoch": 0.77171990943496, "grad_norm": 0.6057379245758057, "learning_rate": 3.916011771880877e-06, "loss": 0.0603, "step": 35022 }, { "epoch": 0.7717419447244762, "grad_norm": 0.7290847897529602, "learning_rate": 3.915290495525293e-06, "loss": 0.0524, "step": 35023 }, { "epoch": 0.7717639800139924, "grad_norm": 0.6692600250244141, "learning_rate": 3.914569275630081e-06, "loss": 0.0719, "step": 35024 }, { "epoch": 0.7717860153035085, "grad_norm": 0.864298939704895, "learning_rate": 3.913848112198917e-06, "loss": 0.0799, "step": 35025 }, { "epoch": 0.7718080505930247, "grad_norm": 0.44700586795806885, "learning_rate": 3.913127005235464e-06, "loss": 0.0653, "step": 35026 }, { "epoch": 0.7718300858825409, "grad_norm": 1.3642617464065552, "learning_rate": 3.912405954743405e-06, "loss": 0.084, "step": 35027 }, { "epoch": 0.771852121172057, "grad_norm": 0.34333357214927673, "learning_rate": 3.911684960726399e-06, "loss": 0.0545, "step": 35028 }, { "epoch": 0.7718741564615732, "grad_norm": 0.805092990398407, "learning_rate": 3.910964023188138e-06, "loss": 0.058, "step": 35029 }, { "epoch": 0.7718961917510894, "grad_norm": 0.6791542172431946, "learning_rate": 3.910243142132277e-06, "loss": 0.0826, "step": 35030 }, { "epoch": 0.7719182270406055, "grad_norm": 0.9905857443809509, "learning_rate": 3.909522317562496e-06, "loss": 0.087, "step": 35031 }, { "epoch": 0.7719402623301217, "grad_norm": 0.6248450875282288, "learning_rate": 3.908801549482463e-06, "loss": 0.0834, "step": 35032 }, { "epoch": 0.7719622976196379, "grad_norm": 0.376331090927124, "learning_rate": 3.908080837895858e-06, "loss": 0.022, "step": 35033 }, { "epoch": 0.771984332909154, "grad_norm": 0.7435269951820374, "learning_rate": 3.907360182806345e-06, "loss": 0.0732, "step": 35034 }, { "epoch": 0.7720063681986702, "grad_norm": 0.6127570271492004, "learning_rate": 3.906639584217585e-06, "loss": 0.0752, "step": 35035 }, { "epoch": 0.7720284034881864, "grad_norm": 0.5219482183456421, "learning_rate": 3.9059190421332675e-06, "loss": 0.0482, "step": 35036 }, { "epoch": 0.7720504387777025, "grad_norm": 0.3208538293838501, "learning_rate": 3.905198556557049e-06, "loss": 0.0437, "step": 35037 }, { "epoch": 0.7720724740672187, "grad_norm": 0.6895447969436646, "learning_rate": 3.904478127492608e-06, "loss": 0.0808, "step": 35038 }, { "epoch": 0.7720945093567348, "grad_norm": 0.7239567041397095, "learning_rate": 3.903757754943602e-06, "loss": 0.061, "step": 35039 }, { "epoch": 0.772116544646251, "grad_norm": 0.37884679436683655, "learning_rate": 3.903037438913716e-06, "loss": 0.0434, "step": 35040 }, { "epoch": 0.7721385799357672, "grad_norm": 0.863646924495697, "learning_rate": 3.902317179406606e-06, "loss": 0.0794, "step": 35041 }, { "epoch": 0.7721606152252833, "grad_norm": 0.45153045654296875, "learning_rate": 3.901596976425951e-06, "loss": 0.0616, "step": 35042 }, { "epoch": 0.7721826505147995, "grad_norm": 0.7702061533927917, "learning_rate": 3.900876829975411e-06, "loss": 0.0364, "step": 35043 }, { "epoch": 0.7722046858043156, "grad_norm": 0.4935734272003174, "learning_rate": 3.900156740058654e-06, "loss": 0.0911, "step": 35044 }, { "epoch": 0.7722267210938317, "grad_norm": 0.36966466903686523, "learning_rate": 3.899436706679357e-06, "loss": 0.0463, "step": 35045 }, { "epoch": 0.7722487563833479, "grad_norm": 0.6501064896583557, "learning_rate": 3.898716729841177e-06, "loss": 0.0618, "step": 35046 }, { "epoch": 0.772270791672864, "grad_norm": 0.5200307965278625, "learning_rate": 3.897996809547787e-06, "loss": 0.0488, "step": 35047 }, { "epoch": 0.7722928269623802, "grad_norm": 0.45549917221069336, "learning_rate": 3.89727694580285e-06, "loss": 0.0595, "step": 35048 }, { "epoch": 0.7723148622518964, "grad_norm": 0.5586492419242859, "learning_rate": 3.896557138610041e-06, "loss": 0.0581, "step": 35049 }, { "epoch": 0.7723368975414125, "grad_norm": 0.5675830245018005, "learning_rate": 3.895837387973015e-06, "loss": 0.0775, "step": 35050 }, { "epoch": 0.7723589328309287, "grad_norm": 0.6656485795974731, "learning_rate": 3.895117693895445e-06, "loss": 0.0542, "step": 35051 }, { "epoch": 0.7723809681204449, "grad_norm": 0.2870728373527527, "learning_rate": 3.8943980563810014e-06, "loss": 0.0575, "step": 35052 }, { "epoch": 0.772403003409961, "grad_norm": 0.5864540338516235, "learning_rate": 3.893678475433337e-06, "loss": 0.0754, "step": 35053 }, { "epoch": 0.7724250386994772, "grad_norm": 0.4763975143432617, "learning_rate": 3.892958951056123e-06, "loss": 0.0908, "step": 35054 }, { "epoch": 0.7724470739889934, "grad_norm": 0.5829007625579834, "learning_rate": 3.892239483253028e-06, "loss": 0.0617, "step": 35055 }, { "epoch": 0.7724691092785095, "grad_norm": 0.4404582977294922, "learning_rate": 3.8915200720277165e-06, "loss": 0.0492, "step": 35056 }, { "epoch": 0.7724911445680257, "grad_norm": 0.3817695379257202, "learning_rate": 3.890800717383846e-06, "loss": 0.0266, "step": 35057 }, { "epoch": 0.7725131798575419, "grad_norm": 0.4643977880477905, "learning_rate": 3.890081419325085e-06, "loss": 0.0473, "step": 35058 }, { "epoch": 0.772535215147058, "grad_norm": 0.4321376383304596, "learning_rate": 3.889362177855102e-06, "loss": 0.0323, "step": 35059 }, { "epoch": 0.7725572504365742, "grad_norm": 0.39119940996170044, "learning_rate": 3.8886429929775485e-06, "loss": 0.0382, "step": 35060 }, { "epoch": 0.7725792857260904, "grad_norm": 0.9404208660125732, "learning_rate": 3.887923864696102e-06, "loss": 0.0803, "step": 35061 }, { "epoch": 0.7726013210156065, "grad_norm": 0.5650709867477417, "learning_rate": 3.887204793014407e-06, "loss": 0.0556, "step": 35062 }, { "epoch": 0.7726233563051227, "grad_norm": 0.503843367099762, "learning_rate": 3.886485777936147e-06, "loss": 0.0681, "step": 35063 }, { "epoch": 0.7726453915946389, "grad_norm": 0.8611817955970764, "learning_rate": 3.8857668194649696e-06, "loss": 0.0741, "step": 35064 }, { "epoch": 0.772667426884155, "grad_norm": 0.48721843957901, "learning_rate": 3.885047917604546e-06, "loss": 0.0438, "step": 35065 }, { "epoch": 0.7726894621736712, "grad_norm": 0.5726222395896912, "learning_rate": 3.884329072358529e-06, "loss": 0.0818, "step": 35066 }, { "epoch": 0.7727114974631873, "grad_norm": 0.6694819331169128, "learning_rate": 3.883610283730587e-06, "loss": 0.044, "step": 35067 }, { "epoch": 0.7727335327527034, "grad_norm": 0.3932873606681824, "learning_rate": 3.882891551724381e-06, "loss": 0.0419, "step": 35068 }, { "epoch": 0.7727555680422196, "grad_norm": 0.2722526490688324, "learning_rate": 3.882172876343566e-06, "loss": 0.0327, "step": 35069 }, { "epoch": 0.7727776033317357, "grad_norm": 0.8043993711471558, "learning_rate": 3.881454257591806e-06, "loss": 0.0669, "step": 35070 }, { "epoch": 0.7727996386212519, "grad_norm": 0.2879178822040558, "learning_rate": 3.880735695472762e-06, "loss": 0.0755, "step": 35071 }, { "epoch": 0.7728216739107681, "grad_norm": 0.8182224631309509, "learning_rate": 3.880017189990099e-06, "loss": 0.0874, "step": 35072 }, { "epoch": 0.7728437092002842, "grad_norm": 0.5461301207542419, "learning_rate": 3.879298741147466e-06, "loss": 0.0596, "step": 35073 }, { "epoch": 0.7728657444898004, "grad_norm": 0.5344619154930115, "learning_rate": 3.878580348948527e-06, "loss": 0.0419, "step": 35074 }, { "epoch": 0.7728877797793166, "grad_norm": 0.3746955692768097, "learning_rate": 3.877862013396949e-06, "loss": 0.0434, "step": 35075 }, { "epoch": 0.7729098150688327, "grad_norm": 0.4170486330986023, "learning_rate": 3.8771437344963776e-06, "loss": 0.0747, "step": 35076 }, { "epoch": 0.7729318503583489, "grad_norm": 0.31923815608024597, "learning_rate": 3.876425512250478e-06, "loss": 0.0573, "step": 35077 }, { "epoch": 0.772953885647865, "grad_norm": 0.6849733591079712, "learning_rate": 3.875707346662909e-06, "loss": 0.073, "step": 35078 }, { "epoch": 0.7729759209373812, "grad_norm": 0.6764376163482666, "learning_rate": 3.874989237737332e-06, "loss": 0.0535, "step": 35079 }, { "epoch": 0.7729979562268974, "grad_norm": 0.431011825799942, "learning_rate": 3.874271185477396e-06, "loss": 0.0651, "step": 35080 }, { "epoch": 0.7730199915164135, "grad_norm": 0.5456026792526245, "learning_rate": 3.873553189886761e-06, "loss": 0.0754, "step": 35081 }, { "epoch": 0.7730420268059297, "grad_norm": 0.45349979400634766, "learning_rate": 3.872835250969094e-06, "loss": 0.0455, "step": 35082 }, { "epoch": 0.7730640620954459, "grad_norm": 0.8682916164398193, "learning_rate": 3.872117368728037e-06, "loss": 0.0742, "step": 35083 }, { "epoch": 0.773086097384962, "grad_norm": 0.6564154624938965, "learning_rate": 3.87139954316726e-06, "loss": 0.0611, "step": 35084 }, { "epoch": 0.7731081326744782, "grad_norm": 0.6064294576644897, "learning_rate": 3.870681774290401e-06, "loss": 0.0606, "step": 35085 }, { "epoch": 0.7731301679639944, "grad_norm": 0.613461971282959, "learning_rate": 3.869964062101139e-06, "loss": 0.0519, "step": 35086 }, { "epoch": 0.7731522032535105, "grad_norm": 0.47319209575653076, "learning_rate": 3.869246406603112e-06, "loss": 0.0597, "step": 35087 }, { "epoch": 0.7731742385430267, "grad_norm": 0.4050808846950531, "learning_rate": 3.868528807799987e-06, "loss": 0.0406, "step": 35088 }, { "epoch": 0.7731962738325429, "grad_norm": 0.6235383749008179, "learning_rate": 3.867811265695411e-06, "loss": 0.0323, "step": 35089 }, { "epoch": 0.773218309122059, "grad_norm": 0.7784152030944824, "learning_rate": 3.86709378029304e-06, "loss": 0.0649, "step": 35090 }, { "epoch": 0.7732403444115752, "grad_norm": 0.6313318014144897, "learning_rate": 3.866376351596536e-06, "loss": 0.0588, "step": 35091 }, { "epoch": 0.7732623797010914, "grad_norm": 0.5180673599243164, "learning_rate": 3.865658979609542e-06, "loss": 0.0402, "step": 35092 }, { "epoch": 0.7732844149906074, "grad_norm": 0.403802752494812, "learning_rate": 3.8649416643357185e-06, "loss": 0.0412, "step": 35093 }, { "epoch": 0.7733064502801236, "grad_norm": 0.5331986546516418, "learning_rate": 3.864224405778719e-06, "loss": 0.0715, "step": 35094 }, { "epoch": 0.7733284855696397, "grad_norm": 1.0070345401763916, "learning_rate": 3.863507203942198e-06, "loss": 0.0918, "step": 35095 }, { "epoch": 0.7733505208591559, "grad_norm": 1.0571403503417969, "learning_rate": 3.862790058829804e-06, "loss": 0.067, "step": 35096 }, { "epoch": 0.7733725561486721, "grad_norm": 0.5670229196548462, "learning_rate": 3.862072970445193e-06, "loss": 0.0658, "step": 35097 }, { "epoch": 0.7733945914381882, "grad_norm": 0.7555049657821655, "learning_rate": 3.86135593879202e-06, "loss": 0.0612, "step": 35098 }, { "epoch": 0.7734166267277044, "grad_norm": 0.5416036248207092, "learning_rate": 3.8606389638739296e-06, "loss": 0.0813, "step": 35099 }, { "epoch": 0.7734386620172206, "grad_norm": 0.5410102009773254, "learning_rate": 3.859922045694579e-06, "loss": 0.0572, "step": 35100 }, { "epoch": 0.7734606973067367, "grad_norm": 0.6375275254249573, "learning_rate": 3.8592051842576196e-06, "loss": 0.0606, "step": 35101 }, { "epoch": 0.7734827325962529, "grad_norm": 0.5867385268211365, "learning_rate": 3.8584883795667056e-06, "loss": 0.077, "step": 35102 }, { "epoch": 0.773504767885769, "grad_norm": 0.5045324563980103, "learning_rate": 3.857771631625482e-06, "loss": 0.0539, "step": 35103 }, { "epoch": 0.7735268031752852, "grad_norm": 0.7011489868164062, "learning_rate": 3.857054940437601e-06, "loss": 0.0716, "step": 35104 }, { "epoch": 0.7735488384648014, "grad_norm": 0.5732990503311157, "learning_rate": 3.85633830600672e-06, "loss": 0.0584, "step": 35105 }, { "epoch": 0.7735708737543175, "grad_norm": 0.7145365476608276, "learning_rate": 3.855621728336479e-06, "loss": 0.066, "step": 35106 }, { "epoch": 0.7735929090438337, "grad_norm": 0.6166886687278748, "learning_rate": 3.854905207430536e-06, "loss": 0.0504, "step": 35107 }, { "epoch": 0.7736149443333499, "grad_norm": 0.19876064360141754, "learning_rate": 3.85418874329253e-06, "loss": 0.0459, "step": 35108 }, { "epoch": 0.773636979622866, "grad_norm": 0.46597620844841003, "learning_rate": 3.853472335926125e-06, "loss": 0.0969, "step": 35109 }, { "epoch": 0.7736590149123822, "grad_norm": 0.3983309268951416, "learning_rate": 3.85275598533496e-06, "loss": 0.0496, "step": 35110 }, { "epoch": 0.7736810502018984, "grad_norm": 0.44001349806785583, "learning_rate": 3.8520396915226894e-06, "loss": 0.0399, "step": 35111 }, { "epoch": 0.7737030854914145, "grad_norm": 0.920785665512085, "learning_rate": 3.851323454492955e-06, "loss": 0.0942, "step": 35112 }, { "epoch": 0.7737251207809307, "grad_norm": 0.6242782473564148, "learning_rate": 3.850607274249407e-06, "loss": 0.0678, "step": 35113 }, { "epoch": 0.7737471560704469, "grad_norm": 0.49090614914894104, "learning_rate": 3.849891150795702e-06, "loss": 0.0503, "step": 35114 }, { "epoch": 0.773769191359963, "grad_norm": 0.7199935913085938, "learning_rate": 3.849175084135474e-06, "loss": 0.0671, "step": 35115 }, { "epoch": 0.7737912266494792, "grad_norm": 0.7246462106704712, "learning_rate": 3.848459074272377e-06, "loss": 0.0683, "step": 35116 }, { "epoch": 0.7738132619389954, "grad_norm": 0.6568225622177124, "learning_rate": 3.84774312121006e-06, "loss": 0.0471, "step": 35117 }, { "epoch": 0.7738352972285114, "grad_norm": 0.25550079345703125, "learning_rate": 3.84702722495217e-06, "loss": 0.0647, "step": 35118 }, { "epoch": 0.7738573325180276, "grad_norm": 0.6199337840080261, "learning_rate": 3.846311385502347e-06, "loss": 0.0716, "step": 35119 }, { "epoch": 0.7738793678075437, "grad_norm": 0.4912683963775635, "learning_rate": 3.845595602864241e-06, "loss": 0.0357, "step": 35120 }, { "epoch": 0.7739014030970599, "grad_norm": 0.7326449155807495, "learning_rate": 3.844879877041503e-06, "loss": 0.044, "step": 35121 }, { "epoch": 0.7739234383865761, "grad_norm": 0.26393139362335205, "learning_rate": 3.8441642080377685e-06, "loss": 0.0429, "step": 35122 }, { "epoch": 0.7739454736760922, "grad_norm": 1.02419912815094, "learning_rate": 3.84344859585669e-06, "loss": 0.0746, "step": 35123 }, { "epoch": 0.7739675089656084, "grad_norm": 0.5159854292869568, "learning_rate": 3.842733040501908e-06, "loss": 0.0465, "step": 35124 }, { "epoch": 0.7739895442551246, "grad_norm": 0.5914598107337952, "learning_rate": 3.842017541977074e-06, "loss": 0.0708, "step": 35125 }, { "epoch": 0.7740115795446407, "grad_norm": 0.4253358840942383, "learning_rate": 3.841302100285826e-06, "loss": 0.0745, "step": 35126 }, { "epoch": 0.7740336148341569, "grad_norm": 0.4243527054786682, "learning_rate": 3.840586715431808e-06, "loss": 0.0753, "step": 35127 }, { "epoch": 0.7740556501236731, "grad_norm": 0.5126352906227112, "learning_rate": 3.8398713874186705e-06, "loss": 0.0504, "step": 35128 }, { "epoch": 0.7740776854131892, "grad_norm": 0.5244626998901367, "learning_rate": 3.839156116250049e-06, "loss": 0.0853, "step": 35129 }, { "epoch": 0.7740997207027054, "grad_norm": 0.5552296042442322, "learning_rate": 3.8384409019295945e-06, "loss": 0.077, "step": 35130 }, { "epoch": 0.7741217559922215, "grad_norm": 0.4863348603248596, "learning_rate": 3.837725744460937e-06, "loss": 0.0643, "step": 35131 }, { "epoch": 0.7741437912817377, "grad_norm": 0.47553375363349915, "learning_rate": 3.837010643847737e-06, "loss": 0.0683, "step": 35132 }, { "epoch": 0.7741658265712539, "grad_norm": 0.3863354027271271, "learning_rate": 3.8362956000936235e-06, "loss": 0.0696, "step": 35133 }, { "epoch": 0.77418786186077, "grad_norm": 0.4233926832675934, "learning_rate": 3.835580613202246e-06, "loss": 0.041, "step": 35134 }, { "epoch": 0.7742098971502862, "grad_norm": 0.9254252314567566, "learning_rate": 3.834865683177235e-06, "loss": 0.0696, "step": 35135 }, { "epoch": 0.7742319324398024, "grad_norm": 0.865892231464386, "learning_rate": 3.834150810022251e-06, "loss": 0.0758, "step": 35136 }, { "epoch": 0.7742539677293185, "grad_norm": 0.7854296565055847, "learning_rate": 3.833435993740923e-06, "loss": 0.0819, "step": 35137 }, { "epoch": 0.7742760030188347, "grad_norm": 0.5075185298919678, "learning_rate": 3.83272123433689e-06, "loss": 0.0658, "step": 35138 }, { "epoch": 0.7742980383083509, "grad_norm": 0.6882274150848389, "learning_rate": 3.8320065318137945e-06, "loss": 0.0526, "step": 35139 }, { "epoch": 0.774320073597867, "grad_norm": 0.6436210870742798, "learning_rate": 3.8312918861752796e-06, "loss": 0.0541, "step": 35140 }, { "epoch": 0.7743421088873832, "grad_norm": 1.0122222900390625, "learning_rate": 3.830577297424987e-06, "loss": 0.0749, "step": 35141 }, { "epoch": 0.7743641441768992, "grad_norm": 0.5396388173103333, "learning_rate": 3.829862765566547e-06, "loss": 0.0475, "step": 35142 }, { "epoch": 0.7743861794664154, "grad_norm": 0.7740739583969116, "learning_rate": 3.829148290603614e-06, "loss": 0.0692, "step": 35143 }, { "epoch": 0.7744082147559316, "grad_norm": 0.6768497228622437, "learning_rate": 3.828433872539814e-06, "loss": 0.0591, "step": 35144 }, { "epoch": 0.7744302500454477, "grad_norm": 0.3596385419368744, "learning_rate": 3.827719511378796e-06, "loss": 0.0438, "step": 35145 }, { "epoch": 0.7744522853349639, "grad_norm": 0.6570671796798706, "learning_rate": 3.82700520712419e-06, "loss": 0.0833, "step": 35146 }, { "epoch": 0.7744743206244801, "grad_norm": 0.5849577188491821, "learning_rate": 3.826290959779636e-06, "loss": 0.0377, "step": 35147 }, { "epoch": 0.7744963559139962, "grad_norm": 0.5130072236061096, "learning_rate": 3.8255767693487795e-06, "loss": 0.0462, "step": 35148 }, { "epoch": 0.7745183912035124, "grad_norm": 0.5893097519874573, "learning_rate": 3.824862635835249e-06, "loss": 0.0523, "step": 35149 }, { "epoch": 0.7745404264930286, "grad_norm": 0.7059844732284546, "learning_rate": 3.824148559242684e-06, "loss": 0.0608, "step": 35150 }, { "epoch": 0.7745624617825447, "grad_norm": 0.9320589303970337, "learning_rate": 3.823434539574725e-06, "loss": 0.0787, "step": 35151 }, { "epoch": 0.7745844970720609, "grad_norm": 1.2645823955535889, "learning_rate": 3.822720576835011e-06, "loss": 0.1027, "step": 35152 }, { "epoch": 0.7746065323615771, "grad_norm": 0.33738330006599426, "learning_rate": 3.82200667102717e-06, "loss": 0.0386, "step": 35153 }, { "epoch": 0.7746285676510932, "grad_norm": 0.3690027594566345, "learning_rate": 3.8212928221548424e-06, "loss": 0.0474, "step": 35154 }, { "epoch": 0.7746506029406094, "grad_norm": 0.43130236864089966, "learning_rate": 3.820579030221671e-06, "loss": 0.0471, "step": 35155 }, { "epoch": 0.7746726382301256, "grad_norm": 0.7019442319869995, "learning_rate": 3.819865295231281e-06, "loss": 0.1076, "step": 35156 }, { "epoch": 0.7746946735196417, "grad_norm": 0.6500954627990723, "learning_rate": 3.819151617187316e-06, "loss": 0.055, "step": 35157 }, { "epoch": 0.7747167088091579, "grad_norm": 0.5966055393218994, "learning_rate": 3.818437996093399e-06, "loss": 0.0662, "step": 35158 }, { "epoch": 0.774738744098674, "grad_norm": 0.7132899761199951, "learning_rate": 3.8177244319531815e-06, "loss": 0.0392, "step": 35159 }, { "epoch": 0.7747607793881902, "grad_norm": 0.5137353539466858, "learning_rate": 3.817010924770286e-06, "loss": 0.069, "step": 35160 }, { "epoch": 0.7747828146777064, "grad_norm": 0.5823354125022888, "learning_rate": 3.816297474548356e-06, "loss": 0.0738, "step": 35161 }, { "epoch": 0.7748048499672225, "grad_norm": 0.8463872671127319, "learning_rate": 3.815584081291014e-06, "loss": 0.0817, "step": 35162 }, { "epoch": 0.7748268852567387, "grad_norm": 0.47520846128463745, "learning_rate": 3.8148707450019e-06, "loss": 0.0656, "step": 35163 }, { "epoch": 0.7748489205462549, "grad_norm": 0.7675325274467468, "learning_rate": 3.814157465684653e-06, "loss": 0.045, "step": 35164 }, { "epoch": 0.774870955835771, "grad_norm": 0.5224699378013611, "learning_rate": 3.8134442433428902e-06, "loss": 0.0563, "step": 35165 }, { "epoch": 0.7748929911252872, "grad_norm": 0.7300729155540466, "learning_rate": 3.812731077980264e-06, "loss": 0.0899, "step": 35166 }, { "epoch": 0.7749150264148033, "grad_norm": 0.5057620406150818, "learning_rate": 3.8120179696003926e-06, "loss": 0.0593, "step": 35167 }, { "epoch": 0.7749370617043194, "grad_norm": 0.7077125310897827, "learning_rate": 3.811304918206919e-06, "loss": 0.057, "step": 35168 }, { "epoch": 0.7749590969938356, "grad_norm": 0.8472453951835632, "learning_rate": 3.810591923803464e-06, "loss": 0.0535, "step": 35169 }, { "epoch": 0.7749811322833517, "grad_norm": 0.7675859928131104, "learning_rate": 3.8098789863936644e-06, "loss": 0.0545, "step": 35170 }, { "epoch": 0.7750031675728679, "grad_norm": 0.731330394744873, "learning_rate": 3.809166105981157e-06, "loss": 0.0805, "step": 35171 }, { "epoch": 0.7750252028623841, "grad_norm": 0.487142950296402, "learning_rate": 3.8084532825695612e-06, "loss": 0.0568, "step": 35172 }, { "epoch": 0.7750472381519002, "grad_norm": 0.5290395021438599, "learning_rate": 3.807740516162515e-06, "loss": 0.0691, "step": 35173 }, { "epoch": 0.7750692734414164, "grad_norm": 0.6266445517539978, "learning_rate": 3.807027806763649e-06, "loss": 0.0866, "step": 35174 }, { "epoch": 0.7750913087309326, "grad_norm": 0.7336678504943848, "learning_rate": 3.8063151543765964e-06, "loss": 0.0524, "step": 35175 }, { "epoch": 0.7751133440204487, "grad_norm": 0.41228118538856506, "learning_rate": 3.8056025590049794e-06, "loss": 0.0406, "step": 35176 }, { "epoch": 0.7751353793099649, "grad_norm": 0.5163472890853882, "learning_rate": 3.8048900206524318e-06, "loss": 0.0622, "step": 35177 }, { "epoch": 0.7751574145994811, "grad_norm": 1.0436103343963623, "learning_rate": 3.804177539322586e-06, "loss": 0.0838, "step": 35178 }, { "epoch": 0.7751794498889972, "grad_norm": 0.5190848112106323, "learning_rate": 3.8034651150190636e-06, "loss": 0.0598, "step": 35179 }, { "epoch": 0.7752014851785134, "grad_norm": 0.7425925135612488, "learning_rate": 3.802752747745502e-06, "loss": 0.0711, "step": 35180 }, { "epoch": 0.7752235204680296, "grad_norm": 0.8388394117355347, "learning_rate": 3.802040437505515e-06, "loss": 0.0649, "step": 35181 }, { "epoch": 0.7752455557575457, "grad_norm": 0.501430094242096, "learning_rate": 3.8013281843027513e-06, "loss": 0.0679, "step": 35182 }, { "epoch": 0.7752675910470619, "grad_norm": 0.5212440490722656, "learning_rate": 3.800615988140823e-06, "loss": 0.087, "step": 35183 }, { "epoch": 0.775289626336578, "grad_norm": 0.39911502599716187, "learning_rate": 3.7999038490233672e-06, "loss": 0.0714, "step": 35184 }, { "epoch": 0.7753116616260942, "grad_norm": 0.47576770186424255, "learning_rate": 3.7991917669540043e-06, "loss": 0.0489, "step": 35185 }, { "epoch": 0.7753336969156104, "grad_norm": 0.3628336787223816, "learning_rate": 3.798479741936363e-06, "loss": 0.081, "step": 35186 }, { "epoch": 0.7753557322051265, "grad_norm": 0.7214605212211609, "learning_rate": 3.7977677739740758e-06, "loss": 0.076, "step": 35187 }, { "epoch": 0.7753777674946427, "grad_norm": 0.7724708318710327, "learning_rate": 3.7970558630707557e-06, "loss": 0.083, "step": 35188 }, { "epoch": 0.7753998027841589, "grad_norm": 1.0696080923080444, "learning_rate": 3.7963440092300455e-06, "loss": 0.0784, "step": 35189 }, { "epoch": 0.775421838073675, "grad_norm": 0.7699801325798035, "learning_rate": 3.7956322124555586e-06, "loss": 0.0838, "step": 35190 }, { "epoch": 0.7754438733631912, "grad_norm": 0.5289475917816162, "learning_rate": 3.7949204727509307e-06, "loss": 0.0717, "step": 35191 }, { "epoch": 0.7754659086527073, "grad_norm": 0.555072546005249, "learning_rate": 3.7942087901197767e-06, "loss": 0.0631, "step": 35192 }, { "epoch": 0.7754879439422234, "grad_norm": 0.2861631214618683, "learning_rate": 3.793497164565726e-06, "loss": 0.0598, "step": 35193 }, { "epoch": 0.7755099792317396, "grad_norm": 0.7789902091026306, "learning_rate": 3.792785596092409e-06, "loss": 0.0922, "step": 35194 }, { "epoch": 0.7755320145212558, "grad_norm": 0.3694579303264618, "learning_rate": 3.792074084703439e-06, "loss": 0.0666, "step": 35195 }, { "epoch": 0.7755540498107719, "grad_norm": 0.5729594826698303, "learning_rate": 3.791362630402447e-06, "loss": 0.0556, "step": 35196 }, { "epoch": 0.7755760851002881, "grad_norm": 0.5429752469062805, "learning_rate": 3.7906512331930557e-06, "loss": 0.0655, "step": 35197 }, { "epoch": 0.7755981203898042, "grad_norm": 0.46917954087257385, "learning_rate": 3.789939893078892e-06, "loss": 0.0466, "step": 35198 }, { "epoch": 0.7756201556793204, "grad_norm": 0.6434080004692078, "learning_rate": 3.789228610063572e-06, "loss": 0.052, "step": 35199 }, { "epoch": 0.7756421909688366, "grad_norm": 0.5942353010177612, "learning_rate": 3.7885173841507217e-06, "loss": 0.047, "step": 35200 }, { "epoch": 0.7756642262583527, "grad_norm": 0.8586161732673645, "learning_rate": 3.7878062153439692e-06, "loss": 0.0927, "step": 35201 }, { "epoch": 0.7756862615478689, "grad_norm": 0.4988383948802948, "learning_rate": 3.787095103646929e-06, "loss": 0.0481, "step": 35202 }, { "epoch": 0.7757082968373851, "grad_norm": 0.5881170630455017, "learning_rate": 3.786384049063229e-06, "loss": 0.0775, "step": 35203 }, { "epoch": 0.7757303321269012, "grad_norm": 0.5098129510879517, "learning_rate": 3.7856730515964804e-06, "loss": 0.0484, "step": 35204 }, { "epoch": 0.7757523674164174, "grad_norm": 0.5367980599403381, "learning_rate": 3.7849621112503203e-06, "loss": 0.075, "step": 35205 }, { "epoch": 0.7757744027059336, "grad_norm": 0.5198763608932495, "learning_rate": 3.7842512280283587e-06, "loss": 0.0587, "step": 35206 }, { "epoch": 0.7757964379954497, "grad_norm": 0.9662213325500488, "learning_rate": 3.7835404019342246e-06, "loss": 0.0984, "step": 35207 }, { "epoch": 0.7758184732849659, "grad_norm": 0.7351272106170654, "learning_rate": 3.7828296329715285e-06, "loss": 0.066, "step": 35208 }, { "epoch": 0.7758405085744821, "grad_norm": 0.6211676597595215, "learning_rate": 3.7821189211438974e-06, "loss": 0.062, "step": 35209 }, { "epoch": 0.7758625438639982, "grad_norm": 0.3343605697154999, "learning_rate": 3.7814082664549533e-06, "loss": 0.0361, "step": 35210 }, { "epoch": 0.7758845791535144, "grad_norm": 0.4390740990638733, "learning_rate": 3.7806976689083036e-06, "loss": 0.0512, "step": 35211 }, { "epoch": 0.7759066144430306, "grad_norm": 0.5781403183937073, "learning_rate": 3.7799871285075867e-06, "loss": 0.0446, "step": 35212 }, { "epoch": 0.7759286497325467, "grad_norm": 1.2409802675247192, "learning_rate": 3.779276645256408e-06, "loss": 0.0882, "step": 35213 }, { "epoch": 0.7759506850220629, "grad_norm": 0.617357611656189, "learning_rate": 3.7785662191583936e-06, "loss": 0.0564, "step": 35214 }, { "epoch": 0.775972720311579, "grad_norm": 0.48238182067871094, "learning_rate": 3.777855850217157e-06, "loss": 0.0715, "step": 35215 }, { "epoch": 0.7759947556010951, "grad_norm": 0.45097240805625916, "learning_rate": 3.7771455384363164e-06, "loss": 0.0523, "step": 35216 }, { "epoch": 0.7760167908906113, "grad_norm": 0.30842962861061096, "learning_rate": 3.776435283819496e-06, "loss": 0.0239, "step": 35217 }, { "epoch": 0.7760388261801274, "grad_norm": 0.6378111243247986, "learning_rate": 3.775725086370305e-06, "loss": 0.0632, "step": 35218 }, { "epoch": 0.7760608614696436, "grad_norm": 0.34372854232788086, "learning_rate": 3.7750149460923644e-06, "loss": 0.0708, "step": 35219 }, { "epoch": 0.7760828967591598, "grad_norm": 0.4992545545101166, "learning_rate": 3.774304862989294e-06, "loss": 0.0628, "step": 35220 }, { "epoch": 0.7761049320486759, "grad_norm": 0.7706360816955566, "learning_rate": 3.7735948370647103e-06, "loss": 0.0812, "step": 35221 }, { "epoch": 0.7761269673381921, "grad_norm": 0.7834388613700867, "learning_rate": 3.7728848683222262e-06, "loss": 0.0693, "step": 35222 }, { "epoch": 0.7761490026277083, "grad_norm": 0.3424528241157532, "learning_rate": 3.7721749567654597e-06, "loss": 0.0813, "step": 35223 }, { "epoch": 0.7761710379172244, "grad_norm": 0.5350512266159058, "learning_rate": 3.7714651023980297e-06, "loss": 0.0395, "step": 35224 }, { "epoch": 0.7761930732067406, "grad_norm": 0.44038161635398865, "learning_rate": 3.7707553052235468e-06, "loss": 0.0469, "step": 35225 }, { "epoch": 0.7762151084962567, "grad_norm": 0.5088726878166199, "learning_rate": 3.770045565245631e-06, "loss": 0.0488, "step": 35226 }, { "epoch": 0.7762371437857729, "grad_norm": 0.8113804459571838, "learning_rate": 3.769335882467887e-06, "loss": 0.0594, "step": 35227 }, { "epoch": 0.7762591790752891, "grad_norm": 0.8605188131332397, "learning_rate": 3.7686262568939477e-06, "loss": 0.0874, "step": 35228 }, { "epoch": 0.7762812143648052, "grad_norm": 0.4905060827732086, "learning_rate": 3.7679166885274126e-06, "loss": 0.0516, "step": 35229 }, { "epoch": 0.7763032496543214, "grad_norm": 0.7684768438339233, "learning_rate": 3.7672071773718998e-06, "loss": 0.074, "step": 35230 }, { "epoch": 0.7763252849438376, "grad_norm": 0.5359030365943909, "learning_rate": 3.76649772343103e-06, "loss": 0.0494, "step": 35231 }, { "epoch": 0.7763473202333537, "grad_norm": 0.3265620768070221, "learning_rate": 3.765788326708407e-06, "loss": 0.0634, "step": 35232 }, { "epoch": 0.7763693555228699, "grad_norm": 0.350309818983078, "learning_rate": 3.7650789872076513e-06, "loss": 0.0347, "step": 35233 }, { "epoch": 0.7763913908123861, "grad_norm": 0.6627348065376282, "learning_rate": 3.7643697049323666e-06, "loss": 0.0522, "step": 35234 }, { "epoch": 0.7764134261019022, "grad_norm": 0.5562392473220825, "learning_rate": 3.7636604798861785e-06, "loss": 0.0658, "step": 35235 }, { "epoch": 0.7764354613914184, "grad_norm": 0.648120641708374, "learning_rate": 3.7629513120726888e-06, "loss": 0.0495, "step": 35236 }, { "epoch": 0.7764574966809346, "grad_norm": 0.7042259573936462, "learning_rate": 3.76224220149552e-06, "loss": 0.0671, "step": 35237 }, { "epoch": 0.7764795319704507, "grad_norm": 0.621997058391571, "learning_rate": 3.761533148158272e-06, "loss": 0.0933, "step": 35238 }, { "epoch": 0.7765015672599669, "grad_norm": 0.8008533716201782, "learning_rate": 3.7608241520645625e-06, "loss": 0.0571, "step": 35239 }, { "epoch": 0.776523602549483, "grad_norm": 0.48333942890167236, "learning_rate": 3.7601152132180066e-06, "loss": 0.0596, "step": 35240 }, { "epoch": 0.7765456378389991, "grad_norm": 0.8305522799491882, "learning_rate": 3.7594063316222064e-06, "loss": 0.0564, "step": 35241 }, { "epoch": 0.7765676731285153, "grad_norm": 0.3674291968345642, "learning_rate": 3.7586975072807796e-06, "loss": 0.0629, "step": 35242 }, { "epoch": 0.7765897084180314, "grad_norm": 0.5793560743331909, "learning_rate": 3.7579887401973325e-06, "loss": 0.0432, "step": 35243 }, { "epoch": 0.7766117437075476, "grad_norm": 0.2921743094921112, "learning_rate": 3.7572800303754823e-06, "loss": 0.0502, "step": 35244 }, { "epoch": 0.7766337789970638, "grad_norm": 0.7937895655632019, "learning_rate": 3.756571377818828e-06, "loss": 0.0591, "step": 35245 }, { "epoch": 0.7766558142865799, "grad_norm": 0.6580108404159546, "learning_rate": 3.755862782530987e-06, "loss": 0.0582, "step": 35246 }, { "epoch": 0.7766778495760961, "grad_norm": 0.666587769985199, "learning_rate": 3.7551542445155696e-06, "loss": 0.0601, "step": 35247 }, { "epoch": 0.7766998848656123, "grad_norm": 1.405618667602539, "learning_rate": 3.7544457637761775e-06, "loss": 0.1135, "step": 35248 }, { "epoch": 0.7767219201551284, "grad_norm": 1.3991342782974243, "learning_rate": 3.7537373403164283e-06, "loss": 0.0757, "step": 35249 }, { "epoch": 0.7767439554446446, "grad_norm": 0.6133233904838562, "learning_rate": 3.7530289741399156e-06, "loss": 0.0524, "step": 35250 }, { "epoch": 0.7767659907341607, "grad_norm": 0.6014172434806824, "learning_rate": 3.752320665250266e-06, "loss": 0.0499, "step": 35251 }, { "epoch": 0.7767880260236769, "grad_norm": 0.8633544445037842, "learning_rate": 3.751612413651076e-06, "loss": 0.067, "step": 35252 }, { "epoch": 0.7768100613131931, "grad_norm": 0.3579392433166504, "learning_rate": 3.7509042193459555e-06, "loss": 0.0698, "step": 35253 }, { "epoch": 0.7768320966027092, "grad_norm": 0.6970821022987366, "learning_rate": 3.7501960823385113e-06, "loss": 0.0709, "step": 35254 }, { "epoch": 0.7768541318922254, "grad_norm": 0.6385515928268433, "learning_rate": 3.749488002632356e-06, "loss": 0.0457, "step": 35255 }, { "epoch": 0.7768761671817416, "grad_norm": 0.7794253826141357, "learning_rate": 3.748779980231086e-06, "loss": 0.0506, "step": 35256 }, { "epoch": 0.7768982024712577, "grad_norm": 0.5849298238754272, "learning_rate": 3.7480720151383145e-06, "loss": 0.0489, "step": 35257 }, { "epoch": 0.7769202377607739, "grad_norm": 0.5410946011543274, "learning_rate": 3.747364107357648e-06, "loss": 0.0633, "step": 35258 }, { "epoch": 0.7769422730502901, "grad_norm": 0.7059450745582581, "learning_rate": 3.7466562568926886e-06, "loss": 0.0702, "step": 35259 }, { "epoch": 0.7769643083398062, "grad_norm": 0.5770438313484192, "learning_rate": 3.745948463747047e-06, "loss": 0.078, "step": 35260 }, { "epoch": 0.7769863436293224, "grad_norm": 0.5026628971099854, "learning_rate": 3.745240727924315e-06, "loss": 0.0633, "step": 35261 }, { "epoch": 0.7770083789188386, "grad_norm": 0.628477156162262, "learning_rate": 3.7445330494281165e-06, "loss": 0.0584, "step": 35262 }, { "epoch": 0.7770304142083547, "grad_norm": 0.49966853857040405, "learning_rate": 3.7438254282620423e-06, "loss": 0.0338, "step": 35263 }, { "epoch": 0.7770524494978709, "grad_norm": 0.39967504143714905, "learning_rate": 3.7431178644297055e-06, "loss": 0.0607, "step": 35264 }, { "epoch": 0.777074484787387, "grad_norm": 0.6304724216461182, "learning_rate": 3.742410357934702e-06, "loss": 0.0593, "step": 35265 }, { "epoch": 0.7770965200769031, "grad_norm": 0.5439178347587585, "learning_rate": 3.7417029087806388e-06, "loss": 0.0507, "step": 35266 }, { "epoch": 0.7771185553664193, "grad_norm": 0.6313412189483643, "learning_rate": 3.7409955169711246e-06, "loss": 0.0796, "step": 35267 }, { "epoch": 0.7771405906559354, "grad_norm": 0.47376853227615356, "learning_rate": 3.7402881825097534e-06, "loss": 0.0608, "step": 35268 }, { "epoch": 0.7771626259454516, "grad_norm": 0.5359144806861877, "learning_rate": 3.7395809054001323e-06, "loss": 0.0916, "step": 35269 }, { "epoch": 0.7771846612349678, "grad_norm": 0.657018780708313, "learning_rate": 3.7388736856458648e-06, "loss": 0.0534, "step": 35270 }, { "epoch": 0.7772066965244839, "grad_norm": 0.6236556768417358, "learning_rate": 3.738166523250557e-06, "loss": 0.0541, "step": 35271 }, { "epoch": 0.7772287318140001, "grad_norm": 0.6557815074920654, "learning_rate": 3.7374594182178003e-06, "loss": 0.0715, "step": 35272 }, { "epoch": 0.7772507671035163, "grad_norm": 0.8540046215057373, "learning_rate": 3.7367523705512018e-06, "loss": 0.0747, "step": 35273 }, { "epoch": 0.7772728023930324, "grad_norm": 0.8255083560943604, "learning_rate": 3.7360453802543693e-06, "loss": 0.055, "step": 35274 }, { "epoch": 0.7772948376825486, "grad_norm": 0.6708410382270813, "learning_rate": 3.735338447330893e-06, "loss": 0.0599, "step": 35275 }, { "epoch": 0.7773168729720648, "grad_norm": 0.5656077861785889, "learning_rate": 3.7346315717843776e-06, "loss": 0.054, "step": 35276 }, { "epoch": 0.7773389082615809, "grad_norm": 0.8260706663131714, "learning_rate": 3.7339247536184262e-06, "loss": 0.0694, "step": 35277 }, { "epoch": 0.7773609435510971, "grad_norm": 0.29764524102211, "learning_rate": 3.7332179928366404e-06, "loss": 0.0509, "step": 35278 }, { "epoch": 0.7773829788406132, "grad_norm": 0.5518829226493835, "learning_rate": 3.732511289442613e-06, "loss": 0.0544, "step": 35279 }, { "epoch": 0.7774050141301294, "grad_norm": 0.631048321723938, "learning_rate": 3.7318046434399485e-06, "loss": 0.0813, "step": 35280 }, { "epoch": 0.7774270494196456, "grad_norm": 0.7740451693534851, "learning_rate": 3.7310980548322493e-06, "loss": 0.0277, "step": 35281 }, { "epoch": 0.7774490847091617, "grad_norm": 0.43389493227005005, "learning_rate": 3.7303915236231064e-06, "loss": 0.0526, "step": 35282 }, { "epoch": 0.7774711199986779, "grad_norm": 0.3025703728199005, "learning_rate": 3.7296850498161263e-06, "loss": 0.0606, "step": 35283 }, { "epoch": 0.7774931552881941, "grad_norm": 0.31059232354164124, "learning_rate": 3.7289786334148963e-06, "loss": 0.0581, "step": 35284 }, { "epoch": 0.7775151905777102, "grad_norm": 0.43393704295158386, "learning_rate": 3.7282722744230302e-06, "loss": 0.0907, "step": 35285 }, { "epoch": 0.7775372258672264, "grad_norm": 0.4663704037666321, "learning_rate": 3.7275659728441136e-06, "loss": 0.0584, "step": 35286 }, { "epoch": 0.7775592611567426, "grad_norm": 0.6388804912567139, "learning_rate": 3.7268597286817517e-06, "loss": 0.051, "step": 35287 }, { "epoch": 0.7775812964462587, "grad_norm": 0.6833839416503906, "learning_rate": 3.7261535419395352e-06, "loss": 0.0727, "step": 35288 }, { "epoch": 0.7776033317357749, "grad_norm": 0.4524051547050476, "learning_rate": 3.725447412621063e-06, "loss": 0.0519, "step": 35289 }, { "epoch": 0.7776253670252911, "grad_norm": 0.6690260767936707, "learning_rate": 3.7247413407299384e-06, "loss": 0.0475, "step": 35290 }, { "epoch": 0.7776474023148071, "grad_norm": 0.7205527424812317, "learning_rate": 3.7240353262697472e-06, "loss": 0.0871, "step": 35291 }, { "epoch": 0.7776694376043233, "grad_norm": 0.5658147931098938, "learning_rate": 3.7233293692440917e-06, "loss": 0.0857, "step": 35292 }, { "epoch": 0.7776914728938394, "grad_norm": 0.4669645130634308, "learning_rate": 3.722623469656567e-06, "loss": 0.0658, "step": 35293 }, { "epoch": 0.7777135081833556, "grad_norm": 0.611223042011261, "learning_rate": 3.721917627510772e-06, "loss": 0.0686, "step": 35294 }, { "epoch": 0.7777355434728718, "grad_norm": 0.6190699338912964, "learning_rate": 3.7212118428102938e-06, "loss": 0.0835, "step": 35295 }, { "epoch": 0.7777575787623879, "grad_norm": 0.4678351581096649, "learning_rate": 3.7205061155587326e-06, "loss": 0.0744, "step": 35296 }, { "epoch": 0.7777796140519041, "grad_norm": 0.729803204536438, "learning_rate": 3.7198004457596862e-06, "loss": 0.0843, "step": 35297 }, { "epoch": 0.7778016493414203, "grad_norm": 0.5243672728538513, "learning_rate": 3.7190948334167414e-06, "loss": 0.0413, "step": 35298 }, { "epoch": 0.7778236846309364, "grad_norm": 0.7822141051292419, "learning_rate": 3.718389278533495e-06, "loss": 0.0746, "step": 35299 }, { "epoch": 0.7778457199204526, "grad_norm": 0.7663615942001343, "learning_rate": 3.7176837811135413e-06, "loss": 0.0476, "step": 35300 }, { "epoch": 0.7778677552099688, "grad_norm": 0.7141662836074829, "learning_rate": 3.7169783411604787e-06, "loss": 0.0472, "step": 35301 }, { "epoch": 0.7778897904994849, "grad_norm": 0.38841569423675537, "learning_rate": 3.7162729586778926e-06, "loss": 0.0344, "step": 35302 }, { "epoch": 0.7779118257890011, "grad_norm": 0.6576749086380005, "learning_rate": 3.715567633669379e-06, "loss": 0.0767, "step": 35303 }, { "epoch": 0.7779338610785173, "grad_norm": 0.5161476731300354, "learning_rate": 3.7148623661385346e-06, "loss": 0.0464, "step": 35304 }, { "epoch": 0.7779558963680334, "grad_norm": 0.41648605465888977, "learning_rate": 3.7141571560889414e-06, "loss": 0.0816, "step": 35305 }, { "epoch": 0.7779779316575496, "grad_norm": 0.950778603553772, "learning_rate": 3.713452003524205e-06, "loss": 0.0742, "step": 35306 }, { "epoch": 0.7779999669470657, "grad_norm": 1.1149911880493164, "learning_rate": 3.7127469084479e-06, "loss": 0.0739, "step": 35307 }, { "epoch": 0.7780220022365819, "grad_norm": 0.4574025869369507, "learning_rate": 3.712041870863637e-06, "loss": 0.0179, "step": 35308 }, { "epoch": 0.7780440375260981, "grad_norm": 0.8141447305679321, "learning_rate": 3.711336890774993e-06, "loss": 0.0894, "step": 35309 }, { "epoch": 0.7780660728156142, "grad_norm": 0.6372573375701904, "learning_rate": 3.710631968185567e-06, "loss": 0.063, "step": 35310 }, { "epoch": 0.7780881081051304, "grad_norm": 0.784035325050354, "learning_rate": 3.709927103098944e-06, "loss": 0.0842, "step": 35311 }, { "epoch": 0.7781101433946466, "grad_norm": 0.3961832821369171, "learning_rate": 3.7092222955187137e-06, "loss": 0.0712, "step": 35312 }, { "epoch": 0.7781321786841627, "grad_norm": 0.6911928057670593, "learning_rate": 3.7085175454484753e-06, "loss": 0.0641, "step": 35313 }, { "epoch": 0.7781542139736789, "grad_norm": 0.8436508178710938, "learning_rate": 3.7078128528918075e-06, "loss": 0.0612, "step": 35314 }, { "epoch": 0.778176249263195, "grad_norm": 0.6835862994194031, "learning_rate": 3.707108217852304e-06, "loss": 0.0575, "step": 35315 }, { "epoch": 0.7781982845527111, "grad_norm": 0.37565693259239197, "learning_rate": 3.706403640333554e-06, "loss": 0.0643, "step": 35316 }, { "epoch": 0.7782203198422273, "grad_norm": 0.6578932404518127, "learning_rate": 3.7056991203391504e-06, "loss": 0.0551, "step": 35317 }, { "epoch": 0.7782423551317434, "grad_norm": 0.4558785557746887, "learning_rate": 3.7049946578726744e-06, "loss": 0.0629, "step": 35318 }, { "epoch": 0.7782643904212596, "grad_norm": 0.9336335062980652, "learning_rate": 3.7042902529377163e-06, "loss": 0.0608, "step": 35319 }, { "epoch": 0.7782864257107758, "grad_norm": 0.7666008472442627, "learning_rate": 3.70358590553787e-06, "loss": 0.0718, "step": 35320 }, { "epoch": 0.7783084610002919, "grad_norm": 0.8923076391220093, "learning_rate": 3.702881615676714e-06, "loss": 0.081, "step": 35321 }, { "epoch": 0.7783304962898081, "grad_norm": 0.698870062828064, "learning_rate": 3.7021773833578393e-06, "loss": 0.0641, "step": 35322 }, { "epoch": 0.7783525315793243, "grad_norm": 0.5241820812225342, "learning_rate": 3.701473208584834e-06, "loss": 0.0369, "step": 35323 }, { "epoch": 0.7783745668688404, "grad_norm": 0.5935202836990356, "learning_rate": 3.700769091361289e-06, "loss": 0.0651, "step": 35324 }, { "epoch": 0.7783966021583566, "grad_norm": 0.4289218783378601, "learning_rate": 3.7000650316907815e-06, "loss": 0.0767, "step": 35325 }, { "epoch": 0.7784186374478728, "grad_norm": 0.7492213845252991, "learning_rate": 3.6993610295769032e-06, "loss": 0.0548, "step": 35326 }, { "epoch": 0.7784406727373889, "grad_norm": 0.670303225517273, "learning_rate": 3.6986570850232417e-06, "loss": 0.06, "step": 35327 }, { "epoch": 0.7784627080269051, "grad_norm": 0.4085697829723358, "learning_rate": 3.6979531980333768e-06, "loss": 0.0613, "step": 35328 }, { "epoch": 0.7784847433164213, "grad_norm": 0.4286254346370697, "learning_rate": 3.6972493686109016e-06, "loss": 0.0804, "step": 35329 }, { "epoch": 0.7785067786059374, "grad_norm": 0.8379423022270203, "learning_rate": 3.6965455967593857e-06, "loss": 0.0695, "step": 35330 }, { "epoch": 0.7785288138954536, "grad_norm": 0.43966102600097656, "learning_rate": 3.695841882482435e-06, "loss": 0.0741, "step": 35331 }, { "epoch": 0.7785508491849698, "grad_norm": 0.15040847659111023, "learning_rate": 3.6951382257836186e-06, "loss": 0.0264, "step": 35332 }, { "epoch": 0.7785728844744859, "grad_norm": 0.8258470296859741, "learning_rate": 3.6944346266665296e-06, "loss": 0.0873, "step": 35333 }, { "epoch": 0.7785949197640021, "grad_norm": 0.2948716878890991, "learning_rate": 3.6937310851347438e-06, "loss": 0.0555, "step": 35334 }, { "epoch": 0.7786169550535182, "grad_norm": 0.5745453238487244, "learning_rate": 3.6930276011918467e-06, "loss": 0.0477, "step": 35335 }, { "epoch": 0.7786389903430344, "grad_norm": 0.8082971572875977, "learning_rate": 3.6923241748414303e-06, "loss": 0.064, "step": 35336 }, { "epoch": 0.7786610256325506, "grad_norm": 0.5190466642379761, "learning_rate": 3.691620806087064e-06, "loss": 0.0665, "step": 35337 }, { "epoch": 0.7786830609220667, "grad_norm": 0.5534639954566956, "learning_rate": 3.690917494932337e-06, "loss": 0.0611, "step": 35338 }, { "epoch": 0.7787050962115829, "grad_norm": 0.600293755531311, "learning_rate": 3.6902142413808325e-06, "loss": 0.0314, "step": 35339 }, { "epoch": 0.778727131501099, "grad_norm": 0.8665978908538818, "learning_rate": 3.6895110454361357e-06, "loss": 0.0421, "step": 35340 }, { "epoch": 0.7787491667906151, "grad_norm": 0.5036479234695435, "learning_rate": 3.6888079071018203e-06, "loss": 0.0764, "step": 35341 }, { "epoch": 0.7787712020801313, "grad_norm": 0.47668153047561646, "learning_rate": 3.688104826381472e-06, "loss": 0.0538, "step": 35342 }, { "epoch": 0.7787932373696475, "grad_norm": 0.6264811158180237, "learning_rate": 3.687401803278676e-06, "loss": 0.0586, "step": 35343 }, { "epoch": 0.7788152726591636, "grad_norm": 0.6936188340187073, "learning_rate": 3.6866988377970048e-06, "loss": 0.0632, "step": 35344 }, { "epoch": 0.7788373079486798, "grad_norm": 0.5933946371078491, "learning_rate": 3.6859959299400413e-06, "loss": 0.0479, "step": 35345 }, { "epoch": 0.778859343238196, "grad_norm": 0.8065239191055298, "learning_rate": 3.685293079711369e-06, "loss": 0.0741, "step": 35346 }, { "epoch": 0.7788813785277121, "grad_norm": 0.5593334436416626, "learning_rate": 3.6845902871145704e-06, "loss": 0.0468, "step": 35347 }, { "epoch": 0.7789034138172283, "grad_norm": 0.9845864176750183, "learning_rate": 3.6838875521532183e-06, "loss": 0.0619, "step": 35348 }, { "epoch": 0.7789254491067444, "grad_norm": 0.4680812656879425, "learning_rate": 3.6831848748308933e-06, "loss": 0.0394, "step": 35349 }, { "epoch": 0.7789474843962606, "grad_norm": 0.881154477596283, "learning_rate": 3.6824822551511806e-06, "loss": 0.0737, "step": 35350 }, { "epoch": 0.7789695196857768, "grad_norm": 0.36145323514938354, "learning_rate": 3.6817796931176524e-06, "loss": 0.0636, "step": 35351 }, { "epoch": 0.7789915549752929, "grad_norm": 0.48226919770240784, "learning_rate": 3.6810771887338923e-06, "loss": 0.0539, "step": 35352 }, { "epoch": 0.7790135902648091, "grad_norm": 0.5395346879959106, "learning_rate": 3.680374742003466e-06, "loss": 0.0483, "step": 35353 }, { "epoch": 0.7790356255543253, "grad_norm": 0.8957234025001526, "learning_rate": 3.6796723529299725e-06, "loss": 0.0665, "step": 35354 }, { "epoch": 0.7790576608438414, "grad_norm": 2.251110553741455, "learning_rate": 3.678970021516972e-06, "loss": 0.0491, "step": 35355 }, { "epoch": 0.7790796961333576, "grad_norm": 0.4462164640426636, "learning_rate": 3.678267747768053e-06, "loss": 0.0523, "step": 35356 }, { "epoch": 0.7791017314228738, "grad_norm": 0.6914175748825073, "learning_rate": 3.6775655316867783e-06, "loss": 0.0711, "step": 35357 }, { "epoch": 0.7791237667123899, "grad_norm": 0.24345673620700836, "learning_rate": 3.6768633732767434e-06, "loss": 0.0561, "step": 35358 }, { "epoch": 0.7791458020019061, "grad_norm": 0.7850156426429749, "learning_rate": 3.6761612725415143e-06, "loss": 0.0424, "step": 35359 }, { "epoch": 0.7791678372914222, "grad_norm": 0.4211502969264984, "learning_rate": 3.6754592294846646e-06, "loss": 0.0573, "step": 35360 }, { "epoch": 0.7791898725809384, "grad_norm": 0.5672568082809448, "learning_rate": 3.6747572441097736e-06, "loss": 0.0827, "step": 35361 }, { "epoch": 0.7792119078704546, "grad_norm": 0.6661352515220642, "learning_rate": 3.674055316420418e-06, "loss": 0.0723, "step": 35362 }, { "epoch": 0.7792339431599707, "grad_norm": 0.4637780487537384, "learning_rate": 3.673353446420175e-06, "loss": 0.0756, "step": 35363 }, { "epoch": 0.7792559784494869, "grad_norm": 0.6723044514656067, "learning_rate": 3.672651634112608e-06, "loss": 0.0515, "step": 35364 }, { "epoch": 0.779278013739003, "grad_norm": 0.5729155540466309, "learning_rate": 3.671949879501309e-06, "loss": 0.058, "step": 35365 }, { "epoch": 0.7793000490285191, "grad_norm": 0.4628317952156067, "learning_rate": 3.6712481825898413e-06, "loss": 0.0615, "step": 35366 }, { "epoch": 0.7793220843180353, "grad_norm": 0.6654835939407349, "learning_rate": 3.6705465433817842e-06, "loss": 0.0666, "step": 35367 }, { "epoch": 0.7793441196075515, "grad_norm": 0.7047665119171143, "learning_rate": 3.6698449618807046e-06, "loss": 0.0985, "step": 35368 }, { "epoch": 0.7793661548970676, "grad_norm": 0.32815465331077576, "learning_rate": 3.669143438090181e-06, "loss": 0.0524, "step": 35369 }, { "epoch": 0.7793881901865838, "grad_norm": 0.5647879838943481, "learning_rate": 3.66844197201379e-06, "loss": 0.0604, "step": 35370 }, { "epoch": 0.7794102254761, "grad_norm": 0.2836674451828003, "learning_rate": 3.667740563655097e-06, "loss": 0.0583, "step": 35371 }, { "epoch": 0.7794322607656161, "grad_norm": 0.7304143905639648, "learning_rate": 3.667039213017677e-06, "loss": 0.0643, "step": 35372 }, { "epoch": 0.7794542960551323, "grad_norm": 0.4258618652820587, "learning_rate": 3.666337920105104e-06, "loss": 0.0705, "step": 35373 }, { "epoch": 0.7794763313446484, "grad_norm": 0.18731902539730072, "learning_rate": 3.665636684920953e-06, "loss": 0.0639, "step": 35374 }, { "epoch": 0.7794983666341646, "grad_norm": 0.44513368606567383, "learning_rate": 3.664935507468789e-06, "loss": 0.035, "step": 35375 }, { "epoch": 0.7795204019236808, "grad_norm": 0.362081915140152, "learning_rate": 3.6642343877521868e-06, "loss": 0.0568, "step": 35376 }, { "epoch": 0.7795424372131969, "grad_norm": 0.6491819024085999, "learning_rate": 3.663533325774721e-06, "loss": 0.0508, "step": 35377 }, { "epoch": 0.7795644725027131, "grad_norm": 0.6463919878005981, "learning_rate": 3.662832321539956e-06, "loss": 0.0642, "step": 35378 }, { "epoch": 0.7795865077922293, "grad_norm": 0.8154104351997375, "learning_rate": 3.662131375051469e-06, "loss": 0.0766, "step": 35379 }, { "epoch": 0.7796085430817454, "grad_norm": 0.8197368383407593, "learning_rate": 3.6614304863128182e-06, "loss": 0.0884, "step": 35380 }, { "epoch": 0.7796305783712616, "grad_norm": 0.2905316650867462, "learning_rate": 3.6607296553275913e-06, "loss": 0.0798, "step": 35381 }, { "epoch": 0.7796526136607778, "grad_norm": 0.6362363696098328, "learning_rate": 3.660028882099344e-06, "loss": 0.0463, "step": 35382 }, { "epoch": 0.7796746489502939, "grad_norm": 0.5555927753448486, "learning_rate": 3.659328166631656e-06, "loss": 0.06, "step": 35383 }, { "epoch": 0.7796966842398101, "grad_norm": 0.4562956988811493, "learning_rate": 3.6586275089280845e-06, "loss": 0.0696, "step": 35384 }, { "epoch": 0.7797187195293263, "grad_norm": 0.8825253248214722, "learning_rate": 3.6579269089922076e-06, "loss": 0.0487, "step": 35385 }, { "epoch": 0.7797407548188424, "grad_norm": 0.3967874348163605, "learning_rate": 3.6572263668275927e-06, "loss": 0.0403, "step": 35386 }, { "epoch": 0.7797627901083586, "grad_norm": 0.9406455159187317, "learning_rate": 3.6565258824377984e-06, "loss": 0.0928, "step": 35387 }, { "epoch": 0.7797848253978747, "grad_norm": 0.492808073759079, "learning_rate": 3.655825455826411e-06, "loss": 0.0434, "step": 35388 }, { "epoch": 0.7798068606873908, "grad_norm": 0.5102498531341553, "learning_rate": 3.655125086996981e-06, "loss": 0.04, "step": 35389 }, { "epoch": 0.779828895976907, "grad_norm": 0.704963207244873, "learning_rate": 3.654424775953089e-06, "loss": 0.0545, "step": 35390 }, { "epoch": 0.7798509312664231, "grad_norm": 0.34755924344062805, "learning_rate": 3.6537245226982906e-06, "loss": 0.0598, "step": 35391 }, { "epoch": 0.7798729665559393, "grad_norm": 0.5812214016914368, "learning_rate": 3.6530243272361564e-06, "loss": 0.0366, "step": 35392 }, { "epoch": 0.7798950018454555, "grad_norm": 0.4435194432735443, "learning_rate": 3.6523241895702596e-06, "loss": 0.0615, "step": 35393 }, { "epoch": 0.7799170371349716, "grad_norm": 0.5646126866340637, "learning_rate": 3.6516241097041563e-06, "loss": 0.0594, "step": 35394 }, { "epoch": 0.7799390724244878, "grad_norm": 0.42382901906967163, "learning_rate": 3.650924087641417e-06, "loss": 0.0638, "step": 35395 }, { "epoch": 0.779961107714004, "grad_norm": 0.4583705961704254, "learning_rate": 3.6502241233856064e-06, "loss": 0.0588, "step": 35396 }, { "epoch": 0.7799831430035201, "grad_norm": 0.4704926311969757, "learning_rate": 3.649524216940296e-06, "loss": 0.0547, "step": 35397 }, { "epoch": 0.7800051782930363, "grad_norm": 0.8477996587753296, "learning_rate": 3.6488243683090404e-06, "loss": 0.0504, "step": 35398 }, { "epoch": 0.7800272135825524, "grad_norm": 0.4213114380836487, "learning_rate": 3.648124577495409e-06, "loss": 0.0498, "step": 35399 }, { "epoch": 0.7800492488720686, "grad_norm": 0.30304110050201416, "learning_rate": 3.6474248445029723e-06, "loss": 0.047, "step": 35400 }, { "epoch": 0.7800712841615848, "grad_norm": 0.3697417378425598, "learning_rate": 3.646725169335282e-06, "loss": 0.0245, "step": 35401 }, { "epoch": 0.7800933194511009, "grad_norm": 0.8558924198150635, "learning_rate": 3.646025551995914e-06, "loss": 0.0822, "step": 35402 }, { "epoch": 0.7801153547406171, "grad_norm": 0.6680425405502319, "learning_rate": 3.6453259924884186e-06, "loss": 0.0695, "step": 35403 }, { "epoch": 0.7801373900301333, "grad_norm": 0.7704551219940186, "learning_rate": 3.6446264908163746e-06, "loss": 0.0743, "step": 35404 }, { "epoch": 0.7801594253196494, "grad_norm": 0.5364981293678284, "learning_rate": 3.6439270469833324e-06, "loss": 0.07, "step": 35405 }, { "epoch": 0.7801814606091656, "grad_norm": 0.39050570130348206, "learning_rate": 3.6432276609928646e-06, "loss": 0.0301, "step": 35406 }, { "epoch": 0.7802034958986818, "grad_norm": 0.43132784962654114, "learning_rate": 3.6425283328485226e-06, "loss": 0.0584, "step": 35407 }, { "epoch": 0.7802255311881979, "grad_norm": 0.58707195520401, "learning_rate": 3.641829062553874e-06, "loss": 0.0614, "step": 35408 }, { "epoch": 0.7802475664777141, "grad_norm": 0.3862864375114441, "learning_rate": 3.6411298501124862e-06, "loss": 0.0584, "step": 35409 }, { "epoch": 0.7802696017672303, "grad_norm": 0.7297109365463257, "learning_rate": 3.6404306955279078e-06, "loss": 0.089, "step": 35410 }, { "epoch": 0.7802916370567464, "grad_norm": 1.0118865966796875, "learning_rate": 3.6397315988037137e-06, "loss": 0.0691, "step": 35411 }, { "epoch": 0.7803136723462626, "grad_norm": 0.6603575944900513, "learning_rate": 3.6390325599434552e-06, "loss": 0.0666, "step": 35412 }, { "epoch": 0.7803357076357788, "grad_norm": 0.698651134967804, "learning_rate": 3.638333578950699e-06, "loss": 0.0615, "step": 35413 }, { "epoch": 0.7803577429252948, "grad_norm": 0.5123671889305115, "learning_rate": 3.6376346558289998e-06, "loss": 0.0988, "step": 35414 }, { "epoch": 0.780379778214811, "grad_norm": 0.5085645318031311, "learning_rate": 3.63693579058192e-06, "loss": 0.0466, "step": 35415 }, { "epoch": 0.7804018135043271, "grad_norm": 0.6944042444229126, "learning_rate": 3.636236983213024e-06, "loss": 0.0798, "step": 35416 }, { "epoch": 0.7804238487938433, "grad_norm": 0.4759806990623474, "learning_rate": 3.6355382337258613e-06, "loss": 0.0506, "step": 35417 }, { "epoch": 0.7804458840833595, "grad_norm": 0.5656910538673401, "learning_rate": 3.6348395421239984e-06, "loss": 0.0934, "step": 35418 }, { "epoch": 0.7804679193728756, "grad_norm": 0.7602187991142273, "learning_rate": 3.6341409084109904e-06, "loss": 0.078, "step": 35419 }, { "epoch": 0.7804899546623918, "grad_norm": 0.2297133356332779, "learning_rate": 3.6334423325904036e-06, "loss": 0.0416, "step": 35420 }, { "epoch": 0.780511989951908, "grad_norm": 0.3895505964756012, "learning_rate": 3.6327438146657847e-06, "loss": 0.0422, "step": 35421 }, { "epoch": 0.7805340252414241, "grad_norm": 1.0322550535202026, "learning_rate": 3.632045354640698e-06, "loss": 0.0573, "step": 35422 }, { "epoch": 0.7805560605309403, "grad_norm": 0.6335294246673584, "learning_rate": 3.631346952518703e-06, "loss": 0.07, "step": 35423 }, { "epoch": 0.7805780958204565, "grad_norm": 0.596524178981781, "learning_rate": 3.6306486083033515e-06, "loss": 0.0562, "step": 35424 }, { "epoch": 0.7806001311099726, "grad_norm": 0.6212809681892395, "learning_rate": 3.6299503219982077e-06, "loss": 0.0522, "step": 35425 }, { "epoch": 0.7806221663994888, "grad_norm": 0.6645025014877319, "learning_rate": 3.6292520936068147e-06, "loss": 0.0427, "step": 35426 }, { "epoch": 0.780644201689005, "grad_norm": 0.7350146770477295, "learning_rate": 3.628553923132746e-06, "loss": 0.0631, "step": 35427 }, { "epoch": 0.7806662369785211, "grad_norm": 0.5884225368499756, "learning_rate": 3.6278558105795467e-06, "loss": 0.056, "step": 35428 }, { "epoch": 0.7806882722680373, "grad_norm": 0.3565557301044464, "learning_rate": 3.6271577559507813e-06, "loss": 0.0439, "step": 35429 }, { "epoch": 0.7807103075575534, "grad_norm": 0.7108167409896851, "learning_rate": 3.6264597592499947e-06, "loss": 0.0776, "step": 35430 }, { "epoch": 0.7807323428470696, "grad_norm": 0.31557929515838623, "learning_rate": 3.625761820480748e-06, "loss": 0.0375, "step": 35431 }, { "epoch": 0.7807543781365858, "grad_norm": 0.48667922616004944, "learning_rate": 3.6250639396465995e-06, "loss": 0.0657, "step": 35432 }, { "epoch": 0.7807764134261019, "grad_norm": 0.4604891240596771, "learning_rate": 3.6243661167510915e-06, "loss": 0.0831, "step": 35433 }, { "epoch": 0.7807984487156181, "grad_norm": 0.38439732789993286, "learning_rate": 3.6236683517977964e-06, "loss": 0.0579, "step": 35434 }, { "epoch": 0.7808204840051343, "grad_norm": 0.859812319278717, "learning_rate": 3.6229706447902526e-06, "loss": 0.0395, "step": 35435 }, { "epoch": 0.7808425192946504, "grad_norm": 0.8184766173362732, "learning_rate": 3.622272995732026e-06, "loss": 0.0534, "step": 35436 }, { "epoch": 0.7808645545841666, "grad_norm": 0.7960696220397949, "learning_rate": 3.6215754046266603e-06, "loss": 0.0645, "step": 35437 }, { "epoch": 0.7808865898736828, "grad_norm": 0.9155954718589783, "learning_rate": 3.6208778714777124e-06, "loss": 0.0978, "step": 35438 }, { "epoch": 0.7809086251631988, "grad_norm": 0.6732118725776672, "learning_rate": 3.620180396288738e-06, "loss": 0.0714, "step": 35439 }, { "epoch": 0.780930660452715, "grad_norm": 0.7406956553459167, "learning_rate": 3.6194829790632844e-06, "loss": 0.0517, "step": 35440 }, { "epoch": 0.7809526957422311, "grad_norm": 0.7353137731552124, "learning_rate": 3.618785619804907e-06, "loss": 0.0634, "step": 35441 }, { "epoch": 0.7809747310317473, "grad_norm": 0.7800627946853638, "learning_rate": 3.6180883185171576e-06, "loss": 0.0443, "step": 35442 }, { "epoch": 0.7809967663212635, "grad_norm": 0.343638151884079, "learning_rate": 3.617391075203592e-06, "loss": 0.0401, "step": 35443 }, { "epoch": 0.7810188016107796, "grad_norm": 0.6471811532974243, "learning_rate": 3.6166938898677524e-06, "loss": 0.0542, "step": 35444 }, { "epoch": 0.7810408369002958, "grad_norm": 0.32345688343048096, "learning_rate": 3.615996762513196e-06, "loss": 0.0305, "step": 35445 }, { "epoch": 0.781062872189812, "grad_norm": 0.5363319516181946, "learning_rate": 3.6152996931434768e-06, "loss": 0.0661, "step": 35446 }, { "epoch": 0.7810849074793281, "grad_norm": 0.4149020314216614, "learning_rate": 3.6146026817621365e-06, "loss": 0.038, "step": 35447 }, { "epoch": 0.7811069427688443, "grad_norm": 0.44990894198417664, "learning_rate": 3.6139057283727337e-06, "loss": 0.05, "step": 35448 }, { "epoch": 0.7811289780583605, "grad_norm": 0.5141600370407104, "learning_rate": 3.6132088329788065e-06, "loss": 0.0625, "step": 35449 }, { "epoch": 0.7811510133478766, "grad_norm": 0.6048769354820251, "learning_rate": 3.612511995583923e-06, "loss": 0.0589, "step": 35450 }, { "epoch": 0.7811730486373928, "grad_norm": 0.8628564476966858, "learning_rate": 3.611815216191616e-06, "loss": 0.0732, "step": 35451 }, { "epoch": 0.781195083926909, "grad_norm": 0.43688496947288513, "learning_rate": 3.6111184948054466e-06, "loss": 0.0441, "step": 35452 }, { "epoch": 0.7812171192164251, "grad_norm": 0.5394923090934753, "learning_rate": 3.6104218314289526e-06, "loss": 0.083, "step": 35453 }, { "epoch": 0.7812391545059413, "grad_norm": 0.840059757232666, "learning_rate": 3.6097252260656884e-06, "loss": 0.0735, "step": 35454 }, { "epoch": 0.7812611897954574, "grad_norm": 0.5765666961669922, "learning_rate": 3.609028678719207e-06, "loss": 0.077, "step": 35455 }, { "epoch": 0.7812832250849736, "grad_norm": 0.5381340384483337, "learning_rate": 3.6083321893930402e-06, "loss": 0.0539, "step": 35456 }, { "epoch": 0.7813052603744898, "grad_norm": 0.5870955586433411, "learning_rate": 3.607635758090755e-06, "loss": 0.0526, "step": 35457 }, { "epoch": 0.7813272956640059, "grad_norm": 0.7406248450279236, "learning_rate": 3.606939384815887e-06, "loss": 0.0474, "step": 35458 }, { "epoch": 0.7813493309535221, "grad_norm": 0.5885173678398132, "learning_rate": 3.6062430695719896e-06, "loss": 0.0448, "step": 35459 }, { "epoch": 0.7813713662430383, "grad_norm": 0.37127524614334106, "learning_rate": 3.6055468123625975e-06, "loss": 0.0579, "step": 35460 }, { "epoch": 0.7813934015325544, "grad_norm": 0.5670952200889587, "learning_rate": 3.6048506131912743e-06, "loss": 0.0777, "step": 35461 }, { "epoch": 0.7814154368220706, "grad_norm": 0.2922469973564148, "learning_rate": 3.6041544720615583e-06, "loss": 0.0243, "step": 35462 }, { "epoch": 0.7814374721115867, "grad_norm": 0.5773348212242126, "learning_rate": 3.603458388976989e-06, "loss": 0.0517, "step": 35463 }, { "epoch": 0.7814595074011028, "grad_norm": 0.7482106685638428, "learning_rate": 3.602762363941119e-06, "loss": 0.0843, "step": 35464 }, { "epoch": 0.781481542690619, "grad_norm": 0.4259638488292694, "learning_rate": 3.60206639695749e-06, "loss": 0.0521, "step": 35465 }, { "epoch": 0.7815035779801351, "grad_norm": 0.5297880172729492, "learning_rate": 3.601370488029655e-06, "loss": 0.0581, "step": 35466 }, { "epoch": 0.7815256132696513, "grad_norm": 0.7180557250976562, "learning_rate": 3.600674637161147e-06, "loss": 0.0512, "step": 35467 }, { "epoch": 0.7815476485591675, "grad_norm": 0.5170252323150635, "learning_rate": 3.5999788443555156e-06, "loss": 0.0547, "step": 35468 }, { "epoch": 0.7815696838486836, "grad_norm": 0.5382500886917114, "learning_rate": 3.599283109616307e-06, "loss": 0.076, "step": 35469 }, { "epoch": 0.7815917191381998, "grad_norm": 0.6527159214019775, "learning_rate": 3.5985874329470657e-06, "loss": 0.0542, "step": 35470 }, { "epoch": 0.781613754427716, "grad_norm": 0.6417603492736816, "learning_rate": 3.5978918143513324e-06, "loss": 0.0588, "step": 35471 }, { "epoch": 0.7816357897172321, "grad_norm": 0.5628464221954346, "learning_rate": 3.5971962538326418e-06, "loss": 0.0578, "step": 35472 }, { "epoch": 0.7816578250067483, "grad_norm": 0.4404076337814331, "learning_rate": 3.5965007513945552e-06, "loss": 0.0545, "step": 35473 }, { "epoch": 0.7816798602962645, "grad_norm": 0.5133528113365173, "learning_rate": 3.595805307040601e-06, "loss": 0.0479, "step": 35474 }, { "epoch": 0.7817018955857806, "grad_norm": 0.2432451993227005, "learning_rate": 3.5951099207743284e-06, "loss": 0.0631, "step": 35475 }, { "epoch": 0.7817239308752968, "grad_norm": 0.3764662742614746, "learning_rate": 3.5944145925992706e-06, "loss": 0.08, "step": 35476 }, { "epoch": 0.781745966164813, "grad_norm": 0.7001069784164429, "learning_rate": 3.5937193225189817e-06, "loss": 0.046, "step": 35477 }, { "epoch": 0.7817680014543291, "grad_norm": 0.7376362681388855, "learning_rate": 3.5930241105369945e-06, "loss": 0.0719, "step": 35478 }, { "epoch": 0.7817900367438453, "grad_norm": 0.6868848204612732, "learning_rate": 3.5923289566568502e-06, "loss": 0.0615, "step": 35479 }, { "epoch": 0.7818120720333614, "grad_norm": 0.6493290066719055, "learning_rate": 3.591633860882098e-06, "loss": 0.0488, "step": 35480 }, { "epoch": 0.7818341073228776, "grad_norm": 0.7470664381980896, "learning_rate": 3.590938823216267e-06, "loss": 0.0545, "step": 35481 }, { "epoch": 0.7818561426123938, "grad_norm": 0.7241845726966858, "learning_rate": 3.590243843662906e-06, "loss": 0.0615, "step": 35482 }, { "epoch": 0.7818781779019099, "grad_norm": 0.6368430852890015, "learning_rate": 3.5895489222255453e-06, "loss": 0.0528, "step": 35483 }, { "epoch": 0.7819002131914261, "grad_norm": 0.6591524481773376, "learning_rate": 3.588854058907737e-06, "loss": 0.0619, "step": 35484 }, { "epoch": 0.7819222484809423, "grad_norm": 0.47940829396247864, "learning_rate": 3.5881592537130113e-06, "loss": 0.0717, "step": 35485 }, { "epoch": 0.7819442837704584, "grad_norm": 0.22709956765174866, "learning_rate": 3.587464506644913e-06, "loss": 0.0622, "step": 35486 }, { "epoch": 0.7819663190599746, "grad_norm": 0.4209064543247223, "learning_rate": 3.5867698177069754e-06, "loss": 0.0646, "step": 35487 }, { "epoch": 0.7819883543494907, "grad_norm": 0.42657145857810974, "learning_rate": 3.5860751869027388e-06, "loss": 0.0519, "step": 35488 }, { "epoch": 0.7820103896390068, "grad_norm": 0.7213459610939026, "learning_rate": 3.5853806142357456e-06, "loss": 0.0688, "step": 35489 }, { "epoch": 0.782032424928523, "grad_norm": 0.6715168356895447, "learning_rate": 3.584686099709524e-06, "loss": 0.0372, "step": 35490 }, { "epoch": 0.7820544602180391, "grad_norm": 0.962860107421875, "learning_rate": 3.583991643327621e-06, "loss": 0.0798, "step": 35491 }, { "epoch": 0.7820764955075553, "grad_norm": 0.8987115621566772, "learning_rate": 3.5832972450935676e-06, "loss": 0.0771, "step": 35492 }, { "epoch": 0.7820985307970715, "grad_norm": 0.7513827085494995, "learning_rate": 3.5826029050109094e-06, "loss": 0.0764, "step": 35493 }, { "epoch": 0.7821205660865876, "grad_norm": 0.7593734860420227, "learning_rate": 3.5819086230831723e-06, "loss": 0.0719, "step": 35494 }, { "epoch": 0.7821426013761038, "grad_norm": 0.7585837841033936, "learning_rate": 3.5812143993138964e-06, "loss": 0.0584, "step": 35495 }, { "epoch": 0.78216463666562, "grad_norm": 0.5037245154380798, "learning_rate": 3.580520233706624e-06, "loss": 0.0465, "step": 35496 }, { "epoch": 0.7821866719551361, "grad_norm": 0.8900997638702393, "learning_rate": 3.579826126264881e-06, "loss": 0.0614, "step": 35497 }, { "epoch": 0.7822087072446523, "grad_norm": 0.5338645577430725, "learning_rate": 3.5791320769922085e-06, "loss": 0.0476, "step": 35498 }, { "epoch": 0.7822307425341685, "grad_norm": 0.8122318387031555, "learning_rate": 3.57843808589214e-06, "loss": 0.0951, "step": 35499 }, { "epoch": 0.7822527778236846, "grad_norm": 0.7340392470359802, "learning_rate": 3.577744152968215e-06, "loss": 0.0531, "step": 35500 }, { "epoch": 0.7822748131132008, "grad_norm": 0.5888248085975647, "learning_rate": 3.57705027822396e-06, "loss": 0.0761, "step": 35501 }, { "epoch": 0.782296848402717, "grad_norm": 0.7917248010635376, "learning_rate": 3.5763564616629148e-06, "loss": 0.0374, "step": 35502 }, { "epoch": 0.7823188836922331, "grad_norm": 0.5174223184585571, "learning_rate": 3.575662703288615e-06, "loss": 0.0497, "step": 35503 }, { "epoch": 0.7823409189817493, "grad_norm": 0.6783772110939026, "learning_rate": 3.5749690031045874e-06, "loss": 0.0712, "step": 35504 }, { "epoch": 0.7823629542712655, "grad_norm": 0.8228603005409241, "learning_rate": 3.574275361114373e-06, "loss": 0.067, "step": 35505 }, { "epoch": 0.7823849895607816, "grad_norm": 0.6023261547088623, "learning_rate": 3.5735817773214925e-06, "loss": 0.0785, "step": 35506 }, { "epoch": 0.7824070248502978, "grad_norm": 0.7820667624473572, "learning_rate": 3.5728882517294957e-06, "loss": 0.0789, "step": 35507 }, { "epoch": 0.782429060139814, "grad_norm": 0.534859836101532, "learning_rate": 3.5721947843419023e-06, "loss": 0.0762, "step": 35508 }, { "epoch": 0.7824510954293301, "grad_norm": 0.5391210317611694, "learning_rate": 3.5715013751622556e-06, "loss": 0.0675, "step": 35509 }, { "epoch": 0.7824731307188463, "grad_norm": 0.354629248380661, "learning_rate": 3.5708080241940744e-06, "loss": 0.0443, "step": 35510 }, { "epoch": 0.7824951660083624, "grad_norm": 0.1768340915441513, "learning_rate": 3.570114731440896e-06, "loss": 0.0425, "step": 35511 }, { "epoch": 0.7825172012978786, "grad_norm": 0.5597196221351624, "learning_rate": 3.5694214969062593e-06, "loss": 0.0536, "step": 35512 }, { "epoch": 0.7825392365873947, "grad_norm": 0.6208715438842773, "learning_rate": 3.5687283205936816e-06, "loss": 0.0452, "step": 35513 }, { "epoch": 0.7825612718769108, "grad_norm": 0.506586492061615, "learning_rate": 3.568035202506703e-06, "loss": 0.0663, "step": 35514 }, { "epoch": 0.782583307166427, "grad_norm": 0.8126577734947205, "learning_rate": 3.5673421426488496e-06, "loss": 0.0405, "step": 35515 }, { "epoch": 0.7826053424559432, "grad_norm": 0.49712660908699036, "learning_rate": 3.5666491410236578e-06, "loss": 0.0416, "step": 35516 }, { "epoch": 0.7826273777454593, "grad_norm": 0.5710951089859009, "learning_rate": 3.565956197634649e-06, "loss": 0.0468, "step": 35517 }, { "epoch": 0.7826494130349755, "grad_norm": 0.34667181968688965, "learning_rate": 3.565263312485358e-06, "loss": 0.048, "step": 35518 }, { "epoch": 0.7826714483244916, "grad_norm": 0.33453357219696045, "learning_rate": 3.5645704855793156e-06, "loss": 0.0544, "step": 35519 }, { "epoch": 0.7826934836140078, "grad_norm": 0.640722393989563, "learning_rate": 3.5638777169200437e-06, "loss": 0.0652, "step": 35520 }, { "epoch": 0.782715518903524, "grad_norm": 0.8674227595329285, "learning_rate": 3.5631850065110748e-06, "loss": 0.0943, "step": 35521 }, { "epoch": 0.7827375541930401, "grad_norm": 0.4237688481807709, "learning_rate": 3.562492354355938e-06, "loss": 0.0644, "step": 35522 }, { "epoch": 0.7827595894825563, "grad_norm": 0.4328802824020386, "learning_rate": 3.5617997604581647e-06, "loss": 0.0502, "step": 35523 }, { "epoch": 0.7827816247720725, "grad_norm": 0.8806753158569336, "learning_rate": 3.561107224821276e-06, "loss": 0.0929, "step": 35524 }, { "epoch": 0.7828036600615886, "grad_norm": 0.5330409407615662, "learning_rate": 3.560414747448801e-06, "loss": 0.0448, "step": 35525 }, { "epoch": 0.7828256953511048, "grad_norm": 0.6941651105880737, "learning_rate": 3.5597223283442724e-06, "loss": 0.0744, "step": 35526 }, { "epoch": 0.782847730640621, "grad_norm": 0.7524973750114441, "learning_rate": 3.5590299675112086e-06, "loss": 0.0658, "step": 35527 }, { "epoch": 0.7828697659301371, "grad_norm": 0.3702129125595093, "learning_rate": 3.5583376649531447e-06, "loss": 0.0839, "step": 35528 }, { "epoch": 0.7828918012196533, "grad_norm": 0.2901584506034851, "learning_rate": 3.557645420673594e-06, "loss": 0.0703, "step": 35529 }, { "epoch": 0.7829138365091695, "grad_norm": 0.4534948766231537, "learning_rate": 3.5569532346760996e-06, "loss": 0.0577, "step": 35530 }, { "epoch": 0.7829358717986856, "grad_norm": 0.8648360967636108, "learning_rate": 3.556261106964175e-06, "loss": 0.0749, "step": 35531 }, { "epoch": 0.7829579070882018, "grad_norm": 0.6863924860954285, "learning_rate": 3.555569037541352e-06, "loss": 0.0794, "step": 35532 }, { "epoch": 0.782979942377718, "grad_norm": 0.7275641560554504, "learning_rate": 3.5548770264111497e-06, "loss": 0.0686, "step": 35533 }, { "epoch": 0.7830019776672341, "grad_norm": 0.43705806136131287, "learning_rate": 3.554185073577095e-06, "loss": 0.056, "step": 35534 }, { "epoch": 0.7830240129567503, "grad_norm": 0.4512331187725067, "learning_rate": 3.5534931790427196e-06, "loss": 0.0617, "step": 35535 }, { "epoch": 0.7830460482462664, "grad_norm": 0.7656460404396057, "learning_rate": 3.5528013428115356e-06, "loss": 0.0587, "step": 35536 }, { "epoch": 0.7830680835357826, "grad_norm": 0.5171487331390381, "learning_rate": 3.552109564887075e-06, "loss": 0.0634, "step": 35537 }, { "epoch": 0.7830901188252987, "grad_norm": 0.8073098659515381, "learning_rate": 3.5514178452728575e-06, "loss": 0.0769, "step": 35538 }, { "epoch": 0.7831121541148148, "grad_norm": 1.1872845888137817, "learning_rate": 3.550726183972412e-06, "loss": 0.0756, "step": 35539 }, { "epoch": 0.783134189404331, "grad_norm": 1.1456665992736816, "learning_rate": 3.5500345809892544e-06, "loss": 0.1067, "step": 35540 }, { "epoch": 0.7831562246938472, "grad_norm": 0.4223663806915283, "learning_rate": 3.5493430363269097e-06, "loss": 0.0494, "step": 35541 }, { "epoch": 0.7831782599833633, "grad_norm": 0.53271484375, "learning_rate": 3.5486515499889054e-06, "loss": 0.0613, "step": 35542 }, { "epoch": 0.7832002952728795, "grad_norm": 0.4605119526386261, "learning_rate": 3.547960121978757e-06, "loss": 0.0566, "step": 35543 }, { "epoch": 0.7832223305623957, "grad_norm": 0.3990863263607025, "learning_rate": 3.547268752299987e-06, "loss": 0.0422, "step": 35544 }, { "epoch": 0.7832443658519118, "grad_norm": 0.5307164788246155, "learning_rate": 3.5465774409561185e-06, "loss": 0.059, "step": 35545 }, { "epoch": 0.783266401141428, "grad_norm": 0.5154244303703308, "learning_rate": 3.5458861879506795e-06, "loss": 0.0777, "step": 35546 }, { "epoch": 0.7832884364309441, "grad_norm": 0.2871920168399811, "learning_rate": 3.545194993287178e-06, "loss": 0.0722, "step": 35547 }, { "epoch": 0.7833104717204603, "grad_norm": 0.6389091610908508, "learning_rate": 3.5445038569691418e-06, "loss": 0.0785, "step": 35548 }, { "epoch": 0.7833325070099765, "grad_norm": 0.5286678671836853, "learning_rate": 3.5438127790000946e-06, "loss": 0.0804, "step": 35549 }, { "epoch": 0.7833545422994926, "grad_norm": 0.4302949905395508, "learning_rate": 3.543121759383547e-06, "loss": 0.051, "step": 35550 }, { "epoch": 0.7833765775890088, "grad_norm": 0.4791310131549835, "learning_rate": 3.5424307981230293e-06, "loss": 0.0496, "step": 35551 }, { "epoch": 0.783398612878525, "grad_norm": 0.722629189491272, "learning_rate": 3.5417398952220453e-06, "loss": 0.0589, "step": 35552 }, { "epoch": 0.7834206481680411, "grad_norm": 0.5445039868354797, "learning_rate": 3.541049050684134e-06, "loss": 0.0682, "step": 35553 }, { "epoch": 0.7834426834575573, "grad_norm": 0.4905179440975189, "learning_rate": 3.540358264512801e-06, "loss": 0.0716, "step": 35554 }, { "epoch": 0.7834647187470735, "grad_norm": 0.8472735285758972, "learning_rate": 3.539667536711574e-06, "loss": 0.0537, "step": 35555 }, { "epoch": 0.7834867540365896, "grad_norm": 0.5045334696769714, "learning_rate": 3.538976867283959e-06, "loss": 0.071, "step": 35556 }, { "epoch": 0.7835087893261058, "grad_norm": 0.4799445569515228, "learning_rate": 3.5382862562334823e-06, "loss": 0.0377, "step": 35557 }, { "epoch": 0.783530824615622, "grad_norm": 0.7139399647712708, "learning_rate": 3.5375957035636623e-06, "loss": 0.0711, "step": 35558 }, { "epoch": 0.7835528599051381, "grad_norm": 0.29574790596961975, "learning_rate": 3.536905209278012e-06, "loss": 0.0366, "step": 35559 }, { "epoch": 0.7835748951946543, "grad_norm": 0.9549697637557983, "learning_rate": 3.536214773380049e-06, "loss": 0.0642, "step": 35560 }, { "epoch": 0.7835969304841705, "grad_norm": 0.639122724533081, "learning_rate": 3.5355243958732918e-06, "loss": 0.0696, "step": 35561 }, { "epoch": 0.7836189657736865, "grad_norm": 0.7728974223136902, "learning_rate": 3.534834076761262e-06, "loss": 0.0895, "step": 35562 }, { "epoch": 0.7836410010632027, "grad_norm": 0.4637756645679474, "learning_rate": 3.5341438160474646e-06, "loss": 0.0386, "step": 35563 }, { "epoch": 0.7836630363527188, "grad_norm": 0.4242556393146515, "learning_rate": 3.533453613735421e-06, "loss": 0.045, "step": 35564 }, { "epoch": 0.783685071642235, "grad_norm": 0.5931239128112793, "learning_rate": 3.532763469828653e-06, "loss": 0.0672, "step": 35565 }, { "epoch": 0.7837071069317512, "grad_norm": 0.32807403802871704, "learning_rate": 3.5320733843306648e-06, "loss": 0.0406, "step": 35566 }, { "epoch": 0.7837291422212673, "grad_norm": 0.5499172806739807, "learning_rate": 3.531383357244976e-06, "loss": 0.0735, "step": 35567 }, { "epoch": 0.7837511775107835, "grad_norm": 0.7820285558700562, "learning_rate": 3.5306933885751016e-06, "loss": 0.063, "step": 35568 }, { "epoch": 0.7837732128002997, "grad_norm": 0.20148266851902008, "learning_rate": 3.530003478324561e-06, "loss": 0.0335, "step": 35569 }, { "epoch": 0.7837952480898158, "grad_norm": 0.47965511679649353, "learning_rate": 3.529313626496859e-06, "loss": 0.0672, "step": 35570 }, { "epoch": 0.783817283379332, "grad_norm": 0.873103678226471, "learning_rate": 3.528623833095514e-06, "loss": 0.0931, "step": 35571 }, { "epoch": 0.7838393186688482, "grad_norm": 0.4850703477859497, "learning_rate": 3.5279340981240416e-06, "loss": 0.0906, "step": 35572 }, { "epoch": 0.7838613539583643, "grad_norm": 0.495057612657547, "learning_rate": 3.5272444215859507e-06, "loss": 0.0666, "step": 35573 }, { "epoch": 0.7838833892478805, "grad_norm": 0.7138416171073914, "learning_rate": 3.52655480348476e-06, "loss": 0.0581, "step": 35574 }, { "epoch": 0.7839054245373966, "grad_norm": 0.20411750674247742, "learning_rate": 3.5258652438239685e-06, "loss": 0.0514, "step": 35575 }, { "epoch": 0.7839274598269128, "grad_norm": 0.5247002243995667, "learning_rate": 3.5251757426071084e-06, "loss": 0.0714, "step": 35576 }, { "epoch": 0.783949495116429, "grad_norm": 0.5238363146781921, "learning_rate": 3.524486299837677e-06, "loss": 0.0528, "step": 35577 }, { "epoch": 0.7839715304059451, "grad_norm": 0.589457631111145, "learning_rate": 3.523796915519193e-06, "loss": 0.0528, "step": 35578 }, { "epoch": 0.7839935656954613, "grad_norm": 0.7150046229362488, "learning_rate": 3.5231075896551585e-06, "loss": 0.0723, "step": 35579 }, { "epoch": 0.7840156009849775, "grad_norm": 0.4454246163368225, "learning_rate": 3.522418322249099e-06, "loss": 0.0593, "step": 35580 }, { "epoch": 0.7840376362744936, "grad_norm": 0.9163689017295837, "learning_rate": 3.5217291133045153e-06, "loss": 0.0713, "step": 35581 }, { "epoch": 0.7840596715640098, "grad_norm": 0.5722666382789612, "learning_rate": 3.521039962824923e-06, "loss": 0.0756, "step": 35582 }, { "epoch": 0.784081706853526, "grad_norm": 0.3225565254688263, "learning_rate": 3.5203508708138255e-06, "loss": 0.0339, "step": 35583 }, { "epoch": 0.7841037421430421, "grad_norm": 0.9809923768043518, "learning_rate": 3.5196618372747367e-06, "loss": 0.0492, "step": 35584 }, { "epoch": 0.7841257774325583, "grad_norm": 0.6132308840751648, "learning_rate": 3.518972862211171e-06, "loss": 0.0471, "step": 35585 }, { "epoch": 0.7841478127220745, "grad_norm": 0.6366192102432251, "learning_rate": 3.5182839456266303e-06, "loss": 0.0656, "step": 35586 }, { "epoch": 0.7841698480115905, "grad_norm": 0.5336934328079224, "learning_rate": 3.5175950875246247e-06, "loss": 0.0587, "step": 35587 }, { "epoch": 0.7841918833011067, "grad_norm": 0.4296330213546753, "learning_rate": 3.5169062879086654e-06, "loss": 0.047, "step": 35588 }, { "epoch": 0.7842139185906228, "grad_norm": 0.4720839262008667, "learning_rate": 3.5162175467822644e-06, "loss": 0.0262, "step": 35589 }, { "epoch": 0.784235953880139, "grad_norm": 0.7607951760292053, "learning_rate": 3.5155288641489204e-06, "loss": 0.0774, "step": 35590 }, { "epoch": 0.7842579891696552, "grad_norm": 0.6623274087905884, "learning_rate": 3.514840240012147e-06, "loss": 0.0588, "step": 35591 }, { "epoch": 0.7842800244591713, "grad_norm": 0.5997182130813599, "learning_rate": 3.5141516743754554e-06, "loss": 0.0434, "step": 35592 }, { "epoch": 0.7843020597486875, "grad_norm": 0.4337233603000641, "learning_rate": 3.513463167242344e-06, "loss": 0.0448, "step": 35593 }, { "epoch": 0.7843240950382037, "grad_norm": 0.7264665365219116, "learning_rate": 3.5127747186163232e-06, "loss": 0.066, "step": 35594 }, { "epoch": 0.7843461303277198, "grad_norm": 0.7453534007072449, "learning_rate": 3.5120863285009024e-06, "loss": 0.0399, "step": 35595 }, { "epoch": 0.784368165617236, "grad_norm": 0.5758814215660095, "learning_rate": 3.5113979968995904e-06, "loss": 0.0609, "step": 35596 }, { "epoch": 0.7843902009067522, "grad_norm": 0.558499276638031, "learning_rate": 3.5107097238158837e-06, "loss": 0.0617, "step": 35597 }, { "epoch": 0.7844122361962683, "grad_norm": 0.9257296323776245, "learning_rate": 3.510021509253295e-06, "loss": 0.0733, "step": 35598 }, { "epoch": 0.7844342714857845, "grad_norm": 0.5299587845802307, "learning_rate": 3.5093333532153316e-06, "loss": 0.0547, "step": 35599 }, { "epoch": 0.7844563067753006, "grad_norm": 0.6059333086013794, "learning_rate": 3.5086452557054905e-06, "loss": 0.0418, "step": 35600 }, { "epoch": 0.7844783420648168, "grad_norm": 0.2664482295513153, "learning_rate": 3.5079572167272857e-06, "loss": 0.0706, "step": 35601 }, { "epoch": 0.784500377354333, "grad_norm": 0.48928916454315186, "learning_rate": 3.507269236284209e-06, "loss": 0.0641, "step": 35602 }, { "epoch": 0.7845224126438491, "grad_norm": 0.6130949854850769, "learning_rate": 3.5065813143797814e-06, "loss": 0.056, "step": 35603 }, { "epoch": 0.7845444479333653, "grad_norm": 0.9711947441101074, "learning_rate": 3.505893451017495e-06, "loss": 0.0659, "step": 35604 }, { "epoch": 0.7845664832228815, "grad_norm": 0.5580406188964844, "learning_rate": 3.50520564620086e-06, "loss": 0.0366, "step": 35605 }, { "epoch": 0.7845885185123976, "grad_norm": 0.45977771282196045, "learning_rate": 3.5045178999333726e-06, "loss": 0.0629, "step": 35606 }, { "epoch": 0.7846105538019138, "grad_norm": 0.3220197260379791, "learning_rate": 3.503830212218541e-06, "loss": 0.044, "step": 35607 }, { "epoch": 0.78463258909143, "grad_norm": 0.891023576259613, "learning_rate": 3.5031425830598695e-06, "loss": 0.0982, "step": 35608 }, { "epoch": 0.7846546243809461, "grad_norm": 0.4848106801509857, "learning_rate": 3.50245501246085e-06, "loss": 0.0493, "step": 35609 }, { "epoch": 0.7846766596704623, "grad_norm": 0.8857552409172058, "learning_rate": 3.5017675004250017e-06, "loss": 0.0672, "step": 35610 }, { "epoch": 0.7846986949599785, "grad_norm": 0.6961287260055542, "learning_rate": 3.501080046955813e-06, "loss": 0.0626, "step": 35611 }, { "epoch": 0.7847207302494945, "grad_norm": 0.4618314504623413, "learning_rate": 3.500392652056793e-06, "loss": 0.0339, "step": 35612 }, { "epoch": 0.7847427655390107, "grad_norm": 0.27243921160697937, "learning_rate": 3.4997053157314376e-06, "loss": 0.0647, "step": 35613 }, { "epoch": 0.7847648008285268, "grad_norm": 0.5104325413703918, "learning_rate": 3.499018037983248e-06, "loss": 0.0504, "step": 35614 }, { "epoch": 0.784786836118043, "grad_norm": 0.5467058420181274, "learning_rate": 3.498330818815731e-06, "loss": 0.0454, "step": 35615 }, { "epoch": 0.7848088714075592, "grad_norm": 0.6841914057731628, "learning_rate": 3.4976436582323796e-06, "loss": 0.0746, "step": 35616 }, { "epoch": 0.7848309066970753, "grad_norm": 0.49923065304756165, "learning_rate": 3.496956556236698e-06, "loss": 0.0492, "step": 35617 }, { "epoch": 0.7848529419865915, "grad_norm": 0.7928112149238586, "learning_rate": 3.496269512832183e-06, "loss": 0.0593, "step": 35618 }, { "epoch": 0.7848749772761077, "grad_norm": 0.5578811168670654, "learning_rate": 3.4955825280223407e-06, "loss": 0.0578, "step": 35619 }, { "epoch": 0.7848970125656238, "grad_norm": 0.5360862016677856, "learning_rate": 3.4948956018106633e-06, "loss": 0.0681, "step": 35620 }, { "epoch": 0.78491904785514, "grad_norm": 0.37617355585098267, "learning_rate": 3.4942087342006507e-06, "loss": 0.0724, "step": 35621 }, { "epoch": 0.7849410831446562, "grad_norm": 0.421744704246521, "learning_rate": 3.4935219251958072e-06, "loss": 0.0664, "step": 35622 }, { "epoch": 0.7849631184341723, "grad_norm": 0.6725282073020935, "learning_rate": 3.4928351747996234e-06, "loss": 0.0537, "step": 35623 }, { "epoch": 0.7849851537236885, "grad_norm": 0.5396527647972107, "learning_rate": 3.492148483015603e-06, "loss": 0.0673, "step": 35624 }, { "epoch": 0.7850071890132047, "grad_norm": 0.46348515152931213, "learning_rate": 3.491461849847235e-06, "loss": 0.092, "step": 35625 }, { "epoch": 0.7850292243027208, "grad_norm": 0.41873547434806824, "learning_rate": 3.4907752752980314e-06, "loss": 0.0376, "step": 35626 }, { "epoch": 0.785051259592237, "grad_norm": 0.622006356716156, "learning_rate": 3.4900887593714763e-06, "loss": 0.0519, "step": 35627 }, { "epoch": 0.7850732948817531, "grad_norm": 0.48494839668273926, "learning_rate": 3.489402302071075e-06, "loss": 0.0802, "step": 35628 }, { "epoch": 0.7850953301712693, "grad_norm": 0.7642199993133545, "learning_rate": 3.488715903400317e-06, "loss": 0.1268, "step": 35629 }, { "epoch": 0.7851173654607855, "grad_norm": 0.5115727186203003, "learning_rate": 3.4880295633627e-06, "loss": 0.0868, "step": 35630 }, { "epoch": 0.7851394007503016, "grad_norm": 0.5466713905334473, "learning_rate": 3.4873432819617274e-06, "loss": 0.0548, "step": 35631 }, { "epoch": 0.7851614360398178, "grad_norm": 0.6429494023323059, "learning_rate": 3.4866570592008804e-06, "loss": 0.0804, "step": 35632 }, { "epoch": 0.785183471329334, "grad_norm": 0.9464536309242249, "learning_rate": 3.485970895083672e-06, "loss": 0.085, "step": 35633 }, { "epoch": 0.7852055066188501, "grad_norm": 0.6352981925010681, "learning_rate": 3.4852847896135827e-06, "loss": 0.0795, "step": 35634 }, { "epoch": 0.7852275419083663, "grad_norm": 1.1362130641937256, "learning_rate": 3.4845987427941175e-06, "loss": 0.0836, "step": 35635 }, { "epoch": 0.7852495771978824, "grad_norm": 0.4320336580276489, "learning_rate": 3.483912754628762e-06, "loss": 0.0623, "step": 35636 }, { "epoch": 0.7852716124873985, "grad_norm": 0.4796779155731201, "learning_rate": 3.4832268251210137e-06, "loss": 0.0614, "step": 35637 }, { "epoch": 0.7852936477769147, "grad_norm": 0.40303733944892883, "learning_rate": 3.4825409542743716e-06, "loss": 0.0611, "step": 35638 }, { "epoch": 0.7853156830664308, "grad_norm": 0.6150043606758118, "learning_rate": 3.4818551420923187e-06, "loss": 0.0514, "step": 35639 }, { "epoch": 0.785337718355947, "grad_norm": 0.3109850287437439, "learning_rate": 3.481169388578355e-06, "loss": 0.0483, "step": 35640 }, { "epoch": 0.7853597536454632, "grad_norm": 0.6692189574241638, "learning_rate": 3.4804836937359742e-06, "loss": 0.103, "step": 35641 }, { "epoch": 0.7853817889349793, "grad_norm": 0.6192381381988525, "learning_rate": 3.4797980575686693e-06, "loss": 0.0535, "step": 35642 }, { "epoch": 0.7854038242244955, "grad_norm": 0.440879762172699, "learning_rate": 3.4791124800799284e-06, "loss": 0.0418, "step": 35643 }, { "epoch": 0.7854258595140117, "grad_norm": 0.3167222738265991, "learning_rate": 3.4784269612732434e-06, "loss": 0.0497, "step": 35644 }, { "epoch": 0.7854478948035278, "grad_norm": 0.9260839819908142, "learning_rate": 3.477741501152113e-06, "loss": 0.0905, "step": 35645 }, { "epoch": 0.785469930093044, "grad_norm": 0.6030691266059875, "learning_rate": 3.4770560997200214e-06, "loss": 0.0676, "step": 35646 }, { "epoch": 0.7854919653825602, "grad_norm": 0.7783421874046326, "learning_rate": 3.4763707569804636e-06, "loss": 0.0719, "step": 35647 }, { "epoch": 0.7855140006720763, "grad_norm": 0.2837752401828766, "learning_rate": 3.4756854729369225e-06, "loss": 0.0479, "step": 35648 }, { "epoch": 0.7855360359615925, "grad_norm": 0.6679024696350098, "learning_rate": 3.475000247592903e-06, "loss": 0.0604, "step": 35649 }, { "epoch": 0.7855580712511087, "grad_norm": 0.5660086870193481, "learning_rate": 3.474315080951883e-06, "loss": 0.0952, "step": 35650 }, { "epoch": 0.7855801065406248, "grad_norm": 0.5198519229888916, "learning_rate": 3.4736299730173628e-06, "loss": 0.0548, "step": 35651 }, { "epoch": 0.785602141830141, "grad_norm": 0.5098186135292053, "learning_rate": 3.472944923792819e-06, "loss": 0.0949, "step": 35652 }, { "epoch": 0.7856241771196572, "grad_norm": 0.4396553039550781, "learning_rate": 3.4722599332817494e-06, "loss": 0.0757, "step": 35653 }, { "epoch": 0.7856462124091733, "grad_norm": 0.625771164894104, "learning_rate": 3.4715750014876464e-06, "loss": 0.0391, "step": 35654 }, { "epoch": 0.7856682476986895, "grad_norm": 0.7774602770805359, "learning_rate": 3.470890128413985e-06, "loss": 0.0805, "step": 35655 }, { "epoch": 0.7856902829882056, "grad_norm": 0.5924192667007446, "learning_rate": 3.470205314064271e-06, "loss": 0.0302, "step": 35656 }, { "epoch": 0.7857123182777218, "grad_norm": 0.7131907939910889, "learning_rate": 3.46952055844198e-06, "loss": 0.0784, "step": 35657 }, { "epoch": 0.785734353567238, "grad_norm": 0.5126876831054688, "learning_rate": 3.468835861550609e-06, "loss": 0.0823, "step": 35658 }, { "epoch": 0.7857563888567541, "grad_norm": 0.3423847556114197, "learning_rate": 3.468151223393635e-06, "loss": 0.0463, "step": 35659 }, { "epoch": 0.7857784241462703, "grad_norm": 0.7274792790412903, "learning_rate": 3.4674666439745513e-06, "loss": 0.0616, "step": 35660 }, { "epoch": 0.7858004594357864, "grad_norm": 0.8012142181396484, "learning_rate": 3.466782123296847e-06, "loss": 0.0695, "step": 35661 }, { "epoch": 0.7858224947253025, "grad_norm": 0.6700385212898254, "learning_rate": 3.466097661364004e-06, "loss": 0.0662, "step": 35662 }, { "epoch": 0.7858445300148187, "grad_norm": 0.8493631482124329, "learning_rate": 3.4654132581795082e-06, "loss": 0.1014, "step": 35663 }, { "epoch": 0.7858665653043349, "grad_norm": 0.702774167060852, "learning_rate": 3.4647289137468495e-06, "loss": 0.0603, "step": 35664 }, { "epoch": 0.785888600593851, "grad_norm": 0.7708920836448669, "learning_rate": 3.4640446280695163e-06, "loss": 0.0552, "step": 35665 }, { "epoch": 0.7859106358833672, "grad_norm": 0.3640102446079254, "learning_rate": 3.4633604011509845e-06, "loss": 0.0606, "step": 35666 }, { "epoch": 0.7859326711728833, "grad_norm": 0.6003318428993225, "learning_rate": 3.462676232994746e-06, "loss": 0.0561, "step": 35667 }, { "epoch": 0.7859547064623995, "grad_norm": 0.9589051008224487, "learning_rate": 3.4619921236042877e-06, "loss": 0.0608, "step": 35668 }, { "epoch": 0.7859767417519157, "grad_norm": 0.3824872672557831, "learning_rate": 3.4613080729830875e-06, "loss": 0.0413, "step": 35669 }, { "epoch": 0.7859987770414318, "grad_norm": 0.577322781085968, "learning_rate": 3.460624081134636e-06, "loss": 0.0858, "step": 35670 }, { "epoch": 0.786020812330948, "grad_norm": 0.7006150484085083, "learning_rate": 3.459940148062406e-06, "loss": 0.0571, "step": 35671 }, { "epoch": 0.7860428476204642, "grad_norm": 0.6166148781776428, "learning_rate": 3.459256273769897e-06, "loss": 0.0515, "step": 35672 }, { "epoch": 0.7860648829099803, "grad_norm": 0.6602785587310791, "learning_rate": 3.4585724582605804e-06, "loss": 0.0536, "step": 35673 }, { "epoch": 0.7860869181994965, "grad_norm": 0.5632981657981873, "learning_rate": 3.457888701537949e-06, "loss": 0.0659, "step": 35674 }, { "epoch": 0.7861089534890127, "grad_norm": 0.8416064977645874, "learning_rate": 3.4572050036054737e-06, "loss": 0.0685, "step": 35675 }, { "epoch": 0.7861309887785288, "grad_norm": 0.5663836598396301, "learning_rate": 3.4565213644666434e-06, "loss": 0.0759, "step": 35676 }, { "epoch": 0.786153024068045, "grad_norm": 0.5887889862060547, "learning_rate": 3.455837784124945e-06, "loss": 0.0796, "step": 35677 }, { "epoch": 0.7861750593575612, "grad_norm": 0.34412890672683716, "learning_rate": 3.455154262583846e-06, "loss": 0.0653, "step": 35678 }, { "epoch": 0.7861970946470773, "grad_norm": 0.7022567987442017, "learning_rate": 3.4544707998468455e-06, "loss": 0.0818, "step": 35679 }, { "epoch": 0.7862191299365935, "grad_norm": 0.6507033109664917, "learning_rate": 3.453787395917412e-06, "loss": 0.0689, "step": 35680 }, { "epoch": 0.7862411652261097, "grad_norm": 0.6442658305168152, "learning_rate": 3.4531040507990364e-06, "loss": 0.0734, "step": 35681 }, { "epoch": 0.7862632005156258, "grad_norm": 0.6015700101852417, "learning_rate": 3.452420764495184e-06, "loss": 0.0445, "step": 35682 }, { "epoch": 0.786285235805142, "grad_norm": 0.5263091921806335, "learning_rate": 3.4517375370093534e-06, "loss": 0.0363, "step": 35683 }, { "epoch": 0.7863072710946581, "grad_norm": 0.5008544921875, "learning_rate": 3.451054368345017e-06, "loss": 0.0477, "step": 35684 }, { "epoch": 0.7863293063841743, "grad_norm": 0.28450891375541687, "learning_rate": 3.4503712585056496e-06, "loss": 0.0609, "step": 35685 }, { "epoch": 0.7863513416736904, "grad_norm": 0.5529633164405823, "learning_rate": 3.449688207494734e-06, "loss": 0.042, "step": 35686 }, { "epoch": 0.7863733769632065, "grad_norm": 0.645423173904419, "learning_rate": 3.449005215315751e-06, "loss": 0.0519, "step": 35687 }, { "epoch": 0.7863954122527227, "grad_norm": 0.5732323527336121, "learning_rate": 3.448322281972183e-06, "loss": 0.0446, "step": 35688 }, { "epoch": 0.7864174475422389, "grad_norm": 0.47038501501083374, "learning_rate": 3.4476394074674984e-06, "loss": 0.0556, "step": 35689 }, { "epoch": 0.786439482831755, "grad_norm": 0.8463492393493652, "learning_rate": 3.4469565918051814e-06, "loss": 0.0827, "step": 35690 }, { "epoch": 0.7864615181212712, "grad_norm": 0.4071834683418274, "learning_rate": 3.446273834988711e-06, "loss": 0.0615, "step": 35691 }, { "epoch": 0.7864835534107874, "grad_norm": 0.4009118676185608, "learning_rate": 3.4455911370215663e-06, "loss": 0.0872, "step": 35692 }, { "epoch": 0.7865055887003035, "grad_norm": 1.1828304529190063, "learning_rate": 3.4449084979072194e-06, "loss": 0.082, "step": 35693 }, { "epoch": 0.7865276239898197, "grad_norm": 0.7171846032142639, "learning_rate": 3.4442259176491486e-06, "loss": 0.0892, "step": 35694 }, { "epoch": 0.7865496592793358, "grad_norm": 0.6143732666969299, "learning_rate": 3.4435433962508365e-06, "loss": 0.0878, "step": 35695 }, { "epoch": 0.786571694568852, "grad_norm": 0.5408302545547485, "learning_rate": 3.4428609337157506e-06, "loss": 0.0414, "step": 35696 }, { "epoch": 0.7865937298583682, "grad_norm": 0.47470614314079285, "learning_rate": 3.4421785300473763e-06, "loss": 0.0478, "step": 35697 }, { "epoch": 0.7866157651478843, "grad_norm": 0.7832853198051453, "learning_rate": 3.4414961852491755e-06, "loss": 0.0607, "step": 35698 }, { "epoch": 0.7866378004374005, "grad_norm": 0.34817424416542053, "learning_rate": 3.440813899324641e-06, "loss": 0.0442, "step": 35699 }, { "epoch": 0.7866598357269167, "grad_norm": 0.41679647564888, "learning_rate": 3.4401316722772356e-06, "loss": 0.0418, "step": 35700 }, { "epoch": 0.7866818710164328, "grad_norm": 0.47783613204956055, "learning_rate": 3.439449504110438e-06, "loss": 0.066, "step": 35701 }, { "epoch": 0.786703906305949, "grad_norm": 0.33845797181129456, "learning_rate": 3.438767394827729e-06, "loss": 0.0602, "step": 35702 }, { "epoch": 0.7867259415954652, "grad_norm": 0.4971580505371094, "learning_rate": 3.438085344432572e-06, "loss": 0.0306, "step": 35703 }, { "epoch": 0.7867479768849813, "grad_norm": 0.2462502419948578, "learning_rate": 3.4374033529284504e-06, "loss": 0.0368, "step": 35704 }, { "epoch": 0.7867700121744975, "grad_norm": 0.517500102519989, "learning_rate": 3.4367214203188264e-06, "loss": 0.0441, "step": 35705 }, { "epoch": 0.7867920474640137, "grad_norm": 0.5156040787696838, "learning_rate": 3.4360395466071887e-06, "loss": 0.0757, "step": 35706 }, { "epoch": 0.7868140827535298, "grad_norm": 0.25087180733680725, "learning_rate": 3.435357731797e-06, "loss": 0.0553, "step": 35707 }, { "epoch": 0.786836118043046, "grad_norm": 0.6288468241691589, "learning_rate": 3.4346759758917386e-06, "loss": 0.0422, "step": 35708 }, { "epoch": 0.7868581533325621, "grad_norm": 0.6676739454269409, "learning_rate": 3.4339942788948686e-06, "loss": 0.0672, "step": 35709 }, { "epoch": 0.7868801886220783, "grad_norm": 0.6073965430259705, "learning_rate": 3.433312640809871e-06, "loss": 0.056, "step": 35710 }, { "epoch": 0.7869022239115944, "grad_norm": 0.7888665795326233, "learning_rate": 3.4326310616402174e-06, "loss": 0.0838, "step": 35711 }, { "epoch": 0.7869242592011105, "grad_norm": 0.5507344603538513, "learning_rate": 3.431949541389372e-06, "loss": 0.0465, "step": 35712 }, { "epoch": 0.7869462944906267, "grad_norm": 0.3974054753780365, "learning_rate": 3.4312680800608105e-06, "loss": 0.0364, "step": 35713 }, { "epoch": 0.7869683297801429, "grad_norm": 0.7080872654914856, "learning_rate": 3.4305866776580045e-06, "loss": 0.0676, "step": 35714 }, { "epoch": 0.786990365069659, "grad_norm": 0.5885083675384521, "learning_rate": 3.4299053341844305e-06, "loss": 0.061, "step": 35715 }, { "epoch": 0.7870124003591752, "grad_norm": 0.8519675135612488, "learning_rate": 3.4292240496435485e-06, "loss": 0.1158, "step": 35716 }, { "epoch": 0.7870344356486914, "grad_norm": 0.8513979911804199, "learning_rate": 3.4285428240388324e-06, "loss": 0.0811, "step": 35717 }, { "epoch": 0.7870564709382075, "grad_norm": 0.364637553691864, "learning_rate": 3.427861657373756e-06, "loss": 0.0651, "step": 35718 }, { "epoch": 0.7870785062277237, "grad_norm": 0.6633467078208923, "learning_rate": 3.4271805496517834e-06, "loss": 0.0489, "step": 35719 }, { "epoch": 0.7871005415172398, "grad_norm": 0.7038449645042419, "learning_rate": 3.426499500876388e-06, "loss": 0.0611, "step": 35720 }, { "epoch": 0.787122576806756, "grad_norm": 0.43386006355285645, "learning_rate": 3.4258185110510297e-06, "loss": 0.0687, "step": 35721 }, { "epoch": 0.7871446120962722, "grad_norm": 0.2546688914299011, "learning_rate": 3.4251375801791936e-06, "loss": 0.0271, "step": 35722 }, { "epoch": 0.7871666473857883, "grad_norm": 0.695167601108551, "learning_rate": 3.424456708264333e-06, "loss": 0.0517, "step": 35723 }, { "epoch": 0.7871886826753045, "grad_norm": 0.9579617977142334, "learning_rate": 3.4237758953099222e-06, "loss": 0.0716, "step": 35724 }, { "epoch": 0.7872107179648207, "grad_norm": 0.3670135736465454, "learning_rate": 3.423095141319434e-06, "loss": 0.0422, "step": 35725 }, { "epoch": 0.7872327532543368, "grad_norm": 0.7231488227844238, "learning_rate": 3.422414446296325e-06, "loss": 0.0559, "step": 35726 }, { "epoch": 0.787254788543853, "grad_norm": 0.6099880933761597, "learning_rate": 3.4217338102440702e-06, "loss": 0.0856, "step": 35727 }, { "epoch": 0.7872768238333692, "grad_norm": 0.9148025512695312, "learning_rate": 3.421053233166128e-06, "loss": 0.0841, "step": 35728 }, { "epoch": 0.7872988591228853, "grad_norm": 0.5029196739196777, "learning_rate": 3.4203727150659785e-06, "loss": 0.0628, "step": 35729 }, { "epoch": 0.7873208944124015, "grad_norm": 0.5544217228889465, "learning_rate": 3.4196922559470756e-06, "loss": 0.0767, "step": 35730 }, { "epoch": 0.7873429297019177, "grad_norm": 0.43788716197013855, "learning_rate": 3.419011855812893e-06, "loss": 0.0575, "step": 35731 }, { "epoch": 0.7873649649914338, "grad_norm": 0.5845268368721008, "learning_rate": 3.4183315146668916e-06, "loss": 0.0501, "step": 35732 }, { "epoch": 0.78738700028095, "grad_norm": 0.6021497845649719, "learning_rate": 3.4176512325125363e-06, "loss": 0.0656, "step": 35733 }, { "epoch": 0.7874090355704662, "grad_norm": 0.5071727633476257, "learning_rate": 3.4169710093532985e-06, "loss": 0.0549, "step": 35734 }, { "epoch": 0.7874310708599822, "grad_norm": 0.4330185055732727, "learning_rate": 3.416290845192637e-06, "loss": 0.0582, "step": 35735 }, { "epoch": 0.7874531061494984, "grad_norm": 0.5256138443946838, "learning_rate": 3.4156107400340147e-06, "loss": 0.0805, "step": 35736 }, { "epoch": 0.7874751414390145, "grad_norm": 0.30025985836982727, "learning_rate": 3.4149306938809e-06, "loss": 0.0539, "step": 35737 }, { "epoch": 0.7874971767285307, "grad_norm": 1.076442003250122, "learning_rate": 3.4142507067367613e-06, "loss": 0.0658, "step": 35738 }, { "epoch": 0.7875192120180469, "grad_norm": 0.8602761030197144, "learning_rate": 3.4135707786050508e-06, "loss": 0.0351, "step": 35739 }, { "epoch": 0.787541247307563, "grad_norm": 0.48638686537742615, "learning_rate": 3.4128909094892373e-06, "loss": 0.0622, "step": 35740 }, { "epoch": 0.7875632825970792, "grad_norm": 0.6488733291625977, "learning_rate": 3.4122110993927885e-06, "loss": 0.0495, "step": 35741 }, { "epoch": 0.7875853178865954, "grad_norm": 0.6178443431854248, "learning_rate": 3.411531348319158e-06, "loss": 0.0654, "step": 35742 }, { "epoch": 0.7876073531761115, "grad_norm": 0.5008037090301514, "learning_rate": 3.4108516562718127e-06, "loss": 0.063, "step": 35743 }, { "epoch": 0.7876293884656277, "grad_norm": 0.8276068568229675, "learning_rate": 3.4101720232542156e-06, "loss": 0.0768, "step": 35744 }, { "epoch": 0.7876514237551439, "grad_norm": 0.5635392069816589, "learning_rate": 3.4094924492698314e-06, "loss": 0.0448, "step": 35745 }, { "epoch": 0.78767345904466, "grad_norm": 0.387032687664032, "learning_rate": 3.4088129343221114e-06, "loss": 0.0572, "step": 35746 }, { "epoch": 0.7876954943341762, "grad_norm": 0.6752632856369019, "learning_rate": 3.408133478414524e-06, "loss": 0.067, "step": 35747 }, { "epoch": 0.7877175296236923, "grad_norm": 0.5387812852859497, "learning_rate": 3.4074540815505335e-06, "loss": 0.0708, "step": 35748 }, { "epoch": 0.7877395649132085, "grad_norm": 0.5289754271507263, "learning_rate": 3.4067747437335916e-06, "loss": 0.0859, "step": 35749 }, { "epoch": 0.7877616002027247, "grad_norm": 0.6066028475761414, "learning_rate": 3.406095464967166e-06, "loss": 0.0522, "step": 35750 }, { "epoch": 0.7877836354922408, "grad_norm": 0.6025838255882263, "learning_rate": 3.4054162452547053e-06, "loss": 0.0594, "step": 35751 }, { "epoch": 0.787805670781757, "grad_norm": 0.6659762859344482, "learning_rate": 3.4047370845996854e-06, "loss": 0.063, "step": 35752 }, { "epoch": 0.7878277060712732, "grad_norm": 0.6515267491340637, "learning_rate": 3.4040579830055535e-06, "loss": 0.0536, "step": 35753 }, { "epoch": 0.7878497413607893, "grad_norm": 0.6335930228233337, "learning_rate": 3.4033789404757764e-06, "loss": 0.0684, "step": 35754 }, { "epoch": 0.7878717766503055, "grad_norm": 0.35935062170028687, "learning_rate": 3.402699957013805e-06, "loss": 0.0659, "step": 35755 }, { "epoch": 0.7878938119398217, "grad_norm": 0.6820040941238403, "learning_rate": 3.4020210326231014e-06, "loss": 0.0666, "step": 35756 }, { "epoch": 0.7879158472293378, "grad_norm": 0.5097648501396179, "learning_rate": 3.40134216730713e-06, "loss": 0.0541, "step": 35757 }, { "epoch": 0.787937882518854, "grad_norm": 0.7392130494117737, "learning_rate": 3.4006633610693354e-06, "loss": 0.0976, "step": 35758 }, { "epoch": 0.7879599178083702, "grad_norm": 1.0339434146881104, "learning_rate": 3.399984613913184e-06, "loss": 0.0703, "step": 35759 }, { "epoch": 0.7879819530978862, "grad_norm": 0.9549269080162048, "learning_rate": 3.399305925842133e-06, "loss": 0.0459, "step": 35760 }, { "epoch": 0.7880039883874024, "grad_norm": 0.808097243309021, "learning_rate": 3.3986272968596388e-06, "loss": 0.0574, "step": 35761 }, { "epoch": 0.7880260236769185, "grad_norm": 0.7347521781921387, "learning_rate": 3.3979487269691544e-06, "loss": 0.0788, "step": 35762 }, { "epoch": 0.7880480589664347, "grad_norm": 0.5147820711135864, "learning_rate": 3.3972702161741386e-06, "loss": 0.0537, "step": 35763 }, { "epoch": 0.7880700942559509, "grad_norm": 0.6372618079185486, "learning_rate": 3.396591764478052e-06, "loss": 0.0518, "step": 35764 }, { "epoch": 0.788092129545467, "grad_norm": 0.4073377549648285, "learning_rate": 3.39591337188434e-06, "loss": 0.0547, "step": 35765 }, { "epoch": 0.7881141648349832, "grad_norm": 0.4981197416782379, "learning_rate": 3.3952350383964647e-06, "loss": 0.0354, "step": 35766 }, { "epoch": 0.7881362001244994, "grad_norm": 0.7488607168197632, "learning_rate": 3.3945567640178804e-06, "loss": 0.0582, "step": 35767 }, { "epoch": 0.7881582354140155, "grad_norm": 0.7319781184196472, "learning_rate": 3.3938785487520474e-06, "loss": 0.0608, "step": 35768 }, { "epoch": 0.7881802707035317, "grad_norm": 0.5617995262145996, "learning_rate": 3.393200392602408e-06, "loss": 0.056, "step": 35769 }, { "epoch": 0.7882023059930479, "grad_norm": 0.37427762150764465, "learning_rate": 3.3925222955724242e-06, "loss": 0.0649, "step": 35770 }, { "epoch": 0.788224341282564, "grad_norm": 0.7688589692115784, "learning_rate": 3.391844257665554e-06, "loss": 0.0573, "step": 35771 }, { "epoch": 0.7882463765720802, "grad_norm": 0.4860253930091858, "learning_rate": 3.3911662788852406e-06, "loss": 0.0394, "step": 35772 }, { "epoch": 0.7882684118615964, "grad_norm": 0.8624710440635681, "learning_rate": 3.390488359234946e-06, "loss": 0.0655, "step": 35773 }, { "epoch": 0.7882904471511125, "grad_norm": 0.4779282808303833, "learning_rate": 3.3898104987181115e-06, "loss": 0.0644, "step": 35774 }, { "epoch": 0.7883124824406287, "grad_norm": 0.5887000560760498, "learning_rate": 3.3891326973382073e-06, "loss": 0.066, "step": 35775 }, { "epoch": 0.7883345177301448, "grad_norm": 0.7298586964607239, "learning_rate": 3.388454955098671e-06, "loss": 0.071, "step": 35776 }, { "epoch": 0.788356553019661, "grad_norm": 0.7420781254768372, "learning_rate": 3.387777272002966e-06, "loss": 0.0646, "step": 35777 }, { "epoch": 0.7883785883091772, "grad_norm": 0.5310612320899963, "learning_rate": 3.387099648054533e-06, "loss": 0.0526, "step": 35778 }, { "epoch": 0.7884006235986933, "grad_norm": 0.5879364013671875, "learning_rate": 3.3864220832568293e-06, "loss": 0.0793, "step": 35779 }, { "epoch": 0.7884226588882095, "grad_norm": 0.5445199608802795, "learning_rate": 3.3857445776133106e-06, "loss": 0.0593, "step": 35780 }, { "epoch": 0.7884446941777257, "grad_norm": 1.0241103172302246, "learning_rate": 3.3850671311274174e-06, "loss": 0.0954, "step": 35781 }, { "epoch": 0.7884667294672418, "grad_norm": 0.5467790961265564, "learning_rate": 3.384389743802605e-06, "loss": 0.0349, "step": 35782 }, { "epoch": 0.788488764756758, "grad_norm": 0.5644199252128601, "learning_rate": 3.383712415642326e-06, "loss": 0.0421, "step": 35783 }, { "epoch": 0.7885108000462742, "grad_norm": 0.7580605745315552, "learning_rate": 3.3830351466500324e-06, "loss": 0.0702, "step": 35784 }, { "epoch": 0.7885328353357902, "grad_norm": 0.5703534483909607, "learning_rate": 3.382357936829167e-06, "loss": 0.0587, "step": 35785 }, { "epoch": 0.7885548706253064, "grad_norm": 0.808440089225769, "learning_rate": 3.381680786183181e-06, "loss": 0.0636, "step": 35786 }, { "epoch": 0.7885769059148225, "grad_norm": 0.8454741835594177, "learning_rate": 3.381003694715531e-06, "loss": 0.0812, "step": 35787 }, { "epoch": 0.7885989412043387, "grad_norm": 1.029356598854065, "learning_rate": 3.3803266624296537e-06, "loss": 0.0544, "step": 35788 }, { "epoch": 0.7886209764938549, "grad_norm": 0.2272825688123703, "learning_rate": 3.3796496893290045e-06, "loss": 0.0211, "step": 35789 }, { "epoch": 0.788643011783371, "grad_norm": 0.5292690992355347, "learning_rate": 3.3789727754170314e-06, "loss": 0.0524, "step": 35790 }, { "epoch": 0.7886650470728872, "grad_norm": 0.7835037112236023, "learning_rate": 3.3782959206971843e-06, "loss": 0.0536, "step": 35791 }, { "epoch": 0.7886870823624034, "grad_norm": 0.3083905875682831, "learning_rate": 3.3776191251729056e-06, "loss": 0.0538, "step": 35792 }, { "epoch": 0.7887091176519195, "grad_norm": 0.6364853978157043, "learning_rate": 3.3769423888476443e-06, "loss": 0.0522, "step": 35793 }, { "epoch": 0.7887311529414357, "grad_norm": 0.3406775891780853, "learning_rate": 3.3762657117248476e-06, "loss": 0.0382, "step": 35794 }, { "epoch": 0.7887531882309519, "grad_norm": 0.32003071904182434, "learning_rate": 3.3755890938079674e-06, "loss": 0.0396, "step": 35795 }, { "epoch": 0.788775223520468, "grad_norm": 0.6459438800811768, "learning_rate": 3.374912535100445e-06, "loss": 0.0567, "step": 35796 }, { "epoch": 0.7887972588099842, "grad_norm": 0.6578607559204102, "learning_rate": 3.3742360356057207e-06, "loss": 0.085, "step": 35797 }, { "epoch": 0.7888192940995004, "grad_norm": 0.45733216404914856, "learning_rate": 3.373559595327253e-06, "loss": 0.0696, "step": 35798 }, { "epoch": 0.7888413293890165, "grad_norm": 0.8140358924865723, "learning_rate": 3.3728832142684762e-06, "loss": 0.0691, "step": 35799 }, { "epoch": 0.7888633646785327, "grad_norm": 0.5447848439216614, "learning_rate": 3.372206892432846e-06, "loss": 0.0448, "step": 35800 }, { "epoch": 0.7888853999680489, "grad_norm": 0.5047286152839661, "learning_rate": 3.3715306298237923e-06, "loss": 0.0615, "step": 35801 }, { "epoch": 0.788907435257565, "grad_norm": 0.4821365475654602, "learning_rate": 3.3708544264447764e-06, "loss": 0.0598, "step": 35802 }, { "epoch": 0.7889294705470812, "grad_norm": 0.5049766302108765, "learning_rate": 3.3701782822992323e-06, "loss": 0.0566, "step": 35803 }, { "epoch": 0.7889515058365973, "grad_norm": 0.476248174905777, "learning_rate": 3.36950219739061e-06, "loss": 0.061, "step": 35804 }, { "epoch": 0.7889735411261135, "grad_norm": 0.980658233165741, "learning_rate": 3.368826171722345e-06, "loss": 0.0871, "step": 35805 }, { "epoch": 0.7889955764156297, "grad_norm": 0.49844807386398315, "learning_rate": 3.3681502052978853e-06, "loss": 0.0522, "step": 35806 }, { "epoch": 0.7890176117051458, "grad_norm": 0.6184695363044739, "learning_rate": 3.367474298120679e-06, "loss": 0.0403, "step": 35807 }, { "epoch": 0.789039646994662, "grad_norm": 0.545344889163971, "learning_rate": 3.3667984501941605e-06, "loss": 0.0574, "step": 35808 }, { "epoch": 0.7890616822841781, "grad_norm": 0.4814680218696594, "learning_rate": 3.3661226615217738e-06, "loss": 0.0436, "step": 35809 }, { "epoch": 0.7890837175736942, "grad_norm": 0.6560495495796204, "learning_rate": 3.365446932106964e-06, "loss": 0.0683, "step": 35810 }, { "epoch": 0.7891057528632104, "grad_norm": 0.44014686346054077, "learning_rate": 3.3647712619531774e-06, "loss": 0.0266, "step": 35811 }, { "epoch": 0.7891277881527266, "grad_norm": 0.3298041522502899, "learning_rate": 3.364095651063844e-06, "loss": 0.0598, "step": 35812 }, { "epoch": 0.7891498234422427, "grad_norm": 0.429370254278183, "learning_rate": 3.3634200994424125e-06, "loss": 0.0534, "step": 35813 }, { "epoch": 0.7891718587317589, "grad_norm": 0.4506315588951111, "learning_rate": 3.3627446070923255e-06, "loss": 0.042, "step": 35814 }, { "epoch": 0.789193894021275, "grad_norm": 0.46050864458084106, "learning_rate": 3.3620691740170175e-06, "loss": 0.0387, "step": 35815 }, { "epoch": 0.7892159293107912, "grad_norm": 0.7503033876419067, "learning_rate": 3.3613938002199296e-06, "loss": 0.0693, "step": 35816 }, { "epoch": 0.7892379646003074, "grad_norm": 0.7056717276573181, "learning_rate": 3.3607184857045064e-06, "loss": 0.0612, "step": 35817 }, { "epoch": 0.7892599998898235, "grad_norm": 0.6281906366348267, "learning_rate": 3.36004323047419e-06, "loss": 0.0646, "step": 35818 }, { "epoch": 0.7892820351793397, "grad_norm": 0.8770200610160828, "learning_rate": 3.3593680345324096e-06, "loss": 0.0585, "step": 35819 }, { "epoch": 0.7893040704688559, "grad_norm": 0.7877386808395386, "learning_rate": 3.3586928978826105e-06, "loss": 0.0804, "step": 35820 }, { "epoch": 0.789326105758372, "grad_norm": 0.7278719544410706, "learning_rate": 3.358017820528235e-06, "loss": 0.0419, "step": 35821 }, { "epoch": 0.7893481410478882, "grad_norm": 0.5755162835121155, "learning_rate": 3.3573428024727148e-06, "loss": 0.0611, "step": 35822 }, { "epoch": 0.7893701763374044, "grad_norm": 0.7117416262626648, "learning_rate": 3.356667843719493e-06, "loss": 0.0595, "step": 35823 }, { "epoch": 0.7893922116269205, "grad_norm": 0.37580737471580505, "learning_rate": 3.3559929442719983e-06, "loss": 0.0784, "step": 35824 }, { "epoch": 0.7894142469164367, "grad_norm": 0.6582019329071045, "learning_rate": 3.3553181041336846e-06, "loss": 0.0666, "step": 35825 }, { "epoch": 0.7894362822059529, "grad_norm": 0.5909500122070312, "learning_rate": 3.3546433233079754e-06, "loss": 0.0463, "step": 35826 }, { "epoch": 0.789458317495469, "grad_norm": 0.5700123906135559, "learning_rate": 3.3539686017983166e-06, "loss": 0.0627, "step": 35827 }, { "epoch": 0.7894803527849852, "grad_norm": 0.4760845899581909, "learning_rate": 3.3532939396081356e-06, "loss": 0.0489, "step": 35828 }, { "epoch": 0.7895023880745013, "grad_norm": 0.6317929625511169, "learning_rate": 3.3526193367408766e-06, "loss": 0.0555, "step": 35829 }, { "epoch": 0.7895244233640175, "grad_norm": 0.6316907405853271, "learning_rate": 3.351944793199974e-06, "loss": 0.0628, "step": 35830 }, { "epoch": 0.7895464586535337, "grad_norm": 0.693193793296814, "learning_rate": 3.3512703089888613e-06, "loss": 0.0736, "step": 35831 }, { "epoch": 0.7895684939430498, "grad_norm": 0.3864801824092865, "learning_rate": 3.3505958841109753e-06, "loss": 0.0593, "step": 35832 }, { "epoch": 0.789590529232566, "grad_norm": 0.8003224730491638, "learning_rate": 3.3499215185697497e-06, "loss": 0.073, "step": 35833 }, { "epoch": 0.7896125645220821, "grad_norm": 0.4322431683540344, "learning_rate": 3.349247212368627e-06, "loss": 0.0756, "step": 35834 }, { "epoch": 0.7896345998115982, "grad_norm": 0.4281044900417328, "learning_rate": 3.348572965511029e-06, "loss": 0.0451, "step": 35835 }, { "epoch": 0.7896566351011144, "grad_norm": 0.8082100749015808, "learning_rate": 3.3478987780003987e-06, "loss": 0.0906, "step": 35836 }, { "epoch": 0.7896786703906306, "grad_norm": 0.3423917889595032, "learning_rate": 3.347224649840173e-06, "loss": 0.039, "step": 35837 }, { "epoch": 0.7897007056801467, "grad_norm": 0.524642825126648, "learning_rate": 3.346550581033776e-06, "loss": 0.0588, "step": 35838 }, { "epoch": 0.7897227409696629, "grad_norm": 0.6120892763137817, "learning_rate": 3.345876571584646e-06, "loss": 0.0657, "step": 35839 }, { "epoch": 0.789744776259179, "grad_norm": 0.43899187445640564, "learning_rate": 3.3452026214962163e-06, "loss": 0.0458, "step": 35840 }, { "epoch": 0.7897668115486952, "grad_norm": 0.5837418437004089, "learning_rate": 3.344528730771923e-06, "loss": 0.0445, "step": 35841 }, { "epoch": 0.7897888468382114, "grad_norm": 0.8625130653381348, "learning_rate": 3.3438548994151925e-06, "loss": 0.0448, "step": 35842 }, { "epoch": 0.7898108821277275, "grad_norm": 0.6738163828849792, "learning_rate": 3.3431811274294583e-06, "loss": 0.0876, "step": 35843 }, { "epoch": 0.7898329174172437, "grad_norm": 0.4680332839488983, "learning_rate": 3.3425074148181578e-06, "loss": 0.0624, "step": 35844 }, { "epoch": 0.7898549527067599, "grad_norm": 0.3264746367931366, "learning_rate": 3.3418337615847135e-06, "loss": 0.0648, "step": 35845 }, { "epoch": 0.789876987996276, "grad_norm": 0.5228690505027771, "learning_rate": 3.341160167732567e-06, "loss": 0.0576, "step": 35846 }, { "epoch": 0.7898990232857922, "grad_norm": 0.4740718603134155, "learning_rate": 3.3404866332651346e-06, "loss": 0.0733, "step": 35847 }, { "epoch": 0.7899210585753084, "grad_norm": 0.7218062877655029, "learning_rate": 3.339813158185863e-06, "loss": 0.0692, "step": 35848 }, { "epoch": 0.7899430938648245, "grad_norm": 0.8332529067993164, "learning_rate": 3.3391397424981734e-06, "loss": 0.0791, "step": 35849 }, { "epoch": 0.7899651291543407, "grad_norm": 1.146752119064331, "learning_rate": 3.3384663862055026e-06, "loss": 0.0903, "step": 35850 }, { "epoch": 0.7899871644438569, "grad_norm": 0.6890823245048523, "learning_rate": 3.3377930893112696e-06, "loss": 0.0756, "step": 35851 }, { "epoch": 0.790009199733373, "grad_norm": 0.5823465585708618, "learning_rate": 3.3371198518189118e-06, "loss": 0.0785, "step": 35852 }, { "epoch": 0.7900312350228892, "grad_norm": 0.4632306694984436, "learning_rate": 3.336446673731861e-06, "loss": 0.0425, "step": 35853 }, { "epoch": 0.7900532703124054, "grad_norm": 0.37434038519859314, "learning_rate": 3.335773555053537e-06, "loss": 0.0386, "step": 35854 }, { "epoch": 0.7900753056019215, "grad_norm": 0.4784969091415405, "learning_rate": 3.335100495787374e-06, "loss": 0.0678, "step": 35855 }, { "epoch": 0.7900973408914377, "grad_norm": 0.5260076522827148, "learning_rate": 3.334427495936798e-06, "loss": 0.0897, "step": 35856 }, { "epoch": 0.7901193761809538, "grad_norm": 0.6555970907211304, "learning_rate": 3.333754555505244e-06, "loss": 0.05, "step": 35857 }, { "epoch": 0.79014141147047, "grad_norm": 0.49733978509902954, "learning_rate": 3.333081674496129e-06, "loss": 0.0492, "step": 35858 }, { "epoch": 0.7901634467599861, "grad_norm": 0.6239549517631531, "learning_rate": 3.332408852912885e-06, "loss": 0.0505, "step": 35859 }, { "epoch": 0.7901854820495022, "grad_norm": 0.2438991367816925, "learning_rate": 3.331736090758944e-06, "loss": 0.0381, "step": 35860 }, { "epoch": 0.7902075173390184, "grad_norm": 0.6216452121734619, "learning_rate": 3.3310633880377234e-06, "loss": 0.0616, "step": 35861 }, { "epoch": 0.7902295526285346, "grad_norm": 0.15875668823719025, "learning_rate": 3.3303907447526555e-06, "loss": 0.0368, "step": 35862 }, { "epoch": 0.7902515879180507, "grad_norm": 0.5262035131454468, "learning_rate": 3.329718160907164e-06, "loss": 0.0657, "step": 35863 }, { "epoch": 0.7902736232075669, "grad_norm": 0.7473517656326294, "learning_rate": 3.329045636504682e-06, "loss": 0.0729, "step": 35864 }, { "epoch": 0.790295658497083, "grad_norm": 0.5902277231216431, "learning_rate": 3.3283731715486236e-06, "loss": 0.0577, "step": 35865 }, { "epoch": 0.7903176937865992, "grad_norm": 0.6216261982917786, "learning_rate": 3.327700766042419e-06, "loss": 0.0495, "step": 35866 }, { "epoch": 0.7903397290761154, "grad_norm": 0.4612261950969696, "learning_rate": 3.3270284199894983e-06, "loss": 0.0714, "step": 35867 }, { "epoch": 0.7903617643656315, "grad_norm": 0.5187646746635437, "learning_rate": 3.3263561333932774e-06, "loss": 0.0515, "step": 35868 }, { "epoch": 0.7903837996551477, "grad_norm": 0.8711630702018738, "learning_rate": 3.325683906257188e-06, "loss": 0.0686, "step": 35869 }, { "epoch": 0.7904058349446639, "grad_norm": 0.36856019496917725, "learning_rate": 3.325011738584643e-06, "loss": 0.0855, "step": 35870 }, { "epoch": 0.79042787023418, "grad_norm": 0.6211567521095276, "learning_rate": 3.3243396303790825e-06, "loss": 0.0867, "step": 35871 }, { "epoch": 0.7904499055236962, "grad_norm": 0.578029990196228, "learning_rate": 3.323667581643917e-06, "loss": 0.0629, "step": 35872 }, { "epoch": 0.7904719408132124, "grad_norm": 0.7091163992881775, "learning_rate": 3.322995592382578e-06, "loss": 0.0763, "step": 35873 }, { "epoch": 0.7904939761027285, "grad_norm": 0.8131670355796814, "learning_rate": 3.32232366259848e-06, "loss": 0.0854, "step": 35874 }, { "epoch": 0.7905160113922447, "grad_norm": 0.3970817029476166, "learning_rate": 3.321651792295049e-06, "loss": 0.067, "step": 35875 }, { "epoch": 0.7905380466817609, "grad_norm": 0.6777607202529907, "learning_rate": 3.3209799814757125e-06, "loss": 0.0653, "step": 35876 }, { "epoch": 0.790560081971277, "grad_norm": 0.35715460777282715, "learning_rate": 3.3203082301438793e-06, "loss": 0.0572, "step": 35877 }, { "epoch": 0.7905821172607932, "grad_norm": 0.5584978461265564, "learning_rate": 3.3196365383029864e-06, "loss": 0.0754, "step": 35878 }, { "epoch": 0.7906041525503094, "grad_norm": 0.5853695869445801, "learning_rate": 3.3189649059564463e-06, "loss": 0.0539, "step": 35879 }, { "epoch": 0.7906261878398255, "grad_norm": 0.5017602443695068, "learning_rate": 3.318293333107685e-06, "loss": 0.085, "step": 35880 }, { "epoch": 0.7906482231293417, "grad_norm": 0.28427964448928833, "learning_rate": 3.3176218197601154e-06, "loss": 0.0267, "step": 35881 }, { "epoch": 0.7906702584188579, "grad_norm": 0.5665662884712219, "learning_rate": 3.316950365917162e-06, "loss": 0.0587, "step": 35882 }, { "epoch": 0.7906922937083739, "grad_norm": 0.4711037576198578, "learning_rate": 3.3162789715822513e-06, "loss": 0.0842, "step": 35883 }, { "epoch": 0.7907143289978901, "grad_norm": 0.16981828212738037, "learning_rate": 3.3156076367587923e-06, "loss": 0.0358, "step": 35884 }, { "epoch": 0.7907363642874062, "grad_norm": 0.427888423204422, "learning_rate": 3.314936361450208e-06, "loss": 0.0497, "step": 35885 }, { "epoch": 0.7907583995769224, "grad_norm": 0.5279731750488281, "learning_rate": 3.314265145659921e-06, "loss": 0.0623, "step": 35886 }, { "epoch": 0.7907804348664386, "grad_norm": 0.5181527733802795, "learning_rate": 3.3135939893913513e-06, "loss": 0.0816, "step": 35887 }, { "epoch": 0.7908024701559547, "grad_norm": 0.7511787414550781, "learning_rate": 3.31292289264791e-06, "loss": 0.0667, "step": 35888 }, { "epoch": 0.7908245054454709, "grad_norm": 0.4806199073791504, "learning_rate": 3.312251855433019e-06, "loss": 0.0604, "step": 35889 }, { "epoch": 0.7908465407349871, "grad_norm": 0.8650275468826294, "learning_rate": 3.3115808777501004e-06, "loss": 0.0797, "step": 35890 }, { "epoch": 0.7908685760245032, "grad_norm": 0.5461841225624084, "learning_rate": 3.3109099596025654e-06, "loss": 0.0782, "step": 35891 }, { "epoch": 0.7908906113140194, "grad_norm": 0.7094391584396362, "learning_rate": 3.3102391009938378e-06, "loss": 0.0687, "step": 35892 }, { "epoch": 0.7909126466035356, "grad_norm": 0.5372057557106018, "learning_rate": 3.3095683019273225e-06, "loss": 0.1072, "step": 35893 }, { "epoch": 0.7909346818930517, "grad_norm": 0.641493022441864, "learning_rate": 3.3088975624064526e-06, "loss": 0.0517, "step": 35894 }, { "epoch": 0.7909567171825679, "grad_norm": 0.6745865345001221, "learning_rate": 3.3082268824346336e-06, "loss": 0.0954, "step": 35895 }, { "epoch": 0.790978752472084, "grad_norm": 0.5222904682159424, "learning_rate": 3.307556262015287e-06, "loss": 0.0728, "step": 35896 }, { "epoch": 0.7910007877616002, "grad_norm": 0.44282829761505127, "learning_rate": 3.3068857011518245e-06, "loss": 0.0672, "step": 35897 }, { "epoch": 0.7910228230511164, "grad_norm": 0.7960716485977173, "learning_rate": 3.3062151998476627e-06, "loss": 0.0608, "step": 35898 }, { "epoch": 0.7910448583406325, "grad_norm": 0.27580609917640686, "learning_rate": 3.305544758106222e-06, "loss": 0.0271, "step": 35899 }, { "epoch": 0.7910668936301487, "grad_norm": 0.515770673751831, "learning_rate": 3.3048743759309034e-06, "loss": 0.0653, "step": 35900 }, { "epoch": 0.7910889289196649, "grad_norm": 0.5363337993621826, "learning_rate": 3.3042040533251394e-06, "loss": 0.051, "step": 35901 }, { "epoch": 0.791110964209181, "grad_norm": 0.45401060581207275, "learning_rate": 3.3035337902923333e-06, "loss": 0.0507, "step": 35902 }, { "epoch": 0.7911329994986972, "grad_norm": 0.642699122428894, "learning_rate": 3.3028635868359046e-06, "loss": 0.0506, "step": 35903 }, { "epoch": 0.7911550347882134, "grad_norm": 0.3745756447315216, "learning_rate": 3.3021934429592572e-06, "loss": 0.0633, "step": 35904 }, { "epoch": 0.7911770700777295, "grad_norm": 0.7444550395011902, "learning_rate": 3.3015233586658178e-06, "loss": 0.067, "step": 35905 }, { "epoch": 0.7911991053672457, "grad_norm": 0.4713732898235321, "learning_rate": 3.300853333958991e-06, "loss": 0.0479, "step": 35906 }, { "epoch": 0.7912211406567619, "grad_norm": 0.6412215232849121, "learning_rate": 3.300183368842195e-06, "loss": 0.0555, "step": 35907 }, { "epoch": 0.7912431759462779, "grad_norm": 0.46895888447761536, "learning_rate": 3.2995134633188344e-06, "loss": 0.0683, "step": 35908 }, { "epoch": 0.7912652112357941, "grad_norm": 0.3253234922885895, "learning_rate": 3.2988436173923264e-06, "loss": 0.0378, "step": 35909 }, { "epoch": 0.7912872465253102, "grad_norm": 0.8876161575317383, "learning_rate": 3.298173831066088e-06, "loss": 0.0749, "step": 35910 }, { "epoch": 0.7913092818148264, "grad_norm": 0.59834223985672, "learning_rate": 3.29750410434352e-06, "loss": 0.0461, "step": 35911 }, { "epoch": 0.7913313171043426, "grad_norm": 0.5563145875930786, "learning_rate": 3.296834437228038e-06, "loss": 0.0529, "step": 35912 }, { "epoch": 0.7913533523938587, "grad_norm": 0.4578017294406891, "learning_rate": 3.296164829723056e-06, "loss": 0.0866, "step": 35913 }, { "epoch": 0.7913753876833749, "grad_norm": 0.3370389938354492, "learning_rate": 3.295495281831984e-06, "loss": 0.0616, "step": 35914 }, { "epoch": 0.7913974229728911, "grad_norm": 0.5556408166885376, "learning_rate": 3.2948257935582282e-06, "loss": 0.031, "step": 35915 }, { "epoch": 0.7914194582624072, "grad_norm": 0.41984838247299194, "learning_rate": 3.2941563649052023e-06, "loss": 0.0491, "step": 35916 }, { "epoch": 0.7914414935519234, "grad_norm": 0.6411604285240173, "learning_rate": 3.2934869958763194e-06, "loss": 0.0684, "step": 35917 }, { "epoch": 0.7914635288414396, "grad_norm": 0.6403608918190002, "learning_rate": 3.292817686474979e-06, "loss": 0.0764, "step": 35918 }, { "epoch": 0.7914855641309557, "grad_norm": 0.37715694308280945, "learning_rate": 3.2921484367046015e-06, "loss": 0.042, "step": 35919 }, { "epoch": 0.7915075994204719, "grad_norm": 0.47371843457221985, "learning_rate": 3.2914792465685824e-06, "loss": 0.0582, "step": 35920 }, { "epoch": 0.791529634709988, "grad_norm": 0.562847912311554, "learning_rate": 3.290810116070344e-06, "loss": 0.0494, "step": 35921 }, { "epoch": 0.7915516699995042, "grad_norm": 0.5285543203353882, "learning_rate": 3.2901410452132865e-06, "loss": 0.066, "step": 35922 }, { "epoch": 0.7915737052890204, "grad_norm": 0.2603890895843506, "learning_rate": 3.2894720340008195e-06, "loss": 0.0696, "step": 35923 }, { "epoch": 0.7915957405785365, "grad_norm": 0.3706481158733368, "learning_rate": 3.288803082436356e-06, "loss": 0.0569, "step": 35924 }, { "epoch": 0.7916177758680527, "grad_norm": 0.836514949798584, "learning_rate": 3.288134190523294e-06, "loss": 0.0877, "step": 35925 }, { "epoch": 0.7916398111575689, "grad_norm": 0.8511136770248413, "learning_rate": 3.2874653582650487e-06, "loss": 0.0636, "step": 35926 }, { "epoch": 0.791661846447085, "grad_norm": 0.4598881006240845, "learning_rate": 3.286796585665015e-06, "loss": 0.0632, "step": 35927 }, { "epoch": 0.7916838817366012, "grad_norm": 0.5598024129867554, "learning_rate": 3.2861278727266154e-06, "loss": 0.0605, "step": 35928 }, { "epoch": 0.7917059170261174, "grad_norm": 0.7275775074958801, "learning_rate": 3.2854592194532436e-06, "loss": 0.0636, "step": 35929 }, { "epoch": 0.7917279523156335, "grad_norm": 0.30211904644966125, "learning_rate": 3.284790625848315e-06, "loss": 0.0642, "step": 35930 }, { "epoch": 0.7917499876051497, "grad_norm": 0.48189011216163635, "learning_rate": 3.2841220919152244e-06, "loss": 0.0384, "step": 35931 }, { "epoch": 0.7917720228946659, "grad_norm": 0.6074008345603943, "learning_rate": 3.2834536176573833e-06, "loss": 0.0789, "step": 35932 }, { "epoch": 0.7917940581841819, "grad_norm": 0.6436424851417542, "learning_rate": 3.2827852030782006e-06, "loss": 0.0476, "step": 35933 }, { "epoch": 0.7918160934736981, "grad_norm": 0.37787574529647827, "learning_rate": 3.282116848181071e-06, "loss": 0.0345, "step": 35934 }, { "epoch": 0.7918381287632142, "grad_norm": 0.7661222219467163, "learning_rate": 3.281448552969404e-06, "loss": 0.0774, "step": 35935 }, { "epoch": 0.7918601640527304, "grad_norm": 0.7493696808815002, "learning_rate": 3.2807803174466033e-06, "loss": 0.0543, "step": 35936 }, { "epoch": 0.7918821993422466, "grad_norm": 0.6810638904571533, "learning_rate": 3.2801121416160784e-06, "loss": 0.0577, "step": 35937 }, { "epoch": 0.7919042346317627, "grad_norm": 0.7279093861579895, "learning_rate": 3.279444025481222e-06, "loss": 0.0472, "step": 35938 }, { "epoch": 0.7919262699212789, "grad_norm": 0.6441024541854858, "learning_rate": 3.278775969045441e-06, "loss": 0.0738, "step": 35939 }, { "epoch": 0.7919483052107951, "grad_norm": 0.3071189820766449, "learning_rate": 3.278107972312145e-06, "loss": 0.0401, "step": 35940 }, { "epoch": 0.7919703405003112, "grad_norm": 0.41080886125564575, "learning_rate": 3.2774400352847274e-06, "loss": 0.0569, "step": 35941 }, { "epoch": 0.7919923757898274, "grad_norm": 0.7512878179550171, "learning_rate": 3.276772157966597e-06, "loss": 0.0788, "step": 35942 }, { "epoch": 0.7920144110793436, "grad_norm": 0.46667423844337463, "learning_rate": 3.2761043403611434e-06, "loss": 0.0669, "step": 35943 }, { "epoch": 0.7920364463688597, "grad_norm": 0.5123037695884705, "learning_rate": 3.2754365824717867e-06, "loss": 0.0451, "step": 35944 }, { "epoch": 0.7920584816583759, "grad_norm": 0.8785251975059509, "learning_rate": 3.2747688843019145e-06, "loss": 0.0527, "step": 35945 }, { "epoch": 0.792080516947892, "grad_norm": 0.6991062164306641, "learning_rate": 3.2741012458549323e-06, "loss": 0.074, "step": 35946 }, { "epoch": 0.7921025522374082, "grad_norm": 0.45878225564956665, "learning_rate": 3.2734336671342443e-06, "loss": 0.0397, "step": 35947 }, { "epoch": 0.7921245875269244, "grad_norm": 0.41783463954925537, "learning_rate": 3.272766148143242e-06, "loss": 0.0586, "step": 35948 }, { "epoch": 0.7921466228164405, "grad_norm": 0.6345703601837158, "learning_rate": 3.272098688885335e-06, "loss": 0.0728, "step": 35949 }, { "epoch": 0.7921686581059567, "grad_norm": 0.520684540271759, "learning_rate": 3.2714312893639087e-06, "loss": 0.0547, "step": 35950 }, { "epoch": 0.7921906933954729, "grad_norm": 0.885235071182251, "learning_rate": 3.2707639495823822e-06, "loss": 0.0733, "step": 35951 }, { "epoch": 0.792212728684989, "grad_norm": 0.6775590777397156, "learning_rate": 3.270096669544141e-06, "loss": 0.088, "step": 35952 }, { "epoch": 0.7922347639745052, "grad_norm": 0.6225832104682922, "learning_rate": 3.2694294492525907e-06, "loss": 0.0734, "step": 35953 }, { "epoch": 0.7922567992640214, "grad_norm": 0.6004058122634888, "learning_rate": 3.268762288711122e-06, "loss": 0.0578, "step": 35954 }, { "epoch": 0.7922788345535375, "grad_norm": 1.0033248662948608, "learning_rate": 3.2680951879231385e-06, "loss": 0.1002, "step": 35955 }, { "epoch": 0.7923008698430537, "grad_norm": 0.7741430401802063, "learning_rate": 3.2674281468920414e-06, "loss": 0.0611, "step": 35956 }, { "epoch": 0.7923229051325699, "grad_norm": 0.7817848324775696, "learning_rate": 3.2667611656212193e-06, "loss": 0.0653, "step": 35957 }, { "epoch": 0.7923449404220859, "grad_norm": 0.9144595265388489, "learning_rate": 3.2660942441140744e-06, "loss": 0.0677, "step": 35958 }, { "epoch": 0.7923669757116021, "grad_norm": 0.680171012878418, "learning_rate": 3.2654273823740046e-06, "loss": 0.0551, "step": 35959 }, { "epoch": 0.7923890110011182, "grad_norm": 0.5922632813453674, "learning_rate": 3.2647605804044093e-06, "loss": 0.0445, "step": 35960 }, { "epoch": 0.7924110462906344, "grad_norm": 0.8555556535720825, "learning_rate": 3.264093838208679e-06, "loss": 0.0612, "step": 35961 }, { "epoch": 0.7924330815801506, "grad_norm": 0.558556854724884, "learning_rate": 3.2634271557902096e-06, "loss": 0.0948, "step": 35962 }, { "epoch": 0.7924551168696667, "grad_norm": 1.1297417879104614, "learning_rate": 3.2627605331524056e-06, "loss": 0.0766, "step": 35963 }, { "epoch": 0.7924771521591829, "grad_norm": 0.4662882089614868, "learning_rate": 3.26209397029865e-06, "loss": 0.0274, "step": 35964 }, { "epoch": 0.7924991874486991, "grad_norm": 0.8442274928092957, "learning_rate": 3.26142746723235e-06, "loss": 0.1164, "step": 35965 }, { "epoch": 0.7925212227382152, "grad_norm": 0.4805760085582733, "learning_rate": 3.2607610239568846e-06, "loss": 0.0588, "step": 35966 }, { "epoch": 0.7925432580277314, "grad_norm": 0.8271030187606812, "learning_rate": 3.2600946404756687e-06, "loss": 0.0685, "step": 35967 }, { "epoch": 0.7925652933172476, "grad_norm": 0.5383502840995789, "learning_rate": 3.259428316792081e-06, "loss": 0.0614, "step": 35968 }, { "epoch": 0.7925873286067637, "grad_norm": 0.5806321501731873, "learning_rate": 3.258762052909521e-06, "loss": 0.0779, "step": 35969 }, { "epoch": 0.7926093638962799, "grad_norm": 0.9495726227760315, "learning_rate": 3.258095848831386e-06, "loss": 0.0747, "step": 35970 }, { "epoch": 0.7926313991857961, "grad_norm": 0.6365836262702942, "learning_rate": 3.257429704561061e-06, "loss": 0.0558, "step": 35971 }, { "epoch": 0.7926534344753122, "grad_norm": 0.4376548230648041, "learning_rate": 3.2567636201019472e-06, "loss": 0.056, "step": 35972 }, { "epoch": 0.7926754697648284, "grad_norm": 0.4646373689174652, "learning_rate": 3.256097595457427e-06, "loss": 0.0609, "step": 35973 }, { "epoch": 0.7926975050543446, "grad_norm": 0.44904083013534546, "learning_rate": 3.2554316306309052e-06, "loss": 0.0464, "step": 35974 }, { "epoch": 0.7927195403438607, "grad_norm": 0.9263423681259155, "learning_rate": 3.2547657256257645e-06, "loss": 0.0657, "step": 35975 }, { "epoch": 0.7927415756333769, "grad_norm": 0.8820489645004272, "learning_rate": 3.254099880445406e-06, "loss": 0.0621, "step": 35976 }, { "epoch": 0.792763610922893, "grad_norm": 0.6656317114830017, "learning_rate": 3.2534340950932094e-06, "loss": 0.0826, "step": 35977 }, { "epoch": 0.7927856462124092, "grad_norm": 0.7857999801635742, "learning_rate": 3.252768369572573e-06, "loss": 0.0584, "step": 35978 }, { "epoch": 0.7928076815019254, "grad_norm": 0.4489171504974365, "learning_rate": 3.25210270388689e-06, "loss": 0.0596, "step": 35979 }, { "epoch": 0.7928297167914415, "grad_norm": 0.5635610818862915, "learning_rate": 3.2514370980395446e-06, "loss": 0.0547, "step": 35980 }, { "epoch": 0.7928517520809577, "grad_norm": 0.6479803919792175, "learning_rate": 3.250771552033929e-06, "loss": 0.0678, "step": 35981 }, { "epoch": 0.7928737873704738, "grad_norm": 0.5062368512153625, "learning_rate": 3.250106065873436e-06, "loss": 0.0653, "step": 35982 }, { "epoch": 0.7928958226599899, "grad_norm": 0.5518790483474731, "learning_rate": 3.2494406395614574e-06, "loss": 0.0635, "step": 35983 }, { "epoch": 0.7929178579495061, "grad_norm": 0.7861197590827942, "learning_rate": 3.248775273101374e-06, "loss": 0.0881, "step": 35984 }, { "epoch": 0.7929398932390223, "grad_norm": 0.41874566674232483, "learning_rate": 3.2481099664965792e-06, "loss": 0.0653, "step": 35985 }, { "epoch": 0.7929619285285384, "grad_norm": 0.6755679249763489, "learning_rate": 3.247444719750466e-06, "loss": 0.0848, "step": 35986 }, { "epoch": 0.7929839638180546, "grad_norm": 0.7004798650741577, "learning_rate": 3.2467795328664163e-06, "loss": 0.0687, "step": 35987 }, { "epoch": 0.7930059991075707, "grad_norm": 0.7791756987571716, "learning_rate": 3.2461144058478243e-06, "loss": 0.054, "step": 35988 }, { "epoch": 0.7930280343970869, "grad_norm": 0.4859698414802551, "learning_rate": 3.2454493386980656e-06, "loss": 0.0537, "step": 35989 }, { "epoch": 0.7930500696866031, "grad_norm": 0.49814513325691223, "learning_rate": 3.244784331420547e-06, "loss": 0.0357, "step": 35990 }, { "epoch": 0.7930721049761192, "grad_norm": 0.7072898149490356, "learning_rate": 3.2441193840186402e-06, "loss": 0.0833, "step": 35991 }, { "epoch": 0.7930941402656354, "grad_norm": 0.9637948274612427, "learning_rate": 3.2434544964957362e-06, "loss": 0.0881, "step": 35992 }, { "epoch": 0.7931161755551516, "grad_norm": 0.14719408750534058, "learning_rate": 3.2427896688552283e-06, "loss": 0.0353, "step": 35993 }, { "epoch": 0.7931382108446677, "grad_norm": 0.6852322220802307, "learning_rate": 3.242124901100493e-06, "loss": 0.064, "step": 35994 }, { "epoch": 0.7931602461341839, "grad_norm": 0.5953933596611023, "learning_rate": 3.241460193234925e-06, "loss": 0.0729, "step": 35995 }, { "epoch": 0.7931822814237001, "grad_norm": 0.8126391768455505, "learning_rate": 3.240795545261898e-06, "loss": 0.056, "step": 35996 }, { "epoch": 0.7932043167132162, "grad_norm": 0.4121589958667755, "learning_rate": 3.2401309571848125e-06, "loss": 0.0638, "step": 35997 }, { "epoch": 0.7932263520027324, "grad_norm": 0.4282655715942383, "learning_rate": 3.2394664290070407e-06, "loss": 0.0332, "step": 35998 }, { "epoch": 0.7932483872922486, "grad_norm": 0.4734194576740265, "learning_rate": 3.2388019607319785e-06, "loss": 0.0445, "step": 35999 }, { "epoch": 0.7932704225817647, "grad_norm": 0.5004146695137024, "learning_rate": 3.2381375523630013e-06, "loss": 0.0662, "step": 36000 }, { "epoch": 0.7932924578712809, "grad_norm": 0.7290318012237549, "learning_rate": 3.2374732039034964e-06, "loss": 0.0546, "step": 36001 }, { "epoch": 0.793314493160797, "grad_norm": 0.49558067321777344, "learning_rate": 3.2368089153568515e-06, "loss": 0.0605, "step": 36002 }, { "epoch": 0.7933365284503132, "grad_norm": 1.0477454662322998, "learning_rate": 3.2361446867264433e-06, "loss": 0.0785, "step": 36003 }, { "epoch": 0.7933585637398294, "grad_norm": 0.8412247896194458, "learning_rate": 3.235480518015658e-06, "loss": 0.0594, "step": 36004 }, { "epoch": 0.7933805990293455, "grad_norm": 0.5444626808166504, "learning_rate": 3.2348164092278786e-06, "loss": 0.067, "step": 36005 }, { "epoch": 0.7934026343188617, "grad_norm": 0.6261634230613708, "learning_rate": 3.234152360366492e-06, "loss": 0.067, "step": 36006 }, { "epoch": 0.7934246696083778, "grad_norm": 0.5394294261932373, "learning_rate": 3.2334883714348747e-06, "loss": 0.0818, "step": 36007 }, { "epoch": 0.7934467048978939, "grad_norm": 0.337434321641922, "learning_rate": 3.232824442436409e-06, "loss": 0.0877, "step": 36008 }, { "epoch": 0.7934687401874101, "grad_norm": 1.1210224628448486, "learning_rate": 3.2321605733744823e-06, "loss": 0.0614, "step": 36009 }, { "epoch": 0.7934907754769263, "grad_norm": 0.4349442422389984, "learning_rate": 3.231496764252469e-06, "loss": 0.0366, "step": 36010 }, { "epoch": 0.7935128107664424, "grad_norm": 0.6162897944450378, "learning_rate": 3.230833015073754e-06, "loss": 0.0511, "step": 36011 }, { "epoch": 0.7935348460559586, "grad_norm": 0.7220327854156494, "learning_rate": 3.230169325841717e-06, "loss": 0.0805, "step": 36012 }, { "epoch": 0.7935568813454748, "grad_norm": 0.6985172033309937, "learning_rate": 3.229505696559743e-06, "loss": 0.0543, "step": 36013 }, { "epoch": 0.7935789166349909, "grad_norm": 0.5800882577896118, "learning_rate": 3.2288421272312048e-06, "loss": 0.0883, "step": 36014 }, { "epoch": 0.7936009519245071, "grad_norm": 0.8067046403884888, "learning_rate": 3.2281786178594857e-06, "loss": 0.0523, "step": 36015 }, { "epoch": 0.7936229872140232, "grad_norm": 0.4740197956562042, "learning_rate": 3.2275151684479652e-06, "loss": 0.0555, "step": 36016 }, { "epoch": 0.7936450225035394, "grad_norm": 0.6976301074028015, "learning_rate": 3.226851779000027e-06, "loss": 0.0576, "step": 36017 }, { "epoch": 0.7936670577930556, "grad_norm": 0.8880933523178101, "learning_rate": 3.2261884495190424e-06, "loss": 0.0479, "step": 36018 }, { "epoch": 0.7936890930825717, "grad_norm": 0.990898072719574, "learning_rate": 3.225525180008393e-06, "loss": 0.0622, "step": 36019 }, { "epoch": 0.7937111283720879, "grad_norm": 0.6296063661575317, "learning_rate": 3.2248619704714612e-06, "loss": 0.0678, "step": 36020 }, { "epoch": 0.7937331636616041, "grad_norm": 0.780045747756958, "learning_rate": 3.224198820911619e-06, "loss": 0.0722, "step": 36021 }, { "epoch": 0.7937551989511202, "grad_norm": 0.5843995809555054, "learning_rate": 3.2235357313322494e-06, "loss": 0.0458, "step": 36022 }, { "epoch": 0.7937772342406364, "grad_norm": 0.4725402593612671, "learning_rate": 3.222872701736721e-06, "loss": 0.0481, "step": 36023 }, { "epoch": 0.7937992695301526, "grad_norm": 0.9319747090339661, "learning_rate": 3.2222097321284246e-06, "loss": 0.064, "step": 36024 }, { "epoch": 0.7938213048196687, "grad_norm": 0.578913688659668, "learning_rate": 3.2215468225107255e-06, "loss": 0.055, "step": 36025 }, { "epoch": 0.7938433401091849, "grad_norm": 0.7338358759880066, "learning_rate": 3.2208839728870093e-06, "loss": 0.0625, "step": 36026 }, { "epoch": 0.7938653753987011, "grad_norm": 0.47004666924476624, "learning_rate": 3.220221183260644e-06, "loss": 0.0621, "step": 36027 }, { "epoch": 0.7938874106882172, "grad_norm": 0.30308276414871216, "learning_rate": 3.2195584536350075e-06, "loss": 0.0597, "step": 36028 }, { "epoch": 0.7939094459777334, "grad_norm": 0.9627649188041687, "learning_rate": 3.2188957840134815e-06, "loss": 0.0701, "step": 36029 }, { "epoch": 0.7939314812672496, "grad_norm": 0.7111870646476746, "learning_rate": 3.2182331743994326e-06, "loss": 0.0611, "step": 36030 }, { "epoch": 0.7939535165567657, "grad_norm": 0.9269773364067078, "learning_rate": 3.21757062479624e-06, "loss": 0.0991, "step": 36031 }, { "epoch": 0.7939755518462818, "grad_norm": 0.45846274495124817, "learning_rate": 3.216908135207279e-06, "loss": 0.0532, "step": 36032 }, { "epoch": 0.7939975871357979, "grad_norm": 0.6589247584342957, "learning_rate": 3.216245705635928e-06, "loss": 0.0496, "step": 36033 }, { "epoch": 0.7940196224253141, "grad_norm": 0.39995771646499634, "learning_rate": 3.2155833360855515e-06, "loss": 0.0543, "step": 36034 }, { "epoch": 0.7940416577148303, "grad_norm": 0.3863522410392761, "learning_rate": 3.2149210265595285e-06, "loss": 0.0308, "step": 36035 }, { "epoch": 0.7940636930043464, "grad_norm": 0.4256468415260315, "learning_rate": 3.214258777061237e-06, "loss": 0.0458, "step": 36036 }, { "epoch": 0.7940857282938626, "grad_norm": 0.464033305644989, "learning_rate": 3.2135965875940403e-06, "loss": 0.0562, "step": 36037 }, { "epoch": 0.7941077635833788, "grad_norm": 0.9195569753646851, "learning_rate": 3.212934458161318e-06, "loss": 0.0854, "step": 36038 }, { "epoch": 0.7941297988728949, "grad_norm": 0.6505591869354248, "learning_rate": 3.2122723887664402e-06, "loss": 0.0743, "step": 36039 }, { "epoch": 0.7941518341624111, "grad_norm": 0.41056904196739197, "learning_rate": 3.2116103794127845e-06, "loss": 0.0514, "step": 36040 }, { "epoch": 0.7941738694519273, "grad_norm": 0.48494425415992737, "learning_rate": 3.2109484301037147e-06, "loss": 0.0464, "step": 36041 }, { "epoch": 0.7941959047414434, "grad_norm": 0.610887885093689, "learning_rate": 3.2102865408426062e-06, "loss": 0.0706, "step": 36042 }, { "epoch": 0.7942179400309596, "grad_norm": 0.5710960626602173, "learning_rate": 3.2096247116328354e-06, "loss": 0.0303, "step": 36043 }, { "epoch": 0.7942399753204757, "grad_norm": 0.5657957792282104, "learning_rate": 3.2089629424777623e-06, "loss": 0.0898, "step": 36044 }, { "epoch": 0.7942620106099919, "grad_norm": 0.6663299202919006, "learning_rate": 3.2083012333807692e-06, "loss": 0.066, "step": 36045 }, { "epoch": 0.7942840458995081, "grad_norm": 0.762751042842865, "learning_rate": 3.207639584345214e-06, "loss": 0.0856, "step": 36046 }, { "epoch": 0.7943060811890242, "grad_norm": 0.45996713638305664, "learning_rate": 3.20697799537448e-06, "loss": 0.0466, "step": 36047 }, { "epoch": 0.7943281164785404, "grad_norm": 0.23284539580345154, "learning_rate": 3.206316466471928e-06, "loss": 0.0751, "step": 36048 }, { "epoch": 0.7943501517680566, "grad_norm": 0.4819380044937134, "learning_rate": 3.2056549976409356e-06, "loss": 0.0412, "step": 36049 }, { "epoch": 0.7943721870575727, "grad_norm": 0.8146681189537048, "learning_rate": 3.2049935888848613e-06, "loss": 0.0585, "step": 36050 }, { "epoch": 0.7943942223470889, "grad_norm": 0.6595725417137146, "learning_rate": 3.2043322402070797e-06, "loss": 0.0575, "step": 36051 }, { "epoch": 0.7944162576366051, "grad_norm": 0.6003444790840149, "learning_rate": 3.203670951610964e-06, "loss": 0.0397, "step": 36052 }, { "epoch": 0.7944382929261212, "grad_norm": 0.7585002779960632, "learning_rate": 3.203009723099874e-06, "loss": 0.0695, "step": 36053 }, { "epoch": 0.7944603282156374, "grad_norm": 0.8148182034492493, "learning_rate": 3.2023485546771812e-06, "loss": 0.0666, "step": 36054 }, { "epoch": 0.7944823635051536, "grad_norm": 0.3448331356048584, "learning_rate": 3.2016874463462537e-06, "loss": 0.041, "step": 36055 }, { "epoch": 0.7945043987946696, "grad_norm": 0.49369415640830994, "learning_rate": 3.201026398110463e-06, "loss": 0.0686, "step": 36056 }, { "epoch": 0.7945264340841858, "grad_norm": 0.32450050115585327, "learning_rate": 3.2003654099731672e-06, "loss": 0.039, "step": 36057 }, { "epoch": 0.7945484693737019, "grad_norm": 0.6680687665939331, "learning_rate": 3.199704481937738e-06, "loss": 0.0686, "step": 36058 }, { "epoch": 0.7945705046632181, "grad_norm": 0.47932568192481995, "learning_rate": 3.1990436140075464e-06, "loss": 0.0644, "step": 36059 }, { "epoch": 0.7945925399527343, "grad_norm": 0.4554966688156128, "learning_rate": 3.1983828061859494e-06, "loss": 0.0461, "step": 36060 }, { "epoch": 0.7946145752422504, "grad_norm": 0.48246219754219055, "learning_rate": 3.1977220584763176e-06, "loss": 0.0481, "step": 36061 }, { "epoch": 0.7946366105317666, "grad_norm": 0.4082886874675751, "learning_rate": 3.1970613708820155e-06, "loss": 0.0663, "step": 36062 }, { "epoch": 0.7946586458212828, "grad_norm": 0.5336346626281738, "learning_rate": 3.1964007434064135e-06, "loss": 0.0607, "step": 36063 }, { "epoch": 0.7946806811107989, "grad_norm": 0.5250066518783569, "learning_rate": 3.1957401760528686e-06, "loss": 0.0625, "step": 36064 }, { "epoch": 0.7947027164003151, "grad_norm": 0.5795815587043762, "learning_rate": 3.1950796688247484e-06, "loss": 0.0428, "step": 36065 }, { "epoch": 0.7947247516898313, "grad_norm": 0.26052042841911316, "learning_rate": 3.194419221725422e-06, "loss": 0.0518, "step": 36066 }, { "epoch": 0.7947467869793474, "grad_norm": 0.5892818570137024, "learning_rate": 3.1937588347582446e-06, "loss": 0.0502, "step": 36067 }, { "epoch": 0.7947688222688636, "grad_norm": 0.5383846163749695, "learning_rate": 3.193098507926589e-06, "loss": 0.0505, "step": 36068 }, { "epoch": 0.7947908575583797, "grad_norm": 0.37075117230415344, "learning_rate": 3.1924382412338055e-06, "loss": 0.0603, "step": 36069 }, { "epoch": 0.7948128928478959, "grad_norm": 0.5262243747711182, "learning_rate": 3.1917780346832746e-06, "loss": 0.0576, "step": 36070 }, { "epoch": 0.7948349281374121, "grad_norm": 1.1370538473129272, "learning_rate": 3.191117888278346e-06, "loss": 0.0711, "step": 36071 }, { "epoch": 0.7948569634269282, "grad_norm": 0.7690092921257019, "learning_rate": 3.1904578020223894e-06, "loss": 0.0903, "step": 36072 }, { "epoch": 0.7948789987164444, "grad_norm": 0.43290677666664124, "learning_rate": 3.1897977759187614e-06, "loss": 0.0402, "step": 36073 }, { "epoch": 0.7949010340059606, "grad_norm": 0.5492141246795654, "learning_rate": 3.189137809970825e-06, "loss": 0.0429, "step": 36074 }, { "epoch": 0.7949230692954767, "grad_norm": 0.48273324966430664, "learning_rate": 3.1884779041819485e-06, "loss": 0.0734, "step": 36075 }, { "epoch": 0.7949451045849929, "grad_norm": 0.5075536966323853, "learning_rate": 3.1878180585554832e-06, "loss": 0.0526, "step": 36076 }, { "epoch": 0.7949671398745091, "grad_norm": 0.31816041469573975, "learning_rate": 3.1871582730947956e-06, "loss": 0.0358, "step": 36077 }, { "epoch": 0.7949891751640252, "grad_norm": 0.5320008397102356, "learning_rate": 3.1864985478032445e-06, "loss": 0.0611, "step": 36078 }, { "epoch": 0.7950112104535414, "grad_norm": 0.40586933493614197, "learning_rate": 3.1858388826841967e-06, "loss": 0.06, "step": 36079 }, { "epoch": 0.7950332457430576, "grad_norm": 0.6261367797851562, "learning_rate": 3.185179277741e-06, "loss": 0.0432, "step": 36080 }, { "epoch": 0.7950552810325736, "grad_norm": 0.8816222548484802, "learning_rate": 3.184519732977023e-06, "loss": 0.0943, "step": 36081 }, { "epoch": 0.7950773163220898, "grad_norm": 0.3490616977214813, "learning_rate": 3.183860248395627e-06, "loss": 0.0631, "step": 36082 }, { "epoch": 0.7950993516116059, "grad_norm": 0.6049004793167114, "learning_rate": 3.183200824000162e-06, "loss": 0.0486, "step": 36083 }, { "epoch": 0.7951213869011221, "grad_norm": 0.8512864708900452, "learning_rate": 3.1825414597939923e-06, "loss": 0.0775, "step": 36084 }, { "epoch": 0.7951434221906383, "grad_norm": 0.4746480882167816, "learning_rate": 3.1818821557804766e-06, "loss": 0.0777, "step": 36085 }, { "epoch": 0.7951654574801544, "grad_norm": 0.7517930865287781, "learning_rate": 3.181222911962976e-06, "loss": 0.0725, "step": 36086 }, { "epoch": 0.7951874927696706, "grad_norm": 0.6562409400939941, "learning_rate": 3.180563728344839e-06, "loss": 0.0403, "step": 36087 }, { "epoch": 0.7952095280591868, "grad_norm": 0.5015570521354675, "learning_rate": 3.179904604929432e-06, "loss": 0.0414, "step": 36088 }, { "epoch": 0.7952315633487029, "grad_norm": 0.47925177216529846, "learning_rate": 3.179245541720111e-06, "loss": 0.0508, "step": 36089 }, { "epoch": 0.7952535986382191, "grad_norm": 0.797685980796814, "learning_rate": 3.1785865387202296e-06, "loss": 0.0459, "step": 36090 }, { "epoch": 0.7952756339277353, "grad_norm": 0.5349526405334473, "learning_rate": 3.1779275959331478e-06, "loss": 0.0409, "step": 36091 }, { "epoch": 0.7952976692172514, "grad_norm": 0.8171605467796326, "learning_rate": 3.177268713362213e-06, "loss": 0.0724, "step": 36092 }, { "epoch": 0.7953197045067676, "grad_norm": 0.5097848176956177, "learning_rate": 3.1766098910107978e-06, "loss": 0.0709, "step": 36093 }, { "epoch": 0.7953417397962838, "grad_norm": 0.5971642732620239, "learning_rate": 3.1759511288822423e-06, "loss": 0.079, "step": 36094 }, { "epoch": 0.7953637750857999, "grad_norm": 0.819075882434845, "learning_rate": 3.1752924269799144e-06, "loss": 0.0604, "step": 36095 }, { "epoch": 0.7953858103753161, "grad_norm": 0.5344619750976562, "learning_rate": 3.1746337853071576e-06, "loss": 0.0572, "step": 36096 }, { "epoch": 0.7954078456648322, "grad_norm": 0.6950216889381409, "learning_rate": 3.173975203867333e-06, "loss": 0.0758, "step": 36097 }, { "epoch": 0.7954298809543484, "grad_norm": 0.46107226610183716, "learning_rate": 3.1733166826637976e-06, "loss": 0.0497, "step": 36098 }, { "epoch": 0.7954519162438646, "grad_norm": 0.4352891743183136, "learning_rate": 3.1726582216998988e-06, "loss": 0.0584, "step": 36099 }, { "epoch": 0.7954739515333807, "grad_norm": 0.6469221115112305, "learning_rate": 3.171999820978994e-06, "loss": 0.0646, "step": 36100 }, { "epoch": 0.7954959868228969, "grad_norm": 0.5398706197738647, "learning_rate": 3.171341480504437e-06, "loss": 0.0833, "step": 36101 }, { "epoch": 0.7955180221124131, "grad_norm": 0.5597473978996277, "learning_rate": 3.1706832002795853e-06, "loss": 0.0642, "step": 36102 }, { "epoch": 0.7955400574019292, "grad_norm": 0.7192926406860352, "learning_rate": 3.170024980307783e-06, "loss": 0.0653, "step": 36103 }, { "epoch": 0.7955620926914454, "grad_norm": 0.5726097822189331, "learning_rate": 3.169366820592387e-06, "loss": 0.0741, "step": 36104 }, { "epoch": 0.7955841279809616, "grad_norm": 0.37806808948516846, "learning_rate": 3.168708721136753e-06, "loss": 0.067, "step": 36105 }, { "epoch": 0.7956061632704776, "grad_norm": 0.5121657848358154, "learning_rate": 3.1680506819442274e-06, "loss": 0.0682, "step": 36106 }, { "epoch": 0.7956281985599938, "grad_norm": 0.6051262021064758, "learning_rate": 3.167392703018163e-06, "loss": 0.0738, "step": 36107 }, { "epoch": 0.79565023384951, "grad_norm": 0.6536794304847717, "learning_rate": 3.166734784361913e-06, "loss": 0.0636, "step": 36108 }, { "epoch": 0.7956722691390261, "grad_norm": 0.9041733145713806, "learning_rate": 3.166076925978833e-06, "loss": 0.0758, "step": 36109 }, { "epoch": 0.7956943044285423, "grad_norm": 0.703616738319397, "learning_rate": 3.165419127872265e-06, "loss": 0.0766, "step": 36110 }, { "epoch": 0.7957163397180584, "grad_norm": 0.8685656189918518, "learning_rate": 3.1647613900455645e-06, "loss": 0.0867, "step": 36111 }, { "epoch": 0.7957383750075746, "grad_norm": 0.5523650646209717, "learning_rate": 3.164103712502083e-06, "loss": 0.0481, "step": 36112 }, { "epoch": 0.7957604102970908, "grad_norm": 0.4228687882423401, "learning_rate": 3.163446095245165e-06, "loss": 0.0695, "step": 36113 }, { "epoch": 0.7957824455866069, "grad_norm": 0.7177910804748535, "learning_rate": 3.1627885382781666e-06, "loss": 0.0657, "step": 36114 }, { "epoch": 0.7958044808761231, "grad_norm": 0.44419893622398376, "learning_rate": 3.1621310416044275e-06, "loss": 0.0556, "step": 36115 }, { "epoch": 0.7958265161656393, "grad_norm": 0.49003735184669495, "learning_rate": 3.161473605227309e-06, "loss": 0.034, "step": 36116 }, { "epoch": 0.7958485514551554, "grad_norm": 0.9688612222671509, "learning_rate": 3.1608162291501508e-06, "loss": 0.0677, "step": 36117 }, { "epoch": 0.7958705867446716, "grad_norm": 0.3970673084259033, "learning_rate": 3.160158913376308e-06, "loss": 0.0412, "step": 36118 }, { "epoch": 0.7958926220341878, "grad_norm": 0.6449235677719116, "learning_rate": 3.1595016579091183e-06, "loss": 0.0463, "step": 36119 }, { "epoch": 0.7959146573237039, "grad_norm": 0.5669679641723633, "learning_rate": 3.1588444627519423e-06, "loss": 0.0484, "step": 36120 }, { "epoch": 0.7959366926132201, "grad_norm": 0.7160530686378479, "learning_rate": 3.158187327908121e-06, "loss": 0.0707, "step": 36121 }, { "epoch": 0.7959587279027363, "grad_norm": 0.7476854920387268, "learning_rate": 3.157530253380999e-06, "loss": 0.0558, "step": 36122 }, { "epoch": 0.7959807631922524, "grad_norm": 0.7725376486778259, "learning_rate": 3.1568732391739245e-06, "loss": 0.0599, "step": 36123 }, { "epoch": 0.7960027984817686, "grad_norm": 0.5984864830970764, "learning_rate": 3.1562162852902465e-06, "loss": 0.0535, "step": 36124 }, { "epoch": 0.7960248337712847, "grad_norm": 0.5552359819412231, "learning_rate": 3.1555593917333138e-06, "loss": 0.0696, "step": 36125 }, { "epoch": 0.7960468690608009, "grad_norm": 0.5570942163467407, "learning_rate": 3.15490255850646e-06, "loss": 0.0514, "step": 36126 }, { "epoch": 0.7960689043503171, "grad_norm": 0.5051048994064331, "learning_rate": 3.154245785613048e-06, "loss": 0.0711, "step": 36127 }, { "epoch": 0.7960909396398332, "grad_norm": 0.9112794399261475, "learning_rate": 3.1535890730564097e-06, "loss": 0.0753, "step": 36128 }, { "epoch": 0.7961129749293494, "grad_norm": 0.2923426926136017, "learning_rate": 3.1529324208398994e-06, "loss": 0.0543, "step": 36129 }, { "epoch": 0.7961350102188655, "grad_norm": 0.4003128409385681, "learning_rate": 3.152275828966853e-06, "loss": 0.0594, "step": 36130 }, { "epoch": 0.7961570455083816, "grad_norm": 0.7538991570472717, "learning_rate": 3.1516192974406207e-06, "loss": 0.0821, "step": 36131 }, { "epoch": 0.7961790807978978, "grad_norm": 0.9605297446250916, "learning_rate": 3.150962826264546e-06, "loss": 0.0691, "step": 36132 }, { "epoch": 0.796201116087414, "grad_norm": 0.8581743836402893, "learning_rate": 3.1503064154419693e-06, "loss": 0.0628, "step": 36133 }, { "epoch": 0.7962231513769301, "grad_norm": 0.7290225625038147, "learning_rate": 3.1496500649762366e-06, "loss": 0.0606, "step": 36134 }, { "epoch": 0.7962451866664463, "grad_norm": 0.298240065574646, "learning_rate": 3.1489937748706892e-06, "loss": 0.0428, "step": 36135 }, { "epoch": 0.7962672219559624, "grad_norm": 0.6111511588096619, "learning_rate": 3.1483375451286763e-06, "loss": 0.0738, "step": 36136 }, { "epoch": 0.7962892572454786, "grad_norm": 0.6446859240531921, "learning_rate": 3.1476813757535326e-06, "loss": 0.0495, "step": 36137 }, { "epoch": 0.7963112925349948, "grad_norm": 0.5379666090011597, "learning_rate": 3.1470252667486014e-06, "loss": 0.0611, "step": 36138 }, { "epoch": 0.7963333278245109, "grad_norm": 0.5447551608085632, "learning_rate": 3.146369218117233e-06, "loss": 0.0483, "step": 36139 }, { "epoch": 0.7963553631140271, "grad_norm": 0.6937280893325806, "learning_rate": 3.1457132298627567e-06, "loss": 0.074, "step": 36140 }, { "epoch": 0.7963773984035433, "grad_norm": 0.6148576140403748, "learning_rate": 3.145057301988524e-06, "loss": 0.061, "step": 36141 }, { "epoch": 0.7963994336930594, "grad_norm": 0.6083575487136841, "learning_rate": 3.144401434497865e-06, "loss": 0.0682, "step": 36142 }, { "epoch": 0.7964214689825756, "grad_norm": 0.9017506241798401, "learning_rate": 3.143745627394133e-06, "loss": 0.0565, "step": 36143 }, { "epoch": 0.7964435042720918, "grad_norm": 0.535965621471405, "learning_rate": 3.14308988068066e-06, "loss": 0.0529, "step": 36144 }, { "epoch": 0.7964655395616079, "grad_norm": 0.5749537944793701, "learning_rate": 3.1424341943607877e-06, "loss": 0.0432, "step": 36145 }, { "epoch": 0.7964875748511241, "grad_norm": 0.3415546417236328, "learning_rate": 3.141778568437861e-06, "loss": 0.0366, "step": 36146 }, { "epoch": 0.7965096101406403, "grad_norm": 0.6720638275146484, "learning_rate": 3.1411230029152103e-06, "loss": 0.0562, "step": 36147 }, { "epoch": 0.7965316454301564, "grad_norm": 0.9326041340827942, "learning_rate": 3.140467497796184e-06, "loss": 0.106, "step": 36148 }, { "epoch": 0.7965536807196726, "grad_norm": 0.24863208830356598, "learning_rate": 3.139812053084107e-06, "loss": 0.0594, "step": 36149 }, { "epoch": 0.7965757160091888, "grad_norm": 0.7422969341278076, "learning_rate": 3.1391566687823365e-06, "loss": 0.0551, "step": 36150 }, { "epoch": 0.7965977512987049, "grad_norm": 0.9609999656677246, "learning_rate": 3.138501344894197e-06, "loss": 0.092, "step": 36151 }, { "epoch": 0.7966197865882211, "grad_norm": 0.41469207406044006, "learning_rate": 3.1378460814230337e-06, "loss": 0.0717, "step": 36152 }, { "epoch": 0.7966418218777372, "grad_norm": 0.49567586183547974, "learning_rate": 3.137190878372179e-06, "loss": 0.0302, "step": 36153 }, { "epoch": 0.7966638571672534, "grad_norm": 0.5886545777320862, "learning_rate": 3.136535735744971e-06, "loss": 0.055, "step": 36154 }, { "epoch": 0.7966858924567695, "grad_norm": 0.8409703969955444, "learning_rate": 3.1358806535447543e-06, "loss": 0.0385, "step": 36155 }, { "epoch": 0.7967079277462856, "grad_norm": 0.60748291015625, "learning_rate": 3.1352256317748534e-06, "loss": 0.0781, "step": 36156 }, { "epoch": 0.7967299630358018, "grad_norm": 0.6750983595848083, "learning_rate": 3.1345706704386124e-06, "loss": 0.06, "step": 36157 }, { "epoch": 0.796751998325318, "grad_norm": 0.6363306045532227, "learning_rate": 3.133915769539363e-06, "loss": 0.0734, "step": 36158 }, { "epoch": 0.7967740336148341, "grad_norm": 0.47161632776260376, "learning_rate": 3.1332609290804505e-06, "loss": 0.0474, "step": 36159 }, { "epoch": 0.7967960689043503, "grad_norm": 0.8910368084907532, "learning_rate": 3.132606149065199e-06, "loss": 0.074, "step": 36160 }, { "epoch": 0.7968181041938664, "grad_norm": 0.5898155570030212, "learning_rate": 3.131951429496947e-06, "loss": 0.0578, "step": 36161 }, { "epoch": 0.7968401394833826, "grad_norm": 1.029872179031372, "learning_rate": 3.131296770379036e-06, "loss": 0.0785, "step": 36162 }, { "epoch": 0.7968621747728988, "grad_norm": 0.5313495993614197, "learning_rate": 3.130642171714791e-06, "loss": 0.0613, "step": 36163 }, { "epoch": 0.7968842100624149, "grad_norm": 0.6159308552742004, "learning_rate": 3.129987633507553e-06, "loss": 0.0402, "step": 36164 }, { "epoch": 0.7969062453519311, "grad_norm": 0.657846987247467, "learning_rate": 3.1293331557606457e-06, "loss": 0.0792, "step": 36165 }, { "epoch": 0.7969282806414473, "grad_norm": 0.23512591421604156, "learning_rate": 3.1286787384774174e-06, "loss": 0.0416, "step": 36166 }, { "epoch": 0.7969503159309634, "grad_norm": 0.8257924914360046, "learning_rate": 3.128024381661192e-06, "loss": 0.0617, "step": 36167 }, { "epoch": 0.7969723512204796, "grad_norm": 0.6756423115730286, "learning_rate": 3.1273700853153035e-06, "loss": 0.0838, "step": 36168 }, { "epoch": 0.7969943865099958, "grad_norm": 0.6471858620643616, "learning_rate": 3.1267158494430896e-06, "loss": 0.0503, "step": 36169 }, { "epoch": 0.7970164217995119, "grad_norm": 0.5665202736854553, "learning_rate": 3.126061674047876e-06, "loss": 0.0509, "step": 36170 }, { "epoch": 0.7970384570890281, "grad_norm": 0.7302978038787842, "learning_rate": 3.1254075591330013e-06, "loss": 0.0994, "step": 36171 }, { "epoch": 0.7970604923785443, "grad_norm": 0.6052544713020325, "learning_rate": 3.1247535047017846e-06, "loss": 0.0343, "step": 36172 }, { "epoch": 0.7970825276680604, "grad_norm": 0.38776540756225586, "learning_rate": 3.124099510757575e-06, "loss": 0.049, "step": 36173 }, { "epoch": 0.7971045629575766, "grad_norm": 0.4271198511123657, "learning_rate": 3.1234455773036915e-06, "loss": 0.0767, "step": 36174 }, { "epoch": 0.7971265982470928, "grad_norm": 0.71254563331604, "learning_rate": 3.122791704343471e-06, "loss": 0.0589, "step": 36175 }, { "epoch": 0.7971486335366089, "grad_norm": 0.7067803144454956, "learning_rate": 3.1221378918802384e-06, "loss": 0.089, "step": 36176 }, { "epoch": 0.7971706688261251, "grad_norm": 0.6237925291061401, "learning_rate": 3.1214841399173255e-06, "loss": 0.0761, "step": 36177 }, { "epoch": 0.7971927041156412, "grad_norm": 0.25595542788505554, "learning_rate": 3.1208304484580697e-06, "loss": 0.059, "step": 36178 }, { "epoch": 0.7972147394051574, "grad_norm": 0.714047908782959, "learning_rate": 3.12017681750579e-06, "loss": 0.0443, "step": 36179 }, { "epoch": 0.7972367746946735, "grad_norm": 0.7219188213348389, "learning_rate": 3.1195232470638185e-06, "loss": 0.0587, "step": 36180 }, { "epoch": 0.7972588099841896, "grad_norm": 0.46941426396369934, "learning_rate": 3.118869737135488e-06, "loss": 0.0529, "step": 36181 }, { "epoch": 0.7972808452737058, "grad_norm": 0.3314444124698639, "learning_rate": 3.118216287724129e-06, "loss": 0.0498, "step": 36182 }, { "epoch": 0.797302880563222, "grad_norm": 0.4547472298145294, "learning_rate": 3.11756289883306e-06, "loss": 0.0488, "step": 36183 }, { "epoch": 0.7973249158527381, "grad_norm": 0.1539895385503769, "learning_rate": 3.1169095704656174e-06, "loss": 0.0687, "step": 36184 }, { "epoch": 0.7973469511422543, "grad_norm": 0.5942546129226685, "learning_rate": 3.1162563026251283e-06, "loss": 0.0639, "step": 36185 }, { "epoch": 0.7973689864317705, "grad_norm": 0.40838485956192017, "learning_rate": 3.115603095314917e-06, "loss": 0.0643, "step": 36186 }, { "epoch": 0.7973910217212866, "grad_norm": 0.5110461711883545, "learning_rate": 3.114949948538315e-06, "loss": 0.0453, "step": 36187 }, { "epoch": 0.7974130570108028, "grad_norm": 0.623731255531311, "learning_rate": 3.114296862298637e-06, "loss": 0.049, "step": 36188 }, { "epoch": 0.797435092300319, "grad_norm": 0.5652604699134827, "learning_rate": 3.1136438365992283e-06, "loss": 0.0515, "step": 36189 }, { "epoch": 0.7974571275898351, "grad_norm": 0.5279579758644104, "learning_rate": 3.1129908714434e-06, "loss": 0.0514, "step": 36190 }, { "epoch": 0.7974791628793513, "grad_norm": 0.2369130551815033, "learning_rate": 3.112337966834484e-06, "loss": 0.0533, "step": 36191 }, { "epoch": 0.7975011981688674, "grad_norm": 0.8447113633155823, "learning_rate": 3.111685122775811e-06, "loss": 0.0699, "step": 36192 }, { "epoch": 0.7975232334583836, "grad_norm": 0.49147143959999084, "learning_rate": 3.1110323392706953e-06, "loss": 0.0543, "step": 36193 }, { "epoch": 0.7975452687478998, "grad_norm": 0.6128151416778564, "learning_rate": 3.1103796163224706e-06, "loss": 0.052, "step": 36194 }, { "epoch": 0.7975673040374159, "grad_norm": 0.3651622235774994, "learning_rate": 3.1097269539344513e-06, "loss": 0.0339, "step": 36195 }, { "epoch": 0.7975893393269321, "grad_norm": 0.6255697011947632, "learning_rate": 3.1090743521099764e-06, "loss": 0.0757, "step": 36196 }, { "epoch": 0.7976113746164483, "grad_norm": 0.981853723526001, "learning_rate": 3.1084218108523577e-06, "loss": 0.0514, "step": 36197 }, { "epoch": 0.7976334099059644, "grad_norm": 0.6440836191177368, "learning_rate": 3.107769330164928e-06, "loss": 0.0555, "step": 36198 }, { "epoch": 0.7976554451954806, "grad_norm": 0.42558449506759644, "learning_rate": 3.107116910051003e-06, "loss": 0.0465, "step": 36199 }, { "epoch": 0.7976774804849968, "grad_norm": 0.6881329417228699, "learning_rate": 3.1064645505139078e-06, "loss": 0.0727, "step": 36200 }, { "epoch": 0.7976995157745129, "grad_norm": 0.5474203824996948, "learning_rate": 3.1058122515569704e-06, "loss": 0.0854, "step": 36201 }, { "epoch": 0.7977215510640291, "grad_norm": 0.4374197721481323, "learning_rate": 3.1051600131835063e-06, "loss": 0.054, "step": 36202 }, { "epoch": 0.7977435863535453, "grad_norm": 0.302186518907547, "learning_rate": 3.104507835396841e-06, "loss": 0.0404, "step": 36203 }, { "epoch": 0.7977656216430614, "grad_norm": 0.6369866132736206, "learning_rate": 3.103855718200295e-06, "loss": 0.0528, "step": 36204 }, { "epoch": 0.7977876569325775, "grad_norm": 0.6204918622970581, "learning_rate": 3.1032036615971966e-06, "loss": 0.0505, "step": 36205 }, { "epoch": 0.7978096922220936, "grad_norm": 1.044012188911438, "learning_rate": 3.1025516655908555e-06, "loss": 0.0823, "step": 36206 }, { "epoch": 0.7978317275116098, "grad_norm": 0.9085096120834351, "learning_rate": 3.1018997301846006e-06, "loss": 0.0695, "step": 36207 }, { "epoch": 0.797853762801126, "grad_norm": 0.7252976894378662, "learning_rate": 3.1012478553817536e-06, "loss": 0.0814, "step": 36208 }, { "epoch": 0.7978757980906421, "grad_norm": 0.6600004434585571, "learning_rate": 3.1005960411856276e-06, "loss": 0.0676, "step": 36209 }, { "epoch": 0.7978978333801583, "grad_norm": 0.4609867036342621, "learning_rate": 3.099944287599552e-06, "loss": 0.043, "step": 36210 }, { "epoch": 0.7979198686696745, "grad_norm": 0.7717624306678772, "learning_rate": 3.0992925946268325e-06, "loss": 0.0623, "step": 36211 }, { "epoch": 0.7979419039591906, "grad_norm": 0.5697211623191833, "learning_rate": 3.098640962270806e-06, "loss": 0.0448, "step": 36212 }, { "epoch": 0.7979639392487068, "grad_norm": 0.753578782081604, "learning_rate": 3.097989390534779e-06, "loss": 0.0768, "step": 36213 }, { "epoch": 0.797985974538223, "grad_norm": 0.7970268726348877, "learning_rate": 3.0973378794220747e-06, "loss": 0.0747, "step": 36214 }, { "epoch": 0.7980080098277391, "grad_norm": 0.7822752594947815, "learning_rate": 3.096686428936014e-06, "loss": 0.0608, "step": 36215 }, { "epoch": 0.7980300451172553, "grad_norm": 0.3771490156650543, "learning_rate": 3.0960350390799098e-06, "loss": 0.0514, "step": 36216 }, { "epoch": 0.7980520804067714, "grad_norm": 0.8274605870246887, "learning_rate": 3.0953837098570852e-06, "loss": 0.0758, "step": 36217 }, { "epoch": 0.7980741156962876, "grad_norm": 0.6032289266586304, "learning_rate": 3.094732441270848e-06, "loss": 0.0568, "step": 36218 }, { "epoch": 0.7980961509858038, "grad_norm": 0.9686890840530396, "learning_rate": 3.094081233324531e-06, "loss": 0.072, "step": 36219 }, { "epoch": 0.7981181862753199, "grad_norm": 0.6590195894241333, "learning_rate": 3.0934300860214375e-06, "loss": 0.0662, "step": 36220 }, { "epoch": 0.7981402215648361, "grad_norm": 0.6439739465713501, "learning_rate": 3.0927789993648954e-06, "loss": 0.0633, "step": 36221 }, { "epoch": 0.7981622568543523, "grad_norm": 0.4397454857826233, "learning_rate": 3.0921279733582095e-06, "loss": 0.0576, "step": 36222 }, { "epoch": 0.7981842921438684, "grad_norm": 0.58467698097229, "learning_rate": 3.091477008004702e-06, "loss": 0.0512, "step": 36223 }, { "epoch": 0.7982063274333846, "grad_norm": 0.3748323619365692, "learning_rate": 3.090826103307692e-06, "loss": 0.0357, "step": 36224 }, { "epoch": 0.7982283627229008, "grad_norm": 0.5568686127662659, "learning_rate": 3.090175259270486e-06, "loss": 0.0525, "step": 36225 }, { "epoch": 0.7982503980124169, "grad_norm": 0.3631785809993744, "learning_rate": 3.089524475896404e-06, "loss": 0.0524, "step": 36226 }, { "epoch": 0.7982724333019331, "grad_norm": 0.45692625641822815, "learning_rate": 3.088873753188761e-06, "loss": 0.0457, "step": 36227 }, { "epoch": 0.7982944685914493, "grad_norm": 0.21186450123786926, "learning_rate": 3.0882230911508756e-06, "loss": 0.0373, "step": 36228 }, { "epoch": 0.7983165038809653, "grad_norm": 0.6117533445358276, "learning_rate": 3.0875724897860537e-06, "loss": 0.0658, "step": 36229 }, { "epoch": 0.7983385391704815, "grad_norm": 0.3240886330604553, "learning_rate": 3.086921949097612e-06, "loss": 0.0233, "step": 36230 }, { "epoch": 0.7983605744599976, "grad_norm": 0.7321926951408386, "learning_rate": 3.0862714690888666e-06, "loss": 0.0749, "step": 36231 }, { "epoch": 0.7983826097495138, "grad_norm": 0.6603166460990906, "learning_rate": 3.0856210497631315e-06, "loss": 0.0856, "step": 36232 }, { "epoch": 0.79840464503903, "grad_norm": 0.588736891746521, "learning_rate": 3.084970691123719e-06, "loss": 0.0568, "step": 36233 }, { "epoch": 0.7984266803285461, "grad_norm": 0.586902916431427, "learning_rate": 3.0843203931739315e-06, "loss": 0.0525, "step": 36234 }, { "epoch": 0.7984487156180623, "grad_norm": 0.46261605620384216, "learning_rate": 3.0836701559170983e-06, "loss": 0.0621, "step": 36235 }, { "epoch": 0.7984707509075785, "grad_norm": 0.4860973358154297, "learning_rate": 3.083019979356518e-06, "loss": 0.0454, "step": 36236 }, { "epoch": 0.7984927861970946, "grad_norm": 0.48694929480552673, "learning_rate": 3.082369863495508e-06, "loss": 0.0585, "step": 36237 }, { "epoch": 0.7985148214866108, "grad_norm": 0.589509904384613, "learning_rate": 3.0817198083373787e-06, "loss": 0.0674, "step": 36238 }, { "epoch": 0.798536856776127, "grad_norm": 0.7493168711662292, "learning_rate": 3.0810698138854464e-06, "loss": 0.0753, "step": 36239 }, { "epoch": 0.7985588920656431, "grad_norm": 1.025329828262329, "learning_rate": 3.0804198801430133e-06, "loss": 0.0564, "step": 36240 }, { "epoch": 0.7985809273551593, "grad_norm": 0.8251989483833313, "learning_rate": 3.0797700071133917e-06, "loss": 0.0744, "step": 36241 }, { "epoch": 0.7986029626446755, "grad_norm": 0.5557485818862915, "learning_rate": 3.0791201947998986e-06, "loss": 0.0568, "step": 36242 }, { "epoch": 0.7986249979341916, "grad_norm": 0.3698084056377411, "learning_rate": 3.0784704432058352e-06, "loss": 0.0501, "step": 36243 }, { "epoch": 0.7986470332237078, "grad_norm": 0.30392420291900635, "learning_rate": 3.0778207523345186e-06, "loss": 0.0512, "step": 36244 }, { "epoch": 0.798669068513224, "grad_norm": 0.4536406397819519, "learning_rate": 3.0771711221892445e-06, "loss": 0.0675, "step": 36245 }, { "epoch": 0.7986911038027401, "grad_norm": 0.461536705493927, "learning_rate": 3.076521552773342e-06, "loss": 0.0429, "step": 36246 }, { "epoch": 0.7987131390922563, "grad_norm": 0.6491494178771973, "learning_rate": 3.0758720440901034e-06, "loss": 0.0523, "step": 36247 }, { "epoch": 0.7987351743817724, "grad_norm": 0.24178633093833923, "learning_rate": 3.0752225961428475e-06, "loss": 0.0646, "step": 36248 }, { "epoch": 0.7987572096712886, "grad_norm": 0.431518018245697, "learning_rate": 3.0745732089348724e-06, "loss": 0.0578, "step": 36249 }, { "epoch": 0.7987792449608048, "grad_norm": 0.47890356183052063, "learning_rate": 3.073923882469491e-06, "loss": 0.0481, "step": 36250 }, { "epoch": 0.7988012802503209, "grad_norm": 0.5930797457695007, "learning_rate": 3.0732746167500148e-06, "loss": 0.0815, "step": 36251 }, { "epoch": 0.7988233155398371, "grad_norm": 1.1283478736877441, "learning_rate": 3.072625411779744e-06, "loss": 0.117, "step": 36252 }, { "epoch": 0.7988453508293533, "grad_norm": 0.4856399595737457, "learning_rate": 3.071976267561987e-06, "loss": 0.047, "step": 36253 }, { "epoch": 0.7988673861188693, "grad_norm": 0.7727447748184204, "learning_rate": 3.0713271841000495e-06, "loss": 0.0703, "step": 36254 }, { "epoch": 0.7988894214083855, "grad_norm": 0.4867388606071472, "learning_rate": 3.0706781613972453e-06, "loss": 0.0585, "step": 36255 }, { "epoch": 0.7989114566979016, "grad_norm": 0.5625014305114746, "learning_rate": 3.070029199456871e-06, "loss": 0.0483, "step": 36256 }, { "epoch": 0.7989334919874178, "grad_norm": 0.6860040426254272, "learning_rate": 3.0693802982822323e-06, "loss": 0.0766, "step": 36257 }, { "epoch": 0.798955527276934, "grad_norm": 0.5041965246200562, "learning_rate": 3.0687314578766435e-06, "loss": 0.0841, "step": 36258 }, { "epoch": 0.7989775625664501, "grad_norm": 0.4219770133495331, "learning_rate": 3.0680826782434e-06, "loss": 0.0452, "step": 36259 }, { "epoch": 0.7989995978559663, "grad_norm": 0.40893810987472534, "learning_rate": 3.067433959385808e-06, "loss": 0.0532, "step": 36260 }, { "epoch": 0.7990216331454825, "grad_norm": 0.3247954845428467, "learning_rate": 3.0667853013071743e-06, "loss": 0.0444, "step": 36261 }, { "epoch": 0.7990436684349986, "grad_norm": 0.6913713216781616, "learning_rate": 3.0661367040108068e-06, "loss": 0.0546, "step": 36262 }, { "epoch": 0.7990657037245148, "grad_norm": 0.3666539192199707, "learning_rate": 3.0654881675000007e-06, "loss": 0.0604, "step": 36263 }, { "epoch": 0.799087739014031, "grad_norm": 0.677257239818573, "learning_rate": 3.064839691778062e-06, "loss": 0.0614, "step": 36264 }, { "epoch": 0.7991097743035471, "grad_norm": 0.591512143611908, "learning_rate": 3.064191276848299e-06, "loss": 0.0686, "step": 36265 }, { "epoch": 0.7991318095930633, "grad_norm": 0.7065871953964233, "learning_rate": 3.0635429227140073e-06, "loss": 0.078, "step": 36266 }, { "epoch": 0.7991538448825795, "grad_norm": 0.843575656414032, "learning_rate": 3.062894629378496e-06, "loss": 0.076, "step": 36267 }, { "epoch": 0.7991758801720956, "grad_norm": 0.4219866096973419, "learning_rate": 3.062246396845055e-06, "loss": 0.0524, "step": 36268 }, { "epoch": 0.7991979154616118, "grad_norm": 0.5804848670959473, "learning_rate": 3.0615982251170043e-06, "loss": 0.0735, "step": 36269 }, { "epoch": 0.799219950751128, "grad_norm": 0.6026595830917358, "learning_rate": 3.0609501141976302e-06, "loss": 0.0545, "step": 36270 }, { "epoch": 0.7992419860406441, "grad_norm": 0.34463807940483093, "learning_rate": 3.0603020640902433e-06, "loss": 0.0397, "step": 36271 }, { "epoch": 0.7992640213301603, "grad_norm": 0.8086705803871155, "learning_rate": 3.059654074798136e-06, "loss": 0.0865, "step": 36272 }, { "epoch": 0.7992860566196764, "grad_norm": 0.7236548066139221, "learning_rate": 3.0590061463246155e-06, "loss": 0.0688, "step": 36273 }, { "epoch": 0.7993080919091926, "grad_norm": 0.4318629205226898, "learning_rate": 3.0583582786729824e-06, "loss": 0.0776, "step": 36274 }, { "epoch": 0.7993301271987088, "grad_norm": 0.6036538481712341, "learning_rate": 3.057710471846531e-06, "loss": 0.0568, "step": 36275 }, { "epoch": 0.7993521624882249, "grad_norm": 0.5279833078384399, "learning_rate": 3.0570627258485632e-06, "loss": 0.0582, "step": 36276 }, { "epoch": 0.7993741977777411, "grad_norm": 0.7443511486053467, "learning_rate": 3.0564150406823786e-06, "loss": 0.0668, "step": 36277 }, { "epoch": 0.7993962330672573, "grad_norm": 0.43120667338371277, "learning_rate": 3.0557674163512828e-06, "loss": 0.0505, "step": 36278 }, { "epoch": 0.7994182683567733, "grad_norm": 0.4893243908882141, "learning_rate": 3.0551198528585642e-06, "loss": 0.0544, "step": 36279 }, { "epoch": 0.7994403036462895, "grad_norm": 0.3818586766719818, "learning_rate": 3.0544723502075244e-06, "loss": 0.0701, "step": 36280 }, { "epoch": 0.7994623389358056, "grad_norm": 0.46693217754364014, "learning_rate": 3.053824908401467e-06, "loss": 0.0461, "step": 36281 }, { "epoch": 0.7994843742253218, "grad_norm": 0.49133265018463135, "learning_rate": 3.0531775274436824e-06, "loss": 0.0475, "step": 36282 }, { "epoch": 0.799506409514838, "grad_norm": 0.5736474990844727, "learning_rate": 3.05253020733747e-06, "loss": 0.058, "step": 36283 }, { "epoch": 0.7995284448043541, "grad_norm": 0.8370687961578369, "learning_rate": 3.0518829480861288e-06, "loss": 0.0602, "step": 36284 }, { "epoch": 0.7995504800938703, "grad_norm": 0.6333469152450562, "learning_rate": 3.0512357496929577e-06, "loss": 0.0656, "step": 36285 }, { "epoch": 0.7995725153833865, "grad_norm": 0.3599667549133301, "learning_rate": 3.0505886121612474e-06, "loss": 0.0624, "step": 36286 }, { "epoch": 0.7995945506729026, "grad_norm": 0.6692650318145752, "learning_rate": 3.0499415354942973e-06, "loss": 0.0813, "step": 36287 }, { "epoch": 0.7996165859624188, "grad_norm": 0.8349478840827942, "learning_rate": 3.049294519695406e-06, "loss": 0.0683, "step": 36288 }, { "epoch": 0.799638621251935, "grad_norm": 0.6999908089637756, "learning_rate": 3.0486475647678626e-06, "loss": 0.0514, "step": 36289 }, { "epoch": 0.7996606565414511, "grad_norm": 0.7912809252738953, "learning_rate": 3.0480006707149695e-06, "loss": 0.0687, "step": 36290 }, { "epoch": 0.7996826918309673, "grad_norm": 0.9031780362129211, "learning_rate": 3.0473538375400106e-06, "loss": 0.0703, "step": 36291 }, { "epoch": 0.7997047271204835, "grad_norm": 0.3784164488315582, "learning_rate": 3.046707065246297e-06, "loss": 0.0512, "step": 36292 }, { "epoch": 0.7997267624099996, "grad_norm": 0.756984293460846, "learning_rate": 3.046060353837109e-06, "loss": 0.0823, "step": 36293 }, { "epoch": 0.7997487976995158, "grad_norm": 0.2895548343658447, "learning_rate": 3.045413703315751e-06, "loss": 0.0476, "step": 36294 }, { "epoch": 0.799770832989032, "grad_norm": 0.4342404007911682, "learning_rate": 3.0447671136855064e-06, "loss": 0.0543, "step": 36295 }, { "epoch": 0.7997928682785481, "grad_norm": 0.5114179253578186, "learning_rate": 3.044120584949675e-06, "loss": 0.0476, "step": 36296 }, { "epoch": 0.7998149035680643, "grad_norm": 0.7312411069869995, "learning_rate": 3.0434741171115506e-06, "loss": 0.055, "step": 36297 }, { "epoch": 0.7998369388575804, "grad_norm": 0.6333319544792175, "learning_rate": 3.0428277101744217e-06, "loss": 0.0535, "step": 36298 }, { "epoch": 0.7998589741470966, "grad_norm": 0.6697724461555481, "learning_rate": 3.042181364141583e-06, "loss": 0.0442, "step": 36299 }, { "epoch": 0.7998810094366128, "grad_norm": 0.4601251482963562, "learning_rate": 3.0415350790163265e-06, "loss": 0.041, "step": 36300 }, { "epoch": 0.7999030447261289, "grad_norm": 0.6535865068435669, "learning_rate": 3.0408888548019496e-06, "loss": 0.0853, "step": 36301 }, { "epoch": 0.7999250800156451, "grad_norm": 0.44521135091781616, "learning_rate": 3.0402426915017333e-06, "loss": 0.0457, "step": 36302 }, { "epoch": 0.7999471153051612, "grad_norm": 0.7601155042648315, "learning_rate": 3.0395965891189763e-06, "loss": 0.065, "step": 36303 }, { "epoch": 0.7999691505946773, "grad_norm": 0.5192225575447083, "learning_rate": 3.0389505476569697e-06, "loss": 0.0749, "step": 36304 }, { "epoch": 0.7999911858841935, "grad_norm": 0.7987101078033447, "learning_rate": 3.0383045671189983e-06, "loss": 0.0856, "step": 36305 }, { "epoch": 0.8000132211737097, "grad_norm": 0.49292171001434326, "learning_rate": 3.0376586475083555e-06, "loss": 0.0459, "step": 36306 }, { "epoch": 0.8000352564632258, "grad_norm": 0.41714712977409363, "learning_rate": 3.0370127888283335e-06, "loss": 0.048, "step": 36307 }, { "epoch": 0.800057291752742, "grad_norm": 0.49101534485816956, "learning_rate": 3.036366991082224e-06, "loss": 0.0857, "step": 36308 }, { "epoch": 0.8000793270422581, "grad_norm": 0.7125563025474548, "learning_rate": 3.035721254273308e-06, "loss": 0.0578, "step": 36309 }, { "epoch": 0.8001013623317743, "grad_norm": 0.6664270758628845, "learning_rate": 3.03507557840488e-06, "loss": 0.0428, "step": 36310 }, { "epoch": 0.8001233976212905, "grad_norm": 0.668340802192688, "learning_rate": 3.0344299634802318e-06, "loss": 0.0844, "step": 36311 }, { "epoch": 0.8001454329108066, "grad_norm": 0.4835087060928345, "learning_rate": 3.0337844095026448e-06, "loss": 0.0292, "step": 36312 }, { "epoch": 0.8001674682003228, "grad_norm": 0.7948561906814575, "learning_rate": 3.0331389164754154e-06, "loss": 0.0576, "step": 36313 }, { "epoch": 0.800189503489839, "grad_norm": 0.9897897839546204, "learning_rate": 3.0324934844018166e-06, "loss": 0.0625, "step": 36314 }, { "epoch": 0.8002115387793551, "grad_norm": 0.6081427335739136, "learning_rate": 3.0318481132851556e-06, "loss": 0.0555, "step": 36315 }, { "epoch": 0.8002335740688713, "grad_norm": 0.4797934293746948, "learning_rate": 3.031202803128705e-06, "loss": 0.0664, "step": 36316 }, { "epoch": 0.8002556093583875, "grad_norm": 0.6902602314949036, "learning_rate": 3.0305575539357617e-06, "loss": 0.0665, "step": 36317 }, { "epoch": 0.8002776446479036, "grad_norm": 0.6348731517791748, "learning_rate": 3.0299123657096017e-06, "loss": 0.0716, "step": 36318 }, { "epoch": 0.8002996799374198, "grad_norm": 0.7573643326759338, "learning_rate": 3.0292672384535174e-06, "loss": 0.063, "step": 36319 }, { "epoch": 0.800321715226936, "grad_norm": 0.692234218120575, "learning_rate": 3.0286221721707996e-06, "loss": 0.073, "step": 36320 }, { "epoch": 0.8003437505164521, "grad_norm": 1.0681716203689575, "learning_rate": 3.0279771668647237e-06, "loss": 0.0703, "step": 36321 }, { "epoch": 0.8003657858059683, "grad_norm": 1.0826811790466309, "learning_rate": 3.027332222538579e-06, "loss": 0.1012, "step": 36322 }, { "epoch": 0.8003878210954845, "grad_norm": 0.435973584651947, "learning_rate": 3.026687339195653e-06, "loss": 0.0677, "step": 36323 }, { "epoch": 0.8004098563850006, "grad_norm": 0.5802773833274841, "learning_rate": 3.0260425168392326e-06, "loss": 0.0809, "step": 36324 }, { "epoch": 0.8004318916745168, "grad_norm": 0.6282193660736084, "learning_rate": 3.025397755472595e-06, "loss": 0.0787, "step": 36325 }, { "epoch": 0.800453926964033, "grad_norm": 0.7088574767112732, "learning_rate": 3.024753055099027e-06, "loss": 0.067, "step": 36326 }, { "epoch": 0.8004759622535491, "grad_norm": 0.720169723033905, "learning_rate": 3.0241084157218183e-06, "loss": 0.0606, "step": 36327 }, { "epoch": 0.8004979975430652, "grad_norm": 0.6874490976333618, "learning_rate": 3.0234638373442434e-06, "loss": 0.0793, "step": 36328 }, { "epoch": 0.8005200328325813, "grad_norm": 0.6856538653373718, "learning_rate": 3.022819319969588e-06, "loss": 0.0806, "step": 36329 }, { "epoch": 0.8005420681220975, "grad_norm": 0.5653062462806702, "learning_rate": 3.0221748636011376e-06, "loss": 0.0573, "step": 36330 }, { "epoch": 0.8005641034116137, "grad_norm": 0.4913230836391449, "learning_rate": 3.021530468242178e-06, "loss": 0.0664, "step": 36331 }, { "epoch": 0.8005861387011298, "grad_norm": 0.3812370300292969, "learning_rate": 3.020886133895983e-06, "loss": 0.0446, "step": 36332 }, { "epoch": 0.800608173990646, "grad_norm": 0.7274834513664246, "learning_rate": 3.020241860565837e-06, "loss": 0.0826, "step": 36333 }, { "epoch": 0.8006302092801622, "grad_norm": 0.6962750554084778, "learning_rate": 3.019597648255029e-06, "loss": 0.0816, "step": 36334 }, { "epoch": 0.8006522445696783, "grad_norm": 0.4424065947532654, "learning_rate": 3.0189534969668298e-06, "loss": 0.0484, "step": 36335 }, { "epoch": 0.8006742798591945, "grad_norm": 0.5594896078109741, "learning_rate": 3.01830940670453e-06, "loss": 0.0377, "step": 36336 }, { "epoch": 0.8006963151487106, "grad_norm": 0.5139763951301575, "learning_rate": 3.017665377471398e-06, "loss": 0.0336, "step": 36337 }, { "epoch": 0.8007183504382268, "grad_norm": 0.8872961401939392, "learning_rate": 3.017021409270727e-06, "loss": 0.0607, "step": 36338 }, { "epoch": 0.800740385727743, "grad_norm": 0.7327311635017395, "learning_rate": 3.016377502105791e-06, "loss": 0.0653, "step": 36339 }, { "epoch": 0.8007624210172591, "grad_norm": 0.6527063846588135, "learning_rate": 3.015733655979872e-06, "loss": 0.0609, "step": 36340 }, { "epoch": 0.8007844563067753, "grad_norm": 0.6801596879959106, "learning_rate": 3.015089870896241e-06, "loss": 0.0518, "step": 36341 }, { "epoch": 0.8008064915962915, "grad_norm": 0.4596448242664337, "learning_rate": 3.014446146858192e-06, "loss": 0.0781, "step": 36342 }, { "epoch": 0.8008285268858076, "grad_norm": 0.7712158560752869, "learning_rate": 3.013802483868992e-06, "loss": 0.0677, "step": 36343 }, { "epoch": 0.8008505621753238, "grad_norm": 0.6382710337638855, "learning_rate": 3.0131588819319273e-06, "loss": 0.0463, "step": 36344 }, { "epoch": 0.80087259746484, "grad_norm": 0.4710409343242645, "learning_rate": 3.012515341050268e-06, "loss": 0.0445, "step": 36345 }, { "epoch": 0.8008946327543561, "grad_norm": 0.43705615401268005, "learning_rate": 3.0118718612272973e-06, "loss": 0.0552, "step": 36346 }, { "epoch": 0.8009166680438723, "grad_norm": 0.770592451095581, "learning_rate": 3.011228442466295e-06, "loss": 0.0932, "step": 36347 }, { "epoch": 0.8009387033333885, "grad_norm": 0.6716139316558838, "learning_rate": 3.0105850847705274e-06, "loss": 0.0625, "step": 36348 }, { "epoch": 0.8009607386229046, "grad_norm": 0.35979127883911133, "learning_rate": 3.0099417881432868e-06, "loss": 0.0524, "step": 36349 }, { "epoch": 0.8009827739124208, "grad_norm": 0.6837475895881653, "learning_rate": 3.0092985525878387e-06, "loss": 0.0528, "step": 36350 }, { "epoch": 0.801004809201937, "grad_norm": 0.28729164600372314, "learning_rate": 3.008655378107467e-06, "loss": 0.0443, "step": 36351 }, { "epoch": 0.8010268444914531, "grad_norm": 0.43452954292297363, "learning_rate": 3.008012264705438e-06, "loss": 0.067, "step": 36352 }, { "epoch": 0.8010488797809692, "grad_norm": 0.25448572635650635, "learning_rate": 3.0073692123850357e-06, "loss": 0.0276, "step": 36353 }, { "epoch": 0.8010709150704853, "grad_norm": 0.603198766708374, "learning_rate": 3.006726221149535e-06, "loss": 0.0709, "step": 36354 }, { "epoch": 0.8010929503600015, "grad_norm": 0.42346370220184326, "learning_rate": 3.0060832910022056e-06, "loss": 0.05, "step": 36355 }, { "epoch": 0.8011149856495177, "grad_norm": 0.6248820424079895, "learning_rate": 3.0054404219463264e-06, "loss": 0.0554, "step": 36356 }, { "epoch": 0.8011370209390338, "grad_norm": 0.45228520035743713, "learning_rate": 3.00479761398517e-06, "loss": 0.0352, "step": 36357 }, { "epoch": 0.80115905622855, "grad_norm": 0.5333713889122009, "learning_rate": 3.004154867122017e-06, "loss": 0.0581, "step": 36358 }, { "epoch": 0.8011810915180662, "grad_norm": 0.5573006868362427, "learning_rate": 3.0035121813601315e-06, "loss": 0.0509, "step": 36359 }, { "epoch": 0.8012031268075823, "grad_norm": 0.38858044147491455, "learning_rate": 3.002869556702792e-06, "loss": 0.03, "step": 36360 }, { "epoch": 0.8012251620970985, "grad_norm": 0.3732283115386963, "learning_rate": 3.002226993153273e-06, "loss": 0.0665, "step": 36361 }, { "epoch": 0.8012471973866147, "grad_norm": 0.7368075251579285, "learning_rate": 3.0015844907148438e-06, "loss": 0.0778, "step": 36362 }, { "epoch": 0.8012692326761308, "grad_norm": 0.4098412096500397, "learning_rate": 3.0009420493907828e-06, "loss": 0.078, "step": 36363 }, { "epoch": 0.801291267965647, "grad_norm": 0.3476867377758026, "learning_rate": 3.00029966918435e-06, "loss": 0.0578, "step": 36364 }, { "epoch": 0.8013133032551631, "grad_norm": 0.5609395503997803, "learning_rate": 2.9996573500988317e-06, "loss": 0.0521, "step": 36365 }, { "epoch": 0.8013353385446793, "grad_norm": 0.7530640959739685, "learning_rate": 2.9990150921374926e-06, "loss": 0.059, "step": 36366 }, { "epoch": 0.8013573738341955, "grad_norm": 0.671516478061676, "learning_rate": 2.9983728953036067e-06, "loss": 0.0678, "step": 36367 }, { "epoch": 0.8013794091237116, "grad_norm": 0.782210648059845, "learning_rate": 2.997730759600441e-06, "loss": 0.0669, "step": 36368 }, { "epoch": 0.8014014444132278, "grad_norm": 0.7116528749465942, "learning_rate": 2.9970886850312682e-06, "loss": 0.0863, "step": 36369 }, { "epoch": 0.801423479702744, "grad_norm": 0.47140732407569885, "learning_rate": 2.996446671599363e-06, "loss": 0.0576, "step": 36370 }, { "epoch": 0.8014455149922601, "grad_norm": 0.44096824526786804, "learning_rate": 2.9958047193079846e-06, "loss": 0.0478, "step": 36371 }, { "epoch": 0.8014675502817763, "grad_norm": 0.44757279753685, "learning_rate": 2.9951628281604166e-06, "loss": 0.0453, "step": 36372 }, { "epoch": 0.8014895855712925, "grad_norm": 0.7238947153091431, "learning_rate": 2.994520998159919e-06, "loss": 0.0635, "step": 36373 }, { "epoch": 0.8015116208608086, "grad_norm": 0.8871439099311829, "learning_rate": 2.9938792293097665e-06, "loss": 0.0911, "step": 36374 }, { "epoch": 0.8015336561503248, "grad_norm": 0.6932843327522278, "learning_rate": 2.993237521613223e-06, "loss": 0.0773, "step": 36375 }, { "epoch": 0.801555691439841, "grad_norm": 0.7799181342124939, "learning_rate": 2.992595875073559e-06, "loss": 0.101, "step": 36376 }, { "epoch": 0.8015777267293571, "grad_norm": 0.5992767214775085, "learning_rate": 2.991954289694045e-06, "loss": 0.0541, "step": 36377 }, { "epoch": 0.8015997620188732, "grad_norm": 0.743371307849884, "learning_rate": 2.991312765477945e-06, "loss": 0.0493, "step": 36378 }, { "epoch": 0.8016217973083893, "grad_norm": 0.3596997559070587, "learning_rate": 2.9906713024285274e-06, "loss": 0.0862, "step": 36379 }, { "epoch": 0.8016438325979055, "grad_norm": 0.5522470474243164, "learning_rate": 2.9900299005490622e-06, "loss": 0.0701, "step": 36380 }, { "epoch": 0.8016658678874217, "grad_norm": 0.24582180380821228, "learning_rate": 2.9893885598428183e-06, "loss": 0.0474, "step": 36381 }, { "epoch": 0.8016879031769378, "grad_norm": 0.45104292035102844, "learning_rate": 2.988747280313055e-06, "loss": 0.0671, "step": 36382 }, { "epoch": 0.801709938466454, "grad_norm": 0.7953658103942871, "learning_rate": 2.988106061963043e-06, "loss": 0.0501, "step": 36383 }, { "epoch": 0.8017319737559702, "grad_norm": 0.19929155707359314, "learning_rate": 2.9874649047960512e-06, "loss": 0.0355, "step": 36384 }, { "epoch": 0.8017540090454863, "grad_norm": 0.39189136028289795, "learning_rate": 2.9868238088153403e-06, "loss": 0.0618, "step": 36385 }, { "epoch": 0.8017760443350025, "grad_norm": 0.7137926816940308, "learning_rate": 2.9861827740241797e-06, "loss": 0.074, "step": 36386 }, { "epoch": 0.8017980796245187, "grad_norm": 0.7057309150695801, "learning_rate": 2.985541800425825e-06, "loss": 0.0736, "step": 36387 }, { "epoch": 0.8018201149140348, "grad_norm": 0.6153829097747803, "learning_rate": 2.9849008880235567e-06, "loss": 0.0589, "step": 36388 }, { "epoch": 0.801842150203551, "grad_norm": 0.6614431738853455, "learning_rate": 2.9842600368206274e-06, "loss": 0.0614, "step": 36389 }, { "epoch": 0.8018641854930671, "grad_norm": 0.5790429711341858, "learning_rate": 2.983619246820306e-06, "loss": 0.0825, "step": 36390 }, { "epoch": 0.8018862207825833, "grad_norm": 0.48092857003211975, "learning_rate": 2.9829785180258585e-06, "loss": 0.0636, "step": 36391 }, { "epoch": 0.8019082560720995, "grad_norm": 0.8583117723464966, "learning_rate": 2.9823378504405402e-06, "loss": 0.0642, "step": 36392 }, { "epoch": 0.8019302913616156, "grad_norm": 0.6697316765785217, "learning_rate": 2.9816972440676253e-06, "loss": 0.0554, "step": 36393 }, { "epoch": 0.8019523266511318, "grad_norm": 0.6518313884735107, "learning_rate": 2.9810566989103635e-06, "loss": 0.0594, "step": 36394 }, { "epoch": 0.801974361940648, "grad_norm": 0.6264389157295227, "learning_rate": 2.980416214972031e-06, "loss": 0.0636, "step": 36395 }, { "epoch": 0.8019963972301641, "grad_norm": 0.4785834848880768, "learning_rate": 2.979775792255881e-06, "loss": 0.0431, "step": 36396 }, { "epoch": 0.8020184325196803, "grad_norm": 0.7156882882118225, "learning_rate": 2.979135430765184e-06, "loss": 0.0667, "step": 36397 }, { "epoch": 0.8020404678091965, "grad_norm": 0.6338298320770264, "learning_rate": 2.978495130503191e-06, "loss": 0.051, "step": 36398 }, { "epoch": 0.8020625030987126, "grad_norm": 0.8958418965339661, "learning_rate": 2.977854891473169e-06, "loss": 0.0765, "step": 36399 }, { "epoch": 0.8020845383882288, "grad_norm": 0.6999783515930176, "learning_rate": 2.9772147136783823e-06, "loss": 0.042, "step": 36400 }, { "epoch": 0.802106573677745, "grad_norm": 0.45033523440361023, "learning_rate": 2.9765745971220846e-06, "loss": 0.0538, "step": 36401 }, { "epoch": 0.802128608967261, "grad_norm": 0.862942099571228, "learning_rate": 2.9759345418075405e-06, "loss": 0.0774, "step": 36402 }, { "epoch": 0.8021506442567772, "grad_norm": 0.7237112522125244, "learning_rate": 2.9752945477380087e-06, "loss": 0.0433, "step": 36403 }, { "epoch": 0.8021726795462933, "grad_norm": 0.87401282787323, "learning_rate": 2.974654614916755e-06, "loss": 0.0674, "step": 36404 }, { "epoch": 0.8021947148358095, "grad_norm": 0.5975961685180664, "learning_rate": 2.974014743347029e-06, "loss": 0.0383, "step": 36405 }, { "epoch": 0.8022167501253257, "grad_norm": 0.500077486038208, "learning_rate": 2.973374933032095e-06, "loss": 0.101, "step": 36406 }, { "epoch": 0.8022387854148418, "grad_norm": 0.5762280225753784, "learning_rate": 2.9727351839752155e-06, "loss": 0.0651, "step": 36407 }, { "epoch": 0.802260820704358, "grad_norm": 0.6345013976097107, "learning_rate": 2.9720954961796414e-06, "loss": 0.0608, "step": 36408 }, { "epoch": 0.8022828559938742, "grad_norm": 0.5398072600364685, "learning_rate": 2.9714558696486394e-06, "loss": 0.0825, "step": 36409 }, { "epoch": 0.8023048912833903, "grad_norm": 0.9488121867179871, "learning_rate": 2.970816304385453e-06, "loss": 0.0826, "step": 36410 }, { "epoch": 0.8023269265729065, "grad_norm": 0.6664093732833862, "learning_rate": 2.97017680039336e-06, "loss": 0.0523, "step": 36411 }, { "epoch": 0.8023489618624227, "grad_norm": 0.4711340665817261, "learning_rate": 2.9695373576756024e-06, "loss": 0.0423, "step": 36412 }, { "epoch": 0.8023709971519388, "grad_norm": 0.5473487973213196, "learning_rate": 2.968897976235441e-06, "loss": 0.0418, "step": 36413 }, { "epoch": 0.802393032441455, "grad_norm": 0.5462405681610107, "learning_rate": 2.9682586560761406e-06, "loss": 0.0462, "step": 36414 }, { "epoch": 0.8024150677309712, "grad_norm": 0.42906156182289124, "learning_rate": 2.9676193972009443e-06, "loss": 0.0638, "step": 36415 }, { "epoch": 0.8024371030204873, "grad_norm": 0.56147301197052, "learning_rate": 2.9669801996131197e-06, "loss": 0.0732, "step": 36416 }, { "epoch": 0.8024591383100035, "grad_norm": 0.7005074620246887, "learning_rate": 2.9663410633159093e-06, "loss": 0.0665, "step": 36417 }, { "epoch": 0.8024811735995196, "grad_norm": 0.4360136389732361, "learning_rate": 2.965701988312586e-06, "loss": 0.0544, "step": 36418 }, { "epoch": 0.8025032088890358, "grad_norm": 0.6051792502403259, "learning_rate": 2.9650629746063896e-06, "loss": 0.0626, "step": 36419 }, { "epoch": 0.802525244178552, "grad_norm": 0.5943436622619629, "learning_rate": 2.9644240222005863e-06, "loss": 0.0504, "step": 36420 }, { "epoch": 0.8025472794680681, "grad_norm": 0.6018996238708496, "learning_rate": 2.963785131098422e-06, "loss": 0.0713, "step": 36421 }, { "epoch": 0.8025693147575843, "grad_norm": 1.0148653984069824, "learning_rate": 2.963146301303154e-06, "loss": 0.0673, "step": 36422 }, { "epoch": 0.8025913500471005, "grad_norm": 0.36687394976615906, "learning_rate": 2.9625075328180414e-06, "loss": 0.0644, "step": 36423 }, { "epoch": 0.8026133853366166, "grad_norm": 0.6996631026268005, "learning_rate": 2.961868825646328e-06, "loss": 0.0679, "step": 36424 }, { "epoch": 0.8026354206261328, "grad_norm": 0.8693513870239258, "learning_rate": 2.961230179791271e-06, "loss": 0.0807, "step": 36425 }, { "epoch": 0.802657455915649, "grad_norm": 0.6472012996673584, "learning_rate": 2.960591595256125e-06, "loss": 0.0711, "step": 36426 }, { "epoch": 0.802679491205165, "grad_norm": 0.4894630014896393, "learning_rate": 2.9599530720441474e-06, "loss": 0.0608, "step": 36427 }, { "epoch": 0.8027015264946812, "grad_norm": 0.5914157629013062, "learning_rate": 2.9593146101585807e-06, "loss": 0.055, "step": 36428 }, { "epoch": 0.8027235617841973, "grad_norm": 0.7159926891326904, "learning_rate": 2.9586762096026798e-06, "loss": 0.0518, "step": 36429 }, { "epoch": 0.8027455970737135, "grad_norm": 0.6374526619911194, "learning_rate": 2.9580378703797027e-06, "loss": 0.0449, "step": 36430 }, { "epoch": 0.8027676323632297, "grad_norm": 0.9750956296920776, "learning_rate": 2.9573995924928936e-06, "loss": 0.0861, "step": 36431 }, { "epoch": 0.8027896676527458, "grad_norm": 1.0014770030975342, "learning_rate": 2.956761375945508e-06, "loss": 0.0733, "step": 36432 }, { "epoch": 0.802811702942262, "grad_norm": 0.6496173143386841, "learning_rate": 2.9561232207407882e-06, "loss": 0.0531, "step": 36433 }, { "epoch": 0.8028337382317782, "grad_norm": 0.47249820828437805, "learning_rate": 2.955485126881999e-06, "loss": 0.0593, "step": 36434 }, { "epoch": 0.8028557735212943, "grad_norm": 0.4301111698150635, "learning_rate": 2.95484709437238e-06, "loss": 0.0586, "step": 36435 }, { "epoch": 0.8028778088108105, "grad_norm": 0.7242423892021179, "learning_rate": 2.9542091232151826e-06, "loss": 0.0652, "step": 36436 }, { "epoch": 0.8028998441003267, "grad_norm": 0.9193480014801025, "learning_rate": 2.9535712134136605e-06, "loss": 0.0622, "step": 36437 }, { "epoch": 0.8029218793898428, "grad_norm": 0.24086618423461914, "learning_rate": 2.952933364971058e-06, "loss": 0.0299, "step": 36438 }, { "epoch": 0.802943914679359, "grad_norm": 0.26300135254859924, "learning_rate": 2.9522955778906298e-06, "loss": 0.0735, "step": 36439 }, { "epoch": 0.8029659499688752, "grad_norm": 0.39274686574935913, "learning_rate": 2.951657852175612e-06, "loss": 0.0543, "step": 36440 }, { "epoch": 0.8029879852583913, "grad_norm": 0.9094042778015137, "learning_rate": 2.9510201878292697e-06, "loss": 0.0948, "step": 36441 }, { "epoch": 0.8030100205479075, "grad_norm": 0.6800310611724854, "learning_rate": 2.9503825848548384e-06, "loss": 0.072, "step": 36442 }, { "epoch": 0.8030320558374237, "grad_norm": 0.6080234050750732, "learning_rate": 2.949745043255575e-06, "loss": 0.0515, "step": 36443 }, { "epoch": 0.8030540911269398, "grad_norm": 0.5341240167617798, "learning_rate": 2.9491075630347125e-06, "loss": 0.0847, "step": 36444 }, { "epoch": 0.803076126416456, "grad_norm": 0.4998476803302765, "learning_rate": 2.9484701441955163e-06, "loss": 0.0539, "step": 36445 }, { "epoch": 0.8030981617059721, "grad_norm": 1.0171829462051392, "learning_rate": 2.947832786741226e-06, "loss": 0.0873, "step": 36446 }, { "epoch": 0.8031201969954883, "grad_norm": 0.4138948321342468, "learning_rate": 2.9471954906750796e-06, "loss": 0.0537, "step": 36447 }, { "epoch": 0.8031422322850045, "grad_norm": 0.3561485707759857, "learning_rate": 2.946558256000331e-06, "loss": 0.04, "step": 36448 }, { "epoch": 0.8031642675745206, "grad_norm": 0.5464436411857605, "learning_rate": 2.945921082720224e-06, "loss": 0.0535, "step": 36449 }, { "epoch": 0.8031863028640368, "grad_norm": 0.5667100548744202, "learning_rate": 2.9452839708380096e-06, "loss": 0.0517, "step": 36450 }, { "epoch": 0.803208338153553, "grad_norm": 0.716386079788208, "learning_rate": 2.944646920356924e-06, "loss": 0.0606, "step": 36451 }, { "epoch": 0.803230373443069, "grad_norm": 0.4304194450378418, "learning_rate": 2.9440099312802153e-06, "loss": 0.0528, "step": 36452 }, { "epoch": 0.8032524087325852, "grad_norm": 0.623220682144165, "learning_rate": 2.9433730036111305e-06, "loss": 0.0584, "step": 36453 }, { "epoch": 0.8032744440221014, "grad_norm": 0.4872276484966278, "learning_rate": 2.9427361373529153e-06, "loss": 0.0606, "step": 36454 }, { "epoch": 0.8032964793116175, "grad_norm": 1.0316497087478638, "learning_rate": 2.9420993325088075e-06, "loss": 0.0623, "step": 36455 }, { "epoch": 0.8033185146011337, "grad_norm": 0.6541933417320251, "learning_rate": 2.9414625890820523e-06, "loss": 0.0825, "step": 36456 }, { "epoch": 0.8033405498906498, "grad_norm": 0.4410264492034912, "learning_rate": 2.940825907075899e-06, "loss": 0.0331, "step": 36457 }, { "epoch": 0.803362585180166, "grad_norm": 0.46704259514808655, "learning_rate": 2.940189286493582e-06, "loss": 0.0644, "step": 36458 }, { "epoch": 0.8033846204696822, "grad_norm": 0.6350365281105042, "learning_rate": 2.939552727338348e-06, "loss": 0.0571, "step": 36459 }, { "epoch": 0.8034066557591983, "grad_norm": 0.6622087955474854, "learning_rate": 2.93891622961344e-06, "loss": 0.0867, "step": 36460 }, { "epoch": 0.8034286910487145, "grad_norm": 0.34256500005722046, "learning_rate": 2.9382797933221022e-06, "loss": 0.0644, "step": 36461 }, { "epoch": 0.8034507263382307, "grad_norm": 0.6561335325241089, "learning_rate": 2.93764341846757e-06, "loss": 0.0592, "step": 36462 }, { "epoch": 0.8034727616277468, "grad_norm": 0.2757558524608612, "learning_rate": 2.937007105053089e-06, "loss": 0.0638, "step": 36463 }, { "epoch": 0.803494796917263, "grad_norm": 0.31080198287963867, "learning_rate": 2.9363708530819017e-06, "loss": 0.0477, "step": 36464 }, { "epoch": 0.8035168322067792, "grad_norm": 0.26100507378578186, "learning_rate": 2.935734662557244e-06, "loss": 0.0478, "step": 36465 }, { "epoch": 0.8035388674962953, "grad_norm": 0.7611374258995056, "learning_rate": 2.9350985334823634e-06, "loss": 0.0618, "step": 36466 }, { "epoch": 0.8035609027858115, "grad_norm": 1.287222981452942, "learning_rate": 2.934462465860487e-06, "loss": 0.0492, "step": 36467 }, { "epoch": 0.8035829380753277, "grad_norm": 0.808066725730896, "learning_rate": 2.9338264596948704e-06, "loss": 0.092, "step": 36468 }, { "epoch": 0.8036049733648438, "grad_norm": 0.6014911532402039, "learning_rate": 2.9331905149887435e-06, "loss": 0.047, "step": 36469 }, { "epoch": 0.80362700865436, "grad_norm": 0.7685554027557373, "learning_rate": 2.9325546317453512e-06, "loss": 0.0774, "step": 36470 }, { "epoch": 0.8036490439438762, "grad_norm": 0.43956825137138367, "learning_rate": 2.9319188099679246e-06, "loss": 0.0954, "step": 36471 }, { "epoch": 0.8036710792333923, "grad_norm": 0.6282220482826233, "learning_rate": 2.931283049659709e-06, "loss": 0.0671, "step": 36472 }, { "epoch": 0.8036931145229085, "grad_norm": 0.6574510335922241, "learning_rate": 2.930647350823943e-06, "loss": 0.0501, "step": 36473 }, { "epoch": 0.8037151498124246, "grad_norm": 0.5944901704788208, "learning_rate": 2.9300117134638576e-06, "loss": 0.0657, "step": 36474 }, { "epoch": 0.8037371851019408, "grad_norm": 0.5376676917076111, "learning_rate": 2.929376137582696e-06, "loss": 0.0601, "step": 36475 }, { "epoch": 0.8037592203914569, "grad_norm": 0.6252631545066833, "learning_rate": 2.9287406231836957e-06, "loss": 0.062, "step": 36476 }, { "epoch": 0.803781255680973, "grad_norm": 0.4949232041835785, "learning_rate": 2.928105170270095e-06, "loss": 0.0601, "step": 36477 }, { "epoch": 0.8038032909704892, "grad_norm": 0.6984916925430298, "learning_rate": 2.927469778845124e-06, "loss": 0.063, "step": 36478 }, { "epoch": 0.8038253262600054, "grad_norm": 0.4152878522872925, "learning_rate": 2.926834448912024e-06, "loss": 0.0407, "step": 36479 }, { "epoch": 0.8038473615495215, "grad_norm": 0.5867034196853638, "learning_rate": 2.9261991804740335e-06, "loss": 0.0459, "step": 36480 }, { "epoch": 0.8038693968390377, "grad_norm": 0.44791123270988464, "learning_rate": 2.925563973534381e-06, "loss": 0.0574, "step": 36481 }, { "epoch": 0.8038914321285539, "grad_norm": 1.0730870962142944, "learning_rate": 2.9249288280963073e-06, "loss": 0.0566, "step": 36482 }, { "epoch": 0.80391346741807, "grad_norm": 0.8375751972198486, "learning_rate": 2.924293744163045e-06, "loss": 0.084, "step": 36483 }, { "epoch": 0.8039355027075862, "grad_norm": 0.532784640789032, "learning_rate": 2.923658721737834e-06, "loss": 0.0489, "step": 36484 }, { "epoch": 0.8039575379971023, "grad_norm": 0.6827182173728943, "learning_rate": 2.923023760823903e-06, "loss": 0.0552, "step": 36485 }, { "epoch": 0.8039795732866185, "grad_norm": 0.6570671200752258, "learning_rate": 2.922388861424487e-06, "loss": 0.0664, "step": 36486 }, { "epoch": 0.8040016085761347, "grad_norm": 0.6205223202705383, "learning_rate": 2.9217540235428235e-06, "loss": 0.0615, "step": 36487 }, { "epoch": 0.8040236438656508, "grad_norm": 0.346255898475647, "learning_rate": 2.9211192471821416e-06, "loss": 0.053, "step": 36488 }, { "epoch": 0.804045679155167, "grad_norm": 0.6727445721626282, "learning_rate": 2.9204845323456793e-06, "loss": 0.0615, "step": 36489 }, { "epoch": 0.8040677144446832, "grad_norm": 0.7576050162315369, "learning_rate": 2.9198498790366598e-06, "loss": 0.0701, "step": 36490 }, { "epoch": 0.8040897497341993, "grad_norm": 0.836223840713501, "learning_rate": 2.9192152872583307e-06, "loss": 0.0614, "step": 36491 }, { "epoch": 0.8041117850237155, "grad_norm": 0.8076878786087036, "learning_rate": 2.918580757013911e-06, "loss": 0.0724, "step": 36492 }, { "epoch": 0.8041338203132317, "grad_norm": 0.2642118036746979, "learning_rate": 2.917946288306644e-06, "loss": 0.0496, "step": 36493 }, { "epoch": 0.8041558556027478, "grad_norm": 0.39613038301467896, "learning_rate": 2.917311881139751e-06, "loss": 0.0554, "step": 36494 }, { "epoch": 0.804177890892264, "grad_norm": 0.4726684093475342, "learning_rate": 2.9166775355164664e-06, "loss": 0.0649, "step": 36495 }, { "epoch": 0.8041999261817802, "grad_norm": 0.41294774413108826, "learning_rate": 2.916043251440028e-06, "loss": 0.0351, "step": 36496 }, { "epoch": 0.8042219614712963, "grad_norm": 0.5360156893730164, "learning_rate": 2.9154090289136582e-06, "loss": 0.0547, "step": 36497 }, { "epoch": 0.8042439967608125, "grad_norm": 1.4351603984832764, "learning_rate": 2.914774867940589e-06, "loss": 0.095, "step": 36498 }, { "epoch": 0.8042660320503286, "grad_norm": 0.8820241689682007, "learning_rate": 2.914140768524051e-06, "loss": 0.0756, "step": 36499 }, { "epoch": 0.8042880673398448, "grad_norm": 0.5378929376602173, "learning_rate": 2.913506730667281e-06, "loss": 0.0394, "step": 36500 }, { "epoch": 0.8043101026293609, "grad_norm": 0.7348112463951111, "learning_rate": 2.9128727543734985e-06, "loss": 0.0609, "step": 36501 }, { "epoch": 0.804332137918877, "grad_norm": 0.4037686586380005, "learning_rate": 2.912238839645935e-06, "loss": 0.0385, "step": 36502 }, { "epoch": 0.8043541732083932, "grad_norm": 0.9115627408027649, "learning_rate": 2.9116049864878256e-06, "loss": 0.0768, "step": 36503 }, { "epoch": 0.8043762084979094, "grad_norm": 0.7823916673660278, "learning_rate": 2.910971194902392e-06, "loss": 0.067, "step": 36504 }, { "epoch": 0.8043982437874255, "grad_norm": 0.7500115633010864, "learning_rate": 2.910337464892863e-06, "loss": 0.0566, "step": 36505 }, { "epoch": 0.8044202790769417, "grad_norm": 0.5639967918395996, "learning_rate": 2.9097037964624686e-06, "loss": 0.0399, "step": 36506 }, { "epoch": 0.8044423143664579, "grad_norm": 0.5751316547393799, "learning_rate": 2.909070189614442e-06, "loss": 0.0795, "step": 36507 }, { "epoch": 0.804464349655974, "grad_norm": 0.7746582627296448, "learning_rate": 2.9084366443519984e-06, "loss": 0.0813, "step": 36508 }, { "epoch": 0.8044863849454902, "grad_norm": 0.5806178450584412, "learning_rate": 2.9078031606783704e-06, "loss": 0.0582, "step": 36509 }, { "epoch": 0.8045084202350063, "grad_norm": 0.3237830698490143, "learning_rate": 2.907169738596791e-06, "loss": 0.0896, "step": 36510 }, { "epoch": 0.8045304555245225, "grad_norm": 0.41821300983428955, "learning_rate": 2.906536378110475e-06, "loss": 0.0748, "step": 36511 }, { "epoch": 0.8045524908140387, "grad_norm": 0.32164230942726135, "learning_rate": 2.905903079222661e-06, "loss": 0.0421, "step": 36512 }, { "epoch": 0.8045745261035548, "grad_norm": 0.5958178639411926, "learning_rate": 2.9052698419365585e-06, "loss": 0.0604, "step": 36513 }, { "epoch": 0.804596561393071, "grad_norm": 0.4817279279232025, "learning_rate": 2.904636666255409e-06, "loss": 0.0664, "step": 36514 }, { "epoch": 0.8046185966825872, "grad_norm": 0.4790002107620239, "learning_rate": 2.90400355218243e-06, "loss": 0.0513, "step": 36515 }, { "epoch": 0.8046406319721033, "grad_norm": 0.39665156602859497, "learning_rate": 2.903370499720848e-06, "loss": 0.0673, "step": 36516 }, { "epoch": 0.8046626672616195, "grad_norm": 0.5367865562438965, "learning_rate": 2.902737508873885e-06, "loss": 0.0764, "step": 36517 }, { "epoch": 0.8046847025511357, "grad_norm": 0.5880429744720459, "learning_rate": 2.902104579644766e-06, "loss": 0.0406, "step": 36518 }, { "epoch": 0.8047067378406518, "grad_norm": 0.9793722629547119, "learning_rate": 2.90147171203672e-06, "loss": 0.0593, "step": 36519 }, { "epoch": 0.804728773130168, "grad_norm": 0.6426559686660767, "learning_rate": 2.9008389060529634e-06, "loss": 0.0668, "step": 36520 }, { "epoch": 0.8047508084196842, "grad_norm": 0.8195576667785645, "learning_rate": 2.9002061616967214e-06, "loss": 0.0842, "step": 36521 }, { "epoch": 0.8047728437092003, "grad_norm": 0.6047181487083435, "learning_rate": 2.899573478971218e-06, "loss": 0.0646, "step": 36522 }, { "epoch": 0.8047948789987165, "grad_norm": 0.42808425426483154, "learning_rate": 2.8989408578796795e-06, "loss": 0.0636, "step": 36523 }, { "epoch": 0.8048169142882327, "grad_norm": 0.5593540668487549, "learning_rate": 2.8983082984253195e-06, "loss": 0.0459, "step": 36524 }, { "epoch": 0.8048389495777488, "grad_norm": 0.5638756155967712, "learning_rate": 2.8976758006113656e-06, "loss": 0.0652, "step": 36525 }, { "epoch": 0.8048609848672649, "grad_norm": 0.7436270713806152, "learning_rate": 2.897043364441042e-06, "loss": 0.0624, "step": 36526 }, { "epoch": 0.804883020156781, "grad_norm": 0.6940791010856628, "learning_rate": 2.8964109899175644e-06, "loss": 0.058, "step": 36527 }, { "epoch": 0.8049050554462972, "grad_norm": 0.38737836480140686, "learning_rate": 2.8957786770441553e-06, "loss": 0.0533, "step": 36528 }, { "epoch": 0.8049270907358134, "grad_norm": 0.45944780111312866, "learning_rate": 2.895146425824035e-06, "loss": 0.0505, "step": 36529 }, { "epoch": 0.8049491260253295, "grad_norm": 0.4750581383705139, "learning_rate": 2.89451423626043e-06, "loss": 0.0758, "step": 36530 }, { "epoch": 0.8049711613148457, "grad_norm": 0.4906352758407593, "learning_rate": 2.893882108356552e-06, "loss": 0.0677, "step": 36531 }, { "epoch": 0.8049931966043619, "grad_norm": 0.5523057579994202, "learning_rate": 2.8932500421156223e-06, "loss": 0.0659, "step": 36532 }, { "epoch": 0.805015231893878, "grad_norm": 0.48226988315582275, "learning_rate": 2.892618037540868e-06, "loss": 0.0436, "step": 36533 }, { "epoch": 0.8050372671833942, "grad_norm": 0.36467501521110535, "learning_rate": 2.891986094635498e-06, "loss": 0.0423, "step": 36534 }, { "epoch": 0.8050593024729104, "grad_norm": 0.4316031336784363, "learning_rate": 2.8913542134027402e-06, "loss": 0.0478, "step": 36535 }, { "epoch": 0.8050813377624265, "grad_norm": 0.41893360018730164, "learning_rate": 2.8907223938458004e-06, "loss": 0.0642, "step": 36536 }, { "epoch": 0.8051033730519427, "grad_norm": 0.6002265810966492, "learning_rate": 2.8900906359679123e-06, "loss": 0.089, "step": 36537 }, { "epoch": 0.8051254083414588, "grad_norm": 0.48120546340942383, "learning_rate": 2.889458939772282e-06, "loss": 0.0846, "step": 36538 }, { "epoch": 0.805147443630975, "grad_norm": 0.46074098348617554, "learning_rate": 2.888827305262137e-06, "loss": 0.0794, "step": 36539 }, { "epoch": 0.8051694789204912, "grad_norm": 0.7438015937805176, "learning_rate": 2.8881957324406836e-06, "loss": 0.0665, "step": 36540 }, { "epoch": 0.8051915142100073, "grad_norm": 0.4793176054954529, "learning_rate": 2.887564221311146e-06, "loss": 0.0664, "step": 36541 }, { "epoch": 0.8052135494995235, "grad_norm": 0.429776668548584, "learning_rate": 2.886932771876742e-06, "loss": 0.0424, "step": 36542 }, { "epoch": 0.8052355847890397, "grad_norm": 0.6418779492378235, "learning_rate": 2.886301384140681e-06, "loss": 0.062, "step": 36543 }, { "epoch": 0.8052576200785558, "grad_norm": 0.5204522013664246, "learning_rate": 2.8856700581061825e-06, "loss": 0.0522, "step": 36544 }, { "epoch": 0.805279655368072, "grad_norm": 0.6697191596031189, "learning_rate": 2.885038793776463e-06, "loss": 0.0403, "step": 36545 }, { "epoch": 0.8053016906575882, "grad_norm": 1.117035984992981, "learning_rate": 2.8844075911547415e-06, "loss": 0.0791, "step": 36546 }, { "epoch": 0.8053237259471043, "grad_norm": 0.5160592198371887, "learning_rate": 2.8837764502442256e-06, "loss": 0.0363, "step": 36547 }, { "epoch": 0.8053457612366205, "grad_norm": 0.9170532822608948, "learning_rate": 2.883145371048133e-06, "loss": 0.0895, "step": 36548 }, { "epoch": 0.8053677965261367, "grad_norm": 0.6588928699493408, "learning_rate": 2.882514353569682e-06, "loss": 0.0688, "step": 36549 }, { "epoch": 0.8053898318156527, "grad_norm": 0.5641106963157654, "learning_rate": 2.8818833978120807e-06, "loss": 0.0709, "step": 36550 }, { "epoch": 0.8054118671051689, "grad_norm": 0.5677000880241394, "learning_rate": 2.8812525037785436e-06, "loss": 0.0601, "step": 36551 }, { "epoch": 0.805433902394685, "grad_norm": 0.6527820229530334, "learning_rate": 2.8806216714722865e-06, "loss": 0.0553, "step": 36552 }, { "epoch": 0.8054559376842012, "grad_norm": 1.3194133043289185, "learning_rate": 2.8799909008965268e-06, "loss": 0.082, "step": 36553 }, { "epoch": 0.8054779729737174, "grad_norm": 0.6651062369346619, "learning_rate": 2.879360192054469e-06, "loss": 0.044, "step": 36554 }, { "epoch": 0.8055000082632335, "grad_norm": 0.49850937724113464, "learning_rate": 2.8787295449493296e-06, "loss": 0.0586, "step": 36555 }, { "epoch": 0.8055220435527497, "grad_norm": 1.028602123260498, "learning_rate": 2.87809895958432e-06, "loss": 0.0809, "step": 36556 }, { "epoch": 0.8055440788422659, "grad_norm": 0.4821440577507019, "learning_rate": 2.877468435962656e-06, "loss": 0.0669, "step": 36557 }, { "epoch": 0.805566114131782, "grad_norm": 0.41984260082244873, "learning_rate": 2.876837974087546e-06, "loss": 0.0382, "step": 36558 }, { "epoch": 0.8055881494212982, "grad_norm": 0.5507006049156189, "learning_rate": 2.876207573962195e-06, "loss": 0.0551, "step": 36559 }, { "epoch": 0.8056101847108144, "grad_norm": 0.20363543927669525, "learning_rate": 2.875577235589827e-06, "loss": 0.0399, "step": 36560 }, { "epoch": 0.8056322200003305, "grad_norm": 0.6471084952354431, "learning_rate": 2.8749469589736403e-06, "loss": 0.0527, "step": 36561 }, { "epoch": 0.8056542552898467, "grad_norm": 0.5939907431602478, "learning_rate": 2.8743167441168566e-06, "loss": 0.0535, "step": 36562 }, { "epoch": 0.8056762905793629, "grad_norm": 0.5914884209632874, "learning_rate": 2.873686591022671e-06, "loss": 0.0598, "step": 36563 }, { "epoch": 0.805698325868879, "grad_norm": 0.8655258417129517, "learning_rate": 2.8730564996943116e-06, "loss": 0.0742, "step": 36564 }, { "epoch": 0.8057203611583952, "grad_norm": 0.37343013286590576, "learning_rate": 2.8724264701349733e-06, "loss": 0.0633, "step": 36565 }, { "epoch": 0.8057423964479113, "grad_norm": 0.6708533763885498, "learning_rate": 2.8717965023478743e-06, "loss": 0.0586, "step": 36566 }, { "epoch": 0.8057644317374275, "grad_norm": 0.8514782190322876, "learning_rate": 2.871166596336215e-06, "loss": 0.0637, "step": 36567 }, { "epoch": 0.8057864670269437, "grad_norm": 0.29929909110069275, "learning_rate": 2.8705367521032095e-06, "loss": 0.0476, "step": 36568 }, { "epoch": 0.8058085023164598, "grad_norm": 0.5194469094276428, "learning_rate": 2.8699069696520667e-06, "loss": 0.0661, "step": 36569 }, { "epoch": 0.805830537605976, "grad_norm": 0.8854777812957764, "learning_rate": 2.8692772489859864e-06, "loss": 0.0835, "step": 36570 }, { "epoch": 0.8058525728954922, "grad_norm": 0.7103970050811768, "learning_rate": 2.868647590108189e-06, "loss": 0.0693, "step": 36571 }, { "epoch": 0.8058746081850083, "grad_norm": 0.4362964332103729, "learning_rate": 2.8680179930218715e-06, "loss": 0.061, "step": 36572 }, { "epoch": 0.8058966434745245, "grad_norm": 0.49106016755104065, "learning_rate": 2.867388457730247e-06, "loss": 0.0547, "step": 36573 }, { "epoch": 0.8059186787640407, "grad_norm": 0.7194913625717163, "learning_rate": 2.8667589842365154e-06, "loss": 0.0544, "step": 36574 }, { "epoch": 0.8059407140535567, "grad_norm": 0.9691535830497742, "learning_rate": 2.8661295725438884e-06, "loss": 0.0768, "step": 36575 }, { "epoch": 0.8059627493430729, "grad_norm": 0.4888700544834137, "learning_rate": 2.8655002226555714e-06, "loss": 0.053, "step": 36576 }, { "epoch": 0.805984784632589, "grad_norm": 0.5443210005760193, "learning_rate": 2.864870934574767e-06, "loss": 0.0469, "step": 36577 }, { "epoch": 0.8060068199221052, "grad_norm": 0.5498951077461243, "learning_rate": 2.864241708304681e-06, "loss": 0.0592, "step": 36578 }, { "epoch": 0.8060288552116214, "grad_norm": 0.6661785840988159, "learning_rate": 2.86361254384852e-06, "loss": 0.0611, "step": 36579 }, { "epoch": 0.8060508905011375, "grad_norm": 0.7272814512252808, "learning_rate": 2.8629834412094923e-06, "loss": 0.0483, "step": 36580 }, { "epoch": 0.8060729257906537, "grad_norm": 0.7309955954551697, "learning_rate": 2.8623544003907953e-06, "loss": 0.0781, "step": 36581 }, { "epoch": 0.8060949610801699, "grad_norm": 0.6108458042144775, "learning_rate": 2.8617254213956345e-06, "loss": 0.0708, "step": 36582 }, { "epoch": 0.806116996369686, "grad_norm": 0.29033616185188293, "learning_rate": 2.861096504227221e-06, "loss": 0.0679, "step": 36583 }, { "epoch": 0.8061390316592022, "grad_norm": 0.6852944493293762, "learning_rate": 2.8604676488887467e-06, "loss": 0.0894, "step": 36584 }, { "epoch": 0.8061610669487184, "grad_norm": 0.9011914730072021, "learning_rate": 2.859838855383426e-06, "loss": 0.0863, "step": 36585 }, { "epoch": 0.8061831022382345, "grad_norm": 0.3001716434955597, "learning_rate": 2.8592101237144465e-06, "loss": 0.0583, "step": 36586 }, { "epoch": 0.8062051375277507, "grad_norm": 0.6921237111091614, "learning_rate": 2.8585814538850287e-06, "loss": 0.0633, "step": 36587 }, { "epoch": 0.8062271728172669, "grad_norm": 0.6309568881988525, "learning_rate": 2.8579528458983635e-06, "loss": 0.0522, "step": 36588 }, { "epoch": 0.806249208106783, "grad_norm": 0.6495642066001892, "learning_rate": 2.857324299757658e-06, "loss": 0.0429, "step": 36589 }, { "epoch": 0.8062712433962992, "grad_norm": 0.4663931727409363, "learning_rate": 2.8566958154661073e-06, "loss": 0.0643, "step": 36590 }, { "epoch": 0.8062932786858154, "grad_norm": 0.3650567829608917, "learning_rate": 2.8560673930269177e-06, "loss": 0.0822, "step": 36591 }, { "epoch": 0.8063153139753315, "grad_norm": 0.5742931365966797, "learning_rate": 2.855439032443292e-06, "loss": 0.0529, "step": 36592 }, { "epoch": 0.8063373492648477, "grad_norm": 0.444219708442688, "learning_rate": 2.85481073371842e-06, "loss": 0.0418, "step": 36593 }, { "epoch": 0.8063593845543638, "grad_norm": 0.5602816939353943, "learning_rate": 2.854182496855518e-06, "loss": 0.0562, "step": 36594 }, { "epoch": 0.80638141984388, "grad_norm": 0.6106863617897034, "learning_rate": 2.8535543218577724e-06, "loss": 0.0522, "step": 36595 }, { "epoch": 0.8064034551333962, "grad_norm": 0.556767463684082, "learning_rate": 2.852926208728394e-06, "loss": 0.0509, "step": 36596 }, { "epoch": 0.8064254904229123, "grad_norm": 0.6248669624328613, "learning_rate": 2.85229815747057e-06, "loss": 0.0683, "step": 36597 }, { "epoch": 0.8064475257124285, "grad_norm": 0.6245473623275757, "learning_rate": 2.8516701680875086e-06, "loss": 0.0409, "step": 36598 }, { "epoch": 0.8064695610019447, "grad_norm": 0.6409281492233276, "learning_rate": 2.851042240582406e-06, "loss": 0.0672, "step": 36599 }, { "epoch": 0.8064915962914607, "grad_norm": 0.4552147388458252, "learning_rate": 2.850414374958459e-06, "loss": 0.0749, "step": 36600 }, { "epoch": 0.8065136315809769, "grad_norm": 0.4807630181312561, "learning_rate": 2.849786571218866e-06, "loss": 0.0322, "step": 36601 }, { "epoch": 0.806535666870493, "grad_norm": 0.3223976194858551, "learning_rate": 2.8491588293668248e-06, "loss": 0.0397, "step": 36602 }, { "epoch": 0.8065577021600092, "grad_norm": 0.7593708038330078, "learning_rate": 2.848531149405538e-06, "loss": 0.069, "step": 36603 }, { "epoch": 0.8065797374495254, "grad_norm": 0.6053043007850647, "learning_rate": 2.8479035313381924e-06, "loss": 0.0758, "step": 36604 }, { "epoch": 0.8066017727390415, "grad_norm": 0.6230725049972534, "learning_rate": 2.847275975167993e-06, "loss": 0.0517, "step": 36605 }, { "epoch": 0.8066238080285577, "grad_norm": 0.7077847123146057, "learning_rate": 2.8466484808981362e-06, "loss": 0.0553, "step": 36606 }, { "epoch": 0.8066458433180739, "grad_norm": 1.1276483535766602, "learning_rate": 2.8460210485318122e-06, "loss": 0.0827, "step": 36607 }, { "epoch": 0.80666787860759, "grad_norm": 0.9519281387329102, "learning_rate": 2.8453936780722244e-06, "loss": 0.0698, "step": 36608 }, { "epoch": 0.8066899138971062, "grad_norm": 0.7809407711029053, "learning_rate": 2.8447663695225572e-06, "loss": 0.0652, "step": 36609 }, { "epoch": 0.8067119491866224, "grad_norm": 0.5743349194526672, "learning_rate": 2.8441391228860213e-06, "loss": 0.0446, "step": 36610 }, { "epoch": 0.8067339844761385, "grad_norm": 0.6949961185455322, "learning_rate": 2.8435119381657994e-06, "loss": 0.0687, "step": 36611 }, { "epoch": 0.8067560197656547, "grad_norm": 0.8959740996360779, "learning_rate": 2.8428848153650932e-06, "loss": 0.0556, "step": 36612 }, { "epoch": 0.8067780550551709, "grad_norm": 0.6531482934951782, "learning_rate": 2.8422577544870894e-06, "loss": 0.0909, "step": 36613 }, { "epoch": 0.806800090344687, "grad_norm": 0.7384087443351746, "learning_rate": 2.8416307555349863e-06, "loss": 0.0423, "step": 36614 }, { "epoch": 0.8068221256342032, "grad_norm": 0.7945973873138428, "learning_rate": 2.841003818511982e-06, "loss": 0.0706, "step": 36615 }, { "epoch": 0.8068441609237194, "grad_norm": 0.5216994881629944, "learning_rate": 2.8403769434212586e-06, "loss": 0.0429, "step": 36616 }, { "epoch": 0.8068661962132355, "grad_norm": 0.4062744081020355, "learning_rate": 2.8397501302660235e-06, "loss": 0.0499, "step": 36617 }, { "epoch": 0.8068882315027517, "grad_norm": 0.6409728527069092, "learning_rate": 2.839123379049456e-06, "loss": 0.0584, "step": 36618 }, { "epoch": 0.8069102667922678, "grad_norm": 0.5893133282661438, "learning_rate": 2.83849668977476e-06, "loss": 0.0652, "step": 36619 }, { "epoch": 0.806932302081784, "grad_norm": 0.7562910318374634, "learning_rate": 2.8378700624451184e-06, "loss": 0.0423, "step": 36620 }, { "epoch": 0.8069543373713002, "grad_norm": 0.30309951305389404, "learning_rate": 2.8372434970637247e-06, "loss": 0.0324, "step": 36621 }, { "epoch": 0.8069763726608163, "grad_norm": 0.3491683602333069, "learning_rate": 2.8366169936337765e-06, "loss": 0.0773, "step": 36622 }, { "epoch": 0.8069984079503325, "grad_norm": 0.45338112115859985, "learning_rate": 2.8359905521584566e-06, "loss": 0.0421, "step": 36623 }, { "epoch": 0.8070204432398487, "grad_norm": 0.4000023603439331, "learning_rate": 2.835364172640961e-06, "loss": 0.0571, "step": 36624 }, { "epoch": 0.8070424785293647, "grad_norm": 0.5646254420280457, "learning_rate": 2.8347378550844775e-06, "loss": 0.0708, "step": 36625 }, { "epoch": 0.8070645138188809, "grad_norm": 0.954900860786438, "learning_rate": 2.834111599492202e-06, "loss": 0.0689, "step": 36626 }, { "epoch": 0.8070865491083971, "grad_norm": 0.672292172908783, "learning_rate": 2.8334854058673155e-06, "loss": 0.0623, "step": 36627 }, { "epoch": 0.8071085843979132, "grad_norm": 0.5047051906585693, "learning_rate": 2.8328592742130126e-06, "loss": 0.0417, "step": 36628 }, { "epoch": 0.8071306196874294, "grad_norm": 0.43988242745399475, "learning_rate": 2.8322332045324846e-06, "loss": 0.0594, "step": 36629 }, { "epoch": 0.8071526549769455, "grad_norm": 0.33841240406036377, "learning_rate": 2.831607196828915e-06, "loss": 0.0623, "step": 36630 }, { "epoch": 0.8071746902664617, "grad_norm": 0.43553003668785095, "learning_rate": 2.8309812511054996e-06, "loss": 0.0487, "step": 36631 }, { "epoch": 0.8071967255559779, "grad_norm": 0.6679773330688477, "learning_rate": 2.8303553673654143e-06, "loss": 0.0299, "step": 36632 }, { "epoch": 0.807218760845494, "grad_norm": 0.5645571947097778, "learning_rate": 2.8297295456118616e-06, "loss": 0.0473, "step": 36633 }, { "epoch": 0.8072407961350102, "grad_norm": 0.4399299621582031, "learning_rate": 2.829103785848019e-06, "loss": 0.0493, "step": 36634 }, { "epoch": 0.8072628314245264, "grad_norm": 0.8603717088699341, "learning_rate": 2.8284780880770824e-06, "loss": 0.0898, "step": 36635 }, { "epoch": 0.8072848667140425, "grad_norm": 0.7033089995384216, "learning_rate": 2.8278524523022285e-06, "loss": 0.0448, "step": 36636 }, { "epoch": 0.8073069020035587, "grad_norm": 0.7481626272201538, "learning_rate": 2.827226878526651e-06, "loss": 0.0702, "step": 36637 }, { "epoch": 0.8073289372930749, "grad_norm": 0.49132072925567627, "learning_rate": 2.826601366753536e-06, "loss": 0.0719, "step": 36638 }, { "epoch": 0.807350972582591, "grad_norm": 0.7827687859535217, "learning_rate": 2.825975916986062e-06, "loss": 0.0619, "step": 36639 }, { "epoch": 0.8073730078721072, "grad_norm": 0.5878933668136597, "learning_rate": 2.825350529227428e-06, "loss": 0.0541, "step": 36640 }, { "epoch": 0.8073950431616234, "grad_norm": 1.1987464427947998, "learning_rate": 2.8247252034808092e-06, "loss": 0.0696, "step": 36641 }, { "epoch": 0.8074170784511395, "grad_norm": 0.7055763006210327, "learning_rate": 2.824099939749398e-06, "loss": 0.0549, "step": 36642 }, { "epoch": 0.8074391137406557, "grad_norm": 0.7080283761024475, "learning_rate": 2.8234747380363697e-06, "loss": 0.0558, "step": 36643 }, { "epoch": 0.8074611490301719, "grad_norm": 0.7486594319343567, "learning_rate": 2.8228495983449152e-06, "loss": 0.0611, "step": 36644 }, { "epoch": 0.807483184319688, "grad_norm": 0.432148277759552, "learning_rate": 2.8222245206782217e-06, "loss": 0.042, "step": 36645 }, { "epoch": 0.8075052196092042, "grad_norm": 0.8676989078521729, "learning_rate": 2.8215995050394637e-06, "loss": 0.0635, "step": 36646 }, { "epoch": 0.8075272548987203, "grad_norm": 0.3731418251991272, "learning_rate": 2.8209745514318312e-06, "loss": 0.0463, "step": 36647 }, { "epoch": 0.8075492901882365, "grad_norm": 0.3231634497642517, "learning_rate": 2.8203496598585066e-06, "loss": 0.0558, "step": 36648 }, { "epoch": 0.8075713254777526, "grad_norm": 0.37826019525527954, "learning_rate": 2.8197248303226755e-06, "loss": 0.0533, "step": 36649 }, { "epoch": 0.8075933607672687, "grad_norm": 0.5120990872383118, "learning_rate": 2.8191000628275133e-06, "loss": 0.1012, "step": 36650 }, { "epoch": 0.8076153960567849, "grad_norm": 0.5512638688087463, "learning_rate": 2.8184753573762064e-06, "loss": 0.0555, "step": 36651 }, { "epoch": 0.8076374313463011, "grad_norm": 0.6514760851860046, "learning_rate": 2.817850713971942e-06, "loss": 0.0725, "step": 36652 }, { "epoch": 0.8076594666358172, "grad_norm": 0.6775553226470947, "learning_rate": 2.8172261326178905e-06, "loss": 0.0542, "step": 36653 }, { "epoch": 0.8076815019253334, "grad_norm": 0.6850423216819763, "learning_rate": 2.8166016133172453e-06, "loss": 0.0831, "step": 36654 }, { "epoch": 0.8077035372148496, "grad_norm": 0.6808932423591614, "learning_rate": 2.8159771560731713e-06, "loss": 0.066, "step": 36655 }, { "epoch": 0.8077255725043657, "grad_norm": 0.6472330689430237, "learning_rate": 2.815352760888868e-06, "loss": 0.0698, "step": 36656 }, { "epoch": 0.8077476077938819, "grad_norm": 0.653960108757019, "learning_rate": 2.8147284277675044e-06, "loss": 0.0695, "step": 36657 }, { "epoch": 0.807769643083398, "grad_norm": 0.7361530065536499, "learning_rate": 2.8141041567122617e-06, "loss": 0.0755, "step": 36658 }, { "epoch": 0.8077916783729142, "grad_norm": 0.2774200141429901, "learning_rate": 2.8134799477263253e-06, "loss": 0.0453, "step": 36659 }, { "epoch": 0.8078137136624304, "grad_norm": 0.6175176501274109, "learning_rate": 2.8128558008128674e-06, "loss": 0.0518, "step": 36660 }, { "epoch": 0.8078357489519465, "grad_norm": 0.7754948139190674, "learning_rate": 2.8122317159750725e-06, "loss": 0.0983, "step": 36661 }, { "epoch": 0.8078577842414627, "grad_norm": 0.5923319458961487, "learning_rate": 2.81160769321611e-06, "loss": 0.0578, "step": 36662 }, { "epoch": 0.8078798195309789, "grad_norm": 0.6059993505477905, "learning_rate": 2.8109837325391734e-06, "loss": 0.0725, "step": 36663 }, { "epoch": 0.807901854820495, "grad_norm": 0.35844990611076355, "learning_rate": 2.810359833947429e-06, "loss": 0.056, "step": 36664 }, { "epoch": 0.8079238901100112, "grad_norm": 0.8236426115036011, "learning_rate": 2.809735997444062e-06, "loss": 0.075, "step": 36665 }, { "epoch": 0.8079459253995274, "grad_norm": 0.9215085506439209, "learning_rate": 2.809112223032241e-06, "loss": 0.0912, "step": 36666 }, { "epoch": 0.8079679606890435, "grad_norm": 0.32885774970054626, "learning_rate": 2.8084885107151536e-06, "loss": 0.0535, "step": 36667 }, { "epoch": 0.8079899959785597, "grad_norm": 0.5467613339424133, "learning_rate": 2.8078648604959685e-06, "loss": 0.0661, "step": 36668 }, { "epoch": 0.8080120312680759, "grad_norm": 0.7670264840126038, "learning_rate": 2.8072412723778705e-06, "loss": 0.085, "step": 36669 }, { "epoch": 0.808034066557592, "grad_norm": 0.21679943799972534, "learning_rate": 2.8066177463640265e-06, "loss": 0.0536, "step": 36670 }, { "epoch": 0.8080561018471082, "grad_norm": 0.5128687024116516, "learning_rate": 2.8059942824576177e-06, "loss": 0.0434, "step": 36671 }, { "epoch": 0.8080781371366244, "grad_norm": 0.43424084782600403, "learning_rate": 2.8053708806618212e-06, "loss": 0.0452, "step": 36672 }, { "epoch": 0.8081001724261405, "grad_norm": 0.51671302318573, "learning_rate": 2.8047475409798084e-06, "loss": 0.0996, "step": 36673 }, { "epoch": 0.8081222077156566, "grad_norm": 0.511917769908905, "learning_rate": 2.804124263414754e-06, "loss": 0.0642, "step": 36674 }, { "epoch": 0.8081442430051727, "grad_norm": 0.6111350655555725, "learning_rate": 2.8035010479698354e-06, "loss": 0.0482, "step": 36675 }, { "epoch": 0.8081662782946889, "grad_norm": 0.6560736894607544, "learning_rate": 2.8028778946482296e-06, "loss": 0.07, "step": 36676 }, { "epoch": 0.8081883135842051, "grad_norm": 0.6868243217468262, "learning_rate": 2.802254803453105e-06, "loss": 0.0632, "step": 36677 }, { "epoch": 0.8082103488737212, "grad_norm": 0.5268693566322327, "learning_rate": 2.8016317743876364e-06, "loss": 0.0626, "step": 36678 }, { "epoch": 0.8082323841632374, "grad_norm": 0.5095938444137573, "learning_rate": 2.8010088074550018e-06, "loss": 0.0783, "step": 36679 }, { "epoch": 0.8082544194527536, "grad_norm": 0.5689674019813538, "learning_rate": 2.8003859026583684e-06, "loss": 0.0875, "step": 36680 }, { "epoch": 0.8082764547422697, "grad_norm": 0.4488569498062134, "learning_rate": 2.7997630600009107e-06, "loss": 0.0447, "step": 36681 }, { "epoch": 0.8082984900317859, "grad_norm": 0.30786964297294617, "learning_rate": 2.7991402794858023e-06, "loss": 0.0494, "step": 36682 }, { "epoch": 0.808320525321302, "grad_norm": 0.4539032280445099, "learning_rate": 2.7985175611162193e-06, "loss": 0.0515, "step": 36683 }, { "epoch": 0.8083425606108182, "grad_norm": 1.1271467208862305, "learning_rate": 2.7978949048953245e-06, "loss": 0.0947, "step": 36684 }, { "epoch": 0.8083645959003344, "grad_norm": 0.4368842542171478, "learning_rate": 2.797272310826295e-06, "loss": 0.071, "step": 36685 }, { "epoch": 0.8083866311898505, "grad_norm": 0.5137709975242615, "learning_rate": 2.796649778912305e-06, "loss": 0.0567, "step": 36686 }, { "epoch": 0.8084086664793667, "grad_norm": 0.7354260087013245, "learning_rate": 2.796027309156518e-06, "loss": 0.0718, "step": 36687 }, { "epoch": 0.8084307017688829, "grad_norm": 0.6781359314918518, "learning_rate": 2.7954049015621125e-06, "loss": 0.0566, "step": 36688 }, { "epoch": 0.808452737058399, "grad_norm": 0.5135543942451477, "learning_rate": 2.7947825561322453e-06, "loss": 0.0647, "step": 36689 }, { "epoch": 0.8084747723479152, "grad_norm": 0.577811598777771, "learning_rate": 2.794160272870104e-06, "loss": 0.065, "step": 36690 }, { "epoch": 0.8084968076374314, "grad_norm": 0.4700991213321686, "learning_rate": 2.793538051778845e-06, "loss": 0.0655, "step": 36691 }, { "epoch": 0.8085188429269475, "grad_norm": 0.5867341756820679, "learning_rate": 2.792915892861648e-06, "loss": 0.0731, "step": 36692 }, { "epoch": 0.8085408782164637, "grad_norm": 0.8219080567359924, "learning_rate": 2.7922937961216715e-06, "loss": 0.0847, "step": 36693 }, { "epoch": 0.8085629135059799, "grad_norm": 0.7431305646896362, "learning_rate": 2.791671761562089e-06, "loss": 0.0658, "step": 36694 }, { "epoch": 0.808584948795496, "grad_norm": 0.3152657151222229, "learning_rate": 2.7910497891860712e-06, "loss": 0.0595, "step": 36695 }, { "epoch": 0.8086069840850122, "grad_norm": 0.5406413674354553, "learning_rate": 2.790427878996783e-06, "loss": 0.0407, "step": 36696 }, { "epoch": 0.8086290193745284, "grad_norm": 0.7110746502876282, "learning_rate": 2.789806030997391e-06, "loss": 0.0495, "step": 36697 }, { "epoch": 0.8086510546640445, "grad_norm": 0.446987122297287, "learning_rate": 2.789184245191065e-06, "loss": 0.0556, "step": 36698 }, { "epoch": 0.8086730899535606, "grad_norm": 0.30045345425605774, "learning_rate": 2.788562521580975e-06, "loss": 0.0461, "step": 36699 }, { "epoch": 0.8086951252430767, "grad_norm": 0.437786340713501, "learning_rate": 2.787940860170281e-06, "loss": 0.0415, "step": 36700 }, { "epoch": 0.8087171605325929, "grad_norm": 0.4982253909111023, "learning_rate": 2.787319260962152e-06, "loss": 0.0449, "step": 36701 }, { "epoch": 0.8087391958221091, "grad_norm": 0.37179771065711975, "learning_rate": 2.7866977239597597e-06, "loss": 0.05, "step": 36702 }, { "epoch": 0.8087612311116252, "grad_norm": 0.5281942486763, "learning_rate": 2.7860762491662607e-06, "loss": 0.0471, "step": 36703 }, { "epoch": 0.8087832664011414, "grad_norm": 0.5553551912307739, "learning_rate": 2.7854548365848253e-06, "loss": 0.0369, "step": 36704 }, { "epoch": 0.8088053016906576, "grad_norm": 0.3464672565460205, "learning_rate": 2.784833486218618e-06, "loss": 0.0321, "step": 36705 }, { "epoch": 0.8088273369801737, "grad_norm": 0.6637881398200989, "learning_rate": 2.784212198070808e-06, "loss": 0.043, "step": 36706 }, { "epoch": 0.8088493722696899, "grad_norm": 0.6442928910255432, "learning_rate": 2.7835909721445523e-06, "loss": 0.0641, "step": 36707 }, { "epoch": 0.8088714075592061, "grad_norm": 0.46870702505111694, "learning_rate": 2.782969808443019e-06, "loss": 0.0533, "step": 36708 }, { "epoch": 0.8088934428487222, "grad_norm": 0.5626901984214783, "learning_rate": 2.782348706969375e-06, "loss": 0.0512, "step": 36709 }, { "epoch": 0.8089154781382384, "grad_norm": 0.3255003094673157, "learning_rate": 2.7817276677267776e-06, "loss": 0.0522, "step": 36710 }, { "epoch": 0.8089375134277546, "grad_norm": 0.38996225595474243, "learning_rate": 2.781106690718397e-06, "loss": 0.0653, "step": 36711 }, { "epoch": 0.8089595487172707, "grad_norm": 0.7095726728439331, "learning_rate": 2.7804857759473844e-06, "loss": 0.077, "step": 36712 }, { "epoch": 0.8089815840067869, "grad_norm": 0.37981438636779785, "learning_rate": 2.7798649234169166e-06, "loss": 0.0551, "step": 36713 }, { "epoch": 0.809003619296303, "grad_norm": 0.42719799280166626, "learning_rate": 2.779244133130147e-06, "loss": 0.0676, "step": 36714 }, { "epoch": 0.8090256545858192, "grad_norm": 0.606098473072052, "learning_rate": 2.7786234050902455e-06, "loss": 0.0692, "step": 36715 }, { "epoch": 0.8090476898753354, "grad_norm": 0.8658291697502136, "learning_rate": 2.7780027393003636e-06, "loss": 0.0911, "step": 36716 }, { "epoch": 0.8090697251648515, "grad_norm": 0.42022785544395447, "learning_rate": 2.7773821357636662e-06, "loss": 0.0374, "step": 36717 }, { "epoch": 0.8090917604543677, "grad_norm": 0.5169602632522583, "learning_rate": 2.7767615944833226e-06, "loss": 0.0695, "step": 36718 }, { "epoch": 0.8091137957438839, "grad_norm": 1.116041660308838, "learning_rate": 2.776141115462481e-06, "loss": 0.0583, "step": 36719 }, { "epoch": 0.8091358310334, "grad_norm": 0.6032993793487549, "learning_rate": 2.775520698704308e-06, "loss": 0.0751, "step": 36720 }, { "epoch": 0.8091578663229162, "grad_norm": 0.3252577781677246, "learning_rate": 2.774900344211964e-06, "loss": 0.0269, "step": 36721 }, { "epoch": 0.8091799016124324, "grad_norm": 0.5252171158790588, "learning_rate": 2.774280051988612e-06, "loss": 0.0675, "step": 36722 }, { "epoch": 0.8092019369019484, "grad_norm": 0.5161476731300354, "learning_rate": 2.773659822037402e-06, "loss": 0.0642, "step": 36723 }, { "epoch": 0.8092239721914646, "grad_norm": 0.8996185660362244, "learning_rate": 2.7730396543615006e-06, "loss": 0.0637, "step": 36724 }, { "epoch": 0.8092460074809807, "grad_norm": 1.1371870040893555, "learning_rate": 2.772419548964068e-06, "loss": 0.0856, "step": 36725 }, { "epoch": 0.8092680427704969, "grad_norm": 0.6917959451675415, "learning_rate": 2.771799505848257e-06, "loss": 0.0727, "step": 36726 }, { "epoch": 0.8092900780600131, "grad_norm": 0.9490276575088501, "learning_rate": 2.7711795250172265e-06, "loss": 0.0698, "step": 36727 }, { "epoch": 0.8093121133495292, "grad_norm": 0.6329763531684875, "learning_rate": 2.7705596064741377e-06, "loss": 0.0492, "step": 36728 }, { "epoch": 0.8093341486390454, "grad_norm": 0.4876573979854584, "learning_rate": 2.7699397502221496e-06, "loss": 0.0564, "step": 36729 }, { "epoch": 0.8093561839285616, "grad_norm": 0.6716111898422241, "learning_rate": 2.7693199562644126e-06, "loss": 0.0639, "step": 36730 }, { "epoch": 0.8093782192180777, "grad_norm": 0.44579896330833435, "learning_rate": 2.7687002246040897e-06, "loss": 0.0403, "step": 36731 }, { "epoch": 0.8094002545075939, "grad_norm": 0.4764452576637268, "learning_rate": 2.768080555244338e-06, "loss": 0.0581, "step": 36732 }, { "epoch": 0.8094222897971101, "grad_norm": 0.7694047093391418, "learning_rate": 2.7674609481883073e-06, "loss": 0.0474, "step": 36733 }, { "epoch": 0.8094443250866262, "grad_norm": 0.5279239416122437, "learning_rate": 2.766841403439161e-06, "loss": 0.0644, "step": 36734 }, { "epoch": 0.8094663603761424, "grad_norm": 0.13999252021312714, "learning_rate": 2.766221921000045e-06, "loss": 0.0344, "step": 36735 }, { "epoch": 0.8094883956656586, "grad_norm": 0.33128392696380615, "learning_rate": 2.765602500874127e-06, "loss": 0.0459, "step": 36736 }, { "epoch": 0.8095104309551747, "grad_norm": 0.25060442090034485, "learning_rate": 2.7649831430645547e-06, "loss": 0.0546, "step": 36737 }, { "epoch": 0.8095324662446909, "grad_norm": 0.4175037145614624, "learning_rate": 2.7643638475744865e-06, "loss": 0.0449, "step": 36738 }, { "epoch": 0.809554501534207, "grad_norm": 0.6983309984207153, "learning_rate": 2.7637446144070706e-06, "loss": 0.0513, "step": 36739 }, { "epoch": 0.8095765368237232, "grad_norm": 1.467745304107666, "learning_rate": 2.763125443565464e-06, "loss": 0.0495, "step": 36740 }, { "epoch": 0.8095985721132394, "grad_norm": 0.5774333477020264, "learning_rate": 2.7625063350528278e-06, "loss": 0.0566, "step": 36741 }, { "epoch": 0.8096206074027555, "grad_norm": 0.3802669048309326, "learning_rate": 2.7618872888723027e-06, "loss": 0.0492, "step": 36742 }, { "epoch": 0.8096426426922717, "grad_norm": 0.6668407917022705, "learning_rate": 2.76126830502705e-06, "loss": 0.0741, "step": 36743 }, { "epoch": 0.8096646779817879, "grad_norm": 0.28558647632598877, "learning_rate": 2.7606493835202197e-06, "loss": 0.0556, "step": 36744 }, { "epoch": 0.809686713271304, "grad_norm": 0.41669172048568726, "learning_rate": 2.7600305243549695e-06, "loss": 0.0278, "step": 36745 }, { "epoch": 0.8097087485608202, "grad_norm": 0.5351802706718445, "learning_rate": 2.759411727534444e-06, "loss": 0.0605, "step": 36746 }, { "epoch": 0.8097307838503364, "grad_norm": 0.8540655970573425, "learning_rate": 2.7587929930617983e-06, "loss": 0.0643, "step": 36747 }, { "epoch": 0.8097528191398524, "grad_norm": 0.865332305431366, "learning_rate": 2.7581743209401872e-06, "loss": 0.0452, "step": 36748 }, { "epoch": 0.8097748544293686, "grad_norm": 0.7362353801727295, "learning_rate": 2.7575557111727562e-06, "loss": 0.0847, "step": 36749 }, { "epoch": 0.8097968897188847, "grad_norm": 0.49188336730003357, "learning_rate": 2.7569371637626596e-06, "loss": 0.0515, "step": 36750 }, { "epoch": 0.8098189250084009, "grad_norm": 0.39140456914901733, "learning_rate": 2.7563186787130446e-06, "loss": 0.0579, "step": 36751 }, { "epoch": 0.8098409602979171, "grad_norm": 0.529374361038208, "learning_rate": 2.7557002560270706e-06, "loss": 0.0634, "step": 36752 }, { "epoch": 0.8098629955874332, "grad_norm": 0.42231041193008423, "learning_rate": 2.755081895707875e-06, "loss": 0.0768, "step": 36753 }, { "epoch": 0.8098850308769494, "grad_norm": 0.6316422820091248, "learning_rate": 2.754463597758615e-06, "loss": 0.0617, "step": 36754 }, { "epoch": 0.8099070661664656, "grad_norm": 0.4584239721298218, "learning_rate": 2.7538453621824422e-06, "loss": 0.0567, "step": 36755 }, { "epoch": 0.8099291014559817, "grad_norm": 0.5761702060699463, "learning_rate": 2.753227188982498e-06, "loss": 0.068, "step": 36756 }, { "epoch": 0.8099511367454979, "grad_norm": 0.5777201652526855, "learning_rate": 2.7526090781619377e-06, "loss": 0.0778, "step": 36757 }, { "epoch": 0.8099731720350141, "grad_norm": 0.8464981317520142, "learning_rate": 2.7519910297239015e-06, "loss": 0.0921, "step": 36758 }, { "epoch": 0.8099952073245302, "grad_norm": 0.5665041208267212, "learning_rate": 2.7513730436715482e-06, "loss": 0.0748, "step": 36759 }, { "epoch": 0.8100172426140464, "grad_norm": 0.8986921906471252, "learning_rate": 2.750755120008018e-06, "loss": 0.0462, "step": 36760 }, { "epoch": 0.8100392779035626, "grad_norm": 1.0746756792068481, "learning_rate": 2.750137258736464e-06, "loss": 0.0719, "step": 36761 }, { "epoch": 0.8100613131930787, "grad_norm": 0.4805765151977539, "learning_rate": 2.7495194598600252e-06, "loss": 0.0317, "step": 36762 }, { "epoch": 0.8100833484825949, "grad_norm": 0.38507381081581116, "learning_rate": 2.7489017233818535e-06, "loss": 0.0435, "step": 36763 }, { "epoch": 0.810105383772111, "grad_norm": 0.6630502343177795, "learning_rate": 2.7482840493050992e-06, "loss": 0.0587, "step": 36764 }, { "epoch": 0.8101274190616272, "grad_norm": 0.48124024271965027, "learning_rate": 2.7476664376329014e-06, "loss": 0.0395, "step": 36765 }, { "epoch": 0.8101494543511434, "grad_norm": 0.5600640773773193, "learning_rate": 2.747048888368406e-06, "loss": 0.0694, "step": 36766 }, { "epoch": 0.8101714896406595, "grad_norm": 0.897005558013916, "learning_rate": 2.7464314015147635e-06, "loss": 0.0751, "step": 36767 }, { "epoch": 0.8101935249301757, "grad_norm": 0.43152323365211487, "learning_rate": 2.7458139770751185e-06, "loss": 0.0637, "step": 36768 }, { "epoch": 0.8102155602196919, "grad_norm": 0.7018871903419495, "learning_rate": 2.7451966150526114e-06, "loss": 0.0728, "step": 36769 }, { "epoch": 0.810237595509208, "grad_norm": 0.8424342274665833, "learning_rate": 2.7445793154503903e-06, "loss": 0.0599, "step": 36770 }, { "epoch": 0.8102596307987242, "grad_norm": 0.7312642335891724, "learning_rate": 2.7439620782716e-06, "loss": 0.0479, "step": 36771 }, { "epoch": 0.8102816660882404, "grad_norm": 0.618867039680481, "learning_rate": 2.7433449035193807e-06, "loss": 0.0533, "step": 36772 }, { "epoch": 0.8103037013777564, "grad_norm": 0.47170862555503845, "learning_rate": 2.7427277911968794e-06, "loss": 0.0662, "step": 36773 }, { "epoch": 0.8103257366672726, "grad_norm": 0.7910267114639282, "learning_rate": 2.7421107413072373e-06, "loss": 0.069, "step": 36774 }, { "epoch": 0.8103477719567888, "grad_norm": 0.696130633354187, "learning_rate": 2.741493753853603e-06, "loss": 0.0604, "step": 36775 }, { "epoch": 0.8103698072463049, "grad_norm": 0.6686900854110718, "learning_rate": 2.740876828839109e-06, "loss": 0.0534, "step": 36776 }, { "epoch": 0.8103918425358211, "grad_norm": 0.7705389857292175, "learning_rate": 2.740259966266905e-06, "loss": 0.0623, "step": 36777 }, { "epoch": 0.8104138778253372, "grad_norm": 1.09299898147583, "learning_rate": 2.7396431661401294e-06, "loss": 0.0812, "step": 36778 }, { "epoch": 0.8104359131148534, "grad_norm": 0.42680755257606506, "learning_rate": 2.7390264284619304e-06, "loss": 0.035, "step": 36779 }, { "epoch": 0.8104579484043696, "grad_norm": 0.869162380695343, "learning_rate": 2.7384097532354417e-06, "loss": 0.0932, "step": 36780 }, { "epoch": 0.8104799836938857, "grad_norm": 0.6843257546424866, "learning_rate": 2.7377931404638064e-06, "loss": 0.0761, "step": 36781 }, { "epoch": 0.8105020189834019, "grad_norm": 0.5503395199775696, "learning_rate": 2.7371765901501694e-06, "loss": 0.0532, "step": 36782 }, { "epoch": 0.8105240542729181, "grad_norm": 0.686154842376709, "learning_rate": 2.7365601022976657e-06, "loss": 0.0689, "step": 36783 }, { "epoch": 0.8105460895624342, "grad_norm": 0.5099059343338013, "learning_rate": 2.7359436769094385e-06, "loss": 0.0438, "step": 36784 }, { "epoch": 0.8105681248519504, "grad_norm": 0.4843721091747284, "learning_rate": 2.7353273139886215e-06, "loss": 0.0348, "step": 36785 }, { "epoch": 0.8105901601414666, "grad_norm": 0.7568168044090271, "learning_rate": 2.7347110135383664e-06, "loss": 0.0768, "step": 36786 }, { "epoch": 0.8106121954309827, "grad_norm": 0.69432133436203, "learning_rate": 2.7340947755618022e-06, "loss": 0.073, "step": 36787 }, { "epoch": 0.8106342307204989, "grad_norm": 0.5037775635719299, "learning_rate": 2.7334786000620736e-06, "loss": 0.0363, "step": 36788 }, { "epoch": 0.8106562660100151, "grad_norm": 0.5514956712722778, "learning_rate": 2.732862487042313e-06, "loss": 0.0643, "step": 36789 }, { "epoch": 0.8106783012995312, "grad_norm": 0.7866395711898804, "learning_rate": 2.732246436505661e-06, "loss": 0.0754, "step": 36790 }, { "epoch": 0.8107003365890474, "grad_norm": 0.8237378001213074, "learning_rate": 2.7316304484552617e-06, "loss": 0.0685, "step": 36791 }, { "epoch": 0.8107223718785636, "grad_norm": 0.48405760526657104, "learning_rate": 2.731014522894238e-06, "loss": 0.048, "step": 36792 }, { "epoch": 0.8107444071680797, "grad_norm": 0.6272127032279968, "learning_rate": 2.7303986598257454e-06, "loss": 0.0527, "step": 36793 }, { "epoch": 0.8107664424575959, "grad_norm": 0.30913180112838745, "learning_rate": 2.729782859252908e-06, "loss": 0.0337, "step": 36794 }, { "epoch": 0.810788477747112, "grad_norm": 1.0431408882141113, "learning_rate": 2.7291671211788698e-06, "loss": 0.0731, "step": 36795 }, { "epoch": 0.8108105130366282, "grad_norm": 0.586264431476593, "learning_rate": 2.728551445606759e-06, "loss": 0.0673, "step": 36796 }, { "epoch": 0.8108325483261444, "grad_norm": 0.4872722327709198, "learning_rate": 2.7279358325397172e-06, "loss": 0.0511, "step": 36797 }, { "epoch": 0.8108545836156604, "grad_norm": 0.6379541158676147, "learning_rate": 2.727320281980883e-06, "loss": 0.0527, "step": 36798 }, { "epoch": 0.8108766189051766, "grad_norm": 0.8381835222244263, "learning_rate": 2.7267047939333846e-06, "loss": 0.0653, "step": 36799 }, { "epoch": 0.8108986541946928, "grad_norm": 0.2721906304359436, "learning_rate": 2.7260893684003578e-06, "loss": 0.0488, "step": 36800 }, { "epoch": 0.8109206894842089, "grad_norm": 0.42316529154777527, "learning_rate": 2.725474005384941e-06, "loss": 0.0391, "step": 36801 }, { "epoch": 0.8109427247737251, "grad_norm": 0.7725616693496704, "learning_rate": 2.7248587048902707e-06, "loss": 0.0667, "step": 36802 }, { "epoch": 0.8109647600632413, "grad_norm": 0.6619048714637756, "learning_rate": 2.724243466919475e-06, "loss": 0.0633, "step": 36803 }, { "epoch": 0.8109867953527574, "grad_norm": 0.3134659230709076, "learning_rate": 2.7236282914756877e-06, "loss": 0.0453, "step": 36804 }, { "epoch": 0.8110088306422736, "grad_norm": 0.636580228805542, "learning_rate": 2.7230131785620497e-06, "loss": 0.0464, "step": 36805 }, { "epoch": 0.8110308659317897, "grad_norm": 0.541220486164093, "learning_rate": 2.7223981281816856e-06, "loss": 0.0416, "step": 36806 }, { "epoch": 0.8110529012213059, "grad_norm": 0.8097094297409058, "learning_rate": 2.721783140337736e-06, "loss": 0.0542, "step": 36807 }, { "epoch": 0.8110749365108221, "grad_norm": 0.5575887560844421, "learning_rate": 2.7211682150333196e-06, "loss": 0.0605, "step": 36808 }, { "epoch": 0.8110969718003382, "grad_norm": 0.32309043407440186, "learning_rate": 2.720553352271586e-06, "loss": 0.0671, "step": 36809 }, { "epoch": 0.8111190070898544, "grad_norm": 0.8271071910858154, "learning_rate": 2.7199385520556557e-06, "loss": 0.0695, "step": 36810 }, { "epoch": 0.8111410423793706, "grad_norm": 0.550369381904602, "learning_rate": 2.7193238143886667e-06, "loss": 0.0594, "step": 36811 }, { "epoch": 0.8111630776688867, "grad_norm": 0.33612996339797974, "learning_rate": 2.7187091392737447e-06, "loss": 0.062, "step": 36812 }, { "epoch": 0.8111851129584029, "grad_norm": 0.5772143006324768, "learning_rate": 2.718094526714022e-06, "loss": 0.0711, "step": 36813 }, { "epoch": 0.8112071482479191, "grad_norm": 0.5564448237419128, "learning_rate": 2.7174799767126326e-06, "loss": 0.0656, "step": 36814 }, { "epoch": 0.8112291835374352, "grad_norm": 0.5055555105209351, "learning_rate": 2.7168654892726974e-06, "loss": 0.0848, "step": 36815 }, { "epoch": 0.8112512188269514, "grad_norm": 0.8427682518959045, "learning_rate": 2.7162510643973593e-06, "loss": 0.0775, "step": 36816 }, { "epoch": 0.8112732541164676, "grad_norm": 1.2875927686691284, "learning_rate": 2.7156367020897404e-06, "loss": 0.0689, "step": 36817 }, { "epoch": 0.8112952894059837, "grad_norm": 0.5485445261001587, "learning_rate": 2.7150224023529735e-06, "loss": 0.0418, "step": 36818 }, { "epoch": 0.8113173246954999, "grad_norm": 0.7652220726013184, "learning_rate": 2.714408165190181e-06, "loss": 0.0762, "step": 36819 }, { "epoch": 0.811339359985016, "grad_norm": 0.7744255065917969, "learning_rate": 2.713793990604495e-06, "loss": 0.0495, "step": 36820 }, { "epoch": 0.8113613952745322, "grad_norm": 0.9424261450767517, "learning_rate": 2.7131798785990503e-06, "loss": 0.0631, "step": 36821 }, { "epoch": 0.8113834305640483, "grad_norm": 0.5596200227737427, "learning_rate": 2.7125658291769656e-06, "loss": 0.0559, "step": 36822 }, { "epoch": 0.8114054658535644, "grad_norm": 0.7246558666229248, "learning_rate": 2.7119518423413704e-06, "loss": 0.0414, "step": 36823 }, { "epoch": 0.8114275011430806, "grad_norm": 0.4400230646133423, "learning_rate": 2.7113379180953953e-06, "loss": 0.039, "step": 36824 }, { "epoch": 0.8114495364325968, "grad_norm": 0.5496651530265808, "learning_rate": 2.710724056442169e-06, "loss": 0.0742, "step": 36825 }, { "epoch": 0.8114715717221129, "grad_norm": 0.6152923107147217, "learning_rate": 2.7101102573848104e-06, "loss": 0.0633, "step": 36826 }, { "epoch": 0.8114936070116291, "grad_norm": 0.5522056221961975, "learning_rate": 2.709496520926452e-06, "loss": 0.0704, "step": 36827 }, { "epoch": 0.8115156423011453, "grad_norm": 0.5335836410522461, "learning_rate": 2.7088828470702226e-06, "loss": 0.0435, "step": 36828 }, { "epoch": 0.8115376775906614, "grad_norm": 0.3967178165912628, "learning_rate": 2.708269235819239e-06, "loss": 0.0566, "step": 36829 }, { "epoch": 0.8115597128801776, "grad_norm": 0.398807555437088, "learning_rate": 2.7076556871766354e-06, "loss": 0.0338, "step": 36830 }, { "epoch": 0.8115817481696938, "grad_norm": 0.5852336883544922, "learning_rate": 2.7070422011455264e-06, "loss": 0.0617, "step": 36831 }, { "epoch": 0.8116037834592099, "grad_norm": 0.6599037647247314, "learning_rate": 2.7064287777290507e-06, "loss": 0.0492, "step": 36832 }, { "epoch": 0.8116258187487261, "grad_norm": 0.37995830178260803, "learning_rate": 2.7058154169303213e-06, "loss": 0.0792, "step": 36833 }, { "epoch": 0.8116478540382422, "grad_norm": 0.7757543921470642, "learning_rate": 2.70520211875247e-06, "loss": 0.0804, "step": 36834 }, { "epoch": 0.8116698893277584, "grad_norm": 0.7665700912475586, "learning_rate": 2.7045888831986147e-06, "loss": 0.0722, "step": 36835 }, { "epoch": 0.8116919246172746, "grad_norm": 0.526285707950592, "learning_rate": 2.7039757102718815e-06, "loss": 0.0407, "step": 36836 }, { "epoch": 0.8117139599067907, "grad_norm": 0.590908944606781, "learning_rate": 2.703362599975396e-06, "loss": 0.0662, "step": 36837 }, { "epoch": 0.8117359951963069, "grad_norm": 0.8975192904472351, "learning_rate": 2.702749552312272e-06, "loss": 0.0828, "step": 36838 }, { "epoch": 0.8117580304858231, "grad_norm": 0.447520911693573, "learning_rate": 2.7021365672856476e-06, "loss": 0.0706, "step": 36839 }, { "epoch": 0.8117800657753392, "grad_norm": 0.24705642461776733, "learning_rate": 2.7015236448986313e-06, "loss": 0.0432, "step": 36840 }, { "epoch": 0.8118021010648554, "grad_norm": 0.49577292799949646, "learning_rate": 2.700910785154353e-06, "loss": 0.0586, "step": 36841 }, { "epoch": 0.8118241363543716, "grad_norm": 0.3414435386657715, "learning_rate": 2.7002979880559297e-06, "loss": 0.0612, "step": 36842 }, { "epoch": 0.8118461716438877, "grad_norm": 0.5000384449958801, "learning_rate": 2.6996852536064827e-06, "loss": 0.0645, "step": 36843 }, { "epoch": 0.8118682069334039, "grad_norm": 0.42404797673225403, "learning_rate": 2.699072581809139e-06, "loss": 0.0716, "step": 36844 }, { "epoch": 0.81189024222292, "grad_norm": 0.38348326086997986, "learning_rate": 2.698459972667011e-06, "loss": 0.0417, "step": 36845 }, { "epoch": 0.8119122775124362, "grad_norm": 0.5015933513641357, "learning_rate": 2.6978474261832235e-06, "loss": 0.0708, "step": 36846 }, { "epoch": 0.8119343128019523, "grad_norm": 0.34429416060447693, "learning_rate": 2.6972349423608954e-06, "loss": 0.0484, "step": 36847 }, { "epoch": 0.8119563480914684, "grad_norm": 0.7466681599617004, "learning_rate": 2.6966225212031507e-06, "loss": 0.0635, "step": 36848 }, { "epoch": 0.8119783833809846, "grad_norm": 0.5054935216903687, "learning_rate": 2.696010162713102e-06, "loss": 0.0658, "step": 36849 }, { "epoch": 0.8120004186705008, "grad_norm": 0.6183574795722961, "learning_rate": 2.6953978668938706e-06, "loss": 0.0529, "step": 36850 }, { "epoch": 0.8120224539600169, "grad_norm": 0.6950796842575073, "learning_rate": 2.6947856337485813e-06, "loss": 0.0699, "step": 36851 }, { "epoch": 0.8120444892495331, "grad_norm": 0.6846379041671753, "learning_rate": 2.694173463280343e-06, "loss": 0.0571, "step": 36852 }, { "epoch": 0.8120665245390493, "grad_norm": 0.44043195247650146, "learning_rate": 2.69356135549228e-06, "loss": 0.0497, "step": 36853 }, { "epoch": 0.8120885598285654, "grad_norm": 0.5017572045326233, "learning_rate": 2.6929493103875023e-06, "loss": 0.0474, "step": 36854 }, { "epoch": 0.8121105951180816, "grad_norm": 0.5626618266105652, "learning_rate": 2.692337327969141e-06, "loss": 0.063, "step": 36855 }, { "epoch": 0.8121326304075978, "grad_norm": 0.5722225904464722, "learning_rate": 2.6917254082403014e-06, "loss": 0.0684, "step": 36856 }, { "epoch": 0.8121546656971139, "grad_norm": 0.8876248002052307, "learning_rate": 2.6911135512041096e-06, "loss": 0.0656, "step": 36857 }, { "epoch": 0.8121767009866301, "grad_norm": 0.3210437595844269, "learning_rate": 2.6905017568636716e-06, "loss": 0.0408, "step": 36858 }, { "epoch": 0.8121987362761462, "grad_norm": 0.38187170028686523, "learning_rate": 2.6898900252221094e-06, "loss": 0.0501, "step": 36859 }, { "epoch": 0.8122207715656624, "grad_norm": 0.5305883884429932, "learning_rate": 2.6892783562825423e-06, "loss": 0.0554, "step": 36860 }, { "epoch": 0.8122428068551786, "grad_norm": 0.4361708462238312, "learning_rate": 2.688666750048075e-06, "loss": 0.0483, "step": 36861 }, { "epoch": 0.8122648421446947, "grad_norm": 0.8667052984237671, "learning_rate": 2.6880552065218376e-06, "loss": 0.0665, "step": 36862 }, { "epoch": 0.8122868774342109, "grad_norm": 0.8076228499412537, "learning_rate": 2.6874437257069333e-06, "loss": 0.0681, "step": 36863 }, { "epoch": 0.8123089127237271, "grad_norm": 0.6813082098960876, "learning_rate": 2.6868323076064843e-06, "loss": 0.0676, "step": 36864 }, { "epoch": 0.8123309480132432, "grad_norm": 0.9914581179618835, "learning_rate": 2.6862209522235964e-06, "loss": 0.0779, "step": 36865 }, { "epoch": 0.8123529833027594, "grad_norm": 0.640285074710846, "learning_rate": 2.6856096595613886e-06, "loss": 0.0663, "step": 36866 }, { "epoch": 0.8123750185922756, "grad_norm": 0.8040210008621216, "learning_rate": 2.684998429622979e-06, "loss": 0.0788, "step": 36867 }, { "epoch": 0.8123970538817917, "grad_norm": 0.7096569538116455, "learning_rate": 2.6843872624114728e-06, "loss": 0.0935, "step": 36868 }, { "epoch": 0.8124190891713079, "grad_norm": 0.4881081283092499, "learning_rate": 2.683776157929985e-06, "loss": 0.0524, "step": 36869 }, { "epoch": 0.8124411244608241, "grad_norm": 0.7706659436225891, "learning_rate": 2.68316511618163e-06, "loss": 0.0582, "step": 36870 }, { "epoch": 0.8124631597503402, "grad_norm": 0.7441077828407288, "learning_rate": 2.682554137169525e-06, "loss": 0.0797, "step": 36871 }, { "epoch": 0.8124851950398563, "grad_norm": 0.6635175347328186, "learning_rate": 2.681943220896772e-06, "loss": 0.0472, "step": 36872 }, { "epoch": 0.8125072303293724, "grad_norm": 0.18687903881072998, "learning_rate": 2.681332367366487e-06, "loss": 0.0627, "step": 36873 }, { "epoch": 0.8125292656188886, "grad_norm": 0.5178231000900269, "learning_rate": 2.680721576581787e-06, "loss": 0.0705, "step": 36874 }, { "epoch": 0.8125513009084048, "grad_norm": 0.38081151247024536, "learning_rate": 2.6801108485457738e-06, "loss": 0.0356, "step": 36875 }, { "epoch": 0.8125733361979209, "grad_norm": 0.549237072467804, "learning_rate": 2.6795001832615674e-06, "loss": 0.0623, "step": 36876 }, { "epoch": 0.8125953714874371, "grad_norm": 0.5545485615730286, "learning_rate": 2.678889580732264e-06, "loss": 0.0699, "step": 36877 }, { "epoch": 0.8126174067769533, "grad_norm": 0.5111642479896545, "learning_rate": 2.6782790409609924e-06, "loss": 0.0568, "step": 36878 }, { "epoch": 0.8126394420664694, "grad_norm": 0.4187864065170288, "learning_rate": 2.6776685639508476e-06, "loss": 0.0481, "step": 36879 }, { "epoch": 0.8126614773559856, "grad_norm": 0.7238584160804749, "learning_rate": 2.6770581497049494e-06, "loss": 0.0526, "step": 36880 }, { "epoch": 0.8126835126455018, "grad_norm": 0.49205949902534485, "learning_rate": 2.676447798226395e-06, "loss": 0.0547, "step": 36881 }, { "epoch": 0.8127055479350179, "grad_norm": 0.7818836569786072, "learning_rate": 2.675837509518307e-06, "loss": 0.0527, "step": 36882 }, { "epoch": 0.8127275832245341, "grad_norm": 0.6884753704071045, "learning_rate": 2.675227283583787e-06, "loss": 0.07, "step": 36883 }, { "epoch": 0.8127496185140503, "grad_norm": 0.7793163657188416, "learning_rate": 2.674617120425936e-06, "loss": 0.0692, "step": 36884 }, { "epoch": 0.8127716538035664, "grad_norm": 0.7769158482551575, "learning_rate": 2.6740070200478783e-06, "loss": 0.0699, "step": 36885 }, { "epoch": 0.8127936890930826, "grad_norm": 0.6172627806663513, "learning_rate": 2.6733969824527077e-06, "loss": 0.0743, "step": 36886 }, { "epoch": 0.8128157243825987, "grad_norm": 0.8955333828926086, "learning_rate": 2.67278700764354e-06, "loss": 0.0858, "step": 36887 }, { "epoch": 0.8128377596721149, "grad_norm": 0.6545083522796631, "learning_rate": 2.6721770956234713e-06, "loss": 0.0809, "step": 36888 }, { "epoch": 0.8128597949616311, "grad_norm": 0.44427424669265747, "learning_rate": 2.6715672463956224e-06, "loss": 0.0609, "step": 36889 }, { "epoch": 0.8128818302511472, "grad_norm": 0.519704282283783, "learning_rate": 2.6709574599630895e-06, "loss": 0.0409, "step": 36890 }, { "epoch": 0.8129038655406634, "grad_norm": 0.6471384763717651, "learning_rate": 2.670347736328986e-06, "loss": 0.0584, "step": 36891 }, { "epoch": 0.8129259008301796, "grad_norm": 0.4363391399383545, "learning_rate": 2.669738075496408e-06, "loss": 0.0822, "step": 36892 }, { "epoch": 0.8129479361196957, "grad_norm": 0.6170480847358704, "learning_rate": 2.6691284774684667e-06, "loss": 0.0554, "step": 36893 }, { "epoch": 0.8129699714092119, "grad_norm": 0.6333233714103699, "learning_rate": 2.668518942248269e-06, "loss": 0.0669, "step": 36894 }, { "epoch": 0.8129920066987281, "grad_norm": 0.8065789341926575, "learning_rate": 2.6679094698389152e-06, "loss": 0.0682, "step": 36895 }, { "epoch": 0.8130140419882441, "grad_norm": 0.7719139456748962, "learning_rate": 2.6673000602435098e-06, "loss": 0.0482, "step": 36896 }, { "epoch": 0.8130360772777603, "grad_norm": 0.29018086194992065, "learning_rate": 2.66669071346516e-06, "loss": 0.0619, "step": 36897 }, { "epoch": 0.8130581125672764, "grad_norm": 0.5794394016265869, "learning_rate": 2.66608142950697e-06, "loss": 0.0718, "step": 36898 }, { "epoch": 0.8130801478567926, "grad_norm": 0.946366548538208, "learning_rate": 2.665472208372039e-06, "loss": 0.0894, "step": 36899 }, { "epoch": 0.8131021831463088, "grad_norm": 0.7702910900115967, "learning_rate": 2.6648630500634717e-06, "loss": 0.0786, "step": 36900 }, { "epoch": 0.8131242184358249, "grad_norm": 0.7108132839202881, "learning_rate": 2.664253954584375e-06, "loss": 0.049, "step": 36901 }, { "epoch": 0.8131462537253411, "grad_norm": 0.4842761754989624, "learning_rate": 2.663644921937845e-06, "loss": 0.057, "step": 36902 }, { "epoch": 0.8131682890148573, "grad_norm": 0.8030076622962952, "learning_rate": 2.663035952126985e-06, "loss": 0.0569, "step": 36903 }, { "epoch": 0.8131903243043734, "grad_norm": 0.6609432101249695, "learning_rate": 2.6624270451548997e-06, "loss": 0.0412, "step": 36904 }, { "epoch": 0.8132123595938896, "grad_norm": 0.5571278929710388, "learning_rate": 2.6618182010246926e-06, "loss": 0.0661, "step": 36905 }, { "epoch": 0.8132343948834058, "grad_norm": 0.7461690306663513, "learning_rate": 2.6612094197394598e-06, "loss": 0.0568, "step": 36906 }, { "epoch": 0.8132564301729219, "grad_norm": 0.1956808865070343, "learning_rate": 2.660600701302302e-06, "loss": 0.0415, "step": 36907 }, { "epoch": 0.8132784654624381, "grad_norm": 0.9838788509368896, "learning_rate": 2.6599920457163266e-06, "loss": 0.086, "step": 36908 }, { "epoch": 0.8133005007519543, "grad_norm": 0.6240499019622803, "learning_rate": 2.6593834529846233e-06, "loss": 0.0528, "step": 36909 }, { "epoch": 0.8133225360414704, "grad_norm": 0.8519781231880188, "learning_rate": 2.658774923110304e-06, "loss": 0.0709, "step": 36910 }, { "epoch": 0.8133445713309866, "grad_norm": 0.479802668094635, "learning_rate": 2.6581664560964517e-06, "loss": 0.0668, "step": 36911 }, { "epoch": 0.8133666066205028, "grad_norm": 0.813327431678772, "learning_rate": 2.6575580519461855e-06, "loss": 0.0592, "step": 36912 }, { "epoch": 0.8133886419100189, "grad_norm": 0.6825985908508301, "learning_rate": 2.656949710662591e-06, "loss": 0.0791, "step": 36913 }, { "epoch": 0.8134106771995351, "grad_norm": 0.8869581818580627, "learning_rate": 2.656341432248773e-06, "loss": 0.0549, "step": 36914 }, { "epoch": 0.8134327124890512, "grad_norm": 0.4072595238685608, "learning_rate": 2.655733216707825e-06, "loss": 0.0418, "step": 36915 }, { "epoch": 0.8134547477785674, "grad_norm": 0.2955794036388397, "learning_rate": 2.655125064042846e-06, "loss": 0.0545, "step": 36916 }, { "epoch": 0.8134767830680836, "grad_norm": 0.6107510328292847, "learning_rate": 2.6545169742569374e-06, "loss": 0.0665, "step": 36917 }, { "epoch": 0.8134988183575997, "grad_norm": 0.7433121800422668, "learning_rate": 2.653908947353192e-06, "loss": 0.0332, "step": 36918 }, { "epoch": 0.8135208536471159, "grad_norm": 0.7965574860572815, "learning_rate": 2.65330098333471e-06, "loss": 0.063, "step": 36919 }, { "epoch": 0.8135428889366321, "grad_norm": 0.7709358334541321, "learning_rate": 2.652693082204584e-06, "loss": 0.0625, "step": 36920 }, { "epoch": 0.8135649242261481, "grad_norm": 0.33762067556381226, "learning_rate": 2.6520852439659186e-06, "loss": 0.0302, "step": 36921 }, { "epoch": 0.8135869595156643, "grad_norm": 0.24896031618118286, "learning_rate": 2.6514774686218013e-06, "loss": 0.0734, "step": 36922 }, { "epoch": 0.8136089948051805, "grad_norm": 0.652902364730835, "learning_rate": 2.650869756175331e-06, "loss": 0.0879, "step": 36923 }, { "epoch": 0.8136310300946966, "grad_norm": 0.3898059129714966, "learning_rate": 2.6502621066296066e-06, "loss": 0.0539, "step": 36924 }, { "epoch": 0.8136530653842128, "grad_norm": 0.6654436588287354, "learning_rate": 2.6496545199877146e-06, "loss": 0.0543, "step": 36925 }, { "epoch": 0.813675100673729, "grad_norm": 0.8652639389038086, "learning_rate": 2.6490469962527574e-06, "loss": 0.056, "step": 36926 }, { "epoch": 0.8136971359632451, "grad_norm": 0.7844610810279846, "learning_rate": 2.6484395354278256e-06, "loss": 0.0388, "step": 36927 }, { "epoch": 0.8137191712527613, "grad_norm": 0.853326678276062, "learning_rate": 2.6478321375160187e-06, "loss": 0.0859, "step": 36928 }, { "epoch": 0.8137412065422774, "grad_norm": 0.4869459271430969, "learning_rate": 2.647224802520422e-06, "loss": 0.0703, "step": 36929 }, { "epoch": 0.8137632418317936, "grad_norm": 0.5096237063407898, "learning_rate": 2.6466175304441337e-06, "loss": 0.0527, "step": 36930 }, { "epoch": 0.8137852771213098, "grad_norm": 0.42575451731681824, "learning_rate": 2.646010321290251e-06, "loss": 0.0575, "step": 36931 }, { "epoch": 0.8138073124108259, "grad_norm": 0.5765937566757202, "learning_rate": 2.645403175061858e-06, "loss": 0.0541, "step": 36932 }, { "epoch": 0.8138293477003421, "grad_norm": 0.596770167350769, "learning_rate": 2.6447960917620573e-06, "loss": 0.0708, "step": 36933 }, { "epoch": 0.8138513829898583, "grad_norm": 0.6509257555007935, "learning_rate": 2.6441890713939265e-06, "loss": 0.0589, "step": 36934 }, { "epoch": 0.8138734182793744, "grad_norm": 0.2882688045501709, "learning_rate": 2.6435821139605733e-06, "loss": 0.0437, "step": 36935 }, { "epoch": 0.8138954535688906, "grad_norm": 0.6114826202392578, "learning_rate": 2.64297521946508e-06, "loss": 0.0384, "step": 36936 }, { "epoch": 0.8139174888584068, "grad_norm": 0.5077805519104004, "learning_rate": 2.6423683879105436e-06, "loss": 0.0308, "step": 36937 }, { "epoch": 0.8139395241479229, "grad_norm": 0.46410587430000305, "learning_rate": 2.6417616193000475e-06, "loss": 0.0457, "step": 36938 }, { "epoch": 0.8139615594374391, "grad_norm": 0.32297492027282715, "learning_rate": 2.6411549136366876e-06, "loss": 0.0322, "step": 36939 }, { "epoch": 0.8139835947269553, "grad_norm": 0.6772302985191345, "learning_rate": 2.6405482709235555e-06, "loss": 0.0533, "step": 36940 }, { "epoch": 0.8140056300164714, "grad_norm": 0.6221833825111389, "learning_rate": 2.639941691163735e-06, "loss": 0.0592, "step": 36941 }, { "epoch": 0.8140276653059876, "grad_norm": 0.7523473501205444, "learning_rate": 2.6393351743603204e-06, "loss": 0.0659, "step": 36942 }, { "epoch": 0.8140497005955037, "grad_norm": 0.3785078525543213, "learning_rate": 2.638728720516399e-06, "loss": 0.0449, "step": 36943 }, { "epoch": 0.8140717358850199, "grad_norm": 0.4349554181098938, "learning_rate": 2.638122329635066e-06, "loss": 0.0264, "step": 36944 }, { "epoch": 0.8140937711745361, "grad_norm": 0.6395658850669861, "learning_rate": 2.6375160017194004e-06, "loss": 0.0472, "step": 36945 }, { "epoch": 0.8141158064640521, "grad_norm": 0.4416417181491852, "learning_rate": 2.636909736772495e-06, "loss": 0.0479, "step": 36946 }, { "epoch": 0.8141378417535683, "grad_norm": 0.34789547324180603, "learning_rate": 2.6363035347974413e-06, "loss": 0.0683, "step": 36947 }, { "epoch": 0.8141598770430845, "grad_norm": 0.4350265860557556, "learning_rate": 2.6356973957973206e-06, "loss": 0.05, "step": 36948 }, { "epoch": 0.8141819123326006, "grad_norm": 0.534044623374939, "learning_rate": 2.635091319775223e-06, "loss": 0.0614, "step": 36949 }, { "epoch": 0.8142039476221168, "grad_norm": 0.502244234085083, "learning_rate": 2.6344853067342355e-06, "loss": 0.0458, "step": 36950 }, { "epoch": 0.814225982911633, "grad_norm": 0.7306846380233765, "learning_rate": 2.633879356677451e-06, "loss": 0.0724, "step": 36951 }, { "epoch": 0.8142480182011491, "grad_norm": 0.6057774424552917, "learning_rate": 2.6332734696079436e-06, "loss": 0.1014, "step": 36952 }, { "epoch": 0.8142700534906653, "grad_norm": 0.6763747334480286, "learning_rate": 2.6326676455288074e-06, "loss": 0.0637, "step": 36953 }, { "epoch": 0.8142920887801814, "grad_norm": 0.9810610413551331, "learning_rate": 2.6320618844431316e-06, "loss": 0.0688, "step": 36954 }, { "epoch": 0.8143141240696976, "grad_norm": 0.6065793037414551, "learning_rate": 2.6314561863539922e-06, "loss": 0.0492, "step": 36955 }, { "epoch": 0.8143361593592138, "grad_norm": 0.5089872479438782, "learning_rate": 2.6308505512644813e-06, "loss": 0.0734, "step": 36956 }, { "epoch": 0.8143581946487299, "grad_norm": 0.5208545923233032, "learning_rate": 2.6302449791776758e-06, "loss": 0.0555, "step": 36957 }, { "epoch": 0.8143802299382461, "grad_norm": 0.6275404095649719, "learning_rate": 2.629639470096672e-06, "loss": 0.055, "step": 36958 }, { "epoch": 0.8144022652277623, "grad_norm": 0.4731740355491638, "learning_rate": 2.629034024024545e-06, "loss": 0.0599, "step": 36959 }, { "epoch": 0.8144243005172784, "grad_norm": 0.620406448841095, "learning_rate": 2.628428640964385e-06, "loss": 0.0579, "step": 36960 }, { "epoch": 0.8144463358067946, "grad_norm": 0.6381689310073853, "learning_rate": 2.6278233209192683e-06, "loss": 0.0404, "step": 36961 }, { "epoch": 0.8144683710963108, "grad_norm": 0.721687912940979, "learning_rate": 2.6272180638922823e-06, "loss": 0.0603, "step": 36962 }, { "epoch": 0.8144904063858269, "grad_norm": 0.6600878238677979, "learning_rate": 2.6266128698865127e-06, "loss": 0.0593, "step": 36963 }, { "epoch": 0.8145124416753431, "grad_norm": 0.3704112470149994, "learning_rate": 2.626007738905036e-06, "loss": 0.0729, "step": 36964 }, { "epoch": 0.8145344769648593, "grad_norm": 0.5387040376663208, "learning_rate": 2.6254026709509356e-06, "loss": 0.0695, "step": 36965 }, { "epoch": 0.8145565122543754, "grad_norm": 0.6567867398262024, "learning_rate": 2.624797666027298e-06, "loss": 0.0684, "step": 36966 }, { "epoch": 0.8145785475438916, "grad_norm": 0.5132961273193359, "learning_rate": 2.6241927241372033e-06, "loss": 0.0409, "step": 36967 }, { "epoch": 0.8146005828334077, "grad_norm": 0.8161531090736389, "learning_rate": 2.623587845283728e-06, "loss": 0.0542, "step": 36968 }, { "epoch": 0.8146226181229239, "grad_norm": 0.7223357558250427, "learning_rate": 2.622983029469958e-06, "loss": 0.0853, "step": 36969 }, { "epoch": 0.81464465341244, "grad_norm": 0.22719673812389374, "learning_rate": 2.622378276698974e-06, "loss": 0.0496, "step": 36970 }, { "epoch": 0.8146666887019561, "grad_norm": 0.4680118262767792, "learning_rate": 2.6217735869738534e-06, "loss": 0.0454, "step": 36971 }, { "epoch": 0.8146887239914723, "grad_norm": 0.4792628884315491, "learning_rate": 2.621168960297676e-06, "loss": 0.042, "step": 36972 }, { "epoch": 0.8147107592809885, "grad_norm": 0.531909704208374, "learning_rate": 2.620564396673525e-06, "loss": 0.0649, "step": 36973 }, { "epoch": 0.8147327945705046, "grad_norm": 0.460371732711792, "learning_rate": 2.6199598961044804e-06, "loss": 0.0471, "step": 36974 }, { "epoch": 0.8147548298600208, "grad_norm": 0.5370529294013977, "learning_rate": 2.619355458593614e-06, "loss": 0.0554, "step": 36975 }, { "epoch": 0.814776865149537, "grad_norm": 0.49745428562164307, "learning_rate": 2.618751084144011e-06, "loss": 0.0471, "step": 36976 }, { "epoch": 0.8147989004390531, "grad_norm": 0.3243437111377716, "learning_rate": 2.6181467727587516e-06, "loss": 0.0515, "step": 36977 }, { "epoch": 0.8148209357285693, "grad_norm": 0.9440617561340332, "learning_rate": 2.6175425244409052e-06, "loss": 0.0496, "step": 36978 }, { "epoch": 0.8148429710180854, "grad_norm": 0.3160836696624756, "learning_rate": 2.616938339193559e-06, "loss": 0.0414, "step": 36979 }, { "epoch": 0.8148650063076016, "grad_norm": 0.433475136756897, "learning_rate": 2.6163342170197797e-06, "loss": 0.0599, "step": 36980 }, { "epoch": 0.8148870415971178, "grad_norm": 0.7415828108787537, "learning_rate": 2.615730157922657e-06, "loss": 0.083, "step": 36981 }, { "epoch": 0.8149090768866339, "grad_norm": 0.5679342746734619, "learning_rate": 2.6151261619052598e-06, "loss": 0.0377, "step": 36982 }, { "epoch": 0.8149311121761501, "grad_norm": 0.632000207901001, "learning_rate": 2.6145222289706696e-06, "loss": 0.0642, "step": 36983 }, { "epoch": 0.8149531474656663, "grad_norm": 0.741951584815979, "learning_rate": 2.613918359121954e-06, "loss": 0.0574, "step": 36984 }, { "epoch": 0.8149751827551824, "grad_norm": 0.6480520963668823, "learning_rate": 2.613314552362196e-06, "loss": 0.0722, "step": 36985 }, { "epoch": 0.8149972180446986, "grad_norm": 0.5764366984367371, "learning_rate": 2.612710808694471e-06, "loss": 0.0503, "step": 36986 }, { "epoch": 0.8150192533342148, "grad_norm": 0.39935827255249023, "learning_rate": 2.6121071281218507e-06, "loss": 0.0632, "step": 36987 }, { "epoch": 0.8150412886237309, "grad_norm": 0.4797816872596741, "learning_rate": 2.61150351064741e-06, "loss": 0.0379, "step": 36988 }, { "epoch": 0.8150633239132471, "grad_norm": 0.6327342391014099, "learning_rate": 2.6108999562742253e-06, "loss": 0.0808, "step": 36989 }, { "epoch": 0.8150853592027633, "grad_norm": 0.6230183243751526, "learning_rate": 2.6102964650053757e-06, "loss": 0.0666, "step": 36990 }, { "epoch": 0.8151073944922794, "grad_norm": 0.6041256785392761, "learning_rate": 2.6096930368439255e-06, "loss": 0.0678, "step": 36991 }, { "epoch": 0.8151294297817956, "grad_norm": 0.8936364054679871, "learning_rate": 2.6090896717929538e-06, "loss": 0.0766, "step": 36992 }, { "epoch": 0.8151514650713118, "grad_norm": 0.41337284445762634, "learning_rate": 2.6084863698555313e-06, "loss": 0.047, "step": 36993 }, { "epoch": 0.8151735003608279, "grad_norm": 0.5800939202308655, "learning_rate": 2.607883131034737e-06, "loss": 0.0393, "step": 36994 }, { "epoch": 0.815195535650344, "grad_norm": 0.4217235743999481, "learning_rate": 2.6072799553336364e-06, "loss": 0.0387, "step": 36995 }, { "epoch": 0.8152175709398601, "grad_norm": 0.9231458306312561, "learning_rate": 2.606676842755304e-06, "loss": 0.0605, "step": 36996 }, { "epoch": 0.8152396062293763, "grad_norm": 0.5768703818321228, "learning_rate": 2.6060737933028143e-06, "loss": 0.0337, "step": 36997 }, { "epoch": 0.8152616415188925, "grad_norm": 1.0564759969711304, "learning_rate": 2.6054708069792354e-06, "loss": 0.0667, "step": 36998 }, { "epoch": 0.8152836768084086, "grad_norm": 0.7089360952377319, "learning_rate": 2.6048678837876393e-06, "loss": 0.0717, "step": 36999 }, { "epoch": 0.8153057120979248, "grad_norm": 0.6713321805000305, "learning_rate": 2.604265023731097e-06, "loss": 0.0569, "step": 37000 }, { "epoch": 0.815327747387441, "grad_norm": 0.8258437514305115, "learning_rate": 2.6036622268126863e-06, "loss": 0.041, "step": 37001 }, { "epoch": 0.8153497826769571, "grad_norm": 1.0960214138031006, "learning_rate": 2.6030594930354645e-06, "loss": 0.069, "step": 37002 }, { "epoch": 0.8153718179664733, "grad_norm": 0.7177522778511047, "learning_rate": 2.602456822402511e-06, "loss": 0.055, "step": 37003 }, { "epoch": 0.8153938532559895, "grad_norm": 0.4879970848560333, "learning_rate": 2.6018542149168943e-06, "loss": 0.0728, "step": 37004 }, { "epoch": 0.8154158885455056, "grad_norm": 0.8536942601203918, "learning_rate": 2.6012516705816815e-06, "loss": 0.0751, "step": 37005 }, { "epoch": 0.8154379238350218, "grad_norm": 0.7839276194572449, "learning_rate": 2.600649189399944e-06, "loss": 0.0454, "step": 37006 }, { "epoch": 0.815459959124538, "grad_norm": 0.5419155359268188, "learning_rate": 2.6000467713747435e-06, "loss": 0.079, "step": 37007 }, { "epoch": 0.8154819944140541, "grad_norm": 0.43970295786857605, "learning_rate": 2.59944441650916e-06, "loss": 0.0616, "step": 37008 }, { "epoch": 0.8155040297035703, "grad_norm": 0.6294946074485779, "learning_rate": 2.5988421248062537e-06, "loss": 0.0555, "step": 37009 }, { "epoch": 0.8155260649930864, "grad_norm": 0.7821943759918213, "learning_rate": 2.5982398962690984e-06, "loss": 0.067, "step": 37010 }, { "epoch": 0.8155481002826026, "grad_norm": 0.6690173745155334, "learning_rate": 2.5976377309007515e-06, "loss": 0.0656, "step": 37011 }, { "epoch": 0.8155701355721188, "grad_norm": 0.4727834165096283, "learning_rate": 2.597035628704289e-06, "loss": 0.0392, "step": 37012 }, { "epoch": 0.8155921708616349, "grad_norm": 0.7115543484687805, "learning_rate": 2.5964335896827768e-06, "loss": 0.0553, "step": 37013 }, { "epoch": 0.8156142061511511, "grad_norm": 0.6320624947547913, "learning_rate": 2.5958316138392775e-06, "loss": 0.0668, "step": 37014 }, { "epoch": 0.8156362414406673, "grad_norm": 0.6047329306602478, "learning_rate": 2.595229701176858e-06, "loss": 0.0554, "step": 37015 }, { "epoch": 0.8156582767301834, "grad_norm": 0.725519061088562, "learning_rate": 2.594627851698587e-06, "loss": 0.0831, "step": 37016 }, { "epoch": 0.8156803120196996, "grad_norm": 0.4682888686656952, "learning_rate": 2.5940260654075313e-06, "loss": 0.0301, "step": 37017 }, { "epoch": 0.8157023473092158, "grad_norm": 0.37247657775878906, "learning_rate": 2.5934243423067505e-06, "loss": 0.0467, "step": 37018 }, { "epoch": 0.8157243825987319, "grad_norm": 0.8341590166091919, "learning_rate": 2.5928226823993115e-06, "loss": 0.0719, "step": 37019 }, { "epoch": 0.815746417888248, "grad_norm": 0.910467803478241, "learning_rate": 2.5922210856882842e-06, "loss": 0.0934, "step": 37020 }, { "epoch": 0.8157684531777641, "grad_norm": 0.565841019153595, "learning_rate": 2.5916195521767246e-06, "loss": 0.06, "step": 37021 }, { "epoch": 0.8157904884672803, "grad_norm": 0.499893456697464, "learning_rate": 2.591018081867701e-06, "loss": 0.0757, "step": 37022 }, { "epoch": 0.8158125237567965, "grad_norm": 0.5603138208389282, "learning_rate": 2.590416674764275e-06, "loss": 0.0644, "step": 37023 }, { "epoch": 0.8158345590463126, "grad_norm": 0.4777676463127136, "learning_rate": 2.589815330869517e-06, "loss": 0.0566, "step": 37024 }, { "epoch": 0.8158565943358288, "grad_norm": 0.6140753626823425, "learning_rate": 2.5892140501864803e-06, "loss": 0.0507, "step": 37025 }, { "epoch": 0.815878629625345, "grad_norm": 1.0447585582733154, "learning_rate": 2.58861283271823e-06, "loss": 0.1248, "step": 37026 }, { "epoch": 0.8159006649148611, "grad_norm": 0.410451203584671, "learning_rate": 2.588011678467836e-06, "loss": 0.0695, "step": 37027 }, { "epoch": 0.8159227002043773, "grad_norm": 0.6035966277122498, "learning_rate": 2.58741058743835e-06, "loss": 0.0701, "step": 37028 }, { "epoch": 0.8159447354938935, "grad_norm": 0.740732729434967, "learning_rate": 2.5868095596328408e-06, "loss": 0.0693, "step": 37029 }, { "epoch": 0.8159667707834096, "grad_norm": 0.5737558603286743, "learning_rate": 2.5862085950543595e-06, "loss": 0.071, "step": 37030 }, { "epoch": 0.8159888060729258, "grad_norm": 0.48908302187919617, "learning_rate": 2.585607693705983e-06, "loss": 0.0397, "step": 37031 }, { "epoch": 0.816010841362442, "grad_norm": 0.6066106557846069, "learning_rate": 2.5850068555907604e-06, "loss": 0.0878, "step": 37032 }, { "epoch": 0.8160328766519581, "grad_norm": 0.5775598287582397, "learning_rate": 2.5844060807117586e-06, "loss": 0.0298, "step": 37033 }, { "epoch": 0.8160549119414743, "grad_norm": 0.6529454588890076, "learning_rate": 2.583805369072029e-06, "loss": 0.045, "step": 37034 }, { "epoch": 0.8160769472309904, "grad_norm": 0.7768951058387756, "learning_rate": 2.5832047206746396e-06, "loss": 0.0688, "step": 37035 }, { "epoch": 0.8160989825205066, "grad_norm": 0.6508615016937256, "learning_rate": 2.582604135522649e-06, "loss": 0.0565, "step": 37036 }, { "epoch": 0.8161210178100228, "grad_norm": 0.7360296249389648, "learning_rate": 2.582003613619106e-06, "loss": 0.0556, "step": 37037 }, { "epoch": 0.8161430530995389, "grad_norm": 0.4305780827999115, "learning_rate": 2.5814031549670873e-06, "loss": 0.0634, "step": 37038 }, { "epoch": 0.8161650883890551, "grad_norm": 0.41071856021881104, "learning_rate": 2.580802759569637e-06, "loss": 0.0427, "step": 37039 }, { "epoch": 0.8161871236785713, "grad_norm": 0.6348943710327148, "learning_rate": 2.580202427429821e-06, "loss": 0.0758, "step": 37040 }, { "epoch": 0.8162091589680874, "grad_norm": 0.7190868854522705, "learning_rate": 2.579602158550691e-06, "loss": 0.0786, "step": 37041 }, { "epoch": 0.8162311942576036, "grad_norm": 0.49409544467926025, "learning_rate": 2.5790019529353074e-06, "loss": 0.0632, "step": 37042 }, { "epoch": 0.8162532295471198, "grad_norm": 0.7319410443305969, "learning_rate": 2.5784018105867306e-06, "loss": 0.0633, "step": 37043 }, { "epoch": 0.8162752648366359, "grad_norm": 0.22911302745342255, "learning_rate": 2.5778017315080116e-06, "loss": 0.0352, "step": 37044 }, { "epoch": 0.816297300126152, "grad_norm": 0.8000164031982422, "learning_rate": 2.5772017157022094e-06, "loss": 0.0741, "step": 37045 }, { "epoch": 0.8163193354156681, "grad_norm": 0.9422078132629395, "learning_rate": 2.5766017631723815e-06, "loss": 0.0918, "step": 37046 }, { "epoch": 0.8163413707051843, "grad_norm": 0.7150290012359619, "learning_rate": 2.576001873921584e-06, "loss": 0.0825, "step": 37047 }, { "epoch": 0.8163634059947005, "grad_norm": 0.5688198208808899, "learning_rate": 2.5754020479528696e-06, "loss": 0.0427, "step": 37048 }, { "epoch": 0.8163854412842166, "grad_norm": 0.23805652558803558, "learning_rate": 2.5748022852692955e-06, "loss": 0.0354, "step": 37049 }, { "epoch": 0.8164074765737328, "grad_norm": 0.5181368589401245, "learning_rate": 2.5742025858739176e-06, "loss": 0.0799, "step": 37050 }, { "epoch": 0.816429511863249, "grad_norm": 0.4870908260345459, "learning_rate": 2.573602949769787e-06, "loss": 0.0715, "step": 37051 }, { "epoch": 0.8164515471527651, "grad_norm": 0.5870418548583984, "learning_rate": 2.5730033769599646e-06, "loss": 0.0655, "step": 37052 }, { "epoch": 0.8164735824422813, "grad_norm": 0.6276153922080994, "learning_rate": 2.5724038674474916e-06, "loss": 0.052, "step": 37053 }, { "epoch": 0.8164956177317975, "grad_norm": 0.5925435423851013, "learning_rate": 2.5718044212354366e-06, "loss": 0.0604, "step": 37054 }, { "epoch": 0.8165176530213136, "grad_norm": 0.7842873930931091, "learning_rate": 2.5712050383268437e-06, "loss": 0.0649, "step": 37055 }, { "epoch": 0.8165396883108298, "grad_norm": 0.2745491862297058, "learning_rate": 2.570605718724771e-06, "loss": 0.0616, "step": 37056 }, { "epoch": 0.816561723600346, "grad_norm": 0.5933136343955994, "learning_rate": 2.5700064624322662e-06, "loss": 0.0774, "step": 37057 }, { "epoch": 0.8165837588898621, "grad_norm": 0.6740520596504211, "learning_rate": 2.569407269452383e-06, "loss": 0.0714, "step": 37058 }, { "epoch": 0.8166057941793783, "grad_norm": 0.5051080584526062, "learning_rate": 2.5688081397881793e-06, "loss": 0.0422, "step": 37059 }, { "epoch": 0.8166278294688945, "grad_norm": 0.6906406879425049, "learning_rate": 2.568209073442692e-06, "loss": 0.064, "step": 37060 }, { "epoch": 0.8166498647584106, "grad_norm": 0.2338220477104187, "learning_rate": 2.5676100704189913e-06, "loss": 0.0419, "step": 37061 }, { "epoch": 0.8166719000479268, "grad_norm": 0.5780041813850403, "learning_rate": 2.567011130720116e-06, "loss": 0.0567, "step": 37062 }, { "epoch": 0.8166939353374429, "grad_norm": 0.46787068247795105, "learning_rate": 2.5664122543491236e-06, "loss": 0.0464, "step": 37063 }, { "epoch": 0.8167159706269591, "grad_norm": 0.5716522932052612, "learning_rate": 2.5658134413090565e-06, "loss": 0.0403, "step": 37064 }, { "epoch": 0.8167380059164753, "grad_norm": 0.7458722591400146, "learning_rate": 2.565214691602971e-06, "loss": 0.0641, "step": 37065 }, { "epoch": 0.8167600412059914, "grad_norm": 0.8624565601348877, "learning_rate": 2.564616005233918e-06, "loss": 0.0573, "step": 37066 }, { "epoch": 0.8167820764955076, "grad_norm": 0.8389943838119507, "learning_rate": 2.5640173822049397e-06, "loss": 0.0764, "step": 37067 }, { "epoch": 0.8168041117850238, "grad_norm": 0.5858795642852783, "learning_rate": 2.5634188225190904e-06, "loss": 0.0751, "step": 37068 }, { "epoch": 0.8168261470745398, "grad_norm": 0.41417598724365234, "learning_rate": 2.5628203261794173e-06, "loss": 0.0496, "step": 37069 }, { "epoch": 0.816848182364056, "grad_norm": 1.0228255987167358, "learning_rate": 2.5622218931889742e-06, "loss": 0.0832, "step": 37070 }, { "epoch": 0.8168702176535722, "grad_norm": 0.39340463280677795, "learning_rate": 2.561623523550799e-06, "loss": 0.0567, "step": 37071 }, { "epoch": 0.8168922529430883, "grad_norm": 0.40603843331336975, "learning_rate": 2.5610252172679466e-06, "loss": 0.0469, "step": 37072 }, { "epoch": 0.8169142882326045, "grad_norm": 0.45982876420021057, "learning_rate": 2.560426974343468e-06, "loss": 0.0414, "step": 37073 }, { "epoch": 0.8169363235221206, "grad_norm": 0.549579918384552, "learning_rate": 2.5598287947804e-06, "loss": 0.0577, "step": 37074 }, { "epoch": 0.8169583588116368, "grad_norm": 0.537010133266449, "learning_rate": 2.559230678581799e-06, "loss": 0.0755, "step": 37075 }, { "epoch": 0.816980394101153, "grad_norm": 0.8814085721969604, "learning_rate": 2.5586326257507e-06, "loss": 0.0545, "step": 37076 }, { "epoch": 0.8170024293906691, "grad_norm": 0.46511873602867126, "learning_rate": 2.558034636290164e-06, "loss": 0.0465, "step": 37077 }, { "epoch": 0.8170244646801853, "grad_norm": 0.32555487751960754, "learning_rate": 2.5574367102032263e-06, "loss": 0.0669, "step": 37078 }, { "epoch": 0.8170464999697015, "grad_norm": 0.4829317033290863, "learning_rate": 2.5568388474929395e-06, "loss": 0.0518, "step": 37079 }, { "epoch": 0.8170685352592176, "grad_norm": 0.7018901109695435, "learning_rate": 2.5562410481623407e-06, "loss": 0.0624, "step": 37080 }, { "epoch": 0.8170905705487338, "grad_norm": 0.644981324672699, "learning_rate": 2.555643312214479e-06, "loss": 0.0654, "step": 37081 }, { "epoch": 0.81711260583825, "grad_norm": 0.3525555431842804, "learning_rate": 2.5550456396524023e-06, "loss": 0.0475, "step": 37082 }, { "epoch": 0.8171346411277661, "grad_norm": 0.5775201320648193, "learning_rate": 2.554448030479145e-06, "loss": 0.0614, "step": 37083 }, { "epoch": 0.8171566764172823, "grad_norm": 0.2422124296426773, "learning_rate": 2.553850484697766e-06, "loss": 0.0371, "step": 37084 }, { "epoch": 0.8171787117067985, "grad_norm": 0.5092626810073853, "learning_rate": 2.553253002311295e-06, "loss": 0.048, "step": 37085 }, { "epoch": 0.8172007469963146, "grad_norm": 0.4698112905025482, "learning_rate": 2.5526555833227857e-06, "loss": 0.0585, "step": 37086 }, { "epoch": 0.8172227822858308, "grad_norm": 0.6498005390167236, "learning_rate": 2.552058227735274e-06, "loss": 0.0568, "step": 37087 }, { "epoch": 0.817244817575347, "grad_norm": 0.41658657789230347, "learning_rate": 2.5514609355518025e-06, "loss": 0.0686, "step": 37088 }, { "epoch": 0.8172668528648631, "grad_norm": 0.697310209274292, "learning_rate": 2.550863706775421e-06, "loss": 0.0824, "step": 37089 }, { "epoch": 0.8172888881543793, "grad_norm": 0.6080856919288635, "learning_rate": 2.5502665414091614e-06, "loss": 0.0706, "step": 37090 }, { "epoch": 0.8173109234438954, "grad_norm": 0.45212775468826294, "learning_rate": 2.5496694394560715e-06, "loss": 0.0488, "step": 37091 }, { "epoch": 0.8173329587334116, "grad_norm": 0.5051631927490234, "learning_rate": 2.5490724009191922e-06, "loss": 0.0475, "step": 37092 }, { "epoch": 0.8173549940229278, "grad_norm": 0.6235359907150269, "learning_rate": 2.5484754258015663e-06, "loss": 0.0445, "step": 37093 }, { "epoch": 0.8173770293124438, "grad_norm": 0.5329983830451965, "learning_rate": 2.5478785141062277e-06, "loss": 0.0394, "step": 37094 }, { "epoch": 0.81739906460196, "grad_norm": 0.5451514720916748, "learning_rate": 2.5472816658362223e-06, "loss": 0.0635, "step": 37095 }, { "epoch": 0.8174210998914762, "grad_norm": 0.4556431770324707, "learning_rate": 2.546684880994593e-06, "loss": 0.0503, "step": 37096 }, { "epoch": 0.8174431351809923, "grad_norm": 1.0254515409469604, "learning_rate": 2.5460881595843723e-06, "loss": 0.0823, "step": 37097 }, { "epoch": 0.8174651704705085, "grad_norm": 0.5861465930938721, "learning_rate": 2.5454915016086055e-06, "loss": 0.0638, "step": 37098 }, { "epoch": 0.8174872057600246, "grad_norm": 0.45389217138290405, "learning_rate": 2.5448949070703226e-06, "loss": 0.0352, "step": 37099 }, { "epoch": 0.8175092410495408, "grad_norm": 0.43184685707092285, "learning_rate": 2.5442983759725756e-06, "loss": 0.0551, "step": 37100 }, { "epoch": 0.817531276339057, "grad_norm": 0.6799647212028503, "learning_rate": 2.5437019083183925e-06, "loss": 0.0706, "step": 37101 }, { "epoch": 0.8175533116285731, "grad_norm": 0.6401037573814392, "learning_rate": 2.5431055041108193e-06, "loss": 0.073, "step": 37102 }, { "epoch": 0.8175753469180893, "grad_norm": 0.25638580322265625, "learning_rate": 2.542509163352883e-06, "loss": 0.042, "step": 37103 }, { "epoch": 0.8175973822076055, "grad_norm": 0.5337861180305481, "learning_rate": 2.541912886047633e-06, "loss": 0.0621, "step": 37104 }, { "epoch": 0.8176194174971216, "grad_norm": 0.566864013671875, "learning_rate": 2.541316672198099e-06, "loss": 0.0875, "step": 37105 }, { "epoch": 0.8176414527866378, "grad_norm": 0.7770384550094604, "learning_rate": 2.5407205218073197e-06, "loss": 0.0671, "step": 37106 }, { "epoch": 0.817663488076154, "grad_norm": 0.5741238594055176, "learning_rate": 2.5401244348783363e-06, "loss": 0.0619, "step": 37107 }, { "epoch": 0.8176855233656701, "grad_norm": 1.0674153566360474, "learning_rate": 2.5395284114141765e-06, "loss": 0.0725, "step": 37108 }, { "epoch": 0.8177075586551863, "grad_norm": 0.5789196491241455, "learning_rate": 2.538932451417884e-06, "loss": 0.0757, "step": 37109 }, { "epoch": 0.8177295939447025, "grad_norm": 1.1073347330093384, "learning_rate": 2.5383365548924836e-06, "loss": 0.0911, "step": 37110 }, { "epoch": 0.8177516292342186, "grad_norm": 0.590462327003479, "learning_rate": 2.537740721841024e-06, "loss": 0.0647, "step": 37111 }, { "epoch": 0.8177736645237348, "grad_norm": 0.4963844418525696, "learning_rate": 2.537144952266532e-06, "loss": 0.0671, "step": 37112 }, { "epoch": 0.817795699813251, "grad_norm": 0.7491641640663147, "learning_rate": 2.536549246172046e-06, "loss": 0.0533, "step": 37113 }, { "epoch": 0.8178177351027671, "grad_norm": 0.36153125762939453, "learning_rate": 2.535953603560595e-06, "loss": 0.0406, "step": 37114 }, { "epoch": 0.8178397703922833, "grad_norm": 0.3786514103412628, "learning_rate": 2.535358024435216e-06, "loss": 0.0574, "step": 37115 }, { "epoch": 0.8178618056817994, "grad_norm": 0.572356104850769, "learning_rate": 2.534762508798946e-06, "loss": 0.0625, "step": 37116 }, { "epoch": 0.8178838409713156, "grad_norm": 0.3027443289756775, "learning_rate": 2.5341670566548104e-06, "loss": 0.0519, "step": 37117 }, { "epoch": 0.8179058762608318, "grad_norm": 0.9722619652748108, "learning_rate": 2.533571668005849e-06, "loss": 0.0676, "step": 37118 }, { "epoch": 0.8179279115503478, "grad_norm": 0.7947522401809692, "learning_rate": 2.5329763428550896e-06, "loss": 0.0537, "step": 37119 }, { "epoch": 0.817949946839864, "grad_norm": 0.5002835988998413, "learning_rate": 2.5323810812055728e-06, "loss": 0.0503, "step": 37120 }, { "epoch": 0.8179719821293802, "grad_norm": 0.5502678751945496, "learning_rate": 2.531785883060319e-06, "loss": 0.0614, "step": 37121 }, { "epoch": 0.8179940174188963, "grad_norm": 0.7767685055732727, "learning_rate": 2.5311907484223682e-06, "loss": 0.0802, "step": 37122 }, { "epoch": 0.8180160527084125, "grad_norm": 0.28251343965530396, "learning_rate": 2.5305956772947512e-06, "loss": 0.0539, "step": 37123 }, { "epoch": 0.8180380879979287, "grad_norm": 0.7824233770370483, "learning_rate": 2.530000669680494e-06, "loss": 0.062, "step": 37124 }, { "epoch": 0.8180601232874448, "grad_norm": 0.42021068930625916, "learning_rate": 2.5294057255826335e-06, "loss": 0.0552, "step": 37125 }, { "epoch": 0.818082158576961, "grad_norm": 0.7220159769058228, "learning_rate": 2.5288108450041895e-06, "loss": 0.0696, "step": 37126 }, { "epoch": 0.8181041938664771, "grad_norm": 0.7301831841468811, "learning_rate": 2.528216027948208e-06, "loss": 0.0653, "step": 37127 }, { "epoch": 0.8181262291559933, "grad_norm": 0.6596913933753967, "learning_rate": 2.5276212744177046e-06, "loss": 0.0594, "step": 37128 }, { "epoch": 0.8181482644455095, "grad_norm": 0.7582686543464661, "learning_rate": 2.5270265844157153e-06, "loss": 0.0602, "step": 37129 }, { "epoch": 0.8181702997350256, "grad_norm": 0.49808868765830994, "learning_rate": 2.5264319579452725e-06, "loss": 0.0744, "step": 37130 }, { "epoch": 0.8181923350245418, "grad_norm": 0.5763039588928223, "learning_rate": 2.525837395009396e-06, "loss": 0.0683, "step": 37131 }, { "epoch": 0.818214370314058, "grad_norm": 0.6133943200111389, "learning_rate": 2.5252428956111227e-06, "loss": 0.0743, "step": 37132 }, { "epoch": 0.8182364056035741, "grad_norm": 0.8275278806686401, "learning_rate": 2.5246484597534706e-06, "loss": 0.0844, "step": 37133 }, { "epoch": 0.8182584408930903, "grad_norm": 0.21463647484779358, "learning_rate": 2.524054087439479e-06, "loss": 0.0478, "step": 37134 }, { "epoch": 0.8182804761826065, "grad_norm": 0.6322667002677917, "learning_rate": 2.5234597786721686e-06, "loss": 0.0501, "step": 37135 }, { "epoch": 0.8183025114721226, "grad_norm": 0.7681466341018677, "learning_rate": 2.5228655334545702e-06, "loss": 0.0678, "step": 37136 }, { "epoch": 0.8183245467616388, "grad_norm": 0.8606815338134766, "learning_rate": 2.522271351789705e-06, "loss": 0.0985, "step": 37137 }, { "epoch": 0.818346582051155, "grad_norm": 0.5951440334320068, "learning_rate": 2.5216772336806022e-06, "loss": 0.0594, "step": 37138 }, { "epoch": 0.8183686173406711, "grad_norm": 0.7359269857406616, "learning_rate": 2.521083179130294e-06, "loss": 0.0483, "step": 37139 }, { "epoch": 0.8183906526301873, "grad_norm": 0.28158673644065857, "learning_rate": 2.520489188141795e-06, "loss": 0.0401, "step": 37140 }, { "epoch": 0.8184126879197035, "grad_norm": 0.4830188751220703, "learning_rate": 2.5198952607181382e-06, "loss": 0.0724, "step": 37141 }, { "epoch": 0.8184347232092196, "grad_norm": 0.5401465892791748, "learning_rate": 2.519301396862347e-06, "loss": 0.0554, "step": 37142 }, { "epoch": 0.8184567584987357, "grad_norm": 0.6973366141319275, "learning_rate": 2.518707596577451e-06, "loss": 0.0506, "step": 37143 }, { "epoch": 0.8184787937882518, "grad_norm": 0.7022923827171326, "learning_rate": 2.5181138598664644e-06, "loss": 0.0584, "step": 37144 }, { "epoch": 0.818500829077768, "grad_norm": 0.8091732859611511, "learning_rate": 2.517520186732418e-06, "loss": 0.0687, "step": 37145 }, { "epoch": 0.8185228643672842, "grad_norm": 0.7397502660751343, "learning_rate": 2.51692657717834e-06, "loss": 0.0633, "step": 37146 }, { "epoch": 0.8185448996568003, "grad_norm": 0.5871734619140625, "learning_rate": 2.516333031207244e-06, "loss": 0.0728, "step": 37147 }, { "epoch": 0.8185669349463165, "grad_norm": 0.6049670577049255, "learning_rate": 2.5157395488221613e-06, "loss": 0.0649, "step": 37148 }, { "epoch": 0.8185889702358327, "grad_norm": 0.9901151657104492, "learning_rate": 2.5151461300261057e-06, "loss": 0.0698, "step": 37149 }, { "epoch": 0.8186110055253488, "grad_norm": 0.5392533540725708, "learning_rate": 2.514552774822112e-06, "loss": 0.0661, "step": 37150 }, { "epoch": 0.818633040814865, "grad_norm": 0.5263649225234985, "learning_rate": 2.513959483213194e-06, "loss": 0.0516, "step": 37151 }, { "epoch": 0.8186550761043812, "grad_norm": 0.4046822190284729, "learning_rate": 2.5133662552023744e-06, "loss": 0.042, "step": 37152 }, { "epoch": 0.8186771113938973, "grad_norm": 0.7750282287597656, "learning_rate": 2.512773090792681e-06, "loss": 0.0535, "step": 37153 }, { "epoch": 0.8186991466834135, "grad_norm": 0.7158975601196289, "learning_rate": 2.5121799899871263e-06, "loss": 0.0844, "step": 37154 }, { "epoch": 0.8187211819729296, "grad_norm": 0.7766240835189819, "learning_rate": 2.511586952788739e-06, "loss": 0.0738, "step": 37155 }, { "epoch": 0.8187432172624458, "grad_norm": 0.21495014429092407, "learning_rate": 2.5109939792005275e-06, "loss": 0.067, "step": 37156 }, { "epoch": 0.818765252551962, "grad_norm": 0.6173780560493469, "learning_rate": 2.5104010692255307e-06, "loss": 0.0693, "step": 37157 }, { "epoch": 0.8187872878414781, "grad_norm": 1.0904275178909302, "learning_rate": 2.5098082228667528e-06, "loss": 0.072, "step": 37158 }, { "epoch": 0.8188093231309943, "grad_norm": 0.7524217367172241, "learning_rate": 2.509215440127225e-06, "loss": 0.0492, "step": 37159 }, { "epoch": 0.8188313584205105, "grad_norm": 0.6111871004104614, "learning_rate": 2.508622721009956e-06, "loss": 0.0624, "step": 37160 }, { "epoch": 0.8188533937100266, "grad_norm": 0.35877636075019836, "learning_rate": 2.5080300655179705e-06, "loss": 0.0408, "step": 37161 }, { "epoch": 0.8188754289995428, "grad_norm": 0.49505916237831116, "learning_rate": 2.507437473654291e-06, "loss": 0.0528, "step": 37162 }, { "epoch": 0.818897464289059, "grad_norm": 0.7203877568244934, "learning_rate": 2.5068449454219268e-06, "loss": 0.0572, "step": 37163 }, { "epoch": 0.8189194995785751, "grad_norm": 0.6749311685562134, "learning_rate": 2.5062524808239e-06, "loss": 0.073, "step": 37164 }, { "epoch": 0.8189415348680913, "grad_norm": 0.5308600664138794, "learning_rate": 2.5056600798632305e-06, "loss": 0.0512, "step": 37165 }, { "epoch": 0.8189635701576075, "grad_norm": 0.2924340069293976, "learning_rate": 2.5050677425429376e-06, "loss": 0.0476, "step": 37166 }, { "epoch": 0.8189856054471236, "grad_norm": 0.4778202772140503, "learning_rate": 2.50447546886603e-06, "loss": 0.0966, "step": 37167 }, { "epoch": 0.8190076407366397, "grad_norm": 0.532684326171875, "learning_rate": 2.5038832588355306e-06, "loss": 0.07, "step": 37168 }, { "epoch": 0.8190296760261558, "grad_norm": 0.7804510593414307, "learning_rate": 2.5032911124544584e-06, "loss": 0.0815, "step": 37169 }, { "epoch": 0.819051711315672, "grad_norm": 0.9739143252372742, "learning_rate": 2.5026990297258212e-06, "loss": 0.0858, "step": 37170 }, { "epoch": 0.8190737466051882, "grad_norm": 0.810838520526886, "learning_rate": 2.502107010652642e-06, "loss": 0.0588, "step": 37171 }, { "epoch": 0.8190957818947043, "grad_norm": 0.5881160497665405, "learning_rate": 2.5015150552379305e-06, "loss": 0.0554, "step": 37172 }, { "epoch": 0.8191178171842205, "grad_norm": 0.6756213903427124, "learning_rate": 2.5009231634847107e-06, "loss": 0.0491, "step": 37173 }, { "epoch": 0.8191398524737367, "grad_norm": 0.47470563650131226, "learning_rate": 2.5003313353959878e-06, "loss": 0.0337, "step": 37174 }, { "epoch": 0.8191618877632528, "grad_norm": 0.8069626688957214, "learning_rate": 2.4997395709747796e-06, "loss": 0.063, "step": 37175 }, { "epoch": 0.819183923052769, "grad_norm": 0.3480517268180847, "learning_rate": 2.499147870224105e-06, "loss": 0.0582, "step": 37176 }, { "epoch": 0.8192059583422852, "grad_norm": 0.4578966200351715, "learning_rate": 2.4985562331469704e-06, "loss": 0.068, "step": 37177 }, { "epoch": 0.8192279936318013, "grad_norm": 0.44971099495887756, "learning_rate": 2.4979646597463964e-06, "loss": 0.0835, "step": 37178 }, { "epoch": 0.8192500289213175, "grad_norm": 0.8405683040618896, "learning_rate": 2.4973731500253837e-06, "loss": 0.0562, "step": 37179 }, { "epoch": 0.8192720642108337, "grad_norm": 0.4622851610183716, "learning_rate": 2.496781703986962e-06, "loss": 0.0515, "step": 37180 }, { "epoch": 0.8192940995003498, "grad_norm": 0.8428975343704224, "learning_rate": 2.4961903216341323e-06, "loss": 0.0679, "step": 37181 }, { "epoch": 0.819316134789866, "grad_norm": 0.45144709944725037, "learning_rate": 2.4955990029699157e-06, "loss": 0.043, "step": 37182 }, { "epoch": 0.8193381700793821, "grad_norm": 0.3468431234359741, "learning_rate": 2.4950077479973136e-06, "loss": 0.0459, "step": 37183 }, { "epoch": 0.8193602053688983, "grad_norm": 0.9681460857391357, "learning_rate": 2.4944165567193433e-06, "loss": 0.0678, "step": 37184 }, { "epoch": 0.8193822406584145, "grad_norm": 0.487746924161911, "learning_rate": 2.4938254291390184e-06, "loss": 0.0806, "step": 37185 }, { "epoch": 0.8194042759479306, "grad_norm": 0.6115315556526184, "learning_rate": 2.493234365259343e-06, "loss": 0.0562, "step": 37186 }, { "epoch": 0.8194263112374468, "grad_norm": 0.6482499241828918, "learning_rate": 2.492643365083332e-06, "loss": 0.0735, "step": 37187 }, { "epoch": 0.819448346526963, "grad_norm": 0.2519882619380951, "learning_rate": 2.4920524286139956e-06, "loss": 0.0875, "step": 37188 }, { "epoch": 0.8194703818164791, "grad_norm": 0.7073201537132263, "learning_rate": 2.4914615558543485e-06, "loss": 0.0366, "step": 37189 }, { "epoch": 0.8194924171059953, "grad_norm": 0.6325202584266663, "learning_rate": 2.49087074680739e-06, "loss": 0.0603, "step": 37190 }, { "epoch": 0.8195144523955115, "grad_norm": 0.5172363519668579, "learning_rate": 2.490280001476134e-06, "loss": 0.0716, "step": 37191 }, { "epoch": 0.8195364876850276, "grad_norm": 0.7658688426017761, "learning_rate": 2.489689319863595e-06, "loss": 0.0584, "step": 37192 }, { "epoch": 0.8195585229745437, "grad_norm": 1.0429022312164307, "learning_rate": 2.489098701972774e-06, "loss": 0.0829, "step": 37193 }, { "epoch": 0.8195805582640598, "grad_norm": 0.4868338108062744, "learning_rate": 2.4885081478066803e-06, "loss": 0.0468, "step": 37194 }, { "epoch": 0.819602593553576, "grad_norm": 0.555029571056366, "learning_rate": 2.487917657368326e-06, "loss": 0.0361, "step": 37195 }, { "epoch": 0.8196246288430922, "grad_norm": 0.5963971018791199, "learning_rate": 2.4873272306607177e-06, "loss": 0.0686, "step": 37196 }, { "epoch": 0.8196466641326083, "grad_norm": 0.7071384191513062, "learning_rate": 2.486736867686859e-06, "loss": 0.0689, "step": 37197 }, { "epoch": 0.8196686994221245, "grad_norm": 0.47223395109176636, "learning_rate": 2.4861465684497615e-06, "loss": 0.0451, "step": 37198 }, { "epoch": 0.8196907347116407, "grad_norm": 0.6076487302780151, "learning_rate": 2.4855563329524305e-06, "loss": 0.0598, "step": 37199 }, { "epoch": 0.8197127700011568, "grad_norm": 0.8735606670379639, "learning_rate": 2.4849661611978706e-06, "loss": 0.0768, "step": 37200 }, { "epoch": 0.819734805290673, "grad_norm": 0.49634337425231934, "learning_rate": 2.4843760531890914e-06, "loss": 0.059, "step": 37201 }, { "epoch": 0.8197568405801892, "grad_norm": 0.7643954753875732, "learning_rate": 2.48378600892909e-06, "loss": 0.0765, "step": 37202 }, { "epoch": 0.8197788758697053, "grad_norm": 0.723942756652832, "learning_rate": 2.4831960284208854e-06, "loss": 0.0786, "step": 37203 }, { "epoch": 0.8198009111592215, "grad_norm": 0.698340117931366, "learning_rate": 2.482606111667471e-06, "loss": 0.0847, "step": 37204 }, { "epoch": 0.8198229464487377, "grad_norm": 0.8035337328910828, "learning_rate": 2.48201625867186e-06, "loss": 0.0722, "step": 37205 }, { "epoch": 0.8198449817382538, "grad_norm": 0.548420250415802, "learning_rate": 2.4814264694370464e-06, "loss": 0.0466, "step": 37206 }, { "epoch": 0.81986701702777, "grad_norm": 0.9660544395446777, "learning_rate": 2.4808367439660466e-06, "loss": 0.0789, "step": 37207 }, { "epoch": 0.8198890523172861, "grad_norm": 0.585821270942688, "learning_rate": 2.480247082261859e-06, "loss": 0.0697, "step": 37208 }, { "epoch": 0.8199110876068023, "grad_norm": 0.6137564182281494, "learning_rate": 2.4796574843274838e-06, "loss": 0.0638, "step": 37209 }, { "epoch": 0.8199331228963185, "grad_norm": 0.5405049324035645, "learning_rate": 2.479067950165926e-06, "loss": 0.0611, "step": 37210 }, { "epoch": 0.8199551581858346, "grad_norm": 0.6261097192764282, "learning_rate": 2.4784784797801906e-06, "loss": 0.0623, "step": 37211 }, { "epoch": 0.8199771934753508, "grad_norm": 0.31970661878585815, "learning_rate": 2.477889073173281e-06, "loss": 0.0409, "step": 37212 }, { "epoch": 0.819999228764867, "grad_norm": 0.9650706052780151, "learning_rate": 2.477299730348195e-06, "loss": 0.0746, "step": 37213 }, { "epoch": 0.8200212640543831, "grad_norm": 0.30724388360977173, "learning_rate": 2.4767104513079357e-06, "loss": 0.0634, "step": 37214 }, { "epoch": 0.8200432993438993, "grad_norm": 0.5228525996208191, "learning_rate": 2.4761212360555068e-06, "loss": 0.0472, "step": 37215 }, { "epoch": 0.8200653346334155, "grad_norm": 0.5658591985702515, "learning_rate": 2.4755320845939105e-06, "loss": 0.0534, "step": 37216 }, { "epoch": 0.8200873699229315, "grad_norm": 0.40184473991394043, "learning_rate": 2.4749429969261428e-06, "loss": 0.0361, "step": 37217 }, { "epoch": 0.8201094052124477, "grad_norm": 0.6274628043174744, "learning_rate": 2.474353973055206e-06, "loss": 0.0465, "step": 37218 }, { "epoch": 0.8201314405019638, "grad_norm": 0.8322779536247253, "learning_rate": 2.473765012984105e-06, "loss": 0.0598, "step": 37219 }, { "epoch": 0.82015347579148, "grad_norm": 0.6741243600845337, "learning_rate": 2.4731761167158335e-06, "loss": 0.0578, "step": 37220 }, { "epoch": 0.8201755110809962, "grad_norm": 0.4398142695426941, "learning_rate": 2.4725872842533923e-06, "loss": 0.0382, "step": 37221 }, { "epoch": 0.8201975463705123, "grad_norm": 0.9280356764793396, "learning_rate": 2.4719985155997815e-06, "loss": 0.0936, "step": 37222 }, { "epoch": 0.8202195816600285, "grad_norm": 1.0114868879318237, "learning_rate": 2.4714098107580047e-06, "loss": 0.0861, "step": 37223 }, { "epoch": 0.8202416169495447, "grad_norm": 0.6652899384498596, "learning_rate": 2.4708211697310527e-06, "loss": 0.0507, "step": 37224 }, { "epoch": 0.8202636522390608, "grad_norm": 0.9377518892288208, "learning_rate": 2.470232592521927e-06, "loss": 0.0723, "step": 37225 }, { "epoch": 0.820285687528577, "grad_norm": 0.5683238506317139, "learning_rate": 2.4696440791336283e-06, "loss": 0.0611, "step": 37226 }, { "epoch": 0.8203077228180932, "grad_norm": 0.7690137028694153, "learning_rate": 2.469055629569149e-06, "loss": 0.0651, "step": 37227 }, { "epoch": 0.8203297581076093, "grad_norm": 0.6697310209274292, "learning_rate": 2.468467243831492e-06, "loss": 0.0492, "step": 37228 }, { "epoch": 0.8203517933971255, "grad_norm": 0.4337150454521179, "learning_rate": 2.4678789219236447e-06, "loss": 0.0604, "step": 37229 }, { "epoch": 0.8203738286866417, "grad_norm": 0.8303852081298828, "learning_rate": 2.4672906638486162e-06, "loss": 0.0716, "step": 37230 }, { "epoch": 0.8203958639761578, "grad_norm": 0.367462158203125, "learning_rate": 2.466702469609395e-06, "loss": 0.0556, "step": 37231 }, { "epoch": 0.820417899265674, "grad_norm": 0.6342645287513733, "learning_rate": 2.4661143392089825e-06, "loss": 0.0912, "step": 37232 }, { "epoch": 0.8204399345551902, "grad_norm": 1.1842259168624878, "learning_rate": 2.465526272650367e-06, "loss": 0.0814, "step": 37233 }, { "epoch": 0.8204619698447063, "grad_norm": 0.5229024887084961, "learning_rate": 2.4649382699365475e-06, "loss": 0.0453, "step": 37234 }, { "epoch": 0.8204840051342225, "grad_norm": 0.7260726690292358, "learning_rate": 2.464350331070522e-06, "loss": 0.0645, "step": 37235 }, { "epoch": 0.8205060404237386, "grad_norm": 0.8891053795814514, "learning_rate": 2.463762456055279e-06, "loss": 0.0716, "step": 37236 }, { "epoch": 0.8205280757132548, "grad_norm": 0.4617920517921448, "learning_rate": 2.463174644893817e-06, "loss": 0.0316, "step": 37237 }, { "epoch": 0.820550111002771, "grad_norm": 0.6695705056190491, "learning_rate": 2.4625868975891298e-06, "loss": 0.0743, "step": 37238 }, { "epoch": 0.8205721462922871, "grad_norm": 0.6622455716133118, "learning_rate": 2.461999214144213e-06, "loss": 0.0646, "step": 37239 }, { "epoch": 0.8205941815818033, "grad_norm": 0.4341064393520355, "learning_rate": 2.4614115945620553e-06, "loss": 0.0565, "step": 37240 }, { "epoch": 0.8206162168713195, "grad_norm": 0.3168672025203705, "learning_rate": 2.4608240388456514e-06, "loss": 0.0602, "step": 37241 }, { "epoch": 0.8206382521608355, "grad_norm": 0.7132585048675537, "learning_rate": 2.4602365469979983e-06, "loss": 0.0672, "step": 37242 }, { "epoch": 0.8206602874503517, "grad_norm": 0.37642526626586914, "learning_rate": 2.459649119022081e-06, "loss": 0.0505, "step": 37243 }, { "epoch": 0.8206823227398679, "grad_norm": 0.6739551424980164, "learning_rate": 2.459061754920896e-06, "loss": 0.0581, "step": 37244 }, { "epoch": 0.820704358029384, "grad_norm": 0.46505463123321533, "learning_rate": 2.4584744546974344e-06, "loss": 0.0639, "step": 37245 }, { "epoch": 0.8207263933189002, "grad_norm": 0.7753509879112244, "learning_rate": 2.4578872183546898e-06, "loss": 0.0689, "step": 37246 }, { "epoch": 0.8207484286084163, "grad_norm": 0.5772877335548401, "learning_rate": 2.4573000458956486e-06, "loss": 0.0543, "step": 37247 }, { "epoch": 0.8207704638979325, "grad_norm": 0.8398825526237488, "learning_rate": 2.456712937323305e-06, "loss": 0.0786, "step": 37248 }, { "epoch": 0.8207924991874487, "grad_norm": 0.7458464503288269, "learning_rate": 2.4561258926406517e-06, "loss": 0.0514, "step": 37249 }, { "epoch": 0.8208145344769648, "grad_norm": 0.777946412563324, "learning_rate": 2.4555389118506725e-06, "loss": 0.0636, "step": 37250 }, { "epoch": 0.820836569766481, "grad_norm": 0.40807345509529114, "learning_rate": 2.4549519949563624e-06, "loss": 0.0536, "step": 37251 }, { "epoch": 0.8208586050559972, "grad_norm": 0.4122002422809601, "learning_rate": 2.4543651419607037e-06, "loss": 0.0665, "step": 37252 }, { "epoch": 0.8208806403455133, "grad_norm": 0.4964800179004669, "learning_rate": 2.453778352866697e-06, "loss": 0.0629, "step": 37253 }, { "epoch": 0.8209026756350295, "grad_norm": 1.0174343585968018, "learning_rate": 2.4531916276773226e-06, "loss": 0.0944, "step": 37254 }, { "epoch": 0.8209247109245457, "grad_norm": 0.7613226771354675, "learning_rate": 2.4526049663955733e-06, "loss": 0.0866, "step": 37255 }, { "epoch": 0.8209467462140618, "grad_norm": 0.3613399863243103, "learning_rate": 2.452018369024433e-06, "loss": 0.0434, "step": 37256 }, { "epoch": 0.820968781503578, "grad_norm": 0.5723299980163574, "learning_rate": 2.451431835566892e-06, "loss": 0.0644, "step": 37257 }, { "epoch": 0.8209908167930942, "grad_norm": 0.435143381357193, "learning_rate": 2.450845366025941e-06, "loss": 0.0445, "step": 37258 }, { "epoch": 0.8210128520826103, "grad_norm": 0.8656960725784302, "learning_rate": 2.45025896040456e-06, "loss": 0.0826, "step": 37259 }, { "epoch": 0.8210348873721265, "grad_norm": 0.5073872208595276, "learning_rate": 2.4496726187057394e-06, "loss": 0.0579, "step": 37260 }, { "epoch": 0.8210569226616427, "grad_norm": 0.6038424372673035, "learning_rate": 2.449086340932467e-06, "loss": 0.0619, "step": 37261 }, { "epoch": 0.8210789579511588, "grad_norm": 0.38783106207847595, "learning_rate": 2.4485001270877324e-06, "loss": 0.06, "step": 37262 }, { "epoch": 0.821100993240675, "grad_norm": 0.6577913761138916, "learning_rate": 2.447913977174512e-06, "loss": 0.0748, "step": 37263 }, { "epoch": 0.8211230285301911, "grad_norm": 0.4280141294002533, "learning_rate": 2.447327891195797e-06, "loss": 0.0504, "step": 37264 }, { "epoch": 0.8211450638197073, "grad_norm": 0.830235481262207, "learning_rate": 2.4467418691545767e-06, "loss": 0.0542, "step": 37265 }, { "epoch": 0.8211670991092235, "grad_norm": 0.7246550917625427, "learning_rate": 2.446155911053828e-06, "loss": 0.0622, "step": 37266 }, { "epoch": 0.8211891343987395, "grad_norm": 0.8712592124938965, "learning_rate": 2.4455700168965395e-06, "loss": 0.0762, "step": 37267 }, { "epoch": 0.8212111696882557, "grad_norm": 0.364566832780838, "learning_rate": 2.4449841866856943e-06, "loss": 0.0484, "step": 37268 }, { "epoch": 0.8212332049777719, "grad_norm": 0.575049102306366, "learning_rate": 2.4443984204242814e-06, "loss": 0.0736, "step": 37269 }, { "epoch": 0.821255240267288, "grad_norm": 0.5558618903160095, "learning_rate": 2.443812718115277e-06, "loss": 0.0506, "step": 37270 }, { "epoch": 0.8212772755568042, "grad_norm": 0.45630404353141785, "learning_rate": 2.4432270797616667e-06, "loss": 0.046, "step": 37271 }, { "epoch": 0.8212993108463204, "grad_norm": 0.7631027102470398, "learning_rate": 2.442641505366437e-06, "loss": 0.0559, "step": 37272 }, { "epoch": 0.8213213461358365, "grad_norm": 0.6004475951194763, "learning_rate": 2.4420559949325665e-06, "loss": 0.047, "step": 37273 }, { "epoch": 0.8213433814253527, "grad_norm": 0.8760557770729065, "learning_rate": 2.44147054846304e-06, "loss": 0.0718, "step": 37274 }, { "epoch": 0.8213654167148688, "grad_norm": 0.6009066104888916, "learning_rate": 2.440885165960833e-06, "loss": 0.0756, "step": 37275 }, { "epoch": 0.821387452004385, "grad_norm": 0.9245786666870117, "learning_rate": 2.4402998474289406e-06, "loss": 0.055, "step": 37276 }, { "epoch": 0.8214094872939012, "grad_norm": 0.6612637042999268, "learning_rate": 2.439714592870331e-06, "loss": 0.0999, "step": 37277 }, { "epoch": 0.8214315225834173, "grad_norm": 0.572372317314148, "learning_rate": 2.4391294022879944e-06, "loss": 0.0609, "step": 37278 }, { "epoch": 0.8214535578729335, "grad_norm": 0.34156250953674316, "learning_rate": 2.438544275684903e-06, "loss": 0.0762, "step": 37279 }, { "epoch": 0.8214755931624497, "grad_norm": 0.6324568390846252, "learning_rate": 2.437959213064043e-06, "loss": 0.0626, "step": 37280 }, { "epoch": 0.8214976284519658, "grad_norm": 0.4487874209880829, "learning_rate": 2.4373742144283962e-06, "loss": 0.0863, "step": 37281 }, { "epoch": 0.821519663741482, "grad_norm": 0.41153812408447266, "learning_rate": 2.436789279780934e-06, "loss": 0.0422, "step": 37282 }, { "epoch": 0.8215416990309982, "grad_norm": 0.5119966864585876, "learning_rate": 2.4362044091246433e-06, "loss": 0.0427, "step": 37283 }, { "epoch": 0.8215637343205143, "grad_norm": 0.45006063580513, "learning_rate": 2.435619602462499e-06, "loss": 0.0436, "step": 37284 }, { "epoch": 0.8215857696100305, "grad_norm": 0.6475238800048828, "learning_rate": 2.435034859797487e-06, "loss": 0.062, "step": 37285 }, { "epoch": 0.8216078048995467, "grad_norm": 0.5194252729415894, "learning_rate": 2.4344501811325752e-06, "loss": 0.0524, "step": 37286 }, { "epoch": 0.8216298401890628, "grad_norm": 0.07834985852241516, "learning_rate": 2.4338655664707465e-06, "loss": 0.0503, "step": 37287 }, { "epoch": 0.821651875478579, "grad_norm": 0.17210985720157623, "learning_rate": 2.4332810158149832e-06, "loss": 0.0675, "step": 37288 }, { "epoch": 0.8216739107680952, "grad_norm": 0.8535737991333008, "learning_rate": 2.4326965291682563e-06, "loss": 0.0436, "step": 37289 }, { "epoch": 0.8216959460576113, "grad_norm": 0.5088138580322266, "learning_rate": 2.432112106533543e-06, "loss": 0.0219, "step": 37290 }, { "epoch": 0.8217179813471275, "grad_norm": 0.41396471858024597, "learning_rate": 2.4315277479138235e-06, "loss": 0.0771, "step": 37291 }, { "epoch": 0.8217400166366435, "grad_norm": 0.41971951723098755, "learning_rate": 2.430943453312075e-06, "loss": 0.0545, "step": 37292 }, { "epoch": 0.8217620519261597, "grad_norm": 0.4594171345233917, "learning_rate": 2.43035922273127e-06, "loss": 0.0522, "step": 37293 }, { "epoch": 0.8217840872156759, "grad_norm": 0.3676769733428955, "learning_rate": 2.4297750561743843e-06, "loss": 0.0371, "step": 37294 }, { "epoch": 0.821806122505192, "grad_norm": 0.9011633396148682, "learning_rate": 2.429190953644399e-06, "loss": 0.0554, "step": 37295 }, { "epoch": 0.8218281577947082, "grad_norm": 0.7062234282493591, "learning_rate": 2.428606915144282e-06, "loss": 0.0598, "step": 37296 }, { "epoch": 0.8218501930842244, "grad_norm": 0.6173629760742188, "learning_rate": 2.4280229406770136e-06, "loss": 0.0757, "step": 37297 }, { "epoch": 0.8218722283737405, "grad_norm": 0.5474081039428711, "learning_rate": 2.4274390302455605e-06, "loss": 0.05, "step": 37298 }, { "epoch": 0.8218942636632567, "grad_norm": 0.5468195676803589, "learning_rate": 2.4268551838529086e-06, "loss": 0.0585, "step": 37299 }, { "epoch": 0.8219162989527729, "grad_norm": 0.6456670165061951, "learning_rate": 2.4262714015020215e-06, "loss": 0.0491, "step": 37300 }, { "epoch": 0.821938334242289, "grad_norm": 0.7894182801246643, "learning_rate": 2.4256876831958825e-06, "loss": 0.0485, "step": 37301 }, { "epoch": 0.8219603695318052, "grad_norm": 0.681779146194458, "learning_rate": 2.425104028937454e-06, "loss": 0.0628, "step": 37302 }, { "epoch": 0.8219824048213213, "grad_norm": 0.5061388611793518, "learning_rate": 2.424520438729713e-06, "loss": 0.0545, "step": 37303 }, { "epoch": 0.8220044401108375, "grad_norm": 0.5138854384422302, "learning_rate": 2.4239369125756367e-06, "loss": 0.0573, "step": 37304 }, { "epoch": 0.8220264754003537, "grad_norm": 0.5675135850906372, "learning_rate": 2.423353450478187e-06, "loss": 0.0619, "step": 37305 }, { "epoch": 0.8220485106898698, "grad_norm": 0.7574143409729004, "learning_rate": 2.4227700524403497e-06, "loss": 0.0619, "step": 37306 }, { "epoch": 0.822070545979386, "grad_norm": 0.46681222319602966, "learning_rate": 2.4221867184650844e-06, "loss": 0.0441, "step": 37307 }, { "epoch": 0.8220925812689022, "grad_norm": 0.6802396774291992, "learning_rate": 2.4216034485553716e-06, "loss": 0.0757, "step": 37308 }, { "epoch": 0.8221146165584183, "grad_norm": 0.6208813786506653, "learning_rate": 2.421020242714173e-06, "loss": 0.0605, "step": 37309 }, { "epoch": 0.8221366518479345, "grad_norm": 0.35845911502838135, "learning_rate": 2.4204371009444642e-06, "loss": 0.0474, "step": 37310 }, { "epoch": 0.8221586871374507, "grad_norm": 0.34462282061576843, "learning_rate": 2.4198540232492196e-06, "loss": 0.0555, "step": 37311 }, { "epoch": 0.8221807224269668, "grad_norm": 0.5839429497718811, "learning_rate": 2.419271009631398e-06, "loss": 0.0617, "step": 37312 }, { "epoch": 0.822202757716483, "grad_norm": 1.0045111179351807, "learning_rate": 2.4186880600939776e-06, "loss": 0.0631, "step": 37313 }, { "epoch": 0.8222247930059992, "grad_norm": 0.7261025905609131, "learning_rate": 2.418105174639926e-06, "loss": 0.0642, "step": 37314 }, { "epoch": 0.8222468282955153, "grad_norm": 0.45970454812049866, "learning_rate": 2.4175223532722133e-06, "loss": 0.0511, "step": 37315 }, { "epoch": 0.8222688635850314, "grad_norm": 0.4372844398021698, "learning_rate": 2.416939595993805e-06, "loss": 0.073, "step": 37316 }, { "epoch": 0.8222908988745475, "grad_norm": 0.29117879271507263, "learning_rate": 2.41635690280767e-06, "loss": 0.0662, "step": 37317 }, { "epoch": 0.8223129341640637, "grad_norm": 0.3620057702064514, "learning_rate": 2.415774273716777e-06, "loss": 0.0706, "step": 37318 }, { "epoch": 0.8223349694535799, "grad_norm": 0.7451481223106384, "learning_rate": 2.4151917087240977e-06, "loss": 0.0776, "step": 37319 }, { "epoch": 0.822357004743096, "grad_norm": 0.5967095494270325, "learning_rate": 2.414609207832594e-06, "loss": 0.0543, "step": 37320 }, { "epoch": 0.8223790400326122, "grad_norm": 0.3577162027359009, "learning_rate": 2.4140267710452298e-06, "loss": 0.0555, "step": 37321 }, { "epoch": 0.8224010753221284, "grad_norm": 0.8420333862304688, "learning_rate": 2.413444398364983e-06, "loss": 0.0447, "step": 37322 }, { "epoch": 0.8224231106116445, "grad_norm": 0.787250816822052, "learning_rate": 2.4128620897948085e-06, "loss": 0.0747, "step": 37323 }, { "epoch": 0.8224451459011607, "grad_norm": 0.49738365411758423, "learning_rate": 2.412279845337682e-06, "loss": 0.0493, "step": 37324 }, { "epoch": 0.8224671811906769, "grad_norm": 0.7241270542144775, "learning_rate": 2.411697664996555e-06, "loss": 0.0719, "step": 37325 }, { "epoch": 0.822489216480193, "grad_norm": 0.3638457655906677, "learning_rate": 2.4111155487744115e-06, "loss": 0.0588, "step": 37326 }, { "epoch": 0.8225112517697092, "grad_norm": 0.27902060747146606, "learning_rate": 2.410533496674204e-06, "loss": 0.0351, "step": 37327 }, { "epoch": 0.8225332870592253, "grad_norm": 0.6312172412872314, "learning_rate": 2.4099515086988984e-06, "loss": 0.0351, "step": 37328 }, { "epoch": 0.8225553223487415, "grad_norm": 0.42138081789016724, "learning_rate": 2.409369584851466e-06, "loss": 0.0484, "step": 37329 }, { "epoch": 0.8225773576382577, "grad_norm": 0.6646273732185364, "learning_rate": 2.4087877251348624e-06, "loss": 0.0602, "step": 37330 }, { "epoch": 0.8225993929277738, "grad_norm": 0.7261884808540344, "learning_rate": 2.408205929552057e-06, "loss": 0.0437, "step": 37331 }, { "epoch": 0.82262142821729, "grad_norm": 0.4923265278339386, "learning_rate": 2.407624198106006e-06, "loss": 0.0572, "step": 37332 }, { "epoch": 0.8226434635068062, "grad_norm": 0.4913310110569, "learning_rate": 2.4070425307996837e-06, "loss": 0.0934, "step": 37333 }, { "epoch": 0.8226654987963223, "grad_norm": 0.6720952987670898, "learning_rate": 2.406460927636042e-06, "loss": 0.0771, "step": 37334 }, { "epoch": 0.8226875340858385, "grad_norm": 1.0032048225402832, "learning_rate": 2.405879388618053e-06, "loss": 0.0602, "step": 37335 }, { "epoch": 0.8227095693753547, "grad_norm": 0.4330136179924011, "learning_rate": 2.4052979137486704e-06, "loss": 0.045, "step": 37336 }, { "epoch": 0.8227316046648708, "grad_norm": 0.35894158482551575, "learning_rate": 2.404716503030858e-06, "loss": 0.0439, "step": 37337 }, { "epoch": 0.822753639954387, "grad_norm": 0.2989576458930969, "learning_rate": 2.404135156467583e-06, "loss": 0.0384, "step": 37338 }, { "epoch": 0.8227756752439032, "grad_norm": 0.6631913781166077, "learning_rate": 2.4035538740617986e-06, "loss": 0.0625, "step": 37339 }, { "epoch": 0.8227977105334193, "grad_norm": 0.6305283904075623, "learning_rate": 2.402972655816468e-06, "loss": 0.0837, "step": 37340 }, { "epoch": 0.8228197458229354, "grad_norm": 0.8930782079696655, "learning_rate": 2.4023915017345534e-06, "loss": 0.0571, "step": 37341 }, { "epoch": 0.8228417811124515, "grad_norm": 0.7881838083267212, "learning_rate": 2.4018104118190192e-06, "loss": 0.0845, "step": 37342 }, { "epoch": 0.8228638164019677, "grad_norm": 0.5851837992668152, "learning_rate": 2.4012293860728153e-06, "loss": 0.0799, "step": 37343 }, { "epoch": 0.8228858516914839, "grad_norm": 0.606227457523346, "learning_rate": 2.400648424498905e-06, "loss": 0.0666, "step": 37344 }, { "epoch": 0.822907886981, "grad_norm": 0.5815481543540955, "learning_rate": 2.4000675271002537e-06, "loss": 0.0766, "step": 37345 }, { "epoch": 0.8229299222705162, "grad_norm": 0.9863103628158569, "learning_rate": 2.3994866938798114e-06, "loss": 0.0698, "step": 37346 }, { "epoch": 0.8229519575600324, "grad_norm": 0.2675573527812958, "learning_rate": 2.398905924840545e-06, "loss": 0.0364, "step": 37347 }, { "epoch": 0.8229739928495485, "grad_norm": 0.6433087587356567, "learning_rate": 2.3983252199853993e-06, "loss": 0.0931, "step": 37348 }, { "epoch": 0.8229960281390647, "grad_norm": 0.6266147494316101, "learning_rate": 2.397744579317348e-06, "loss": 0.0851, "step": 37349 }, { "epoch": 0.8230180634285809, "grad_norm": 0.5395217537879944, "learning_rate": 2.3971640028393383e-06, "loss": 0.0383, "step": 37350 }, { "epoch": 0.823040098718097, "grad_norm": 0.7041300535202026, "learning_rate": 2.3965834905543294e-06, "loss": 0.0659, "step": 37351 }, { "epoch": 0.8230621340076132, "grad_norm": 0.7792844772338867, "learning_rate": 2.396003042465284e-06, "loss": 0.0562, "step": 37352 }, { "epoch": 0.8230841692971294, "grad_norm": 0.8537984490394592, "learning_rate": 2.39542265857515e-06, "loss": 0.0781, "step": 37353 }, { "epoch": 0.8231062045866455, "grad_norm": 0.5732394456863403, "learning_rate": 2.3948423388868913e-06, "loss": 0.0558, "step": 37354 }, { "epoch": 0.8231282398761617, "grad_norm": 0.5377519726753235, "learning_rate": 2.3942620834034523e-06, "loss": 0.0415, "step": 37355 }, { "epoch": 0.8231502751656778, "grad_norm": 0.572277307510376, "learning_rate": 2.393681892127804e-06, "loss": 0.05, "step": 37356 }, { "epoch": 0.823172310455194, "grad_norm": 0.5194791555404663, "learning_rate": 2.3931017650628913e-06, "loss": 0.0677, "step": 37357 }, { "epoch": 0.8231943457447102, "grad_norm": 0.5505660176277161, "learning_rate": 2.392521702211674e-06, "loss": 0.0532, "step": 37358 }, { "epoch": 0.8232163810342263, "grad_norm": 0.46224135160446167, "learning_rate": 2.3919417035771006e-06, "loss": 0.0711, "step": 37359 }, { "epoch": 0.8232384163237425, "grad_norm": 0.3895817697048187, "learning_rate": 2.3913617691621287e-06, "loss": 0.0568, "step": 37360 }, { "epoch": 0.8232604516132587, "grad_norm": 0.4613093435764313, "learning_rate": 2.390781898969717e-06, "loss": 0.0512, "step": 37361 }, { "epoch": 0.8232824869027748, "grad_norm": 0.5071762800216675, "learning_rate": 2.3902020930028107e-06, "loss": 0.0572, "step": 37362 }, { "epoch": 0.823304522192291, "grad_norm": 0.5441983342170715, "learning_rate": 2.389622351264367e-06, "loss": 0.0696, "step": 37363 }, { "epoch": 0.8233265574818072, "grad_norm": 0.7115218639373779, "learning_rate": 2.3890426737573394e-06, "loss": 0.0968, "step": 37364 }, { "epoch": 0.8233485927713233, "grad_norm": 0.6244207620620728, "learning_rate": 2.3884630604846836e-06, "loss": 0.0628, "step": 37365 }, { "epoch": 0.8233706280608394, "grad_norm": 0.5677216053009033, "learning_rate": 2.3878835114493447e-06, "loss": 0.0543, "step": 37366 }, { "epoch": 0.8233926633503555, "grad_norm": 0.5619331002235413, "learning_rate": 2.3873040266542775e-06, "loss": 0.0579, "step": 37367 }, { "epoch": 0.8234146986398717, "grad_norm": 0.7496966123580933, "learning_rate": 2.386724606102439e-06, "loss": 0.0576, "step": 37368 }, { "epoch": 0.8234367339293879, "grad_norm": 0.7230382561683655, "learning_rate": 2.3861452497967705e-06, "loss": 0.0704, "step": 37369 }, { "epoch": 0.823458769218904, "grad_norm": 0.5726305842399597, "learning_rate": 2.3855659577402333e-06, "loss": 0.0502, "step": 37370 }, { "epoch": 0.8234808045084202, "grad_norm": 0.7624862194061279, "learning_rate": 2.3849867299357665e-06, "loss": 0.0719, "step": 37371 }, { "epoch": 0.8235028397979364, "grad_norm": 0.4755725860595703, "learning_rate": 2.3844075663863323e-06, "loss": 0.0405, "step": 37372 }, { "epoch": 0.8235248750874525, "grad_norm": 0.4879803955554962, "learning_rate": 2.3838284670948723e-06, "loss": 0.0493, "step": 37373 }, { "epoch": 0.8235469103769687, "grad_norm": 0.398746520280838, "learning_rate": 2.3832494320643407e-06, "loss": 0.0379, "step": 37374 }, { "epoch": 0.8235689456664849, "grad_norm": 0.6862223744392395, "learning_rate": 2.382670461297688e-06, "loss": 0.0382, "step": 37375 }, { "epoch": 0.823590980956001, "grad_norm": 0.8378323912620544, "learning_rate": 2.3820915547978573e-06, "loss": 0.0638, "step": 37376 }, { "epoch": 0.8236130162455172, "grad_norm": 0.5823001861572266, "learning_rate": 2.3815127125678025e-06, "loss": 0.0762, "step": 37377 }, { "epoch": 0.8236350515350334, "grad_norm": 0.4374631941318512, "learning_rate": 2.380933934610465e-06, "loss": 0.0579, "step": 37378 }, { "epoch": 0.8236570868245495, "grad_norm": 0.6494740843772888, "learning_rate": 2.380355220928804e-06, "loss": 0.0581, "step": 37379 }, { "epoch": 0.8236791221140657, "grad_norm": 0.471587210893631, "learning_rate": 2.3797765715257575e-06, "loss": 0.0475, "step": 37380 }, { "epoch": 0.8237011574035819, "grad_norm": 0.4308115541934967, "learning_rate": 2.3791979864042805e-06, "loss": 0.074, "step": 37381 }, { "epoch": 0.823723192693098, "grad_norm": 0.6916276812553406, "learning_rate": 2.3786194655673115e-06, "loss": 0.0339, "step": 37382 }, { "epoch": 0.8237452279826142, "grad_norm": 0.3253720700740814, "learning_rate": 2.378041009017803e-06, "loss": 0.0393, "step": 37383 }, { "epoch": 0.8237672632721303, "grad_norm": 0.7181137204170227, "learning_rate": 2.3774626167587023e-06, "loss": 0.0591, "step": 37384 }, { "epoch": 0.8237892985616465, "grad_norm": 0.7274476885795593, "learning_rate": 2.3768842887929506e-06, "loss": 0.0407, "step": 37385 }, { "epoch": 0.8238113338511627, "grad_norm": 0.8321557641029358, "learning_rate": 2.3763060251234967e-06, "loss": 0.0525, "step": 37386 }, { "epoch": 0.8238333691406788, "grad_norm": 1.1157220602035522, "learning_rate": 2.375727825753284e-06, "loss": 0.0559, "step": 37387 }, { "epoch": 0.823855404430195, "grad_norm": 0.5553692579269409, "learning_rate": 2.3751496906852636e-06, "loss": 0.0731, "step": 37388 }, { "epoch": 0.8238774397197112, "grad_norm": 0.6733683943748474, "learning_rate": 2.374571619922373e-06, "loss": 0.0621, "step": 37389 }, { "epoch": 0.8238994750092272, "grad_norm": 0.43126896023750305, "learning_rate": 2.373993613467559e-06, "loss": 0.0416, "step": 37390 }, { "epoch": 0.8239215102987434, "grad_norm": 0.5278862714767456, "learning_rate": 2.373415671323768e-06, "loss": 0.0684, "step": 37391 }, { "epoch": 0.8239435455882596, "grad_norm": 0.44792836904525757, "learning_rate": 2.37283779349394e-06, "loss": 0.0718, "step": 37392 }, { "epoch": 0.8239655808777757, "grad_norm": 0.7702726125717163, "learning_rate": 2.3722599799810233e-06, "loss": 0.0367, "step": 37393 }, { "epoch": 0.8239876161672919, "grad_norm": 0.9107750654220581, "learning_rate": 2.3716822307879513e-06, "loss": 0.0774, "step": 37394 }, { "epoch": 0.824009651456808, "grad_norm": 0.47166842222213745, "learning_rate": 2.37110454591768e-06, "loss": 0.0829, "step": 37395 }, { "epoch": 0.8240316867463242, "grad_norm": 0.5343930721282959, "learning_rate": 2.370526925373143e-06, "loss": 0.0584, "step": 37396 }, { "epoch": 0.8240537220358404, "grad_norm": 0.6652653217315674, "learning_rate": 2.3699493691572836e-06, "loss": 0.0636, "step": 37397 }, { "epoch": 0.8240757573253565, "grad_norm": 0.813229501247406, "learning_rate": 2.369371877273049e-06, "loss": 0.0647, "step": 37398 }, { "epoch": 0.8240977926148727, "grad_norm": 0.45101553201675415, "learning_rate": 2.3687944497233744e-06, "loss": 0.0568, "step": 37399 }, { "epoch": 0.8241198279043889, "grad_norm": 0.6059532761573792, "learning_rate": 2.3682170865112047e-06, "loss": 0.0756, "step": 37400 }, { "epoch": 0.824141863193905, "grad_norm": 0.5975003242492676, "learning_rate": 2.367639787639473e-06, "loss": 0.0309, "step": 37401 }, { "epoch": 0.8241638984834212, "grad_norm": 0.7450557351112366, "learning_rate": 2.3670625531111317e-06, "loss": 0.0751, "step": 37402 }, { "epoch": 0.8241859337729374, "grad_norm": 0.970641553401947, "learning_rate": 2.3664853829291124e-06, "loss": 0.0638, "step": 37403 }, { "epoch": 0.8242079690624535, "grad_norm": 0.5518487095832825, "learning_rate": 2.36590827709636e-06, "loss": 0.0536, "step": 37404 }, { "epoch": 0.8242300043519697, "grad_norm": 0.4062383770942688, "learning_rate": 2.3653312356158103e-06, "loss": 0.0514, "step": 37405 }, { "epoch": 0.8242520396414859, "grad_norm": 0.5322331190109253, "learning_rate": 2.364754258490403e-06, "loss": 0.0785, "step": 37406 }, { "epoch": 0.824274074931002, "grad_norm": 0.7573494911193848, "learning_rate": 2.3641773457230816e-06, "loss": 0.1078, "step": 37407 }, { "epoch": 0.8242961102205182, "grad_norm": 0.42990079522132874, "learning_rate": 2.3636004973167765e-06, "loss": 0.0373, "step": 37408 }, { "epoch": 0.8243181455100344, "grad_norm": 0.3851125240325928, "learning_rate": 2.3630237132744302e-06, "loss": 0.0405, "step": 37409 }, { "epoch": 0.8243401807995505, "grad_norm": 0.5382395386695862, "learning_rate": 2.362446993598981e-06, "loss": 0.0519, "step": 37410 }, { "epoch": 0.8243622160890667, "grad_norm": 0.40431681275367737, "learning_rate": 2.36187033829337e-06, "loss": 0.0498, "step": 37411 }, { "epoch": 0.8243842513785828, "grad_norm": 0.5711601376533508, "learning_rate": 2.3612937473605256e-06, "loss": 0.0666, "step": 37412 }, { "epoch": 0.824406286668099, "grad_norm": 0.46527066826820374, "learning_rate": 2.360717220803391e-06, "loss": 0.0681, "step": 37413 }, { "epoch": 0.8244283219576152, "grad_norm": 0.8234582543373108, "learning_rate": 2.360140758624904e-06, "loss": 0.0802, "step": 37414 }, { "epoch": 0.8244503572471312, "grad_norm": 0.2764383554458618, "learning_rate": 2.3595643608279956e-06, "loss": 0.0678, "step": 37415 }, { "epoch": 0.8244723925366474, "grad_norm": 0.4540911912918091, "learning_rate": 2.358988027415606e-06, "loss": 0.0573, "step": 37416 }, { "epoch": 0.8244944278261636, "grad_norm": 0.2981359660625458, "learning_rate": 2.358411758390664e-06, "loss": 0.0474, "step": 37417 }, { "epoch": 0.8245164631156797, "grad_norm": 0.37341204285621643, "learning_rate": 2.3578355537561165e-06, "loss": 0.0557, "step": 37418 }, { "epoch": 0.8245384984051959, "grad_norm": 0.7102842926979065, "learning_rate": 2.357259413514888e-06, "loss": 0.0811, "step": 37419 }, { "epoch": 0.824560533694712, "grad_norm": 0.58575838804245, "learning_rate": 2.356683337669916e-06, "loss": 0.0585, "step": 37420 }, { "epoch": 0.8245825689842282, "grad_norm": 0.8219507932662964, "learning_rate": 2.35610732622414e-06, "loss": 0.0503, "step": 37421 }, { "epoch": 0.8246046042737444, "grad_norm": 0.5787466764450073, "learning_rate": 2.355531379180486e-06, "loss": 0.0687, "step": 37422 }, { "epoch": 0.8246266395632605, "grad_norm": 0.5993164777755737, "learning_rate": 2.354955496541895e-06, "loss": 0.07, "step": 37423 }, { "epoch": 0.8246486748527767, "grad_norm": 0.5837080478668213, "learning_rate": 2.354379678311288e-06, "loss": 0.0544, "step": 37424 }, { "epoch": 0.8246707101422929, "grad_norm": 0.6046508550643921, "learning_rate": 2.353803924491614e-06, "loss": 0.0846, "step": 37425 }, { "epoch": 0.824692745431809, "grad_norm": 0.7066501379013062, "learning_rate": 2.3532282350857966e-06, "loss": 0.0634, "step": 37426 }, { "epoch": 0.8247147807213252, "grad_norm": 0.7410207986831665, "learning_rate": 2.3526526100967703e-06, "loss": 0.0565, "step": 37427 }, { "epoch": 0.8247368160108414, "grad_norm": 0.47482064366340637, "learning_rate": 2.3520770495274605e-06, "loss": 0.052, "step": 37428 }, { "epoch": 0.8247588513003575, "grad_norm": 0.6554697751998901, "learning_rate": 2.3515015533808115e-06, "loss": 0.0737, "step": 37429 }, { "epoch": 0.8247808865898737, "grad_norm": 0.24341954290866852, "learning_rate": 2.3509261216597455e-06, "loss": 0.0514, "step": 37430 }, { "epoch": 0.8248029218793899, "grad_norm": 0.5066236257553101, "learning_rate": 2.350350754367197e-06, "loss": 0.0604, "step": 37431 }, { "epoch": 0.824824957168906, "grad_norm": 0.6274719834327698, "learning_rate": 2.3497754515060936e-06, "loss": 0.0697, "step": 37432 }, { "epoch": 0.8248469924584222, "grad_norm": 0.45437291264533997, "learning_rate": 2.3492002130793667e-06, "loss": 0.0358, "step": 37433 }, { "epoch": 0.8248690277479384, "grad_norm": 0.6394714117050171, "learning_rate": 2.3486250390899517e-06, "loss": 0.0528, "step": 37434 }, { "epoch": 0.8248910630374545, "grad_norm": 0.5203104019165039, "learning_rate": 2.3480499295407682e-06, "loss": 0.0419, "step": 37435 }, { "epoch": 0.8249130983269707, "grad_norm": 0.7838082909584045, "learning_rate": 2.3474748844347523e-06, "loss": 0.0571, "step": 37436 }, { "epoch": 0.8249351336164868, "grad_norm": 0.6554839611053467, "learning_rate": 2.346899903774831e-06, "loss": 0.0883, "step": 37437 }, { "epoch": 0.824957168906003, "grad_norm": 0.3291146457195282, "learning_rate": 2.3463249875639363e-06, "loss": 0.0615, "step": 37438 }, { "epoch": 0.8249792041955192, "grad_norm": 0.6790645122528076, "learning_rate": 2.3457501358049923e-06, "loss": 0.0627, "step": 37439 }, { "epoch": 0.8250012394850352, "grad_norm": 0.5982233881950378, "learning_rate": 2.3451753485009276e-06, "loss": 0.0633, "step": 37440 }, { "epoch": 0.8250232747745514, "grad_norm": 0.7431237101554871, "learning_rate": 2.344600625654674e-06, "loss": 0.0597, "step": 37441 }, { "epoch": 0.8250453100640676, "grad_norm": 1.200283408164978, "learning_rate": 2.3440259672691517e-06, "loss": 0.084, "step": 37442 }, { "epoch": 0.8250673453535837, "grad_norm": 0.4278445839881897, "learning_rate": 2.343451373347294e-06, "loss": 0.0544, "step": 37443 }, { "epoch": 0.8250893806430999, "grad_norm": 0.30015355348587036, "learning_rate": 2.3428768438920235e-06, "loss": 0.0358, "step": 37444 }, { "epoch": 0.825111415932616, "grad_norm": 0.45121780037879944, "learning_rate": 2.3423023789062727e-06, "loss": 0.043, "step": 37445 }, { "epoch": 0.8251334512221322, "grad_norm": 0.8173529505729675, "learning_rate": 2.3417279783929606e-06, "loss": 0.0834, "step": 37446 }, { "epoch": 0.8251554865116484, "grad_norm": 0.6793231964111328, "learning_rate": 2.341153642355014e-06, "loss": 0.0431, "step": 37447 }, { "epoch": 0.8251775218011645, "grad_norm": 0.5844253897666931, "learning_rate": 2.3405793707953648e-06, "loss": 0.0722, "step": 37448 }, { "epoch": 0.8251995570906807, "grad_norm": 0.5876809358596802, "learning_rate": 2.340005163716931e-06, "loss": 0.0441, "step": 37449 }, { "epoch": 0.8252215923801969, "grad_norm": 0.7323698401451111, "learning_rate": 2.339431021122642e-06, "loss": 0.0714, "step": 37450 }, { "epoch": 0.825243627669713, "grad_norm": 0.4288470149040222, "learning_rate": 2.338856943015412e-06, "loss": 0.0659, "step": 37451 }, { "epoch": 0.8252656629592292, "grad_norm": 0.9736356139183044, "learning_rate": 2.338282929398182e-06, "loss": 0.0687, "step": 37452 }, { "epoch": 0.8252876982487454, "grad_norm": 0.5709970593452454, "learning_rate": 2.3377089802738615e-06, "loss": 0.0401, "step": 37453 }, { "epoch": 0.8253097335382615, "grad_norm": 0.529131293296814, "learning_rate": 2.3371350956453844e-06, "loss": 0.0349, "step": 37454 }, { "epoch": 0.8253317688277777, "grad_norm": 0.4434538185596466, "learning_rate": 2.336561275515664e-06, "loss": 0.0645, "step": 37455 }, { "epoch": 0.8253538041172939, "grad_norm": 0.34272968769073486, "learning_rate": 2.335987519887629e-06, "loss": 0.0491, "step": 37456 }, { "epoch": 0.82537583940681, "grad_norm": 0.4916197657585144, "learning_rate": 2.3354138287642023e-06, "loss": 0.0409, "step": 37457 }, { "epoch": 0.8253978746963262, "grad_norm": 0.7104368209838867, "learning_rate": 2.334840202148302e-06, "loss": 0.0433, "step": 37458 }, { "epoch": 0.8254199099858424, "grad_norm": 0.5669918060302734, "learning_rate": 2.3342666400428513e-06, "loss": 0.052, "step": 37459 }, { "epoch": 0.8254419452753585, "grad_norm": 0.7683959603309631, "learning_rate": 2.3336931424507724e-06, "loss": 0.0717, "step": 37460 }, { "epoch": 0.8254639805648747, "grad_norm": 0.6886487603187561, "learning_rate": 2.3331197093749908e-06, "loss": 0.0588, "step": 37461 }, { "epoch": 0.8254860158543909, "grad_norm": 0.5498930215835571, "learning_rate": 2.3325463408184207e-06, "loss": 0.051, "step": 37462 }, { "epoch": 0.825508051143907, "grad_norm": 0.35257232189178467, "learning_rate": 2.331973036783983e-06, "loss": 0.0559, "step": 37463 }, { "epoch": 0.8255300864334232, "grad_norm": 0.4223114252090454, "learning_rate": 2.331399797274603e-06, "loss": 0.0565, "step": 37464 }, { "epoch": 0.8255521217229392, "grad_norm": 0.8156166672706604, "learning_rate": 2.330826622293195e-06, "loss": 0.0669, "step": 37465 }, { "epoch": 0.8255741570124554, "grad_norm": 0.6644027233123779, "learning_rate": 2.33025351184268e-06, "loss": 0.0647, "step": 37466 }, { "epoch": 0.8255961923019716, "grad_norm": 0.38958632946014404, "learning_rate": 2.3296804659259778e-06, "loss": 0.035, "step": 37467 }, { "epoch": 0.8256182275914877, "grad_norm": 0.3683455288410187, "learning_rate": 2.329107484546012e-06, "loss": 0.0447, "step": 37468 }, { "epoch": 0.8256402628810039, "grad_norm": 0.8360379934310913, "learning_rate": 2.3285345677056915e-06, "loss": 0.0742, "step": 37469 }, { "epoch": 0.8256622981705201, "grad_norm": 0.6649728417396545, "learning_rate": 2.32796171540794e-06, "loss": 0.0476, "step": 37470 }, { "epoch": 0.8256843334600362, "grad_norm": 0.1094709187746048, "learning_rate": 2.32738892765568e-06, "loss": 0.0301, "step": 37471 }, { "epoch": 0.8257063687495524, "grad_norm": 0.7384045720100403, "learning_rate": 2.3268162044518178e-06, "loss": 0.0716, "step": 37472 }, { "epoch": 0.8257284040390686, "grad_norm": 0.6742229461669922, "learning_rate": 2.3262435457992816e-06, "loss": 0.087, "step": 37473 }, { "epoch": 0.8257504393285847, "grad_norm": 0.5816571712493896, "learning_rate": 2.3256709517009755e-06, "loss": 0.049, "step": 37474 }, { "epoch": 0.8257724746181009, "grad_norm": 0.6277749538421631, "learning_rate": 2.325098422159831e-06, "loss": 0.0555, "step": 37475 }, { "epoch": 0.825794509907617, "grad_norm": 0.4327724277973175, "learning_rate": 2.324525957178752e-06, "loss": 0.0685, "step": 37476 }, { "epoch": 0.8258165451971332, "grad_norm": 0.40733879804611206, "learning_rate": 2.3239535567606646e-06, "loss": 0.0406, "step": 37477 }, { "epoch": 0.8258385804866494, "grad_norm": 0.7286877632141113, "learning_rate": 2.3233812209084764e-06, "loss": 0.0546, "step": 37478 }, { "epoch": 0.8258606157761655, "grad_norm": 0.6940365433692932, "learning_rate": 2.3228089496251037e-06, "loss": 0.0441, "step": 37479 }, { "epoch": 0.8258826510656817, "grad_norm": 0.44354721903800964, "learning_rate": 2.322236742913467e-06, "loss": 0.0338, "step": 37480 }, { "epoch": 0.8259046863551979, "grad_norm": 0.6537109017372131, "learning_rate": 2.3216646007764747e-06, "loss": 0.0582, "step": 37481 }, { "epoch": 0.825926721644714, "grad_norm": 0.6701406240463257, "learning_rate": 2.3210925232170422e-06, "loss": 0.071, "step": 37482 }, { "epoch": 0.8259487569342302, "grad_norm": 0.8852510452270508, "learning_rate": 2.3205205102380837e-06, "loss": 0.0607, "step": 37483 }, { "epoch": 0.8259707922237464, "grad_norm": 0.5826801061630249, "learning_rate": 2.3199485618425177e-06, "loss": 0.0544, "step": 37484 }, { "epoch": 0.8259928275132625, "grad_norm": 0.6639536023139954, "learning_rate": 2.3193766780332508e-06, "loss": 0.0754, "step": 37485 }, { "epoch": 0.8260148628027787, "grad_norm": 0.6587724089622498, "learning_rate": 2.3188048588131967e-06, "loss": 0.0457, "step": 37486 }, { "epoch": 0.8260368980922949, "grad_norm": 0.657227635383606, "learning_rate": 2.318233104185274e-06, "loss": 0.0756, "step": 37487 }, { "epoch": 0.826058933381811, "grad_norm": 0.5582293272018433, "learning_rate": 2.317661414152387e-06, "loss": 0.0625, "step": 37488 }, { "epoch": 0.8260809686713271, "grad_norm": 0.6724159121513367, "learning_rate": 2.317089788717451e-06, "loss": 0.0597, "step": 37489 }, { "epoch": 0.8261030039608432, "grad_norm": 0.7134541273117065, "learning_rate": 2.316518227883379e-06, "loss": 0.0649, "step": 37490 }, { "epoch": 0.8261250392503594, "grad_norm": 0.3006689250469208, "learning_rate": 2.315946731653083e-06, "loss": 0.0538, "step": 37491 }, { "epoch": 0.8261470745398756, "grad_norm": 0.6978756785392761, "learning_rate": 2.3153753000294688e-06, "loss": 0.0818, "step": 37492 }, { "epoch": 0.8261691098293917, "grad_norm": 0.35230767726898193, "learning_rate": 2.3148039330154503e-06, "loss": 0.0521, "step": 37493 }, { "epoch": 0.8261911451189079, "grad_norm": 0.6062092185020447, "learning_rate": 2.3142326306139424e-06, "loss": 0.0471, "step": 37494 }, { "epoch": 0.8262131804084241, "grad_norm": 0.7338554263114929, "learning_rate": 2.3136613928278456e-06, "loss": 0.0801, "step": 37495 }, { "epoch": 0.8262352156979402, "grad_norm": 0.4630059003829956, "learning_rate": 2.3130902196600777e-06, "loss": 0.0481, "step": 37496 }, { "epoch": 0.8262572509874564, "grad_norm": 0.6744195818901062, "learning_rate": 2.3125191111135387e-06, "loss": 0.0402, "step": 37497 }, { "epoch": 0.8262792862769726, "grad_norm": 0.5607519149780273, "learning_rate": 2.31194806719115e-06, "loss": 0.0594, "step": 37498 }, { "epoch": 0.8263013215664887, "grad_norm": 0.7885774970054626, "learning_rate": 2.311377087895809e-06, "loss": 0.0757, "step": 37499 }, { "epoch": 0.8263233568560049, "grad_norm": 0.5398817658424377, "learning_rate": 2.3108061732304343e-06, "loss": 0.0651, "step": 37500 }, { "epoch": 0.826345392145521, "grad_norm": 0.7973926663398743, "learning_rate": 2.3102353231979235e-06, "loss": 0.0704, "step": 37501 }, { "epoch": 0.8263674274350372, "grad_norm": 0.7059141993522644, "learning_rate": 2.309664537801188e-06, "loss": 0.0566, "step": 37502 }, { "epoch": 0.8263894627245534, "grad_norm": 0.4651115834712982, "learning_rate": 2.3090938170431415e-06, "loss": 0.0597, "step": 37503 }, { "epoch": 0.8264114980140695, "grad_norm": 0.4773455858230591, "learning_rate": 2.3085231609266805e-06, "loss": 0.0471, "step": 37504 }, { "epoch": 0.8264335333035857, "grad_norm": 0.7385444641113281, "learning_rate": 2.3079525694547177e-06, "loss": 0.0463, "step": 37505 }, { "epoch": 0.8264555685931019, "grad_norm": 0.6533988118171692, "learning_rate": 2.3073820426301574e-06, "loss": 0.061, "step": 37506 }, { "epoch": 0.826477603882618, "grad_norm": 0.47783276438713074, "learning_rate": 2.306811580455912e-06, "loss": 0.0543, "step": 37507 }, { "epoch": 0.8264996391721342, "grad_norm": 0.5156335234642029, "learning_rate": 2.3062411829348763e-06, "loss": 0.062, "step": 37508 }, { "epoch": 0.8265216744616504, "grad_norm": 0.7570248246192932, "learning_rate": 2.3056708500699625e-06, "loss": 0.054, "step": 37509 }, { "epoch": 0.8265437097511665, "grad_norm": 1.049636960029602, "learning_rate": 2.3051005818640787e-06, "loss": 0.1, "step": 37510 }, { "epoch": 0.8265657450406827, "grad_norm": 0.39428630471229553, "learning_rate": 2.3045303783201204e-06, "loss": 0.0502, "step": 37511 }, { "epoch": 0.8265877803301989, "grad_norm": 0.45073533058166504, "learning_rate": 2.303960239440996e-06, "loss": 0.0387, "step": 37512 }, { "epoch": 0.826609815619715, "grad_norm": 0.737946093082428, "learning_rate": 2.3033901652296108e-06, "loss": 0.0362, "step": 37513 }, { "epoch": 0.8266318509092311, "grad_norm": 0.5253621935844421, "learning_rate": 2.302820155688873e-06, "loss": 0.0342, "step": 37514 }, { "epoch": 0.8266538861987472, "grad_norm": 0.7573956251144409, "learning_rate": 2.302250210821675e-06, "loss": 0.0561, "step": 37515 }, { "epoch": 0.8266759214882634, "grad_norm": 0.5478580594062805, "learning_rate": 2.3016803306309277e-06, "loss": 0.0327, "step": 37516 }, { "epoch": 0.8266979567777796, "grad_norm": 0.6271764636039734, "learning_rate": 2.3011105151195337e-06, "loss": 0.0655, "step": 37517 }, { "epoch": 0.8267199920672957, "grad_norm": 0.8658098578453064, "learning_rate": 2.3005407642903916e-06, "loss": 0.0652, "step": 37518 }, { "epoch": 0.8267420273568119, "grad_norm": 0.6420713663101196, "learning_rate": 2.299971078146408e-06, "loss": 0.0818, "step": 37519 }, { "epoch": 0.8267640626463281, "grad_norm": 0.787913978099823, "learning_rate": 2.299401456690476e-06, "loss": 0.0847, "step": 37520 }, { "epoch": 0.8267860979358442, "grad_norm": 0.5104888081550598, "learning_rate": 2.298831899925508e-06, "loss": 0.0643, "step": 37521 }, { "epoch": 0.8268081332253604, "grad_norm": 0.5477709174156189, "learning_rate": 2.298262407854399e-06, "loss": 0.0514, "step": 37522 }, { "epoch": 0.8268301685148766, "grad_norm": 1.006021499633789, "learning_rate": 2.297692980480053e-06, "loss": 0.0638, "step": 37523 }, { "epoch": 0.8268522038043927, "grad_norm": 0.8214977383613586, "learning_rate": 2.297123617805364e-06, "loss": 0.0659, "step": 37524 }, { "epoch": 0.8268742390939089, "grad_norm": 0.5457497239112854, "learning_rate": 2.2965543198332366e-06, "loss": 0.0491, "step": 37525 }, { "epoch": 0.8268962743834251, "grad_norm": 0.8001095652580261, "learning_rate": 2.295985086566574e-06, "loss": 0.063, "step": 37526 }, { "epoch": 0.8269183096729412, "grad_norm": 0.5542411208152771, "learning_rate": 2.295415918008268e-06, "loss": 0.0483, "step": 37527 }, { "epoch": 0.8269403449624574, "grad_norm": 0.5869888067245483, "learning_rate": 2.2948468141612215e-06, "loss": 0.048, "step": 37528 }, { "epoch": 0.8269623802519736, "grad_norm": 0.597027063369751, "learning_rate": 2.2942777750283343e-06, "loss": 0.0429, "step": 37529 }, { "epoch": 0.8269844155414897, "grad_norm": 0.6507932543754578, "learning_rate": 2.2937088006125055e-06, "loss": 0.0584, "step": 37530 }, { "epoch": 0.8270064508310059, "grad_norm": 0.45581740140914917, "learning_rate": 2.293139890916626e-06, "loss": 0.044, "step": 37531 }, { "epoch": 0.827028486120522, "grad_norm": 0.49367424845695496, "learning_rate": 2.2925710459436055e-06, "loss": 0.0588, "step": 37532 }, { "epoch": 0.8270505214100382, "grad_norm": 0.6275578141212463, "learning_rate": 2.2920022656963347e-06, "loss": 0.0705, "step": 37533 }, { "epoch": 0.8270725566995544, "grad_norm": 0.4904598593711853, "learning_rate": 2.2914335501777077e-06, "loss": 0.0626, "step": 37534 }, { "epoch": 0.8270945919890705, "grad_norm": 0.5867806673049927, "learning_rate": 2.2908648993906247e-06, "loss": 0.0816, "step": 37535 }, { "epoch": 0.8271166272785867, "grad_norm": 0.7974346876144409, "learning_rate": 2.2902963133379805e-06, "loss": 0.0404, "step": 37536 }, { "epoch": 0.8271386625681029, "grad_norm": 0.7001468539237976, "learning_rate": 2.289727792022677e-06, "loss": 0.076, "step": 37537 }, { "epoch": 0.827160697857619, "grad_norm": 0.7669712901115417, "learning_rate": 2.289159335447602e-06, "loss": 0.0581, "step": 37538 }, { "epoch": 0.8271827331471351, "grad_norm": 0.9354199767112732, "learning_rate": 2.288590943615656e-06, "loss": 0.0782, "step": 37539 }, { "epoch": 0.8272047684366512, "grad_norm": 0.4737655818462372, "learning_rate": 2.2880226165297317e-06, "loss": 0.064, "step": 37540 }, { "epoch": 0.8272268037261674, "grad_norm": 0.5641043782234192, "learning_rate": 2.2874543541927274e-06, "loss": 0.0417, "step": 37541 }, { "epoch": 0.8272488390156836, "grad_norm": 0.7228168845176697, "learning_rate": 2.2868861566075368e-06, "loss": 0.0683, "step": 37542 }, { "epoch": 0.8272708743051997, "grad_norm": 0.32468846440315247, "learning_rate": 2.286318023777044e-06, "loss": 0.0537, "step": 37543 }, { "epoch": 0.8272929095947159, "grad_norm": 0.6376982927322388, "learning_rate": 2.2857499557041593e-06, "loss": 0.0704, "step": 37544 }, { "epoch": 0.8273149448842321, "grad_norm": 0.4992937445640564, "learning_rate": 2.2851819523917632e-06, "loss": 0.0564, "step": 37545 }, { "epoch": 0.8273369801737482, "grad_norm": 0.5950539708137512, "learning_rate": 2.2846140138427584e-06, "loss": 0.0523, "step": 37546 }, { "epoch": 0.8273590154632644, "grad_norm": 0.7736648321151733, "learning_rate": 2.284046140060026e-06, "loss": 0.0579, "step": 37547 }, { "epoch": 0.8273810507527806, "grad_norm": 0.45345959067344666, "learning_rate": 2.2834783310464717e-06, "loss": 0.0541, "step": 37548 }, { "epoch": 0.8274030860422967, "grad_norm": 0.6105203628540039, "learning_rate": 2.282910586804977e-06, "loss": 0.0673, "step": 37549 }, { "epoch": 0.8274251213318129, "grad_norm": 0.7067176699638367, "learning_rate": 2.2823429073384393e-06, "loss": 0.0595, "step": 37550 }, { "epoch": 0.8274471566213291, "grad_norm": 0.5650245547294617, "learning_rate": 2.281775292649753e-06, "loss": 0.0339, "step": 37551 }, { "epoch": 0.8274691919108452, "grad_norm": 0.7294296622276306, "learning_rate": 2.281207742741801e-06, "loss": 0.0585, "step": 37552 }, { "epoch": 0.8274912272003614, "grad_norm": 0.6367707252502441, "learning_rate": 2.2806402576174807e-06, "loss": 0.0529, "step": 37553 }, { "epoch": 0.8275132624898776, "grad_norm": 0.537176787853241, "learning_rate": 2.2800728372796746e-06, "loss": 0.0759, "step": 37554 }, { "epoch": 0.8275352977793937, "grad_norm": 0.8685528039932251, "learning_rate": 2.279505481731286e-06, "loss": 0.0691, "step": 37555 }, { "epoch": 0.8275573330689099, "grad_norm": 0.33766594529151917, "learning_rate": 2.278938190975192e-06, "loss": 0.0557, "step": 37556 }, { "epoch": 0.827579368358426, "grad_norm": 0.3227320611476898, "learning_rate": 2.2783709650142925e-06, "loss": 0.0705, "step": 37557 }, { "epoch": 0.8276014036479422, "grad_norm": 0.42772260308265686, "learning_rate": 2.2778038038514663e-06, "loss": 0.0594, "step": 37558 }, { "epoch": 0.8276234389374584, "grad_norm": 0.6090163588523865, "learning_rate": 2.27723670748961e-06, "loss": 0.0887, "step": 37559 }, { "epoch": 0.8276454742269745, "grad_norm": 0.4240885376930237, "learning_rate": 2.276669675931611e-06, "loss": 0.0472, "step": 37560 }, { "epoch": 0.8276675095164907, "grad_norm": 0.6413592100143433, "learning_rate": 2.2761027091803533e-06, "loss": 0.0483, "step": 37561 }, { "epoch": 0.8276895448060069, "grad_norm": 0.43117794394493103, "learning_rate": 2.2755358072387282e-06, "loss": 0.0533, "step": 37562 }, { "epoch": 0.8277115800955229, "grad_norm": 0.541472315788269, "learning_rate": 2.2749689701096223e-06, "loss": 0.0594, "step": 37563 }, { "epoch": 0.8277336153850391, "grad_norm": 0.5680387616157532, "learning_rate": 2.2744021977959255e-06, "loss": 0.0582, "step": 37564 }, { "epoch": 0.8277556506745553, "grad_norm": 0.6054340600967407, "learning_rate": 2.27383549030052e-06, "loss": 0.0675, "step": 37565 }, { "epoch": 0.8277776859640714, "grad_norm": 0.6060020923614502, "learning_rate": 2.273268847626294e-06, "loss": 0.0637, "step": 37566 }, { "epoch": 0.8277997212535876, "grad_norm": 0.5543079376220703, "learning_rate": 2.2727022697761396e-06, "loss": 0.0456, "step": 37567 }, { "epoch": 0.8278217565431037, "grad_norm": 0.7559329271316528, "learning_rate": 2.272135756752932e-06, "loss": 0.0598, "step": 37568 }, { "epoch": 0.8278437918326199, "grad_norm": 1.2777576446533203, "learning_rate": 2.271569308559566e-06, "loss": 0.0943, "step": 37569 }, { "epoch": 0.8278658271221361, "grad_norm": 0.5368375778198242, "learning_rate": 2.2710029251989162e-06, "loss": 0.0368, "step": 37570 }, { "epoch": 0.8278878624116522, "grad_norm": 0.673908531665802, "learning_rate": 2.270436606673881e-06, "loss": 0.0573, "step": 37571 }, { "epoch": 0.8279098977011684, "grad_norm": 0.5249149203300476, "learning_rate": 2.269870352987336e-06, "loss": 0.0628, "step": 37572 }, { "epoch": 0.8279319329906846, "grad_norm": 0.8496850728988647, "learning_rate": 2.269304164142168e-06, "loss": 0.0658, "step": 37573 }, { "epoch": 0.8279539682802007, "grad_norm": 0.5781410932540894, "learning_rate": 2.2687380401412634e-06, "loss": 0.0754, "step": 37574 }, { "epoch": 0.8279760035697169, "grad_norm": 0.5140408277511597, "learning_rate": 2.268171980987498e-06, "loss": 0.0436, "step": 37575 }, { "epoch": 0.8279980388592331, "grad_norm": 0.6430784463882446, "learning_rate": 2.2676059866837644e-06, "loss": 0.0375, "step": 37576 }, { "epoch": 0.8280200741487492, "grad_norm": 0.5814217925071716, "learning_rate": 2.2670400572329358e-06, "loss": 0.0647, "step": 37577 }, { "epoch": 0.8280421094382654, "grad_norm": 0.6614038944244385, "learning_rate": 2.2664741926379062e-06, "loss": 0.0553, "step": 37578 }, { "epoch": 0.8280641447277816, "grad_norm": 0.7793545722961426, "learning_rate": 2.265908392901547e-06, "loss": 0.0633, "step": 37579 }, { "epoch": 0.8280861800172977, "grad_norm": 0.38235652446746826, "learning_rate": 2.26534265802675e-06, "loss": 0.0554, "step": 37580 }, { "epoch": 0.8281082153068139, "grad_norm": 0.5389910340309143, "learning_rate": 2.264776988016387e-06, "loss": 0.0396, "step": 37581 }, { "epoch": 0.82813025059633, "grad_norm": 0.7648603320121765, "learning_rate": 2.264211382873346e-06, "loss": 0.0631, "step": 37582 }, { "epoch": 0.8281522858858462, "grad_norm": 0.41393235325813293, "learning_rate": 2.263645842600507e-06, "loss": 0.0599, "step": 37583 }, { "epoch": 0.8281743211753624, "grad_norm": 0.7790719866752625, "learning_rate": 2.263080367200747e-06, "loss": 0.0727, "step": 37584 }, { "epoch": 0.8281963564648785, "grad_norm": 0.5183695554733276, "learning_rate": 2.262514956676949e-06, "loss": 0.0332, "step": 37585 }, { "epoch": 0.8282183917543947, "grad_norm": 0.8103477358818054, "learning_rate": 2.2619496110319924e-06, "loss": 0.0546, "step": 37586 }, { "epoch": 0.8282404270439109, "grad_norm": 0.4218199551105499, "learning_rate": 2.2613843302687608e-06, "loss": 0.0674, "step": 37587 }, { "epoch": 0.8282624623334269, "grad_norm": 0.27472206950187683, "learning_rate": 2.260819114390127e-06, "loss": 0.0539, "step": 37588 }, { "epoch": 0.8282844976229431, "grad_norm": 1.1531684398651123, "learning_rate": 2.2602539633989717e-06, "loss": 0.0995, "step": 37589 }, { "epoch": 0.8283065329124593, "grad_norm": 0.6104811429977417, "learning_rate": 2.2596888772981776e-06, "loss": 0.0654, "step": 37590 }, { "epoch": 0.8283285682019754, "grad_norm": 0.7893813252449036, "learning_rate": 2.259123856090618e-06, "loss": 0.0816, "step": 37591 }, { "epoch": 0.8283506034914916, "grad_norm": 0.7406895160675049, "learning_rate": 2.258558899779175e-06, "loss": 0.0546, "step": 37592 }, { "epoch": 0.8283726387810078, "grad_norm": 0.49602484703063965, "learning_rate": 2.2579940083667167e-06, "loss": 0.0482, "step": 37593 }, { "epoch": 0.8283946740705239, "grad_norm": 0.28714028000831604, "learning_rate": 2.2574291818561356e-06, "loss": 0.0388, "step": 37594 }, { "epoch": 0.8284167093600401, "grad_norm": 0.42783284187316895, "learning_rate": 2.2568644202502974e-06, "loss": 0.0477, "step": 37595 }, { "epoch": 0.8284387446495562, "grad_norm": 0.4992193877696991, "learning_rate": 2.256299723552082e-06, "loss": 0.0617, "step": 37596 }, { "epoch": 0.8284607799390724, "grad_norm": 0.3190675377845764, "learning_rate": 2.2557350917643687e-06, "loss": 0.0586, "step": 37597 }, { "epoch": 0.8284828152285886, "grad_norm": 1.3168314695358276, "learning_rate": 2.2551705248900286e-06, "loss": 0.0734, "step": 37598 }, { "epoch": 0.8285048505181047, "grad_norm": 0.5117694139480591, "learning_rate": 2.254606022931942e-06, "loss": 0.0552, "step": 37599 }, { "epoch": 0.8285268858076209, "grad_norm": 0.6493894457817078, "learning_rate": 2.2540415858929742e-06, "loss": 0.043, "step": 37600 }, { "epoch": 0.8285489210971371, "grad_norm": 0.9637489914894104, "learning_rate": 2.253477213776014e-06, "loss": 0.0623, "step": 37601 }, { "epoch": 0.8285709563866532, "grad_norm": 0.6424643397331238, "learning_rate": 2.2529129065839277e-06, "loss": 0.0465, "step": 37602 }, { "epoch": 0.8285929916761694, "grad_norm": 0.4673328697681427, "learning_rate": 2.2523486643195927e-06, "loss": 0.0488, "step": 37603 }, { "epoch": 0.8286150269656856, "grad_norm": 0.958918035030365, "learning_rate": 2.2517844869858807e-06, "loss": 0.0626, "step": 37604 }, { "epoch": 0.8286370622552017, "grad_norm": 0.4803304374217987, "learning_rate": 2.2512203745856654e-06, "loss": 0.0372, "step": 37605 }, { "epoch": 0.8286590975447179, "grad_norm": 0.623548686504364, "learning_rate": 2.2506563271218245e-06, "loss": 0.0713, "step": 37606 }, { "epoch": 0.8286811328342341, "grad_norm": 0.7248535752296448, "learning_rate": 2.2500923445972243e-06, "loss": 0.0768, "step": 37607 }, { "epoch": 0.8287031681237502, "grad_norm": 0.22553148865699768, "learning_rate": 2.249528427014741e-06, "loss": 0.0522, "step": 37608 }, { "epoch": 0.8287252034132664, "grad_norm": 0.8389946222305298, "learning_rate": 2.248964574377247e-06, "loss": 0.0867, "step": 37609 }, { "epoch": 0.8287472387027826, "grad_norm": 0.6372729539871216, "learning_rate": 2.2484007866876167e-06, "loss": 0.0694, "step": 37610 }, { "epoch": 0.8287692739922987, "grad_norm": 0.5850512981414795, "learning_rate": 2.247837063948716e-06, "loss": 0.0575, "step": 37611 }, { "epoch": 0.8287913092818149, "grad_norm": 0.718826413154602, "learning_rate": 2.2472734061634197e-06, "loss": 0.0678, "step": 37612 }, { "epoch": 0.8288133445713309, "grad_norm": 0.9760429859161377, "learning_rate": 2.246709813334602e-06, "loss": 0.0648, "step": 37613 }, { "epoch": 0.8288353798608471, "grad_norm": 0.6399453282356262, "learning_rate": 2.246146285465127e-06, "loss": 0.0614, "step": 37614 }, { "epoch": 0.8288574151503633, "grad_norm": 0.7388128042221069, "learning_rate": 2.245582822557871e-06, "loss": 0.0619, "step": 37615 }, { "epoch": 0.8288794504398794, "grad_norm": 0.5579478144645691, "learning_rate": 2.2450194246156937e-06, "loss": 0.0606, "step": 37616 }, { "epoch": 0.8289014857293956, "grad_norm": 0.6029030680656433, "learning_rate": 2.2444560916414804e-06, "loss": 0.0409, "step": 37617 }, { "epoch": 0.8289235210189118, "grad_norm": 0.692093551158905, "learning_rate": 2.2438928236380874e-06, "loss": 0.0694, "step": 37618 }, { "epoch": 0.8289455563084279, "grad_norm": 0.81586092710495, "learning_rate": 2.243329620608389e-06, "loss": 0.061, "step": 37619 }, { "epoch": 0.8289675915979441, "grad_norm": 0.7429302930831909, "learning_rate": 2.2427664825552586e-06, "loss": 0.0614, "step": 37620 }, { "epoch": 0.8289896268874603, "grad_norm": 0.546447217464447, "learning_rate": 2.2422034094815546e-06, "loss": 0.0432, "step": 37621 }, { "epoch": 0.8290116621769764, "grad_norm": 0.6270520091056824, "learning_rate": 2.2416404013901537e-06, "loss": 0.0633, "step": 37622 }, { "epoch": 0.8290336974664926, "grad_norm": 0.5247234106063843, "learning_rate": 2.2410774582839117e-06, "loss": 0.0639, "step": 37623 }, { "epoch": 0.8290557327560087, "grad_norm": 0.5041602849960327, "learning_rate": 2.240514580165713e-06, "loss": 0.0468, "step": 37624 }, { "epoch": 0.8290777680455249, "grad_norm": 0.5479490756988525, "learning_rate": 2.23995176703841e-06, "loss": 0.0702, "step": 37625 }, { "epoch": 0.8290998033350411, "grad_norm": 0.48184606432914734, "learning_rate": 2.239389018904881e-06, "loss": 0.045, "step": 37626 }, { "epoch": 0.8291218386245572, "grad_norm": 0.4524904191493988, "learning_rate": 2.238826335767982e-06, "loss": 0.0406, "step": 37627 }, { "epoch": 0.8291438739140734, "grad_norm": 0.6936922669410706, "learning_rate": 2.2382637176305832e-06, "loss": 0.0758, "step": 37628 }, { "epoch": 0.8291659092035896, "grad_norm": 0.9635453820228577, "learning_rate": 2.2377011644955547e-06, "loss": 0.0632, "step": 37629 }, { "epoch": 0.8291879444931057, "grad_norm": 0.25661584734916687, "learning_rate": 2.237138676365754e-06, "loss": 0.033, "step": 37630 }, { "epoch": 0.8292099797826219, "grad_norm": 0.6639823317527771, "learning_rate": 2.2365762532440493e-06, "loss": 0.0561, "step": 37631 }, { "epoch": 0.8292320150721381, "grad_norm": 0.5539794564247131, "learning_rate": 2.2360138951333077e-06, "loss": 0.0639, "step": 37632 }, { "epoch": 0.8292540503616542, "grad_norm": 0.3617092967033386, "learning_rate": 2.2354516020363936e-06, "loss": 0.0407, "step": 37633 }, { "epoch": 0.8292760856511704, "grad_norm": 0.5209371447563171, "learning_rate": 2.2348893739561667e-06, "loss": 0.097, "step": 37634 }, { "epoch": 0.8292981209406866, "grad_norm": 0.5133836269378662, "learning_rate": 2.2343272108954944e-06, "loss": 0.0489, "step": 37635 }, { "epoch": 0.8293201562302027, "grad_norm": 0.3415355682373047, "learning_rate": 2.233765112857241e-06, "loss": 0.0738, "step": 37636 }, { "epoch": 0.8293421915197188, "grad_norm": 1.0652631521224976, "learning_rate": 2.2332030798442647e-06, "loss": 0.0624, "step": 37637 }, { "epoch": 0.8293642268092349, "grad_norm": 1.1601155996322632, "learning_rate": 2.232641111859435e-06, "loss": 0.0726, "step": 37638 }, { "epoch": 0.8293862620987511, "grad_norm": 0.37275952100753784, "learning_rate": 2.2320792089056037e-06, "loss": 0.0575, "step": 37639 }, { "epoch": 0.8294082973882673, "grad_norm": 0.6201568245887756, "learning_rate": 2.231517370985647e-06, "loss": 0.0693, "step": 37640 }, { "epoch": 0.8294303326777834, "grad_norm": 0.5622377991676331, "learning_rate": 2.2309555981024156e-06, "loss": 0.0415, "step": 37641 }, { "epoch": 0.8294523679672996, "grad_norm": 0.7642540335655212, "learning_rate": 2.230393890258774e-06, "loss": 0.0495, "step": 37642 }, { "epoch": 0.8294744032568158, "grad_norm": 0.3192306160926819, "learning_rate": 2.2298322474575837e-06, "loss": 0.072, "step": 37643 }, { "epoch": 0.8294964385463319, "grad_norm": 0.402340292930603, "learning_rate": 2.2292706697017094e-06, "loss": 0.0628, "step": 37644 }, { "epoch": 0.8295184738358481, "grad_norm": 0.4857886731624603, "learning_rate": 2.228709156994009e-06, "loss": 0.0558, "step": 37645 }, { "epoch": 0.8295405091253643, "grad_norm": 0.5666553974151611, "learning_rate": 2.2281477093373326e-06, "loss": 0.0665, "step": 37646 }, { "epoch": 0.8295625444148804, "grad_norm": 0.48588916659355164, "learning_rate": 2.227586326734557e-06, "loss": 0.0557, "step": 37647 }, { "epoch": 0.8295845797043966, "grad_norm": 0.2808842658996582, "learning_rate": 2.2270250091885296e-06, "loss": 0.0523, "step": 37648 }, { "epoch": 0.8296066149939127, "grad_norm": 0.7280776500701904, "learning_rate": 2.2264637567021166e-06, "loss": 0.0751, "step": 37649 }, { "epoch": 0.8296286502834289, "grad_norm": 0.4228939712047577, "learning_rate": 2.225902569278166e-06, "loss": 0.037, "step": 37650 }, { "epoch": 0.8296506855729451, "grad_norm": 0.5564318299293518, "learning_rate": 2.2253414469195516e-06, "loss": 0.0484, "step": 37651 }, { "epoch": 0.8296727208624612, "grad_norm": 0.5499351024627686, "learning_rate": 2.2247803896291195e-06, "loss": 0.0729, "step": 37652 }, { "epoch": 0.8296947561519774, "grad_norm": 0.6982595920562744, "learning_rate": 2.2242193974097346e-06, "loss": 0.0734, "step": 37653 }, { "epoch": 0.8297167914414936, "grad_norm": 0.493179589509964, "learning_rate": 2.2236584702642475e-06, "loss": 0.0635, "step": 37654 }, { "epoch": 0.8297388267310097, "grad_norm": 0.5671809315681458, "learning_rate": 2.223097608195521e-06, "loss": 0.0362, "step": 37655 }, { "epoch": 0.8297608620205259, "grad_norm": 0.5819451212882996, "learning_rate": 2.2225368112064115e-06, "loss": 0.0785, "step": 37656 }, { "epoch": 0.8297828973100421, "grad_norm": 0.4168979227542877, "learning_rate": 2.221976079299771e-06, "loss": 0.0324, "step": 37657 }, { "epoch": 0.8298049325995582, "grad_norm": 0.7812048196792603, "learning_rate": 2.221415412478458e-06, "loss": 0.0852, "step": 37658 }, { "epoch": 0.8298269678890744, "grad_norm": 0.6186449527740479, "learning_rate": 2.2208548107453297e-06, "loss": 0.076, "step": 37659 }, { "epoch": 0.8298490031785906, "grad_norm": 0.3230748474597931, "learning_rate": 2.2202942741032422e-06, "loss": 0.0437, "step": 37660 }, { "epoch": 0.8298710384681067, "grad_norm": 0.7468995451927185, "learning_rate": 2.219733802555047e-06, "loss": 0.0545, "step": 37661 }, { "epoch": 0.8298930737576228, "grad_norm": 0.8013719916343689, "learning_rate": 2.219173396103601e-06, "loss": 0.0543, "step": 37662 }, { "epoch": 0.8299151090471389, "grad_norm": 0.32707479596138, "learning_rate": 2.2186130547517607e-06, "loss": 0.0517, "step": 37663 }, { "epoch": 0.8299371443366551, "grad_norm": 0.8028072714805603, "learning_rate": 2.2180527785023757e-06, "loss": 0.0788, "step": 37664 }, { "epoch": 0.8299591796261713, "grad_norm": 0.863823652267456, "learning_rate": 2.217492567358302e-06, "loss": 0.0809, "step": 37665 }, { "epoch": 0.8299812149156874, "grad_norm": 0.6161072850227356, "learning_rate": 2.2169324213223935e-06, "loss": 0.0609, "step": 37666 }, { "epoch": 0.8300032502052036, "grad_norm": 0.6033787727355957, "learning_rate": 2.216372340397505e-06, "loss": 0.0808, "step": 37667 }, { "epoch": 0.8300252854947198, "grad_norm": 0.6513058543205261, "learning_rate": 2.215812324586486e-06, "loss": 0.0479, "step": 37668 }, { "epoch": 0.8300473207842359, "grad_norm": 0.6494669318199158, "learning_rate": 2.215252373892189e-06, "loss": 0.0576, "step": 37669 }, { "epoch": 0.8300693560737521, "grad_norm": 0.5240141749382019, "learning_rate": 2.2146924883174713e-06, "loss": 0.0652, "step": 37670 }, { "epoch": 0.8300913913632683, "grad_norm": 0.4751715362071991, "learning_rate": 2.2141326678651765e-06, "loss": 0.0561, "step": 37671 }, { "epoch": 0.8301134266527844, "grad_norm": 1.1930885314941406, "learning_rate": 2.213572912538163e-06, "loss": 0.0755, "step": 37672 }, { "epoch": 0.8301354619423006, "grad_norm": 0.4963124394416809, "learning_rate": 2.2130132223392745e-06, "loss": 0.0411, "step": 37673 }, { "epoch": 0.8301574972318168, "grad_norm": 0.31047323346138, "learning_rate": 2.2124535972713734e-06, "loss": 0.067, "step": 37674 }, { "epoch": 0.8301795325213329, "grad_norm": 0.49630895256996155, "learning_rate": 2.2118940373372985e-06, "loss": 0.0866, "step": 37675 }, { "epoch": 0.8302015678108491, "grad_norm": 0.47149500250816345, "learning_rate": 2.2113345425399086e-06, "loss": 0.0516, "step": 37676 }, { "epoch": 0.8302236031003652, "grad_norm": 0.45395082235336304, "learning_rate": 2.2107751128820468e-06, "loss": 0.0761, "step": 37677 }, { "epoch": 0.8302456383898814, "grad_norm": 0.3031317889690399, "learning_rate": 2.2102157483665653e-06, "loss": 0.0394, "step": 37678 }, { "epoch": 0.8302676736793976, "grad_norm": 0.4372605085372925, "learning_rate": 2.2096564489963163e-06, "loss": 0.0543, "step": 37679 }, { "epoch": 0.8302897089689137, "grad_norm": 0.6163830161094666, "learning_rate": 2.2090972147741424e-06, "loss": 0.0567, "step": 37680 }, { "epoch": 0.8303117442584299, "grad_norm": 0.4249558746814728, "learning_rate": 2.2085380457028963e-06, "loss": 0.0379, "step": 37681 }, { "epoch": 0.8303337795479461, "grad_norm": 0.5839989185333252, "learning_rate": 2.2079789417854245e-06, "loss": 0.0608, "step": 37682 }, { "epoch": 0.8303558148374622, "grad_norm": 0.6829910278320312, "learning_rate": 2.2074199030245793e-06, "loss": 0.0559, "step": 37683 }, { "epoch": 0.8303778501269784, "grad_norm": 0.7607871890068054, "learning_rate": 2.2068609294232006e-06, "loss": 0.0713, "step": 37684 }, { "epoch": 0.8303998854164946, "grad_norm": 0.5278798937797546, "learning_rate": 2.2063020209841406e-06, "loss": 0.062, "step": 37685 }, { "epoch": 0.8304219207060107, "grad_norm": 0.7667942643165588, "learning_rate": 2.205743177710247e-06, "loss": 0.0559, "step": 37686 }, { "epoch": 0.8304439559955268, "grad_norm": 0.5643758773803711, "learning_rate": 2.2051843996043613e-06, "loss": 0.0693, "step": 37687 }, { "epoch": 0.830465991285043, "grad_norm": 0.4665709435939789, "learning_rate": 2.2046256866693325e-06, "loss": 0.0552, "step": 37688 }, { "epoch": 0.8304880265745591, "grad_norm": 0.44906502962112427, "learning_rate": 2.204067038908007e-06, "loss": 0.0796, "step": 37689 }, { "epoch": 0.8305100618640753, "grad_norm": 0.6499910950660706, "learning_rate": 2.2035084563232324e-06, "loss": 0.0555, "step": 37690 }, { "epoch": 0.8305320971535914, "grad_norm": 0.4937271475791931, "learning_rate": 2.2029499389178477e-06, "loss": 0.0582, "step": 37691 }, { "epoch": 0.8305541324431076, "grad_norm": 0.4965931475162506, "learning_rate": 2.2023914866947027e-06, "loss": 0.0496, "step": 37692 }, { "epoch": 0.8305761677326238, "grad_norm": 0.6204419732093811, "learning_rate": 2.2018330996566434e-06, "loss": 0.0741, "step": 37693 }, { "epoch": 0.8305982030221399, "grad_norm": 0.6569132208824158, "learning_rate": 2.2012747778065074e-06, "loss": 0.0698, "step": 37694 }, { "epoch": 0.8306202383116561, "grad_norm": 0.8264813423156738, "learning_rate": 2.200716521147144e-06, "loss": 0.0942, "step": 37695 }, { "epoch": 0.8306422736011723, "grad_norm": 0.9117235541343689, "learning_rate": 2.2001583296813898e-06, "loss": 0.0921, "step": 37696 }, { "epoch": 0.8306643088906884, "grad_norm": 0.8637601137161255, "learning_rate": 2.1996002034121002e-06, "loss": 0.0655, "step": 37697 }, { "epoch": 0.8306863441802046, "grad_norm": 0.5148653984069824, "learning_rate": 2.199042142342107e-06, "loss": 0.0474, "step": 37698 }, { "epoch": 0.8307083794697208, "grad_norm": 0.8254539966583252, "learning_rate": 2.198484146474259e-06, "loss": 0.0602, "step": 37699 }, { "epoch": 0.8307304147592369, "grad_norm": 0.5583075881004333, "learning_rate": 2.1979262158113937e-06, "loss": 0.0566, "step": 37700 }, { "epoch": 0.8307524500487531, "grad_norm": 0.5273329019546509, "learning_rate": 2.1973683503563545e-06, "loss": 0.0624, "step": 37701 }, { "epoch": 0.8307744853382693, "grad_norm": 0.762164294719696, "learning_rate": 2.1968105501119873e-06, "loss": 0.0591, "step": 37702 }, { "epoch": 0.8307965206277854, "grad_norm": 0.5450578927993774, "learning_rate": 2.1962528150811264e-06, "loss": 0.0689, "step": 37703 }, { "epoch": 0.8308185559173016, "grad_norm": 0.6442375183105469, "learning_rate": 2.1956951452666155e-06, "loss": 0.0364, "step": 37704 }, { "epoch": 0.8308405912068177, "grad_norm": 0.5307544469833374, "learning_rate": 2.1951375406712954e-06, "loss": 0.0584, "step": 37705 }, { "epoch": 0.8308626264963339, "grad_norm": 0.42281392216682434, "learning_rate": 2.194580001298011e-06, "loss": 0.0587, "step": 37706 }, { "epoch": 0.8308846617858501, "grad_norm": 0.4565323293209076, "learning_rate": 2.194022527149592e-06, "loss": 0.0346, "step": 37707 }, { "epoch": 0.8309066970753662, "grad_norm": 0.4247007966041565, "learning_rate": 2.193465118228885e-06, "loss": 0.0492, "step": 37708 }, { "epoch": 0.8309287323648824, "grad_norm": 1.0189995765686035, "learning_rate": 2.1929077745387306e-06, "loss": 0.0692, "step": 37709 }, { "epoch": 0.8309507676543986, "grad_norm": 0.602639377117157, "learning_rate": 2.1923504960819614e-06, "loss": 0.0607, "step": 37710 }, { "epoch": 0.8309728029439147, "grad_norm": 0.4232916831970215, "learning_rate": 2.1917932828614184e-06, "loss": 0.0416, "step": 37711 }, { "epoch": 0.8309948382334308, "grad_norm": 0.4035179316997528, "learning_rate": 2.1912361348799396e-06, "loss": 0.0544, "step": 37712 }, { "epoch": 0.831016873522947, "grad_norm": 1.4248740673065186, "learning_rate": 2.1906790521403697e-06, "loss": 0.0859, "step": 37713 }, { "epoch": 0.8310389088124631, "grad_norm": 0.9592893123626709, "learning_rate": 2.1901220346455358e-06, "loss": 0.0678, "step": 37714 }, { "epoch": 0.8310609441019793, "grad_norm": 0.763867974281311, "learning_rate": 2.1895650823982795e-06, "loss": 0.0447, "step": 37715 }, { "epoch": 0.8310829793914954, "grad_norm": 0.45744946599006653, "learning_rate": 2.1890081954014397e-06, "loss": 0.0531, "step": 37716 }, { "epoch": 0.8311050146810116, "grad_norm": 0.6405451893806458, "learning_rate": 2.1884513736578494e-06, "loss": 0.0589, "step": 37717 }, { "epoch": 0.8311270499705278, "grad_norm": 0.5165956616401672, "learning_rate": 2.1878946171703494e-06, "loss": 0.0647, "step": 37718 }, { "epoch": 0.8311490852600439, "grad_norm": 0.3009290397167206, "learning_rate": 2.1873379259417647e-06, "loss": 0.0303, "step": 37719 }, { "epoch": 0.8311711205495601, "grad_norm": 0.6074486970901489, "learning_rate": 2.186781299974946e-06, "loss": 0.0458, "step": 37720 }, { "epoch": 0.8311931558390763, "grad_norm": 0.508240282535553, "learning_rate": 2.186224739272719e-06, "loss": 0.0484, "step": 37721 }, { "epoch": 0.8312151911285924, "grad_norm": 0.8113730549812317, "learning_rate": 2.185668243837922e-06, "loss": 0.0649, "step": 37722 }, { "epoch": 0.8312372264181086, "grad_norm": 0.9547656774520874, "learning_rate": 2.185111813673386e-06, "loss": 0.0802, "step": 37723 }, { "epoch": 0.8312592617076248, "grad_norm": 0.7371789813041687, "learning_rate": 2.1845554487819486e-06, "loss": 0.0584, "step": 37724 }, { "epoch": 0.8312812969971409, "grad_norm": 0.37140917778015137, "learning_rate": 2.183999149166444e-06, "loss": 0.0777, "step": 37725 }, { "epoch": 0.8313033322866571, "grad_norm": 0.7315810918807983, "learning_rate": 2.183442914829702e-06, "loss": 0.059, "step": 37726 }, { "epoch": 0.8313253675761733, "grad_norm": 0.9153745174407959, "learning_rate": 2.1828867457745566e-06, "loss": 0.0819, "step": 37727 }, { "epoch": 0.8313474028656894, "grad_norm": 0.48372653126716614, "learning_rate": 2.1823306420038424e-06, "loss": 0.0784, "step": 37728 }, { "epoch": 0.8313694381552056, "grad_norm": 0.6424186825752258, "learning_rate": 2.1817746035203958e-06, "loss": 0.0568, "step": 37729 }, { "epoch": 0.8313914734447218, "grad_norm": 0.7462779879570007, "learning_rate": 2.181218630327042e-06, "loss": 0.0592, "step": 37730 }, { "epoch": 0.8314135087342379, "grad_norm": 0.5261315703392029, "learning_rate": 2.1806627224266147e-06, "loss": 0.0607, "step": 37731 }, { "epoch": 0.8314355440237541, "grad_norm": 0.5177592635154724, "learning_rate": 2.180106879821949e-06, "loss": 0.0387, "step": 37732 }, { "epoch": 0.8314575793132702, "grad_norm": 1.0420098304748535, "learning_rate": 2.179551102515872e-06, "loss": 0.0805, "step": 37733 }, { "epoch": 0.8314796146027864, "grad_norm": 0.517128050327301, "learning_rate": 2.178995390511216e-06, "loss": 0.0499, "step": 37734 }, { "epoch": 0.8315016498923026, "grad_norm": 0.289429634809494, "learning_rate": 2.17843974381081e-06, "loss": 0.0572, "step": 37735 }, { "epoch": 0.8315236851818186, "grad_norm": 0.43897199630737305, "learning_rate": 2.1778841624174905e-06, "loss": 0.0518, "step": 37736 }, { "epoch": 0.8315457204713348, "grad_norm": 0.709162175655365, "learning_rate": 2.1773286463340785e-06, "loss": 0.0588, "step": 37737 }, { "epoch": 0.831567755760851, "grad_norm": 0.4415188133716583, "learning_rate": 2.1767731955634085e-06, "loss": 0.0725, "step": 37738 }, { "epoch": 0.8315897910503671, "grad_norm": 0.4879055917263031, "learning_rate": 2.1762178101083114e-06, "loss": 0.0469, "step": 37739 }, { "epoch": 0.8316118263398833, "grad_norm": 0.3697017431259155, "learning_rate": 2.1756624899716116e-06, "loss": 0.0493, "step": 37740 }, { "epoch": 0.8316338616293995, "grad_norm": 0.3578518331050873, "learning_rate": 2.175107235156142e-06, "loss": 0.0575, "step": 37741 }, { "epoch": 0.8316558969189156, "grad_norm": 0.7755348682403564, "learning_rate": 2.1745520456647217e-06, "loss": 0.0741, "step": 37742 }, { "epoch": 0.8316779322084318, "grad_norm": 0.4868355691432953, "learning_rate": 2.1739969215001937e-06, "loss": 0.0477, "step": 37743 }, { "epoch": 0.831699967497948, "grad_norm": 0.4984031617641449, "learning_rate": 2.173441862665372e-06, "loss": 0.0593, "step": 37744 }, { "epoch": 0.8317220027874641, "grad_norm": 0.46688905358314514, "learning_rate": 2.1728868691630938e-06, "loss": 0.0407, "step": 37745 }, { "epoch": 0.8317440380769803, "grad_norm": 0.9330708980560303, "learning_rate": 2.1723319409961774e-06, "loss": 0.0585, "step": 37746 }, { "epoch": 0.8317660733664964, "grad_norm": 0.522696852684021, "learning_rate": 2.1717770781674522e-06, "loss": 0.0551, "step": 37747 }, { "epoch": 0.8317881086560126, "grad_norm": 0.4828202426433563, "learning_rate": 2.17122228067975e-06, "loss": 0.0589, "step": 37748 }, { "epoch": 0.8318101439455288, "grad_norm": 0.6004570126533508, "learning_rate": 2.170667548535888e-06, "loss": 0.0672, "step": 37749 }, { "epoch": 0.8318321792350449, "grad_norm": 0.9807796478271484, "learning_rate": 2.1701128817386955e-06, "loss": 0.096, "step": 37750 }, { "epoch": 0.8318542145245611, "grad_norm": 0.5120671987533569, "learning_rate": 2.1695582802909992e-06, "loss": 0.0412, "step": 37751 }, { "epoch": 0.8318762498140773, "grad_norm": 0.7022807598114014, "learning_rate": 2.1690037441956247e-06, "loss": 0.0855, "step": 37752 }, { "epoch": 0.8318982851035934, "grad_norm": 0.5607863664627075, "learning_rate": 2.1684492734553897e-06, "loss": 0.0739, "step": 37753 }, { "epoch": 0.8319203203931096, "grad_norm": 0.4086906313896179, "learning_rate": 2.1678948680731286e-06, "loss": 0.0496, "step": 37754 }, { "epoch": 0.8319423556826258, "grad_norm": 0.5397855639457703, "learning_rate": 2.167340528051658e-06, "loss": 0.071, "step": 37755 }, { "epoch": 0.8319643909721419, "grad_norm": 0.5319569110870361, "learning_rate": 2.166786253393805e-06, "loss": 0.0657, "step": 37756 }, { "epoch": 0.8319864262616581, "grad_norm": 0.397236704826355, "learning_rate": 2.1662320441023892e-06, "loss": 0.054, "step": 37757 }, { "epoch": 0.8320084615511742, "grad_norm": 0.601570725440979, "learning_rate": 2.1656779001802352e-06, "loss": 0.0654, "step": 37758 }, { "epoch": 0.8320304968406904, "grad_norm": 0.5051525831222534, "learning_rate": 2.1651238216301706e-06, "loss": 0.0454, "step": 37759 }, { "epoch": 0.8320525321302066, "grad_norm": 0.3790896534919739, "learning_rate": 2.164569808455008e-06, "loss": 0.0552, "step": 37760 }, { "epoch": 0.8320745674197226, "grad_norm": 1.0370218753814697, "learning_rate": 2.1640158606575757e-06, "loss": 0.0589, "step": 37761 }, { "epoch": 0.8320966027092388, "grad_norm": 0.4776133894920349, "learning_rate": 2.163461978240692e-06, "loss": 0.0663, "step": 37762 }, { "epoch": 0.832118637998755, "grad_norm": 0.5610703229904175, "learning_rate": 2.162908161207184e-06, "loss": 0.0616, "step": 37763 }, { "epoch": 0.8321406732882711, "grad_norm": 0.5113065838813782, "learning_rate": 2.1623544095598663e-06, "loss": 0.0823, "step": 37764 }, { "epoch": 0.8321627085777873, "grad_norm": 0.6947847008705139, "learning_rate": 2.1618007233015606e-06, "loss": 0.0649, "step": 37765 }, { "epoch": 0.8321847438673035, "grad_norm": 0.31817230582237244, "learning_rate": 2.161247102435092e-06, "loss": 0.032, "step": 37766 }, { "epoch": 0.8322067791568196, "grad_norm": 0.8084784746170044, "learning_rate": 2.1606935469632734e-06, "loss": 0.0541, "step": 37767 }, { "epoch": 0.8322288144463358, "grad_norm": 0.7408010959625244, "learning_rate": 2.16014005688893e-06, "loss": 0.0697, "step": 37768 }, { "epoch": 0.832250849735852, "grad_norm": 0.684197187423706, "learning_rate": 2.159586632214872e-06, "loss": 0.0603, "step": 37769 }, { "epoch": 0.8322728850253681, "grad_norm": 0.5405712127685547, "learning_rate": 2.1590332729439322e-06, "loss": 0.0511, "step": 37770 }, { "epoch": 0.8322949203148843, "grad_norm": 0.7543171644210815, "learning_rate": 2.1584799790789194e-06, "loss": 0.0544, "step": 37771 }, { "epoch": 0.8323169556044004, "grad_norm": 0.6198931336402893, "learning_rate": 2.157926750622657e-06, "loss": 0.0711, "step": 37772 }, { "epoch": 0.8323389908939166, "grad_norm": 0.7199852466583252, "learning_rate": 2.1573735875779555e-06, "loss": 0.0767, "step": 37773 }, { "epoch": 0.8323610261834328, "grad_norm": 1.0974735021591187, "learning_rate": 2.156820489947638e-06, "loss": 0.084, "step": 37774 }, { "epoch": 0.8323830614729489, "grad_norm": 0.27011141180992126, "learning_rate": 2.1562674577345237e-06, "loss": 0.0571, "step": 37775 }, { "epoch": 0.8324050967624651, "grad_norm": 0.9594655632972717, "learning_rate": 2.1557144909414206e-06, "loss": 0.0611, "step": 37776 }, { "epoch": 0.8324271320519813, "grad_norm": 0.36159396171569824, "learning_rate": 2.1551615895711565e-06, "loss": 0.0738, "step": 37777 }, { "epoch": 0.8324491673414974, "grad_norm": 0.44493958353996277, "learning_rate": 2.15460875362654e-06, "loss": 0.0499, "step": 37778 }, { "epoch": 0.8324712026310136, "grad_norm": 0.4001995623111725, "learning_rate": 2.1540559831103933e-06, "loss": 0.0624, "step": 37779 }, { "epoch": 0.8324932379205298, "grad_norm": 0.5477465391159058, "learning_rate": 2.1535032780255237e-06, "loss": 0.0431, "step": 37780 }, { "epoch": 0.8325152732100459, "grad_norm": 0.5878081917762756, "learning_rate": 2.152950638374752e-06, "loss": 0.0694, "step": 37781 }, { "epoch": 0.8325373084995621, "grad_norm": 0.3718689978122711, "learning_rate": 2.152398064160893e-06, "loss": 0.0527, "step": 37782 }, { "epoch": 0.8325593437890783, "grad_norm": 0.6759371161460876, "learning_rate": 2.1518455553867595e-06, "loss": 0.0431, "step": 37783 }, { "epoch": 0.8325813790785944, "grad_norm": 0.555164635181427, "learning_rate": 2.151293112055164e-06, "loss": 0.0601, "step": 37784 }, { "epoch": 0.8326034143681106, "grad_norm": 0.5226560831069946, "learning_rate": 2.150740734168924e-06, "loss": 0.0486, "step": 37785 }, { "epoch": 0.8326254496576266, "grad_norm": 0.2709062099456787, "learning_rate": 2.1501884217308543e-06, "loss": 0.0654, "step": 37786 }, { "epoch": 0.8326474849471428, "grad_norm": 0.5440676808357239, "learning_rate": 2.1496361747437625e-06, "loss": 0.0892, "step": 37787 }, { "epoch": 0.832669520236659, "grad_norm": 0.8192827105522156, "learning_rate": 2.149083993210465e-06, "loss": 0.0669, "step": 37788 }, { "epoch": 0.8326915555261751, "grad_norm": 0.4795544445514679, "learning_rate": 2.1485318771337776e-06, "loss": 0.0642, "step": 37789 }, { "epoch": 0.8327135908156913, "grad_norm": 0.4029042720794678, "learning_rate": 2.1479798265165045e-06, "loss": 0.0541, "step": 37790 }, { "epoch": 0.8327356261052075, "grad_norm": 0.6996539831161499, "learning_rate": 2.1474278413614654e-06, "loss": 0.07, "step": 37791 }, { "epoch": 0.8327576613947236, "grad_norm": 0.7710662484169006, "learning_rate": 2.1468759216714616e-06, "loss": 0.0579, "step": 37792 }, { "epoch": 0.8327796966842398, "grad_norm": 0.6185348033905029, "learning_rate": 2.146324067449319e-06, "loss": 0.0511, "step": 37793 }, { "epoch": 0.832801731973756, "grad_norm": 0.5414549708366394, "learning_rate": 2.145772278697837e-06, "loss": 0.0637, "step": 37794 }, { "epoch": 0.8328237672632721, "grad_norm": 0.8501675724983215, "learning_rate": 2.1452205554198314e-06, "loss": 0.0493, "step": 37795 }, { "epoch": 0.8328458025527883, "grad_norm": 0.558158278465271, "learning_rate": 2.1446688976181084e-06, "loss": 0.0563, "step": 37796 }, { "epoch": 0.8328678378423044, "grad_norm": 0.7072134613990784, "learning_rate": 2.144117305295481e-06, "loss": 0.0855, "step": 37797 }, { "epoch": 0.8328898731318206, "grad_norm": 0.580438494682312, "learning_rate": 2.1435657784547617e-06, "loss": 0.0646, "step": 37798 }, { "epoch": 0.8329119084213368, "grad_norm": 0.8068702220916748, "learning_rate": 2.1430143170987486e-06, "loss": 0.0833, "step": 37799 }, { "epoch": 0.8329339437108529, "grad_norm": 0.47414353489875793, "learning_rate": 2.1424629212302645e-06, "loss": 0.0606, "step": 37800 }, { "epoch": 0.8329559790003691, "grad_norm": 0.3110716938972473, "learning_rate": 2.141911590852108e-06, "loss": 0.0418, "step": 37801 }, { "epoch": 0.8329780142898853, "grad_norm": 0.36797410249710083, "learning_rate": 2.1413603259670943e-06, "loss": 0.0392, "step": 37802 }, { "epoch": 0.8330000495794014, "grad_norm": 0.47983425855636597, "learning_rate": 2.1408091265780244e-06, "loss": 0.0585, "step": 37803 }, { "epoch": 0.8330220848689176, "grad_norm": 0.7921182513237, "learning_rate": 2.140257992687709e-06, "loss": 0.0828, "step": 37804 }, { "epoch": 0.8330441201584338, "grad_norm": 0.531463086605072, "learning_rate": 2.1397069242989596e-06, "loss": 0.0492, "step": 37805 }, { "epoch": 0.8330661554479499, "grad_norm": 0.8128259778022766, "learning_rate": 2.139155921414577e-06, "loss": 0.0647, "step": 37806 }, { "epoch": 0.8330881907374661, "grad_norm": 0.671134889125824, "learning_rate": 2.1386049840373677e-06, "loss": 0.0614, "step": 37807 }, { "epoch": 0.8331102260269823, "grad_norm": 0.4402436316013336, "learning_rate": 2.138054112170141e-06, "loss": 0.0394, "step": 37808 }, { "epoch": 0.8331322613164984, "grad_norm": 0.7204303741455078, "learning_rate": 2.1375033058157057e-06, "loss": 0.0647, "step": 37809 }, { "epoch": 0.8331542966060145, "grad_norm": 0.3771391212940216, "learning_rate": 2.1369525649768584e-06, "loss": 0.0495, "step": 37810 }, { "epoch": 0.8331763318955306, "grad_norm": 0.8850873708724976, "learning_rate": 2.1364018896564103e-06, "loss": 0.0653, "step": 37811 }, { "epoch": 0.8331983671850468, "grad_norm": 0.945557177066803, "learning_rate": 2.1358512798571687e-06, "loss": 0.0839, "step": 37812 }, { "epoch": 0.833220402474563, "grad_norm": 0.4828462302684784, "learning_rate": 2.135300735581932e-06, "loss": 0.0511, "step": 37813 }, { "epoch": 0.8332424377640791, "grad_norm": 0.7413923144340515, "learning_rate": 2.1347502568335108e-06, "loss": 0.0683, "step": 37814 }, { "epoch": 0.8332644730535953, "grad_norm": 0.7063577771186829, "learning_rate": 2.134199843614698e-06, "loss": 0.0521, "step": 37815 }, { "epoch": 0.8332865083431115, "grad_norm": 0.19732193648815155, "learning_rate": 2.1336494959283115e-06, "loss": 0.0367, "step": 37816 }, { "epoch": 0.8333085436326276, "grad_norm": 0.5107675194740295, "learning_rate": 2.133099213777144e-06, "loss": 0.0703, "step": 37817 }, { "epoch": 0.8333305789221438, "grad_norm": 0.6720353364944458, "learning_rate": 2.132548997164001e-06, "loss": 0.0627, "step": 37818 }, { "epoch": 0.83335261421166, "grad_norm": 0.6142431497573853, "learning_rate": 2.1319988460916912e-06, "loss": 0.0536, "step": 37819 }, { "epoch": 0.8333746495011761, "grad_norm": 0.9884106516838074, "learning_rate": 2.1314487605630083e-06, "loss": 0.0663, "step": 37820 }, { "epoch": 0.8333966847906923, "grad_norm": 0.5855191349983215, "learning_rate": 2.130898740580759e-06, "loss": 0.0664, "step": 37821 }, { "epoch": 0.8334187200802085, "grad_norm": 0.5531908273696899, "learning_rate": 2.130348786147739e-06, "loss": 0.0376, "step": 37822 }, { "epoch": 0.8334407553697246, "grad_norm": 0.7281715273857117, "learning_rate": 2.129798897266759e-06, "loss": 0.0542, "step": 37823 }, { "epoch": 0.8334627906592408, "grad_norm": 0.37249094247817993, "learning_rate": 2.1292490739406114e-06, "loss": 0.0496, "step": 37824 }, { "epoch": 0.833484825948757, "grad_norm": 0.8433479070663452, "learning_rate": 2.128699316172104e-06, "loss": 0.0659, "step": 37825 }, { "epoch": 0.8335068612382731, "grad_norm": 0.8604870438575745, "learning_rate": 2.1281496239640308e-06, "loss": 0.05, "step": 37826 }, { "epoch": 0.8335288965277893, "grad_norm": 0.5477675795555115, "learning_rate": 2.127599997319192e-06, "loss": 0.0508, "step": 37827 }, { "epoch": 0.8335509318173054, "grad_norm": 0.7017402648925781, "learning_rate": 2.127050436240393e-06, "loss": 0.0792, "step": 37828 }, { "epoch": 0.8335729671068216, "grad_norm": 0.30044025182724, "learning_rate": 2.1265009407304278e-06, "loss": 0.0397, "step": 37829 }, { "epoch": 0.8335950023963378, "grad_norm": 0.8566606640815735, "learning_rate": 2.1259515107920937e-06, "loss": 0.0713, "step": 37830 }, { "epoch": 0.8336170376858539, "grad_norm": 0.5896453857421875, "learning_rate": 2.1254021464281938e-06, "loss": 0.0418, "step": 37831 }, { "epoch": 0.8336390729753701, "grad_norm": 0.5217325687408447, "learning_rate": 2.1248528476415274e-06, "loss": 0.0494, "step": 37832 }, { "epoch": 0.8336611082648863, "grad_norm": 0.46942445635795593, "learning_rate": 2.124303614434888e-06, "loss": 0.0322, "step": 37833 }, { "epoch": 0.8336831435544024, "grad_norm": 0.6339551210403442, "learning_rate": 2.123754446811072e-06, "loss": 0.0699, "step": 37834 }, { "epoch": 0.8337051788439185, "grad_norm": 0.49980872869491577, "learning_rate": 2.123205344772885e-06, "loss": 0.046, "step": 37835 }, { "epoch": 0.8337272141334346, "grad_norm": 0.4264600872993469, "learning_rate": 2.122656308323114e-06, "loss": 0.1081, "step": 37836 }, { "epoch": 0.8337492494229508, "grad_norm": 0.3879435062408447, "learning_rate": 2.1221073374645626e-06, "loss": 0.0652, "step": 37837 }, { "epoch": 0.833771284712467, "grad_norm": 0.46071764826774597, "learning_rate": 2.121558432200018e-06, "loss": 0.0509, "step": 37838 }, { "epoch": 0.8337933200019831, "grad_norm": 0.1716204583644867, "learning_rate": 2.1210095925322897e-06, "loss": 0.0419, "step": 37839 }, { "epoch": 0.8338153552914993, "grad_norm": 0.4377085566520691, "learning_rate": 2.120460818464162e-06, "loss": 0.0509, "step": 37840 }, { "epoch": 0.8338373905810155, "grad_norm": 0.5025621056556702, "learning_rate": 2.119912109998433e-06, "loss": 0.0604, "step": 37841 }, { "epoch": 0.8338594258705316, "grad_norm": 0.09969690442085266, "learning_rate": 2.1193634671379034e-06, "loss": 0.037, "step": 37842 }, { "epoch": 0.8338814611600478, "grad_norm": 0.44957858324050903, "learning_rate": 2.1188148898853583e-06, "loss": 0.0772, "step": 37843 }, { "epoch": 0.833903496449564, "grad_norm": 0.4597269892692566, "learning_rate": 2.1182663782435994e-06, "loss": 0.0658, "step": 37844 }, { "epoch": 0.8339255317390801, "grad_norm": 0.83506178855896, "learning_rate": 2.1177179322154122e-06, "loss": 0.0465, "step": 37845 }, { "epoch": 0.8339475670285963, "grad_norm": 0.5840506553649902, "learning_rate": 2.117169551803602e-06, "loss": 0.0412, "step": 37846 }, { "epoch": 0.8339696023181125, "grad_norm": 0.5184914469718933, "learning_rate": 2.116621237010952e-06, "loss": 0.076, "step": 37847 }, { "epoch": 0.8339916376076286, "grad_norm": 0.8000216484069824, "learning_rate": 2.1160729878402617e-06, "loss": 0.0689, "step": 37848 }, { "epoch": 0.8340136728971448, "grad_norm": 0.48099228739738464, "learning_rate": 2.115524804294317e-06, "loss": 0.0597, "step": 37849 }, { "epoch": 0.834035708186661, "grad_norm": 0.3742634654045105, "learning_rate": 2.114976686375914e-06, "loss": 0.0602, "step": 37850 }, { "epoch": 0.8340577434761771, "grad_norm": 0.5176946520805359, "learning_rate": 2.1144286340878487e-06, "loss": 0.0403, "step": 37851 }, { "epoch": 0.8340797787656933, "grad_norm": 0.5947650671005249, "learning_rate": 2.113880647432904e-06, "loss": 0.0505, "step": 37852 }, { "epoch": 0.8341018140552094, "grad_norm": 0.46120569109916687, "learning_rate": 2.1133327264138748e-06, "loss": 0.0469, "step": 37853 }, { "epoch": 0.8341238493447256, "grad_norm": 0.6811137199401855, "learning_rate": 2.112784871033553e-06, "loss": 0.0662, "step": 37854 }, { "epoch": 0.8341458846342418, "grad_norm": 0.37178272008895874, "learning_rate": 2.112237081294732e-06, "loss": 0.0653, "step": 37855 }, { "epoch": 0.8341679199237579, "grad_norm": 0.516567051410675, "learning_rate": 2.111689357200194e-06, "loss": 0.032, "step": 37856 }, { "epoch": 0.8341899552132741, "grad_norm": 0.40776321291923523, "learning_rate": 2.111141698752734e-06, "loss": 0.0593, "step": 37857 }, { "epoch": 0.8342119905027903, "grad_norm": 0.635509729385376, "learning_rate": 2.1105941059551443e-06, "loss": 0.0596, "step": 37858 }, { "epoch": 0.8342340257923064, "grad_norm": 0.5207112431526184, "learning_rate": 2.1100465788102064e-06, "loss": 0.0567, "step": 37859 }, { "epoch": 0.8342560610818225, "grad_norm": 0.5194671750068665, "learning_rate": 2.109499117320718e-06, "loss": 0.0811, "step": 37860 }, { "epoch": 0.8342780963713387, "grad_norm": 0.7047863602638245, "learning_rate": 2.1089517214894564e-06, "loss": 0.0739, "step": 37861 }, { "epoch": 0.8343001316608548, "grad_norm": 0.7986361384391785, "learning_rate": 2.108404391319222e-06, "loss": 0.0875, "step": 37862 }, { "epoch": 0.834322166950371, "grad_norm": 0.6786827445030212, "learning_rate": 2.107857126812795e-06, "loss": 0.0767, "step": 37863 }, { "epoch": 0.8343442022398871, "grad_norm": 0.5224837064743042, "learning_rate": 2.107309927972965e-06, "loss": 0.0709, "step": 37864 }, { "epoch": 0.8343662375294033, "grad_norm": 0.5876286029815674, "learning_rate": 2.1067627948025186e-06, "loss": 0.0396, "step": 37865 }, { "epoch": 0.8343882728189195, "grad_norm": 0.6959789991378784, "learning_rate": 2.1062157273042464e-06, "loss": 0.0748, "step": 37866 }, { "epoch": 0.8344103081084356, "grad_norm": 1.0192300081253052, "learning_rate": 2.1056687254809313e-06, "loss": 0.113, "step": 37867 }, { "epoch": 0.8344323433979518, "grad_norm": 0.7954691052436829, "learning_rate": 2.105121789335353e-06, "loss": 0.0556, "step": 37868 }, { "epoch": 0.834454378687468, "grad_norm": 0.7968462109565735, "learning_rate": 2.1045749188703124e-06, "loss": 0.0753, "step": 37869 }, { "epoch": 0.8344764139769841, "grad_norm": 0.5858058333396912, "learning_rate": 2.104028114088582e-06, "loss": 0.0513, "step": 37870 }, { "epoch": 0.8344984492665003, "grad_norm": 0.8966612815856934, "learning_rate": 2.1034813749929567e-06, "loss": 0.098, "step": 37871 }, { "epoch": 0.8345204845560165, "grad_norm": 0.5374777913093567, "learning_rate": 2.102934701586209e-06, "loss": 0.0478, "step": 37872 }, { "epoch": 0.8345425198455326, "grad_norm": 0.9592604637145996, "learning_rate": 2.1023880938711364e-06, "loss": 0.0605, "step": 37873 }, { "epoch": 0.8345645551350488, "grad_norm": 0.816683292388916, "learning_rate": 2.101841551850516e-06, "loss": 0.0688, "step": 37874 }, { "epoch": 0.834586590424565, "grad_norm": 0.7469397783279419, "learning_rate": 2.101295075527134e-06, "loss": 0.0639, "step": 37875 }, { "epoch": 0.8346086257140811, "grad_norm": 0.56791752576828, "learning_rate": 2.1007486649037714e-06, "loss": 0.0421, "step": 37876 }, { "epoch": 0.8346306610035973, "grad_norm": 0.6521636247634888, "learning_rate": 2.1002023199832137e-06, "loss": 0.0489, "step": 37877 }, { "epoch": 0.8346526962931134, "grad_norm": 0.7904618382453918, "learning_rate": 2.099656040768245e-06, "loss": 0.0782, "step": 37878 }, { "epoch": 0.8346747315826296, "grad_norm": 0.6450263857841492, "learning_rate": 2.0991098272616422e-06, "loss": 0.0576, "step": 37879 }, { "epoch": 0.8346967668721458, "grad_norm": 0.5758535861968994, "learning_rate": 2.0985636794661923e-06, "loss": 0.0431, "step": 37880 }, { "epoch": 0.8347188021616619, "grad_norm": 0.4214935600757599, "learning_rate": 2.0980175973846748e-06, "loss": 0.0689, "step": 37881 }, { "epoch": 0.8347408374511781, "grad_norm": 0.8110688328742981, "learning_rate": 2.0974715810198764e-06, "loss": 0.0671, "step": 37882 }, { "epoch": 0.8347628727406943, "grad_norm": 0.6055516004562378, "learning_rate": 2.096925630374571e-06, "loss": 0.0869, "step": 37883 }, { "epoch": 0.8347849080302104, "grad_norm": 0.6517865061759949, "learning_rate": 2.0963797454515415e-06, "loss": 0.0521, "step": 37884 }, { "epoch": 0.8348069433197265, "grad_norm": 0.5166701674461365, "learning_rate": 2.095833926253574e-06, "loss": 0.0686, "step": 37885 }, { "epoch": 0.8348289786092427, "grad_norm": 0.7877784967422485, "learning_rate": 2.0952881727834398e-06, "loss": 0.0591, "step": 37886 }, { "epoch": 0.8348510138987588, "grad_norm": 0.6032758951187134, "learning_rate": 2.094742485043923e-06, "loss": 0.0707, "step": 37887 }, { "epoch": 0.834873049188275, "grad_norm": 0.822413980960846, "learning_rate": 2.094196863037804e-06, "loss": 0.0574, "step": 37888 }, { "epoch": 0.8348950844777911, "grad_norm": 0.6801546812057495, "learning_rate": 2.0936513067678626e-06, "loss": 0.0547, "step": 37889 }, { "epoch": 0.8349171197673073, "grad_norm": 0.8225128054618835, "learning_rate": 2.093105816236874e-06, "loss": 0.0583, "step": 37890 }, { "epoch": 0.8349391550568235, "grad_norm": 0.6013711094856262, "learning_rate": 2.092560391447619e-06, "loss": 0.0626, "step": 37891 }, { "epoch": 0.8349611903463396, "grad_norm": 0.5132738351821899, "learning_rate": 2.0920150324028768e-06, "loss": 0.0715, "step": 37892 }, { "epoch": 0.8349832256358558, "grad_norm": 0.43377965688705444, "learning_rate": 2.091469739105423e-06, "loss": 0.0582, "step": 37893 }, { "epoch": 0.835005260925372, "grad_norm": 0.5988202095031738, "learning_rate": 2.0909245115580374e-06, "loss": 0.0764, "step": 37894 }, { "epoch": 0.8350272962148881, "grad_norm": 0.5695146918296814, "learning_rate": 2.09037934976349e-06, "loss": 0.0576, "step": 37895 }, { "epoch": 0.8350493315044043, "grad_norm": 0.6568001508712769, "learning_rate": 2.08983425372457e-06, "loss": 0.057, "step": 37896 }, { "epoch": 0.8350713667939205, "grad_norm": 0.3705413043498993, "learning_rate": 2.0892892234440443e-06, "loss": 0.0474, "step": 37897 }, { "epoch": 0.8350934020834366, "grad_norm": 0.2164604514837265, "learning_rate": 2.088744258924693e-06, "loss": 0.0421, "step": 37898 }, { "epoch": 0.8351154373729528, "grad_norm": 0.38549673557281494, "learning_rate": 2.0881993601692897e-06, "loss": 0.0409, "step": 37899 }, { "epoch": 0.835137472662469, "grad_norm": 0.579652726650238, "learning_rate": 2.0876545271806097e-06, "loss": 0.0451, "step": 37900 }, { "epoch": 0.8351595079519851, "grad_norm": 0.8067365884780884, "learning_rate": 2.087109759961433e-06, "loss": 0.0834, "step": 37901 }, { "epoch": 0.8351815432415013, "grad_norm": 0.6359978914260864, "learning_rate": 2.0865650585145274e-06, "loss": 0.0589, "step": 37902 }, { "epoch": 0.8352035785310175, "grad_norm": 0.35152652859687805, "learning_rate": 2.0860204228426705e-06, "loss": 0.0716, "step": 37903 }, { "epoch": 0.8352256138205336, "grad_norm": 0.6933881044387817, "learning_rate": 2.085475852948637e-06, "loss": 0.0666, "step": 37904 }, { "epoch": 0.8352476491100498, "grad_norm": 0.3978886008262634, "learning_rate": 2.084931348835204e-06, "loss": 0.0591, "step": 37905 }, { "epoch": 0.835269684399566, "grad_norm": 0.6983561515808105, "learning_rate": 2.0843869105051366e-06, "loss": 0.0768, "step": 37906 }, { "epoch": 0.8352917196890821, "grad_norm": 0.5206704139709473, "learning_rate": 2.0838425379612125e-06, "loss": 0.0344, "step": 37907 }, { "epoch": 0.8353137549785983, "grad_norm": 0.673982560634613, "learning_rate": 2.0832982312062094e-06, "loss": 0.0573, "step": 37908 }, { "epoch": 0.8353357902681143, "grad_norm": 0.754488468170166, "learning_rate": 2.0827539902428917e-06, "loss": 0.0723, "step": 37909 }, { "epoch": 0.8353578255576305, "grad_norm": 0.7387789487838745, "learning_rate": 2.082209815074034e-06, "loss": 0.0759, "step": 37910 }, { "epoch": 0.8353798608471467, "grad_norm": 0.5002087950706482, "learning_rate": 2.081665705702407e-06, "loss": 0.0463, "step": 37911 }, { "epoch": 0.8354018961366628, "grad_norm": 0.3243291974067688, "learning_rate": 2.081121662130789e-06, "loss": 0.0516, "step": 37912 }, { "epoch": 0.835423931426179, "grad_norm": 0.4656735360622406, "learning_rate": 2.080577684361944e-06, "loss": 0.0513, "step": 37913 }, { "epoch": 0.8354459667156952, "grad_norm": 0.7541963458061218, "learning_rate": 2.0800337723986418e-06, "loss": 0.0531, "step": 37914 }, { "epoch": 0.8354680020052113, "grad_norm": 0.48053449392318726, "learning_rate": 2.0794899262436616e-06, "loss": 0.0552, "step": 37915 }, { "epoch": 0.8354900372947275, "grad_norm": 0.38395678997039795, "learning_rate": 2.0789461458997634e-06, "loss": 0.0525, "step": 37916 }, { "epoch": 0.8355120725842436, "grad_norm": 0.5569342970848083, "learning_rate": 2.0784024313697243e-06, "loss": 0.0679, "step": 37917 }, { "epoch": 0.8355341078737598, "grad_norm": 0.5251829624176025, "learning_rate": 2.0778587826563055e-06, "loss": 0.0548, "step": 37918 }, { "epoch": 0.835556143163276, "grad_norm": 0.802298367023468, "learning_rate": 2.077315199762287e-06, "loss": 0.066, "step": 37919 }, { "epoch": 0.8355781784527921, "grad_norm": 0.7306436896324158, "learning_rate": 2.0767716826904277e-06, "loss": 0.0449, "step": 37920 }, { "epoch": 0.8356002137423083, "grad_norm": 0.8129546046257019, "learning_rate": 2.0762282314435053e-06, "loss": 0.0583, "step": 37921 }, { "epoch": 0.8356222490318245, "grad_norm": 0.6417880058288574, "learning_rate": 2.0756848460242783e-06, "loss": 0.0561, "step": 37922 }, { "epoch": 0.8356442843213406, "grad_norm": 0.7859980463981628, "learning_rate": 2.075141526435519e-06, "loss": 0.0844, "step": 37923 }, { "epoch": 0.8356663196108568, "grad_norm": 0.6038910150527954, "learning_rate": 2.074598272679998e-06, "loss": 0.0837, "step": 37924 }, { "epoch": 0.835688354900373, "grad_norm": 0.3490888178348541, "learning_rate": 2.0740550847604762e-06, "loss": 0.045, "step": 37925 }, { "epoch": 0.8357103901898891, "grad_norm": 0.7204204201698303, "learning_rate": 2.0735119626797233e-06, "loss": 0.0339, "step": 37926 }, { "epoch": 0.8357324254794053, "grad_norm": 0.4323647916316986, "learning_rate": 2.0729689064405062e-06, "loss": 0.0416, "step": 37927 }, { "epoch": 0.8357544607689215, "grad_norm": 1.0786652565002441, "learning_rate": 2.0724259160455922e-06, "loss": 0.0726, "step": 37928 }, { "epoch": 0.8357764960584376, "grad_norm": 0.4149661660194397, "learning_rate": 2.0718829914977443e-06, "loss": 0.0489, "step": 37929 }, { "epoch": 0.8357985313479538, "grad_norm": 0.7112978100776672, "learning_rate": 2.0713401327997273e-06, "loss": 0.0611, "step": 37930 }, { "epoch": 0.83582056663747, "grad_norm": 0.7682656049728394, "learning_rate": 2.0707973399543117e-06, "loss": 0.0691, "step": 37931 }, { "epoch": 0.8358426019269861, "grad_norm": 0.5557994246482849, "learning_rate": 2.0702546129642543e-06, "loss": 0.0509, "step": 37932 }, { "epoch": 0.8358646372165023, "grad_norm": 0.5054166316986084, "learning_rate": 2.0697119518323256e-06, "loss": 0.0472, "step": 37933 }, { "epoch": 0.8358866725060183, "grad_norm": 0.27739888429641724, "learning_rate": 2.069169356561285e-06, "loss": 0.0615, "step": 37934 }, { "epoch": 0.8359087077955345, "grad_norm": 0.5396291613578796, "learning_rate": 2.068626827153904e-06, "loss": 0.062, "step": 37935 }, { "epoch": 0.8359307430850507, "grad_norm": 0.5788792371749878, "learning_rate": 2.0680843636129375e-06, "loss": 0.1086, "step": 37936 }, { "epoch": 0.8359527783745668, "grad_norm": 0.5828969478607178, "learning_rate": 2.0675419659411508e-06, "loss": 0.0709, "step": 37937 }, { "epoch": 0.835974813664083, "grad_norm": 0.5861756801605225, "learning_rate": 2.066999634141314e-06, "loss": 0.0795, "step": 37938 }, { "epoch": 0.8359968489535992, "grad_norm": 0.6721529960632324, "learning_rate": 2.0664573682161776e-06, "loss": 0.0517, "step": 37939 }, { "epoch": 0.8360188842431153, "grad_norm": 0.5994248986244202, "learning_rate": 2.065915168168513e-06, "loss": 0.0479, "step": 37940 }, { "epoch": 0.8360409195326315, "grad_norm": 0.5803006887435913, "learning_rate": 2.065373034001071e-06, "loss": 0.0649, "step": 37941 }, { "epoch": 0.8360629548221477, "grad_norm": 0.6124303340911865, "learning_rate": 2.0648309657166285e-06, "loss": 0.0645, "step": 37942 }, { "epoch": 0.8360849901116638, "grad_norm": 0.7293501496315002, "learning_rate": 2.064288963317933e-06, "loss": 0.0671, "step": 37943 }, { "epoch": 0.83610702540118, "grad_norm": 0.9457089304924011, "learning_rate": 2.063747026807755e-06, "loss": 0.0555, "step": 37944 }, { "epoch": 0.8361290606906961, "grad_norm": 0.738077700138092, "learning_rate": 2.063205156188848e-06, "loss": 0.0668, "step": 37945 }, { "epoch": 0.8361510959802123, "grad_norm": 0.7689464092254639, "learning_rate": 2.062663351463973e-06, "loss": 0.0845, "step": 37946 }, { "epoch": 0.8361731312697285, "grad_norm": 0.7427285313606262, "learning_rate": 2.0621216126358945e-06, "loss": 0.0725, "step": 37947 }, { "epoch": 0.8361951665592446, "grad_norm": 0.5817657709121704, "learning_rate": 2.061579939707365e-06, "loss": 0.0643, "step": 37948 }, { "epoch": 0.8362172018487608, "grad_norm": 0.3649328947067261, "learning_rate": 2.061038332681147e-06, "loss": 0.0412, "step": 37949 }, { "epoch": 0.836239237138277, "grad_norm": 0.5320018529891968, "learning_rate": 2.060496791560001e-06, "loss": 0.0416, "step": 37950 }, { "epoch": 0.8362612724277931, "grad_norm": 0.4251808226108551, "learning_rate": 2.0599553163466854e-06, "loss": 0.0558, "step": 37951 }, { "epoch": 0.8362833077173093, "grad_norm": 0.447512149810791, "learning_rate": 2.059413907043952e-06, "loss": 0.0589, "step": 37952 }, { "epoch": 0.8363053430068255, "grad_norm": 0.500791609287262, "learning_rate": 2.0588725636545638e-06, "loss": 0.0693, "step": 37953 }, { "epoch": 0.8363273782963416, "grad_norm": 0.46755728125572205, "learning_rate": 2.0583312861812808e-06, "loss": 0.0575, "step": 37954 }, { "epoch": 0.8363494135858578, "grad_norm": 0.6247791051864624, "learning_rate": 2.0577900746268537e-06, "loss": 0.0681, "step": 37955 }, { "epoch": 0.836371448875374, "grad_norm": 0.4695571959018707, "learning_rate": 2.057248928994041e-06, "loss": 0.0732, "step": 37956 }, { "epoch": 0.8363934841648901, "grad_norm": 0.7918339967727661, "learning_rate": 2.0567078492855993e-06, "loss": 0.0602, "step": 37957 }, { "epoch": 0.8364155194544063, "grad_norm": 0.7486154437065125, "learning_rate": 2.05616683550429e-06, "loss": 0.043, "step": 37958 }, { "epoch": 0.8364375547439223, "grad_norm": 0.6730808615684509, "learning_rate": 2.0556258876528607e-06, "loss": 0.0598, "step": 37959 }, { "epoch": 0.8364595900334385, "grad_norm": 0.6955820918083191, "learning_rate": 2.0550850057340686e-06, "loss": 0.0629, "step": 37960 }, { "epoch": 0.8364816253229547, "grad_norm": 0.5463213920593262, "learning_rate": 2.0545441897506754e-06, "loss": 0.0575, "step": 37961 }, { "epoch": 0.8365036606124708, "grad_norm": 0.7507593035697937, "learning_rate": 2.054003439705426e-06, "loss": 0.0686, "step": 37962 }, { "epoch": 0.836525695901987, "grad_norm": 0.919764518737793, "learning_rate": 2.053462755601084e-06, "loss": 0.0765, "step": 37963 }, { "epoch": 0.8365477311915032, "grad_norm": 0.361255019903183, "learning_rate": 2.0529221374403907e-06, "loss": 0.048, "step": 37964 }, { "epoch": 0.8365697664810193, "grad_norm": 0.2366415113210678, "learning_rate": 2.0523815852261142e-06, "loss": 0.0462, "step": 37965 }, { "epoch": 0.8365918017705355, "grad_norm": 0.5604223608970642, "learning_rate": 2.051841098960999e-06, "loss": 0.0506, "step": 37966 }, { "epoch": 0.8366138370600517, "grad_norm": 0.5837005376815796, "learning_rate": 2.051300678647802e-06, "loss": 0.0663, "step": 37967 }, { "epoch": 0.8366358723495678, "grad_norm": 0.828690767288208, "learning_rate": 2.0507603242892694e-06, "loss": 0.0762, "step": 37968 }, { "epoch": 0.836657907639084, "grad_norm": 0.5673891305923462, "learning_rate": 2.050220035888164e-06, "loss": 0.0612, "step": 37969 }, { "epoch": 0.8366799429286002, "grad_norm": 0.6671931743621826, "learning_rate": 2.049679813447233e-06, "loss": 0.0251, "step": 37970 }, { "epoch": 0.8367019782181163, "grad_norm": 0.5465763807296753, "learning_rate": 2.0491396569692232e-06, "loss": 0.0837, "step": 37971 }, { "epoch": 0.8367240135076325, "grad_norm": 0.8763144016265869, "learning_rate": 2.04859956645689e-06, "loss": 0.0557, "step": 37972 }, { "epoch": 0.8367460487971486, "grad_norm": 0.631596565246582, "learning_rate": 2.0480595419129842e-06, "loss": 0.0501, "step": 37973 }, { "epoch": 0.8367680840866648, "grad_norm": 0.5386724472045898, "learning_rate": 2.0475195833402606e-06, "loss": 0.051, "step": 37974 }, { "epoch": 0.836790119376181, "grad_norm": 0.5737430453300476, "learning_rate": 2.0469796907414595e-06, "loss": 0.0783, "step": 37975 }, { "epoch": 0.8368121546656971, "grad_norm": 0.5439655184745789, "learning_rate": 2.046439864119343e-06, "loss": 0.0438, "step": 37976 }, { "epoch": 0.8368341899552133, "grad_norm": 0.46251311898231506, "learning_rate": 2.0459001034766494e-06, "loss": 0.0426, "step": 37977 }, { "epoch": 0.8368562252447295, "grad_norm": 0.5219688415527344, "learning_rate": 2.0453604088161377e-06, "loss": 0.0503, "step": 37978 }, { "epoch": 0.8368782605342456, "grad_norm": 0.6189380288124084, "learning_rate": 2.0448207801405485e-06, "loss": 0.0598, "step": 37979 }, { "epoch": 0.8369002958237618, "grad_norm": 0.4663275480270386, "learning_rate": 2.0442812174526345e-06, "loss": 0.0639, "step": 37980 }, { "epoch": 0.836922331113278, "grad_norm": 0.6941487789154053, "learning_rate": 2.0437417207551465e-06, "loss": 0.0703, "step": 37981 }, { "epoch": 0.8369443664027941, "grad_norm": 0.5138075947761536, "learning_rate": 2.043202290050827e-06, "loss": 0.0611, "step": 37982 }, { "epoch": 0.8369664016923102, "grad_norm": 0.3009807765483856, "learning_rate": 2.042662925342427e-06, "loss": 0.0356, "step": 37983 }, { "epoch": 0.8369884369818263, "grad_norm": 0.8696125149726868, "learning_rate": 2.042123626632691e-06, "loss": 0.0663, "step": 37984 }, { "epoch": 0.8370104722713425, "grad_norm": 0.4442462921142578, "learning_rate": 2.0415843939243722e-06, "loss": 0.0598, "step": 37985 }, { "epoch": 0.8370325075608587, "grad_norm": 0.7228273153305054, "learning_rate": 2.0410452272202106e-06, "loss": 0.0668, "step": 37986 }, { "epoch": 0.8370545428503748, "grad_norm": 0.5066280364990234, "learning_rate": 2.040506126522954e-06, "loss": 0.0795, "step": 37987 }, { "epoch": 0.837076578139891, "grad_norm": 0.6486538648605347, "learning_rate": 2.039967091835352e-06, "loss": 0.0402, "step": 37988 }, { "epoch": 0.8370986134294072, "grad_norm": 0.5263770818710327, "learning_rate": 2.039428123160145e-06, "loss": 0.0479, "step": 37989 }, { "epoch": 0.8371206487189233, "grad_norm": 0.3375067412853241, "learning_rate": 2.038889220500082e-06, "loss": 0.0412, "step": 37990 }, { "epoch": 0.8371426840084395, "grad_norm": 0.45392709970474243, "learning_rate": 2.0383503838579016e-06, "loss": 0.0777, "step": 37991 }, { "epoch": 0.8371647192979557, "grad_norm": 0.676902174949646, "learning_rate": 2.03781161323636e-06, "loss": 0.0721, "step": 37992 }, { "epoch": 0.8371867545874718, "grad_norm": 0.3602744936943054, "learning_rate": 2.03727290863819e-06, "loss": 0.0552, "step": 37993 }, { "epoch": 0.837208789876988, "grad_norm": 0.6231141686439514, "learning_rate": 2.036734270066145e-06, "loss": 0.0585, "step": 37994 }, { "epoch": 0.8372308251665042, "grad_norm": 0.8124440908432007, "learning_rate": 2.036195697522959e-06, "loss": 0.0439, "step": 37995 }, { "epoch": 0.8372528604560203, "grad_norm": 0.5063177347183228, "learning_rate": 2.0356571910113812e-06, "loss": 0.0829, "step": 37996 }, { "epoch": 0.8372748957455365, "grad_norm": 0.7076608538627625, "learning_rate": 2.0351187505341566e-06, "loss": 0.0569, "step": 37997 }, { "epoch": 0.8372969310350526, "grad_norm": 0.4269103705883026, "learning_rate": 2.034580376094018e-06, "loss": 0.0559, "step": 37998 }, { "epoch": 0.8373189663245688, "grad_norm": 0.8277106881141663, "learning_rate": 2.034042067693721e-06, "loss": 0.0566, "step": 37999 }, { "epoch": 0.837341001614085, "grad_norm": 0.3779691755771637, "learning_rate": 2.033503825335997e-06, "loss": 0.0499, "step": 38000 }, { "epoch": 0.8373630369036011, "grad_norm": 0.8141213655471802, "learning_rate": 2.032965649023594e-06, "loss": 0.0523, "step": 38001 }, { "epoch": 0.8373850721931173, "grad_norm": 0.9727199673652649, "learning_rate": 2.032427538759247e-06, "loss": 0.0682, "step": 38002 }, { "epoch": 0.8374071074826335, "grad_norm": 0.4902275800704956, "learning_rate": 2.0318894945457007e-06, "loss": 0.0622, "step": 38003 }, { "epoch": 0.8374291427721496, "grad_norm": 0.5473891496658325, "learning_rate": 2.031351516385699e-06, "loss": 0.055, "step": 38004 }, { "epoch": 0.8374511780616658, "grad_norm": 0.6295700669288635, "learning_rate": 2.030813604281974e-06, "loss": 0.0493, "step": 38005 }, { "epoch": 0.837473213351182, "grad_norm": 0.7502651810646057, "learning_rate": 2.030275758237271e-06, "loss": 0.0672, "step": 38006 }, { "epoch": 0.8374952486406981, "grad_norm": 0.4645267724990845, "learning_rate": 2.029737978254329e-06, "loss": 0.0775, "step": 38007 }, { "epoch": 0.8375172839302142, "grad_norm": 0.786956787109375, "learning_rate": 2.0292002643358893e-06, "loss": 0.0554, "step": 38008 }, { "epoch": 0.8375393192197303, "grad_norm": 0.6747304797172546, "learning_rate": 2.0286626164846846e-06, "loss": 0.0603, "step": 38009 }, { "epoch": 0.8375613545092465, "grad_norm": 0.43031832575798035, "learning_rate": 2.028125034703456e-06, "loss": 0.0473, "step": 38010 }, { "epoch": 0.8375833897987627, "grad_norm": 0.7715153098106384, "learning_rate": 2.027587518994948e-06, "loss": 0.0548, "step": 38011 }, { "epoch": 0.8376054250882788, "grad_norm": 0.30834951996803284, "learning_rate": 2.027050069361888e-06, "loss": 0.0239, "step": 38012 }, { "epoch": 0.837627460377795, "grad_norm": 0.5149797201156616, "learning_rate": 2.026512685807023e-06, "loss": 0.0659, "step": 38013 }, { "epoch": 0.8376494956673112, "grad_norm": 0.28169307112693787, "learning_rate": 2.025975368333079e-06, "loss": 0.0544, "step": 38014 }, { "epoch": 0.8376715309568273, "grad_norm": 0.9191993474960327, "learning_rate": 2.0254381169428064e-06, "loss": 0.0639, "step": 38015 }, { "epoch": 0.8376935662463435, "grad_norm": 0.8000980019569397, "learning_rate": 2.024900931638932e-06, "loss": 0.0715, "step": 38016 }, { "epoch": 0.8377156015358597, "grad_norm": 0.5336796045303345, "learning_rate": 2.024363812424197e-06, "loss": 0.0577, "step": 38017 }, { "epoch": 0.8377376368253758, "grad_norm": 0.41750311851501465, "learning_rate": 2.023826759301334e-06, "loss": 0.0607, "step": 38018 }, { "epoch": 0.837759672114892, "grad_norm": 0.661502480506897, "learning_rate": 2.023289772273077e-06, "loss": 0.0834, "step": 38019 }, { "epoch": 0.8377817074044082, "grad_norm": 0.6709926724433899, "learning_rate": 2.0227528513421696e-06, "loss": 0.0654, "step": 38020 }, { "epoch": 0.8378037426939243, "grad_norm": 0.5010068416595459, "learning_rate": 2.0222159965113325e-06, "loss": 0.0565, "step": 38021 }, { "epoch": 0.8378257779834405, "grad_norm": 0.8139470815658569, "learning_rate": 2.021679207783315e-06, "loss": 0.053, "step": 38022 }, { "epoch": 0.8378478132729567, "grad_norm": 0.9934969544410706, "learning_rate": 2.021142485160844e-06, "loss": 0.0889, "step": 38023 }, { "epoch": 0.8378698485624728, "grad_norm": 0.28569772839546204, "learning_rate": 2.020605828646655e-06, "loss": 0.0747, "step": 38024 }, { "epoch": 0.837891883851989, "grad_norm": 0.919405996799469, "learning_rate": 2.020069238243478e-06, "loss": 0.0751, "step": 38025 }, { "epoch": 0.8379139191415051, "grad_norm": 1.02243971824646, "learning_rate": 2.01953271395405e-06, "loss": 0.0586, "step": 38026 }, { "epoch": 0.8379359544310213, "grad_norm": 0.6123660802841187, "learning_rate": 2.0189962557811043e-06, "loss": 0.0684, "step": 38027 }, { "epoch": 0.8379579897205375, "grad_norm": 0.25158748030662537, "learning_rate": 2.018459863727369e-06, "loss": 0.0397, "step": 38028 }, { "epoch": 0.8379800250100536, "grad_norm": 0.8365805149078369, "learning_rate": 2.0179235377955777e-06, "loss": 0.0724, "step": 38029 }, { "epoch": 0.8380020602995698, "grad_norm": 0.4376204013824463, "learning_rate": 2.0173872779884644e-06, "loss": 0.0487, "step": 38030 }, { "epoch": 0.838024095589086, "grad_norm": 0.7450606822967529, "learning_rate": 2.0168510843087627e-06, "loss": 0.0544, "step": 38031 }, { "epoch": 0.8380461308786021, "grad_norm": 1.0290138721466064, "learning_rate": 2.0163149567591974e-06, "loss": 0.0647, "step": 38032 }, { "epoch": 0.8380681661681182, "grad_norm": 0.9875971078872681, "learning_rate": 2.0157788953425033e-06, "loss": 0.0683, "step": 38033 }, { "epoch": 0.8380902014576344, "grad_norm": 0.5481916666030884, "learning_rate": 2.0152429000614124e-06, "loss": 0.0368, "step": 38034 }, { "epoch": 0.8381122367471505, "grad_norm": 0.6417779326438904, "learning_rate": 2.0147069709186484e-06, "loss": 0.0603, "step": 38035 }, { "epoch": 0.8381342720366667, "grad_norm": 0.3426772654056549, "learning_rate": 2.01417110791695e-06, "loss": 0.046, "step": 38036 }, { "epoch": 0.8381563073261828, "grad_norm": 0.9757052063941956, "learning_rate": 2.013635311059034e-06, "loss": 0.0827, "step": 38037 }, { "epoch": 0.838178342615699, "grad_norm": 0.8232246041297913, "learning_rate": 2.0130995803476427e-06, "loss": 0.0567, "step": 38038 }, { "epoch": 0.8382003779052152, "grad_norm": 0.6693392395973206, "learning_rate": 2.0125639157854974e-06, "loss": 0.0529, "step": 38039 }, { "epoch": 0.8382224131947313, "grad_norm": 0.7197914123535156, "learning_rate": 2.012028317375333e-06, "loss": 0.0582, "step": 38040 }, { "epoch": 0.8382444484842475, "grad_norm": 0.20286476612091064, "learning_rate": 2.0114927851198684e-06, "loss": 0.0512, "step": 38041 }, { "epoch": 0.8382664837737637, "grad_norm": 0.7906814217567444, "learning_rate": 2.010957319021835e-06, "loss": 0.062, "step": 38042 }, { "epoch": 0.8382885190632798, "grad_norm": 0.47030937671661377, "learning_rate": 2.010421919083966e-06, "loss": 0.0514, "step": 38043 }, { "epoch": 0.838310554352796, "grad_norm": 0.27907654643058777, "learning_rate": 2.0098865853089764e-06, "loss": 0.0518, "step": 38044 }, { "epoch": 0.8383325896423122, "grad_norm": 0.44582659006118774, "learning_rate": 2.009351317699607e-06, "loss": 0.0633, "step": 38045 }, { "epoch": 0.8383546249318283, "grad_norm": 0.6715607643127441, "learning_rate": 2.008816116258575e-06, "loss": 0.0488, "step": 38046 }, { "epoch": 0.8383766602213445, "grad_norm": 0.719470202922821, "learning_rate": 2.0082809809886115e-06, "loss": 0.0621, "step": 38047 }, { "epoch": 0.8383986955108607, "grad_norm": 0.7604089975357056, "learning_rate": 2.007745911892436e-06, "loss": 0.0465, "step": 38048 }, { "epoch": 0.8384207308003768, "grad_norm": 0.19805315136909485, "learning_rate": 2.0072109089727786e-06, "loss": 0.0625, "step": 38049 }, { "epoch": 0.838442766089893, "grad_norm": 0.6302147507667542, "learning_rate": 2.0066759722323664e-06, "loss": 0.0632, "step": 38050 }, { "epoch": 0.8384648013794092, "grad_norm": 0.6390325427055359, "learning_rate": 2.0061411016739173e-06, "loss": 0.0843, "step": 38051 }, { "epoch": 0.8384868366689253, "grad_norm": 0.3292452096939087, "learning_rate": 2.0056062973001595e-06, "loss": 0.0573, "step": 38052 }, { "epoch": 0.8385088719584415, "grad_norm": 0.4016202688217163, "learning_rate": 2.005071559113817e-06, "loss": 0.0467, "step": 38053 }, { "epoch": 0.8385309072479576, "grad_norm": 0.5801150798797607, "learning_rate": 2.004536887117618e-06, "loss": 0.0666, "step": 38054 }, { "epoch": 0.8385529425374738, "grad_norm": 0.624426543712616, "learning_rate": 2.0040022813142767e-06, "loss": 0.0697, "step": 38055 }, { "epoch": 0.83857497782699, "grad_norm": 0.27878108620643616, "learning_rate": 2.0034677417065196e-06, "loss": 0.0503, "step": 38056 }, { "epoch": 0.838597013116506, "grad_norm": 0.8357359170913696, "learning_rate": 2.002933268297076e-06, "loss": 0.0519, "step": 38057 }, { "epoch": 0.8386190484060222, "grad_norm": 0.9348927736282349, "learning_rate": 2.0023988610886586e-06, "loss": 0.0536, "step": 38058 }, { "epoch": 0.8386410836955384, "grad_norm": 0.8296562433242798, "learning_rate": 2.0018645200839968e-06, "loss": 0.0633, "step": 38059 }, { "epoch": 0.8386631189850545, "grad_norm": 0.552847146987915, "learning_rate": 2.001330245285804e-06, "loss": 0.0609, "step": 38060 }, { "epoch": 0.8386851542745707, "grad_norm": 0.6549772024154663, "learning_rate": 2.0007960366968125e-06, "loss": 0.061, "step": 38061 }, { "epoch": 0.8387071895640869, "grad_norm": 0.5299373865127563, "learning_rate": 2.0002618943197335e-06, "loss": 0.0652, "step": 38062 }, { "epoch": 0.838729224853603, "grad_norm": 0.4310234487056732, "learning_rate": 1.9997278181572953e-06, "loss": 0.0756, "step": 38063 }, { "epoch": 0.8387512601431192, "grad_norm": 0.45531603693962097, "learning_rate": 1.999193808212212e-06, "loss": 0.0598, "step": 38064 }, { "epoch": 0.8387732954326353, "grad_norm": 0.7706813216209412, "learning_rate": 1.9986598644872046e-06, "loss": 0.0471, "step": 38065 }, { "epoch": 0.8387953307221515, "grad_norm": 0.5140661001205444, "learning_rate": 1.9981259869849994e-06, "loss": 0.052, "step": 38066 }, { "epoch": 0.8388173660116677, "grad_norm": 0.4351250231266022, "learning_rate": 1.9975921757083028e-06, "loss": 0.0755, "step": 38067 }, { "epoch": 0.8388394013011838, "grad_norm": 0.4064750671386719, "learning_rate": 1.997058430659849e-06, "loss": 0.046, "step": 38068 }, { "epoch": 0.8388614365907, "grad_norm": 0.5836353302001953, "learning_rate": 1.996524751842346e-06, "loss": 0.0535, "step": 38069 }, { "epoch": 0.8388834718802162, "grad_norm": 0.5849714279174805, "learning_rate": 1.9959911392585193e-06, "loss": 0.0669, "step": 38070 }, { "epoch": 0.8389055071697323, "grad_norm": 0.4840262532234192, "learning_rate": 1.9954575929110798e-06, "loss": 0.0784, "step": 38071 }, { "epoch": 0.8389275424592485, "grad_norm": 0.4261532425880432, "learning_rate": 1.9949241128027475e-06, "loss": 0.0477, "step": 38072 }, { "epoch": 0.8389495777487647, "grad_norm": 0.6335489749908447, "learning_rate": 1.9943906989362436e-06, "loss": 0.0478, "step": 38073 }, { "epoch": 0.8389716130382808, "grad_norm": 0.3717585504055023, "learning_rate": 1.9938573513142798e-06, "loss": 0.0356, "step": 38074 }, { "epoch": 0.838993648327797, "grad_norm": 0.6266288757324219, "learning_rate": 1.993324069939574e-06, "loss": 0.068, "step": 38075 }, { "epoch": 0.8390156836173132, "grad_norm": 0.7272523045539856, "learning_rate": 1.9927908548148434e-06, "loss": 0.0934, "step": 38076 }, { "epoch": 0.8390377189068293, "grad_norm": 0.20591232180595398, "learning_rate": 1.9922577059428076e-06, "loss": 0.0492, "step": 38077 }, { "epoch": 0.8390597541963455, "grad_norm": 0.9214683771133423, "learning_rate": 1.9917246233261767e-06, "loss": 0.08, "step": 38078 }, { "epoch": 0.8390817894858617, "grad_norm": 0.6176660060882568, "learning_rate": 1.991191606967666e-06, "loss": 0.0607, "step": 38079 }, { "epoch": 0.8391038247753778, "grad_norm": 0.36272844672203064, "learning_rate": 1.9906586568699924e-06, "loss": 0.0357, "step": 38080 }, { "epoch": 0.839125860064894, "grad_norm": 0.47823119163513184, "learning_rate": 1.9901257730358752e-06, "loss": 0.0562, "step": 38081 }, { "epoch": 0.83914789535441, "grad_norm": 0.10409016907215118, "learning_rate": 1.989592955468022e-06, "loss": 0.0387, "step": 38082 }, { "epoch": 0.8391699306439262, "grad_norm": 0.7818261384963989, "learning_rate": 1.9890602041691442e-06, "loss": 0.0697, "step": 38083 }, { "epoch": 0.8391919659334424, "grad_norm": 0.3391343057155609, "learning_rate": 1.9885275191419645e-06, "loss": 0.0481, "step": 38084 }, { "epoch": 0.8392140012229585, "grad_norm": 0.8643283247947693, "learning_rate": 1.9879949003891888e-06, "loss": 0.0615, "step": 38085 }, { "epoch": 0.8392360365124747, "grad_norm": 0.2965032756328583, "learning_rate": 1.987462347913532e-06, "loss": 0.0596, "step": 38086 }, { "epoch": 0.8392580718019909, "grad_norm": 0.6560891270637512, "learning_rate": 1.986929861717709e-06, "loss": 0.0617, "step": 38087 }, { "epoch": 0.839280107091507, "grad_norm": 0.7750993967056274, "learning_rate": 1.986397441804433e-06, "loss": 0.0506, "step": 38088 }, { "epoch": 0.8393021423810232, "grad_norm": 0.8720436096191406, "learning_rate": 1.98586508817641e-06, "loss": 0.0513, "step": 38089 }, { "epoch": 0.8393241776705394, "grad_norm": 0.8603031635284424, "learning_rate": 1.9853328008363534e-06, "loss": 0.0667, "step": 38090 }, { "epoch": 0.8393462129600555, "grad_norm": 0.19355100393295288, "learning_rate": 1.98480057978698e-06, "loss": 0.0383, "step": 38091 }, { "epoch": 0.8393682482495717, "grad_norm": 0.41213715076446533, "learning_rate": 1.984268425030994e-06, "loss": 0.043, "step": 38092 }, { "epoch": 0.8393902835390878, "grad_norm": 0.60166335105896, "learning_rate": 1.9837363365711114e-06, "loss": 0.0605, "step": 38093 }, { "epoch": 0.839412318828604, "grad_norm": 0.7295094132423401, "learning_rate": 1.9832043144100335e-06, "loss": 0.0832, "step": 38094 }, { "epoch": 0.8394343541181202, "grad_norm": 0.6430694460868835, "learning_rate": 1.982672358550482e-06, "loss": 0.0514, "step": 38095 }, { "epoch": 0.8394563894076363, "grad_norm": 0.47141599655151367, "learning_rate": 1.9821404689951573e-06, "loss": 0.0827, "step": 38096 }, { "epoch": 0.8394784246971525, "grad_norm": 0.40736016631126404, "learning_rate": 1.9816086457467757e-06, "loss": 0.0603, "step": 38097 }, { "epoch": 0.8395004599866687, "grad_norm": 0.6142369508743286, "learning_rate": 1.981076888808039e-06, "loss": 0.0714, "step": 38098 }, { "epoch": 0.8395224952761848, "grad_norm": 1.030139446258545, "learning_rate": 1.9805451981816574e-06, "loss": 0.0616, "step": 38099 }, { "epoch": 0.839544530565701, "grad_norm": 0.2780100703239441, "learning_rate": 1.9800135738703464e-06, "loss": 0.0594, "step": 38100 }, { "epoch": 0.8395665658552172, "grad_norm": 0.5843190550804138, "learning_rate": 1.9794820158768027e-06, "loss": 0.0761, "step": 38101 }, { "epoch": 0.8395886011447333, "grad_norm": 0.813368022441864, "learning_rate": 1.9789505242037403e-06, "loss": 0.0466, "step": 38102 }, { "epoch": 0.8396106364342495, "grad_norm": 0.6162282228469849, "learning_rate": 1.9784190988538664e-06, "loss": 0.0602, "step": 38103 }, { "epoch": 0.8396326717237657, "grad_norm": 0.6935145258903503, "learning_rate": 1.977887739829889e-06, "loss": 0.0574, "step": 38104 }, { "epoch": 0.8396547070132818, "grad_norm": 0.6290960311889648, "learning_rate": 1.977356447134509e-06, "loss": 0.0693, "step": 38105 }, { "epoch": 0.839676742302798, "grad_norm": 0.21949313580989838, "learning_rate": 1.9768252207704348e-06, "loss": 0.0482, "step": 38106 }, { "epoch": 0.839698777592314, "grad_norm": 0.48189958930015564, "learning_rate": 1.9762940607403785e-06, "loss": 0.0793, "step": 38107 }, { "epoch": 0.8397208128818302, "grad_norm": 0.47725340723991394, "learning_rate": 1.975762967047035e-06, "loss": 0.051, "step": 38108 }, { "epoch": 0.8397428481713464, "grad_norm": 0.7419757843017578, "learning_rate": 1.9752319396931174e-06, "loss": 0.0535, "step": 38109 }, { "epoch": 0.8397648834608625, "grad_norm": 0.7238023281097412, "learning_rate": 1.9747009786813265e-06, "loss": 0.0642, "step": 38110 }, { "epoch": 0.8397869187503787, "grad_norm": 0.9274887442588806, "learning_rate": 1.9741700840143716e-06, "loss": 0.056, "step": 38111 }, { "epoch": 0.8398089540398949, "grad_norm": 0.7003858685493469, "learning_rate": 1.9736392556949514e-06, "loss": 0.06, "step": 38112 }, { "epoch": 0.839830989329411, "grad_norm": 0.48452097177505493, "learning_rate": 1.973108493725769e-06, "loss": 0.0693, "step": 38113 }, { "epoch": 0.8398530246189272, "grad_norm": 0.3706935942173004, "learning_rate": 1.972577798109537e-06, "loss": 0.046, "step": 38114 }, { "epoch": 0.8398750599084434, "grad_norm": 0.616881251335144, "learning_rate": 1.972047168848946e-06, "loss": 0.078, "step": 38115 }, { "epoch": 0.8398970951979595, "grad_norm": 0.4547468423843384, "learning_rate": 1.9715166059467106e-06, "loss": 0.0375, "step": 38116 }, { "epoch": 0.8399191304874757, "grad_norm": 0.6058623194694519, "learning_rate": 1.9709861094055197e-06, "loss": 0.0697, "step": 38117 }, { "epoch": 0.8399411657769918, "grad_norm": 0.6590108871459961, "learning_rate": 1.97045567922809e-06, "loss": 0.055, "step": 38118 }, { "epoch": 0.839963201066508, "grad_norm": 1.3508551120758057, "learning_rate": 1.9699253154171133e-06, "loss": 0.0803, "step": 38119 }, { "epoch": 0.8399852363560242, "grad_norm": 0.8955751657485962, "learning_rate": 1.969395017975298e-06, "loss": 0.0714, "step": 38120 }, { "epoch": 0.8400072716455403, "grad_norm": 0.23393334448337555, "learning_rate": 1.9688647869053387e-06, "loss": 0.0687, "step": 38121 }, { "epoch": 0.8400293069350565, "grad_norm": 0.7213773131370544, "learning_rate": 1.9683346222099374e-06, "loss": 0.0388, "step": 38122 }, { "epoch": 0.8400513422245727, "grad_norm": 0.7033343315124512, "learning_rate": 1.9678045238917995e-06, "loss": 0.0545, "step": 38123 }, { "epoch": 0.8400733775140888, "grad_norm": 0.9239944219589233, "learning_rate": 1.9672744919536186e-06, "loss": 0.0554, "step": 38124 }, { "epoch": 0.840095412803605, "grad_norm": 0.8250954747200012, "learning_rate": 1.9667445263980984e-06, "loss": 0.0795, "step": 38125 }, { "epoch": 0.8401174480931212, "grad_norm": 0.612550675868988, "learning_rate": 1.966214627227935e-06, "loss": 0.0435, "step": 38126 }, { "epoch": 0.8401394833826373, "grad_norm": 0.5566823482513428, "learning_rate": 1.9656847944458356e-06, "loss": 0.0678, "step": 38127 }, { "epoch": 0.8401615186721535, "grad_norm": 0.7730896472930908, "learning_rate": 1.965155028054489e-06, "loss": 0.0659, "step": 38128 }, { "epoch": 0.8401835539616697, "grad_norm": 0.3241512179374695, "learning_rate": 1.9646253280565964e-06, "loss": 0.056, "step": 38129 }, { "epoch": 0.8402055892511858, "grad_norm": 0.3261812925338745, "learning_rate": 1.964095694454862e-06, "loss": 0.0485, "step": 38130 }, { "epoch": 0.840227624540702, "grad_norm": 0.7562462687492371, "learning_rate": 1.9635661272519743e-06, "loss": 0.0562, "step": 38131 }, { "epoch": 0.840249659830218, "grad_norm": 0.5777836441993713, "learning_rate": 1.963036626450636e-06, "loss": 0.0703, "step": 38132 }, { "epoch": 0.8402716951197342, "grad_norm": 0.5977223515510559, "learning_rate": 1.9625071920535415e-06, "loss": 0.0612, "step": 38133 }, { "epoch": 0.8402937304092504, "grad_norm": 0.6653722524642944, "learning_rate": 1.9619778240633933e-06, "loss": 0.0742, "step": 38134 }, { "epoch": 0.8403157656987665, "grad_norm": 0.4365597367286682, "learning_rate": 1.9614485224828816e-06, "loss": 0.051, "step": 38135 }, { "epoch": 0.8403378009882827, "grad_norm": 0.4671827256679535, "learning_rate": 1.9609192873147036e-06, "loss": 0.0527, "step": 38136 }, { "epoch": 0.8403598362777989, "grad_norm": 0.48690760135650635, "learning_rate": 1.9603901185615573e-06, "loss": 0.0597, "step": 38137 }, { "epoch": 0.840381871567315, "grad_norm": 0.5296772718429565, "learning_rate": 1.959861016226136e-06, "loss": 0.0714, "step": 38138 }, { "epoch": 0.8404039068568312, "grad_norm": 0.530572772026062, "learning_rate": 1.9593319803111372e-06, "loss": 0.0538, "step": 38139 }, { "epoch": 0.8404259421463474, "grad_norm": 0.3471544682979584, "learning_rate": 1.9588030108192468e-06, "loss": 0.0402, "step": 38140 }, { "epoch": 0.8404479774358635, "grad_norm": 0.48091620206832886, "learning_rate": 1.958274107753173e-06, "loss": 0.0538, "step": 38141 }, { "epoch": 0.8404700127253797, "grad_norm": 0.3621135950088501, "learning_rate": 1.9577452711155987e-06, "loss": 0.0721, "step": 38142 }, { "epoch": 0.8404920480148959, "grad_norm": 0.7805392146110535, "learning_rate": 1.957216500909225e-06, "loss": 0.0806, "step": 38143 }, { "epoch": 0.840514083304412, "grad_norm": 0.9057733416557312, "learning_rate": 1.956687797136739e-06, "loss": 0.0697, "step": 38144 }, { "epoch": 0.8405361185939282, "grad_norm": 0.8528553247451782, "learning_rate": 1.956159159800835e-06, "loss": 0.075, "step": 38145 }, { "epoch": 0.8405581538834443, "grad_norm": 0.732175350189209, "learning_rate": 1.9556305889042114e-06, "loss": 0.0954, "step": 38146 }, { "epoch": 0.8405801891729605, "grad_norm": 1.076480507850647, "learning_rate": 1.955102084449553e-06, "loss": 0.07, "step": 38147 }, { "epoch": 0.8406022244624767, "grad_norm": 1.1828988790512085, "learning_rate": 1.954573646439555e-06, "loss": 0.092, "step": 38148 }, { "epoch": 0.8406242597519928, "grad_norm": 0.4568498432636261, "learning_rate": 1.954045274876909e-06, "loss": 0.0779, "step": 38149 }, { "epoch": 0.840646295041509, "grad_norm": 0.6640604138374329, "learning_rate": 1.95351696976431e-06, "loss": 0.0425, "step": 38150 }, { "epoch": 0.8406683303310252, "grad_norm": 0.4591362476348877, "learning_rate": 1.9529887311044397e-06, "loss": 0.0768, "step": 38151 }, { "epoch": 0.8406903656205413, "grad_norm": 0.7332930564880371, "learning_rate": 1.9524605588999973e-06, "loss": 0.072, "step": 38152 }, { "epoch": 0.8407124009100575, "grad_norm": 0.7887837290763855, "learning_rate": 1.95193245315367e-06, "loss": 0.0448, "step": 38153 }, { "epoch": 0.8407344361995737, "grad_norm": 1.0029276609420776, "learning_rate": 1.951404413868147e-06, "loss": 0.0726, "step": 38154 }, { "epoch": 0.8407564714890898, "grad_norm": 0.684324324131012, "learning_rate": 1.9508764410461165e-06, "loss": 0.0562, "step": 38155 }, { "epoch": 0.8407785067786059, "grad_norm": 0.34778422117233276, "learning_rate": 1.9503485346902707e-06, "loss": 0.0696, "step": 38156 }, { "epoch": 0.840800542068122, "grad_norm": 0.5689383745193481, "learning_rate": 1.9498206948033013e-06, "loss": 0.0521, "step": 38157 }, { "epoch": 0.8408225773576382, "grad_norm": 0.41701430082321167, "learning_rate": 1.949292921387889e-06, "loss": 0.0584, "step": 38158 }, { "epoch": 0.8408446126471544, "grad_norm": 0.9986579418182373, "learning_rate": 1.948765214446726e-06, "loss": 0.0501, "step": 38159 }, { "epoch": 0.8408666479366705, "grad_norm": 0.7988520860671997, "learning_rate": 1.9482375739825053e-06, "loss": 0.0502, "step": 38160 }, { "epoch": 0.8408886832261867, "grad_norm": 0.5330277681350708, "learning_rate": 1.9477099999979047e-06, "loss": 0.0404, "step": 38161 }, { "epoch": 0.8409107185157029, "grad_norm": 0.617840051651001, "learning_rate": 1.9471824924956203e-06, "loss": 0.0542, "step": 38162 }, { "epoch": 0.840932753805219, "grad_norm": 0.5114270448684692, "learning_rate": 1.946655051478329e-06, "loss": 0.0458, "step": 38163 }, { "epoch": 0.8409547890947352, "grad_norm": 0.33021822571754456, "learning_rate": 1.946127676948729e-06, "loss": 0.0562, "step": 38164 }, { "epoch": 0.8409768243842514, "grad_norm": 0.48741400241851807, "learning_rate": 1.9456003689094975e-06, "loss": 0.0799, "step": 38165 }, { "epoch": 0.8409988596737675, "grad_norm": 0.3912787139415741, "learning_rate": 1.945073127363327e-06, "loss": 0.0695, "step": 38166 }, { "epoch": 0.8410208949632837, "grad_norm": 0.609641432762146, "learning_rate": 1.9445459523128957e-06, "loss": 0.0601, "step": 38167 }, { "epoch": 0.8410429302527999, "grad_norm": 0.7565122842788696, "learning_rate": 1.944018843760894e-06, "loss": 0.0636, "step": 38168 }, { "epoch": 0.841064965542316, "grad_norm": 0.45607802271842957, "learning_rate": 1.9434918017100075e-06, "loss": 0.0408, "step": 38169 }, { "epoch": 0.8410870008318322, "grad_norm": 0.7122322916984558, "learning_rate": 1.942964826162916e-06, "loss": 0.0454, "step": 38170 }, { "epoch": 0.8411090361213484, "grad_norm": 0.8530943393707275, "learning_rate": 1.9424379171223074e-06, "loss": 0.0528, "step": 38171 }, { "epoch": 0.8411310714108645, "grad_norm": 0.5481643080711365, "learning_rate": 1.9419110745908632e-06, "loss": 0.0686, "step": 38172 }, { "epoch": 0.8411531067003807, "grad_norm": 0.5265812277793884, "learning_rate": 1.9413842985712714e-06, "loss": 0.0612, "step": 38173 }, { "epoch": 0.8411751419898968, "grad_norm": 0.8378902673721313, "learning_rate": 1.940857589066209e-06, "loss": 0.0831, "step": 38174 }, { "epoch": 0.841197177279413, "grad_norm": 0.5127025842666626, "learning_rate": 1.9403309460783612e-06, "loss": 0.057, "step": 38175 }, { "epoch": 0.8412192125689292, "grad_norm": 0.6033411026000977, "learning_rate": 1.9398043696104165e-06, "loss": 0.0839, "step": 38176 }, { "epoch": 0.8412412478584453, "grad_norm": 0.514423668384552, "learning_rate": 1.9392778596650463e-06, "loss": 0.0459, "step": 38177 }, { "epoch": 0.8412632831479615, "grad_norm": 0.5523184537887573, "learning_rate": 1.9387514162449377e-06, "loss": 0.0587, "step": 38178 }, { "epoch": 0.8412853184374777, "grad_norm": 0.6030738949775696, "learning_rate": 1.938225039352772e-06, "loss": 0.0631, "step": 38179 }, { "epoch": 0.8413073537269938, "grad_norm": 0.44094082713127136, "learning_rate": 1.9376987289912362e-06, "loss": 0.0432, "step": 38180 }, { "epoch": 0.8413293890165099, "grad_norm": 0.6666696071624756, "learning_rate": 1.937172485162999e-06, "loss": 0.0614, "step": 38181 }, { "epoch": 0.841351424306026, "grad_norm": 0.3695930540561676, "learning_rate": 1.9366463078707488e-06, "loss": 0.0464, "step": 38182 }, { "epoch": 0.8413734595955422, "grad_norm": 1.170001745223999, "learning_rate": 1.936120197117168e-06, "loss": 0.0728, "step": 38183 }, { "epoch": 0.8413954948850584, "grad_norm": 0.6891947388648987, "learning_rate": 1.9355941529049278e-06, "loss": 0.073, "step": 38184 }, { "epoch": 0.8414175301745745, "grad_norm": 0.397439181804657, "learning_rate": 1.9350681752367146e-06, "loss": 0.0375, "step": 38185 }, { "epoch": 0.8414395654640907, "grad_norm": 0.34047529101371765, "learning_rate": 1.934542264115201e-06, "loss": 0.0693, "step": 38186 }, { "epoch": 0.8414616007536069, "grad_norm": 0.599777102470398, "learning_rate": 1.9340164195430756e-06, "loss": 0.045, "step": 38187 }, { "epoch": 0.841483636043123, "grad_norm": 0.6641154289245605, "learning_rate": 1.933490641523007e-06, "loss": 0.0656, "step": 38188 }, { "epoch": 0.8415056713326392, "grad_norm": 0.3591402769088745, "learning_rate": 1.93296493005768e-06, "loss": 0.0453, "step": 38189 }, { "epoch": 0.8415277066221554, "grad_norm": 0.6241071224212646, "learning_rate": 1.9324392851497648e-06, "loss": 0.0467, "step": 38190 }, { "epoch": 0.8415497419116715, "grad_norm": 0.200018048286438, "learning_rate": 1.93191370680195e-06, "loss": 0.0345, "step": 38191 }, { "epoch": 0.8415717772011877, "grad_norm": 0.252424031496048, "learning_rate": 1.931388195016907e-06, "loss": 0.0434, "step": 38192 }, { "epoch": 0.8415938124907039, "grad_norm": 0.4976974129676819, "learning_rate": 1.9308627497973073e-06, "loss": 0.0578, "step": 38193 }, { "epoch": 0.84161584778022, "grad_norm": 0.3862513303756714, "learning_rate": 1.930337371145832e-06, "loss": 0.0386, "step": 38194 }, { "epoch": 0.8416378830697362, "grad_norm": 0.5212281346321106, "learning_rate": 1.9298120590651575e-06, "loss": 0.0273, "step": 38195 }, { "epoch": 0.8416599183592524, "grad_norm": 0.8441162109375, "learning_rate": 1.929286813557963e-06, "loss": 0.0622, "step": 38196 }, { "epoch": 0.8416819536487685, "grad_norm": 0.31140536069869995, "learning_rate": 1.928761634626911e-06, "loss": 0.0362, "step": 38197 }, { "epoch": 0.8417039889382847, "grad_norm": 0.493567556142807, "learning_rate": 1.9282365222746934e-06, "loss": 0.0522, "step": 38198 }, { "epoch": 0.8417260242278009, "grad_norm": 0.6797961592674255, "learning_rate": 1.9277114765039743e-06, "loss": 0.0704, "step": 38199 }, { "epoch": 0.841748059517317, "grad_norm": 0.6287432312965393, "learning_rate": 1.927186497317432e-06, "loss": 0.0687, "step": 38200 }, { "epoch": 0.8417700948068332, "grad_norm": 0.6217062473297119, "learning_rate": 1.9266615847177373e-06, "loss": 0.0695, "step": 38201 }, { "epoch": 0.8417921300963493, "grad_norm": 0.5451163649559021, "learning_rate": 1.9261367387075645e-06, "loss": 0.0605, "step": 38202 }, { "epoch": 0.8418141653858655, "grad_norm": 0.7005634903907776, "learning_rate": 1.925611959289592e-06, "loss": 0.0683, "step": 38203 }, { "epoch": 0.8418362006753817, "grad_norm": 0.44117188453674316, "learning_rate": 1.9250872464664857e-06, "loss": 0.0538, "step": 38204 }, { "epoch": 0.8418582359648978, "grad_norm": 0.8569350242614746, "learning_rate": 1.9245626002409208e-06, "loss": 0.0752, "step": 38205 }, { "epoch": 0.8418802712544139, "grad_norm": 0.5354956388473511, "learning_rate": 1.9240380206155706e-06, "loss": 0.0671, "step": 38206 }, { "epoch": 0.8419023065439301, "grad_norm": 0.48022618889808655, "learning_rate": 1.9235135075931105e-06, "loss": 0.0495, "step": 38207 }, { "epoch": 0.8419243418334462, "grad_norm": 0.517155647277832, "learning_rate": 1.922989061176203e-06, "loss": 0.0473, "step": 38208 }, { "epoch": 0.8419463771229624, "grad_norm": 0.4632912576198578, "learning_rate": 1.9224646813675262e-06, "loss": 0.0617, "step": 38209 }, { "epoch": 0.8419684124124786, "grad_norm": 0.8529003262519836, "learning_rate": 1.9219403681697527e-06, "loss": 0.0456, "step": 38210 }, { "epoch": 0.8419904477019947, "grad_norm": 0.5820048451423645, "learning_rate": 1.9214161215855474e-06, "loss": 0.0588, "step": 38211 }, { "epoch": 0.8420124829915109, "grad_norm": 0.5183278322219849, "learning_rate": 1.920891941617585e-06, "loss": 0.06, "step": 38212 }, { "epoch": 0.842034518281027, "grad_norm": 0.6750470995903015, "learning_rate": 1.9203678282685273e-06, "loss": 0.0554, "step": 38213 }, { "epoch": 0.8420565535705432, "grad_norm": 0.7264183759689331, "learning_rate": 1.9198437815410564e-06, "loss": 0.0352, "step": 38214 }, { "epoch": 0.8420785888600594, "grad_norm": 0.36795565485954285, "learning_rate": 1.919319801437833e-06, "loss": 0.0457, "step": 38215 }, { "epoch": 0.8421006241495755, "grad_norm": 0.9754547476768494, "learning_rate": 1.918795887961531e-06, "loss": 0.0393, "step": 38216 }, { "epoch": 0.8421226594390917, "grad_norm": 0.4102281332015991, "learning_rate": 1.918272041114812e-06, "loss": 0.0402, "step": 38217 }, { "epoch": 0.8421446947286079, "grad_norm": 0.5284777283668518, "learning_rate": 1.9177482609003495e-06, "loss": 0.0497, "step": 38218 }, { "epoch": 0.842166730018124, "grad_norm": 0.8267419338226318, "learning_rate": 1.9172245473208144e-06, "loss": 0.0747, "step": 38219 }, { "epoch": 0.8421887653076402, "grad_norm": 0.6563687920570374, "learning_rate": 1.9167009003788614e-06, "loss": 0.0718, "step": 38220 }, { "epoch": 0.8422108005971564, "grad_norm": 0.8986582159996033, "learning_rate": 1.9161773200771756e-06, "loss": 0.0626, "step": 38221 }, { "epoch": 0.8422328358866725, "grad_norm": 0.6827445030212402, "learning_rate": 1.91565380641841e-06, "loss": 0.0517, "step": 38222 }, { "epoch": 0.8422548711761887, "grad_norm": 0.7614766955375671, "learning_rate": 1.9151303594052394e-06, "loss": 0.0631, "step": 38223 }, { "epoch": 0.8422769064657049, "grad_norm": 0.9184426069259644, "learning_rate": 1.9146069790403237e-06, "loss": 0.0711, "step": 38224 }, { "epoch": 0.842298941755221, "grad_norm": 0.815292239189148, "learning_rate": 1.9140836653263313e-06, "loss": 0.0554, "step": 38225 }, { "epoch": 0.8423209770447372, "grad_norm": 0.5887890458106995, "learning_rate": 1.913560418265931e-06, "loss": 0.0456, "step": 38226 }, { "epoch": 0.8423430123342533, "grad_norm": 0.6043197512626648, "learning_rate": 1.913037237861783e-06, "loss": 0.0443, "step": 38227 }, { "epoch": 0.8423650476237695, "grad_norm": 0.40123239159584045, "learning_rate": 1.912514124116552e-06, "loss": 0.0471, "step": 38228 }, { "epoch": 0.8423870829132857, "grad_norm": 0.5872371196746826, "learning_rate": 1.9119910770329057e-06, "loss": 0.036, "step": 38229 }, { "epoch": 0.8424091182028017, "grad_norm": 0.7427197098731995, "learning_rate": 1.9114680966135116e-06, "loss": 0.1067, "step": 38230 }, { "epoch": 0.8424311534923179, "grad_norm": 0.7243516445159912, "learning_rate": 1.9109451828610246e-06, "loss": 0.055, "step": 38231 }, { "epoch": 0.8424531887818341, "grad_norm": 0.5397064685821533, "learning_rate": 1.9104223357781126e-06, "loss": 0.0451, "step": 38232 }, { "epoch": 0.8424752240713502, "grad_norm": 0.8351386189460754, "learning_rate": 1.909899555367443e-06, "loss": 0.0758, "step": 38233 }, { "epoch": 0.8424972593608664, "grad_norm": 0.5209008455276489, "learning_rate": 1.909376841631671e-06, "loss": 0.0238, "step": 38234 }, { "epoch": 0.8425192946503826, "grad_norm": 0.719784140586853, "learning_rate": 1.9088541945734656e-06, "loss": 0.0625, "step": 38235 }, { "epoch": 0.8425413299398987, "grad_norm": 0.4342796504497528, "learning_rate": 1.9083316141954784e-06, "loss": 0.066, "step": 38236 }, { "epoch": 0.8425633652294149, "grad_norm": 0.5247017741203308, "learning_rate": 1.907809100500385e-06, "loss": 0.0595, "step": 38237 }, { "epoch": 0.842585400518931, "grad_norm": 0.6281703114509583, "learning_rate": 1.907286653490839e-06, "loss": 0.0533, "step": 38238 }, { "epoch": 0.8426074358084472, "grad_norm": 0.6591628193855286, "learning_rate": 1.9067642731695044e-06, "loss": 0.0612, "step": 38239 }, { "epoch": 0.8426294710979634, "grad_norm": 0.32689720392227173, "learning_rate": 1.906241959539038e-06, "loss": 0.0366, "step": 38240 }, { "epoch": 0.8426515063874795, "grad_norm": 0.5065845251083374, "learning_rate": 1.9057197126021036e-06, "loss": 0.0991, "step": 38241 }, { "epoch": 0.8426735416769957, "grad_norm": 0.7213079333305359, "learning_rate": 1.905197532361363e-06, "loss": 0.0577, "step": 38242 }, { "epoch": 0.8426955769665119, "grad_norm": 0.5840796232223511, "learning_rate": 1.9046754188194653e-06, "loss": 0.0775, "step": 38243 }, { "epoch": 0.842717612256028, "grad_norm": 0.6736655831336975, "learning_rate": 1.9041533719790871e-06, "loss": 0.079, "step": 38244 }, { "epoch": 0.8427396475455442, "grad_norm": 0.9506067633628845, "learning_rate": 1.9036313918428727e-06, "loss": 0.0628, "step": 38245 }, { "epoch": 0.8427616828350604, "grad_norm": 0.8264460563659668, "learning_rate": 1.9031094784134917e-06, "loss": 0.0415, "step": 38246 }, { "epoch": 0.8427837181245765, "grad_norm": 0.43923917412757874, "learning_rate": 1.902587631693592e-06, "loss": 0.0554, "step": 38247 }, { "epoch": 0.8428057534140927, "grad_norm": 0.4308755099773407, "learning_rate": 1.902065851685838e-06, "loss": 0.0583, "step": 38248 }, { "epoch": 0.8428277887036089, "grad_norm": 0.8495953679084778, "learning_rate": 1.9015441383928894e-06, "loss": 0.0538, "step": 38249 }, { "epoch": 0.842849823993125, "grad_norm": 0.491809219121933, "learning_rate": 1.9010224918173974e-06, "loss": 0.0573, "step": 38250 }, { "epoch": 0.8428718592826412, "grad_norm": 0.4841010868549347, "learning_rate": 1.900500911962023e-06, "loss": 0.0496, "step": 38251 }, { "epoch": 0.8428938945721574, "grad_norm": 0.5704709887504578, "learning_rate": 1.8999793988294213e-06, "loss": 0.0502, "step": 38252 }, { "epoch": 0.8429159298616735, "grad_norm": 0.646761417388916, "learning_rate": 1.8994579524222511e-06, "loss": 0.0622, "step": 38253 }, { "epoch": 0.8429379651511897, "grad_norm": 0.6348909139633179, "learning_rate": 1.8989365727431656e-06, "loss": 0.0771, "step": 38254 }, { "epoch": 0.8429600004407057, "grad_norm": 0.5330119729042053, "learning_rate": 1.8984152597948196e-06, "loss": 0.0794, "step": 38255 }, { "epoch": 0.8429820357302219, "grad_norm": 0.7069122195243835, "learning_rate": 1.897894013579874e-06, "loss": 0.056, "step": 38256 }, { "epoch": 0.8430040710197381, "grad_norm": 0.6644111275672913, "learning_rate": 1.8973728341009784e-06, "loss": 0.046, "step": 38257 }, { "epoch": 0.8430261063092542, "grad_norm": 0.41389891505241394, "learning_rate": 1.8968517213607906e-06, "loss": 0.0681, "step": 38258 }, { "epoch": 0.8430481415987704, "grad_norm": 0.39122840762138367, "learning_rate": 1.8963306753619568e-06, "loss": 0.0623, "step": 38259 }, { "epoch": 0.8430701768882866, "grad_norm": 0.6263591051101685, "learning_rate": 1.8958096961071454e-06, "loss": 0.0816, "step": 38260 }, { "epoch": 0.8430922121778027, "grad_norm": 0.5003328919410706, "learning_rate": 1.8952887835989986e-06, "loss": 0.051, "step": 38261 }, { "epoch": 0.8431142474673189, "grad_norm": 0.47659963369369507, "learning_rate": 1.8947679378401761e-06, "loss": 0.0514, "step": 38262 }, { "epoch": 0.843136282756835, "grad_norm": 0.44839346408843994, "learning_rate": 1.8942471588333243e-06, "loss": 0.0599, "step": 38263 }, { "epoch": 0.8431583180463512, "grad_norm": 0.2113785296678543, "learning_rate": 1.893726446581101e-06, "loss": 0.0546, "step": 38264 }, { "epoch": 0.8431803533358674, "grad_norm": 0.3211282789707184, "learning_rate": 1.8932058010861593e-06, "loss": 0.0436, "step": 38265 }, { "epoch": 0.8432023886253835, "grad_norm": 0.5392656922340393, "learning_rate": 1.892685222351143e-06, "loss": 0.1008, "step": 38266 }, { "epoch": 0.8432244239148997, "grad_norm": 0.7015402913093567, "learning_rate": 1.892164710378716e-06, "loss": 0.0643, "step": 38267 }, { "epoch": 0.8432464592044159, "grad_norm": 0.3895798325538635, "learning_rate": 1.8916442651715205e-06, "loss": 0.082, "step": 38268 }, { "epoch": 0.843268494493932, "grad_norm": 0.2842625379562378, "learning_rate": 1.891123886732211e-06, "loss": 0.0458, "step": 38269 }, { "epoch": 0.8432905297834482, "grad_norm": 0.9766772985458374, "learning_rate": 1.8906035750634359e-06, "loss": 0.0834, "step": 38270 }, { "epoch": 0.8433125650729644, "grad_norm": 0.4136207401752472, "learning_rate": 1.8900833301678461e-06, "loss": 0.0598, "step": 38271 }, { "epoch": 0.8433346003624805, "grad_norm": 0.5665217041969299, "learning_rate": 1.889563152048096e-06, "loss": 0.0434, "step": 38272 }, { "epoch": 0.8433566356519967, "grad_norm": 0.5819548964500427, "learning_rate": 1.889043040706827e-06, "loss": 0.0549, "step": 38273 }, { "epoch": 0.8433786709415129, "grad_norm": 0.6929581165313721, "learning_rate": 1.8885229961466921e-06, "loss": 0.0794, "step": 38274 }, { "epoch": 0.843400706231029, "grad_norm": 0.58571457862854, "learning_rate": 1.8880030183703407e-06, "loss": 0.0359, "step": 38275 }, { "epoch": 0.8434227415205452, "grad_norm": 0.5311548709869385, "learning_rate": 1.8874831073804238e-06, "loss": 0.0575, "step": 38276 }, { "epoch": 0.8434447768100614, "grad_norm": 0.5613958239555359, "learning_rate": 1.8869632631795847e-06, "loss": 0.042, "step": 38277 }, { "epoch": 0.8434668120995775, "grad_norm": 0.8384522199630737, "learning_rate": 1.8864434857704742e-06, "loss": 0.074, "step": 38278 }, { "epoch": 0.8434888473890937, "grad_norm": 0.590177059173584, "learning_rate": 1.8859237751557407e-06, "loss": 0.0589, "step": 38279 }, { "epoch": 0.8435108826786097, "grad_norm": 0.505199134349823, "learning_rate": 1.8854041313380265e-06, "loss": 0.0675, "step": 38280 }, { "epoch": 0.8435329179681259, "grad_norm": 0.9364045262336731, "learning_rate": 1.8848845543199866e-06, "loss": 0.0822, "step": 38281 }, { "epoch": 0.8435549532576421, "grad_norm": 0.30824655294418335, "learning_rate": 1.8843650441042553e-06, "loss": 0.0409, "step": 38282 }, { "epoch": 0.8435769885471582, "grad_norm": 0.6469845175743103, "learning_rate": 1.8838456006934922e-06, "loss": 0.0606, "step": 38283 }, { "epoch": 0.8435990238366744, "grad_norm": 0.5397875308990479, "learning_rate": 1.8833262240903338e-06, "loss": 0.0607, "step": 38284 }, { "epoch": 0.8436210591261906, "grad_norm": 0.352841317653656, "learning_rate": 1.8828069142974308e-06, "loss": 0.0414, "step": 38285 }, { "epoch": 0.8436430944157067, "grad_norm": 0.7840979695320129, "learning_rate": 1.8822876713174248e-06, "loss": 0.0434, "step": 38286 }, { "epoch": 0.8436651297052229, "grad_norm": 0.6414477229118347, "learning_rate": 1.88176849515296e-06, "loss": 0.0257, "step": 38287 }, { "epoch": 0.8436871649947391, "grad_norm": 0.9350219368934631, "learning_rate": 1.8812493858066881e-06, "loss": 0.0626, "step": 38288 }, { "epoch": 0.8437092002842552, "grad_norm": 0.84816974401474, "learning_rate": 1.88073034328124e-06, "loss": 0.0465, "step": 38289 }, { "epoch": 0.8437312355737714, "grad_norm": 0.5698111653327942, "learning_rate": 1.8802113675792736e-06, "loss": 0.0587, "step": 38290 }, { "epoch": 0.8437532708632876, "grad_norm": 0.6805934309959412, "learning_rate": 1.8796924587034237e-06, "loss": 0.0499, "step": 38291 }, { "epoch": 0.8437753061528037, "grad_norm": 0.8483965992927551, "learning_rate": 1.8791736166563395e-06, "loss": 0.058, "step": 38292 }, { "epoch": 0.8437973414423199, "grad_norm": 0.2520340383052826, "learning_rate": 1.8786548414406523e-06, "loss": 0.0473, "step": 38293 }, { "epoch": 0.843819376731836, "grad_norm": 0.35070204734802246, "learning_rate": 1.8781361330590185e-06, "loss": 0.0491, "step": 38294 }, { "epoch": 0.8438414120213522, "grad_norm": 0.5598625540733337, "learning_rate": 1.877617491514076e-06, "loss": 0.0779, "step": 38295 }, { "epoch": 0.8438634473108684, "grad_norm": 0.44296184182167053, "learning_rate": 1.877098916808459e-06, "loss": 0.0495, "step": 38296 }, { "epoch": 0.8438854826003845, "grad_norm": 0.7081554532051086, "learning_rate": 1.8765804089448157e-06, "loss": 0.0598, "step": 38297 }, { "epoch": 0.8439075178899007, "grad_norm": 0.6880870461463928, "learning_rate": 1.876061967925784e-06, "loss": 0.0874, "step": 38298 }, { "epoch": 0.8439295531794169, "grad_norm": 0.5049241185188293, "learning_rate": 1.8755435937540117e-06, "loss": 0.0685, "step": 38299 }, { "epoch": 0.843951588468933, "grad_norm": 0.9194851517677307, "learning_rate": 1.8750252864321298e-06, "loss": 0.0539, "step": 38300 }, { "epoch": 0.8439736237584492, "grad_norm": 0.5837582945823669, "learning_rate": 1.8745070459627817e-06, "loss": 0.0828, "step": 38301 }, { "epoch": 0.8439956590479654, "grad_norm": 0.49791741371154785, "learning_rate": 1.8739888723486081e-06, "loss": 0.0443, "step": 38302 }, { "epoch": 0.8440176943374815, "grad_norm": 0.5135390162467957, "learning_rate": 1.8734707655922506e-06, "loss": 0.0613, "step": 38303 }, { "epoch": 0.8440397296269976, "grad_norm": 0.7222862839698792, "learning_rate": 1.8729527256963452e-06, "loss": 0.0495, "step": 38304 }, { "epoch": 0.8440617649165137, "grad_norm": 0.9511208534240723, "learning_rate": 1.8724347526635249e-06, "loss": 0.0799, "step": 38305 }, { "epoch": 0.8440838002060299, "grad_norm": 0.8752340078353882, "learning_rate": 1.8719168464964409e-06, "loss": 0.057, "step": 38306 }, { "epoch": 0.8441058354955461, "grad_norm": 0.5829628705978394, "learning_rate": 1.871399007197721e-06, "loss": 0.0675, "step": 38307 }, { "epoch": 0.8441278707850622, "grad_norm": 0.9727369546890259, "learning_rate": 1.8708812347700083e-06, "loss": 0.1044, "step": 38308 }, { "epoch": 0.8441499060745784, "grad_norm": 0.753049910068512, "learning_rate": 1.870363529215932e-06, "loss": 0.0696, "step": 38309 }, { "epoch": 0.8441719413640946, "grad_norm": 0.29353123903274536, "learning_rate": 1.8698458905381422e-06, "loss": 0.0334, "step": 38310 }, { "epoch": 0.8441939766536107, "grad_norm": 0.5855438709259033, "learning_rate": 1.8693283187392646e-06, "loss": 0.0628, "step": 38311 }, { "epoch": 0.8442160119431269, "grad_norm": 0.40691938996315, "learning_rate": 1.868810813821939e-06, "loss": 0.0772, "step": 38312 }, { "epoch": 0.8442380472326431, "grad_norm": 0.6360146999359131, "learning_rate": 1.8682933757888032e-06, "loss": 0.0825, "step": 38313 }, { "epoch": 0.8442600825221592, "grad_norm": 0.40451058745384216, "learning_rate": 1.8677760046424903e-06, "loss": 0.0438, "step": 38314 }, { "epoch": 0.8442821178116754, "grad_norm": 0.6312245726585388, "learning_rate": 1.8672587003856378e-06, "loss": 0.0556, "step": 38315 }, { "epoch": 0.8443041531011916, "grad_norm": 0.46325892210006714, "learning_rate": 1.8667414630208724e-06, "loss": 0.0284, "step": 38316 }, { "epoch": 0.8443261883907077, "grad_norm": 0.8256558179855347, "learning_rate": 1.8662242925508434e-06, "loss": 0.0702, "step": 38317 }, { "epoch": 0.8443482236802239, "grad_norm": 0.7630531787872314, "learning_rate": 1.8657071889781719e-06, "loss": 0.0934, "step": 38318 }, { "epoch": 0.84437025896974, "grad_norm": 0.4885402321815491, "learning_rate": 1.8651901523055009e-06, "loss": 0.0344, "step": 38319 }, { "epoch": 0.8443922942592562, "grad_norm": 0.22963343560695648, "learning_rate": 1.8646731825354568e-06, "loss": 0.042, "step": 38320 }, { "epoch": 0.8444143295487724, "grad_norm": 0.6490563154220581, "learning_rate": 1.8641562796706758e-06, "loss": 0.0635, "step": 38321 }, { "epoch": 0.8444363648382885, "grad_norm": 1.016282558441162, "learning_rate": 1.8636394437137939e-06, "loss": 0.0626, "step": 38322 }, { "epoch": 0.8444584001278047, "grad_norm": 0.5701899528503418, "learning_rate": 1.8631226746674373e-06, "loss": 0.077, "step": 38323 }, { "epoch": 0.8444804354173209, "grad_norm": 0.49580109119415283, "learning_rate": 1.8626059725342426e-06, "loss": 0.0571, "step": 38324 }, { "epoch": 0.844502470706837, "grad_norm": 0.6810207366943359, "learning_rate": 1.8620893373168391e-06, "loss": 0.0798, "step": 38325 }, { "epoch": 0.8445245059963532, "grad_norm": 0.5773832201957703, "learning_rate": 1.861572769017863e-06, "loss": 0.0994, "step": 38326 }, { "epoch": 0.8445465412858694, "grad_norm": 0.5048925876617432, "learning_rate": 1.861056267639939e-06, "loss": 0.0558, "step": 38327 }, { "epoch": 0.8445685765753855, "grad_norm": 0.5653194785118103, "learning_rate": 1.8605398331857016e-06, "loss": 0.0499, "step": 38328 }, { "epoch": 0.8445906118649016, "grad_norm": 0.17194342613220215, "learning_rate": 1.8600234656577836e-06, "loss": 0.0504, "step": 38329 }, { "epoch": 0.8446126471544178, "grad_norm": 0.5503998398780823, "learning_rate": 1.8595071650588097e-06, "loss": 0.0754, "step": 38330 }, { "epoch": 0.8446346824439339, "grad_norm": 0.43957847356796265, "learning_rate": 1.8589909313914128e-06, "loss": 0.0744, "step": 38331 }, { "epoch": 0.8446567177334501, "grad_norm": 0.7795602679252625, "learning_rate": 1.8584747646582206e-06, "loss": 0.0728, "step": 38332 }, { "epoch": 0.8446787530229662, "grad_norm": 0.3554859757423401, "learning_rate": 1.857958664861868e-06, "loss": 0.0673, "step": 38333 }, { "epoch": 0.8447007883124824, "grad_norm": 0.5120817422866821, "learning_rate": 1.8574426320049743e-06, "loss": 0.0705, "step": 38334 }, { "epoch": 0.8447228236019986, "grad_norm": 0.9124571681022644, "learning_rate": 1.856926666090174e-06, "loss": 0.0644, "step": 38335 }, { "epoch": 0.8447448588915147, "grad_norm": 0.6273823976516724, "learning_rate": 1.856410767120097e-06, "loss": 0.051, "step": 38336 }, { "epoch": 0.8447668941810309, "grad_norm": 0.531372606754303, "learning_rate": 1.8558949350973641e-06, "loss": 0.0682, "step": 38337 }, { "epoch": 0.8447889294705471, "grad_norm": 0.5874939560890198, "learning_rate": 1.8553791700246104e-06, "loss": 0.0501, "step": 38338 }, { "epoch": 0.8448109647600632, "grad_norm": 0.7860226035118103, "learning_rate": 1.8548634719044533e-06, "loss": 0.0568, "step": 38339 }, { "epoch": 0.8448330000495794, "grad_norm": 0.35052841901779175, "learning_rate": 1.8543478407395325e-06, "loss": 0.0411, "step": 38340 }, { "epoch": 0.8448550353390956, "grad_norm": 0.6412070989608765, "learning_rate": 1.8538322765324628e-06, "loss": 0.0666, "step": 38341 }, { "epoch": 0.8448770706286117, "grad_norm": 0.756134033203125, "learning_rate": 1.85331677928588e-06, "loss": 0.101, "step": 38342 }, { "epoch": 0.8448991059181279, "grad_norm": 0.9285744428634644, "learning_rate": 1.8528013490023992e-06, "loss": 0.0588, "step": 38343 }, { "epoch": 0.844921141207644, "grad_norm": 0.4902600646018982, "learning_rate": 1.852285985684653e-06, "loss": 0.0537, "step": 38344 }, { "epoch": 0.8449431764971602, "grad_norm": 0.6789861917495728, "learning_rate": 1.8517706893352659e-06, "loss": 0.054, "step": 38345 }, { "epoch": 0.8449652117866764, "grad_norm": 0.737138569355011, "learning_rate": 1.851255459956861e-06, "loss": 0.078, "step": 38346 }, { "epoch": 0.8449872470761925, "grad_norm": 0.5917004346847534, "learning_rate": 1.850740297552061e-06, "loss": 0.0587, "step": 38347 }, { "epoch": 0.8450092823657087, "grad_norm": 0.44516047835350037, "learning_rate": 1.8502252021234921e-06, "loss": 0.0685, "step": 38348 }, { "epoch": 0.8450313176552249, "grad_norm": 0.5649839639663696, "learning_rate": 1.8497101736737808e-06, "loss": 0.0703, "step": 38349 }, { "epoch": 0.845053352944741, "grad_norm": 0.881189227104187, "learning_rate": 1.8491952122055434e-06, "loss": 0.0646, "step": 38350 }, { "epoch": 0.8450753882342572, "grad_norm": 0.500735342502594, "learning_rate": 1.8486803177214074e-06, "loss": 0.0496, "step": 38351 }, { "epoch": 0.8450974235237734, "grad_norm": 0.7130184769630432, "learning_rate": 1.8481654902239992e-06, "loss": 0.0619, "step": 38352 }, { "epoch": 0.8451194588132895, "grad_norm": 0.5701022148132324, "learning_rate": 1.8476507297159317e-06, "loss": 0.0739, "step": 38353 }, { "epoch": 0.8451414941028056, "grad_norm": 0.4980466961860657, "learning_rate": 1.847136036199833e-06, "loss": 0.047, "step": 38354 }, { "epoch": 0.8451635293923218, "grad_norm": 0.8005576729774475, "learning_rate": 1.846621409678324e-06, "loss": 0.0777, "step": 38355 }, { "epoch": 0.8451855646818379, "grad_norm": 0.6839916706085205, "learning_rate": 1.846106850154028e-06, "loss": 0.0396, "step": 38356 }, { "epoch": 0.8452075999713541, "grad_norm": 0.5881691575050354, "learning_rate": 1.8455923576295607e-06, "loss": 0.0583, "step": 38357 }, { "epoch": 0.8452296352608702, "grad_norm": 0.6746395230293274, "learning_rate": 1.8450779321075456e-06, "loss": 0.0545, "step": 38358 }, { "epoch": 0.8452516705503864, "grad_norm": 0.2696228623390198, "learning_rate": 1.8445635735906051e-06, "loss": 0.0543, "step": 38359 }, { "epoch": 0.8452737058399026, "grad_norm": 0.6897205710411072, "learning_rate": 1.8440492820813542e-06, "loss": 0.0698, "step": 38360 }, { "epoch": 0.8452957411294187, "grad_norm": 0.603248655796051, "learning_rate": 1.8435350575824188e-06, "loss": 0.0687, "step": 38361 }, { "epoch": 0.8453177764189349, "grad_norm": 0.7432861924171448, "learning_rate": 1.8430209000964072e-06, "loss": 0.0971, "step": 38362 }, { "epoch": 0.8453398117084511, "grad_norm": 0.5256255865097046, "learning_rate": 1.8425068096259518e-06, "loss": 0.077, "step": 38363 }, { "epoch": 0.8453618469979672, "grad_norm": 0.5285453796386719, "learning_rate": 1.8419927861736625e-06, "loss": 0.0345, "step": 38364 }, { "epoch": 0.8453838822874834, "grad_norm": 0.6778231859207153, "learning_rate": 1.841478829742162e-06, "loss": 0.0537, "step": 38365 }, { "epoch": 0.8454059175769996, "grad_norm": 0.544036865234375, "learning_rate": 1.8409649403340634e-06, "loss": 0.0633, "step": 38366 }, { "epoch": 0.8454279528665157, "grad_norm": 0.761798083782196, "learning_rate": 1.840451117951986e-06, "loss": 0.0795, "step": 38367 }, { "epoch": 0.8454499881560319, "grad_norm": 0.7200483679771423, "learning_rate": 1.8399373625985516e-06, "loss": 0.068, "step": 38368 }, { "epoch": 0.8454720234455481, "grad_norm": 0.8398966789245605, "learning_rate": 1.839423674276371e-06, "loss": 0.058, "step": 38369 }, { "epoch": 0.8454940587350642, "grad_norm": 0.6801974773406982, "learning_rate": 1.838910052988062e-06, "loss": 0.0692, "step": 38370 }, { "epoch": 0.8455160940245804, "grad_norm": 0.6109153628349304, "learning_rate": 1.838396498736241e-06, "loss": 0.0546, "step": 38371 }, { "epoch": 0.8455381293140966, "grad_norm": 0.6678959727287292, "learning_rate": 1.8378830115235279e-06, "loss": 0.0647, "step": 38372 }, { "epoch": 0.8455601646036127, "grad_norm": 0.4129030704498291, "learning_rate": 1.8373695913525318e-06, "loss": 0.0431, "step": 38373 }, { "epoch": 0.8455821998931289, "grad_norm": 0.4748288094997406, "learning_rate": 1.8368562382258709e-06, "loss": 0.0474, "step": 38374 }, { "epoch": 0.845604235182645, "grad_norm": 0.6784718632698059, "learning_rate": 1.8363429521461628e-06, "loss": 0.0681, "step": 38375 }, { "epoch": 0.8456262704721612, "grad_norm": 0.5996409058570862, "learning_rate": 1.8358297331160156e-06, "loss": 0.0585, "step": 38376 }, { "epoch": 0.8456483057616774, "grad_norm": 0.7741832733154297, "learning_rate": 1.8353165811380472e-06, "loss": 0.0529, "step": 38377 }, { "epoch": 0.8456703410511935, "grad_norm": 0.6138874292373657, "learning_rate": 1.8348034962148708e-06, "loss": 0.0427, "step": 38378 }, { "epoch": 0.8456923763407096, "grad_norm": 0.5858531594276428, "learning_rate": 1.8342904783491037e-06, "loss": 0.0565, "step": 38379 }, { "epoch": 0.8457144116302258, "grad_norm": 0.7687489986419678, "learning_rate": 1.8337775275433511e-06, "loss": 0.0602, "step": 38380 }, { "epoch": 0.8457364469197419, "grad_norm": 1.2777515649795532, "learning_rate": 1.8332646438002304e-06, "loss": 0.0836, "step": 38381 }, { "epoch": 0.8457584822092581, "grad_norm": 0.2638629376888275, "learning_rate": 1.8327518271223564e-06, "loss": 0.0625, "step": 38382 }, { "epoch": 0.8457805174987743, "grad_norm": 1.0814495086669922, "learning_rate": 1.8322390775123338e-06, "loss": 0.0647, "step": 38383 }, { "epoch": 0.8458025527882904, "grad_norm": 0.2958136200904846, "learning_rate": 1.8317263949727837e-06, "loss": 0.0576, "step": 38384 }, { "epoch": 0.8458245880778066, "grad_norm": 0.6461908221244812, "learning_rate": 1.8312137795063055e-06, "loss": 0.0647, "step": 38385 }, { "epoch": 0.8458466233673227, "grad_norm": 0.38827723264694214, "learning_rate": 1.830701231115524e-06, "loss": 0.0535, "step": 38386 }, { "epoch": 0.8458686586568389, "grad_norm": 0.512803852558136, "learning_rate": 1.830188749803039e-06, "loss": 0.0456, "step": 38387 }, { "epoch": 0.8458906939463551, "grad_norm": 0.3900282382965088, "learning_rate": 1.829676335571468e-06, "loss": 0.0527, "step": 38388 }, { "epoch": 0.8459127292358712, "grad_norm": 0.3638819456100464, "learning_rate": 1.8291639884234157e-06, "loss": 0.0296, "step": 38389 }, { "epoch": 0.8459347645253874, "grad_norm": 0.8140835165977478, "learning_rate": 1.8286517083614934e-06, "loss": 0.0519, "step": 38390 }, { "epoch": 0.8459567998149036, "grad_norm": 0.3160612881183624, "learning_rate": 1.8281394953883141e-06, "loss": 0.0719, "step": 38391 }, { "epoch": 0.8459788351044197, "grad_norm": 0.8307247757911682, "learning_rate": 1.8276273495064805e-06, "loss": 0.0836, "step": 38392 }, { "epoch": 0.8460008703939359, "grad_norm": 0.8046895861625671, "learning_rate": 1.8271152707186039e-06, "loss": 0.0479, "step": 38393 }, { "epoch": 0.8460229056834521, "grad_norm": 0.5882339477539062, "learning_rate": 1.826603259027294e-06, "loss": 0.0692, "step": 38394 }, { "epoch": 0.8460449409729682, "grad_norm": 0.3225221633911133, "learning_rate": 1.8260913144351604e-06, "loss": 0.0426, "step": 38395 }, { "epoch": 0.8460669762624844, "grad_norm": 0.6384981274604797, "learning_rate": 1.8255794369448058e-06, "loss": 0.0597, "step": 38396 }, { "epoch": 0.8460890115520006, "grad_norm": 0.8719110488891602, "learning_rate": 1.8250676265588384e-06, "loss": 0.0846, "step": 38397 }, { "epoch": 0.8461110468415167, "grad_norm": 0.4949999749660492, "learning_rate": 1.824555883279871e-06, "loss": 0.0613, "step": 38398 }, { "epoch": 0.8461330821310329, "grad_norm": 0.22802188992500305, "learning_rate": 1.8240442071105011e-06, "loss": 0.0547, "step": 38399 }, { "epoch": 0.846155117420549, "grad_norm": 0.34832122921943665, "learning_rate": 1.8235325980533407e-06, "loss": 0.05, "step": 38400 }, { "epoch": 0.8461771527100652, "grad_norm": 0.4068550169467926, "learning_rate": 1.8230210561109955e-06, "loss": 0.0686, "step": 38401 }, { "epoch": 0.8461991879995814, "grad_norm": 0.400598406791687, "learning_rate": 1.822509581286072e-06, "loss": 0.0486, "step": 38402 }, { "epoch": 0.8462212232890974, "grad_norm": 0.3068365752696991, "learning_rate": 1.8219981735811698e-06, "loss": 0.0561, "step": 38403 }, { "epoch": 0.8462432585786136, "grad_norm": 0.5889378786087036, "learning_rate": 1.8214868329988983e-06, "loss": 0.0636, "step": 38404 }, { "epoch": 0.8462652938681298, "grad_norm": 0.5131834745407104, "learning_rate": 1.8209755595418654e-06, "loss": 0.05, "step": 38405 }, { "epoch": 0.8462873291576459, "grad_norm": 0.5113427639007568, "learning_rate": 1.8204643532126658e-06, "loss": 0.0397, "step": 38406 }, { "epoch": 0.8463093644471621, "grad_norm": 0.6576825976371765, "learning_rate": 1.819953214013914e-06, "loss": 0.0468, "step": 38407 }, { "epoch": 0.8463313997366783, "grad_norm": 0.37162503600120544, "learning_rate": 1.8194421419482e-06, "loss": 0.0421, "step": 38408 }, { "epoch": 0.8463534350261944, "grad_norm": 0.5257437229156494, "learning_rate": 1.8189311370181426e-06, "loss": 0.0513, "step": 38409 }, { "epoch": 0.8463754703157106, "grad_norm": 0.9943320751190186, "learning_rate": 1.818420199226334e-06, "loss": 0.0701, "step": 38410 }, { "epoch": 0.8463975056052268, "grad_norm": 0.6976813077926636, "learning_rate": 1.8179093285753829e-06, "loss": 0.0797, "step": 38411 }, { "epoch": 0.8464195408947429, "grad_norm": 0.5651801228523254, "learning_rate": 1.817398525067881e-06, "loss": 0.0618, "step": 38412 }, { "epoch": 0.8464415761842591, "grad_norm": 0.4323723614215851, "learning_rate": 1.8168877887064444e-06, "loss": 0.0547, "step": 38413 }, { "epoch": 0.8464636114737752, "grad_norm": 1.0442917346954346, "learning_rate": 1.8163771194936646e-06, "loss": 0.0624, "step": 38414 }, { "epoch": 0.8464856467632914, "grad_norm": 0.9019927978515625, "learning_rate": 1.8158665174321493e-06, "loss": 0.0423, "step": 38415 }, { "epoch": 0.8465076820528076, "grad_norm": 0.646097719669342, "learning_rate": 1.8153559825244932e-06, "loss": 0.0447, "step": 38416 }, { "epoch": 0.8465297173423237, "grad_norm": 0.47163134813308716, "learning_rate": 1.8148455147732974e-06, "loss": 0.0417, "step": 38417 }, { "epoch": 0.8465517526318399, "grad_norm": 0.6794173717498779, "learning_rate": 1.8143351141811681e-06, "loss": 0.0714, "step": 38418 }, { "epoch": 0.8465737879213561, "grad_norm": 0.5843369960784912, "learning_rate": 1.8138247807506986e-06, "loss": 0.0637, "step": 38419 }, { "epoch": 0.8465958232108722, "grad_norm": 0.5985611081123352, "learning_rate": 1.8133145144844882e-06, "loss": 0.0587, "step": 38420 }, { "epoch": 0.8466178585003884, "grad_norm": 0.4202861487865448, "learning_rate": 1.8128043153851399e-06, "loss": 0.04, "step": 38421 }, { "epoch": 0.8466398937899046, "grad_norm": 0.38712409138679504, "learning_rate": 1.812294183455253e-06, "loss": 0.0422, "step": 38422 }, { "epoch": 0.8466619290794207, "grad_norm": 0.8151831030845642, "learning_rate": 1.811784118697421e-06, "loss": 0.0794, "step": 38423 }, { "epoch": 0.8466839643689369, "grad_norm": 0.5239474773406982, "learning_rate": 1.8112741211142448e-06, "loss": 0.0327, "step": 38424 }, { "epoch": 0.8467059996584531, "grad_norm": 0.5551902055740356, "learning_rate": 1.8107641907083256e-06, "loss": 0.0695, "step": 38425 }, { "epoch": 0.8467280349479692, "grad_norm": 0.5010325908660889, "learning_rate": 1.8102543274822513e-06, "loss": 0.0476, "step": 38426 }, { "epoch": 0.8467500702374854, "grad_norm": 0.7128795385360718, "learning_rate": 1.8097445314386268e-06, "loss": 0.0571, "step": 38427 }, { "epoch": 0.8467721055270014, "grad_norm": 0.5538749694824219, "learning_rate": 1.8092348025800465e-06, "loss": 0.0798, "step": 38428 }, { "epoch": 0.8467941408165176, "grad_norm": 0.6325779557228088, "learning_rate": 1.80872514090911e-06, "loss": 0.057, "step": 38429 }, { "epoch": 0.8468161761060338, "grad_norm": 0.6064319014549255, "learning_rate": 1.808215546428405e-06, "loss": 0.0527, "step": 38430 }, { "epoch": 0.8468382113955499, "grad_norm": 0.6265968680381775, "learning_rate": 1.8077060191405348e-06, "loss": 0.0577, "step": 38431 }, { "epoch": 0.8468602466850661, "grad_norm": 0.3731420636177063, "learning_rate": 1.8071965590480938e-06, "loss": 0.0603, "step": 38432 }, { "epoch": 0.8468822819745823, "grad_norm": 0.5717840194702148, "learning_rate": 1.8066871661536715e-06, "loss": 0.0614, "step": 38433 }, { "epoch": 0.8469043172640984, "grad_norm": 0.893085241317749, "learning_rate": 1.8061778404598712e-06, "loss": 0.0704, "step": 38434 }, { "epoch": 0.8469263525536146, "grad_norm": 0.600466787815094, "learning_rate": 1.8056685819692752e-06, "loss": 0.0824, "step": 38435 }, { "epoch": 0.8469483878431308, "grad_norm": 0.5181530117988586, "learning_rate": 1.8051593906844905e-06, "loss": 0.0319, "step": 38436 }, { "epoch": 0.8469704231326469, "grad_norm": 0.6484345197677612, "learning_rate": 1.8046502666081011e-06, "loss": 0.0497, "step": 38437 }, { "epoch": 0.8469924584221631, "grad_norm": 0.5700770616531372, "learning_rate": 1.8041412097427085e-06, "loss": 0.0335, "step": 38438 }, { "epoch": 0.8470144937116793, "grad_norm": 0.4456051290035248, "learning_rate": 1.8036322200908955e-06, "loss": 0.0443, "step": 38439 }, { "epoch": 0.8470365290011954, "grad_norm": 0.9781361222267151, "learning_rate": 1.803123297655262e-06, "loss": 0.0927, "step": 38440 }, { "epoch": 0.8470585642907116, "grad_norm": 0.37600746750831604, "learning_rate": 1.8026144424384005e-06, "loss": 0.055, "step": 38441 }, { "epoch": 0.8470805995802277, "grad_norm": 0.5707433819770813, "learning_rate": 1.8021056544428977e-06, "loss": 0.0758, "step": 38442 }, { "epoch": 0.8471026348697439, "grad_norm": 0.36295875906944275, "learning_rate": 1.8015969336713478e-06, "loss": 0.0368, "step": 38443 }, { "epoch": 0.8471246701592601, "grad_norm": 0.7919925451278687, "learning_rate": 1.8010882801263422e-06, "loss": 0.0414, "step": 38444 }, { "epoch": 0.8471467054487762, "grad_norm": 0.6274131536483765, "learning_rate": 1.8005796938104757e-06, "loss": 0.0575, "step": 38445 }, { "epoch": 0.8471687407382924, "grad_norm": 0.41320571303367615, "learning_rate": 1.8000711747263326e-06, "loss": 0.0431, "step": 38446 }, { "epoch": 0.8471907760278086, "grad_norm": 0.5557065010070801, "learning_rate": 1.7995627228765027e-06, "loss": 0.0598, "step": 38447 }, { "epoch": 0.8472128113173247, "grad_norm": 0.45372629165649414, "learning_rate": 1.7990543382635838e-06, "loss": 0.0538, "step": 38448 }, { "epoch": 0.8472348466068409, "grad_norm": 0.5045353174209595, "learning_rate": 1.7985460208901555e-06, "loss": 0.0331, "step": 38449 }, { "epoch": 0.8472568818963571, "grad_norm": 0.6807810664176941, "learning_rate": 1.7980377707588125e-06, "loss": 0.0482, "step": 38450 }, { "epoch": 0.8472789171858732, "grad_norm": 0.615221381187439, "learning_rate": 1.7975295878721426e-06, "loss": 0.0542, "step": 38451 }, { "epoch": 0.8473009524753894, "grad_norm": 0.6916100382804871, "learning_rate": 1.7970214722327372e-06, "loss": 0.0893, "step": 38452 }, { "epoch": 0.8473229877649054, "grad_norm": 0.8717361092567444, "learning_rate": 1.7965134238431774e-06, "loss": 0.0702, "step": 38453 }, { "epoch": 0.8473450230544216, "grad_norm": 0.40062880516052246, "learning_rate": 1.7960054427060563e-06, "loss": 0.0278, "step": 38454 }, { "epoch": 0.8473670583439378, "grad_norm": 0.7093855142593384, "learning_rate": 1.7954975288239633e-06, "loss": 0.0654, "step": 38455 }, { "epoch": 0.8473890936334539, "grad_norm": 0.8844699263572693, "learning_rate": 1.7949896821994782e-06, "loss": 0.0702, "step": 38456 }, { "epoch": 0.8474111289229701, "grad_norm": 0.35531750321388245, "learning_rate": 1.7944819028351956e-06, "loss": 0.0605, "step": 38457 }, { "epoch": 0.8474331642124863, "grad_norm": 0.7047246694564819, "learning_rate": 1.7939741907336898e-06, "loss": 0.061, "step": 38458 }, { "epoch": 0.8474551995020024, "grad_norm": 0.48808109760284424, "learning_rate": 1.7934665458975641e-06, "loss": 0.0387, "step": 38459 }, { "epoch": 0.8474772347915186, "grad_norm": 0.3388398587703705, "learning_rate": 1.7929589683293896e-06, "loss": 0.0584, "step": 38460 }, { "epoch": 0.8474992700810348, "grad_norm": 0.9328795075416565, "learning_rate": 1.7924514580317626e-06, "loss": 0.0731, "step": 38461 }, { "epoch": 0.8475213053705509, "grad_norm": 0.5312249064445496, "learning_rate": 1.7919440150072574e-06, "loss": 0.054, "step": 38462 }, { "epoch": 0.8475433406600671, "grad_norm": 0.6337755918502808, "learning_rate": 1.7914366392584657e-06, "loss": 0.083, "step": 38463 }, { "epoch": 0.8475653759495833, "grad_norm": 0.40991443395614624, "learning_rate": 1.7909293307879737e-06, "loss": 0.046, "step": 38464 }, { "epoch": 0.8475874112390994, "grad_norm": 0.7391517162322998, "learning_rate": 1.7904220895983543e-06, "loss": 0.0528, "step": 38465 }, { "epoch": 0.8476094465286156, "grad_norm": 0.4780029356479645, "learning_rate": 1.789914915692205e-06, "loss": 0.0562, "step": 38466 }, { "epoch": 0.8476314818181317, "grad_norm": 0.38638365268707275, "learning_rate": 1.7894078090720995e-06, "loss": 0.0478, "step": 38467 }, { "epoch": 0.8476535171076479, "grad_norm": 0.6353983879089355, "learning_rate": 1.7889007697406285e-06, "loss": 0.0732, "step": 38468 }, { "epoch": 0.8476755523971641, "grad_norm": 0.4422430098056793, "learning_rate": 1.7883937977003668e-06, "loss": 0.0536, "step": 38469 }, { "epoch": 0.8476975876866802, "grad_norm": 0.652149498462677, "learning_rate": 1.7878868929539005e-06, "loss": 0.047, "step": 38470 }, { "epoch": 0.8477196229761964, "grad_norm": 0.5865625143051147, "learning_rate": 1.7873800555038129e-06, "loss": 0.0641, "step": 38471 }, { "epoch": 0.8477416582657126, "grad_norm": 0.5815000534057617, "learning_rate": 1.7868732853526814e-06, "loss": 0.0653, "step": 38472 }, { "epoch": 0.8477636935552287, "grad_norm": 0.44035106897354126, "learning_rate": 1.7863665825030896e-06, "loss": 0.064, "step": 38473 }, { "epoch": 0.8477857288447449, "grad_norm": 0.621902585029602, "learning_rate": 1.785859946957618e-06, "loss": 0.0419, "step": 38474 }, { "epoch": 0.8478077641342611, "grad_norm": 0.553631067276001, "learning_rate": 1.785353378718852e-06, "loss": 0.0615, "step": 38475 }, { "epoch": 0.8478297994237772, "grad_norm": 0.43041878938674927, "learning_rate": 1.784846877789364e-06, "loss": 0.0613, "step": 38476 }, { "epoch": 0.8478518347132933, "grad_norm": 0.5174992084503174, "learning_rate": 1.784340444171737e-06, "loss": 0.0625, "step": 38477 }, { "epoch": 0.8478738700028094, "grad_norm": 0.6067761778831482, "learning_rate": 1.7838340778685542e-06, "loss": 0.0537, "step": 38478 }, { "epoch": 0.8478959052923256, "grad_norm": 0.45526692271232605, "learning_rate": 1.7833277788823882e-06, "loss": 0.0346, "step": 38479 }, { "epoch": 0.8479179405818418, "grad_norm": 0.5577821731567383, "learning_rate": 1.7828215472158255e-06, "loss": 0.0445, "step": 38480 }, { "epoch": 0.8479399758713579, "grad_norm": 0.2101169377565384, "learning_rate": 1.7823153828714323e-06, "loss": 0.0488, "step": 38481 }, { "epoch": 0.8479620111608741, "grad_norm": 0.609878659248352, "learning_rate": 1.7818092858518015e-06, "loss": 0.0551, "step": 38482 }, { "epoch": 0.8479840464503903, "grad_norm": 0.642217755317688, "learning_rate": 1.7813032561595011e-06, "loss": 0.0467, "step": 38483 }, { "epoch": 0.8480060817399064, "grad_norm": 0.5832338333129883, "learning_rate": 1.7807972937971156e-06, "loss": 0.0716, "step": 38484 }, { "epoch": 0.8480281170294226, "grad_norm": 0.6586833000183105, "learning_rate": 1.780291398767213e-06, "loss": 0.0662, "step": 38485 }, { "epoch": 0.8480501523189388, "grad_norm": 0.35714584589004517, "learning_rate": 1.779785571072378e-06, "loss": 0.0586, "step": 38486 }, { "epoch": 0.8480721876084549, "grad_norm": 0.612252414226532, "learning_rate": 1.7792798107151854e-06, "loss": 0.0558, "step": 38487 }, { "epoch": 0.8480942228979711, "grad_norm": 0.8677986860275269, "learning_rate": 1.7787741176982042e-06, "loss": 0.0679, "step": 38488 }, { "epoch": 0.8481162581874873, "grad_norm": 0.40537598729133606, "learning_rate": 1.7782684920240244e-06, "loss": 0.0369, "step": 38489 }, { "epoch": 0.8481382934770034, "grad_norm": 0.1919841766357422, "learning_rate": 1.7777629336952072e-06, "loss": 0.0843, "step": 38490 }, { "epoch": 0.8481603287665196, "grad_norm": 0.3208825886249542, "learning_rate": 1.777257442714339e-06, "loss": 0.0449, "step": 38491 }, { "epoch": 0.8481823640560358, "grad_norm": 0.7234145998954773, "learning_rate": 1.7767520190839858e-06, "loss": 0.1003, "step": 38492 }, { "epoch": 0.8482043993455519, "grad_norm": 0.8616315126419067, "learning_rate": 1.7762466628067242e-06, "loss": 0.0565, "step": 38493 }, { "epoch": 0.8482264346350681, "grad_norm": 0.3145604133605957, "learning_rate": 1.7757413738851336e-06, "loss": 0.0358, "step": 38494 }, { "epoch": 0.8482484699245842, "grad_norm": 0.7308506369590759, "learning_rate": 1.7752361523217802e-06, "loss": 0.0748, "step": 38495 }, { "epoch": 0.8482705052141004, "grad_norm": 0.4431833326816559, "learning_rate": 1.7747309981192406e-06, "loss": 0.0707, "step": 38496 }, { "epoch": 0.8482925405036166, "grad_norm": 0.4766201674938202, "learning_rate": 1.774225911280089e-06, "loss": 0.0517, "step": 38497 }, { "epoch": 0.8483145757931327, "grad_norm": 0.689185619354248, "learning_rate": 1.7737208918068988e-06, "loss": 0.0554, "step": 38498 }, { "epoch": 0.8483366110826489, "grad_norm": 0.556961715221405, "learning_rate": 1.7732159397022392e-06, "loss": 0.0465, "step": 38499 }, { "epoch": 0.8483586463721651, "grad_norm": 1.0976957082748413, "learning_rate": 1.7727110549686816e-06, "loss": 0.0685, "step": 38500 }, { "epoch": 0.8483806816616812, "grad_norm": 0.5178174376487732, "learning_rate": 1.7722062376088039e-06, "loss": 0.0633, "step": 38501 }, { "epoch": 0.8484027169511973, "grad_norm": 1.2146716117858887, "learning_rate": 1.771701487625169e-06, "loss": 0.0879, "step": 38502 }, { "epoch": 0.8484247522407135, "grad_norm": 0.4212282598018646, "learning_rate": 1.771196805020357e-06, "loss": 0.0432, "step": 38503 }, { "epoch": 0.8484467875302296, "grad_norm": 0.4533444046974182, "learning_rate": 1.7706921897969252e-06, "loss": 0.0491, "step": 38504 }, { "epoch": 0.8484688228197458, "grad_norm": 0.6196814775466919, "learning_rate": 1.7701876419574604e-06, "loss": 0.0881, "step": 38505 }, { "epoch": 0.848490858109262, "grad_norm": 0.4928823709487915, "learning_rate": 1.7696831615045201e-06, "loss": 0.044, "step": 38506 }, { "epoch": 0.8485128933987781, "grad_norm": 0.46918976306915283, "learning_rate": 1.7691787484406807e-06, "loss": 0.06, "step": 38507 }, { "epoch": 0.8485349286882943, "grad_norm": 0.7988999485969543, "learning_rate": 1.768674402768507e-06, "loss": 0.0556, "step": 38508 }, { "epoch": 0.8485569639778104, "grad_norm": 0.5504677295684814, "learning_rate": 1.7681701244905686e-06, "loss": 0.0594, "step": 38509 }, { "epoch": 0.8485789992673266, "grad_norm": 0.6347858905792236, "learning_rate": 1.767665913609438e-06, "loss": 0.0428, "step": 38510 }, { "epoch": 0.8486010345568428, "grad_norm": 0.6641049385070801, "learning_rate": 1.7671617701276754e-06, "loss": 0.0564, "step": 38511 }, { "epoch": 0.8486230698463589, "grad_norm": 1.0952905416488647, "learning_rate": 1.7666576940478601e-06, "loss": 0.0654, "step": 38512 }, { "epoch": 0.8486451051358751, "grad_norm": 0.4703806936740875, "learning_rate": 1.7661536853725502e-06, "loss": 0.0336, "step": 38513 }, { "epoch": 0.8486671404253913, "grad_norm": 0.6780537366867065, "learning_rate": 1.76564974410432e-06, "loss": 0.0651, "step": 38514 }, { "epoch": 0.8486891757149074, "grad_norm": 0.6265119910240173, "learning_rate": 1.7651458702457247e-06, "loss": 0.0503, "step": 38515 }, { "epoch": 0.8487112110044236, "grad_norm": 0.5695368051528931, "learning_rate": 1.7646420637993465e-06, "loss": 0.0462, "step": 38516 }, { "epoch": 0.8487332462939398, "grad_norm": 0.7580283880233765, "learning_rate": 1.7641383247677423e-06, "loss": 0.0853, "step": 38517 }, { "epoch": 0.8487552815834559, "grad_norm": 0.5213039517402649, "learning_rate": 1.7636346531534764e-06, "loss": 0.0505, "step": 38518 }, { "epoch": 0.8487773168729721, "grad_norm": 0.6712924838066101, "learning_rate": 1.7631310489591186e-06, "loss": 0.0664, "step": 38519 }, { "epoch": 0.8487993521624883, "grad_norm": 0.35640132427215576, "learning_rate": 1.7626275121872298e-06, "loss": 0.0368, "step": 38520 }, { "epoch": 0.8488213874520044, "grad_norm": 0.7085921764373779, "learning_rate": 1.7621240428403833e-06, "loss": 0.0614, "step": 38521 }, { "epoch": 0.8488434227415206, "grad_norm": 0.9226165413856506, "learning_rate": 1.7616206409211321e-06, "loss": 0.065, "step": 38522 }, { "epoch": 0.8488654580310367, "grad_norm": 0.5020841360092163, "learning_rate": 1.7611173064320474e-06, "loss": 0.0715, "step": 38523 }, { "epoch": 0.8488874933205529, "grad_norm": 0.5540801882743835, "learning_rate": 1.760614039375692e-06, "loss": 0.0578, "step": 38524 }, { "epoch": 0.8489095286100691, "grad_norm": 0.3116494119167328, "learning_rate": 1.7601108397546307e-06, "loss": 0.0692, "step": 38525 }, { "epoch": 0.8489315638995852, "grad_norm": 0.44550177454948425, "learning_rate": 1.7596077075714228e-06, "loss": 0.0387, "step": 38526 }, { "epoch": 0.8489535991891013, "grad_norm": 0.8091844320297241, "learning_rate": 1.7591046428286317e-06, "loss": 0.0823, "step": 38527 }, { "epoch": 0.8489756344786175, "grad_norm": 0.32576996088027954, "learning_rate": 1.758601645528825e-06, "loss": 0.0385, "step": 38528 }, { "epoch": 0.8489976697681336, "grad_norm": 0.6392043828964233, "learning_rate": 1.7580987156745576e-06, "loss": 0.0537, "step": 38529 }, { "epoch": 0.8490197050576498, "grad_norm": 0.5682752132415771, "learning_rate": 1.7575958532683972e-06, "loss": 0.0621, "step": 38530 }, { "epoch": 0.849041740347166, "grad_norm": 0.6633381843566895, "learning_rate": 1.7570930583128968e-06, "loss": 0.0605, "step": 38531 }, { "epoch": 0.8490637756366821, "grad_norm": 1.063713788986206, "learning_rate": 1.756590330810628e-06, "loss": 0.0435, "step": 38532 }, { "epoch": 0.8490858109261983, "grad_norm": 0.5030453205108643, "learning_rate": 1.7560876707641433e-06, "loss": 0.0437, "step": 38533 }, { "epoch": 0.8491078462157144, "grad_norm": 0.4210740327835083, "learning_rate": 1.7555850781760059e-06, "loss": 0.0607, "step": 38534 }, { "epoch": 0.8491298815052306, "grad_norm": 0.5536154508590698, "learning_rate": 1.7550825530487803e-06, "loss": 0.0565, "step": 38535 }, { "epoch": 0.8491519167947468, "grad_norm": 1.227859377861023, "learning_rate": 1.754580095385016e-06, "loss": 0.0786, "step": 38536 }, { "epoch": 0.8491739520842629, "grad_norm": 0.3639546036720276, "learning_rate": 1.754077705187283e-06, "loss": 0.043, "step": 38537 }, { "epoch": 0.8491959873737791, "grad_norm": 0.5961859226226807, "learning_rate": 1.753575382458129e-06, "loss": 0.063, "step": 38538 }, { "epoch": 0.8492180226632953, "grad_norm": 0.7487759590148926, "learning_rate": 1.7530731272001255e-06, "loss": 0.0616, "step": 38539 }, { "epoch": 0.8492400579528114, "grad_norm": 0.6482248902320862, "learning_rate": 1.7525709394158202e-06, "loss": 0.0559, "step": 38540 }, { "epoch": 0.8492620932423276, "grad_norm": 0.5555103421211243, "learning_rate": 1.7520688191077794e-06, "loss": 0.0501, "step": 38541 }, { "epoch": 0.8492841285318438, "grad_norm": 0.7391870021820068, "learning_rate": 1.7515667662785513e-06, "loss": 0.0685, "step": 38542 }, { "epoch": 0.8493061638213599, "grad_norm": 0.3841172754764557, "learning_rate": 1.7510647809306984e-06, "loss": 0.0543, "step": 38543 }, { "epoch": 0.8493281991108761, "grad_norm": 0.6114612817764282, "learning_rate": 1.7505628630667808e-06, "loss": 0.0645, "step": 38544 }, { "epoch": 0.8493502344003923, "grad_norm": 0.8314770460128784, "learning_rate": 1.750061012689348e-06, "loss": 0.075, "step": 38545 }, { "epoch": 0.8493722696899084, "grad_norm": 0.6483226418495178, "learning_rate": 1.749559229800961e-06, "loss": 0.0749, "step": 38546 }, { "epoch": 0.8493943049794246, "grad_norm": 0.6635805368423462, "learning_rate": 1.7490575144041732e-06, "loss": 0.0528, "step": 38547 }, { "epoch": 0.8494163402689408, "grad_norm": 0.6734573841094971, "learning_rate": 1.748555866501544e-06, "loss": 0.0601, "step": 38548 }, { "epoch": 0.8494383755584569, "grad_norm": 0.8797471523284912, "learning_rate": 1.7480542860956228e-06, "loss": 0.07, "step": 38549 }, { "epoch": 0.8494604108479731, "grad_norm": 0.6829425692558289, "learning_rate": 1.7475527731889679e-06, "loss": 0.0881, "step": 38550 }, { "epoch": 0.8494824461374892, "grad_norm": 0.7435855865478516, "learning_rate": 1.7470513277841354e-06, "loss": 0.0612, "step": 38551 }, { "epoch": 0.8495044814270053, "grad_norm": 0.5740125775337219, "learning_rate": 1.7465499498836751e-06, "loss": 0.0594, "step": 38552 }, { "epoch": 0.8495265167165215, "grad_norm": 0.3980356454849243, "learning_rate": 1.7460486394901448e-06, "loss": 0.0752, "step": 38553 }, { "epoch": 0.8495485520060376, "grad_norm": 0.6311929821968079, "learning_rate": 1.7455473966060909e-06, "loss": 0.067, "step": 38554 }, { "epoch": 0.8495705872955538, "grad_norm": 0.40807077288627625, "learning_rate": 1.7450462212340778e-06, "loss": 0.0341, "step": 38555 }, { "epoch": 0.84959262258507, "grad_norm": 0.7945297956466675, "learning_rate": 1.7445451133766487e-06, "loss": 0.0739, "step": 38556 }, { "epoch": 0.8496146578745861, "grad_norm": 0.6325340867042542, "learning_rate": 1.7440440730363617e-06, "loss": 0.0479, "step": 38557 }, { "epoch": 0.8496366931641023, "grad_norm": 0.5637466907501221, "learning_rate": 1.7435431002157676e-06, "loss": 0.0562, "step": 38558 }, { "epoch": 0.8496587284536185, "grad_norm": 0.6455952525138855, "learning_rate": 1.743042194917413e-06, "loss": 0.0697, "step": 38559 }, { "epoch": 0.8496807637431346, "grad_norm": 0.7379606366157532, "learning_rate": 1.7425413571438593e-06, "loss": 0.0684, "step": 38560 }, { "epoch": 0.8497027990326508, "grad_norm": 0.3422072231769562, "learning_rate": 1.742040586897644e-06, "loss": 0.0645, "step": 38561 }, { "epoch": 0.8497248343221669, "grad_norm": 0.41908425092697144, "learning_rate": 1.7415398841813307e-06, "loss": 0.0453, "step": 38562 }, { "epoch": 0.8497468696116831, "grad_norm": 0.6012936234474182, "learning_rate": 1.7410392489974637e-06, "loss": 0.0585, "step": 38563 }, { "epoch": 0.8497689049011993, "grad_norm": 0.5201588869094849, "learning_rate": 1.7405386813485957e-06, "loss": 0.0503, "step": 38564 }, { "epoch": 0.8497909401907154, "grad_norm": 0.754150927066803, "learning_rate": 1.74003818123727e-06, "loss": 0.0717, "step": 38565 }, { "epoch": 0.8498129754802316, "grad_norm": 0.3773297071456909, "learning_rate": 1.739537748666043e-06, "loss": 0.0338, "step": 38566 }, { "epoch": 0.8498350107697478, "grad_norm": 0.7187342643737793, "learning_rate": 1.7390373836374624e-06, "loss": 0.0709, "step": 38567 }, { "epoch": 0.8498570460592639, "grad_norm": 0.704555094242096, "learning_rate": 1.7385370861540728e-06, "loss": 0.0679, "step": 38568 }, { "epoch": 0.8498790813487801, "grad_norm": 0.4923480153083801, "learning_rate": 1.7380368562184257e-06, "loss": 0.031, "step": 38569 }, { "epoch": 0.8499011166382963, "grad_norm": 0.30049392580986023, "learning_rate": 1.7375366938330689e-06, "loss": 0.0485, "step": 38570 }, { "epoch": 0.8499231519278124, "grad_norm": 0.5641633868217468, "learning_rate": 1.7370365990005522e-06, "loss": 0.0546, "step": 38571 }, { "epoch": 0.8499451872173286, "grad_norm": 0.6066099405288696, "learning_rate": 1.7365365717234165e-06, "loss": 0.0408, "step": 38572 }, { "epoch": 0.8499672225068448, "grad_norm": 0.4407462477684021, "learning_rate": 1.736036612004212e-06, "loss": 0.0601, "step": 38573 }, { "epoch": 0.8499892577963609, "grad_norm": 0.6143320202827454, "learning_rate": 1.7355367198454897e-06, "loss": 0.0639, "step": 38574 }, { "epoch": 0.8500112930858771, "grad_norm": 0.7499704360961914, "learning_rate": 1.7350368952497892e-06, "loss": 0.0451, "step": 38575 }, { "epoch": 0.8500333283753931, "grad_norm": 0.7802628874778748, "learning_rate": 1.7345371382196618e-06, "loss": 0.0745, "step": 38576 }, { "epoch": 0.8500553636649093, "grad_norm": 0.7168846726417542, "learning_rate": 1.7340374487576438e-06, "loss": 0.0654, "step": 38577 }, { "epoch": 0.8500773989544255, "grad_norm": 0.49027180671691895, "learning_rate": 1.7335378268662932e-06, "loss": 0.0498, "step": 38578 }, { "epoch": 0.8500994342439416, "grad_norm": 0.5306814312934875, "learning_rate": 1.7330382725481465e-06, "loss": 0.0622, "step": 38579 }, { "epoch": 0.8501214695334578, "grad_norm": 0.13821981847286224, "learning_rate": 1.7325387858057478e-06, "loss": 0.0534, "step": 38580 }, { "epoch": 0.850143504822974, "grad_norm": 0.9196635484695435, "learning_rate": 1.732039366641649e-06, "loss": 0.0809, "step": 38581 }, { "epoch": 0.8501655401124901, "grad_norm": 0.4385966658592224, "learning_rate": 1.7315400150583843e-06, "loss": 0.0352, "step": 38582 }, { "epoch": 0.8501875754020063, "grad_norm": 0.5431557297706604, "learning_rate": 1.7310407310585053e-06, "loss": 0.064, "step": 38583 }, { "epoch": 0.8502096106915225, "grad_norm": 0.5294890999794006, "learning_rate": 1.7305415146445447e-06, "loss": 0.054, "step": 38584 }, { "epoch": 0.8502316459810386, "grad_norm": 0.8051404356956482, "learning_rate": 1.7300423658190574e-06, "loss": 0.0715, "step": 38585 }, { "epoch": 0.8502536812705548, "grad_norm": 0.6090025305747986, "learning_rate": 1.7295432845845777e-06, "loss": 0.0509, "step": 38586 }, { "epoch": 0.850275716560071, "grad_norm": 0.6751923561096191, "learning_rate": 1.729044270943652e-06, "loss": 0.0839, "step": 38587 }, { "epoch": 0.8502977518495871, "grad_norm": 0.31817924976348877, "learning_rate": 1.7285453248988186e-06, "loss": 0.0373, "step": 38588 }, { "epoch": 0.8503197871391033, "grad_norm": 0.7163800001144409, "learning_rate": 1.7280464464526203e-06, "loss": 0.0582, "step": 38589 }, { "epoch": 0.8503418224286194, "grad_norm": 1.0196551084518433, "learning_rate": 1.7275476356076014e-06, "loss": 0.064, "step": 38590 }, { "epoch": 0.8503638577181356, "grad_norm": 0.6148422360420227, "learning_rate": 1.727048892366297e-06, "loss": 0.0713, "step": 38591 }, { "epoch": 0.8503858930076518, "grad_norm": 0.5230989456176758, "learning_rate": 1.7265502167312496e-06, "loss": 0.0563, "step": 38592 }, { "epoch": 0.8504079282971679, "grad_norm": 0.5298547148704529, "learning_rate": 1.726051608705001e-06, "loss": 0.0667, "step": 38593 }, { "epoch": 0.8504299635866841, "grad_norm": 0.8875837326049805, "learning_rate": 1.7255530682900921e-06, "loss": 0.0532, "step": 38594 }, { "epoch": 0.8504519988762003, "grad_norm": 0.5092689394950867, "learning_rate": 1.7250545954890562e-06, "loss": 0.0561, "step": 38595 }, { "epoch": 0.8504740341657164, "grad_norm": 0.7524545788764954, "learning_rate": 1.724556190304436e-06, "loss": 0.0515, "step": 38596 }, { "epoch": 0.8504960694552326, "grad_norm": 0.7869317531585693, "learning_rate": 1.724057852738773e-06, "loss": 0.0762, "step": 38597 }, { "epoch": 0.8505181047447488, "grad_norm": 0.43358126282691956, "learning_rate": 1.7235595827945982e-06, "loss": 0.0463, "step": 38598 }, { "epoch": 0.8505401400342649, "grad_norm": 0.4475994110107422, "learning_rate": 1.7230613804744549e-06, "loss": 0.0635, "step": 38599 }, { "epoch": 0.8505621753237811, "grad_norm": 0.5105885863304138, "learning_rate": 1.7225632457808793e-06, "loss": 0.0529, "step": 38600 }, { "epoch": 0.8505842106132971, "grad_norm": 0.5606217384338379, "learning_rate": 1.7220651787164127e-06, "loss": 0.0599, "step": 38601 }, { "epoch": 0.8506062459028133, "grad_norm": 0.8251015543937683, "learning_rate": 1.7215671792835862e-06, "loss": 0.0824, "step": 38602 }, { "epoch": 0.8506282811923295, "grad_norm": 0.4451574981212616, "learning_rate": 1.7210692474849366e-06, "loss": 0.0719, "step": 38603 }, { "epoch": 0.8506503164818456, "grad_norm": 0.3924141824245453, "learning_rate": 1.7205713833230064e-06, "loss": 0.0432, "step": 38604 }, { "epoch": 0.8506723517713618, "grad_norm": 0.4057343304157257, "learning_rate": 1.7200735868003237e-06, "loss": 0.0555, "step": 38605 }, { "epoch": 0.850694387060878, "grad_norm": 0.39462512731552124, "learning_rate": 1.71957585791943e-06, "loss": 0.0382, "step": 38606 }, { "epoch": 0.8507164223503941, "grad_norm": 0.7324191331863403, "learning_rate": 1.7190781966828528e-06, "loss": 0.0736, "step": 38607 }, { "epoch": 0.8507384576399103, "grad_norm": 0.18917757272720337, "learning_rate": 1.7185806030931372e-06, "loss": 0.0484, "step": 38608 }, { "epoch": 0.8507604929294265, "grad_norm": 0.5211142897605896, "learning_rate": 1.7180830771528094e-06, "loss": 0.0799, "step": 38609 }, { "epoch": 0.8507825282189426, "grad_norm": 0.7086820602416992, "learning_rate": 1.7175856188644107e-06, "loss": 0.0595, "step": 38610 }, { "epoch": 0.8508045635084588, "grad_norm": 0.4232185184955597, "learning_rate": 1.7170882282304689e-06, "loss": 0.0694, "step": 38611 }, { "epoch": 0.850826598797975, "grad_norm": 0.3981971740722656, "learning_rate": 1.716590905253519e-06, "loss": 0.0484, "step": 38612 }, { "epoch": 0.8508486340874911, "grad_norm": 0.4266200363636017, "learning_rate": 1.7160936499360969e-06, "loss": 0.0489, "step": 38613 }, { "epoch": 0.8508706693770073, "grad_norm": 0.6962670683860779, "learning_rate": 1.7155964622807307e-06, "loss": 0.0606, "step": 38614 }, { "epoch": 0.8508927046665234, "grad_norm": 0.431741327047348, "learning_rate": 1.7150993422899553e-06, "loss": 0.0469, "step": 38615 }, { "epoch": 0.8509147399560396, "grad_norm": 0.517782986164093, "learning_rate": 1.7146022899663032e-06, "loss": 0.0581, "step": 38616 }, { "epoch": 0.8509367752455558, "grad_norm": 0.5129032731056213, "learning_rate": 1.714105305312308e-06, "loss": 0.0669, "step": 38617 }, { "epoch": 0.8509588105350719, "grad_norm": 0.42725539207458496, "learning_rate": 1.7136083883304971e-06, "loss": 0.0342, "step": 38618 }, { "epoch": 0.8509808458245881, "grad_norm": 0.70991051197052, "learning_rate": 1.713111539023402e-06, "loss": 0.0755, "step": 38619 }, { "epoch": 0.8510028811141043, "grad_norm": 0.3500717878341675, "learning_rate": 1.712614757393559e-06, "loss": 0.0742, "step": 38620 }, { "epoch": 0.8510249164036204, "grad_norm": 0.711743175983429, "learning_rate": 1.7121180434434914e-06, "loss": 0.0632, "step": 38621 }, { "epoch": 0.8510469516931366, "grad_norm": 1.1132296323776245, "learning_rate": 1.7116213971757317e-06, "loss": 0.0957, "step": 38622 }, { "epoch": 0.8510689869826528, "grad_norm": 0.4525659680366516, "learning_rate": 1.7111248185928096e-06, "loss": 0.0482, "step": 38623 }, { "epoch": 0.8510910222721689, "grad_norm": 1.153996229171753, "learning_rate": 1.7106283076972583e-06, "loss": 0.0511, "step": 38624 }, { "epoch": 0.8511130575616851, "grad_norm": 0.4663703441619873, "learning_rate": 1.7101318644916009e-06, "loss": 0.0927, "step": 38625 }, { "epoch": 0.8511350928512011, "grad_norm": 0.43985825777053833, "learning_rate": 1.7096354889783683e-06, "loss": 0.0579, "step": 38626 }, { "epoch": 0.8511571281407173, "grad_norm": 0.6981560587882996, "learning_rate": 1.7091391811600871e-06, "loss": 0.0638, "step": 38627 }, { "epoch": 0.8511791634302335, "grad_norm": 0.5297853350639343, "learning_rate": 1.7086429410392918e-06, "loss": 0.05, "step": 38628 }, { "epoch": 0.8512011987197496, "grad_norm": 0.8939670920372009, "learning_rate": 1.7081467686185054e-06, "loss": 0.0652, "step": 38629 }, { "epoch": 0.8512232340092658, "grad_norm": 0.8262145519256592, "learning_rate": 1.7076506639002493e-06, "loss": 0.075, "step": 38630 }, { "epoch": 0.851245269298782, "grad_norm": 0.92997145652771, "learning_rate": 1.707154626887063e-06, "loss": 0.0979, "step": 38631 }, { "epoch": 0.8512673045882981, "grad_norm": 0.34979188442230225, "learning_rate": 1.7066586575814614e-06, "loss": 0.0486, "step": 38632 }, { "epoch": 0.8512893398778143, "grad_norm": 0.6303972005844116, "learning_rate": 1.7061627559859804e-06, "loss": 0.0693, "step": 38633 }, { "epoch": 0.8513113751673305, "grad_norm": 0.8777409195899963, "learning_rate": 1.705666922103135e-06, "loss": 0.0624, "step": 38634 }, { "epoch": 0.8513334104568466, "grad_norm": 0.5846379399299622, "learning_rate": 1.7051711559354616e-06, "loss": 0.0738, "step": 38635 }, { "epoch": 0.8513554457463628, "grad_norm": 0.7015253305435181, "learning_rate": 1.7046754574854794e-06, "loss": 0.0599, "step": 38636 }, { "epoch": 0.851377481035879, "grad_norm": 0.6746608018875122, "learning_rate": 1.7041798267557169e-06, "loss": 0.0712, "step": 38637 }, { "epoch": 0.8513995163253951, "grad_norm": 0.5146448016166687, "learning_rate": 1.7036842637486931e-06, "loss": 0.0387, "step": 38638 }, { "epoch": 0.8514215516149113, "grad_norm": 0.4630868434906006, "learning_rate": 1.7031887684669351e-06, "loss": 0.0485, "step": 38639 }, { "epoch": 0.8514435869044275, "grad_norm": 0.6825539469718933, "learning_rate": 1.7026933409129702e-06, "loss": 0.0564, "step": 38640 }, { "epoch": 0.8514656221939436, "grad_norm": 0.3421584367752075, "learning_rate": 1.7021979810893168e-06, "loss": 0.0356, "step": 38641 }, { "epoch": 0.8514876574834598, "grad_norm": 0.5263514518737793, "learning_rate": 1.7017026889984993e-06, "loss": 0.0744, "step": 38642 }, { "epoch": 0.851509692772976, "grad_norm": 0.5402172803878784, "learning_rate": 1.7012074646430408e-06, "loss": 0.0493, "step": 38643 }, { "epoch": 0.8515317280624921, "grad_norm": 0.43516236543655396, "learning_rate": 1.7007123080254678e-06, "loss": 0.0497, "step": 38644 }, { "epoch": 0.8515537633520083, "grad_norm": 0.9383330345153809, "learning_rate": 1.7002172191482944e-06, "loss": 0.0745, "step": 38645 }, { "epoch": 0.8515757986415244, "grad_norm": 0.8265135288238525, "learning_rate": 1.6997221980140477e-06, "loss": 0.0636, "step": 38646 }, { "epoch": 0.8515978339310406, "grad_norm": 0.6555780172348022, "learning_rate": 1.69922724462525e-06, "loss": 0.0558, "step": 38647 }, { "epoch": 0.8516198692205568, "grad_norm": 0.6105058193206787, "learning_rate": 1.6987323589844179e-06, "loss": 0.0649, "step": 38648 }, { "epoch": 0.8516419045100729, "grad_norm": 0.4141131639480591, "learning_rate": 1.698237541094076e-06, "loss": 0.0236, "step": 38649 }, { "epoch": 0.851663939799589, "grad_norm": 0.4204287528991699, "learning_rate": 1.6977427909567405e-06, "loss": 0.0673, "step": 38650 }, { "epoch": 0.8516859750891052, "grad_norm": 0.7348194718360901, "learning_rate": 1.6972481085749398e-06, "loss": 0.0387, "step": 38651 }, { "epoch": 0.8517080103786213, "grad_norm": 0.858566403388977, "learning_rate": 1.696753493951183e-06, "loss": 0.054, "step": 38652 }, { "epoch": 0.8517300456681375, "grad_norm": 0.6580979228019714, "learning_rate": 1.6962589470879953e-06, "loss": 0.0569, "step": 38653 }, { "epoch": 0.8517520809576536, "grad_norm": 0.5511595606803894, "learning_rate": 1.6957644679878993e-06, "loss": 0.0597, "step": 38654 }, { "epoch": 0.8517741162471698, "grad_norm": 0.8604450821876526, "learning_rate": 1.695270056653403e-06, "loss": 0.0852, "step": 38655 }, { "epoch": 0.851796151536686, "grad_norm": 0.5811108350753784, "learning_rate": 1.6947757130870362e-06, "loss": 0.0878, "step": 38656 }, { "epoch": 0.8518181868262021, "grad_norm": 0.47060075402259827, "learning_rate": 1.6942814372913051e-06, "loss": 0.0728, "step": 38657 }, { "epoch": 0.8518402221157183, "grad_norm": 0.5896917581558228, "learning_rate": 1.6937872292687396e-06, "loss": 0.0641, "step": 38658 }, { "epoch": 0.8518622574052345, "grad_norm": 0.4550018608570099, "learning_rate": 1.6932930890218473e-06, "loss": 0.0472, "step": 38659 }, { "epoch": 0.8518842926947506, "grad_norm": 0.43843501806259155, "learning_rate": 1.6927990165531531e-06, "loss": 0.0559, "step": 38660 }, { "epoch": 0.8519063279842668, "grad_norm": 0.6775712370872498, "learning_rate": 1.6923050118651651e-06, "loss": 0.102, "step": 38661 }, { "epoch": 0.851928363273783, "grad_norm": 0.25677186250686646, "learning_rate": 1.691811074960406e-06, "loss": 0.0565, "step": 38662 }, { "epoch": 0.8519503985632991, "grad_norm": 0.4334218204021454, "learning_rate": 1.6913172058413905e-06, "loss": 0.0625, "step": 38663 }, { "epoch": 0.8519724338528153, "grad_norm": 0.6415429711341858, "learning_rate": 1.6908234045106302e-06, "loss": 0.0581, "step": 38664 }, { "epoch": 0.8519944691423315, "grad_norm": 0.36301979422569275, "learning_rate": 1.6903296709706445e-06, "loss": 0.0927, "step": 38665 }, { "epoch": 0.8520165044318476, "grad_norm": 0.6444920301437378, "learning_rate": 1.6898360052239464e-06, "loss": 0.0698, "step": 38666 }, { "epoch": 0.8520385397213638, "grad_norm": 0.4087928235530853, "learning_rate": 1.689342407273054e-06, "loss": 0.0467, "step": 38667 }, { "epoch": 0.85206057501088, "grad_norm": 0.8109601140022278, "learning_rate": 1.6888488771204752e-06, "loss": 0.0666, "step": 38668 }, { "epoch": 0.8520826103003961, "grad_norm": 0.6326377391815186, "learning_rate": 1.688355414768728e-06, "loss": 0.0832, "step": 38669 }, { "epoch": 0.8521046455899123, "grad_norm": 0.5507286190986633, "learning_rate": 1.687862020220327e-06, "loss": 0.0625, "step": 38670 }, { "epoch": 0.8521266808794284, "grad_norm": 0.8074893355369568, "learning_rate": 1.6873686934777803e-06, "loss": 0.0646, "step": 38671 }, { "epoch": 0.8521487161689446, "grad_norm": 0.793631911277771, "learning_rate": 1.6868754345436028e-06, "loss": 0.0688, "step": 38672 }, { "epoch": 0.8521707514584608, "grad_norm": 0.43185946345329285, "learning_rate": 1.6863822434203102e-06, "loss": 0.0701, "step": 38673 }, { "epoch": 0.8521927867479769, "grad_norm": 0.6394628286361694, "learning_rate": 1.6858891201104126e-06, "loss": 0.0579, "step": 38674 }, { "epoch": 0.852214822037493, "grad_norm": 0.5129510760307312, "learning_rate": 1.6853960646164197e-06, "loss": 0.0782, "step": 38675 }, { "epoch": 0.8522368573270092, "grad_norm": 0.5644098520278931, "learning_rate": 1.684903076940844e-06, "loss": 0.0275, "step": 38676 }, { "epoch": 0.8522588926165253, "grad_norm": 0.6936675310134888, "learning_rate": 1.6844101570862009e-06, "loss": 0.0678, "step": 38677 }, { "epoch": 0.8522809279060415, "grad_norm": 0.5899208784103394, "learning_rate": 1.6839173050549928e-06, "loss": 0.0439, "step": 38678 }, { "epoch": 0.8523029631955577, "grad_norm": 0.5137686133384705, "learning_rate": 1.6834245208497395e-06, "loss": 0.0734, "step": 38679 }, { "epoch": 0.8523249984850738, "grad_norm": 0.7141929268836975, "learning_rate": 1.6829318044729391e-06, "loss": 0.0399, "step": 38680 }, { "epoch": 0.85234703377459, "grad_norm": 0.49575257301330566, "learning_rate": 1.682439155927113e-06, "loss": 0.048, "step": 38681 }, { "epoch": 0.8523690690641061, "grad_norm": 0.5492328405380249, "learning_rate": 1.681946575214764e-06, "loss": 0.0509, "step": 38682 }, { "epoch": 0.8523911043536223, "grad_norm": 0.7963789105415344, "learning_rate": 1.681454062338405e-06, "loss": 0.052, "step": 38683 }, { "epoch": 0.8524131396431385, "grad_norm": 0.6346157789230347, "learning_rate": 1.680961617300541e-06, "loss": 0.0605, "step": 38684 }, { "epoch": 0.8524351749326546, "grad_norm": 0.8024927377700806, "learning_rate": 1.6804692401036796e-06, "loss": 0.0834, "step": 38685 }, { "epoch": 0.8524572102221708, "grad_norm": 1.0052192211151123, "learning_rate": 1.679976930750336e-06, "loss": 0.0876, "step": 38686 }, { "epoch": 0.852479245511687, "grad_norm": 0.6556996703147888, "learning_rate": 1.6794846892430076e-06, "loss": 0.08, "step": 38687 }, { "epoch": 0.8525012808012031, "grad_norm": 1.0061627626419067, "learning_rate": 1.678992515584208e-06, "loss": 0.0801, "step": 38688 }, { "epoch": 0.8525233160907193, "grad_norm": 0.5385034680366516, "learning_rate": 1.6785004097764427e-06, "loss": 0.0643, "step": 38689 }, { "epoch": 0.8525453513802355, "grad_norm": 0.584626317024231, "learning_rate": 1.6780083718222221e-06, "loss": 0.0564, "step": 38690 }, { "epoch": 0.8525673866697516, "grad_norm": 0.9170512557029724, "learning_rate": 1.677516401724044e-06, "loss": 0.059, "step": 38691 }, { "epoch": 0.8525894219592678, "grad_norm": 0.5373783111572266, "learning_rate": 1.6770244994844197e-06, "loss": 0.0646, "step": 38692 }, { "epoch": 0.852611457248784, "grad_norm": 0.1967664659023285, "learning_rate": 1.676532665105857e-06, "loss": 0.0388, "step": 38693 }, { "epoch": 0.8526334925383001, "grad_norm": 0.7122241258621216, "learning_rate": 1.6760408985908542e-06, "loss": 0.0626, "step": 38694 }, { "epoch": 0.8526555278278163, "grad_norm": 0.6680197715759277, "learning_rate": 1.675549199941921e-06, "loss": 0.0539, "step": 38695 }, { "epoch": 0.8526775631173324, "grad_norm": 0.5348272919654846, "learning_rate": 1.6750575691615616e-06, "loss": 0.0374, "step": 38696 }, { "epoch": 0.8526995984068486, "grad_norm": 0.6632999777793884, "learning_rate": 1.674566006252281e-06, "loss": 0.0422, "step": 38697 }, { "epoch": 0.8527216336963648, "grad_norm": 0.5020575523376465, "learning_rate": 1.6740745112165789e-06, "loss": 0.0508, "step": 38698 }, { "epoch": 0.8527436689858809, "grad_norm": 0.7043253779411316, "learning_rate": 1.6735830840569615e-06, "loss": 0.0526, "step": 38699 }, { "epoch": 0.852765704275397, "grad_norm": 0.6930763125419617, "learning_rate": 1.6730917247759336e-06, "loss": 0.0889, "step": 38700 }, { "epoch": 0.8527877395649132, "grad_norm": 0.17078262567520142, "learning_rate": 1.6726004333759948e-06, "loss": 0.0507, "step": 38701 }, { "epoch": 0.8528097748544293, "grad_norm": 0.3771846294403076, "learning_rate": 1.6721092098596496e-06, "loss": 0.0538, "step": 38702 }, { "epoch": 0.8528318101439455, "grad_norm": 0.4738757908344269, "learning_rate": 1.6716180542293946e-06, "loss": 0.0401, "step": 38703 }, { "epoch": 0.8528538454334617, "grad_norm": 0.474423348903656, "learning_rate": 1.6711269664877427e-06, "loss": 0.068, "step": 38704 }, { "epoch": 0.8528758807229778, "grad_norm": 0.42609626054763794, "learning_rate": 1.6706359466371834e-06, "loss": 0.0459, "step": 38705 }, { "epoch": 0.852897916012494, "grad_norm": 0.30855339765548706, "learning_rate": 1.6701449946802283e-06, "loss": 0.0515, "step": 38706 }, { "epoch": 0.8529199513020101, "grad_norm": 0.33163270354270935, "learning_rate": 1.6696541106193685e-06, "loss": 0.0244, "step": 38707 }, { "epoch": 0.8529419865915263, "grad_norm": 0.4944770932197571, "learning_rate": 1.6691632944571088e-06, "loss": 0.0602, "step": 38708 }, { "epoch": 0.8529640218810425, "grad_norm": 0.48051217198371887, "learning_rate": 1.6686725461959523e-06, "loss": 0.0607, "step": 38709 }, { "epoch": 0.8529860571705586, "grad_norm": 0.6238144636154175, "learning_rate": 1.6681818658383919e-06, "loss": 0.079, "step": 38710 }, { "epoch": 0.8530080924600748, "grad_norm": 0.5477425456047058, "learning_rate": 1.6676912533869304e-06, "loss": 0.0567, "step": 38711 }, { "epoch": 0.853030127749591, "grad_norm": 1.0291372537612915, "learning_rate": 1.6672007088440676e-06, "loss": 0.0902, "step": 38712 }, { "epoch": 0.8530521630391071, "grad_norm": 0.7418742775917053, "learning_rate": 1.6667102322123034e-06, "loss": 0.0473, "step": 38713 }, { "epoch": 0.8530741983286233, "grad_norm": 0.7683658003807068, "learning_rate": 1.6662198234941307e-06, "loss": 0.0641, "step": 38714 }, { "epoch": 0.8530962336181395, "grad_norm": 0.6820026636123657, "learning_rate": 1.6657294826920505e-06, "loss": 0.0687, "step": 38715 }, { "epoch": 0.8531182689076556, "grad_norm": 0.5169994235038757, "learning_rate": 1.6652392098085645e-06, "loss": 0.0742, "step": 38716 }, { "epoch": 0.8531403041971718, "grad_norm": 0.7805175185203552, "learning_rate": 1.6647490048461622e-06, "loss": 0.038, "step": 38717 }, { "epoch": 0.853162339486688, "grad_norm": 0.3302578628063202, "learning_rate": 1.6642588678073434e-06, "loss": 0.0295, "step": 38718 }, { "epoch": 0.8531843747762041, "grad_norm": 0.6287346482276917, "learning_rate": 1.663768798694606e-06, "loss": 0.0684, "step": 38719 }, { "epoch": 0.8532064100657203, "grad_norm": 0.3577997386455536, "learning_rate": 1.663278797510448e-06, "loss": 0.063, "step": 38720 }, { "epoch": 0.8532284453552365, "grad_norm": 0.94975346326828, "learning_rate": 1.6627888642573608e-06, "loss": 0.0873, "step": 38721 }, { "epoch": 0.8532504806447526, "grad_norm": 0.6057442426681519, "learning_rate": 1.6622989989378424e-06, "loss": 0.0537, "step": 38722 }, { "epoch": 0.8532725159342688, "grad_norm": 0.9107309579849243, "learning_rate": 1.661809201554389e-06, "loss": 0.0872, "step": 38723 }, { "epoch": 0.8532945512237848, "grad_norm": 0.5800339579582214, "learning_rate": 1.6613194721094921e-06, "loss": 0.0628, "step": 38724 }, { "epoch": 0.853316586513301, "grad_norm": 0.6422359347343445, "learning_rate": 1.6608298106056497e-06, "loss": 0.0666, "step": 38725 }, { "epoch": 0.8533386218028172, "grad_norm": 0.5793620347976685, "learning_rate": 1.660340217045348e-06, "loss": 0.0464, "step": 38726 }, { "epoch": 0.8533606570923333, "grad_norm": 0.5836646556854248, "learning_rate": 1.6598506914310935e-06, "loss": 0.0452, "step": 38727 }, { "epoch": 0.8533826923818495, "grad_norm": 0.663805365562439, "learning_rate": 1.6593612337653707e-06, "loss": 0.0917, "step": 38728 }, { "epoch": 0.8534047276713657, "grad_norm": 0.46852532029151917, "learning_rate": 1.6588718440506761e-06, "loss": 0.049, "step": 38729 }, { "epoch": 0.8534267629608818, "grad_norm": 0.6061677932739258, "learning_rate": 1.6583825222894993e-06, "loss": 0.0525, "step": 38730 }, { "epoch": 0.853448798250398, "grad_norm": 0.6876817345619202, "learning_rate": 1.6578932684843351e-06, "loss": 0.0451, "step": 38731 }, { "epoch": 0.8534708335399142, "grad_norm": 0.5066835284233093, "learning_rate": 1.657404082637678e-06, "loss": 0.0516, "step": 38732 }, { "epoch": 0.8534928688294303, "grad_norm": 0.5335832238197327, "learning_rate": 1.656914964752011e-06, "loss": 0.0576, "step": 38733 }, { "epoch": 0.8535149041189465, "grad_norm": 0.8446441888809204, "learning_rate": 1.6564259148298372e-06, "loss": 0.0627, "step": 38734 }, { "epoch": 0.8535369394084626, "grad_norm": 0.17430762946605682, "learning_rate": 1.6559369328736379e-06, "loss": 0.0291, "step": 38735 }, { "epoch": 0.8535589746979788, "grad_norm": 0.4164404273033142, "learning_rate": 1.6554480188859094e-06, "loss": 0.0497, "step": 38736 }, { "epoch": 0.853581009987495, "grad_norm": 0.6681510806083679, "learning_rate": 1.6549591728691366e-06, "loss": 0.0318, "step": 38737 }, { "epoch": 0.8536030452770111, "grad_norm": 0.777725100517273, "learning_rate": 1.6544703948258173e-06, "loss": 0.0705, "step": 38738 }, { "epoch": 0.8536250805665273, "grad_norm": 0.3387811779975891, "learning_rate": 1.6539816847584344e-06, "loss": 0.0357, "step": 38739 }, { "epoch": 0.8536471158560435, "grad_norm": 0.2229045033454895, "learning_rate": 1.6534930426694812e-06, "loss": 0.0355, "step": 38740 }, { "epoch": 0.8536691511455596, "grad_norm": 0.47889411449432373, "learning_rate": 1.6530044685614437e-06, "loss": 0.0498, "step": 38741 }, { "epoch": 0.8536911864350758, "grad_norm": 0.9205496311187744, "learning_rate": 1.6525159624368102e-06, "loss": 0.0832, "step": 38742 }, { "epoch": 0.853713221724592, "grad_norm": 0.960912823677063, "learning_rate": 1.6520275242980738e-06, "loss": 0.065, "step": 38743 }, { "epoch": 0.8537352570141081, "grad_norm": 0.630209743976593, "learning_rate": 1.651539154147717e-06, "loss": 0.0677, "step": 38744 }, { "epoch": 0.8537572923036243, "grad_norm": 0.77801114320755, "learning_rate": 1.6510508519882283e-06, "loss": 0.0548, "step": 38745 }, { "epoch": 0.8537793275931405, "grad_norm": 0.15910044312477112, "learning_rate": 1.6505626178220956e-06, "loss": 0.0504, "step": 38746 }, { "epoch": 0.8538013628826566, "grad_norm": 0.6354755163192749, "learning_rate": 1.6500744516518101e-06, "loss": 0.0673, "step": 38747 }, { "epoch": 0.8538233981721728, "grad_norm": 0.7174393534660339, "learning_rate": 1.64958635347985e-06, "loss": 0.0641, "step": 38748 }, { "epoch": 0.8538454334616888, "grad_norm": 0.726462721824646, "learning_rate": 1.6490983233087064e-06, "loss": 0.06, "step": 38749 }, { "epoch": 0.853867468751205, "grad_norm": 0.6600719094276428, "learning_rate": 1.6486103611408677e-06, "loss": 0.079, "step": 38750 }, { "epoch": 0.8538895040407212, "grad_norm": 0.5603495836257935, "learning_rate": 1.6481224669788115e-06, "loss": 0.0527, "step": 38751 }, { "epoch": 0.8539115393302373, "grad_norm": 0.5899007320404053, "learning_rate": 1.6476346408250325e-06, "loss": 0.0571, "step": 38752 }, { "epoch": 0.8539335746197535, "grad_norm": 0.8530194759368896, "learning_rate": 1.6471468826820024e-06, "loss": 0.082, "step": 38753 }, { "epoch": 0.8539556099092697, "grad_norm": 0.5204797983169556, "learning_rate": 1.6466591925522206e-06, "loss": 0.0558, "step": 38754 }, { "epoch": 0.8539776451987858, "grad_norm": 0.36442458629608154, "learning_rate": 1.6461715704381619e-06, "loss": 0.0489, "step": 38755 }, { "epoch": 0.853999680488302, "grad_norm": 0.36766043305397034, "learning_rate": 1.645684016342311e-06, "loss": 0.0405, "step": 38756 }, { "epoch": 0.8540217157778182, "grad_norm": 0.6272401213645935, "learning_rate": 1.6451965302671557e-06, "loss": 0.0644, "step": 38757 }, { "epoch": 0.8540437510673343, "grad_norm": 0.307874470949173, "learning_rate": 1.6447091122151742e-06, "loss": 0.0484, "step": 38758 }, { "epoch": 0.8540657863568505, "grad_norm": 0.7509512901306152, "learning_rate": 1.6442217621888528e-06, "loss": 0.079, "step": 38759 }, { "epoch": 0.8540878216463667, "grad_norm": 0.9876638054847717, "learning_rate": 1.6437344801906662e-06, "loss": 0.0856, "step": 38760 }, { "epoch": 0.8541098569358828, "grad_norm": 0.6239995360374451, "learning_rate": 1.6432472662231073e-06, "loss": 0.0589, "step": 38761 }, { "epoch": 0.854131892225399, "grad_norm": 0.6516825556755066, "learning_rate": 1.6427601202886495e-06, "loss": 0.0705, "step": 38762 }, { "epoch": 0.8541539275149151, "grad_norm": 0.4841281771659851, "learning_rate": 1.6422730423897819e-06, "loss": 0.0803, "step": 38763 }, { "epoch": 0.8541759628044313, "grad_norm": 0.41817161440849304, "learning_rate": 1.6417860325289764e-06, "loss": 0.0373, "step": 38764 }, { "epoch": 0.8541979980939475, "grad_norm": 0.4677867591381073, "learning_rate": 1.6412990907087173e-06, "loss": 0.0405, "step": 38765 }, { "epoch": 0.8542200333834636, "grad_norm": 0.5050555467605591, "learning_rate": 1.6408122169314892e-06, "loss": 0.0375, "step": 38766 }, { "epoch": 0.8542420686729798, "grad_norm": 0.8271551132202148, "learning_rate": 1.6403254111997657e-06, "loss": 0.0577, "step": 38767 }, { "epoch": 0.854264103962496, "grad_norm": 0.46923261880874634, "learning_rate": 1.6398386735160277e-06, "loss": 0.0506, "step": 38768 }, { "epoch": 0.8542861392520121, "grad_norm": 0.3762926757335663, "learning_rate": 1.6393520038827563e-06, "loss": 0.0392, "step": 38769 }, { "epoch": 0.8543081745415283, "grad_norm": 0.4096587896347046, "learning_rate": 1.6388654023024335e-06, "loss": 0.023, "step": 38770 }, { "epoch": 0.8543302098310445, "grad_norm": 0.5956733822822571, "learning_rate": 1.6383788687775304e-06, "loss": 0.0833, "step": 38771 }, { "epoch": 0.8543522451205606, "grad_norm": 0.6741405725479126, "learning_rate": 1.637892403310528e-06, "loss": 0.0613, "step": 38772 }, { "epoch": 0.8543742804100768, "grad_norm": 0.4290721118450165, "learning_rate": 1.637406005903908e-06, "loss": 0.0294, "step": 38773 }, { "epoch": 0.8543963156995928, "grad_norm": 0.6791644096374512, "learning_rate": 1.6369196765601435e-06, "loss": 0.0691, "step": 38774 }, { "epoch": 0.854418350989109, "grad_norm": 0.4246745705604553, "learning_rate": 1.636433415281714e-06, "loss": 0.0401, "step": 38775 }, { "epoch": 0.8544403862786252, "grad_norm": 0.39932572841644287, "learning_rate": 1.635947222071091e-06, "loss": 0.0641, "step": 38776 }, { "epoch": 0.8544624215681413, "grad_norm": 0.5495554208755493, "learning_rate": 1.6354610969307587e-06, "loss": 0.0449, "step": 38777 }, { "epoch": 0.8544844568576575, "grad_norm": 0.4161686599254608, "learning_rate": 1.634975039863189e-06, "loss": 0.0523, "step": 38778 }, { "epoch": 0.8545064921471737, "grad_norm": 0.37773698568344116, "learning_rate": 1.6344890508708565e-06, "loss": 0.0355, "step": 38779 }, { "epoch": 0.8545285274366898, "grad_norm": 0.4857425391674042, "learning_rate": 1.6340031299562426e-06, "loss": 0.0666, "step": 38780 }, { "epoch": 0.854550562726206, "grad_norm": 0.6800968050956726, "learning_rate": 1.6335172771218148e-06, "loss": 0.0826, "step": 38781 }, { "epoch": 0.8545725980157222, "grad_norm": 1.0128470659255981, "learning_rate": 1.6330314923700551e-06, "loss": 0.0763, "step": 38782 }, { "epoch": 0.8545946333052383, "grad_norm": 0.605354368686676, "learning_rate": 1.6325457757034263e-06, "loss": 0.0517, "step": 38783 }, { "epoch": 0.8546166685947545, "grad_norm": 0.9783623814582825, "learning_rate": 1.6320601271244178e-06, "loss": 0.0676, "step": 38784 }, { "epoch": 0.8546387038842707, "grad_norm": 0.9986339807510376, "learning_rate": 1.6315745466354915e-06, "loss": 0.0637, "step": 38785 }, { "epoch": 0.8546607391737868, "grad_norm": 0.5275187492370605, "learning_rate": 1.6310890342391282e-06, "loss": 0.0376, "step": 38786 }, { "epoch": 0.854682774463303, "grad_norm": 0.2861763536930084, "learning_rate": 1.6306035899377948e-06, "loss": 0.1074, "step": 38787 }, { "epoch": 0.8547048097528192, "grad_norm": 0.2231435775756836, "learning_rate": 1.6301182137339654e-06, "loss": 0.042, "step": 38788 }, { "epoch": 0.8547268450423353, "grad_norm": 0.676504373550415, "learning_rate": 1.6296329056301168e-06, "loss": 0.0499, "step": 38789 }, { "epoch": 0.8547488803318515, "grad_norm": 0.9147647619247437, "learning_rate": 1.6291476656287153e-06, "loss": 0.0698, "step": 38790 }, { "epoch": 0.8547709156213676, "grad_norm": 0.4366253912448883, "learning_rate": 1.6286624937322337e-06, "loss": 0.0467, "step": 38791 }, { "epoch": 0.8547929509108838, "grad_norm": 0.6428443193435669, "learning_rate": 1.628177389943145e-06, "loss": 0.0621, "step": 38792 }, { "epoch": 0.8548149862004, "grad_norm": 0.6727693676948547, "learning_rate": 1.627692354263921e-06, "loss": 0.0721, "step": 38793 }, { "epoch": 0.8548370214899161, "grad_norm": 0.6996675729751587, "learning_rate": 1.6272073866970293e-06, "loss": 0.048, "step": 38794 }, { "epoch": 0.8548590567794323, "grad_norm": 0.3686263859272003, "learning_rate": 1.6267224872449416e-06, "loss": 0.0484, "step": 38795 }, { "epoch": 0.8548810920689485, "grad_norm": 0.4713971018791199, "learning_rate": 1.6262376559101287e-06, "loss": 0.0362, "step": 38796 }, { "epoch": 0.8549031273584646, "grad_norm": 0.7280038595199585, "learning_rate": 1.6257528926950576e-06, "loss": 0.0688, "step": 38797 }, { "epoch": 0.8549251626479808, "grad_norm": 0.7728824615478516, "learning_rate": 1.6252681976022011e-06, "loss": 0.0699, "step": 38798 }, { "epoch": 0.8549471979374968, "grad_norm": 0.2967805564403534, "learning_rate": 1.6247835706340203e-06, "loss": 0.0612, "step": 38799 }, { "epoch": 0.854969233227013, "grad_norm": 0.6658373475074768, "learning_rate": 1.6242990117929952e-06, "loss": 0.071, "step": 38800 }, { "epoch": 0.8549912685165292, "grad_norm": 0.3804415166378021, "learning_rate": 1.6238145210815852e-06, "loss": 0.0477, "step": 38801 }, { "epoch": 0.8550133038060453, "grad_norm": 0.6586164236068726, "learning_rate": 1.6233300985022604e-06, "loss": 0.0734, "step": 38802 }, { "epoch": 0.8550353390955615, "grad_norm": 0.4900708794593811, "learning_rate": 1.6228457440574901e-06, "loss": 0.0263, "step": 38803 }, { "epoch": 0.8550573743850777, "grad_norm": 0.7021613121032715, "learning_rate": 1.6223614577497391e-06, "loss": 0.0584, "step": 38804 }, { "epoch": 0.8550794096745938, "grad_norm": 0.7079663276672363, "learning_rate": 1.6218772395814756e-06, "loss": 0.0606, "step": 38805 }, { "epoch": 0.85510144496411, "grad_norm": 0.42148107290267944, "learning_rate": 1.621393089555161e-06, "loss": 0.0526, "step": 38806 }, { "epoch": 0.8551234802536262, "grad_norm": 0.6072044968605042, "learning_rate": 1.6209090076732713e-06, "loss": 0.0754, "step": 38807 }, { "epoch": 0.8551455155431423, "grad_norm": 0.8433017134666443, "learning_rate": 1.6204249939382632e-06, "loss": 0.0494, "step": 38808 }, { "epoch": 0.8551675508326585, "grad_norm": 0.48931851983070374, "learning_rate": 1.6199410483526077e-06, "loss": 0.0429, "step": 38809 }, { "epoch": 0.8551895861221747, "grad_norm": 0.6305223703384399, "learning_rate": 1.619457170918765e-06, "loss": 0.0469, "step": 38810 }, { "epoch": 0.8552116214116908, "grad_norm": 0.7033816576004028, "learning_rate": 1.6189733616392027e-06, "loss": 0.0583, "step": 38811 }, { "epoch": 0.855233656701207, "grad_norm": 0.46147456765174866, "learning_rate": 1.6184896205163874e-06, "loss": 0.0441, "step": 38812 }, { "epoch": 0.8552556919907232, "grad_norm": 0.6644406318664551, "learning_rate": 1.618005947552777e-06, "loss": 0.0616, "step": 38813 }, { "epoch": 0.8552777272802393, "grad_norm": 0.7270622253417969, "learning_rate": 1.6175223427508379e-06, "loss": 0.0538, "step": 38814 }, { "epoch": 0.8552997625697555, "grad_norm": 0.5215710997581482, "learning_rate": 1.617038806113033e-06, "loss": 0.0588, "step": 38815 }, { "epoch": 0.8553217978592716, "grad_norm": 0.3798735737800598, "learning_rate": 1.6165553376418307e-06, "loss": 0.0639, "step": 38816 }, { "epoch": 0.8553438331487878, "grad_norm": 0.5217825174331665, "learning_rate": 1.6160719373396837e-06, "loss": 0.0738, "step": 38817 }, { "epoch": 0.855365868438304, "grad_norm": 0.629128634929657, "learning_rate": 1.6155886052090618e-06, "loss": 0.0772, "step": 38818 }, { "epoch": 0.8553879037278201, "grad_norm": 0.703853189945221, "learning_rate": 1.6151053412524264e-06, "loss": 0.0607, "step": 38819 }, { "epoch": 0.8554099390173363, "grad_norm": 0.4772103428840637, "learning_rate": 1.6146221454722337e-06, "loss": 0.0559, "step": 38820 }, { "epoch": 0.8554319743068525, "grad_norm": 0.5531668663024902, "learning_rate": 1.614139017870952e-06, "loss": 0.0562, "step": 38821 }, { "epoch": 0.8554540095963686, "grad_norm": 0.44988298416137695, "learning_rate": 1.6136559584510307e-06, "loss": 0.0513, "step": 38822 }, { "epoch": 0.8554760448858847, "grad_norm": 0.5428910255432129, "learning_rate": 1.6131729672149447e-06, "loss": 0.0559, "step": 38823 }, { "epoch": 0.8554980801754009, "grad_norm": 0.7994359135627747, "learning_rate": 1.6126900441651455e-06, "loss": 0.0468, "step": 38824 }, { "epoch": 0.855520115464917, "grad_norm": 0.4426197111606598, "learning_rate": 1.612207189304094e-06, "loss": 0.0599, "step": 38825 }, { "epoch": 0.8555421507544332, "grad_norm": 0.5472074747085571, "learning_rate": 1.6117244026342538e-06, "loss": 0.0549, "step": 38826 }, { "epoch": 0.8555641860439493, "grad_norm": 0.6030119061470032, "learning_rate": 1.6112416841580775e-06, "loss": 0.1097, "step": 38827 }, { "epoch": 0.8555862213334655, "grad_norm": 0.782067596912384, "learning_rate": 1.610759033878028e-06, "loss": 0.0629, "step": 38828 }, { "epoch": 0.8556082566229817, "grad_norm": 0.405836820602417, "learning_rate": 1.610276451796559e-06, "loss": 0.0608, "step": 38829 }, { "epoch": 0.8556302919124978, "grad_norm": 0.47364479303359985, "learning_rate": 1.609793937916138e-06, "loss": 0.0684, "step": 38830 }, { "epoch": 0.855652327202014, "grad_norm": 0.501258134841919, "learning_rate": 1.6093114922392133e-06, "loss": 0.0606, "step": 38831 }, { "epoch": 0.8556743624915302, "grad_norm": 0.8225881457328796, "learning_rate": 1.6088291147682478e-06, "loss": 0.0799, "step": 38832 }, { "epoch": 0.8556963977810463, "grad_norm": 0.36918768286705017, "learning_rate": 1.6083468055056943e-06, "loss": 0.0498, "step": 38833 }, { "epoch": 0.8557184330705625, "grad_norm": 0.5157399773597717, "learning_rate": 1.607864564454013e-06, "loss": 0.0537, "step": 38834 }, { "epoch": 0.8557404683600787, "grad_norm": 0.6640874147415161, "learning_rate": 1.60738239161566e-06, "loss": 0.0628, "step": 38835 }, { "epoch": 0.8557625036495948, "grad_norm": 0.9356265068054199, "learning_rate": 1.6069002869930883e-06, "loss": 0.0697, "step": 38836 }, { "epoch": 0.855784538939111, "grad_norm": 0.2297869771718979, "learning_rate": 1.6064182505887543e-06, "loss": 0.0511, "step": 38837 }, { "epoch": 0.8558065742286272, "grad_norm": 0.6604217290878296, "learning_rate": 1.605936282405116e-06, "loss": 0.0687, "step": 38838 }, { "epoch": 0.8558286095181433, "grad_norm": 0.8159227967262268, "learning_rate": 1.6054543824446284e-06, "loss": 0.0547, "step": 38839 }, { "epoch": 0.8558506448076595, "grad_norm": 0.3334977328777313, "learning_rate": 1.6049725507097423e-06, "loss": 0.0445, "step": 38840 }, { "epoch": 0.8558726800971757, "grad_norm": 0.3628595769405365, "learning_rate": 1.604490787202913e-06, "loss": 0.0688, "step": 38841 }, { "epoch": 0.8558947153866918, "grad_norm": 0.5287445187568665, "learning_rate": 1.6040090919265982e-06, "loss": 0.0604, "step": 38842 }, { "epoch": 0.855916750676208, "grad_norm": 0.39115971326828003, "learning_rate": 1.603527464883246e-06, "loss": 0.0437, "step": 38843 }, { "epoch": 0.8559387859657241, "grad_norm": 0.6905481219291687, "learning_rate": 1.6030459060753127e-06, "loss": 0.0377, "step": 38844 }, { "epoch": 0.8559608212552403, "grad_norm": 0.571380078792572, "learning_rate": 1.60256441550525e-06, "loss": 0.0628, "step": 38845 }, { "epoch": 0.8559828565447565, "grad_norm": 0.41268041729927063, "learning_rate": 1.6020829931755155e-06, "loss": 0.0382, "step": 38846 }, { "epoch": 0.8560048918342726, "grad_norm": 0.5409790277481079, "learning_rate": 1.6016016390885524e-06, "loss": 0.0523, "step": 38847 }, { "epoch": 0.8560269271237887, "grad_norm": 0.4329473078250885, "learning_rate": 1.6011203532468172e-06, "loss": 0.0463, "step": 38848 }, { "epoch": 0.8560489624133049, "grad_norm": 0.5109958052635193, "learning_rate": 1.600639135652761e-06, "loss": 0.0609, "step": 38849 }, { "epoch": 0.856070997702821, "grad_norm": 0.511161208152771, "learning_rate": 1.6001579863088373e-06, "loss": 0.0538, "step": 38850 }, { "epoch": 0.8560930329923372, "grad_norm": 0.24026137590408325, "learning_rate": 1.5996769052174919e-06, "loss": 0.0438, "step": 38851 }, { "epoch": 0.8561150682818534, "grad_norm": 0.5096274018287659, "learning_rate": 1.5991958923811784e-06, "loss": 0.0713, "step": 38852 }, { "epoch": 0.8561371035713695, "grad_norm": 0.8635557293891907, "learning_rate": 1.5987149478023494e-06, "loss": 0.0815, "step": 38853 }, { "epoch": 0.8561591388608857, "grad_norm": 0.7044613361358643, "learning_rate": 1.5982340714834481e-06, "loss": 0.0582, "step": 38854 }, { "epoch": 0.8561811741504018, "grad_norm": 0.9657958745956421, "learning_rate": 1.597753263426931e-06, "loss": 0.075, "step": 38855 }, { "epoch": 0.856203209439918, "grad_norm": 0.6335045099258423, "learning_rate": 1.5972725236352375e-06, "loss": 0.0369, "step": 38856 }, { "epoch": 0.8562252447294342, "grad_norm": 0.483513742685318, "learning_rate": 1.596791852110826e-06, "loss": 0.0659, "step": 38857 }, { "epoch": 0.8562472800189503, "grad_norm": 0.43145814538002014, "learning_rate": 1.5963112488561409e-06, "loss": 0.06, "step": 38858 }, { "epoch": 0.8562693153084665, "grad_norm": 0.7985260486602783, "learning_rate": 1.5958307138736306e-06, "loss": 0.0826, "step": 38859 }, { "epoch": 0.8562913505979827, "grad_norm": 0.6876710057258606, "learning_rate": 1.595350247165741e-06, "loss": 0.0637, "step": 38860 }, { "epoch": 0.8563133858874988, "grad_norm": 0.635828971862793, "learning_rate": 1.5948698487349206e-06, "loss": 0.0666, "step": 38861 }, { "epoch": 0.856335421177015, "grad_norm": 0.501022458076477, "learning_rate": 1.5943895185836189e-06, "loss": 0.0541, "step": 38862 }, { "epoch": 0.8563574564665312, "grad_norm": 0.49281182885169983, "learning_rate": 1.593909256714277e-06, "loss": 0.0449, "step": 38863 }, { "epoch": 0.8563794917560473, "grad_norm": 0.6660724878311157, "learning_rate": 1.5934290631293453e-06, "loss": 0.0518, "step": 38864 }, { "epoch": 0.8564015270455635, "grad_norm": 0.48802676796913147, "learning_rate": 1.5929489378312663e-06, "loss": 0.0754, "step": 38865 }, { "epoch": 0.8564235623350797, "grad_norm": 0.6031110882759094, "learning_rate": 1.592468880822493e-06, "loss": 0.0582, "step": 38866 }, { "epoch": 0.8564455976245958, "grad_norm": 0.5967947840690613, "learning_rate": 1.591988892105462e-06, "loss": 0.0673, "step": 38867 }, { "epoch": 0.856467632914112, "grad_norm": 0.7001314759254456, "learning_rate": 1.5915089716826198e-06, "loss": 0.0534, "step": 38868 }, { "epoch": 0.8564896682036282, "grad_norm": 0.4129096567630768, "learning_rate": 1.5910291195564176e-06, "loss": 0.0396, "step": 38869 }, { "epoch": 0.8565117034931443, "grad_norm": 0.3911781907081604, "learning_rate": 1.59054933572929e-06, "loss": 0.0566, "step": 38870 }, { "epoch": 0.8565337387826605, "grad_norm": 0.7470268607139587, "learning_rate": 1.5900696202036868e-06, "loss": 0.0686, "step": 38871 }, { "epoch": 0.8565557740721766, "grad_norm": 0.5160516500473022, "learning_rate": 1.5895899729820496e-06, "loss": 0.0364, "step": 38872 }, { "epoch": 0.8565778093616927, "grad_norm": 0.7588793039321899, "learning_rate": 1.5891103940668245e-06, "loss": 0.0757, "step": 38873 }, { "epoch": 0.8565998446512089, "grad_norm": 0.4714161157608032, "learning_rate": 1.5886308834604497e-06, "loss": 0.0411, "step": 38874 }, { "epoch": 0.856621879940725, "grad_norm": 0.7433356642723083, "learning_rate": 1.588151441165368e-06, "loss": 0.0615, "step": 38875 }, { "epoch": 0.8566439152302412, "grad_norm": 0.7911941409111023, "learning_rate": 1.5876720671840262e-06, "loss": 0.057, "step": 38876 }, { "epoch": 0.8566659505197574, "grad_norm": 0.5899696946144104, "learning_rate": 1.5871927615188603e-06, "loss": 0.0507, "step": 38877 }, { "epoch": 0.8566879858092735, "grad_norm": 0.2252875566482544, "learning_rate": 1.586713524172317e-06, "loss": 0.0604, "step": 38878 }, { "epoch": 0.8567100210987897, "grad_norm": 0.6322042942047119, "learning_rate": 1.5862343551468289e-06, "loss": 0.0529, "step": 38879 }, { "epoch": 0.8567320563883059, "grad_norm": 0.32550927996635437, "learning_rate": 1.5857552544448478e-06, "loss": 0.0512, "step": 38880 }, { "epoch": 0.856754091677822, "grad_norm": 0.3281186521053314, "learning_rate": 1.5852762220688066e-06, "loss": 0.0793, "step": 38881 }, { "epoch": 0.8567761269673382, "grad_norm": 0.3577629625797272, "learning_rate": 1.5847972580211484e-06, "loss": 0.0265, "step": 38882 }, { "epoch": 0.8567981622568543, "grad_norm": 0.3073614239692688, "learning_rate": 1.5843183623043094e-06, "loss": 0.0615, "step": 38883 }, { "epoch": 0.8568201975463705, "grad_norm": 0.5312590003013611, "learning_rate": 1.5838395349207314e-06, "loss": 0.0473, "step": 38884 }, { "epoch": 0.8568422328358867, "grad_norm": 0.4960392415523529, "learning_rate": 1.583360775872857e-06, "loss": 0.0612, "step": 38885 }, { "epoch": 0.8568642681254028, "grad_norm": 0.42445600032806396, "learning_rate": 1.582882085163116e-06, "loss": 0.0403, "step": 38886 }, { "epoch": 0.856886303414919, "grad_norm": 0.6302492618560791, "learning_rate": 1.5824034627939515e-06, "loss": 0.0803, "step": 38887 }, { "epoch": 0.8569083387044352, "grad_norm": 0.5981358885765076, "learning_rate": 1.5819249087678017e-06, "loss": 0.0584, "step": 38888 }, { "epoch": 0.8569303739939513, "grad_norm": 0.8207595944404602, "learning_rate": 1.5814464230871063e-06, "loss": 0.074, "step": 38889 }, { "epoch": 0.8569524092834675, "grad_norm": 0.38891932368278503, "learning_rate": 1.5809680057542963e-06, "loss": 0.0576, "step": 38890 }, { "epoch": 0.8569744445729837, "grad_norm": 0.6172924637794495, "learning_rate": 1.5804896567718136e-06, "loss": 0.0866, "step": 38891 }, { "epoch": 0.8569964798624998, "grad_norm": 0.4567272365093231, "learning_rate": 1.5800113761420958e-06, "loss": 0.043, "step": 38892 }, { "epoch": 0.857018515152016, "grad_norm": 0.920774519443512, "learning_rate": 1.579533163867573e-06, "loss": 0.0964, "step": 38893 }, { "epoch": 0.8570405504415322, "grad_norm": 0.3497703969478607, "learning_rate": 1.5790550199506848e-06, "loss": 0.0425, "step": 38894 }, { "epoch": 0.8570625857310483, "grad_norm": 0.6817774772644043, "learning_rate": 1.578576944393866e-06, "loss": 0.0592, "step": 38895 }, { "epoch": 0.8570846210205645, "grad_norm": 0.37354013323783875, "learning_rate": 1.578098937199554e-06, "loss": 0.0365, "step": 38896 }, { "epoch": 0.8571066563100805, "grad_norm": 0.6881160736083984, "learning_rate": 1.5776209983701795e-06, "loss": 0.0485, "step": 38897 }, { "epoch": 0.8571286915995967, "grad_norm": 0.6497257351875305, "learning_rate": 1.57714312790818e-06, "loss": 0.0545, "step": 38898 }, { "epoch": 0.8571507268891129, "grad_norm": 0.29377391934394836, "learning_rate": 1.5766653258159902e-06, "loss": 0.0284, "step": 38899 }, { "epoch": 0.857172762178629, "grad_norm": 0.28665536642074585, "learning_rate": 1.5761875920960384e-06, "loss": 0.044, "step": 38900 }, { "epoch": 0.8571947974681452, "grad_norm": 0.47837722301483154, "learning_rate": 1.5757099267507658e-06, "loss": 0.0715, "step": 38901 }, { "epoch": 0.8572168327576614, "grad_norm": 0.597774863243103, "learning_rate": 1.5752323297825939e-06, "loss": 0.0567, "step": 38902 }, { "epoch": 0.8572388680471775, "grad_norm": 0.5744946599006653, "learning_rate": 1.5747548011939705e-06, "loss": 0.0651, "step": 38903 }, { "epoch": 0.8572609033366937, "grad_norm": 0.4547024071216583, "learning_rate": 1.5742773409873157e-06, "loss": 0.0627, "step": 38904 }, { "epoch": 0.8572829386262099, "grad_norm": 1.0904252529144287, "learning_rate": 1.573799949165069e-06, "loss": 0.0798, "step": 38905 }, { "epoch": 0.857304973915726, "grad_norm": 0.7064330577850342, "learning_rate": 1.5733226257296569e-06, "loss": 0.07, "step": 38906 }, { "epoch": 0.8573270092052422, "grad_norm": 0.7295762896537781, "learning_rate": 1.5728453706835122e-06, "loss": 0.0471, "step": 38907 }, { "epoch": 0.8573490444947583, "grad_norm": 0.6414443850517273, "learning_rate": 1.5723681840290682e-06, "loss": 0.0601, "step": 38908 }, { "epoch": 0.8573710797842745, "grad_norm": 0.6754446625709534, "learning_rate": 1.5718910657687512e-06, "loss": 0.0736, "step": 38909 }, { "epoch": 0.8573931150737907, "grad_norm": 0.8611414432525635, "learning_rate": 1.5714140159049944e-06, "loss": 0.0669, "step": 38910 }, { "epoch": 0.8574151503633068, "grad_norm": 0.5949868559837341, "learning_rate": 1.5709370344402258e-06, "loss": 0.0423, "step": 38911 }, { "epoch": 0.857437185652823, "grad_norm": 1.459788203239441, "learning_rate": 1.5704601213768798e-06, "loss": 0.1177, "step": 38912 }, { "epoch": 0.8574592209423392, "grad_norm": 0.543250560760498, "learning_rate": 1.5699832767173784e-06, "loss": 0.044, "step": 38913 }, { "epoch": 0.8574812562318553, "grad_norm": 0.8213605880737305, "learning_rate": 1.569506500464154e-06, "loss": 0.0515, "step": 38914 }, { "epoch": 0.8575032915213715, "grad_norm": 0.5204381942749023, "learning_rate": 1.5690297926196385e-06, "loss": 0.0766, "step": 38915 }, { "epoch": 0.8575253268108877, "grad_norm": 0.5240641832351685, "learning_rate": 1.5685531531862546e-06, "loss": 0.061, "step": 38916 }, { "epoch": 0.8575473621004038, "grad_norm": 0.5576415061950684, "learning_rate": 1.5680765821664306e-06, "loss": 0.0559, "step": 38917 }, { "epoch": 0.85756939738992, "grad_norm": 0.4565839171409607, "learning_rate": 1.5676000795625944e-06, "loss": 0.0707, "step": 38918 }, { "epoch": 0.8575914326794362, "grad_norm": 0.6682792901992798, "learning_rate": 1.5671236453771775e-06, "loss": 0.0772, "step": 38919 }, { "epoch": 0.8576134679689523, "grad_norm": 0.6121441721916199, "learning_rate": 1.5666472796126014e-06, "loss": 0.0592, "step": 38920 }, { "epoch": 0.8576355032584685, "grad_norm": 1.0351324081420898, "learning_rate": 1.5661709822712938e-06, "loss": 0.0516, "step": 38921 }, { "epoch": 0.8576575385479845, "grad_norm": 0.5277674794197083, "learning_rate": 1.5656947533556831e-06, "loss": 0.0515, "step": 38922 }, { "epoch": 0.8576795738375007, "grad_norm": 0.7303974032402039, "learning_rate": 1.5652185928681922e-06, "loss": 0.075, "step": 38923 }, { "epoch": 0.8577016091270169, "grad_norm": 0.4692454934120178, "learning_rate": 1.5647425008112492e-06, "loss": 0.0512, "step": 38924 }, { "epoch": 0.857723644416533, "grad_norm": 0.31257590651512146, "learning_rate": 1.5642664771872706e-06, "loss": 0.0426, "step": 38925 }, { "epoch": 0.8577456797060492, "grad_norm": 0.3515128791332245, "learning_rate": 1.5637905219986942e-06, "loss": 0.0477, "step": 38926 }, { "epoch": 0.8577677149955654, "grad_norm": 0.7917612791061401, "learning_rate": 1.563314635247935e-06, "loss": 0.0523, "step": 38927 }, { "epoch": 0.8577897502850815, "grad_norm": 0.5599640011787415, "learning_rate": 1.5628388169374208e-06, "loss": 0.0499, "step": 38928 }, { "epoch": 0.8578117855745977, "grad_norm": 0.6834261417388916, "learning_rate": 1.5623630670695732e-06, "loss": 0.0766, "step": 38929 }, { "epoch": 0.8578338208641139, "grad_norm": 0.5812965631484985, "learning_rate": 1.5618873856468152e-06, "loss": 0.0662, "step": 38930 }, { "epoch": 0.85785585615363, "grad_norm": 0.33191412687301636, "learning_rate": 1.5614117726715749e-06, "loss": 0.0412, "step": 38931 }, { "epoch": 0.8578778914431462, "grad_norm": 0.6797173619270325, "learning_rate": 1.560936228146267e-06, "loss": 0.0474, "step": 38932 }, { "epoch": 0.8578999267326624, "grad_norm": 0.39465171098709106, "learning_rate": 1.5604607520733178e-06, "loss": 0.0576, "step": 38933 }, { "epoch": 0.8579219620221785, "grad_norm": 0.4841073155403137, "learning_rate": 1.5599853444551488e-06, "loss": 0.0395, "step": 38934 }, { "epoch": 0.8579439973116947, "grad_norm": 0.36951690912246704, "learning_rate": 1.5595100052941845e-06, "loss": 0.0493, "step": 38935 }, { "epoch": 0.8579660326012108, "grad_norm": 0.6523644924163818, "learning_rate": 1.55903473459284e-06, "loss": 0.0678, "step": 38936 }, { "epoch": 0.857988067890727, "grad_norm": 0.36067190766334534, "learning_rate": 1.55855953235354e-06, "loss": 0.0366, "step": 38937 }, { "epoch": 0.8580101031802432, "grad_norm": 0.8729162812232971, "learning_rate": 1.5580843985787057e-06, "loss": 0.0711, "step": 38938 }, { "epoch": 0.8580321384697593, "grad_norm": 0.6211808323860168, "learning_rate": 1.5576093332707537e-06, "loss": 0.0675, "step": 38939 }, { "epoch": 0.8580541737592755, "grad_norm": 0.28332439064979553, "learning_rate": 1.557134336432105e-06, "loss": 0.0497, "step": 38940 }, { "epoch": 0.8580762090487917, "grad_norm": 0.8814231753349304, "learning_rate": 1.5566594080651813e-06, "loss": 0.0593, "step": 38941 }, { "epoch": 0.8580982443383078, "grad_norm": 0.4813045263290405, "learning_rate": 1.5561845481724007e-06, "loss": 0.0391, "step": 38942 }, { "epoch": 0.858120279627824, "grad_norm": 0.5791943073272705, "learning_rate": 1.5557097567561813e-06, "loss": 0.0755, "step": 38943 }, { "epoch": 0.8581423149173402, "grad_norm": 0.27770116925239563, "learning_rate": 1.555235033818939e-06, "loss": 0.0557, "step": 38944 }, { "epoch": 0.8581643502068563, "grad_norm": 0.43879732489585876, "learning_rate": 1.5547603793630977e-06, "loss": 0.0405, "step": 38945 }, { "epoch": 0.8581863854963725, "grad_norm": 0.3562006950378418, "learning_rate": 1.5542857933910697e-06, "loss": 0.0693, "step": 38946 }, { "epoch": 0.8582084207858885, "grad_norm": 0.48270002007484436, "learning_rate": 1.553811275905277e-06, "loss": 0.0792, "step": 38947 }, { "epoch": 0.8582304560754047, "grad_norm": 1.1653289794921875, "learning_rate": 1.553336826908127e-06, "loss": 0.0556, "step": 38948 }, { "epoch": 0.8582524913649209, "grad_norm": 0.9040598273277283, "learning_rate": 1.5528624464020503e-06, "loss": 0.0956, "step": 38949 }, { "epoch": 0.858274526654437, "grad_norm": 0.31256571412086487, "learning_rate": 1.5523881343894542e-06, "loss": 0.0375, "step": 38950 }, { "epoch": 0.8582965619439532, "grad_norm": 0.7152858972549438, "learning_rate": 1.551913890872757e-06, "loss": 0.0873, "step": 38951 }, { "epoch": 0.8583185972334694, "grad_norm": 0.3079265356063843, "learning_rate": 1.5514397158543685e-06, "loss": 0.0329, "step": 38952 }, { "epoch": 0.8583406325229855, "grad_norm": 0.6879820823669434, "learning_rate": 1.550965609336717e-06, "loss": 0.0494, "step": 38953 }, { "epoch": 0.8583626678125017, "grad_norm": 0.4627070426940918, "learning_rate": 1.5504915713222067e-06, "loss": 0.0606, "step": 38954 }, { "epoch": 0.8583847031020179, "grad_norm": 0.4577099680900574, "learning_rate": 1.5500176018132545e-06, "loss": 0.0679, "step": 38955 }, { "epoch": 0.858406738391534, "grad_norm": 0.5389804840087891, "learning_rate": 1.5495437008122731e-06, "loss": 0.0786, "step": 38956 }, { "epoch": 0.8584287736810502, "grad_norm": 0.6260250210762024, "learning_rate": 1.5490698683216808e-06, "loss": 0.0451, "step": 38957 }, { "epoch": 0.8584508089705664, "grad_norm": 0.653232991695404, "learning_rate": 1.548596104343889e-06, "loss": 0.0715, "step": 38958 }, { "epoch": 0.8584728442600825, "grad_norm": 0.7975810170173645, "learning_rate": 1.5481224088813056e-06, "loss": 0.0608, "step": 38959 }, { "epoch": 0.8584948795495987, "grad_norm": 0.4469357430934906, "learning_rate": 1.5476487819363556e-06, "loss": 0.0411, "step": 38960 }, { "epoch": 0.8585169148391149, "grad_norm": 0.7445288300514221, "learning_rate": 1.5471752235114383e-06, "loss": 0.0543, "step": 38961 }, { "epoch": 0.858538950128631, "grad_norm": 0.6818633079528809, "learning_rate": 1.5467017336089773e-06, "loss": 0.0565, "step": 38962 }, { "epoch": 0.8585609854181472, "grad_norm": 0.21737922728061676, "learning_rate": 1.5462283122313735e-06, "loss": 0.04, "step": 38963 }, { "epoch": 0.8585830207076633, "grad_norm": 0.9415509700775146, "learning_rate": 1.5457549593810438e-06, "loss": 0.0801, "step": 38964 }, { "epoch": 0.8586050559971795, "grad_norm": 0.4578646719455719, "learning_rate": 1.5452816750604027e-06, "loss": 0.0455, "step": 38965 }, { "epoch": 0.8586270912866957, "grad_norm": 0.5590097308158875, "learning_rate": 1.544808459271853e-06, "loss": 0.052, "step": 38966 }, { "epoch": 0.8586491265762118, "grad_norm": 0.8195750117301941, "learning_rate": 1.5443353120178099e-06, "loss": 0.0529, "step": 38967 }, { "epoch": 0.858671161865728, "grad_norm": 0.5147681832313538, "learning_rate": 1.543862233300683e-06, "loss": 0.0704, "step": 38968 }, { "epoch": 0.8586931971552442, "grad_norm": 0.5502241849899292, "learning_rate": 1.5433892231228836e-06, "loss": 0.0562, "step": 38969 }, { "epoch": 0.8587152324447603, "grad_norm": 0.77590012550354, "learning_rate": 1.5429162814868165e-06, "loss": 0.0481, "step": 38970 }, { "epoch": 0.8587372677342764, "grad_norm": 0.9595917463302612, "learning_rate": 1.5424434083948929e-06, "loss": 0.0664, "step": 38971 }, { "epoch": 0.8587593030237926, "grad_norm": 0.4367290437221527, "learning_rate": 1.541970603849523e-06, "loss": 0.0277, "step": 38972 }, { "epoch": 0.8587813383133087, "grad_norm": 0.8007882237434387, "learning_rate": 1.5414978678531127e-06, "loss": 0.0497, "step": 38973 }, { "epoch": 0.8588033736028249, "grad_norm": 0.7846276760101318, "learning_rate": 1.5410252004080722e-06, "loss": 0.0673, "step": 38974 }, { "epoch": 0.858825408892341, "grad_norm": 0.5703624486923218, "learning_rate": 1.5405526015168026e-06, "loss": 0.0627, "step": 38975 }, { "epoch": 0.8588474441818572, "grad_norm": 0.6307796835899353, "learning_rate": 1.540080071181722e-06, "loss": 0.0652, "step": 38976 }, { "epoch": 0.8588694794713734, "grad_norm": 0.503451406955719, "learning_rate": 1.5396076094052286e-06, "loss": 0.0696, "step": 38977 }, { "epoch": 0.8588915147608895, "grad_norm": 0.6011975407600403, "learning_rate": 1.5391352161897304e-06, "loss": 0.052, "step": 38978 }, { "epoch": 0.8589135500504057, "grad_norm": 0.6122931838035583, "learning_rate": 1.5386628915376389e-06, "loss": 0.0672, "step": 38979 }, { "epoch": 0.8589355853399219, "grad_norm": 0.9577043652534485, "learning_rate": 1.5381906354513535e-06, "loss": 0.0613, "step": 38980 }, { "epoch": 0.858957620629438, "grad_norm": 0.40984874963760376, "learning_rate": 1.5377184479332846e-06, "loss": 0.0619, "step": 38981 }, { "epoch": 0.8589796559189542, "grad_norm": 0.6748133897781372, "learning_rate": 1.537246328985828e-06, "loss": 0.061, "step": 38982 }, { "epoch": 0.8590016912084704, "grad_norm": 0.5749761462211609, "learning_rate": 1.5367742786114004e-06, "loss": 0.051, "step": 38983 }, { "epoch": 0.8590237264979865, "grad_norm": 0.7177594304084778, "learning_rate": 1.5363022968123995e-06, "loss": 0.0544, "step": 38984 }, { "epoch": 0.8590457617875027, "grad_norm": 0.5829060673713684, "learning_rate": 1.535830383591234e-06, "loss": 0.06, "step": 38985 }, { "epoch": 0.8590677970770189, "grad_norm": 0.8030209541320801, "learning_rate": 1.5353585389503e-06, "loss": 0.0521, "step": 38986 }, { "epoch": 0.859089832366535, "grad_norm": 0.38410377502441406, "learning_rate": 1.5348867628920055e-06, "loss": 0.038, "step": 38987 }, { "epoch": 0.8591118676560512, "grad_norm": 0.5190511345863342, "learning_rate": 1.534415055418757e-06, "loss": 0.0515, "step": 38988 }, { "epoch": 0.8591339029455674, "grad_norm": 0.8185569047927856, "learning_rate": 1.5339434165329508e-06, "loss": 0.0433, "step": 38989 }, { "epoch": 0.8591559382350835, "grad_norm": 0.6478846669197083, "learning_rate": 1.5334718462369917e-06, "loss": 0.0638, "step": 38990 }, { "epoch": 0.8591779735245997, "grad_norm": 0.3261476755142212, "learning_rate": 1.5330003445332813e-06, "loss": 0.0693, "step": 38991 }, { "epoch": 0.8592000088141158, "grad_norm": 0.5989829301834106, "learning_rate": 1.532528911424224e-06, "loss": 0.0627, "step": 38992 }, { "epoch": 0.859222044103632, "grad_norm": 0.373127281665802, "learning_rate": 1.5320575469122166e-06, "loss": 0.0619, "step": 38993 }, { "epoch": 0.8592440793931482, "grad_norm": 0.6186398267745972, "learning_rate": 1.531586250999662e-06, "loss": 0.0475, "step": 38994 }, { "epoch": 0.8592661146826643, "grad_norm": 0.9218344688415527, "learning_rate": 1.5311150236889648e-06, "loss": 0.0708, "step": 38995 }, { "epoch": 0.8592881499721804, "grad_norm": 0.406054824590683, "learning_rate": 1.5306438649825166e-06, "loss": 0.0548, "step": 38996 }, { "epoch": 0.8593101852616966, "grad_norm": 0.6511174440383911, "learning_rate": 1.530172774882727e-06, "loss": 0.0505, "step": 38997 }, { "epoch": 0.8593322205512127, "grad_norm": 0.3381599485874176, "learning_rate": 1.5297017533919845e-06, "loss": 0.041, "step": 38998 }, { "epoch": 0.8593542558407289, "grad_norm": 0.6281030774116516, "learning_rate": 1.529230800512698e-06, "loss": 0.0461, "step": 38999 }, { "epoch": 0.859376291130245, "grad_norm": 0.557062029838562, "learning_rate": 1.5287599162472616e-06, "loss": 0.0546, "step": 39000 }, { "epoch": 0.8593983264197612, "grad_norm": 0.5456287264823914, "learning_rate": 1.5282891005980742e-06, "loss": 0.0698, "step": 39001 }, { "epoch": 0.8594203617092774, "grad_norm": 0.5100427865982056, "learning_rate": 1.5278183535675377e-06, "loss": 0.0432, "step": 39002 }, { "epoch": 0.8594423969987935, "grad_norm": 0.6080189347267151, "learning_rate": 1.5273476751580434e-06, "loss": 0.0788, "step": 39003 }, { "epoch": 0.8594644322883097, "grad_norm": 0.8489875793457031, "learning_rate": 1.526877065371996e-06, "loss": 0.0679, "step": 39004 }, { "epoch": 0.8594864675778259, "grad_norm": 0.5632317066192627, "learning_rate": 1.526406524211782e-06, "loss": 0.0426, "step": 39005 }, { "epoch": 0.859508502867342, "grad_norm": 0.4397895634174347, "learning_rate": 1.5259360516798093e-06, "loss": 0.0408, "step": 39006 }, { "epoch": 0.8595305381568582, "grad_norm": 0.4229692220687866, "learning_rate": 1.5254656477784678e-06, "loss": 0.0426, "step": 39007 }, { "epoch": 0.8595525734463744, "grad_norm": 0.850147008895874, "learning_rate": 1.524995312510159e-06, "loss": 0.069, "step": 39008 }, { "epoch": 0.8595746087358905, "grad_norm": 0.5682287216186523, "learning_rate": 1.5245250458772708e-06, "loss": 0.0556, "step": 39009 }, { "epoch": 0.8595966440254067, "grad_norm": 0.6285224556922913, "learning_rate": 1.5240548478822047e-06, "loss": 0.0679, "step": 39010 }, { "epoch": 0.8596186793149229, "grad_norm": 0.7275922894477844, "learning_rate": 1.5235847185273538e-06, "loss": 0.0712, "step": 39011 }, { "epoch": 0.859640714604439, "grad_norm": 0.48558732867240906, "learning_rate": 1.5231146578151112e-06, "loss": 0.0976, "step": 39012 }, { "epoch": 0.8596627498939552, "grad_norm": 0.7991660833358765, "learning_rate": 1.5226446657478733e-06, "loss": 0.064, "step": 39013 }, { "epoch": 0.8596847851834714, "grad_norm": 0.5677163600921631, "learning_rate": 1.5221747423280314e-06, "loss": 0.082, "step": 39014 }, { "epoch": 0.8597068204729875, "grad_norm": 0.5409793257713318, "learning_rate": 1.5217048875579853e-06, "loss": 0.0644, "step": 39015 }, { "epoch": 0.8597288557625037, "grad_norm": 0.3769775927066803, "learning_rate": 1.52123510144012e-06, "loss": 0.092, "step": 39016 }, { "epoch": 0.8597508910520198, "grad_norm": 0.8265674114227295, "learning_rate": 1.5207653839768314e-06, "loss": 0.1058, "step": 39017 }, { "epoch": 0.859772926341536, "grad_norm": 0.9548655152320862, "learning_rate": 1.5202957351705183e-06, "loss": 0.0796, "step": 39018 }, { "epoch": 0.8597949616310522, "grad_norm": 0.5452834963798523, "learning_rate": 1.5198261550235631e-06, "loss": 0.0717, "step": 39019 }, { "epoch": 0.8598169969205683, "grad_norm": 0.6678227186203003, "learning_rate": 1.5193566435383644e-06, "loss": 0.0522, "step": 39020 }, { "epoch": 0.8598390322100844, "grad_norm": 0.7910757660865784, "learning_rate": 1.518887200717305e-06, "loss": 0.05, "step": 39021 }, { "epoch": 0.8598610674996006, "grad_norm": 0.6743031740188599, "learning_rate": 1.5184178265627897e-06, "loss": 0.054, "step": 39022 }, { "epoch": 0.8598831027891167, "grad_norm": 0.7705503702163696, "learning_rate": 1.5179485210771982e-06, "loss": 0.0703, "step": 39023 }, { "epoch": 0.8599051380786329, "grad_norm": 0.8311170935630798, "learning_rate": 1.5174792842629238e-06, "loss": 0.0422, "step": 39024 }, { "epoch": 0.8599271733681491, "grad_norm": 0.6997808218002319, "learning_rate": 1.517010116122361e-06, "loss": 0.0702, "step": 39025 }, { "epoch": 0.8599492086576652, "grad_norm": 0.3475329279899597, "learning_rate": 1.516541016657893e-06, "loss": 0.077, "step": 39026 }, { "epoch": 0.8599712439471814, "grad_norm": 0.714453935623169, "learning_rate": 1.5160719858719146e-06, "loss": 0.0659, "step": 39027 }, { "epoch": 0.8599932792366975, "grad_norm": 1.1092538833618164, "learning_rate": 1.5156030237668056e-06, "loss": 0.0861, "step": 39028 }, { "epoch": 0.8600153145262137, "grad_norm": 0.7145997881889343, "learning_rate": 1.515134130344969e-06, "loss": 0.0545, "step": 39029 }, { "epoch": 0.8600373498157299, "grad_norm": 0.7631394267082214, "learning_rate": 1.5146653056087811e-06, "loss": 0.0756, "step": 39030 }, { "epoch": 0.860059385105246, "grad_norm": 0.5083907246589661, "learning_rate": 1.5141965495606368e-06, "loss": 0.0523, "step": 39031 }, { "epoch": 0.8600814203947622, "grad_norm": 0.567608654499054, "learning_rate": 1.5137278622029193e-06, "loss": 0.0681, "step": 39032 }, { "epoch": 0.8601034556842784, "grad_norm": 0.905832052230835, "learning_rate": 1.5132592435380182e-06, "loss": 0.0509, "step": 39033 }, { "epoch": 0.8601254909737945, "grad_norm": 0.8407077193260193, "learning_rate": 1.5127906935683216e-06, "loss": 0.1057, "step": 39034 }, { "epoch": 0.8601475262633107, "grad_norm": 0.6544250249862671, "learning_rate": 1.512322212296211e-06, "loss": 0.0502, "step": 39035 }, { "epoch": 0.8601695615528269, "grad_norm": 0.7672702670097351, "learning_rate": 1.5118537997240777e-06, "loss": 0.0592, "step": 39036 }, { "epoch": 0.860191596842343, "grad_norm": 0.7472895383834839, "learning_rate": 1.511385455854305e-06, "loss": 0.066, "step": 39037 }, { "epoch": 0.8602136321318592, "grad_norm": 0.3623168468475342, "learning_rate": 1.5109171806892825e-06, "loss": 0.0729, "step": 39038 }, { "epoch": 0.8602356674213754, "grad_norm": 0.5361283421516418, "learning_rate": 1.5104489742313899e-06, "loss": 0.0594, "step": 39039 }, { "epoch": 0.8602577027108915, "grad_norm": 0.28990912437438965, "learning_rate": 1.509980836483012e-06, "loss": 0.0438, "step": 39040 }, { "epoch": 0.8602797380004077, "grad_norm": 0.5191510915756226, "learning_rate": 1.5095127674465403e-06, "loss": 0.0402, "step": 39041 }, { "epoch": 0.8603017732899239, "grad_norm": 0.8638975024223328, "learning_rate": 1.5090447671243512e-06, "loss": 0.0754, "step": 39042 }, { "epoch": 0.86032380857944, "grad_norm": 0.6303769946098328, "learning_rate": 1.5085768355188346e-06, "loss": 0.0586, "step": 39043 }, { "epoch": 0.8603458438689562, "grad_norm": 0.5099952816963196, "learning_rate": 1.5081089726323649e-06, "loss": 0.0445, "step": 39044 }, { "epoch": 0.8603678791584723, "grad_norm": 0.8568868041038513, "learning_rate": 1.5076411784673371e-06, "loss": 0.0655, "step": 39045 }, { "epoch": 0.8603899144479884, "grad_norm": 0.48312199115753174, "learning_rate": 1.5071734530261245e-06, "loss": 0.0423, "step": 39046 }, { "epoch": 0.8604119497375046, "grad_norm": 0.4946097731590271, "learning_rate": 1.5067057963111147e-06, "loss": 0.05, "step": 39047 }, { "epoch": 0.8604339850270207, "grad_norm": 0.5515680313110352, "learning_rate": 1.5062382083246895e-06, "loss": 0.0567, "step": 39048 }, { "epoch": 0.8604560203165369, "grad_norm": 0.6423100829124451, "learning_rate": 1.5057706890692252e-06, "loss": 0.0557, "step": 39049 }, { "epoch": 0.8604780556060531, "grad_norm": 0.22541099786758423, "learning_rate": 1.5053032385471116e-06, "loss": 0.03, "step": 39050 }, { "epoch": 0.8605000908955692, "grad_norm": 0.6606450080871582, "learning_rate": 1.5048358567607184e-06, "loss": 0.0513, "step": 39051 }, { "epoch": 0.8605221261850854, "grad_norm": 0.37443268299102783, "learning_rate": 1.5043685437124389e-06, "loss": 0.0656, "step": 39052 }, { "epoch": 0.8605441614746016, "grad_norm": 0.5048213005065918, "learning_rate": 1.5039012994046442e-06, "loss": 0.0523, "step": 39053 }, { "epoch": 0.8605661967641177, "grad_norm": 0.495640367269516, "learning_rate": 1.5034341238397193e-06, "loss": 0.0603, "step": 39054 }, { "epoch": 0.8605882320536339, "grad_norm": 0.6941925883293152, "learning_rate": 1.5029670170200405e-06, "loss": 0.1014, "step": 39055 }, { "epoch": 0.86061026734315, "grad_norm": 0.7017353177070618, "learning_rate": 1.5024999789479876e-06, "loss": 0.0498, "step": 39056 }, { "epoch": 0.8606323026326662, "grad_norm": 0.34377652406692505, "learning_rate": 1.5020330096259438e-06, "loss": 0.0316, "step": 39057 }, { "epoch": 0.8606543379221824, "grad_norm": 0.7849431037902832, "learning_rate": 1.5015661090562804e-06, "loss": 0.0703, "step": 39058 }, { "epoch": 0.8606763732116985, "grad_norm": 0.2269228845834732, "learning_rate": 1.501099277241379e-06, "loss": 0.0537, "step": 39059 }, { "epoch": 0.8606984085012147, "grad_norm": 0.6611675024032593, "learning_rate": 1.5006325141836176e-06, "loss": 0.0659, "step": 39060 }, { "epoch": 0.8607204437907309, "grad_norm": 0.25388744473457336, "learning_rate": 1.5001658198853773e-06, "loss": 0.0305, "step": 39061 }, { "epoch": 0.860742479080247, "grad_norm": 0.46762073040008545, "learning_rate": 1.4996991943490285e-06, "loss": 0.0524, "step": 39062 }, { "epoch": 0.8607645143697632, "grad_norm": 0.602403998374939, "learning_rate": 1.4992326375769521e-06, "loss": 0.0685, "step": 39063 }, { "epoch": 0.8607865496592794, "grad_norm": 0.8155442476272583, "learning_rate": 1.4987661495715233e-06, "loss": 0.0643, "step": 39064 }, { "epoch": 0.8608085849487955, "grad_norm": 0.535546064376831, "learning_rate": 1.4982997303351199e-06, "loss": 0.0796, "step": 39065 }, { "epoch": 0.8608306202383117, "grad_norm": 0.34630027413368225, "learning_rate": 1.4978333798701166e-06, "loss": 0.0327, "step": 39066 }, { "epoch": 0.8608526555278279, "grad_norm": 0.5086149573326111, "learning_rate": 1.4973670981788833e-06, "loss": 0.0576, "step": 39067 }, { "epoch": 0.860874690817344, "grad_norm": 0.4465157687664032, "learning_rate": 1.4969008852638067e-06, "loss": 0.0563, "step": 39068 }, { "epoch": 0.8608967261068602, "grad_norm": 1.0229382514953613, "learning_rate": 1.4964347411272494e-06, "loss": 0.0633, "step": 39069 }, { "epoch": 0.8609187613963762, "grad_norm": 0.5318890810012817, "learning_rate": 1.4959686657715931e-06, "loss": 0.0761, "step": 39070 }, { "epoch": 0.8609407966858924, "grad_norm": 0.49842607975006104, "learning_rate": 1.4955026591992094e-06, "loss": 0.0748, "step": 39071 }, { "epoch": 0.8609628319754086, "grad_norm": 0.8672292828559875, "learning_rate": 1.495036721412476e-06, "loss": 0.0605, "step": 39072 }, { "epoch": 0.8609848672649247, "grad_norm": 0.7619372606277466, "learning_rate": 1.494570852413758e-06, "loss": 0.0598, "step": 39073 }, { "epoch": 0.8610069025544409, "grad_norm": 0.5740705728530884, "learning_rate": 1.4941050522054334e-06, "loss": 0.0467, "step": 39074 }, { "epoch": 0.8610289378439571, "grad_norm": 0.5619509220123291, "learning_rate": 1.493639320789877e-06, "loss": 0.0505, "step": 39075 }, { "epoch": 0.8610509731334732, "grad_norm": 0.6481519341468811, "learning_rate": 1.4931736581694567e-06, "loss": 0.0275, "step": 39076 }, { "epoch": 0.8610730084229894, "grad_norm": 0.570260226726532, "learning_rate": 1.4927080643465474e-06, "loss": 0.0638, "step": 39077 }, { "epoch": 0.8610950437125056, "grad_norm": 0.7790226340293884, "learning_rate": 1.492242539323514e-06, "loss": 0.0626, "step": 39078 }, { "epoch": 0.8611170790020217, "grad_norm": 0.5796107649803162, "learning_rate": 1.4917770831027377e-06, "loss": 0.059, "step": 39079 }, { "epoch": 0.8611391142915379, "grad_norm": 0.5337506532669067, "learning_rate": 1.4913116956865818e-06, "loss": 0.0525, "step": 39080 }, { "epoch": 0.861161149581054, "grad_norm": 0.7744617462158203, "learning_rate": 1.4908463770774228e-06, "loss": 0.0627, "step": 39081 }, { "epoch": 0.8611831848705702, "grad_norm": 0.46842724084854126, "learning_rate": 1.4903811272776235e-06, "loss": 0.0812, "step": 39082 }, { "epoch": 0.8612052201600864, "grad_norm": 0.24494433403015137, "learning_rate": 1.489915946289559e-06, "loss": 0.0419, "step": 39083 }, { "epoch": 0.8612272554496025, "grad_norm": 0.7658290863037109, "learning_rate": 1.4894508341155989e-06, "loss": 0.0671, "step": 39084 }, { "epoch": 0.8612492907391187, "grad_norm": 0.42720580101013184, "learning_rate": 1.4889857907581078e-06, "loss": 0.0699, "step": 39085 }, { "epoch": 0.8612713260286349, "grad_norm": 0.6212533712387085, "learning_rate": 1.4885208162194575e-06, "loss": 0.0605, "step": 39086 }, { "epoch": 0.861293361318151, "grad_norm": 0.8824695944786072, "learning_rate": 1.4880559105020158e-06, "loss": 0.0699, "step": 39087 }, { "epoch": 0.8613153966076672, "grad_norm": 0.6954852342605591, "learning_rate": 1.4875910736081545e-06, "loss": 0.0485, "step": 39088 }, { "epoch": 0.8613374318971834, "grad_norm": 0.7059200406074524, "learning_rate": 1.4871263055402346e-06, "loss": 0.0479, "step": 39089 }, { "epoch": 0.8613594671866995, "grad_norm": 0.4713285267353058, "learning_rate": 1.4866616063006278e-06, "loss": 0.0548, "step": 39090 }, { "epoch": 0.8613815024762157, "grad_norm": 0.8440504670143127, "learning_rate": 1.486196975891702e-06, "loss": 0.0897, "step": 39091 }, { "epoch": 0.8614035377657319, "grad_norm": 0.7105669379234314, "learning_rate": 1.4857324143158174e-06, "loss": 0.0779, "step": 39092 }, { "epoch": 0.861425573055248, "grad_norm": 0.34573253989219666, "learning_rate": 1.4852679215753467e-06, "loss": 0.0547, "step": 39093 }, { "epoch": 0.8614476083447642, "grad_norm": 1.2531148195266724, "learning_rate": 1.4848034976726516e-06, "loss": 0.0887, "step": 39094 }, { "epoch": 0.8614696436342802, "grad_norm": 0.5851097106933594, "learning_rate": 1.4843391426101032e-06, "loss": 0.0686, "step": 39095 }, { "epoch": 0.8614916789237964, "grad_norm": 0.41267016530036926, "learning_rate": 1.4838748563900618e-06, "loss": 0.0371, "step": 39096 }, { "epoch": 0.8615137142133126, "grad_norm": 0.7094660401344299, "learning_rate": 1.4834106390148917e-06, "loss": 0.0602, "step": 39097 }, { "epoch": 0.8615357495028287, "grad_norm": 0.5009886622428894, "learning_rate": 1.4829464904869644e-06, "loss": 0.0597, "step": 39098 }, { "epoch": 0.8615577847923449, "grad_norm": 0.5670764446258545, "learning_rate": 1.482482410808635e-06, "loss": 0.0512, "step": 39099 }, { "epoch": 0.8615798200818611, "grad_norm": 0.7582624554634094, "learning_rate": 1.4820183999822729e-06, "loss": 0.046, "step": 39100 }, { "epoch": 0.8616018553713772, "grad_norm": 0.4897545576095581, "learning_rate": 1.4815544580102363e-06, "loss": 0.0547, "step": 39101 }, { "epoch": 0.8616238906608934, "grad_norm": 0.571861982345581, "learning_rate": 1.481090584894897e-06, "loss": 0.0691, "step": 39102 }, { "epoch": 0.8616459259504096, "grad_norm": 0.2487822324037552, "learning_rate": 1.4806267806386093e-06, "loss": 0.0361, "step": 39103 }, { "epoch": 0.8616679612399257, "grad_norm": 0.5775630474090576, "learning_rate": 1.4801630452437432e-06, "loss": 0.0475, "step": 39104 }, { "epoch": 0.8616899965294419, "grad_norm": 0.910118579864502, "learning_rate": 1.4796993787126534e-06, "loss": 0.0703, "step": 39105 }, { "epoch": 0.8617120318189581, "grad_norm": 0.42743176221847534, "learning_rate": 1.479235781047703e-06, "loss": 0.0441, "step": 39106 }, { "epoch": 0.8617340671084742, "grad_norm": 0.5601414442062378, "learning_rate": 1.4787722522512603e-06, "loss": 0.0552, "step": 39107 }, { "epoch": 0.8617561023979904, "grad_norm": 0.45057258009910583, "learning_rate": 1.4783087923256783e-06, "loss": 0.0522, "step": 39108 }, { "epoch": 0.8617781376875066, "grad_norm": 0.5280262231826782, "learning_rate": 1.4778454012733184e-06, "loss": 0.051, "step": 39109 }, { "epoch": 0.8618001729770227, "grad_norm": 0.5065588355064392, "learning_rate": 1.4773820790965454e-06, "loss": 0.0587, "step": 39110 }, { "epoch": 0.8618222082665389, "grad_norm": 0.6115192770957947, "learning_rate": 1.4769188257977189e-06, "loss": 0.0567, "step": 39111 }, { "epoch": 0.861844243556055, "grad_norm": 0.3953741788864136, "learning_rate": 1.4764556413791924e-06, "loss": 0.0485, "step": 39112 }, { "epoch": 0.8618662788455712, "grad_norm": 0.26528334617614746, "learning_rate": 1.4759925258433304e-06, "loss": 0.0413, "step": 39113 }, { "epoch": 0.8618883141350874, "grad_norm": 0.608301043510437, "learning_rate": 1.4755294791924929e-06, "loss": 0.0784, "step": 39114 }, { "epoch": 0.8619103494246035, "grad_norm": 0.5036226511001587, "learning_rate": 1.4750665014290343e-06, "loss": 0.0365, "step": 39115 }, { "epoch": 0.8619323847141197, "grad_norm": 0.5136016011238098, "learning_rate": 1.4746035925553147e-06, "loss": 0.0668, "step": 39116 }, { "epoch": 0.8619544200036359, "grad_norm": 0.7598019242286682, "learning_rate": 1.4741407525736906e-06, "loss": 0.0645, "step": 39117 }, { "epoch": 0.861976455293152, "grad_norm": 0.6585355401039124, "learning_rate": 1.4736779814865247e-06, "loss": 0.0522, "step": 39118 }, { "epoch": 0.8619984905826682, "grad_norm": 1.1070317029953003, "learning_rate": 1.4732152792961674e-06, "loss": 0.0601, "step": 39119 }, { "epoch": 0.8620205258721843, "grad_norm": 0.6234107613563538, "learning_rate": 1.4727526460049779e-06, "loss": 0.0598, "step": 39120 }, { "epoch": 0.8620425611617004, "grad_norm": 0.6281028389930725, "learning_rate": 1.4722900816153146e-06, "loss": 0.0577, "step": 39121 }, { "epoch": 0.8620645964512166, "grad_norm": 0.5403958559036255, "learning_rate": 1.4718275861295306e-06, "loss": 0.0952, "step": 39122 }, { "epoch": 0.8620866317407327, "grad_norm": 0.7070916295051575, "learning_rate": 1.4713651595499871e-06, "loss": 0.0613, "step": 39123 }, { "epoch": 0.8621086670302489, "grad_norm": 0.6733438968658447, "learning_rate": 1.4709028018790278e-06, "loss": 0.054, "step": 39124 }, { "epoch": 0.8621307023197651, "grad_norm": 0.7752729058265686, "learning_rate": 1.470440513119022e-06, "loss": 0.0772, "step": 39125 }, { "epoch": 0.8621527376092812, "grad_norm": 0.7259240746498108, "learning_rate": 1.4699782932723144e-06, "loss": 0.068, "step": 39126 }, { "epoch": 0.8621747728987974, "grad_norm": 0.7915414571762085, "learning_rate": 1.4695161423412668e-06, "loss": 0.051, "step": 39127 }, { "epoch": 0.8621968081883136, "grad_norm": 0.5780986547470093, "learning_rate": 1.4690540603282253e-06, "loss": 0.0396, "step": 39128 }, { "epoch": 0.8622188434778297, "grad_norm": 0.6271787285804749, "learning_rate": 1.4685920472355468e-06, "loss": 0.0728, "step": 39129 }, { "epoch": 0.8622408787673459, "grad_norm": 0.4491479694843292, "learning_rate": 1.4681301030655892e-06, "loss": 0.07, "step": 39130 }, { "epoch": 0.8622629140568621, "grad_norm": 0.2146492451429367, "learning_rate": 1.4676682278206972e-06, "loss": 0.0462, "step": 39131 }, { "epoch": 0.8622849493463782, "grad_norm": 0.4843108355998993, "learning_rate": 1.4672064215032288e-06, "loss": 0.0459, "step": 39132 }, { "epoch": 0.8623069846358944, "grad_norm": 0.4228785037994385, "learning_rate": 1.466744684115534e-06, "loss": 0.0334, "step": 39133 }, { "epoch": 0.8623290199254106, "grad_norm": 0.20570853352546692, "learning_rate": 1.4662830156599693e-06, "loss": 0.0485, "step": 39134 }, { "epoch": 0.8623510552149267, "grad_norm": 0.5410913825035095, "learning_rate": 1.4658214161388795e-06, "loss": 0.0521, "step": 39135 }, { "epoch": 0.8623730905044429, "grad_norm": 0.5005691647529602, "learning_rate": 1.4653598855546173e-06, "loss": 0.0442, "step": 39136 }, { "epoch": 0.862395125793959, "grad_norm": 0.5386191010475159, "learning_rate": 1.4648984239095398e-06, "loss": 0.042, "step": 39137 }, { "epoch": 0.8624171610834752, "grad_norm": 0.5159992575645447, "learning_rate": 1.4644370312059896e-06, "loss": 0.0426, "step": 39138 }, { "epoch": 0.8624391963729914, "grad_norm": 0.8042553067207336, "learning_rate": 1.46397570744632e-06, "loss": 0.075, "step": 39139 }, { "epoch": 0.8624612316625075, "grad_norm": 0.5616923570632935, "learning_rate": 1.4635144526328793e-06, "loss": 0.0553, "step": 39140 }, { "epoch": 0.8624832669520237, "grad_norm": 0.3562350869178772, "learning_rate": 1.4630532667680235e-06, "loss": 0.0638, "step": 39141 }, { "epoch": 0.8625053022415399, "grad_norm": 0.4682410955429077, "learning_rate": 1.4625921498540913e-06, "loss": 0.055, "step": 39142 }, { "epoch": 0.862527337531056, "grad_norm": 0.7090166807174683, "learning_rate": 1.4621311018934385e-06, "loss": 0.0883, "step": 39143 }, { "epoch": 0.8625493728205721, "grad_norm": 0.6041249632835388, "learning_rate": 1.4616701228884138e-06, "loss": 0.0416, "step": 39144 }, { "epoch": 0.8625714081100883, "grad_norm": 0.7079772353172302, "learning_rate": 1.4612092128413602e-06, "loss": 0.0446, "step": 39145 }, { "epoch": 0.8625934433996044, "grad_norm": 0.5661701560020447, "learning_rate": 1.4607483717546304e-06, "loss": 0.0633, "step": 39146 }, { "epoch": 0.8626154786891206, "grad_norm": 0.5049282312393188, "learning_rate": 1.4602875996305631e-06, "loss": 0.0516, "step": 39147 }, { "epoch": 0.8626375139786367, "grad_norm": 0.6646467447280884, "learning_rate": 1.4598268964715195e-06, "loss": 0.0638, "step": 39148 }, { "epoch": 0.8626595492681529, "grad_norm": 0.7590951323509216, "learning_rate": 1.4593662622798342e-06, "loss": 0.0443, "step": 39149 }, { "epoch": 0.8626815845576691, "grad_norm": 0.8044046759605408, "learning_rate": 1.4589056970578608e-06, "loss": 0.0575, "step": 39150 }, { "epoch": 0.8627036198471852, "grad_norm": 0.6197922825813293, "learning_rate": 1.4584452008079385e-06, "loss": 0.0715, "step": 39151 }, { "epoch": 0.8627256551367014, "grad_norm": 0.43929874897003174, "learning_rate": 1.4579847735324176e-06, "loss": 0.0631, "step": 39152 }, { "epoch": 0.8627476904262176, "grad_norm": 0.7010836601257324, "learning_rate": 1.4575244152336426e-06, "loss": 0.0374, "step": 39153 }, { "epoch": 0.8627697257157337, "grad_norm": 0.6802034378051758, "learning_rate": 1.4570641259139566e-06, "loss": 0.0526, "step": 39154 }, { "epoch": 0.8627917610052499, "grad_norm": 0.47305184602737427, "learning_rate": 1.4566039055757063e-06, "loss": 0.0722, "step": 39155 }, { "epoch": 0.8628137962947661, "grad_norm": 0.5964327454566956, "learning_rate": 1.456143754221233e-06, "loss": 0.0439, "step": 39156 }, { "epoch": 0.8628358315842822, "grad_norm": 0.536734938621521, "learning_rate": 1.4556836718528866e-06, "loss": 0.0664, "step": 39157 }, { "epoch": 0.8628578668737984, "grad_norm": 0.17558102309703827, "learning_rate": 1.4552236584730017e-06, "loss": 0.065, "step": 39158 }, { "epoch": 0.8628799021633146, "grad_norm": 0.6169108748435974, "learning_rate": 1.4547637140839282e-06, "loss": 0.0684, "step": 39159 }, { "epoch": 0.8629019374528307, "grad_norm": 0.5210832357406616, "learning_rate": 1.4543038386880075e-06, "loss": 0.0427, "step": 39160 }, { "epoch": 0.8629239727423469, "grad_norm": 0.728571355342865, "learning_rate": 1.4538440322875796e-06, "loss": 0.0597, "step": 39161 }, { "epoch": 0.862946008031863, "grad_norm": 0.5869583487510681, "learning_rate": 1.4533842948849873e-06, "loss": 0.0578, "step": 39162 }, { "epoch": 0.8629680433213792, "grad_norm": 0.9854479432106018, "learning_rate": 1.4529246264825723e-06, "loss": 0.0809, "step": 39163 }, { "epoch": 0.8629900786108954, "grad_norm": 0.5922630429267883, "learning_rate": 1.4524650270826812e-06, "loss": 0.061, "step": 39164 }, { "epoch": 0.8630121139004115, "grad_norm": 0.4590502977371216, "learning_rate": 1.4520054966876466e-06, "loss": 0.04, "step": 39165 }, { "epoch": 0.8630341491899277, "grad_norm": 0.5481094717979431, "learning_rate": 1.4515460352998139e-06, "loss": 0.074, "step": 39166 }, { "epoch": 0.8630561844794439, "grad_norm": 0.9326333403587341, "learning_rate": 1.451086642921526e-06, "loss": 0.0837, "step": 39167 }, { "epoch": 0.86307821976896, "grad_norm": 0.49142932891845703, "learning_rate": 1.4506273195551139e-06, "loss": 0.071, "step": 39168 }, { "epoch": 0.8631002550584761, "grad_norm": 0.7024558782577515, "learning_rate": 1.4501680652029281e-06, "loss": 0.0613, "step": 39169 }, { "epoch": 0.8631222903479923, "grad_norm": 0.5175681710243225, "learning_rate": 1.4497088798672948e-06, "loss": 0.0428, "step": 39170 }, { "epoch": 0.8631443256375084, "grad_norm": 0.6414898037910461, "learning_rate": 1.449249763550567e-06, "loss": 0.0352, "step": 39171 }, { "epoch": 0.8631663609270246, "grad_norm": 0.5690569281578064, "learning_rate": 1.448790716255073e-06, "loss": 0.0438, "step": 39172 }, { "epoch": 0.8631883962165408, "grad_norm": 0.528560996055603, "learning_rate": 1.448331737983159e-06, "loss": 0.0769, "step": 39173 }, { "epoch": 0.8632104315060569, "grad_norm": 0.6726138591766357, "learning_rate": 1.4478728287371519e-06, "loss": 0.0579, "step": 39174 }, { "epoch": 0.8632324667955731, "grad_norm": 0.5088626742362976, "learning_rate": 1.447413988519401e-06, "loss": 0.0396, "step": 39175 }, { "epoch": 0.8632545020850892, "grad_norm": 0.5169762372970581, "learning_rate": 1.4469552173322349e-06, "loss": 0.0554, "step": 39176 }, { "epoch": 0.8632765373746054, "grad_norm": 0.4331907331943512, "learning_rate": 1.446496515177998e-06, "loss": 0.0546, "step": 39177 }, { "epoch": 0.8632985726641216, "grad_norm": 0.8140308856964111, "learning_rate": 1.446037882059017e-06, "loss": 0.0685, "step": 39178 }, { "epoch": 0.8633206079536377, "grad_norm": 1.29169499874115, "learning_rate": 1.445579317977635e-06, "loss": 0.1096, "step": 39179 }, { "epoch": 0.8633426432431539, "grad_norm": 0.3743863105773926, "learning_rate": 1.44512082293619e-06, "loss": 0.0593, "step": 39180 }, { "epoch": 0.8633646785326701, "grad_norm": 0.35848379135131836, "learning_rate": 1.4446623969370055e-06, "loss": 0.0338, "step": 39181 }, { "epoch": 0.8633867138221862, "grad_norm": 0.5598270893096924, "learning_rate": 1.4442040399824308e-06, "loss": 0.0586, "step": 39182 }, { "epoch": 0.8634087491117024, "grad_norm": 0.3962893784046173, "learning_rate": 1.4437457520747909e-06, "loss": 0.052, "step": 39183 }, { "epoch": 0.8634307844012186, "grad_norm": 0.6819841861724854, "learning_rate": 1.4432875332164275e-06, "loss": 0.0584, "step": 39184 }, { "epoch": 0.8634528196907347, "grad_norm": 0.2608693242073059, "learning_rate": 1.4428293834096668e-06, "loss": 0.0536, "step": 39185 }, { "epoch": 0.8634748549802509, "grad_norm": 0.4997333884239197, "learning_rate": 1.4423713026568453e-06, "loss": 0.0664, "step": 39186 }, { "epoch": 0.8634968902697671, "grad_norm": 0.607082188129425, "learning_rate": 1.4419132909603012e-06, "loss": 0.0614, "step": 39187 }, { "epoch": 0.8635189255592832, "grad_norm": 0.6274493336677551, "learning_rate": 1.4414553483223593e-06, "loss": 0.09, "step": 39188 }, { "epoch": 0.8635409608487994, "grad_norm": 0.5974223613739014, "learning_rate": 1.4409974747453557e-06, "loss": 0.0563, "step": 39189 }, { "epoch": 0.8635629961383156, "grad_norm": 0.3626098930835724, "learning_rate": 1.4405396702316226e-06, "loss": 0.0578, "step": 39190 }, { "epoch": 0.8635850314278317, "grad_norm": 0.5906501412391663, "learning_rate": 1.4400819347834958e-06, "loss": 0.0551, "step": 39191 }, { "epoch": 0.8636070667173479, "grad_norm": 0.5890596508979797, "learning_rate": 1.4396242684033006e-06, "loss": 0.066, "step": 39192 }, { "epoch": 0.863629102006864, "grad_norm": 0.6487770080566406, "learning_rate": 1.43916667109337e-06, "loss": 0.0695, "step": 39193 }, { "epoch": 0.8636511372963801, "grad_norm": 0.4653066098690033, "learning_rate": 1.4387091428560384e-06, "loss": 0.038, "step": 39194 }, { "epoch": 0.8636731725858963, "grad_norm": 0.5093138217926025, "learning_rate": 1.438251683693631e-06, "loss": 0.0546, "step": 39195 }, { "epoch": 0.8636952078754124, "grad_norm": 0.5111086368560791, "learning_rate": 1.4377942936084826e-06, "loss": 0.049, "step": 39196 }, { "epoch": 0.8637172431649286, "grad_norm": 0.5185485482215881, "learning_rate": 1.4373369726029145e-06, "loss": 0.0456, "step": 39197 }, { "epoch": 0.8637392784544448, "grad_norm": 0.4607599079608917, "learning_rate": 1.4368797206792684e-06, "loss": 0.0413, "step": 39198 }, { "epoch": 0.8637613137439609, "grad_norm": 0.4814634919166565, "learning_rate": 1.4364225378398637e-06, "loss": 0.0376, "step": 39199 }, { "epoch": 0.8637833490334771, "grad_norm": 0.776924192905426, "learning_rate": 1.435965424087034e-06, "loss": 0.0709, "step": 39200 }, { "epoch": 0.8638053843229933, "grad_norm": 0.9604920148849487, "learning_rate": 1.4355083794231055e-06, "loss": 0.0647, "step": 39201 }, { "epoch": 0.8638274196125094, "grad_norm": 0.6708144545555115, "learning_rate": 1.4350514038504048e-06, "loss": 0.0862, "step": 39202 }, { "epoch": 0.8638494549020256, "grad_norm": 0.3665165901184082, "learning_rate": 1.434594497371265e-06, "loss": 0.0365, "step": 39203 }, { "epoch": 0.8638714901915417, "grad_norm": 1.0355604887008667, "learning_rate": 1.434137659988004e-06, "loss": 0.0755, "step": 39204 }, { "epoch": 0.8638935254810579, "grad_norm": 0.5295238494873047, "learning_rate": 1.4336808917029604e-06, "loss": 0.0485, "step": 39205 }, { "epoch": 0.8639155607705741, "grad_norm": 0.7642710208892822, "learning_rate": 1.433224192518452e-06, "loss": 0.0649, "step": 39206 }, { "epoch": 0.8639375960600902, "grad_norm": 0.7847844362258911, "learning_rate": 1.4327675624368104e-06, "loss": 0.0659, "step": 39207 }, { "epoch": 0.8639596313496064, "grad_norm": 0.638230562210083, "learning_rate": 1.432311001460357e-06, "loss": 0.0526, "step": 39208 }, { "epoch": 0.8639816666391226, "grad_norm": 0.6288710832595825, "learning_rate": 1.4318545095914181e-06, "loss": 0.0655, "step": 39209 }, { "epoch": 0.8640037019286387, "grad_norm": 0.5426518321037292, "learning_rate": 1.431398086832324e-06, "loss": 0.0602, "step": 39210 }, { "epoch": 0.8640257372181549, "grad_norm": 0.4830692410469055, "learning_rate": 1.4309417331853908e-06, "loss": 0.0511, "step": 39211 }, { "epoch": 0.8640477725076711, "grad_norm": 0.49833860993385315, "learning_rate": 1.43048544865295e-06, "loss": 0.0641, "step": 39212 }, { "epoch": 0.8640698077971872, "grad_norm": 0.35216742753982544, "learning_rate": 1.4300292332373216e-06, "loss": 0.0573, "step": 39213 }, { "epoch": 0.8640918430867034, "grad_norm": 0.4299788177013397, "learning_rate": 1.4295730869408353e-06, "loss": 0.0625, "step": 39214 }, { "epoch": 0.8641138783762196, "grad_norm": 0.6240054965019226, "learning_rate": 1.4291170097658056e-06, "loss": 0.0609, "step": 39215 }, { "epoch": 0.8641359136657357, "grad_norm": 1.1241079568862915, "learning_rate": 1.4286610017145613e-06, "loss": 0.0808, "step": 39216 }, { "epoch": 0.8641579489552519, "grad_norm": 1.0499848127365112, "learning_rate": 1.4282050627894265e-06, "loss": 0.0726, "step": 39217 }, { "epoch": 0.864179984244768, "grad_norm": 0.7779833078384399, "learning_rate": 1.42774919299272e-06, "loss": 0.0686, "step": 39218 }, { "epoch": 0.8642020195342841, "grad_norm": 0.38323330879211426, "learning_rate": 1.427293392326766e-06, "loss": 0.0689, "step": 39219 }, { "epoch": 0.8642240548238003, "grad_norm": 0.4366324543952942, "learning_rate": 1.4268376607938798e-06, "loss": 0.0382, "step": 39220 }, { "epoch": 0.8642460901133164, "grad_norm": 0.5576927065849304, "learning_rate": 1.4263819983963927e-06, "loss": 0.0546, "step": 39221 }, { "epoch": 0.8642681254028326, "grad_norm": 0.7204366326332092, "learning_rate": 1.4259264051366194e-06, "loss": 0.066, "step": 39222 }, { "epoch": 0.8642901606923488, "grad_norm": 0.3352654278278351, "learning_rate": 1.4254708810168848e-06, "loss": 0.0478, "step": 39223 }, { "epoch": 0.8643121959818649, "grad_norm": 0.6318578124046326, "learning_rate": 1.425015426039502e-06, "loss": 0.0829, "step": 39224 }, { "epoch": 0.8643342312713811, "grad_norm": 0.8857590556144714, "learning_rate": 1.4245600402067959e-06, "loss": 0.0867, "step": 39225 }, { "epoch": 0.8643562665608973, "grad_norm": 0.6911249756813049, "learning_rate": 1.424104723521088e-06, "loss": 0.0719, "step": 39226 }, { "epoch": 0.8643783018504134, "grad_norm": 0.4978971481323242, "learning_rate": 1.4236494759846896e-06, "loss": 0.086, "step": 39227 }, { "epoch": 0.8644003371399296, "grad_norm": 0.4392376244068146, "learning_rate": 1.423194297599929e-06, "loss": 0.0586, "step": 39228 }, { "epoch": 0.8644223724294458, "grad_norm": 0.17467187345027924, "learning_rate": 1.4227391883691194e-06, "loss": 0.038, "step": 39229 }, { "epoch": 0.8644444077189619, "grad_norm": 0.9220452308654785, "learning_rate": 1.4222841482945803e-06, "loss": 0.0827, "step": 39230 }, { "epoch": 0.8644664430084781, "grad_norm": 0.7805816531181335, "learning_rate": 1.4218291773786285e-06, "loss": 0.0629, "step": 39231 }, { "epoch": 0.8644884782979942, "grad_norm": 0.6189084649085999, "learning_rate": 1.4213742756235804e-06, "loss": 0.0932, "step": 39232 }, { "epoch": 0.8645105135875104, "grad_norm": 0.6817324161529541, "learning_rate": 1.4209194430317573e-06, "loss": 0.0556, "step": 39233 }, { "epoch": 0.8645325488770266, "grad_norm": 0.7166255712509155, "learning_rate": 1.420464679605471e-06, "loss": 0.0526, "step": 39234 }, { "epoch": 0.8645545841665427, "grad_norm": 0.9155443906784058, "learning_rate": 1.4200099853470394e-06, "loss": 0.1024, "step": 39235 }, { "epoch": 0.8645766194560589, "grad_norm": 0.5212213397026062, "learning_rate": 1.4195553602587791e-06, "loss": 0.0463, "step": 39236 }, { "epoch": 0.8645986547455751, "grad_norm": 0.3526860773563385, "learning_rate": 1.4191008043430099e-06, "loss": 0.0857, "step": 39237 }, { "epoch": 0.8646206900350912, "grad_norm": 0.5749080777168274, "learning_rate": 1.4186463176020397e-06, "loss": 0.034, "step": 39238 }, { "epoch": 0.8646427253246074, "grad_norm": 0.5381636619567871, "learning_rate": 1.4181919000381854e-06, "loss": 0.0433, "step": 39239 }, { "epoch": 0.8646647606141236, "grad_norm": 0.8315949440002441, "learning_rate": 1.4177375516537667e-06, "loss": 0.0765, "step": 39240 }, { "epoch": 0.8646867959036397, "grad_norm": 0.5019487738609314, "learning_rate": 1.4172832724510915e-06, "loss": 0.0696, "step": 39241 }, { "epoch": 0.8647088311931559, "grad_norm": 0.36077919602394104, "learning_rate": 1.4168290624324782e-06, "loss": 0.0342, "step": 39242 }, { "epoch": 0.864730866482672, "grad_norm": 0.7973330616950989, "learning_rate": 1.416374921600233e-06, "loss": 0.0764, "step": 39243 }, { "epoch": 0.8647529017721881, "grad_norm": 0.61627197265625, "learning_rate": 1.4159208499566811e-06, "loss": 0.0402, "step": 39244 }, { "epoch": 0.8647749370617043, "grad_norm": 0.46449849009513855, "learning_rate": 1.4154668475041239e-06, "loss": 0.0521, "step": 39245 }, { "epoch": 0.8647969723512204, "grad_norm": 0.7028229236602783, "learning_rate": 1.4150129142448809e-06, "loss": 0.0704, "step": 39246 }, { "epoch": 0.8648190076407366, "grad_norm": 0.5861353278160095, "learning_rate": 1.4145590501812622e-06, "loss": 0.0595, "step": 39247 }, { "epoch": 0.8648410429302528, "grad_norm": 0.68210369348526, "learning_rate": 1.4141052553155791e-06, "loss": 0.0499, "step": 39248 }, { "epoch": 0.8648630782197689, "grad_norm": 0.36728423833847046, "learning_rate": 1.4136515296501452e-06, "loss": 0.0403, "step": 39249 }, { "epoch": 0.8648851135092851, "grad_norm": 0.6963366270065308, "learning_rate": 1.413197873187263e-06, "loss": 0.1027, "step": 39250 }, { "epoch": 0.8649071487988013, "grad_norm": 0.5763896107673645, "learning_rate": 1.412744285929256e-06, "loss": 0.0596, "step": 39251 }, { "epoch": 0.8649291840883174, "grad_norm": 0.600192666053772, "learning_rate": 1.4122907678784258e-06, "loss": 0.0601, "step": 39252 }, { "epoch": 0.8649512193778336, "grad_norm": 0.3674312233924866, "learning_rate": 1.4118373190370888e-06, "loss": 0.0563, "step": 39253 }, { "epoch": 0.8649732546673498, "grad_norm": 0.6666370630264282, "learning_rate": 1.4113839394075461e-06, "loss": 0.0469, "step": 39254 }, { "epoch": 0.8649952899568659, "grad_norm": 0.5216527581214905, "learning_rate": 1.410930628992113e-06, "loss": 0.0632, "step": 39255 }, { "epoch": 0.8650173252463821, "grad_norm": 0.989620566368103, "learning_rate": 1.4104773877930993e-06, "loss": 0.0855, "step": 39256 }, { "epoch": 0.8650393605358982, "grad_norm": 0.42751339077949524, "learning_rate": 1.4100242158128095e-06, "loss": 0.0381, "step": 39257 }, { "epoch": 0.8650613958254144, "grad_norm": 0.4813184142112732, "learning_rate": 1.4095711130535538e-06, "loss": 0.0666, "step": 39258 }, { "epoch": 0.8650834311149306, "grad_norm": 0.5883325338363647, "learning_rate": 1.40911807951764e-06, "loss": 0.0464, "step": 39259 }, { "epoch": 0.8651054664044467, "grad_norm": 0.6619642376899719, "learning_rate": 1.4086651152073799e-06, "loss": 0.0511, "step": 39260 }, { "epoch": 0.8651275016939629, "grad_norm": 0.5891361236572266, "learning_rate": 1.4082122201250714e-06, "loss": 0.0713, "step": 39261 }, { "epoch": 0.8651495369834791, "grad_norm": 0.6600615978240967, "learning_rate": 1.4077593942730277e-06, "loss": 0.0698, "step": 39262 }, { "epoch": 0.8651715722729952, "grad_norm": 0.7414419651031494, "learning_rate": 1.407306637653557e-06, "loss": 0.0619, "step": 39263 }, { "epoch": 0.8651936075625114, "grad_norm": 0.295068621635437, "learning_rate": 1.406853950268961e-06, "loss": 0.0401, "step": 39264 }, { "epoch": 0.8652156428520276, "grad_norm": 0.6488904356956482, "learning_rate": 1.4064013321215475e-06, "loss": 0.0567, "step": 39265 }, { "epoch": 0.8652376781415437, "grad_norm": 0.7456785440444946, "learning_rate": 1.405948783213618e-06, "loss": 0.0627, "step": 39266 }, { "epoch": 0.8652597134310599, "grad_norm": 0.8325597643852234, "learning_rate": 1.405496303547486e-06, "loss": 0.0746, "step": 39267 }, { "epoch": 0.865281748720576, "grad_norm": 0.6290596723556519, "learning_rate": 1.4050438931254479e-06, "loss": 0.0569, "step": 39268 }, { "epoch": 0.8653037840100921, "grad_norm": 0.7058447003364563, "learning_rate": 1.4045915519498115e-06, "loss": 0.0452, "step": 39269 }, { "epoch": 0.8653258192996083, "grad_norm": 0.48856619000434875, "learning_rate": 1.4041392800228837e-06, "loss": 0.0704, "step": 39270 }, { "epoch": 0.8653478545891244, "grad_norm": 0.3217795193195343, "learning_rate": 1.4036870773469623e-06, "loss": 0.0676, "step": 39271 }, { "epoch": 0.8653698898786406, "grad_norm": 0.8393857479095459, "learning_rate": 1.4032349439243558e-06, "loss": 0.0733, "step": 39272 }, { "epoch": 0.8653919251681568, "grad_norm": 0.4897851049900055, "learning_rate": 1.4027828797573605e-06, "loss": 0.0802, "step": 39273 }, { "epoch": 0.8654139604576729, "grad_norm": 0.3235415518283844, "learning_rate": 1.4023308848482896e-06, "loss": 0.0615, "step": 39274 }, { "epoch": 0.8654359957471891, "grad_norm": 0.616145133972168, "learning_rate": 1.4018789591994346e-06, "loss": 0.0545, "step": 39275 }, { "epoch": 0.8654580310367053, "grad_norm": 0.40580034255981445, "learning_rate": 1.4014271028131054e-06, "loss": 0.0826, "step": 39276 }, { "epoch": 0.8654800663262214, "grad_norm": 0.828447163105011, "learning_rate": 1.400975315691595e-06, "loss": 0.0613, "step": 39277 }, { "epoch": 0.8655021016157376, "grad_norm": 0.49098002910614014, "learning_rate": 1.400523597837215e-06, "loss": 0.0315, "step": 39278 }, { "epoch": 0.8655241369052538, "grad_norm": 0.5940794348716736, "learning_rate": 1.4000719492522601e-06, "loss": 0.0407, "step": 39279 }, { "epoch": 0.8655461721947699, "grad_norm": 0.5275018215179443, "learning_rate": 1.3996203699390303e-06, "loss": 0.054, "step": 39280 }, { "epoch": 0.8655682074842861, "grad_norm": 0.45143499970436096, "learning_rate": 1.3991688598998254e-06, "loss": 0.0463, "step": 39281 }, { "epoch": 0.8655902427738023, "grad_norm": 0.46752965450286865, "learning_rate": 1.3987174191369483e-06, "loss": 0.0473, "step": 39282 }, { "epoch": 0.8656122780633184, "grad_norm": 0.75029456615448, "learning_rate": 1.3982660476526992e-06, "loss": 0.0648, "step": 39283 }, { "epoch": 0.8656343133528346, "grad_norm": 0.499089777469635, "learning_rate": 1.397814745449371e-06, "loss": 0.0669, "step": 39284 }, { "epoch": 0.8656563486423507, "grad_norm": 0.3371741473674774, "learning_rate": 1.397363512529269e-06, "loss": 0.044, "step": 39285 }, { "epoch": 0.8656783839318669, "grad_norm": 0.525671124458313, "learning_rate": 1.3969123488946873e-06, "loss": 0.051, "step": 39286 }, { "epoch": 0.8657004192213831, "grad_norm": 0.5519965291023254, "learning_rate": 1.3964612545479278e-06, "loss": 0.067, "step": 39287 }, { "epoch": 0.8657224545108992, "grad_norm": 0.7312911152839661, "learning_rate": 1.3960102294912836e-06, "loss": 0.0346, "step": 39288 }, { "epoch": 0.8657444898004154, "grad_norm": 0.6002199053764343, "learning_rate": 1.3955592737270545e-06, "loss": 0.052, "step": 39289 }, { "epoch": 0.8657665250899316, "grad_norm": 0.6096614003181458, "learning_rate": 1.3951083872575404e-06, "loss": 0.0469, "step": 39290 }, { "epoch": 0.8657885603794477, "grad_norm": 0.8112496733665466, "learning_rate": 1.394657570085031e-06, "loss": 0.0719, "step": 39291 }, { "epoch": 0.8658105956689639, "grad_norm": 0.6114729642868042, "learning_rate": 1.3942068222118282e-06, "loss": 0.0789, "step": 39292 }, { "epoch": 0.86583263095848, "grad_norm": 0.5017029643058777, "learning_rate": 1.3937561436402247e-06, "loss": 0.0854, "step": 39293 }, { "epoch": 0.8658546662479961, "grad_norm": 0.5005652904510498, "learning_rate": 1.3933055343725205e-06, "loss": 0.0471, "step": 39294 }, { "epoch": 0.8658767015375123, "grad_norm": 0.7320572733879089, "learning_rate": 1.3928549944110052e-06, "loss": 0.0571, "step": 39295 }, { "epoch": 0.8658987368270284, "grad_norm": 0.3347347676753998, "learning_rate": 1.3924045237579757e-06, "loss": 0.0538, "step": 39296 }, { "epoch": 0.8659207721165446, "grad_norm": 0.7625900506973267, "learning_rate": 1.3919541224157301e-06, "loss": 0.0543, "step": 39297 }, { "epoch": 0.8659428074060608, "grad_norm": 0.9204666614532471, "learning_rate": 1.3915037903865563e-06, "loss": 0.0794, "step": 39298 }, { "epoch": 0.8659648426955769, "grad_norm": 0.31596848368644714, "learning_rate": 1.3910535276727525e-06, "loss": 0.0503, "step": 39299 }, { "epoch": 0.8659868779850931, "grad_norm": 0.4391622841358185, "learning_rate": 1.3906033342766072e-06, "loss": 0.0447, "step": 39300 }, { "epoch": 0.8660089132746093, "grad_norm": 0.48839065432548523, "learning_rate": 1.3901532102004217e-06, "loss": 0.0624, "step": 39301 }, { "epoch": 0.8660309485641254, "grad_norm": 0.4143938720226288, "learning_rate": 1.3897031554464822e-06, "loss": 0.0568, "step": 39302 }, { "epoch": 0.8660529838536416, "grad_norm": 0.495299369096756, "learning_rate": 1.3892531700170856e-06, "loss": 0.049, "step": 39303 }, { "epoch": 0.8660750191431578, "grad_norm": 0.7096771597862244, "learning_rate": 1.3888032539145184e-06, "loss": 0.0798, "step": 39304 }, { "epoch": 0.8660970544326739, "grad_norm": 0.3508821427822113, "learning_rate": 1.3883534071410735e-06, "loss": 0.0508, "step": 39305 }, { "epoch": 0.8661190897221901, "grad_norm": 0.4280455410480499, "learning_rate": 1.3879036296990493e-06, "loss": 0.0543, "step": 39306 }, { "epoch": 0.8661411250117063, "grad_norm": 0.6338601112365723, "learning_rate": 1.3874539215907272e-06, "loss": 0.065, "step": 39307 }, { "epoch": 0.8661631603012224, "grad_norm": 0.4312785565853119, "learning_rate": 1.387004282818402e-06, "loss": 0.0528, "step": 39308 }, { "epoch": 0.8661851955907386, "grad_norm": 0.6119073629379272, "learning_rate": 1.3865547133843638e-06, "loss": 0.0456, "step": 39309 }, { "epoch": 0.8662072308802548, "grad_norm": 0.38572657108306885, "learning_rate": 1.3861052132909053e-06, "loss": 0.054, "step": 39310 }, { "epoch": 0.8662292661697709, "grad_norm": 0.9524659514427185, "learning_rate": 1.3856557825403099e-06, "loss": 0.0915, "step": 39311 }, { "epoch": 0.8662513014592871, "grad_norm": 0.5640408992767334, "learning_rate": 1.385206421134871e-06, "loss": 0.0537, "step": 39312 }, { "epoch": 0.8662733367488032, "grad_norm": 0.4879808723926544, "learning_rate": 1.3847571290768796e-06, "loss": 0.0489, "step": 39313 }, { "epoch": 0.8662953720383194, "grad_norm": 0.48285529017448425, "learning_rate": 1.3843079063686176e-06, "loss": 0.0435, "step": 39314 }, { "epoch": 0.8663174073278356, "grad_norm": 0.6076955795288086, "learning_rate": 1.3838587530123764e-06, "loss": 0.0472, "step": 39315 }, { "epoch": 0.8663394426173517, "grad_norm": 0.4435447156429291, "learning_rate": 1.3834096690104458e-06, "loss": 0.06, "step": 39316 }, { "epoch": 0.8663614779068678, "grad_norm": 0.4372701942920685, "learning_rate": 1.3829606543651124e-06, "loss": 0.0609, "step": 39317 }, { "epoch": 0.866383513196384, "grad_norm": 0.8294905424118042, "learning_rate": 1.382511709078661e-06, "loss": 0.0557, "step": 39318 }, { "epoch": 0.8664055484859001, "grad_norm": 0.9219672679901123, "learning_rate": 1.3820628331533797e-06, "loss": 0.05, "step": 39319 }, { "epoch": 0.8664275837754163, "grad_norm": 0.5365197062492371, "learning_rate": 1.3816140265915567e-06, "loss": 0.0688, "step": 39320 }, { "epoch": 0.8664496190649325, "grad_norm": 0.48501306772232056, "learning_rate": 1.3811652893954752e-06, "loss": 0.0557, "step": 39321 }, { "epoch": 0.8664716543544486, "grad_norm": 0.8883605003356934, "learning_rate": 1.3807166215674234e-06, "loss": 0.0684, "step": 39322 }, { "epoch": 0.8664936896439648, "grad_norm": 0.7856531143188477, "learning_rate": 1.3802680231096792e-06, "loss": 0.0729, "step": 39323 }, { "epoch": 0.866515724933481, "grad_norm": 0.49498313665390015, "learning_rate": 1.3798194940245395e-06, "loss": 0.0349, "step": 39324 }, { "epoch": 0.8665377602229971, "grad_norm": 0.7245953679084778, "learning_rate": 1.3793710343142806e-06, "loss": 0.0594, "step": 39325 }, { "epoch": 0.8665597955125133, "grad_norm": 0.3769110143184662, "learning_rate": 1.3789226439811908e-06, "loss": 0.0623, "step": 39326 }, { "epoch": 0.8665818308020294, "grad_norm": 0.47624823451042175, "learning_rate": 1.37847432302755e-06, "loss": 0.0589, "step": 39327 }, { "epoch": 0.8666038660915456, "grad_norm": 0.35493704676628113, "learning_rate": 1.3780260714556442e-06, "loss": 0.0316, "step": 39328 }, { "epoch": 0.8666259013810618, "grad_norm": 0.6144368052482605, "learning_rate": 1.3775778892677587e-06, "loss": 0.0691, "step": 39329 }, { "epoch": 0.8666479366705779, "grad_norm": 0.3768077492713928, "learning_rate": 1.37712977646617e-06, "loss": 0.0312, "step": 39330 }, { "epoch": 0.8666699719600941, "grad_norm": 0.45353108644485474, "learning_rate": 1.3766817330531661e-06, "loss": 0.073, "step": 39331 }, { "epoch": 0.8666920072496103, "grad_norm": 0.15167388319969177, "learning_rate": 1.376233759031027e-06, "loss": 0.0372, "step": 39332 }, { "epoch": 0.8667140425391264, "grad_norm": 0.4725853502750397, "learning_rate": 1.3757858544020374e-06, "loss": 0.051, "step": 39333 }, { "epoch": 0.8667360778286426, "grad_norm": 0.21047617495059967, "learning_rate": 1.375338019168474e-06, "loss": 0.0486, "step": 39334 }, { "epoch": 0.8667581131181588, "grad_norm": 0.598635733127594, "learning_rate": 1.3748902533326212e-06, "loss": 0.0743, "step": 39335 }, { "epoch": 0.8667801484076749, "grad_norm": 0.7706044316291809, "learning_rate": 1.3744425568967594e-06, "loss": 0.0736, "step": 39336 }, { "epoch": 0.8668021836971911, "grad_norm": 0.4588856101036072, "learning_rate": 1.3739949298631666e-06, "loss": 0.033, "step": 39337 }, { "epoch": 0.8668242189867073, "grad_norm": 0.3359360992908478, "learning_rate": 1.373547372234124e-06, "loss": 0.0496, "step": 39338 }, { "epoch": 0.8668462542762234, "grad_norm": 0.3842393457889557, "learning_rate": 1.3730998840119136e-06, "loss": 0.0349, "step": 39339 }, { "epoch": 0.8668682895657396, "grad_norm": 0.8189519643783569, "learning_rate": 1.372652465198813e-06, "loss": 0.0359, "step": 39340 }, { "epoch": 0.8668903248552557, "grad_norm": 0.35526689887046814, "learning_rate": 1.3722051157970995e-06, "loss": 0.0796, "step": 39341 }, { "epoch": 0.8669123601447718, "grad_norm": 0.5800034999847412, "learning_rate": 1.3717578358090539e-06, "loss": 0.0603, "step": 39342 }, { "epoch": 0.866934395434288, "grad_norm": 0.7453637719154358, "learning_rate": 1.3713106252369545e-06, "loss": 0.057, "step": 39343 }, { "epoch": 0.8669564307238041, "grad_norm": 0.7306931614875793, "learning_rate": 1.3708634840830763e-06, "loss": 0.0517, "step": 39344 }, { "epoch": 0.8669784660133203, "grad_norm": 0.2974858283996582, "learning_rate": 1.3704164123497027e-06, "loss": 0.0331, "step": 39345 }, { "epoch": 0.8670005013028365, "grad_norm": 0.601229190826416, "learning_rate": 1.3699694100391014e-06, "loss": 0.0626, "step": 39346 }, { "epoch": 0.8670225365923526, "grad_norm": 0.29760271310806274, "learning_rate": 1.3695224771535591e-06, "loss": 0.0416, "step": 39347 }, { "epoch": 0.8670445718818688, "grad_norm": 0.5036695599555969, "learning_rate": 1.3690756136953458e-06, "loss": 0.0352, "step": 39348 }, { "epoch": 0.867066607171385, "grad_norm": 0.8043577671051025, "learning_rate": 1.368628819666743e-06, "loss": 0.0644, "step": 39349 }, { "epoch": 0.8670886424609011, "grad_norm": 0.4288385808467865, "learning_rate": 1.3681820950700202e-06, "loss": 0.0667, "step": 39350 }, { "epoch": 0.8671106777504173, "grad_norm": 0.8981650471687317, "learning_rate": 1.367735439907456e-06, "loss": 0.0631, "step": 39351 }, { "epoch": 0.8671327130399334, "grad_norm": 0.510744571685791, "learning_rate": 1.3672888541813284e-06, "loss": 0.0512, "step": 39352 }, { "epoch": 0.8671547483294496, "grad_norm": 0.33961552381515503, "learning_rate": 1.366842337893907e-06, "loss": 0.0539, "step": 39353 }, { "epoch": 0.8671767836189658, "grad_norm": 1.138366460800171, "learning_rate": 1.3663958910474673e-06, "loss": 0.0881, "step": 39354 }, { "epoch": 0.8671988189084819, "grad_norm": 0.41999706625938416, "learning_rate": 1.365949513644285e-06, "loss": 0.0438, "step": 39355 }, { "epoch": 0.8672208541979981, "grad_norm": 0.4612031579017639, "learning_rate": 1.365503205686634e-06, "loss": 0.0401, "step": 39356 }, { "epoch": 0.8672428894875143, "grad_norm": 0.5992361903190613, "learning_rate": 1.3650569671767854e-06, "loss": 0.0416, "step": 39357 }, { "epoch": 0.8672649247770304, "grad_norm": 0.4150978624820709, "learning_rate": 1.3646107981170125e-06, "loss": 0.0292, "step": 39358 }, { "epoch": 0.8672869600665466, "grad_norm": 0.5472977161407471, "learning_rate": 1.3641646985095935e-06, "loss": 0.0627, "step": 39359 }, { "epoch": 0.8673089953560628, "grad_norm": 0.5036704540252686, "learning_rate": 1.3637186683567899e-06, "loss": 0.0456, "step": 39360 }, { "epoch": 0.8673310306455789, "grad_norm": 0.4544711709022522, "learning_rate": 1.3632727076608815e-06, "loss": 0.0583, "step": 39361 }, { "epoch": 0.8673530659350951, "grad_norm": 0.6201580166816711, "learning_rate": 1.3628268164241382e-06, "loss": 0.0497, "step": 39362 }, { "epoch": 0.8673751012246113, "grad_norm": 0.6754066348075867, "learning_rate": 1.3623809946488314e-06, "loss": 0.0374, "step": 39363 }, { "epoch": 0.8673971365141274, "grad_norm": 0.6384761333465576, "learning_rate": 1.3619352423372294e-06, "loss": 0.0716, "step": 39364 }, { "epoch": 0.8674191718036436, "grad_norm": 0.6760691404342651, "learning_rate": 1.3614895594916056e-06, "loss": 0.0707, "step": 39365 }, { "epoch": 0.8674412070931597, "grad_norm": 0.47912660241127014, "learning_rate": 1.361043946114231e-06, "loss": 0.063, "step": 39366 }, { "epoch": 0.8674632423826758, "grad_norm": 0.4794735908508301, "learning_rate": 1.3605984022073709e-06, "loss": 0.0575, "step": 39367 }, { "epoch": 0.867485277672192, "grad_norm": 0.7919855713844299, "learning_rate": 1.3601529277733e-06, "loss": 0.0737, "step": 39368 }, { "epoch": 0.8675073129617081, "grad_norm": 1.0361075401306152, "learning_rate": 1.359707522814278e-06, "loss": 0.0508, "step": 39369 }, { "epoch": 0.8675293482512243, "grad_norm": 0.34889376163482666, "learning_rate": 1.3592621873325866e-06, "loss": 0.0611, "step": 39370 }, { "epoch": 0.8675513835407405, "grad_norm": 0.7373672127723694, "learning_rate": 1.358816921330484e-06, "loss": 0.0798, "step": 39371 }, { "epoch": 0.8675734188302566, "grad_norm": 0.720077633857727, "learning_rate": 1.3583717248102452e-06, "loss": 0.0496, "step": 39372 }, { "epoch": 0.8675954541197728, "grad_norm": 0.6493961215019226, "learning_rate": 1.3579265977741311e-06, "loss": 0.0605, "step": 39373 }, { "epoch": 0.867617489409289, "grad_norm": 0.7081538438796997, "learning_rate": 1.357481540224414e-06, "loss": 0.0569, "step": 39374 }, { "epoch": 0.8676395246988051, "grad_norm": 0.40349864959716797, "learning_rate": 1.35703655216336e-06, "loss": 0.0429, "step": 39375 }, { "epoch": 0.8676615599883213, "grad_norm": 0.5565558671951294, "learning_rate": 1.3565916335932321e-06, "loss": 0.0393, "step": 39376 }, { "epoch": 0.8676835952778374, "grad_norm": 0.9329209923744202, "learning_rate": 1.3561467845163006e-06, "loss": 0.0731, "step": 39377 }, { "epoch": 0.8677056305673536, "grad_norm": 0.6529214978218079, "learning_rate": 1.3557020049348284e-06, "loss": 0.0717, "step": 39378 }, { "epoch": 0.8677276658568698, "grad_norm": 0.49224016070365906, "learning_rate": 1.3552572948510871e-06, "loss": 0.0495, "step": 39379 }, { "epoch": 0.8677497011463859, "grad_norm": 0.6937916278839111, "learning_rate": 1.3548126542673334e-06, "loss": 0.0822, "step": 39380 }, { "epoch": 0.8677717364359021, "grad_norm": 0.7741813659667969, "learning_rate": 1.354368083185835e-06, "loss": 0.0748, "step": 39381 }, { "epoch": 0.8677937717254183, "grad_norm": 0.6277793049812317, "learning_rate": 1.3539235816088608e-06, "loss": 0.0607, "step": 39382 }, { "epoch": 0.8678158070149344, "grad_norm": 0.6928906440734863, "learning_rate": 1.35347914953867e-06, "loss": 0.0714, "step": 39383 }, { "epoch": 0.8678378423044506, "grad_norm": 0.8770116567611694, "learning_rate": 1.3530347869775262e-06, "loss": 0.0587, "step": 39384 }, { "epoch": 0.8678598775939668, "grad_norm": 0.6453326940536499, "learning_rate": 1.3525904939276939e-06, "loss": 0.0575, "step": 39385 }, { "epoch": 0.8678819128834829, "grad_norm": 0.47572243213653564, "learning_rate": 1.3521462703914417e-06, "loss": 0.0665, "step": 39386 }, { "epoch": 0.8679039481729991, "grad_norm": 0.6608866453170776, "learning_rate": 1.3517021163710224e-06, "loss": 0.0699, "step": 39387 }, { "epoch": 0.8679259834625153, "grad_norm": 0.7292765378952026, "learning_rate": 1.3512580318687028e-06, "loss": 0.0725, "step": 39388 }, { "epoch": 0.8679480187520314, "grad_norm": 0.36616361141204834, "learning_rate": 1.3508140168867462e-06, "loss": 0.0432, "step": 39389 }, { "epoch": 0.8679700540415476, "grad_norm": 0.35144421458244324, "learning_rate": 1.3503700714274153e-06, "loss": 0.0699, "step": 39390 }, { "epoch": 0.8679920893310636, "grad_norm": 0.6059176325798035, "learning_rate": 1.3499261954929688e-06, "loss": 0.0662, "step": 39391 }, { "epoch": 0.8680141246205798, "grad_norm": 0.3427909016609192, "learning_rate": 1.349482389085663e-06, "loss": 0.0646, "step": 39392 }, { "epoch": 0.868036159910096, "grad_norm": 0.462589830160141, "learning_rate": 1.349038652207769e-06, "loss": 0.0652, "step": 39393 }, { "epoch": 0.8680581951996121, "grad_norm": 0.9352313876152039, "learning_rate": 1.3485949848615374e-06, "loss": 0.08, "step": 39394 }, { "epoch": 0.8680802304891283, "grad_norm": 0.6118976473808289, "learning_rate": 1.3481513870492363e-06, "loss": 0.0414, "step": 39395 }, { "epoch": 0.8681022657786445, "grad_norm": 0.6613377332687378, "learning_rate": 1.347707858773115e-06, "loss": 0.0521, "step": 39396 }, { "epoch": 0.8681243010681606, "grad_norm": 0.5601029992103577, "learning_rate": 1.347264400035444e-06, "loss": 0.0622, "step": 39397 }, { "epoch": 0.8681463363576768, "grad_norm": 0.3428693115711212, "learning_rate": 1.3468210108384726e-06, "loss": 0.0619, "step": 39398 }, { "epoch": 0.868168371647193, "grad_norm": 0.7117494344711304, "learning_rate": 1.3463776911844677e-06, "loss": 0.0618, "step": 39399 }, { "epoch": 0.8681904069367091, "grad_norm": 0.5224746465682983, "learning_rate": 1.345934441075679e-06, "loss": 0.0692, "step": 39400 }, { "epoch": 0.8682124422262253, "grad_norm": 0.6873183250427246, "learning_rate": 1.3454912605143683e-06, "loss": 0.0802, "step": 39401 }, { "epoch": 0.8682344775157415, "grad_norm": 0.6587514281272888, "learning_rate": 1.3450481495027967e-06, "loss": 0.0569, "step": 39402 }, { "epoch": 0.8682565128052576, "grad_norm": 0.6305304169654846, "learning_rate": 1.3446051080432109e-06, "loss": 0.0655, "step": 39403 }, { "epoch": 0.8682785480947738, "grad_norm": 0.3350793719291687, "learning_rate": 1.3441621361378792e-06, "loss": 0.043, "step": 39404 }, { "epoch": 0.86830058338429, "grad_norm": 0.6416805982589722, "learning_rate": 1.3437192337890497e-06, "loss": 0.0504, "step": 39405 }, { "epoch": 0.8683226186738061, "grad_norm": 0.6756672263145447, "learning_rate": 1.343276400998984e-06, "loss": 0.0793, "step": 39406 }, { "epoch": 0.8683446539633223, "grad_norm": 0.6242665648460388, "learning_rate": 1.3428336377699318e-06, "loss": 0.0749, "step": 39407 }, { "epoch": 0.8683666892528384, "grad_norm": 0.9065642356872559, "learning_rate": 1.3423909441041516e-06, "loss": 0.0834, "step": 39408 }, { "epoch": 0.8683887245423546, "grad_norm": 0.7153690457344055, "learning_rate": 1.3419483200039012e-06, "loss": 0.0451, "step": 39409 }, { "epoch": 0.8684107598318708, "grad_norm": 0.6508526802062988, "learning_rate": 1.3415057654714291e-06, "loss": 0.0676, "step": 39410 }, { "epoch": 0.8684327951213869, "grad_norm": 0.7365832924842834, "learning_rate": 1.3410632805089935e-06, "loss": 0.0464, "step": 39411 }, { "epoch": 0.8684548304109031, "grad_norm": 0.7323126792907715, "learning_rate": 1.3406208651188456e-06, "loss": 0.0916, "step": 39412 }, { "epoch": 0.8684768657004193, "grad_norm": 0.4502413868904114, "learning_rate": 1.340178519303244e-06, "loss": 0.0566, "step": 39413 }, { "epoch": 0.8684989009899354, "grad_norm": 0.28390952944755554, "learning_rate": 1.339736243064435e-06, "loss": 0.0559, "step": 39414 }, { "epoch": 0.8685209362794516, "grad_norm": 0.8801313042640686, "learning_rate": 1.3392940364046736e-06, "loss": 0.0623, "step": 39415 }, { "epoch": 0.8685429715689676, "grad_norm": 0.8473280072212219, "learning_rate": 1.3388518993262177e-06, "loss": 0.0682, "step": 39416 }, { "epoch": 0.8685650068584838, "grad_norm": 0.4375198781490326, "learning_rate": 1.338409831831311e-06, "loss": 0.0829, "step": 39417 }, { "epoch": 0.868587042148, "grad_norm": 0.894783616065979, "learning_rate": 1.3379678339222112e-06, "loss": 0.0584, "step": 39418 }, { "epoch": 0.8686090774375161, "grad_norm": 1.3869493007659912, "learning_rate": 1.3375259056011636e-06, "loss": 0.0936, "step": 39419 }, { "epoch": 0.8686311127270323, "grad_norm": 0.6816340684890747, "learning_rate": 1.3370840468704276e-06, "loss": 0.084, "step": 39420 }, { "epoch": 0.8686531480165485, "grad_norm": 0.42819350957870483, "learning_rate": 1.336642257732245e-06, "loss": 0.0432, "step": 39421 }, { "epoch": 0.8686751833060646, "grad_norm": 0.3812140226364136, "learning_rate": 1.336200538188876e-06, "loss": 0.0571, "step": 39422 }, { "epoch": 0.8686972185955808, "grad_norm": 0.3958031237125397, "learning_rate": 1.3357588882425597e-06, "loss": 0.0609, "step": 39423 }, { "epoch": 0.868719253885097, "grad_norm": 0.601962685585022, "learning_rate": 1.3353173078955516e-06, "loss": 0.0802, "step": 39424 }, { "epoch": 0.8687412891746131, "grad_norm": 0.5300310850143433, "learning_rate": 1.334875797150103e-06, "loss": 0.0741, "step": 39425 }, { "epoch": 0.8687633244641293, "grad_norm": 0.7508077025413513, "learning_rate": 1.3344343560084538e-06, "loss": 0.0854, "step": 39426 }, { "epoch": 0.8687853597536455, "grad_norm": 0.32839280366897583, "learning_rate": 1.3339929844728638e-06, "loss": 0.0458, "step": 39427 }, { "epoch": 0.8688073950431616, "grad_norm": 0.9431545734405518, "learning_rate": 1.333551682545573e-06, "loss": 0.0675, "step": 39428 }, { "epoch": 0.8688294303326778, "grad_norm": 0.5968768000602722, "learning_rate": 1.3331104502288344e-06, "loss": 0.0546, "step": 39429 }, { "epoch": 0.868851465622194, "grad_norm": 0.5530737638473511, "learning_rate": 1.3326692875248913e-06, "loss": 0.048, "step": 39430 }, { "epoch": 0.8688735009117101, "grad_norm": 0.69465172290802, "learning_rate": 1.3322281944359921e-06, "loss": 0.0694, "step": 39431 }, { "epoch": 0.8688955362012263, "grad_norm": 0.7156447768211365, "learning_rate": 1.3317871709643863e-06, "loss": 0.0939, "step": 39432 }, { "epoch": 0.8689175714907424, "grad_norm": 0.7133674025535583, "learning_rate": 1.3313462171123158e-06, "loss": 0.0652, "step": 39433 }, { "epoch": 0.8689396067802586, "grad_norm": 0.4970547556877136, "learning_rate": 1.3309053328820286e-06, "loss": 0.0854, "step": 39434 }, { "epoch": 0.8689616420697748, "grad_norm": 0.6459042429924011, "learning_rate": 1.3304645182757713e-06, "loss": 0.064, "step": 39435 }, { "epoch": 0.8689836773592909, "grad_norm": 0.7993927597999573, "learning_rate": 1.3300237732957904e-06, "loss": 0.0561, "step": 39436 }, { "epoch": 0.8690057126488071, "grad_norm": 0.7621728777885437, "learning_rate": 1.3295830979443258e-06, "loss": 0.0894, "step": 39437 }, { "epoch": 0.8690277479383233, "grad_norm": 0.5559563636779785, "learning_rate": 1.3291424922236257e-06, "loss": 0.0633, "step": 39438 }, { "epoch": 0.8690497832278394, "grad_norm": 0.228733628988266, "learning_rate": 1.3287019561359348e-06, "loss": 0.0701, "step": 39439 }, { "epoch": 0.8690718185173556, "grad_norm": 0.7343880534172058, "learning_rate": 1.3282614896834948e-06, "loss": 0.0757, "step": 39440 }, { "epoch": 0.8690938538068717, "grad_norm": 0.4929356575012207, "learning_rate": 1.3278210928685525e-06, "loss": 0.0643, "step": 39441 }, { "epoch": 0.8691158890963878, "grad_norm": 0.5071406960487366, "learning_rate": 1.3273807656933439e-06, "loss": 0.0606, "step": 39442 }, { "epoch": 0.869137924385904, "grad_norm": 0.623855471611023, "learning_rate": 1.3269405081601226e-06, "loss": 0.0747, "step": 39443 }, { "epoch": 0.8691599596754201, "grad_norm": 0.9043996930122375, "learning_rate": 1.3265003202711234e-06, "loss": 0.0695, "step": 39444 }, { "epoch": 0.8691819949649363, "grad_norm": 0.7006661295890808, "learning_rate": 1.3260602020285911e-06, "loss": 0.0348, "step": 39445 }, { "epoch": 0.8692040302544525, "grad_norm": 0.6355565190315247, "learning_rate": 1.3256201534347657e-06, "loss": 0.0654, "step": 39446 }, { "epoch": 0.8692260655439686, "grad_norm": 0.5084921717643738, "learning_rate": 1.3251801744918885e-06, "loss": 0.0474, "step": 39447 }, { "epoch": 0.8692481008334848, "grad_norm": 0.9616561532020569, "learning_rate": 1.324740265202206e-06, "loss": 0.0613, "step": 39448 }, { "epoch": 0.869270136123001, "grad_norm": 0.7484691143035889, "learning_rate": 1.324300425567947e-06, "loss": 0.0848, "step": 39449 }, { "epoch": 0.8692921714125171, "grad_norm": 0.524366557598114, "learning_rate": 1.3238606555913656e-06, "loss": 0.0561, "step": 39450 }, { "epoch": 0.8693142067020333, "grad_norm": 0.29535844922065735, "learning_rate": 1.323420955274694e-06, "loss": 0.04, "step": 39451 }, { "epoch": 0.8693362419915495, "grad_norm": 0.49697157740592957, "learning_rate": 1.322981324620175e-06, "loss": 0.0595, "step": 39452 }, { "epoch": 0.8693582772810656, "grad_norm": 0.6718313694000244, "learning_rate": 1.3225417636300435e-06, "loss": 0.0589, "step": 39453 }, { "epoch": 0.8693803125705818, "grad_norm": 0.8020480275154114, "learning_rate": 1.322102272306543e-06, "loss": 0.0799, "step": 39454 }, { "epoch": 0.869402347860098, "grad_norm": 0.6820695996284485, "learning_rate": 1.3216628506519112e-06, "loss": 0.068, "step": 39455 }, { "epoch": 0.8694243831496141, "grad_norm": 0.7866225838661194, "learning_rate": 1.3212234986683836e-06, "loss": 0.0421, "step": 39456 }, { "epoch": 0.8694464184391303, "grad_norm": 0.4818034768104553, "learning_rate": 1.3207842163581995e-06, "loss": 0.085, "step": 39457 }, { "epoch": 0.8694684537286465, "grad_norm": 0.6457471251487732, "learning_rate": 1.3203450037235959e-06, "loss": 0.0706, "step": 39458 }, { "epoch": 0.8694904890181626, "grad_norm": 0.6943442225456238, "learning_rate": 1.319905860766814e-06, "loss": 0.0597, "step": 39459 }, { "epoch": 0.8695125243076788, "grad_norm": 0.5210238099098206, "learning_rate": 1.3194667874900856e-06, "loss": 0.059, "step": 39460 }, { "epoch": 0.869534559597195, "grad_norm": 0.5188100934028625, "learning_rate": 1.319027783895649e-06, "loss": 0.0622, "step": 39461 }, { "epoch": 0.8695565948867111, "grad_norm": 0.7329840660095215, "learning_rate": 1.3185888499857419e-06, "loss": 0.0607, "step": 39462 }, { "epoch": 0.8695786301762273, "grad_norm": 0.34067070484161377, "learning_rate": 1.3181499857625961e-06, "loss": 0.0445, "step": 39463 }, { "epoch": 0.8696006654657434, "grad_norm": 0.6460543870925903, "learning_rate": 1.3177111912284517e-06, "loss": 0.0482, "step": 39464 }, { "epoch": 0.8696227007552596, "grad_norm": 0.5982715487480164, "learning_rate": 1.317272466385535e-06, "loss": 0.0548, "step": 39465 }, { "epoch": 0.8696447360447757, "grad_norm": 0.5057838559150696, "learning_rate": 1.3168338112360945e-06, "loss": 0.0567, "step": 39466 }, { "epoch": 0.8696667713342918, "grad_norm": 0.35909637808799744, "learning_rate": 1.316395225782353e-06, "loss": 0.0472, "step": 39467 }, { "epoch": 0.869688806623808, "grad_norm": 0.8064466118812561, "learning_rate": 1.3159567100265507e-06, "loss": 0.0394, "step": 39468 }, { "epoch": 0.8697108419133242, "grad_norm": 0.4758812487125397, "learning_rate": 1.3155182639709158e-06, "loss": 0.0378, "step": 39469 }, { "epoch": 0.8697328772028403, "grad_norm": 0.6511804461479187, "learning_rate": 1.3150798876176862e-06, "loss": 0.0498, "step": 39470 }, { "epoch": 0.8697549124923565, "grad_norm": 0.9128003716468811, "learning_rate": 1.3146415809690937e-06, "loss": 0.0662, "step": 39471 }, { "epoch": 0.8697769477818726, "grad_norm": 0.6322590112686157, "learning_rate": 1.3142033440273665e-06, "loss": 0.072, "step": 39472 }, { "epoch": 0.8697989830713888, "grad_norm": 0.5901181101799011, "learning_rate": 1.3137651767947461e-06, "loss": 0.0607, "step": 39473 }, { "epoch": 0.869821018360905, "grad_norm": 0.8472256660461426, "learning_rate": 1.3133270792734558e-06, "loss": 0.0577, "step": 39474 }, { "epoch": 0.8698430536504211, "grad_norm": 0.9739800691604614, "learning_rate": 1.3128890514657321e-06, "loss": 0.0845, "step": 39475 }, { "epoch": 0.8698650889399373, "grad_norm": 0.5735426545143127, "learning_rate": 1.3124510933738014e-06, "loss": 0.0712, "step": 39476 }, { "epoch": 0.8698871242294535, "grad_norm": 0.41393882036209106, "learning_rate": 1.3120132049998973e-06, "loss": 0.0424, "step": 39477 }, { "epoch": 0.8699091595189696, "grad_norm": 0.622269868850708, "learning_rate": 1.3115753863462543e-06, "loss": 0.0468, "step": 39478 }, { "epoch": 0.8699311948084858, "grad_norm": 0.7418563365936279, "learning_rate": 1.311137637415094e-06, "loss": 0.0903, "step": 39479 }, { "epoch": 0.869953230098002, "grad_norm": 0.45098939538002014, "learning_rate": 1.31069995820865e-06, "loss": 0.0651, "step": 39480 }, { "epoch": 0.8699752653875181, "grad_norm": 0.6415897011756897, "learning_rate": 1.3102623487291516e-06, "loss": 0.0613, "step": 39481 }, { "epoch": 0.8699973006770343, "grad_norm": 0.6863835453987122, "learning_rate": 1.3098248089788323e-06, "loss": 0.0682, "step": 39482 }, { "epoch": 0.8700193359665505, "grad_norm": 0.5917273759841919, "learning_rate": 1.309387338959912e-06, "loss": 0.0721, "step": 39483 }, { "epoch": 0.8700413712560666, "grad_norm": 0.3410220146179199, "learning_rate": 1.3089499386746257e-06, "loss": 0.0611, "step": 39484 }, { "epoch": 0.8700634065455828, "grad_norm": 0.5169512629508972, "learning_rate": 1.3085126081251998e-06, "loss": 0.0678, "step": 39485 }, { "epoch": 0.870085441835099, "grad_norm": 0.6782207489013672, "learning_rate": 1.308075347313859e-06, "loss": 0.0467, "step": 39486 }, { "epoch": 0.8701074771246151, "grad_norm": 0.4578956663608551, "learning_rate": 1.3076381562428351e-06, "loss": 0.0635, "step": 39487 }, { "epoch": 0.8701295124141313, "grad_norm": 0.9892573952674866, "learning_rate": 1.3072010349143482e-06, "loss": 0.0499, "step": 39488 }, { "epoch": 0.8701515477036474, "grad_norm": 0.5339974164962769, "learning_rate": 1.3067639833306345e-06, "loss": 0.058, "step": 39489 }, { "epoch": 0.8701735829931635, "grad_norm": 0.32836461067199707, "learning_rate": 1.3063270014939105e-06, "loss": 0.039, "step": 39490 }, { "epoch": 0.8701956182826797, "grad_norm": 0.466634064912796, "learning_rate": 1.3058900894064062e-06, "loss": 0.063, "step": 39491 }, { "epoch": 0.8702176535721958, "grad_norm": 0.2578169107437134, "learning_rate": 1.3054532470703518e-06, "loss": 0.0522, "step": 39492 }, { "epoch": 0.870239688861712, "grad_norm": 0.7218846082687378, "learning_rate": 1.3050164744879634e-06, "loss": 0.0716, "step": 39493 }, { "epoch": 0.8702617241512282, "grad_norm": 0.5839356184005737, "learning_rate": 1.3045797716614728e-06, "loss": 0.0626, "step": 39494 }, { "epoch": 0.8702837594407443, "grad_norm": 0.4852382242679596, "learning_rate": 1.3041431385930962e-06, "loss": 0.0557, "step": 39495 }, { "epoch": 0.8703057947302605, "grad_norm": 0.588067889213562, "learning_rate": 1.303706575285069e-06, "loss": 0.0582, "step": 39496 }, { "epoch": 0.8703278300197766, "grad_norm": 0.5841894149780273, "learning_rate": 1.3032700817396043e-06, "loss": 0.0454, "step": 39497 }, { "epoch": 0.8703498653092928, "grad_norm": 0.27678024768829346, "learning_rate": 1.3028336579589335e-06, "loss": 0.0388, "step": 39498 }, { "epoch": 0.870371900598809, "grad_norm": 0.9125254154205322, "learning_rate": 1.3023973039452713e-06, "loss": 0.0701, "step": 39499 }, { "epoch": 0.8703939358883251, "grad_norm": 0.4323417544364929, "learning_rate": 1.3019610197008496e-06, "loss": 0.0789, "step": 39500 }, { "epoch": 0.8704159711778413, "grad_norm": 0.41665709018707275, "learning_rate": 1.3015248052278832e-06, "loss": 0.046, "step": 39501 }, { "epoch": 0.8704380064673575, "grad_norm": 0.24852962791919708, "learning_rate": 1.3010886605285988e-06, "loss": 0.0305, "step": 39502 }, { "epoch": 0.8704600417568736, "grad_norm": 0.3836197853088379, "learning_rate": 1.3006525856052143e-06, "loss": 0.0519, "step": 39503 }, { "epoch": 0.8704820770463898, "grad_norm": 0.6124085187911987, "learning_rate": 1.3002165804599515e-06, "loss": 0.0825, "step": 39504 }, { "epoch": 0.870504112335906, "grad_norm": 0.744892418384552, "learning_rate": 1.299780645095035e-06, "loss": 0.0505, "step": 39505 }, { "epoch": 0.8705261476254221, "grad_norm": 0.6200903654098511, "learning_rate": 1.2993447795126785e-06, "loss": 0.0551, "step": 39506 }, { "epoch": 0.8705481829149383, "grad_norm": 0.5722717046737671, "learning_rate": 1.298908983715108e-06, "loss": 0.0565, "step": 39507 }, { "epoch": 0.8705702182044545, "grad_norm": 0.9325821995735168, "learning_rate": 1.298473257704539e-06, "loss": 0.0664, "step": 39508 }, { "epoch": 0.8705922534939706, "grad_norm": 0.7851360440254211, "learning_rate": 1.2980376014831974e-06, "loss": 0.0634, "step": 39509 }, { "epoch": 0.8706142887834868, "grad_norm": 0.8094409108161926, "learning_rate": 1.2976020150532952e-06, "loss": 0.0757, "step": 39510 }, { "epoch": 0.870636324073003, "grad_norm": 0.3911491930484772, "learning_rate": 1.2971664984170522e-06, "loss": 0.0634, "step": 39511 }, { "epoch": 0.8706583593625191, "grad_norm": 0.7628276944160461, "learning_rate": 1.2967310515766917e-06, "loss": 0.0668, "step": 39512 }, { "epoch": 0.8706803946520353, "grad_norm": 0.6898444890975952, "learning_rate": 1.2962956745344267e-06, "loss": 0.0704, "step": 39513 }, { "epoch": 0.8707024299415514, "grad_norm": 0.732148289680481, "learning_rate": 1.2958603672924757e-06, "loss": 0.0714, "step": 39514 }, { "epoch": 0.8707244652310675, "grad_norm": 0.8637360334396362, "learning_rate": 1.295425129853055e-06, "loss": 0.0828, "step": 39515 }, { "epoch": 0.8707465005205837, "grad_norm": 0.5470677614212036, "learning_rate": 1.294989962218388e-06, "loss": 0.0591, "step": 39516 }, { "epoch": 0.8707685358100998, "grad_norm": 0.3450336754322052, "learning_rate": 1.294554864390683e-06, "loss": 0.0434, "step": 39517 }, { "epoch": 0.870790571099616, "grad_norm": 0.8124350309371948, "learning_rate": 1.2941198363721579e-06, "loss": 0.058, "step": 39518 }, { "epoch": 0.8708126063891322, "grad_norm": 0.775567352771759, "learning_rate": 1.2936848781650346e-06, "loss": 0.0834, "step": 39519 }, { "epoch": 0.8708346416786483, "grad_norm": 0.5413069128990173, "learning_rate": 1.2932499897715212e-06, "loss": 0.0626, "step": 39520 }, { "epoch": 0.8708566769681645, "grad_norm": 0.3122151792049408, "learning_rate": 1.292815171193839e-06, "loss": 0.0748, "step": 39521 }, { "epoch": 0.8708787122576807, "grad_norm": 0.3407568037509918, "learning_rate": 1.2923804224341934e-06, "loss": 0.0451, "step": 39522 }, { "epoch": 0.8709007475471968, "grad_norm": 0.2890077531337738, "learning_rate": 1.291945743494809e-06, "loss": 0.0443, "step": 39523 }, { "epoch": 0.870922782836713, "grad_norm": 0.5801074504852295, "learning_rate": 1.2915111343778957e-06, "loss": 0.0513, "step": 39524 }, { "epoch": 0.8709448181262291, "grad_norm": 0.8960750699043274, "learning_rate": 1.2910765950856684e-06, "loss": 0.077, "step": 39525 }, { "epoch": 0.8709668534157453, "grad_norm": 0.8546498417854309, "learning_rate": 1.2906421256203354e-06, "loss": 0.0882, "step": 39526 }, { "epoch": 0.8709888887052615, "grad_norm": 0.5735318660736084, "learning_rate": 1.2902077259841167e-06, "loss": 0.0589, "step": 39527 }, { "epoch": 0.8710109239947776, "grad_norm": 0.6117910742759705, "learning_rate": 1.2897733961792218e-06, "loss": 0.0609, "step": 39528 }, { "epoch": 0.8710329592842938, "grad_norm": 0.5299864411354065, "learning_rate": 1.289339136207861e-06, "loss": 0.0766, "step": 39529 }, { "epoch": 0.87105499457381, "grad_norm": 0.8912705779075623, "learning_rate": 1.2889049460722492e-06, "loss": 0.0526, "step": 39530 }, { "epoch": 0.8710770298633261, "grad_norm": 0.8771958947181702, "learning_rate": 1.288470825774596e-06, "loss": 0.0513, "step": 39531 }, { "epoch": 0.8710990651528423, "grad_norm": 0.6168093681335449, "learning_rate": 1.2880367753171163e-06, "loss": 0.059, "step": 39532 }, { "epoch": 0.8711211004423585, "grad_norm": 0.5857093334197998, "learning_rate": 1.2876027947020168e-06, "loss": 0.0612, "step": 39533 }, { "epoch": 0.8711431357318746, "grad_norm": 0.7273934483528137, "learning_rate": 1.2871688839315076e-06, "loss": 0.0575, "step": 39534 }, { "epoch": 0.8711651710213908, "grad_norm": 0.9360079765319824, "learning_rate": 1.286735043007805e-06, "loss": 0.087, "step": 39535 }, { "epoch": 0.871187206310907, "grad_norm": 0.5654932260513306, "learning_rate": 1.2863012719331107e-06, "loss": 0.0357, "step": 39536 }, { "epoch": 0.8712092416004231, "grad_norm": 0.5682103633880615, "learning_rate": 1.285867570709638e-06, "loss": 0.0533, "step": 39537 }, { "epoch": 0.8712312768899393, "grad_norm": 0.7576612234115601, "learning_rate": 1.2854339393395965e-06, "loss": 0.0512, "step": 39538 }, { "epoch": 0.8712533121794555, "grad_norm": 0.6240437030792236, "learning_rate": 1.2850003778251962e-06, "loss": 0.0546, "step": 39539 }, { "epoch": 0.8712753474689715, "grad_norm": 1.0553524494171143, "learning_rate": 1.2845668861686422e-06, "loss": 0.0888, "step": 39540 }, { "epoch": 0.8712973827584877, "grad_norm": 0.8725834488868713, "learning_rate": 1.2841334643721426e-06, "loss": 0.0581, "step": 39541 }, { "epoch": 0.8713194180480038, "grad_norm": 0.41978004574775696, "learning_rate": 1.283700112437909e-06, "loss": 0.0606, "step": 39542 }, { "epoch": 0.87134145333752, "grad_norm": 0.8645696043968201, "learning_rate": 1.2832668303681428e-06, "loss": 0.0628, "step": 39543 }, { "epoch": 0.8713634886270362, "grad_norm": 0.4509679675102234, "learning_rate": 1.2828336181650575e-06, "loss": 0.0363, "step": 39544 }, { "epoch": 0.8713855239165523, "grad_norm": 0.7138522863388062, "learning_rate": 1.2824004758308511e-06, "loss": 0.0614, "step": 39545 }, { "epoch": 0.8714075592060685, "grad_norm": 0.8290371894836426, "learning_rate": 1.2819674033677404e-06, "loss": 0.0873, "step": 39546 }, { "epoch": 0.8714295944955847, "grad_norm": 0.36009538173675537, "learning_rate": 1.2815344007779233e-06, "loss": 0.0631, "step": 39547 }, { "epoch": 0.8714516297851008, "grad_norm": 0.6436401605606079, "learning_rate": 1.2811014680636102e-06, "loss": 0.0699, "step": 39548 }, { "epoch": 0.871473665074617, "grad_norm": 0.6287993788719177, "learning_rate": 1.2806686052270023e-06, "loss": 0.0812, "step": 39549 }, { "epoch": 0.8714957003641332, "grad_norm": 0.5610554218292236, "learning_rate": 1.2802358122703044e-06, "loss": 0.0622, "step": 39550 }, { "epoch": 0.8715177356536493, "grad_norm": 0.5146598815917969, "learning_rate": 1.2798030891957268e-06, "loss": 0.0551, "step": 39551 }, { "epoch": 0.8715397709431655, "grad_norm": 0.24845238029956818, "learning_rate": 1.2793704360054658e-06, "loss": 0.0617, "step": 39552 }, { "epoch": 0.8715618062326816, "grad_norm": 0.6583069562911987, "learning_rate": 1.2789378527017298e-06, "loss": 0.0539, "step": 39553 }, { "epoch": 0.8715838415221978, "grad_norm": 0.5966587662696838, "learning_rate": 1.2785053392867201e-06, "loss": 0.0752, "step": 39554 }, { "epoch": 0.871605876811714, "grad_norm": 0.6981253027915955, "learning_rate": 1.2780728957626436e-06, "loss": 0.0684, "step": 39555 }, { "epoch": 0.8716279121012301, "grad_norm": 0.15463368594646454, "learning_rate": 1.2776405221316983e-06, "loss": 0.0376, "step": 39556 }, { "epoch": 0.8716499473907463, "grad_norm": 0.5398305654525757, "learning_rate": 1.2772082183960876e-06, "loss": 0.0673, "step": 39557 }, { "epoch": 0.8716719826802625, "grad_norm": 0.3011378049850464, "learning_rate": 1.2767759845580163e-06, "loss": 0.0518, "step": 39558 }, { "epoch": 0.8716940179697786, "grad_norm": 0.4967150092124939, "learning_rate": 1.2763438206196826e-06, "loss": 0.0375, "step": 39559 }, { "epoch": 0.8717160532592948, "grad_norm": 0.8167232275009155, "learning_rate": 1.2759117265832882e-06, "loss": 0.0697, "step": 39560 }, { "epoch": 0.871738088548811, "grad_norm": 0.3400265872478485, "learning_rate": 1.2754797024510344e-06, "loss": 0.062, "step": 39561 }, { "epoch": 0.8717601238383271, "grad_norm": 0.6992691159248352, "learning_rate": 1.275047748225125e-06, "loss": 0.0647, "step": 39562 }, { "epoch": 0.8717821591278433, "grad_norm": 0.5502462387084961, "learning_rate": 1.274615863907756e-06, "loss": 0.06, "step": 39563 }, { "epoch": 0.8718041944173593, "grad_norm": 0.33814311027526855, "learning_rate": 1.2741840495011276e-06, "loss": 0.0443, "step": 39564 }, { "epoch": 0.8718262297068755, "grad_norm": 0.5757737159729004, "learning_rate": 1.273752305007443e-06, "loss": 0.0594, "step": 39565 }, { "epoch": 0.8718482649963917, "grad_norm": 0.7711647748947144, "learning_rate": 1.2733206304288952e-06, "loss": 0.0501, "step": 39566 }, { "epoch": 0.8718703002859078, "grad_norm": 0.39908066391944885, "learning_rate": 1.2728890257676895e-06, "loss": 0.0651, "step": 39567 }, { "epoch": 0.871892335575424, "grad_norm": 0.4782557487487793, "learning_rate": 1.2724574910260155e-06, "loss": 0.053, "step": 39568 }, { "epoch": 0.8719143708649402, "grad_norm": 0.7339419722557068, "learning_rate": 1.2720260262060819e-06, "loss": 0.0717, "step": 39569 }, { "epoch": 0.8719364061544563, "grad_norm": 0.4869365692138672, "learning_rate": 1.2715946313100796e-06, "loss": 0.0483, "step": 39570 }, { "epoch": 0.8719584414439725, "grad_norm": 0.3749324679374695, "learning_rate": 1.271163306340209e-06, "loss": 0.0272, "step": 39571 }, { "epoch": 0.8719804767334887, "grad_norm": 0.5648325085639954, "learning_rate": 1.270732051298663e-06, "loss": 0.0471, "step": 39572 }, { "epoch": 0.8720025120230048, "grad_norm": 0.5270246267318726, "learning_rate": 1.270300866187642e-06, "loss": 0.0604, "step": 39573 }, { "epoch": 0.872024547312521, "grad_norm": 0.4437544345855713, "learning_rate": 1.2698697510093438e-06, "loss": 0.0485, "step": 39574 }, { "epoch": 0.8720465826020372, "grad_norm": 0.4740311801433563, "learning_rate": 1.2694387057659568e-06, "loss": 0.0508, "step": 39575 }, { "epoch": 0.8720686178915533, "grad_norm": 1.095725417137146, "learning_rate": 1.2690077304596843e-06, "loss": 0.0911, "step": 39576 }, { "epoch": 0.8720906531810695, "grad_norm": 0.9820540547370911, "learning_rate": 1.268576825092716e-06, "loss": 0.0726, "step": 39577 }, { "epoch": 0.8721126884705857, "grad_norm": 0.43328917026519775, "learning_rate": 1.2681459896672536e-06, "loss": 0.0432, "step": 39578 }, { "epoch": 0.8721347237601018, "grad_norm": 0.8942272067070007, "learning_rate": 1.2677152241854838e-06, "loss": 0.076, "step": 39579 }, { "epoch": 0.872156759049618, "grad_norm": 0.515851616859436, "learning_rate": 1.267284528649605e-06, "loss": 0.0606, "step": 39580 }, { "epoch": 0.8721787943391341, "grad_norm": 0.6174983978271484, "learning_rate": 1.2668539030618131e-06, "loss": 0.0496, "step": 39581 }, { "epoch": 0.8722008296286503, "grad_norm": 0.6335741281509399, "learning_rate": 1.2664233474242953e-06, "loss": 0.0454, "step": 39582 }, { "epoch": 0.8722228649181665, "grad_norm": 0.6625770926475525, "learning_rate": 1.265992861739248e-06, "loss": 0.0864, "step": 39583 }, { "epoch": 0.8722449002076826, "grad_norm": 0.5090939998626709, "learning_rate": 1.2655624460088627e-06, "loss": 0.0588, "step": 39584 }, { "epoch": 0.8722669354971988, "grad_norm": 0.6293178200721741, "learning_rate": 1.2651321002353377e-06, "loss": 0.0592, "step": 39585 }, { "epoch": 0.872288970786715, "grad_norm": 0.6188147068023682, "learning_rate": 1.2647018244208564e-06, "loss": 0.0718, "step": 39586 }, { "epoch": 0.8723110060762311, "grad_norm": 0.579162061214447, "learning_rate": 1.2642716185676134e-06, "loss": 0.0514, "step": 39587 }, { "epoch": 0.8723330413657473, "grad_norm": 0.555364191532135, "learning_rate": 1.2638414826778038e-06, "loss": 0.059, "step": 39588 }, { "epoch": 0.8723550766552634, "grad_norm": 0.690929114818573, "learning_rate": 1.2634114167536126e-06, "loss": 0.0506, "step": 39589 }, { "epoch": 0.8723771119447795, "grad_norm": 0.3650481700897217, "learning_rate": 1.262981420797238e-06, "loss": 0.0445, "step": 39590 }, { "epoch": 0.8723991472342957, "grad_norm": 0.3345223665237427, "learning_rate": 1.2625514948108579e-06, "loss": 0.054, "step": 39591 }, { "epoch": 0.8724211825238118, "grad_norm": 0.553104817867279, "learning_rate": 1.2621216387966744e-06, "loss": 0.0609, "step": 39592 }, { "epoch": 0.872443217813328, "grad_norm": 0.435784250497818, "learning_rate": 1.2616918527568705e-06, "loss": 0.0467, "step": 39593 }, { "epoch": 0.8724652531028442, "grad_norm": 1.0148531198501587, "learning_rate": 1.2612621366936412e-06, "loss": 0.0737, "step": 39594 }, { "epoch": 0.8724872883923603, "grad_norm": 0.38225165009498596, "learning_rate": 1.2608324906091662e-06, "loss": 0.0705, "step": 39595 }, { "epoch": 0.8725093236818765, "grad_norm": 0.6065419316291809, "learning_rate": 1.2604029145056406e-06, "loss": 0.0711, "step": 39596 }, { "epoch": 0.8725313589713927, "grad_norm": 0.48159608244895935, "learning_rate": 1.259973408385251e-06, "loss": 0.0458, "step": 39597 }, { "epoch": 0.8725533942609088, "grad_norm": 1.0951200723648071, "learning_rate": 1.259543972250184e-06, "loss": 0.0754, "step": 39598 }, { "epoch": 0.872575429550425, "grad_norm": 0.6152094602584839, "learning_rate": 1.2591146061026276e-06, "loss": 0.0793, "step": 39599 }, { "epoch": 0.8725974648399412, "grad_norm": 0.6626105904579163, "learning_rate": 1.2586853099447688e-06, "loss": 0.1026, "step": 39600 }, { "epoch": 0.8726195001294573, "grad_norm": 0.32531046867370605, "learning_rate": 1.2582560837787972e-06, "loss": 0.0513, "step": 39601 }, { "epoch": 0.8726415354189735, "grad_norm": 1.0393049716949463, "learning_rate": 1.2578269276068942e-06, "loss": 0.0707, "step": 39602 }, { "epoch": 0.8726635707084897, "grad_norm": 0.5119696259498596, "learning_rate": 1.257397841431247e-06, "loss": 0.0581, "step": 39603 }, { "epoch": 0.8726856059980058, "grad_norm": 0.5803818106651306, "learning_rate": 1.2569688252540435e-06, "loss": 0.0528, "step": 39604 }, { "epoch": 0.872707641287522, "grad_norm": 0.649794340133667, "learning_rate": 1.2565398790774668e-06, "loss": 0.0656, "step": 39605 }, { "epoch": 0.8727296765770381, "grad_norm": 0.5646985173225403, "learning_rate": 1.2561110029037003e-06, "loss": 0.0388, "step": 39606 }, { "epoch": 0.8727517118665543, "grad_norm": 0.7559331655502319, "learning_rate": 1.2556821967349325e-06, "loss": 0.0753, "step": 39607 }, { "epoch": 0.8727737471560705, "grad_norm": 0.5262126922607422, "learning_rate": 1.255253460573348e-06, "loss": 0.0423, "step": 39608 }, { "epoch": 0.8727957824455866, "grad_norm": 0.656552255153656, "learning_rate": 1.2548247944211234e-06, "loss": 0.0297, "step": 39609 }, { "epoch": 0.8728178177351028, "grad_norm": 0.4581323564052582, "learning_rate": 1.2543961982804486e-06, "loss": 0.0519, "step": 39610 }, { "epoch": 0.872839853024619, "grad_norm": 0.6392382383346558, "learning_rate": 1.2539676721535055e-06, "loss": 0.0537, "step": 39611 }, { "epoch": 0.8728618883141351, "grad_norm": 0.7814799547195435, "learning_rate": 1.253539216042477e-06, "loss": 0.0721, "step": 39612 }, { "epoch": 0.8728839236036513, "grad_norm": 0.27231690287590027, "learning_rate": 1.2531108299495431e-06, "loss": 0.0479, "step": 39613 }, { "epoch": 0.8729059588931674, "grad_norm": 0.8544691205024719, "learning_rate": 1.2526825138768871e-06, "loss": 0.0639, "step": 39614 }, { "epoch": 0.8729279941826835, "grad_norm": 0.6100268959999084, "learning_rate": 1.252254267826694e-06, "loss": 0.0344, "step": 39615 }, { "epoch": 0.8729500294721997, "grad_norm": 0.6770459413528442, "learning_rate": 1.2518260918011404e-06, "loss": 0.0511, "step": 39616 }, { "epoch": 0.8729720647617158, "grad_norm": 0.7785465717315674, "learning_rate": 1.2513979858024093e-06, "loss": 0.0663, "step": 39617 }, { "epoch": 0.872994100051232, "grad_norm": 0.7798775434494019, "learning_rate": 1.2509699498326776e-06, "loss": 0.0708, "step": 39618 }, { "epoch": 0.8730161353407482, "grad_norm": 0.5656702518463135, "learning_rate": 1.2505419838941333e-06, "loss": 0.0559, "step": 39619 }, { "epoch": 0.8730381706302643, "grad_norm": 0.4762166738510132, "learning_rate": 1.2501140879889483e-06, "loss": 0.0717, "step": 39620 }, { "epoch": 0.8730602059197805, "grad_norm": 0.3117690980434418, "learning_rate": 1.2496862621193088e-06, "loss": 0.0517, "step": 39621 }, { "epoch": 0.8730822412092967, "grad_norm": 0.41808563470840454, "learning_rate": 1.2492585062873884e-06, "loss": 0.0684, "step": 39622 }, { "epoch": 0.8731042764988128, "grad_norm": 0.5022391676902771, "learning_rate": 1.2488308204953669e-06, "loss": 0.0555, "step": 39623 }, { "epoch": 0.873126311788329, "grad_norm": 0.4780293107032776, "learning_rate": 1.2484032047454275e-06, "loss": 0.0603, "step": 39624 }, { "epoch": 0.8731483470778452, "grad_norm": 0.5836014747619629, "learning_rate": 1.24797565903974e-06, "loss": 0.0762, "step": 39625 }, { "epoch": 0.8731703823673613, "grad_norm": 0.4837687313556671, "learning_rate": 1.2475481833804913e-06, "loss": 0.0537, "step": 39626 }, { "epoch": 0.8731924176568775, "grad_norm": 0.8279478549957275, "learning_rate": 1.247120777769853e-06, "loss": 0.0712, "step": 39627 }, { "epoch": 0.8732144529463937, "grad_norm": 0.6006640791893005, "learning_rate": 1.2466934422100047e-06, "loss": 0.0467, "step": 39628 }, { "epoch": 0.8732364882359098, "grad_norm": 0.602896511554718, "learning_rate": 1.2462661767031197e-06, "loss": 0.0721, "step": 39629 }, { "epoch": 0.873258523525426, "grad_norm": 0.37945517897605896, "learning_rate": 1.2458389812513766e-06, "loss": 0.0709, "step": 39630 }, { "epoch": 0.8732805588149422, "grad_norm": 0.2304297536611557, "learning_rate": 1.2454118558569533e-06, "loss": 0.0324, "step": 39631 }, { "epoch": 0.8733025941044583, "grad_norm": 0.36071261763572693, "learning_rate": 1.24498480052202e-06, "loss": 0.0318, "step": 39632 }, { "epoch": 0.8733246293939745, "grad_norm": 0.7341166138648987, "learning_rate": 1.2445578152487563e-06, "loss": 0.0602, "step": 39633 }, { "epoch": 0.8733466646834906, "grad_norm": 0.789496898651123, "learning_rate": 1.2441309000393357e-06, "loss": 0.0768, "step": 39634 }, { "epoch": 0.8733686999730068, "grad_norm": 0.47242456674575806, "learning_rate": 1.243704054895935e-06, "loss": 0.044, "step": 39635 }, { "epoch": 0.873390735262523, "grad_norm": 0.9442312121391296, "learning_rate": 1.2432772798207238e-06, "loss": 0.0385, "step": 39636 }, { "epoch": 0.8734127705520391, "grad_norm": 0.49160701036453247, "learning_rate": 1.2428505748158785e-06, "loss": 0.0389, "step": 39637 }, { "epoch": 0.8734348058415553, "grad_norm": 0.43609973788261414, "learning_rate": 1.2424239398835764e-06, "loss": 0.0599, "step": 39638 }, { "epoch": 0.8734568411310714, "grad_norm": 0.5013775825500488, "learning_rate": 1.2419973750259817e-06, "loss": 0.0623, "step": 39639 }, { "epoch": 0.8734788764205875, "grad_norm": 0.5213003754615784, "learning_rate": 1.2415708802452763e-06, "loss": 0.0703, "step": 39640 }, { "epoch": 0.8735009117101037, "grad_norm": 0.7705147862434387, "learning_rate": 1.2411444555436218e-06, "loss": 0.061, "step": 39641 }, { "epoch": 0.8735229469996199, "grad_norm": 0.48067227005958557, "learning_rate": 1.2407181009232016e-06, "loss": 0.0465, "step": 39642 }, { "epoch": 0.873544982289136, "grad_norm": 0.7852400541305542, "learning_rate": 1.2402918163861803e-06, "loss": 0.0738, "step": 39643 }, { "epoch": 0.8735670175786522, "grad_norm": 0.6006405353546143, "learning_rate": 1.2398656019347348e-06, "loss": 0.0665, "step": 39644 }, { "epoch": 0.8735890528681683, "grad_norm": 0.9314666986465454, "learning_rate": 1.23943945757103e-06, "loss": 0.0779, "step": 39645 }, { "epoch": 0.8736110881576845, "grad_norm": 0.8107817769050598, "learning_rate": 1.2390133832972373e-06, "loss": 0.0749, "step": 39646 }, { "epoch": 0.8736331234472007, "grad_norm": 0.27962860465049744, "learning_rate": 1.2385873791155333e-06, "loss": 0.0645, "step": 39647 }, { "epoch": 0.8736551587367168, "grad_norm": 0.6376246809959412, "learning_rate": 1.2381614450280765e-06, "loss": 0.0758, "step": 39648 }, { "epoch": 0.873677194026233, "grad_norm": 0.49315834045410156, "learning_rate": 1.2377355810370484e-06, "loss": 0.0668, "step": 39649 }, { "epoch": 0.8736992293157492, "grad_norm": 0.6344686150550842, "learning_rate": 1.237309787144612e-06, "loss": 0.0519, "step": 39650 }, { "epoch": 0.8737212646052653, "grad_norm": 0.5492932796478271, "learning_rate": 1.2368840633529377e-06, "loss": 0.0544, "step": 39651 }, { "epoch": 0.8737432998947815, "grad_norm": 0.7659834027290344, "learning_rate": 1.2364584096641917e-06, "loss": 0.0442, "step": 39652 }, { "epoch": 0.8737653351842977, "grad_norm": 0.5835367441177368, "learning_rate": 1.2360328260805426e-06, "loss": 0.0633, "step": 39653 }, { "epoch": 0.8737873704738138, "grad_norm": 0.29823729395866394, "learning_rate": 1.2356073126041633e-06, "loss": 0.0611, "step": 39654 }, { "epoch": 0.87380940576333, "grad_norm": 0.4366503953933716, "learning_rate": 1.2351818692372125e-06, "loss": 0.0705, "step": 39655 }, { "epoch": 0.8738314410528462, "grad_norm": 0.42387330532073975, "learning_rate": 1.234756495981863e-06, "loss": 0.0558, "step": 39656 }, { "epoch": 0.8738534763423623, "grad_norm": 0.5682788491249084, "learning_rate": 1.23433119284028e-06, "loss": 0.0473, "step": 39657 }, { "epoch": 0.8738755116318785, "grad_norm": 0.8074519038200378, "learning_rate": 1.2339059598146319e-06, "loss": 0.0766, "step": 39658 }, { "epoch": 0.8738975469213947, "grad_norm": 0.3845428228378296, "learning_rate": 1.2334807969070816e-06, "loss": 0.0571, "step": 39659 }, { "epoch": 0.8739195822109108, "grad_norm": 0.3930000960826874, "learning_rate": 1.2330557041197943e-06, "loss": 0.0426, "step": 39660 }, { "epoch": 0.873941617500427, "grad_norm": 0.5333423018455505, "learning_rate": 1.2326306814549398e-06, "loss": 0.0783, "step": 39661 }, { "epoch": 0.8739636527899431, "grad_norm": 0.8456223607063293, "learning_rate": 1.2322057289146781e-06, "loss": 0.0544, "step": 39662 }, { "epoch": 0.8739856880794592, "grad_norm": 0.5502886772155762, "learning_rate": 1.2317808465011775e-06, "loss": 0.0329, "step": 39663 }, { "epoch": 0.8740077233689754, "grad_norm": 0.590774416923523, "learning_rate": 1.2313560342165947e-06, "loss": 0.0337, "step": 39664 }, { "epoch": 0.8740297586584915, "grad_norm": 0.9115752577781677, "learning_rate": 1.2309312920631043e-06, "loss": 0.0417, "step": 39665 }, { "epoch": 0.8740517939480077, "grad_norm": 1.0868935585021973, "learning_rate": 1.2305066200428616e-06, "loss": 0.0951, "step": 39666 }, { "epoch": 0.8740738292375239, "grad_norm": 0.7836215496063232, "learning_rate": 1.2300820181580346e-06, "loss": 0.0757, "step": 39667 }, { "epoch": 0.87409586452704, "grad_norm": 0.543043851852417, "learning_rate": 1.2296574864107818e-06, "loss": 0.0546, "step": 39668 }, { "epoch": 0.8741178998165562, "grad_norm": 0.6753993630409241, "learning_rate": 1.229233024803268e-06, "loss": 0.0373, "step": 39669 }, { "epoch": 0.8741399351060724, "grad_norm": 0.42780962586402893, "learning_rate": 1.2288086333376581e-06, "loss": 0.0613, "step": 39670 }, { "epoch": 0.8741619703955885, "grad_norm": 0.4260488450527191, "learning_rate": 1.2283843120161036e-06, "loss": 0.0667, "step": 39671 }, { "epoch": 0.8741840056851047, "grad_norm": 0.8380617499351501, "learning_rate": 1.2279600608407798e-06, "loss": 0.0608, "step": 39672 }, { "epoch": 0.8742060409746208, "grad_norm": 0.3448963761329651, "learning_rate": 1.2275358798138364e-06, "loss": 0.0404, "step": 39673 }, { "epoch": 0.874228076264137, "grad_norm": 0.6392305493354797, "learning_rate": 1.2271117689374416e-06, "loss": 0.0927, "step": 39674 }, { "epoch": 0.8742501115536532, "grad_norm": 0.5359712839126587, "learning_rate": 1.226687728213749e-06, "loss": 0.0683, "step": 39675 }, { "epoch": 0.8742721468431693, "grad_norm": 0.2743183970451355, "learning_rate": 1.2262637576449232e-06, "loss": 0.0538, "step": 39676 }, { "epoch": 0.8742941821326855, "grad_norm": 0.6557849645614624, "learning_rate": 1.2258398572331226e-06, "loss": 0.0586, "step": 39677 }, { "epoch": 0.8743162174222017, "grad_norm": 0.5940008163452148, "learning_rate": 1.2254160269805038e-06, "loss": 0.0473, "step": 39678 }, { "epoch": 0.8743382527117178, "grad_norm": 0.4140678942203522, "learning_rate": 1.2249922668892283e-06, "loss": 0.0617, "step": 39679 }, { "epoch": 0.874360288001234, "grad_norm": 0.9147158861160278, "learning_rate": 1.2245685769614544e-06, "loss": 0.0703, "step": 39680 }, { "epoch": 0.8743823232907502, "grad_norm": 0.7673945426940918, "learning_rate": 1.2241449571993407e-06, "loss": 0.0726, "step": 39681 }, { "epoch": 0.8744043585802663, "grad_norm": 0.6051777005195618, "learning_rate": 1.2237214076050435e-06, "loss": 0.0533, "step": 39682 }, { "epoch": 0.8744263938697825, "grad_norm": 0.7370858788490295, "learning_rate": 1.2232979281807195e-06, "loss": 0.0669, "step": 39683 }, { "epoch": 0.8744484291592987, "grad_norm": 0.7489414215087891, "learning_rate": 1.2228745189285284e-06, "loss": 0.0531, "step": 39684 }, { "epoch": 0.8744704644488148, "grad_norm": 0.6341630220413208, "learning_rate": 1.222451179850624e-06, "loss": 0.0407, "step": 39685 }, { "epoch": 0.874492499738331, "grad_norm": 0.7839475274085999, "learning_rate": 1.2220279109491656e-06, "loss": 0.0542, "step": 39686 }, { "epoch": 0.8745145350278472, "grad_norm": 0.43005356192588806, "learning_rate": 1.2216047122263018e-06, "loss": 0.0264, "step": 39687 }, { "epoch": 0.8745365703173632, "grad_norm": 0.6397111415863037, "learning_rate": 1.221181583684201e-06, "loss": 0.0452, "step": 39688 }, { "epoch": 0.8745586056068794, "grad_norm": 0.515952467918396, "learning_rate": 1.2207585253250064e-06, "loss": 0.0475, "step": 39689 }, { "epoch": 0.8745806408963955, "grad_norm": 1.0843298435211182, "learning_rate": 1.2203355371508812e-06, "loss": 0.0526, "step": 39690 }, { "epoch": 0.8746026761859117, "grad_norm": 0.7031973004341125, "learning_rate": 1.2199126191639736e-06, "loss": 0.0892, "step": 39691 }, { "epoch": 0.8746247114754279, "grad_norm": 0.44803714752197266, "learning_rate": 1.2194897713664404e-06, "loss": 0.0484, "step": 39692 }, { "epoch": 0.874646746764944, "grad_norm": 0.49016883969306946, "learning_rate": 1.2190669937604381e-06, "loss": 0.0676, "step": 39693 }, { "epoch": 0.8746687820544602, "grad_norm": 0.4625702500343323, "learning_rate": 1.2186442863481134e-06, "loss": 0.0855, "step": 39694 }, { "epoch": 0.8746908173439764, "grad_norm": 0.5321747064590454, "learning_rate": 1.2182216491316278e-06, "loss": 0.0897, "step": 39695 }, { "epoch": 0.8747128526334925, "grad_norm": 0.47488123178482056, "learning_rate": 1.2177990821131262e-06, "loss": 0.0668, "step": 39696 }, { "epoch": 0.8747348879230087, "grad_norm": 0.837699294090271, "learning_rate": 1.2173765852947688e-06, "loss": 0.0594, "step": 39697 }, { "epoch": 0.8747569232125249, "grad_norm": 0.5416147708892822, "learning_rate": 1.2169541586787003e-06, "loss": 0.0579, "step": 39698 }, { "epoch": 0.874778958502041, "grad_norm": 0.33421555161476135, "learning_rate": 1.216531802267074e-06, "loss": 0.0338, "step": 39699 }, { "epoch": 0.8748009937915572, "grad_norm": 0.6309462189674377, "learning_rate": 1.2161095160620467e-06, "loss": 0.0468, "step": 39700 }, { "epoch": 0.8748230290810733, "grad_norm": 0.8415120244026184, "learning_rate": 1.2156873000657615e-06, "loss": 0.0907, "step": 39701 }, { "epoch": 0.8748450643705895, "grad_norm": 0.43199920654296875, "learning_rate": 1.2152651542803733e-06, "loss": 0.0382, "step": 39702 }, { "epoch": 0.8748670996601057, "grad_norm": 0.5337586998939514, "learning_rate": 1.2148430787080306e-06, "loss": 0.0581, "step": 39703 }, { "epoch": 0.8748891349496218, "grad_norm": 0.7182731032371521, "learning_rate": 1.2144210733508881e-06, "loss": 0.0712, "step": 39704 }, { "epoch": 0.874911170239138, "grad_norm": 0.359059602022171, "learning_rate": 1.2139991382110876e-06, "loss": 0.0593, "step": 39705 }, { "epoch": 0.8749332055286542, "grad_norm": 0.6872186064720154, "learning_rate": 1.213577273290784e-06, "loss": 0.0507, "step": 39706 }, { "epoch": 0.8749552408181703, "grad_norm": 0.7490084171295166, "learning_rate": 1.2131554785921255e-06, "loss": 0.082, "step": 39707 }, { "epoch": 0.8749772761076865, "grad_norm": 0.6712331771850586, "learning_rate": 1.2127337541172572e-06, "loss": 0.0634, "step": 39708 }, { "epoch": 0.8749993113972027, "grad_norm": 0.4535314440727234, "learning_rate": 1.2123120998683307e-06, "loss": 0.0505, "step": 39709 }, { "epoch": 0.8750213466867188, "grad_norm": 0.7786712646484375, "learning_rate": 1.2118905158474874e-06, "loss": 0.0364, "step": 39710 }, { "epoch": 0.875043381976235, "grad_norm": 0.5212311744689941, "learning_rate": 1.2114690020568858e-06, "loss": 0.0578, "step": 39711 }, { "epoch": 0.8750654172657512, "grad_norm": 0.5504111051559448, "learning_rate": 1.2110475584986623e-06, "loss": 0.0691, "step": 39712 }, { "epoch": 0.8750874525552672, "grad_norm": 0.3232339024543762, "learning_rate": 1.2106261851749724e-06, "loss": 0.0499, "step": 39713 }, { "epoch": 0.8751094878447834, "grad_norm": 0.6808021068572998, "learning_rate": 1.2102048820879519e-06, "loss": 0.0722, "step": 39714 }, { "epoch": 0.8751315231342995, "grad_norm": 0.7609747052192688, "learning_rate": 1.2097836492397562e-06, "loss": 0.0519, "step": 39715 }, { "epoch": 0.8751535584238157, "grad_norm": 0.5724917054176331, "learning_rate": 1.2093624866325287e-06, "loss": 0.0552, "step": 39716 }, { "epoch": 0.8751755937133319, "grad_norm": 0.7770943641662598, "learning_rate": 1.208941394268409e-06, "loss": 0.0581, "step": 39717 }, { "epoch": 0.875197629002848, "grad_norm": 0.8041953444480896, "learning_rate": 1.2085203721495491e-06, "loss": 0.0578, "step": 39718 }, { "epoch": 0.8752196642923642, "grad_norm": 0.7198954224586487, "learning_rate": 1.2080994202780887e-06, "loss": 0.0578, "step": 39719 }, { "epoch": 0.8752416995818804, "grad_norm": 0.3523433804512024, "learning_rate": 1.2076785386561778e-06, "loss": 0.0453, "step": 39720 }, { "epoch": 0.8752637348713965, "grad_norm": 0.7597929835319519, "learning_rate": 1.2072577272859498e-06, "loss": 0.0606, "step": 39721 }, { "epoch": 0.8752857701609127, "grad_norm": 0.7181834578514099, "learning_rate": 1.2068369861695594e-06, "loss": 0.0522, "step": 39722 }, { "epoch": 0.8753078054504289, "grad_norm": 0.8366224765777588, "learning_rate": 1.2064163153091418e-06, "loss": 0.0602, "step": 39723 }, { "epoch": 0.875329840739945, "grad_norm": 0.5756877064704895, "learning_rate": 1.205995714706845e-06, "loss": 0.0582, "step": 39724 }, { "epoch": 0.8753518760294612, "grad_norm": 0.6370391845703125, "learning_rate": 1.2055751843648077e-06, "loss": 0.0749, "step": 39725 }, { "epoch": 0.8753739113189773, "grad_norm": 0.48163166642189026, "learning_rate": 1.205154724285173e-06, "loss": 0.0549, "step": 39726 }, { "epoch": 0.8753959466084935, "grad_norm": 0.5371529459953308, "learning_rate": 1.204734334470084e-06, "loss": 0.033, "step": 39727 }, { "epoch": 0.8754179818980097, "grad_norm": 0.6578238606452942, "learning_rate": 1.204314014921679e-06, "loss": 0.0485, "step": 39728 }, { "epoch": 0.8754400171875258, "grad_norm": 0.8211994171142578, "learning_rate": 1.2038937656421e-06, "loss": 0.0859, "step": 39729 }, { "epoch": 0.875462052477042, "grad_norm": 0.5423889756202698, "learning_rate": 1.2034735866334884e-06, "loss": 0.0654, "step": 39730 }, { "epoch": 0.8754840877665582, "grad_norm": 0.8004385828971863, "learning_rate": 1.2030534778979874e-06, "loss": 0.0894, "step": 39731 }, { "epoch": 0.8755061230560743, "grad_norm": 0.9268913269042969, "learning_rate": 1.2026334394377303e-06, "loss": 0.0605, "step": 39732 }, { "epoch": 0.8755281583455905, "grad_norm": 0.5747538208961487, "learning_rate": 1.2022134712548605e-06, "loss": 0.0389, "step": 39733 }, { "epoch": 0.8755501936351067, "grad_norm": 0.6741436719894409, "learning_rate": 1.2017935733515178e-06, "loss": 0.0591, "step": 39734 }, { "epoch": 0.8755722289246228, "grad_norm": 0.6204349994659424, "learning_rate": 1.2013737457298375e-06, "loss": 0.0608, "step": 39735 }, { "epoch": 0.875594264214139, "grad_norm": 0.47317108511924744, "learning_rate": 1.200953988391964e-06, "loss": 0.0496, "step": 39736 }, { "epoch": 0.875616299503655, "grad_norm": 0.308015376329422, "learning_rate": 1.2005343013400244e-06, "loss": 0.0494, "step": 39737 }, { "epoch": 0.8756383347931712, "grad_norm": 0.5184813141822815, "learning_rate": 1.20011468457617e-06, "loss": 0.0842, "step": 39738 }, { "epoch": 0.8756603700826874, "grad_norm": 0.846238374710083, "learning_rate": 1.199695138102528e-06, "loss": 0.0514, "step": 39739 }, { "epoch": 0.8756824053722035, "grad_norm": 0.5934468507766724, "learning_rate": 1.1992756619212392e-06, "loss": 0.0665, "step": 39740 }, { "epoch": 0.8757044406617197, "grad_norm": 0.7126119136810303, "learning_rate": 1.1988562560344425e-06, "loss": 0.0765, "step": 39741 }, { "epoch": 0.8757264759512359, "grad_norm": 0.7417096495628357, "learning_rate": 1.1984369204442691e-06, "loss": 0.0543, "step": 39742 }, { "epoch": 0.875748511240752, "grad_norm": 0.7951326370239258, "learning_rate": 1.198017655152861e-06, "loss": 0.078, "step": 39743 }, { "epoch": 0.8757705465302682, "grad_norm": 0.9195320010185242, "learning_rate": 1.1975984601623446e-06, "loss": 0.0547, "step": 39744 }, { "epoch": 0.8757925818197844, "grad_norm": 0.6491689085960388, "learning_rate": 1.197179335474865e-06, "loss": 0.0622, "step": 39745 }, { "epoch": 0.8758146171093005, "grad_norm": 0.750043511390686, "learning_rate": 1.1967602810925504e-06, "loss": 0.0667, "step": 39746 }, { "epoch": 0.8758366523988167, "grad_norm": 0.5178351998329163, "learning_rate": 1.1963412970175409e-06, "loss": 0.0851, "step": 39747 }, { "epoch": 0.8758586876883329, "grad_norm": 0.5296851992607117, "learning_rate": 1.1959223832519644e-06, "loss": 0.0676, "step": 39748 }, { "epoch": 0.875880722977849, "grad_norm": 0.650654137134552, "learning_rate": 1.1955035397979563e-06, "loss": 0.0647, "step": 39749 }, { "epoch": 0.8759027582673652, "grad_norm": 0.5814284682273865, "learning_rate": 1.1950847666576548e-06, "loss": 0.0535, "step": 39750 }, { "epoch": 0.8759247935568814, "grad_norm": 0.9739011526107788, "learning_rate": 1.194666063833188e-06, "loss": 0.082, "step": 39751 }, { "epoch": 0.8759468288463975, "grad_norm": 0.8875011801719666, "learning_rate": 1.1942474313266876e-06, "loss": 0.0876, "step": 39752 }, { "epoch": 0.8759688641359137, "grad_norm": 0.33542948961257935, "learning_rate": 1.1938288691402904e-06, "loss": 0.0677, "step": 39753 }, { "epoch": 0.8759908994254298, "grad_norm": 0.6922823190689087, "learning_rate": 1.1934103772761278e-06, "loss": 0.0355, "step": 39754 }, { "epoch": 0.876012934714946, "grad_norm": 0.5781576633453369, "learning_rate": 1.1929919557363268e-06, "loss": 0.0496, "step": 39755 }, { "epoch": 0.8760349700044622, "grad_norm": 0.52605140209198, "learning_rate": 1.1925736045230219e-06, "loss": 0.0368, "step": 39756 }, { "epoch": 0.8760570052939783, "grad_norm": 0.666135311126709, "learning_rate": 1.1921553236383465e-06, "loss": 0.0589, "step": 39757 }, { "epoch": 0.8760790405834945, "grad_norm": 0.31693020462989807, "learning_rate": 1.1917371130844257e-06, "loss": 0.062, "step": 39758 }, { "epoch": 0.8761010758730107, "grad_norm": 0.8643481135368347, "learning_rate": 1.1913189728633927e-06, "loss": 0.0945, "step": 39759 }, { "epoch": 0.8761231111625268, "grad_norm": 0.40653538703918457, "learning_rate": 1.1909009029773775e-06, "loss": 0.0657, "step": 39760 }, { "epoch": 0.876145146452043, "grad_norm": 0.47213584184646606, "learning_rate": 1.1904829034285104e-06, "loss": 0.0513, "step": 39761 }, { "epoch": 0.876167181741559, "grad_norm": 0.592485249042511, "learning_rate": 1.1900649742189174e-06, "loss": 0.0626, "step": 39762 }, { "epoch": 0.8761892170310752, "grad_norm": 0.7704382538795471, "learning_rate": 1.1896471153507287e-06, "loss": 0.069, "step": 39763 }, { "epoch": 0.8762112523205914, "grad_norm": 0.5306777954101562, "learning_rate": 1.1892293268260761e-06, "loss": 0.0428, "step": 39764 }, { "epoch": 0.8762332876101075, "grad_norm": 0.6852657198905945, "learning_rate": 1.1888116086470812e-06, "loss": 0.0723, "step": 39765 }, { "epoch": 0.8762553228996237, "grad_norm": 0.7856795191764832, "learning_rate": 1.1883939608158772e-06, "loss": 0.0474, "step": 39766 }, { "epoch": 0.8762773581891399, "grad_norm": 0.4694739878177643, "learning_rate": 1.187976383334584e-06, "loss": 0.0724, "step": 39767 }, { "epoch": 0.876299393478656, "grad_norm": 0.9790552854537964, "learning_rate": 1.1875588762053385e-06, "loss": 0.0798, "step": 39768 }, { "epoch": 0.8763214287681722, "grad_norm": 0.7466471195220947, "learning_rate": 1.1871414394302621e-06, "loss": 0.0546, "step": 39769 }, { "epoch": 0.8763434640576884, "grad_norm": 0.529310941696167, "learning_rate": 1.1867240730114815e-06, "loss": 0.0523, "step": 39770 }, { "epoch": 0.8763654993472045, "grad_norm": 0.5715890526771545, "learning_rate": 1.1863067769511216e-06, "loss": 0.0785, "step": 39771 }, { "epoch": 0.8763875346367207, "grad_norm": 0.5506314039230347, "learning_rate": 1.1858895512513073e-06, "loss": 0.0506, "step": 39772 }, { "epoch": 0.8764095699262369, "grad_norm": 0.5478146076202393, "learning_rate": 1.1854723959141688e-06, "loss": 0.0433, "step": 39773 }, { "epoch": 0.876431605215753, "grad_norm": 1.1440863609313965, "learning_rate": 1.1850553109418244e-06, "loss": 0.0849, "step": 39774 }, { "epoch": 0.8764536405052692, "grad_norm": 0.8876380920410156, "learning_rate": 1.1846382963364005e-06, "loss": 0.0588, "step": 39775 }, { "epoch": 0.8764756757947854, "grad_norm": 0.579835832118988, "learning_rate": 1.184221352100024e-06, "loss": 0.0499, "step": 39776 }, { "epoch": 0.8764977110843015, "grad_norm": 0.3906550109386444, "learning_rate": 1.1838044782348178e-06, "loss": 0.0441, "step": 39777 }, { "epoch": 0.8765197463738177, "grad_norm": 0.4385729432106018, "learning_rate": 1.1833876747429023e-06, "loss": 0.0703, "step": 39778 }, { "epoch": 0.8765417816633339, "grad_norm": 0.5820640921592712, "learning_rate": 1.1829709416264023e-06, "loss": 0.0478, "step": 39779 }, { "epoch": 0.87656381695285, "grad_norm": 0.419318288564682, "learning_rate": 1.1825542788874443e-06, "loss": 0.0383, "step": 39780 }, { "epoch": 0.8765858522423662, "grad_norm": 0.7866179347038269, "learning_rate": 1.1821376865281437e-06, "loss": 0.0768, "step": 39781 }, { "epoch": 0.8766078875318823, "grad_norm": 0.7938418388366699, "learning_rate": 1.181721164550625e-06, "loss": 0.0733, "step": 39782 }, { "epoch": 0.8766299228213985, "grad_norm": 0.6994978785514832, "learning_rate": 1.1813047129570098e-06, "loss": 0.0608, "step": 39783 }, { "epoch": 0.8766519581109147, "grad_norm": 0.7587907314300537, "learning_rate": 1.1808883317494235e-06, "loss": 0.0562, "step": 39784 }, { "epoch": 0.8766739934004308, "grad_norm": 0.4383941888809204, "learning_rate": 1.1804720209299807e-06, "loss": 0.0452, "step": 39785 }, { "epoch": 0.876696028689947, "grad_norm": 0.7155143618583679, "learning_rate": 1.1800557805008034e-06, "loss": 0.0484, "step": 39786 }, { "epoch": 0.8767180639794631, "grad_norm": 0.5282250642776489, "learning_rate": 1.179639610464016e-06, "loss": 0.062, "step": 39787 }, { "epoch": 0.8767400992689792, "grad_norm": 0.7083396911621094, "learning_rate": 1.1792235108217342e-06, "loss": 0.0379, "step": 39788 }, { "epoch": 0.8767621345584954, "grad_norm": 0.7638674974441528, "learning_rate": 1.1788074815760792e-06, "loss": 0.0699, "step": 39789 }, { "epoch": 0.8767841698480116, "grad_norm": 0.8944184184074402, "learning_rate": 1.1783915227291643e-06, "loss": 0.0586, "step": 39790 }, { "epoch": 0.8768062051375277, "grad_norm": 0.5766283869743347, "learning_rate": 1.1779756342831177e-06, "loss": 0.0471, "step": 39791 }, { "epoch": 0.8768282404270439, "grad_norm": 0.4358653426170349, "learning_rate": 1.1775598162400513e-06, "loss": 0.0399, "step": 39792 }, { "epoch": 0.87685027571656, "grad_norm": 0.625220537185669, "learning_rate": 1.1771440686020885e-06, "loss": 0.0655, "step": 39793 }, { "epoch": 0.8768723110060762, "grad_norm": 0.5719835162162781, "learning_rate": 1.1767283913713389e-06, "loss": 0.0504, "step": 39794 }, { "epoch": 0.8768943462955924, "grad_norm": 0.7629355788230896, "learning_rate": 1.1763127845499244e-06, "loss": 0.0506, "step": 39795 }, { "epoch": 0.8769163815851085, "grad_norm": 0.5856010913848877, "learning_rate": 1.175897248139965e-06, "loss": 0.057, "step": 39796 }, { "epoch": 0.8769384168746247, "grad_norm": 0.2901340425014496, "learning_rate": 1.175481782143572e-06, "loss": 0.0516, "step": 39797 }, { "epoch": 0.8769604521641409, "grad_norm": 0.4435926377773285, "learning_rate": 1.1750663865628625e-06, "loss": 0.0522, "step": 39798 }, { "epoch": 0.876982487453657, "grad_norm": 0.9306995868682861, "learning_rate": 1.1746510613999545e-06, "loss": 0.0757, "step": 39799 }, { "epoch": 0.8770045227431732, "grad_norm": 0.465442031621933, "learning_rate": 1.1742358066569647e-06, "loss": 0.0302, "step": 39800 }, { "epoch": 0.8770265580326894, "grad_norm": 0.3998912572860718, "learning_rate": 1.173820622336003e-06, "loss": 0.0659, "step": 39801 }, { "epoch": 0.8770485933222055, "grad_norm": 0.6556320786476135, "learning_rate": 1.1734055084391865e-06, "loss": 0.106, "step": 39802 }, { "epoch": 0.8770706286117217, "grad_norm": 0.9401021003723145, "learning_rate": 1.172990464968633e-06, "loss": 0.0911, "step": 39803 }, { "epoch": 0.8770926639012379, "grad_norm": 0.5049203038215637, "learning_rate": 1.172575491926451e-06, "loss": 0.0532, "step": 39804 }, { "epoch": 0.877114699190754, "grad_norm": 0.37950894236564636, "learning_rate": 1.1721605893147586e-06, "loss": 0.0438, "step": 39805 }, { "epoch": 0.8771367344802702, "grad_norm": 0.8505097031593323, "learning_rate": 1.171745757135666e-06, "loss": 0.1211, "step": 39806 }, { "epoch": 0.8771587697697864, "grad_norm": 0.39679422974586487, "learning_rate": 1.1713309953912898e-06, "loss": 0.056, "step": 39807 }, { "epoch": 0.8771808050593025, "grad_norm": 0.7490666508674622, "learning_rate": 1.1709163040837385e-06, "loss": 0.0588, "step": 39808 }, { "epoch": 0.8772028403488187, "grad_norm": 0.42920204997062683, "learning_rate": 1.170501683215125e-06, "loss": 0.0355, "step": 39809 }, { "epoch": 0.8772248756383348, "grad_norm": 0.37357664108276367, "learning_rate": 1.170087132787566e-06, "loss": 0.0773, "step": 39810 }, { "epoch": 0.8772469109278509, "grad_norm": 0.7057427763938904, "learning_rate": 1.1696726528031671e-06, "loss": 0.0848, "step": 39811 }, { "epoch": 0.8772689462173671, "grad_norm": 0.5271897315979004, "learning_rate": 1.1692582432640424e-06, "loss": 0.062, "step": 39812 }, { "epoch": 0.8772909815068832, "grad_norm": 0.4821820855140686, "learning_rate": 1.1688439041722992e-06, "loss": 0.0495, "step": 39813 }, { "epoch": 0.8773130167963994, "grad_norm": 1.060846209526062, "learning_rate": 1.1684296355300556e-06, "loss": 0.0552, "step": 39814 }, { "epoch": 0.8773350520859156, "grad_norm": 0.6014904379844666, "learning_rate": 1.1680154373394146e-06, "loss": 0.0809, "step": 39815 }, { "epoch": 0.8773570873754317, "grad_norm": 0.40375280380249023, "learning_rate": 1.1676013096024901e-06, "loss": 0.0636, "step": 39816 }, { "epoch": 0.8773791226649479, "grad_norm": 0.45975393056869507, "learning_rate": 1.1671872523213867e-06, "loss": 0.0643, "step": 39817 }, { "epoch": 0.877401157954464, "grad_norm": 0.6333088278770447, "learning_rate": 1.1667732654982177e-06, "loss": 0.0407, "step": 39818 }, { "epoch": 0.8774231932439802, "grad_norm": 0.645213782787323, "learning_rate": 1.1663593491350916e-06, "loss": 0.0506, "step": 39819 }, { "epoch": 0.8774452285334964, "grad_norm": 0.5684708952903748, "learning_rate": 1.1659455032341133e-06, "loss": 0.0536, "step": 39820 }, { "epoch": 0.8774672638230125, "grad_norm": 0.5827465653419495, "learning_rate": 1.1655317277973942e-06, "loss": 0.0372, "step": 39821 }, { "epoch": 0.8774892991125287, "grad_norm": 0.26991668343544006, "learning_rate": 1.1651180228270398e-06, "loss": 0.0319, "step": 39822 }, { "epoch": 0.8775113344020449, "grad_norm": 0.6406370997428894, "learning_rate": 1.1647043883251595e-06, "loss": 0.0507, "step": 39823 }, { "epoch": 0.877533369691561, "grad_norm": 0.530972957611084, "learning_rate": 1.164290824293857e-06, "loss": 0.0296, "step": 39824 }, { "epoch": 0.8775554049810772, "grad_norm": 0.9561254382133484, "learning_rate": 1.1638773307352423e-06, "loss": 0.0872, "step": 39825 }, { "epoch": 0.8775774402705934, "grad_norm": 0.6047270894050598, "learning_rate": 1.1634639076514187e-06, "loss": 0.0672, "step": 39826 }, { "epoch": 0.8775994755601095, "grad_norm": 0.8555729985237122, "learning_rate": 1.1630505550444942e-06, "loss": 0.0531, "step": 39827 }, { "epoch": 0.8776215108496257, "grad_norm": 0.6974366307258606, "learning_rate": 1.1626372729165724e-06, "loss": 0.0561, "step": 39828 }, { "epoch": 0.8776435461391419, "grad_norm": 0.6392706036567688, "learning_rate": 1.1622240612697582e-06, "loss": 0.0559, "step": 39829 }, { "epoch": 0.877665581428658, "grad_norm": 0.7575991749763489, "learning_rate": 1.1618109201061617e-06, "loss": 0.0671, "step": 39830 }, { "epoch": 0.8776876167181742, "grad_norm": 0.8521298766136169, "learning_rate": 1.1613978494278776e-06, "loss": 0.0492, "step": 39831 }, { "epoch": 0.8777096520076904, "grad_norm": 0.47869256138801575, "learning_rate": 1.1609848492370178e-06, "loss": 0.0482, "step": 39832 }, { "epoch": 0.8777316872972065, "grad_norm": 0.42395487427711487, "learning_rate": 1.1605719195356806e-06, "loss": 0.0217, "step": 39833 }, { "epoch": 0.8777537225867227, "grad_norm": 0.35380828380584717, "learning_rate": 1.160159060325976e-06, "loss": 0.0476, "step": 39834 }, { "epoch": 0.8777757578762388, "grad_norm": 0.30829256772994995, "learning_rate": 1.1597462716100004e-06, "loss": 0.0376, "step": 39835 }, { "epoch": 0.8777977931657549, "grad_norm": 0.8612303137779236, "learning_rate": 1.1593335533898574e-06, "loss": 0.0578, "step": 39836 }, { "epoch": 0.8778198284552711, "grad_norm": 0.7282750606536865, "learning_rate": 1.1589209056676552e-06, "loss": 0.0762, "step": 39837 }, { "epoch": 0.8778418637447872, "grad_norm": 0.5348253846168518, "learning_rate": 1.1585083284454872e-06, "loss": 0.048, "step": 39838 }, { "epoch": 0.8778638990343034, "grad_norm": 0.8767545819282532, "learning_rate": 1.1580958217254617e-06, "loss": 0.0584, "step": 39839 }, { "epoch": 0.8778859343238196, "grad_norm": 0.6587586998939514, "learning_rate": 1.1576833855096702e-06, "loss": 0.0418, "step": 39840 }, { "epoch": 0.8779079696133357, "grad_norm": 0.8528180122375488, "learning_rate": 1.1572710198002263e-06, "loss": 0.0679, "step": 39841 }, { "epoch": 0.8779300049028519, "grad_norm": 1.0070066452026367, "learning_rate": 1.156858724599223e-06, "loss": 0.052, "step": 39842 }, { "epoch": 0.877952040192368, "grad_norm": 0.40088436007499695, "learning_rate": 1.156446499908762e-06, "loss": 0.0643, "step": 39843 }, { "epoch": 0.8779740754818842, "grad_norm": 0.21572436392307281, "learning_rate": 1.1560343457309403e-06, "loss": 0.0446, "step": 39844 }, { "epoch": 0.8779961107714004, "grad_norm": 0.9455891251564026, "learning_rate": 1.155622262067859e-06, "loss": 0.0834, "step": 39845 }, { "epoch": 0.8780181460609165, "grad_norm": 0.5424892902374268, "learning_rate": 1.1552102489216221e-06, "loss": 0.0435, "step": 39846 }, { "epoch": 0.8780401813504327, "grad_norm": 0.593920886516571, "learning_rate": 1.1547983062943191e-06, "loss": 0.0345, "step": 39847 }, { "epoch": 0.8780622166399489, "grad_norm": 0.30447182059288025, "learning_rate": 1.1543864341880534e-06, "loss": 0.0215, "step": 39848 }, { "epoch": 0.878084251929465, "grad_norm": 0.44556236267089844, "learning_rate": 1.1539746326049217e-06, "loss": 0.0383, "step": 39849 }, { "epoch": 0.8781062872189812, "grad_norm": 0.42324113845825195, "learning_rate": 1.1535629015470256e-06, "loss": 0.032, "step": 39850 }, { "epoch": 0.8781283225084974, "grad_norm": 0.5251272916793823, "learning_rate": 1.1531512410164552e-06, "loss": 0.0485, "step": 39851 }, { "epoch": 0.8781503577980135, "grad_norm": 0.8913145065307617, "learning_rate": 1.1527396510153105e-06, "loss": 0.0503, "step": 39852 }, { "epoch": 0.8781723930875297, "grad_norm": 0.7365944385528564, "learning_rate": 1.1523281315456913e-06, "loss": 0.0669, "step": 39853 }, { "epoch": 0.8781944283770459, "grad_norm": 0.577364444732666, "learning_rate": 1.1519166826096878e-06, "loss": 0.0443, "step": 39854 }, { "epoch": 0.878216463666562, "grad_norm": 0.8312208652496338, "learning_rate": 1.1515053042093998e-06, "loss": 0.072, "step": 39855 }, { "epoch": 0.8782384989560782, "grad_norm": 0.6221312284469604, "learning_rate": 1.1510939963469209e-06, "loss": 0.045, "step": 39856 }, { "epoch": 0.8782605342455944, "grad_norm": 0.6552320718765259, "learning_rate": 1.1506827590243491e-06, "loss": 0.0507, "step": 39857 }, { "epoch": 0.8782825695351105, "grad_norm": 0.31121647357940674, "learning_rate": 1.1502715922437728e-06, "loss": 0.0241, "step": 39858 }, { "epoch": 0.8783046048246267, "grad_norm": 0.5213780999183655, "learning_rate": 1.1498604960072907e-06, "loss": 0.0545, "step": 39859 }, { "epoch": 0.8783266401141429, "grad_norm": 0.6372668147087097, "learning_rate": 1.149449470316999e-06, "loss": 0.092, "step": 39860 }, { "epoch": 0.8783486754036589, "grad_norm": 0.5501336455345154, "learning_rate": 1.149038515174986e-06, "loss": 0.0435, "step": 39861 }, { "epoch": 0.8783707106931751, "grad_norm": 0.7786194682121277, "learning_rate": 1.1486276305833505e-06, "loss": 0.0868, "step": 39862 }, { "epoch": 0.8783927459826912, "grad_norm": 0.5443745255470276, "learning_rate": 1.1482168165441753e-06, "loss": 0.0564, "step": 39863 }, { "epoch": 0.8784147812722074, "grad_norm": 0.5140625238418579, "learning_rate": 1.1478060730595673e-06, "loss": 0.0533, "step": 39864 }, { "epoch": 0.8784368165617236, "grad_norm": 0.33618679642677307, "learning_rate": 1.1473954001316067e-06, "loss": 0.0729, "step": 39865 }, { "epoch": 0.8784588518512397, "grad_norm": 0.6912382245063782, "learning_rate": 1.1469847977623932e-06, "loss": 0.0484, "step": 39866 }, { "epoch": 0.8784808871407559, "grad_norm": 0.42009150981903076, "learning_rate": 1.1465742659540118e-06, "loss": 0.0714, "step": 39867 }, { "epoch": 0.8785029224302721, "grad_norm": 0.5977970361709595, "learning_rate": 1.146163804708556e-06, "loss": 0.0725, "step": 39868 }, { "epoch": 0.8785249577197882, "grad_norm": 0.5224739909172058, "learning_rate": 1.145753414028119e-06, "loss": 0.0538, "step": 39869 }, { "epoch": 0.8785469930093044, "grad_norm": 0.5489565134048462, "learning_rate": 1.1453430939147873e-06, "loss": 0.0558, "step": 39870 }, { "epoch": 0.8785690282988206, "grad_norm": 0.8328730463981628, "learning_rate": 1.1449328443706531e-06, "loss": 0.0646, "step": 39871 }, { "epoch": 0.8785910635883367, "grad_norm": 0.4402517080307007, "learning_rate": 1.1445226653978058e-06, "loss": 0.0405, "step": 39872 }, { "epoch": 0.8786130988778529, "grad_norm": 0.7271690368652344, "learning_rate": 1.1441125569983357e-06, "loss": 0.0381, "step": 39873 }, { "epoch": 0.878635134167369, "grad_norm": 0.7288966774940491, "learning_rate": 1.1437025191743294e-06, "loss": 0.0422, "step": 39874 }, { "epoch": 0.8786571694568852, "grad_norm": 0.5585619211196899, "learning_rate": 1.1432925519278754e-06, "loss": 0.0696, "step": 39875 }, { "epoch": 0.8786792047464014, "grad_norm": 0.5480272173881531, "learning_rate": 1.1428826552610665e-06, "loss": 0.0705, "step": 39876 }, { "epoch": 0.8787012400359175, "grad_norm": 0.7713326811790466, "learning_rate": 1.1424728291759835e-06, "loss": 0.0938, "step": 39877 }, { "epoch": 0.8787232753254337, "grad_norm": 0.585965096950531, "learning_rate": 1.1420630736747173e-06, "loss": 0.079, "step": 39878 }, { "epoch": 0.8787453106149499, "grad_norm": 0.7458468079566956, "learning_rate": 1.141653388759355e-06, "loss": 0.0609, "step": 39879 }, { "epoch": 0.878767345904466, "grad_norm": 0.4568527340888977, "learning_rate": 1.1412437744319865e-06, "loss": 0.0739, "step": 39880 }, { "epoch": 0.8787893811939822, "grad_norm": 0.5626789331436157, "learning_rate": 1.1408342306946935e-06, "loss": 0.0561, "step": 39881 }, { "epoch": 0.8788114164834984, "grad_norm": 0.3886410892009735, "learning_rate": 1.1404247575495624e-06, "loss": 0.0396, "step": 39882 }, { "epoch": 0.8788334517730145, "grad_norm": 0.5823494791984558, "learning_rate": 1.1400153549986835e-06, "loss": 0.0645, "step": 39883 }, { "epoch": 0.8788554870625307, "grad_norm": 0.4414990246295929, "learning_rate": 1.1396060230441353e-06, "loss": 0.0539, "step": 39884 }, { "epoch": 0.8788775223520469, "grad_norm": 0.7930961847305298, "learning_rate": 1.139196761688009e-06, "loss": 0.0646, "step": 39885 }, { "epoch": 0.8788995576415629, "grad_norm": 0.7492159605026245, "learning_rate": 1.1387875709323815e-06, "loss": 0.0765, "step": 39886 }, { "epoch": 0.8789215929310791, "grad_norm": 0.7662122845649719, "learning_rate": 1.138378450779346e-06, "loss": 0.07, "step": 39887 }, { "epoch": 0.8789436282205952, "grad_norm": 0.8755149841308594, "learning_rate": 1.137969401230981e-06, "loss": 0.0614, "step": 39888 }, { "epoch": 0.8789656635101114, "grad_norm": 0.673971951007843, "learning_rate": 1.1375604222893733e-06, "loss": 0.0903, "step": 39889 }, { "epoch": 0.8789876987996276, "grad_norm": 0.665038526058197, "learning_rate": 1.1371515139566008e-06, "loss": 0.037, "step": 39890 }, { "epoch": 0.8790097340891437, "grad_norm": 0.1995231658220291, "learning_rate": 1.1367426762347504e-06, "loss": 0.0326, "step": 39891 }, { "epoch": 0.8790317693786599, "grad_norm": 0.4169725775718689, "learning_rate": 1.1363339091259056e-06, "loss": 0.0541, "step": 39892 }, { "epoch": 0.8790538046681761, "grad_norm": 0.502630352973938, "learning_rate": 1.1359252126321428e-06, "loss": 0.062, "step": 39893 }, { "epoch": 0.8790758399576922, "grad_norm": 0.4966587722301483, "learning_rate": 1.1355165867555506e-06, "loss": 0.0471, "step": 39894 }, { "epoch": 0.8790978752472084, "grad_norm": 0.5111135840415955, "learning_rate": 1.1351080314982055e-06, "loss": 0.0408, "step": 39895 }, { "epoch": 0.8791199105367246, "grad_norm": 0.5145647525787354, "learning_rate": 1.1346995468621923e-06, "loss": 0.0592, "step": 39896 }, { "epoch": 0.8791419458262407, "grad_norm": 0.6847796440124512, "learning_rate": 1.1342911328495864e-06, "loss": 0.0575, "step": 39897 }, { "epoch": 0.8791639811157569, "grad_norm": 0.7554306983947754, "learning_rate": 1.1338827894624726e-06, "loss": 0.0563, "step": 39898 }, { "epoch": 0.879186016405273, "grad_norm": 0.4768962860107422, "learning_rate": 1.133474516702931e-06, "loss": 0.048, "step": 39899 }, { "epoch": 0.8792080516947892, "grad_norm": 0.6883660554885864, "learning_rate": 1.133066314573038e-06, "loss": 0.0704, "step": 39900 }, { "epoch": 0.8792300869843054, "grad_norm": 0.7378577589988708, "learning_rate": 1.1326581830748722e-06, "loss": 0.0694, "step": 39901 }, { "epoch": 0.8792521222738215, "grad_norm": 0.6513533592224121, "learning_rate": 1.132250122210517e-06, "loss": 0.0759, "step": 39902 }, { "epoch": 0.8792741575633377, "grad_norm": 0.5084952712059021, "learning_rate": 1.1318421319820487e-06, "loss": 0.0581, "step": 39903 }, { "epoch": 0.8792961928528539, "grad_norm": 0.27423742413520813, "learning_rate": 1.1314342123915445e-06, "loss": 0.0393, "step": 39904 }, { "epoch": 0.87931822814237, "grad_norm": 0.5019141435623169, "learning_rate": 1.1310263634410823e-06, "loss": 0.0605, "step": 39905 }, { "epoch": 0.8793402634318862, "grad_norm": 0.4017259478569031, "learning_rate": 1.1306185851327404e-06, "loss": 0.0264, "step": 39906 }, { "epoch": 0.8793622987214024, "grad_norm": 0.7443243861198425, "learning_rate": 1.130210877468596e-06, "loss": 0.0588, "step": 39907 }, { "epoch": 0.8793843340109185, "grad_norm": 0.5232065320014954, "learning_rate": 1.1298032404507251e-06, "loss": 0.0705, "step": 39908 }, { "epoch": 0.8794063693004347, "grad_norm": 0.6500594615936279, "learning_rate": 1.1293956740812e-06, "loss": 0.068, "step": 39909 }, { "epoch": 0.8794284045899508, "grad_norm": 0.6580235362052917, "learning_rate": 1.128988178362107e-06, "loss": 0.0583, "step": 39910 }, { "epoch": 0.8794504398794669, "grad_norm": 0.6991625428199768, "learning_rate": 1.1285807532955112e-06, "loss": 0.0625, "step": 39911 }, { "epoch": 0.8794724751689831, "grad_norm": 0.49299612641334534, "learning_rate": 1.1281733988834957e-06, "loss": 0.0478, "step": 39912 }, { "epoch": 0.8794945104584992, "grad_norm": 0.20985397696495056, "learning_rate": 1.1277661151281292e-06, "loss": 0.0445, "step": 39913 }, { "epoch": 0.8795165457480154, "grad_norm": 0.6531450748443604, "learning_rate": 1.1273589020314884e-06, "loss": 0.0476, "step": 39914 }, { "epoch": 0.8795385810375316, "grad_norm": 0.1537020355463028, "learning_rate": 1.1269517595956497e-06, "loss": 0.0405, "step": 39915 }, { "epoch": 0.8795606163270477, "grad_norm": 0.6515814661979675, "learning_rate": 1.1265446878226798e-06, "loss": 0.075, "step": 39916 }, { "epoch": 0.8795826516165639, "grad_norm": 0.34346646070480347, "learning_rate": 1.126137686714664e-06, "loss": 0.0511, "step": 39917 }, { "epoch": 0.8796046869060801, "grad_norm": 0.578476071357727, "learning_rate": 1.1257307562736652e-06, "loss": 0.0491, "step": 39918 }, { "epoch": 0.8796267221955962, "grad_norm": 0.90882807970047, "learning_rate": 1.1253238965017621e-06, "loss": 0.0608, "step": 39919 }, { "epoch": 0.8796487574851124, "grad_norm": 0.6507028341293335, "learning_rate": 1.1249171074010228e-06, "loss": 0.0508, "step": 39920 }, { "epoch": 0.8796707927746286, "grad_norm": 0.24172966182231903, "learning_rate": 1.1245103889735208e-06, "loss": 0.0534, "step": 39921 }, { "epoch": 0.8796928280641447, "grad_norm": 0.5167374014854431, "learning_rate": 1.1241037412213296e-06, "loss": 0.0495, "step": 39922 }, { "epoch": 0.8797148633536609, "grad_norm": 0.429980605840683, "learning_rate": 1.123697164146517e-06, "loss": 0.0393, "step": 39923 }, { "epoch": 0.8797368986431771, "grad_norm": 1.1533780097961426, "learning_rate": 1.1232906577511554e-06, "loss": 0.0538, "step": 39924 }, { "epoch": 0.8797589339326932, "grad_norm": 0.11734876781702042, "learning_rate": 1.1228842220373175e-06, "loss": 0.0402, "step": 39925 }, { "epoch": 0.8797809692222094, "grad_norm": 0.6470261812210083, "learning_rate": 1.122477857007072e-06, "loss": 0.0794, "step": 39926 }, { "epoch": 0.8798030045117256, "grad_norm": 0.4944039285182953, "learning_rate": 1.1220715626624873e-06, "loss": 0.0395, "step": 39927 }, { "epoch": 0.8798250398012417, "grad_norm": 0.8924640417098999, "learning_rate": 1.1216653390056331e-06, "loss": 0.0627, "step": 39928 }, { "epoch": 0.8798470750907579, "grad_norm": 0.47863373160362244, "learning_rate": 1.121259186038583e-06, "loss": 0.0504, "step": 39929 }, { "epoch": 0.879869110380274, "grad_norm": 0.4565693140029907, "learning_rate": 1.1208531037634e-06, "loss": 0.0683, "step": 39930 }, { "epoch": 0.8798911456697902, "grad_norm": 0.6345863342285156, "learning_rate": 1.1204470921821564e-06, "loss": 0.0696, "step": 39931 }, { "epoch": 0.8799131809593064, "grad_norm": 0.42858773469924927, "learning_rate": 1.1200411512969134e-06, "loss": 0.0486, "step": 39932 }, { "epoch": 0.8799352162488225, "grad_norm": 1.018220067024231, "learning_rate": 1.1196352811097492e-06, "loss": 0.0695, "step": 39933 }, { "epoch": 0.8799572515383387, "grad_norm": 0.6984204649925232, "learning_rate": 1.1192294816227227e-06, "loss": 0.0381, "step": 39934 }, { "epoch": 0.8799792868278548, "grad_norm": 1.0913944244384766, "learning_rate": 1.118823752837907e-06, "loss": 0.0702, "step": 39935 }, { "epoch": 0.8800013221173709, "grad_norm": 0.655468761920929, "learning_rate": 1.1184180947573603e-06, "loss": 0.0544, "step": 39936 }, { "epoch": 0.8800233574068871, "grad_norm": 0.6716320514678955, "learning_rate": 1.1180125073831593e-06, "loss": 0.0488, "step": 39937 }, { "epoch": 0.8800453926964033, "grad_norm": 0.48339706659317017, "learning_rate": 1.1176069907173624e-06, "loss": 0.0829, "step": 39938 }, { "epoch": 0.8800674279859194, "grad_norm": 0.39922165870666504, "learning_rate": 1.1172015447620382e-06, "loss": 0.0648, "step": 39939 }, { "epoch": 0.8800894632754356, "grad_norm": 0.436689555644989, "learning_rate": 1.116796169519253e-06, "loss": 0.0644, "step": 39940 }, { "epoch": 0.8801114985649517, "grad_norm": 0.545263409614563, "learning_rate": 1.116390864991067e-06, "loss": 0.0407, "step": 39941 }, { "epoch": 0.8801335338544679, "grad_norm": 0.8446346521377563, "learning_rate": 1.1159856311795486e-06, "loss": 0.0696, "step": 39942 }, { "epoch": 0.8801555691439841, "grad_norm": 0.9303774833679199, "learning_rate": 1.1155804680867576e-06, "loss": 0.0701, "step": 39943 }, { "epoch": 0.8801776044335002, "grad_norm": 0.7863052487373352, "learning_rate": 1.1151753757147642e-06, "loss": 0.0783, "step": 39944 }, { "epoch": 0.8801996397230164, "grad_norm": 0.28011253476142883, "learning_rate": 1.1147703540656268e-06, "loss": 0.0368, "step": 39945 }, { "epoch": 0.8802216750125326, "grad_norm": 0.8245804905891418, "learning_rate": 1.114365403141412e-06, "loss": 0.073, "step": 39946 }, { "epoch": 0.8802437103020487, "grad_norm": 0.45745664834976196, "learning_rate": 1.1139605229441762e-06, "loss": 0.0439, "step": 39947 }, { "epoch": 0.8802657455915649, "grad_norm": 0.5615512728691101, "learning_rate": 1.1135557134759882e-06, "loss": 0.077, "step": 39948 }, { "epoch": 0.8802877808810811, "grad_norm": 0.6560591459274292, "learning_rate": 1.113150974738908e-06, "loss": 0.0486, "step": 39949 }, { "epoch": 0.8803098161705972, "grad_norm": 0.4965977370738983, "learning_rate": 1.1127463067349953e-06, "loss": 0.072, "step": 39950 }, { "epoch": 0.8803318514601134, "grad_norm": 0.2925732433795929, "learning_rate": 1.1123417094663119e-06, "loss": 0.0552, "step": 39951 }, { "epoch": 0.8803538867496296, "grad_norm": 0.6757014393806458, "learning_rate": 1.1119371829349212e-06, "loss": 0.089, "step": 39952 }, { "epoch": 0.8803759220391457, "grad_norm": 0.9838601350784302, "learning_rate": 1.1115327271428816e-06, "loss": 0.0829, "step": 39953 }, { "epoch": 0.8803979573286619, "grad_norm": 0.5151636600494385, "learning_rate": 1.111128342092253e-06, "loss": 0.0484, "step": 39954 }, { "epoch": 0.880419992618178, "grad_norm": 0.7621054649353027, "learning_rate": 1.110724027785094e-06, "loss": 0.0626, "step": 39955 }, { "epoch": 0.8804420279076942, "grad_norm": 0.3238546550273895, "learning_rate": 1.110319784223469e-06, "loss": 0.0402, "step": 39956 }, { "epoch": 0.8804640631972104, "grad_norm": 0.5216593146324158, "learning_rate": 1.1099156114094305e-06, "loss": 0.0696, "step": 39957 }, { "epoch": 0.8804860984867265, "grad_norm": 0.5343043804168701, "learning_rate": 1.1095115093450432e-06, "loss": 0.0742, "step": 39958 }, { "epoch": 0.8805081337762427, "grad_norm": 0.6419894099235535, "learning_rate": 1.1091074780323568e-06, "loss": 0.0608, "step": 39959 }, { "epoch": 0.8805301690657588, "grad_norm": 0.4055235981941223, "learning_rate": 1.1087035174734417e-06, "loss": 0.0661, "step": 39960 }, { "epoch": 0.8805522043552749, "grad_norm": 0.6365609765052795, "learning_rate": 1.1082996276703445e-06, "loss": 0.0634, "step": 39961 }, { "epoch": 0.8805742396447911, "grad_norm": 0.7155477404594421, "learning_rate": 1.1078958086251267e-06, "loss": 0.0374, "step": 39962 }, { "epoch": 0.8805962749343073, "grad_norm": 0.5889768004417419, "learning_rate": 1.1074920603398487e-06, "loss": 0.0658, "step": 39963 }, { "epoch": 0.8806183102238234, "grad_norm": 0.5719749331474304, "learning_rate": 1.10708838281656e-06, "loss": 0.0761, "step": 39964 }, { "epoch": 0.8806403455133396, "grad_norm": 0.5000501871109009, "learning_rate": 1.106684776057323e-06, "loss": 0.0616, "step": 39965 }, { "epoch": 0.8806623808028557, "grad_norm": 0.33680999279022217, "learning_rate": 1.1062812400641852e-06, "loss": 0.0274, "step": 39966 }, { "epoch": 0.8806844160923719, "grad_norm": 0.5308061242103577, "learning_rate": 1.1058777748392124e-06, "loss": 0.0884, "step": 39967 }, { "epoch": 0.8807064513818881, "grad_norm": 0.6011136770248413, "learning_rate": 1.1054743803844524e-06, "loss": 0.0755, "step": 39968 }, { "epoch": 0.8807284866714042, "grad_norm": 0.830803394317627, "learning_rate": 1.105071056701964e-06, "loss": 0.0732, "step": 39969 }, { "epoch": 0.8807505219609204, "grad_norm": 0.32334840297698975, "learning_rate": 1.1046678037937986e-06, "loss": 0.0663, "step": 39970 }, { "epoch": 0.8807725572504366, "grad_norm": 0.9599061012268066, "learning_rate": 1.1042646216620111e-06, "loss": 0.0906, "step": 39971 }, { "epoch": 0.8807945925399527, "grad_norm": 0.4099763333797455, "learning_rate": 1.103861510308657e-06, "loss": 0.0569, "step": 39972 }, { "epoch": 0.8808166278294689, "grad_norm": 0.5288858413696289, "learning_rate": 1.1034584697357859e-06, "loss": 0.0712, "step": 39973 }, { "epoch": 0.8808386631189851, "grad_norm": 0.21939736604690552, "learning_rate": 1.1030554999454528e-06, "loss": 0.0327, "step": 39974 }, { "epoch": 0.8808606984085012, "grad_norm": 0.4753057658672333, "learning_rate": 1.1026526009397098e-06, "loss": 0.0455, "step": 39975 }, { "epoch": 0.8808827336980174, "grad_norm": 0.38347652554512024, "learning_rate": 1.1022497727206132e-06, "loss": 0.0372, "step": 39976 }, { "epoch": 0.8809047689875336, "grad_norm": 0.8393456935882568, "learning_rate": 1.1018470152902066e-06, "loss": 0.0917, "step": 39977 }, { "epoch": 0.8809268042770497, "grad_norm": 0.6457558274269104, "learning_rate": 1.1014443286505465e-06, "loss": 0.0525, "step": 39978 }, { "epoch": 0.8809488395665659, "grad_norm": 0.46112680435180664, "learning_rate": 1.1010417128036865e-06, "loss": 0.0336, "step": 39979 }, { "epoch": 0.880970874856082, "grad_norm": 0.43592777848243713, "learning_rate": 1.1006391677516731e-06, "loss": 0.0441, "step": 39980 }, { "epoch": 0.8809929101455982, "grad_norm": 0.5565284490585327, "learning_rate": 1.100236693496558e-06, "loss": 0.0381, "step": 39981 }, { "epoch": 0.8810149454351144, "grad_norm": 0.5227363705635071, "learning_rate": 1.099834290040388e-06, "loss": 0.0583, "step": 39982 }, { "epoch": 0.8810369807246305, "grad_norm": 0.5581098794937134, "learning_rate": 1.0994319573852196e-06, "loss": 0.0534, "step": 39983 }, { "epoch": 0.8810590160141466, "grad_norm": 0.7783826589584351, "learning_rate": 1.0990296955330948e-06, "loss": 0.07, "step": 39984 }, { "epoch": 0.8810810513036628, "grad_norm": 0.47009575366973877, "learning_rate": 1.0986275044860684e-06, "loss": 0.0482, "step": 39985 }, { "epoch": 0.8811030865931789, "grad_norm": 0.4901067018508911, "learning_rate": 1.098225384246187e-06, "loss": 0.049, "step": 39986 }, { "epoch": 0.8811251218826951, "grad_norm": 0.5914236307144165, "learning_rate": 1.0978233348154975e-06, "loss": 0.0498, "step": 39987 }, { "epoch": 0.8811471571722113, "grad_norm": 0.370057612657547, "learning_rate": 1.09742135619605e-06, "loss": 0.0404, "step": 39988 }, { "epoch": 0.8811691924617274, "grad_norm": 0.4321809411048889, "learning_rate": 1.0970194483898876e-06, "loss": 0.0561, "step": 39989 }, { "epoch": 0.8811912277512436, "grad_norm": 0.3742597997188568, "learning_rate": 1.096617611399064e-06, "loss": 0.0809, "step": 39990 }, { "epoch": 0.8812132630407598, "grad_norm": 0.5702767968177795, "learning_rate": 1.0962158452256205e-06, "loss": 0.0612, "step": 39991 }, { "epoch": 0.8812352983302759, "grad_norm": 0.7599635124206543, "learning_rate": 1.0958141498716074e-06, "loss": 0.0775, "step": 39992 }, { "epoch": 0.8812573336197921, "grad_norm": 0.7064307332038879, "learning_rate": 1.0954125253390662e-06, "loss": 0.0545, "step": 39993 }, { "epoch": 0.8812793689093082, "grad_norm": 0.7808473110198975, "learning_rate": 1.0950109716300455e-06, "loss": 0.0502, "step": 39994 }, { "epoch": 0.8813014041988244, "grad_norm": 0.5475678443908691, "learning_rate": 1.0946094887465934e-06, "loss": 0.0659, "step": 39995 }, { "epoch": 0.8813234394883406, "grad_norm": 0.744653582572937, "learning_rate": 1.0942080766907487e-06, "loss": 0.0591, "step": 39996 }, { "epoch": 0.8813454747778567, "grad_norm": 0.6533642411231995, "learning_rate": 1.0938067354645592e-06, "loss": 0.0699, "step": 39997 }, { "epoch": 0.8813675100673729, "grad_norm": 0.615318775177002, "learning_rate": 1.0934054650700704e-06, "loss": 0.0686, "step": 39998 }, { "epoch": 0.8813895453568891, "grad_norm": 0.7903996706008911, "learning_rate": 1.0930042655093252e-06, "loss": 0.0745, "step": 39999 }, { "epoch": 0.8814115806464052, "grad_norm": 0.7329701781272888, "learning_rate": 1.092603136784366e-06, "loss": 0.0714, "step": 40000 }, { "epoch": 0.8814336159359214, "grad_norm": 0.6705493927001953, "learning_rate": 1.092202078897237e-06, "loss": 0.0685, "step": 40001 }, { "epoch": 0.8814556512254376, "grad_norm": 0.4883200228214264, "learning_rate": 1.091801091849982e-06, "loss": 0.0243, "step": 40002 }, { "epoch": 0.8814776865149537, "grad_norm": 0.5339348912239075, "learning_rate": 1.0914001756446397e-06, "loss": 0.0539, "step": 40003 }, { "epoch": 0.8814997218044699, "grad_norm": 0.6546818017959595, "learning_rate": 1.0909993302832577e-06, "loss": 0.0478, "step": 40004 }, { "epoch": 0.8815217570939861, "grad_norm": 0.5267066955566406, "learning_rate": 1.09059855576787e-06, "loss": 0.0634, "step": 40005 }, { "epoch": 0.8815437923835022, "grad_norm": 0.6734201908111572, "learning_rate": 1.090197852100528e-06, "loss": 0.0639, "step": 40006 }, { "epoch": 0.8815658276730184, "grad_norm": 0.6174314618110657, "learning_rate": 1.0897972192832634e-06, "loss": 0.0743, "step": 40007 }, { "epoch": 0.8815878629625346, "grad_norm": 0.39549902081489563, "learning_rate": 1.0893966573181214e-06, "loss": 0.0578, "step": 40008 }, { "epoch": 0.8816098982520506, "grad_norm": 0.8386384844779968, "learning_rate": 1.0889961662071452e-06, "loss": 0.0707, "step": 40009 }, { "epoch": 0.8816319335415668, "grad_norm": 0.7455416917800903, "learning_rate": 1.0885957459523682e-06, "loss": 0.08, "step": 40010 }, { "epoch": 0.8816539688310829, "grad_norm": 0.6316236853599548, "learning_rate": 1.0881953965558339e-06, "loss": 0.0671, "step": 40011 }, { "epoch": 0.8816760041205991, "grad_norm": 0.4200887382030487, "learning_rate": 1.0877951180195771e-06, "loss": 0.0486, "step": 40012 }, { "epoch": 0.8816980394101153, "grad_norm": 0.4981025457382202, "learning_rate": 1.0873949103456448e-06, "loss": 0.0374, "step": 40013 }, { "epoch": 0.8817200746996314, "grad_norm": 0.42221593856811523, "learning_rate": 1.0869947735360686e-06, "loss": 0.0516, "step": 40014 }, { "epoch": 0.8817421099891476, "grad_norm": 0.6024596095085144, "learning_rate": 1.08659470759289e-06, "loss": 0.0584, "step": 40015 }, { "epoch": 0.8817641452786638, "grad_norm": 0.6096359491348267, "learning_rate": 1.0861947125181442e-06, "loss": 0.06, "step": 40016 }, { "epoch": 0.8817861805681799, "grad_norm": 0.6646914482116699, "learning_rate": 1.0857947883138697e-06, "loss": 0.085, "step": 40017 }, { "epoch": 0.8818082158576961, "grad_norm": 0.3558827340602875, "learning_rate": 1.0853949349821064e-06, "loss": 0.0406, "step": 40018 }, { "epoch": 0.8818302511472123, "grad_norm": 0.6860955357551575, "learning_rate": 1.0849951525248858e-06, "loss": 0.0678, "step": 40019 }, { "epoch": 0.8818522864367284, "grad_norm": 0.2825116515159607, "learning_rate": 1.0845954409442466e-06, "loss": 0.0583, "step": 40020 }, { "epoch": 0.8818743217262446, "grad_norm": 0.47300994396209717, "learning_rate": 1.0841958002422236e-06, "loss": 0.0391, "step": 40021 }, { "epoch": 0.8818963570157607, "grad_norm": 0.4772181510925293, "learning_rate": 1.0837962304208587e-06, "loss": 0.0548, "step": 40022 }, { "epoch": 0.8819183923052769, "grad_norm": 0.5553876161575317, "learning_rate": 1.0833967314821769e-06, "loss": 0.0451, "step": 40023 }, { "epoch": 0.8819404275947931, "grad_norm": 0.6591288447380066, "learning_rate": 1.0829973034282197e-06, "loss": 0.0559, "step": 40024 }, { "epoch": 0.8819624628843092, "grad_norm": 0.7809292078018188, "learning_rate": 1.0825979462610225e-06, "loss": 0.0662, "step": 40025 }, { "epoch": 0.8819844981738254, "grad_norm": 0.7395033836364746, "learning_rate": 1.082198659982615e-06, "loss": 0.0901, "step": 40026 }, { "epoch": 0.8820065334633416, "grad_norm": 0.5224509835243225, "learning_rate": 1.0817994445950324e-06, "loss": 0.0481, "step": 40027 }, { "epoch": 0.8820285687528577, "grad_norm": 0.4496709406375885, "learning_rate": 1.0814003001003081e-06, "loss": 0.0813, "step": 40028 }, { "epoch": 0.8820506040423739, "grad_norm": 0.4753584563732147, "learning_rate": 1.0810012265004786e-06, "loss": 0.0432, "step": 40029 }, { "epoch": 0.8820726393318901, "grad_norm": 0.46188652515411377, "learning_rate": 1.0806022237975728e-06, "loss": 0.0499, "step": 40030 }, { "epoch": 0.8820946746214062, "grad_norm": 0.6420191526412964, "learning_rate": 1.0802032919936218e-06, "loss": 0.0723, "step": 40031 }, { "epoch": 0.8821167099109224, "grad_norm": 0.7885597944259644, "learning_rate": 1.079804431090664e-06, "loss": 0.047, "step": 40032 }, { "epoch": 0.8821387452004386, "grad_norm": 0.5025399923324585, "learning_rate": 1.0794056410907232e-06, "loss": 0.0428, "step": 40033 }, { "epoch": 0.8821607804899546, "grad_norm": 0.6770037412643433, "learning_rate": 1.0790069219958376e-06, "loss": 0.0712, "step": 40034 }, { "epoch": 0.8821828157794708, "grad_norm": 0.3933080732822418, "learning_rate": 1.0786082738080304e-06, "loss": 0.0707, "step": 40035 }, { "epoch": 0.8822048510689869, "grad_norm": 0.5824214220046997, "learning_rate": 1.0782096965293402e-06, "loss": 0.0412, "step": 40036 }, { "epoch": 0.8822268863585031, "grad_norm": 0.43856045603752136, "learning_rate": 1.077811190161792e-06, "loss": 0.0454, "step": 40037 }, { "epoch": 0.8822489216480193, "grad_norm": 0.7298516631126404, "learning_rate": 1.0774127547074192e-06, "loss": 0.0516, "step": 40038 }, { "epoch": 0.8822709569375354, "grad_norm": 0.473804235458374, "learning_rate": 1.0770143901682433e-06, "loss": 0.0655, "step": 40039 }, { "epoch": 0.8822929922270516, "grad_norm": 0.324373334646225, "learning_rate": 1.0766160965463046e-06, "loss": 0.0509, "step": 40040 }, { "epoch": 0.8823150275165678, "grad_norm": 0.379536509513855, "learning_rate": 1.0762178738436263e-06, "loss": 0.0681, "step": 40041 }, { "epoch": 0.8823370628060839, "grad_norm": 0.5092649459838867, "learning_rate": 1.075819722062235e-06, "loss": 0.049, "step": 40042 }, { "epoch": 0.8823590980956001, "grad_norm": 0.9149765968322754, "learning_rate": 1.0754216412041611e-06, "loss": 0.0553, "step": 40043 }, { "epoch": 0.8823811333851163, "grad_norm": 0.5240764021873474, "learning_rate": 1.0750236312714312e-06, "loss": 0.0469, "step": 40044 }, { "epoch": 0.8824031686746324, "grad_norm": 0.5542967319488525, "learning_rate": 1.0746256922660751e-06, "loss": 0.067, "step": 40045 }, { "epoch": 0.8824252039641486, "grad_norm": 0.36947107315063477, "learning_rate": 1.0742278241901148e-06, "loss": 0.0532, "step": 40046 }, { "epoch": 0.8824472392536648, "grad_norm": 0.4593615233898163, "learning_rate": 1.073830027045582e-06, "loss": 0.0439, "step": 40047 }, { "epoch": 0.8824692745431809, "grad_norm": 0.8578602075576782, "learning_rate": 1.0734323008344982e-06, "loss": 0.0572, "step": 40048 }, { "epoch": 0.8824913098326971, "grad_norm": 0.44189491868019104, "learning_rate": 1.0730346455588951e-06, "loss": 0.046, "step": 40049 }, { "epoch": 0.8825133451222132, "grad_norm": 0.3791504204273224, "learning_rate": 1.072637061220793e-06, "loss": 0.0355, "step": 40050 }, { "epoch": 0.8825353804117294, "grad_norm": 0.456207275390625, "learning_rate": 1.0722395478222186e-06, "loss": 0.0578, "step": 40051 }, { "epoch": 0.8825574157012456, "grad_norm": 0.6038633584976196, "learning_rate": 1.0718421053651983e-06, "loss": 0.0556, "step": 40052 }, { "epoch": 0.8825794509907617, "grad_norm": 0.5539296269416809, "learning_rate": 1.0714447338517524e-06, "loss": 0.0735, "step": 40053 }, { "epoch": 0.8826014862802779, "grad_norm": 0.4206748902797699, "learning_rate": 1.0710474332839094e-06, "loss": 0.0473, "step": 40054 }, { "epoch": 0.8826235215697941, "grad_norm": 0.3039925694465637, "learning_rate": 1.070650203663689e-06, "loss": 0.0546, "step": 40055 }, { "epoch": 0.8826455568593102, "grad_norm": 0.619378924369812, "learning_rate": 1.0702530449931214e-06, "loss": 0.0602, "step": 40056 }, { "epoch": 0.8826675921488264, "grad_norm": 0.38677269220352173, "learning_rate": 1.06985595727422e-06, "loss": 0.0652, "step": 40057 }, { "epoch": 0.8826896274383425, "grad_norm": 0.6591152548789978, "learning_rate": 1.0694589405090132e-06, "loss": 0.062, "step": 40058 }, { "epoch": 0.8827116627278586, "grad_norm": 0.6638934016227722, "learning_rate": 1.0690619946995245e-06, "loss": 0.0622, "step": 40059 }, { "epoch": 0.8827336980173748, "grad_norm": 0.639836847782135, "learning_rate": 1.068665119847772e-06, "loss": 0.0729, "step": 40060 }, { "epoch": 0.8827557333068909, "grad_norm": 0.6112085580825806, "learning_rate": 1.0682683159557794e-06, "loss": 0.0563, "step": 40061 }, { "epoch": 0.8827777685964071, "grad_norm": 0.6824166774749756, "learning_rate": 1.0678715830255632e-06, "loss": 0.0519, "step": 40062 }, { "epoch": 0.8827998038859233, "grad_norm": 0.5188341736793518, "learning_rate": 1.0674749210591533e-06, "loss": 0.0531, "step": 40063 }, { "epoch": 0.8828218391754394, "grad_norm": 0.8509902954101562, "learning_rate": 1.0670783300585618e-06, "loss": 0.0549, "step": 40064 }, { "epoch": 0.8828438744649556, "grad_norm": 0.6342664957046509, "learning_rate": 1.0666818100258135e-06, "loss": 0.0561, "step": 40065 }, { "epoch": 0.8828659097544718, "grad_norm": 0.4383314847946167, "learning_rate": 1.0662853609629254e-06, "loss": 0.0418, "step": 40066 }, { "epoch": 0.8828879450439879, "grad_norm": 0.6888511180877686, "learning_rate": 1.0658889828719155e-06, "loss": 0.0883, "step": 40067 }, { "epoch": 0.8829099803335041, "grad_norm": 0.6690706014633179, "learning_rate": 1.0654926757548089e-06, "loss": 0.0464, "step": 40068 }, { "epoch": 0.8829320156230203, "grad_norm": 0.5608280897140503, "learning_rate": 1.0650964396136193e-06, "loss": 0.0498, "step": 40069 }, { "epoch": 0.8829540509125364, "grad_norm": 0.7058542370796204, "learning_rate": 1.0647002744503648e-06, "loss": 0.0662, "step": 40070 }, { "epoch": 0.8829760862020526, "grad_norm": 0.8872009515762329, "learning_rate": 1.0643041802670638e-06, "loss": 0.0467, "step": 40071 }, { "epoch": 0.8829981214915688, "grad_norm": 0.5795418620109558, "learning_rate": 1.0639081570657384e-06, "loss": 0.0572, "step": 40072 }, { "epoch": 0.8830201567810849, "grad_norm": 0.6365819573402405, "learning_rate": 1.063512204848398e-06, "loss": 0.0609, "step": 40073 }, { "epoch": 0.8830421920706011, "grad_norm": 0.28916770219802856, "learning_rate": 1.0631163236170631e-06, "loss": 0.0447, "step": 40074 }, { "epoch": 0.8830642273601172, "grad_norm": 0.608181357383728, "learning_rate": 1.062720513373754e-06, "loss": 0.065, "step": 40075 }, { "epoch": 0.8830862626496334, "grad_norm": 0.39658063650131226, "learning_rate": 1.0623247741204801e-06, "loss": 0.0859, "step": 40076 }, { "epoch": 0.8831082979391496, "grad_norm": 0.6187813878059387, "learning_rate": 1.06192910585926e-06, "loss": 0.1046, "step": 40077 }, { "epoch": 0.8831303332286657, "grad_norm": 0.4055447280406952, "learning_rate": 1.0615335085921107e-06, "loss": 0.0557, "step": 40078 }, { "epoch": 0.8831523685181819, "grad_norm": 0.40148216485977173, "learning_rate": 1.0611379823210471e-06, "loss": 0.0567, "step": 40079 }, { "epoch": 0.8831744038076981, "grad_norm": 0.6684138178825378, "learning_rate": 1.0607425270480791e-06, "loss": 0.0838, "step": 40080 }, { "epoch": 0.8831964390972142, "grad_norm": 0.4296184182167053, "learning_rate": 1.0603471427752238e-06, "loss": 0.0622, "step": 40081 }, { "epoch": 0.8832184743867304, "grad_norm": 0.7360771894454956, "learning_rate": 1.059951829504499e-06, "loss": 0.0613, "step": 40082 }, { "epoch": 0.8832405096762465, "grad_norm": 0.6838804483413696, "learning_rate": 1.059556587237912e-06, "loss": 0.0681, "step": 40083 }, { "epoch": 0.8832625449657626, "grad_norm": 0.7341629266738892, "learning_rate": 1.0591614159774827e-06, "loss": 0.0367, "step": 40084 }, { "epoch": 0.8832845802552788, "grad_norm": 0.6236748099327087, "learning_rate": 1.0587663157252142e-06, "loss": 0.0574, "step": 40085 }, { "epoch": 0.883306615544795, "grad_norm": 0.7418686151504517, "learning_rate": 1.0583712864831285e-06, "loss": 0.0642, "step": 40086 }, { "epoch": 0.8833286508343111, "grad_norm": 0.4111505448818207, "learning_rate": 1.0579763282532323e-06, "loss": 0.0455, "step": 40087 }, { "epoch": 0.8833506861238273, "grad_norm": 0.7063658833503723, "learning_rate": 1.0575814410375407e-06, "loss": 0.0564, "step": 40088 }, { "epoch": 0.8833727214133434, "grad_norm": 0.7051117420196533, "learning_rate": 1.0571866248380618e-06, "loss": 0.0457, "step": 40089 }, { "epoch": 0.8833947567028596, "grad_norm": 0.3862282931804657, "learning_rate": 1.0567918796568076e-06, "loss": 0.0344, "step": 40090 }, { "epoch": 0.8834167919923758, "grad_norm": 0.4648292064666748, "learning_rate": 1.0563972054957932e-06, "loss": 0.0506, "step": 40091 }, { "epoch": 0.8834388272818919, "grad_norm": 0.3692905902862549, "learning_rate": 1.0560026023570202e-06, "loss": 0.0638, "step": 40092 }, { "epoch": 0.8834608625714081, "grad_norm": 0.7015660405158997, "learning_rate": 1.0556080702425053e-06, "loss": 0.0682, "step": 40093 }, { "epoch": 0.8834828978609243, "grad_norm": 0.5614306330680847, "learning_rate": 1.0552136091542553e-06, "loss": 0.0659, "step": 40094 }, { "epoch": 0.8835049331504404, "grad_norm": 0.40191593766212463, "learning_rate": 1.0548192190942817e-06, "loss": 0.0659, "step": 40095 }, { "epoch": 0.8835269684399566, "grad_norm": 0.8010010719299316, "learning_rate": 1.0544249000645917e-06, "loss": 0.0867, "step": 40096 }, { "epoch": 0.8835490037294728, "grad_norm": 0.8631024360656738, "learning_rate": 1.0540306520671915e-06, "loss": 0.0574, "step": 40097 }, { "epoch": 0.8835710390189889, "grad_norm": 0.7689285278320312, "learning_rate": 1.053636475104095e-06, "loss": 0.0606, "step": 40098 }, { "epoch": 0.8835930743085051, "grad_norm": 0.701599657535553, "learning_rate": 1.0532423691773052e-06, "loss": 0.0583, "step": 40099 }, { "epoch": 0.8836151095980213, "grad_norm": 0.41836774349212646, "learning_rate": 1.052848334288829e-06, "loss": 0.0723, "step": 40100 }, { "epoch": 0.8836371448875374, "grad_norm": 0.4523070156574249, "learning_rate": 1.0524543704406765e-06, "loss": 0.0588, "step": 40101 }, { "epoch": 0.8836591801770536, "grad_norm": 0.7293725609779358, "learning_rate": 1.052060477634854e-06, "loss": 0.0602, "step": 40102 }, { "epoch": 0.8836812154665697, "grad_norm": 0.5686967968940735, "learning_rate": 1.0516666558733673e-06, "loss": 0.0928, "step": 40103 }, { "epoch": 0.8837032507560859, "grad_norm": 0.6344287991523743, "learning_rate": 1.0512729051582193e-06, "loss": 0.0498, "step": 40104 }, { "epoch": 0.8837252860456021, "grad_norm": 0.6949694752693176, "learning_rate": 1.0508792254914217e-06, "loss": 0.0559, "step": 40105 }, { "epoch": 0.8837473213351182, "grad_norm": 0.8340121507644653, "learning_rate": 1.050485616874975e-06, "loss": 0.0656, "step": 40106 }, { "epoch": 0.8837693566246344, "grad_norm": 0.6077156066894531, "learning_rate": 1.050092079310887e-06, "loss": 0.0566, "step": 40107 }, { "epoch": 0.8837913919141505, "grad_norm": 0.883270800113678, "learning_rate": 1.0496986128011548e-06, "loss": 0.0679, "step": 40108 }, { "epoch": 0.8838134272036666, "grad_norm": 0.8419399857521057, "learning_rate": 1.0493052173477935e-06, "loss": 0.0775, "step": 40109 }, { "epoch": 0.8838354624931828, "grad_norm": 0.8148558139801025, "learning_rate": 1.0489118929527996e-06, "loss": 0.0508, "step": 40110 }, { "epoch": 0.883857497782699, "grad_norm": 0.7988736629486084, "learning_rate": 1.0485186396181818e-06, "loss": 0.0726, "step": 40111 }, { "epoch": 0.8838795330722151, "grad_norm": 0.7642849683761597, "learning_rate": 1.0481254573459364e-06, "loss": 0.0593, "step": 40112 }, { "epoch": 0.8839015683617313, "grad_norm": 0.42819520831108093, "learning_rate": 1.0477323461380706e-06, "loss": 0.0401, "step": 40113 }, { "epoch": 0.8839236036512474, "grad_norm": 0.7122673392295837, "learning_rate": 1.0473393059965875e-06, "loss": 0.0595, "step": 40114 }, { "epoch": 0.8839456389407636, "grad_norm": 0.10079693794250488, "learning_rate": 1.0469463369234855e-06, "loss": 0.0365, "step": 40115 }, { "epoch": 0.8839676742302798, "grad_norm": 0.8175660371780396, "learning_rate": 1.0465534389207665e-06, "loss": 0.0564, "step": 40116 }, { "epoch": 0.8839897095197959, "grad_norm": 0.5237026810646057, "learning_rate": 1.0461606119904354e-06, "loss": 0.0453, "step": 40117 }, { "epoch": 0.8840117448093121, "grad_norm": 0.6717385053634644, "learning_rate": 1.0457678561344924e-06, "loss": 0.0584, "step": 40118 }, { "epoch": 0.8840337800988283, "grad_norm": 0.37728452682495117, "learning_rate": 1.0453751713549341e-06, "loss": 0.0431, "step": 40119 }, { "epoch": 0.8840558153883444, "grad_norm": 0.29060643911361694, "learning_rate": 1.0449825576537626e-06, "loss": 0.0497, "step": 40120 }, { "epoch": 0.8840778506778606, "grad_norm": 0.4676174819469452, "learning_rate": 1.0445900150329807e-06, "loss": 0.0524, "step": 40121 }, { "epoch": 0.8840998859673768, "grad_norm": 0.7110471725463867, "learning_rate": 1.0441975434945838e-06, "loss": 0.0956, "step": 40122 }, { "epoch": 0.8841219212568929, "grad_norm": 0.9247017502784729, "learning_rate": 1.043805143040572e-06, "loss": 0.0781, "step": 40123 }, { "epoch": 0.8841439565464091, "grad_norm": 0.4449017643928528, "learning_rate": 1.0434128136729438e-06, "loss": 0.0398, "step": 40124 }, { "epoch": 0.8841659918359253, "grad_norm": 0.4828605651855469, "learning_rate": 1.0430205553937023e-06, "loss": 0.0566, "step": 40125 }, { "epoch": 0.8841880271254414, "grad_norm": 0.7794541120529175, "learning_rate": 1.0426283682048377e-06, "loss": 0.049, "step": 40126 }, { "epoch": 0.8842100624149576, "grad_norm": 0.6588670611381531, "learning_rate": 1.0422362521083518e-06, "loss": 0.0588, "step": 40127 }, { "epoch": 0.8842320977044738, "grad_norm": 0.7796924114227295, "learning_rate": 1.0418442071062445e-06, "loss": 0.0538, "step": 40128 }, { "epoch": 0.8842541329939899, "grad_norm": 0.4539218544960022, "learning_rate": 1.041452233200506e-06, "loss": 0.0444, "step": 40129 }, { "epoch": 0.8842761682835061, "grad_norm": 0.5177585482597351, "learning_rate": 1.04106033039314e-06, "loss": 0.0522, "step": 40130 }, { "epoch": 0.8842982035730222, "grad_norm": 0.5278159379959106, "learning_rate": 1.0406684986861326e-06, "loss": 0.0442, "step": 40131 }, { "epoch": 0.8843202388625384, "grad_norm": 0.7706285715103149, "learning_rate": 1.0402767380814925e-06, "loss": 0.0552, "step": 40132 }, { "epoch": 0.8843422741520545, "grad_norm": 0.5958575010299683, "learning_rate": 1.039885048581205e-06, "loss": 0.0673, "step": 40133 }, { "epoch": 0.8843643094415706, "grad_norm": 0.6702791452407837, "learning_rate": 1.0394934301872733e-06, "loss": 0.0531, "step": 40134 }, { "epoch": 0.8843863447310868, "grad_norm": 0.4495986998081207, "learning_rate": 1.0391018829016841e-06, "loss": 0.0608, "step": 40135 }, { "epoch": 0.884408380020603, "grad_norm": 0.5407990217208862, "learning_rate": 1.0387104067264357e-06, "loss": 0.0773, "step": 40136 }, { "epoch": 0.8844304153101191, "grad_norm": 0.4903877377510071, "learning_rate": 1.0383190016635235e-06, "loss": 0.0446, "step": 40137 }, { "epoch": 0.8844524505996353, "grad_norm": 0.36474552750587463, "learning_rate": 1.0379276677149358e-06, "loss": 0.0368, "step": 40138 }, { "epoch": 0.8844744858891515, "grad_norm": 0.3066648840904236, "learning_rate": 1.0375364048826741e-06, "loss": 0.0508, "step": 40139 }, { "epoch": 0.8844965211786676, "grad_norm": 0.7530861496925354, "learning_rate": 1.0371452131687237e-06, "loss": 0.0554, "step": 40140 }, { "epoch": 0.8845185564681838, "grad_norm": 0.313355028629303, "learning_rate": 1.036754092575083e-06, "loss": 0.0358, "step": 40141 }, { "epoch": 0.8845405917577, "grad_norm": 0.19589082896709442, "learning_rate": 1.0363630431037385e-06, "loss": 0.057, "step": 40142 }, { "epoch": 0.8845626270472161, "grad_norm": 0.8516579270362854, "learning_rate": 1.0359720647566857e-06, "loss": 0.0973, "step": 40143 }, { "epoch": 0.8845846623367323, "grad_norm": 0.488965779542923, "learning_rate": 1.0355811575359176e-06, "loss": 0.0383, "step": 40144 }, { "epoch": 0.8846066976262484, "grad_norm": 0.5906303524971008, "learning_rate": 1.0351903214434211e-06, "loss": 0.0548, "step": 40145 }, { "epoch": 0.8846287329157646, "grad_norm": 0.3264741599559784, "learning_rate": 1.0347995564811879e-06, "loss": 0.0474, "step": 40146 }, { "epoch": 0.8846507682052808, "grad_norm": 0.5973814129829407, "learning_rate": 1.03440886265121e-06, "loss": 0.0383, "step": 40147 }, { "epoch": 0.8846728034947969, "grad_norm": 0.8689852356910706, "learning_rate": 1.0340182399554787e-06, "loss": 0.049, "step": 40148 }, { "epoch": 0.8846948387843131, "grad_norm": 0.7568405270576477, "learning_rate": 1.0336276883959777e-06, "loss": 0.0793, "step": 40149 }, { "epoch": 0.8847168740738293, "grad_norm": 0.4778329133987427, "learning_rate": 1.033237207974702e-06, "loss": 0.0444, "step": 40150 }, { "epoch": 0.8847389093633454, "grad_norm": 0.5571642518043518, "learning_rate": 1.0328467986936385e-06, "loss": 0.0754, "step": 40151 }, { "epoch": 0.8847609446528616, "grad_norm": 0.559937596321106, "learning_rate": 1.0324564605547787e-06, "loss": 0.0509, "step": 40152 }, { "epoch": 0.8847829799423778, "grad_norm": 1.7450045347213745, "learning_rate": 1.0320661935601079e-06, "loss": 0.0764, "step": 40153 }, { "epoch": 0.8848050152318939, "grad_norm": 0.7978875041007996, "learning_rate": 1.0316759977116092e-06, "loss": 0.059, "step": 40154 }, { "epoch": 0.8848270505214101, "grad_norm": 0.6944060921669006, "learning_rate": 1.0312858730112813e-06, "loss": 0.0561, "step": 40155 }, { "epoch": 0.8848490858109263, "grad_norm": 0.5000798106193542, "learning_rate": 1.0308958194611007e-06, "loss": 0.0451, "step": 40156 }, { "epoch": 0.8848711211004423, "grad_norm": 0.41493648290634155, "learning_rate": 1.030505837063061e-06, "loss": 0.0579, "step": 40157 }, { "epoch": 0.8848931563899585, "grad_norm": 0.34795814752578735, "learning_rate": 1.0301159258191422e-06, "loss": 0.0696, "step": 40158 }, { "epoch": 0.8849151916794746, "grad_norm": 0.8704951405525208, "learning_rate": 1.0297260857313396e-06, "loss": 0.0818, "step": 40159 }, { "epoch": 0.8849372269689908, "grad_norm": 0.5354028940200806, "learning_rate": 1.0293363168016295e-06, "loss": 0.0854, "step": 40160 }, { "epoch": 0.884959262258507, "grad_norm": 1.080035924911499, "learning_rate": 1.0289466190320024e-06, "loss": 0.0739, "step": 40161 }, { "epoch": 0.8849812975480231, "grad_norm": 0.5090339779853821, "learning_rate": 1.0285569924244432e-06, "loss": 0.0674, "step": 40162 }, { "epoch": 0.8850033328375393, "grad_norm": 0.43337535858154297, "learning_rate": 1.0281674369809336e-06, "loss": 0.0391, "step": 40163 }, { "epoch": 0.8850253681270555, "grad_norm": 0.6583690643310547, "learning_rate": 1.027777952703462e-06, "loss": 0.0666, "step": 40164 }, { "epoch": 0.8850474034165716, "grad_norm": 1.2025549411773682, "learning_rate": 1.0273885395940053e-06, "loss": 0.0785, "step": 40165 }, { "epoch": 0.8850694387060878, "grad_norm": 0.12173029035329819, "learning_rate": 1.0269991976545568e-06, "loss": 0.0345, "step": 40166 }, { "epoch": 0.885091473995604, "grad_norm": 0.6187169551849365, "learning_rate": 1.0266099268870898e-06, "loss": 0.0497, "step": 40167 }, { "epoch": 0.8851135092851201, "grad_norm": 0.5435214638710022, "learning_rate": 1.0262207272935964e-06, "loss": 0.0565, "step": 40168 }, { "epoch": 0.8851355445746363, "grad_norm": 0.6886289715766907, "learning_rate": 1.0258315988760498e-06, "loss": 0.0611, "step": 40169 }, { "epoch": 0.8851575798641524, "grad_norm": 0.614819347858429, "learning_rate": 1.0254425416364383e-06, "loss": 0.04, "step": 40170 }, { "epoch": 0.8851796151536686, "grad_norm": 0.5780134797096252, "learning_rate": 1.025053555576742e-06, "loss": 0.0578, "step": 40171 }, { "epoch": 0.8852016504431848, "grad_norm": 0.6254692673683167, "learning_rate": 1.0246646406989413e-06, "loss": 0.0475, "step": 40172 }, { "epoch": 0.8852236857327009, "grad_norm": 0.4686753749847412, "learning_rate": 1.024275797005016e-06, "loss": 0.0384, "step": 40173 }, { "epoch": 0.8852457210222171, "grad_norm": 0.6537207961082458, "learning_rate": 1.0238870244969496e-06, "loss": 0.0623, "step": 40174 }, { "epoch": 0.8852677563117333, "grad_norm": 0.4407404363155365, "learning_rate": 1.0234983231767237e-06, "loss": 0.032, "step": 40175 }, { "epoch": 0.8852897916012494, "grad_norm": 0.44119927287101746, "learning_rate": 1.0231096930463135e-06, "loss": 0.0514, "step": 40176 }, { "epoch": 0.8853118268907656, "grad_norm": 0.5445889234542847, "learning_rate": 1.0227211341076992e-06, "loss": 0.0617, "step": 40177 }, { "epoch": 0.8853338621802818, "grad_norm": 0.34781625866889954, "learning_rate": 1.0223326463628657e-06, "loss": 0.0677, "step": 40178 }, { "epoch": 0.8853558974697979, "grad_norm": 0.6550445556640625, "learning_rate": 1.0219442298137832e-06, "loss": 0.0724, "step": 40179 }, { "epoch": 0.8853779327593141, "grad_norm": 1.2203176021575928, "learning_rate": 1.0215558844624384e-06, "loss": 0.0439, "step": 40180 }, { "epoch": 0.8853999680488303, "grad_norm": 0.9727506041526794, "learning_rate": 1.0211676103107998e-06, "loss": 0.0773, "step": 40181 }, { "epoch": 0.8854220033383463, "grad_norm": 0.7340949177742004, "learning_rate": 1.020779407360854e-06, "loss": 0.0588, "step": 40182 }, { "epoch": 0.8854440386278625, "grad_norm": 0.9526752829551697, "learning_rate": 1.0203912756145745e-06, "loss": 0.0565, "step": 40183 }, { "epoch": 0.8854660739173786, "grad_norm": 0.7792679667472839, "learning_rate": 1.020003215073938e-06, "loss": 0.0801, "step": 40184 }, { "epoch": 0.8854881092068948, "grad_norm": 0.5678273439407349, "learning_rate": 1.0196152257409247e-06, "loss": 0.049, "step": 40185 }, { "epoch": 0.885510144496411, "grad_norm": 0.5074408650398254, "learning_rate": 1.0192273076175046e-06, "loss": 0.0508, "step": 40186 }, { "epoch": 0.8855321797859271, "grad_norm": 0.8029211163520813, "learning_rate": 1.0188394607056594e-06, "loss": 0.0438, "step": 40187 }, { "epoch": 0.8855542150754433, "grad_norm": 0.6093722581863403, "learning_rate": 1.0184516850073578e-06, "loss": 0.0852, "step": 40188 }, { "epoch": 0.8855762503649595, "grad_norm": 0.5812286734580994, "learning_rate": 1.0180639805245845e-06, "loss": 0.0628, "step": 40189 }, { "epoch": 0.8855982856544756, "grad_norm": 0.6774912476539612, "learning_rate": 1.0176763472593065e-06, "loss": 0.0516, "step": 40190 }, { "epoch": 0.8856203209439918, "grad_norm": 0.4512489438056946, "learning_rate": 1.0172887852135021e-06, "loss": 0.0535, "step": 40191 }, { "epoch": 0.885642356233508, "grad_norm": 0.4316136837005615, "learning_rate": 1.0169012943891432e-06, "loss": 0.0637, "step": 40192 }, { "epoch": 0.8856643915230241, "grad_norm": 0.5579984784126282, "learning_rate": 1.016513874788203e-06, "loss": 0.0465, "step": 40193 }, { "epoch": 0.8856864268125403, "grad_norm": 0.920627236366272, "learning_rate": 1.0161265264126585e-06, "loss": 0.0626, "step": 40194 }, { "epoch": 0.8857084621020564, "grad_norm": 0.4474279582500458, "learning_rate": 1.0157392492644778e-06, "loss": 0.0694, "step": 40195 }, { "epoch": 0.8857304973915726, "grad_norm": 0.42533212900161743, "learning_rate": 1.0153520433456382e-06, "loss": 0.042, "step": 40196 }, { "epoch": 0.8857525326810888, "grad_norm": 1.0553966760635376, "learning_rate": 1.0149649086581075e-06, "loss": 0.0849, "step": 40197 }, { "epoch": 0.8857745679706049, "grad_norm": 0.3288263976573944, "learning_rate": 1.0145778452038628e-06, "loss": 0.0584, "step": 40198 }, { "epoch": 0.8857966032601211, "grad_norm": 0.9792346358299255, "learning_rate": 1.0141908529848709e-06, "loss": 0.0653, "step": 40199 }, { "epoch": 0.8858186385496373, "grad_norm": 0.7496128082275391, "learning_rate": 1.013803932003105e-06, "loss": 0.0445, "step": 40200 }, { "epoch": 0.8858406738391534, "grad_norm": 0.7697591781616211, "learning_rate": 1.013417082260537e-06, "loss": 0.0502, "step": 40201 }, { "epoch": 0.8858627091286696, "grad_norm": 0.5980826020240784, "learning_rate": 1.0130303037591355e-06, "loss": 0.068, "step": 40202 }, { "epoch": 0.8858847444181858, "grad_norm": 0.5319054126739502, "learning_rate": 1.0126435965008718e-06, "loss": 0.0592, "step": 40203 }, { "epoch": 0.8859067797077019, "grad_norm": 0.9884092211723328, "learning_rate": 1.0122569604877097e-06, "loss": 0.0602, "step": 40204 }, { "epoch": 0.8859288149972181, "grad_norm": 0.47553446888923645, "learning_rate": 1.0118703957216292e-06, "loss": 0.0513, "step": 40205 }, { "epoch": 0.8859508502867343, "grad_norm": 0.36691269278526306, "learning_rate": 1.011483902204592e-06, "loss": 0.0415, "step": 40206 }, { "epoch": 0.8859728855762503, "grad_norm": 0.6925278306007385, "learning_rate": 1.011097479938568e-06, "loss": 0.0647, "step": 40207 }, { "epoch": 0.8859949208657665, "grad_norm": 0.565797746181488, "learning_rate": 1.0107111289255277e-06, "loss": 0.0758, "step": 40208 }, { "epoch": 0.8860169561552826, "grad_norm": 0.6483088135719299, "learning_rate": 1.010324849167436e-06, "loss": 0.0549, "step": 40209 }, { "epoch": 0.8860389914447988, "grad_norm": 0.6733806133270264, "learning_rate": 1.009938640666263e-06, "loss": 0.0633, "step": 40210 }, { "epoch": 0.886061026734315, "grad_norm": 1.0448724031448364, "learning_rate": 1.0095525034239705e-06, "loss": 0.1022, "step": 40211 }, { "epoch": 0.8860830620238311, "grad_norm": 0.30594274401664734, "learning_rate": 1.0091664374425352e-06, "loss": 0.0433, "step": 40212 }, { "epoch": 0.8861050973133473, "grad_norm": 0.9323678612709045, "learning_rate": 1.008780442723914e-06, "loss": 0.0795, "step": 40213 }, { "epoch": 0.8861271326028635, "grad_norm": 0.5299034118652344, "learning_rate": 1.0083945192700782e-06, "loss": 0.0488, "step": 40214 }, { "epoch": 0.8861491678923796, "grad_norm": 0.8295331597328186, "learning_rate": 1.008008667082992e-06, "loss": 0.0668, "step": 40215 }, { "epoch": 0.8861712031818958, "grad_norm": 0.4458540976047516, "learning_rate": 1.0076228861646197e-06, "loss": 0.0587, "step": 40216 }, { "epoch": 0.886193238471412, "grad_norm": 0.5725514888763428, "learning_rate": 1.0072371765169286e-06, "loss": 0.0682, "step": 40217 }, { "epoch": 0.8862152737609281, "grad_norm": 0.7643792629241943, "learning_rate": 1.0068515381418801e-06, "loss": 0.0557, "step": 40218 }, { "epoch": 0.8862373090504443, "grad_norm": 0.4725876748561859, "learning_rate": 1.0064659710414414e-06, "loss": 0.0686, "step": 40219 }, { "epoch": 0.8862593443399605, "grad_norm": 0.5984161496162415, "learning_rate": 1.0060804752175757e-06, "loss": 0.0674, "step": 40220 }, { "epoch": 0.8862813796294766, "grad_norm": 0.8371888399124146, "learning_rate": 1.005695050672248e-06, "loss": 0.0931, "step": 40221 }, { "epoch": 0.8863034149189928, "grad_norm": 0.6572787165641785, "learning_rate": 1.0053096974074166e-06, "loss": 0.0804, "step": 40222 }, { "epoch": 0.886325450208509, "grad_norm": 0.7569135427474976, "learning_rate": 1.0049244154250486e-06, "loss": 0.0798, "step": 40223 }, { "epoch": 0.8863474854980251, "grad_norm": 0.3490813374519348, "learning_rate": 1.0045392047271073e-06, "loss": 0.0489, "step": 40224 }, { "epoch": 0.8863695207875413, "grad_norm": 0.7620484232902527, "learning_rate": 1.0041540653155496e-06, "loss": 0.0666, "step": 40225 }, { "epoch": 0.8863915560770574, "grad_norm": 0.47363659739494324, "learning_rate": 1.0037689971923435e-06, "loss": 0.0481, "step": 40226 }, { "epoch": 0.8864135913665736, "grad_norm": 0.7079286575317383, "learning_rate": 1.003384000359443e-06, "loss": 0.0447, "step": 40227 }, { "epoch": 0.8864356266560898, "grad_norm": 0.7104068398475647, "learning_rate": 1.0029990748188179e-06, "loss": 0.0843, "step": 40228 }, { "epoch": 0.8864576619456059, "grad_norm": 0.5985183715820312, "learning_rate": 1.0026142205724215e-06, "loss": 0.0389, "step": 40229 }, { "epoch": 0.8864796972351221, "grad_norm": 0.4871154725551605, "learning_rate": 1.0022294376222173e-06, "loss": 0.0649, "step": 40230 }, { "epoch": 0.8865017325246382, "grad_norm": 0.43566274642944336, "learning_rate": 1.0018447259701657e-06, "loss": 0.0438, "step": 40231 }, { "epoch": 0.8865237678141543, "grad_norm": 0.6374766230583191, "learning_rate": 1.0014600856182248e-06, "loss": 0.0739, "step": 40232 }, { "epoch": 0.8865458031036705, "grad_norm": 0.5307684540748596, "learning_rate": 1.0010755165683565e-06, "loss": 0.0528, "step": 40233 }, { "epoch": 0.8865678383931866, "grad_norm": 0.41984546184539795, "learning_rate": 1.0006910188225126e-06, "loss": 0.0604, "step": 40234 }, { "epoch": 0.8865898736827028, "grad_norm": 0.46607983112335205, "learning_rate": 1.0003065923826598e-06, "loss": 0.0449, "step": 40235 }, { "epoch": 0.886611908972219, "grad_norm": 0.5093342065811157, "learning_rate": 9.999222372507516e-07, "loss": 0.0668, "step": 40236 }, { "epoch": 0.8866339442617351, "grad_norm": 0.7612516283988953, "learning_rate": 9.995379534287497e-07, "loss": 0.0539, "step": 40237 }, { "epoch": 0.8866559795512513, "grad_norm": 0.600351095199585, "learning_rate": 9.99153740918604e-07, "loss": 0.0401, "step": 40238 }, { "epoch": 0.8866780148407675, "grad_norm": 0.586481511592865, "learning_rate": 9.987695997222784e-07, "loss": 0.0589, "step": 40239 }, { "epoch": 0.8867000501302836, "grad_norm": 0.5579153299331665, "learning_rate": 9.983855298417294e-07, "loss": 0.0699, "step": 40240 }, { "epoch": 0.8867220854197998, "grad_norm": 0.34758642315864563, "learning_rate": 9.98001531278907e-07, "loss": 0.0355, "step": 40241 }, { "epoch": 0.886744120709316, "grad_norm": 0.6788315773010254, "learning_rate": 9.976176040357733e-07, "loss": 0.0605, "step": 40242 }, { "epoch": 0.8867661559988321, "grad_norm": 0.28476354479789734, "learning_rate": 9.972337481142812e-07, "loss": 0.0615, "step": 40243 }, { "epoch": 0.8867881912883483, "grad_norm": 0.6453654766082764, "learning_rate": 9.96849963516388e-07, "loss": 0.0763, "step": 40244 }, { "epoch": 0.8868102265778645, "grad_norm": 0.36993739008903503, "learning_rate": 9.96466250244047e-07, "loss": 0.0487, "step": 40245 }, { "epoch": 0.8868322618673806, "grad_norm": 0.5464287996292114, "learning_rate": 9.960826082992096e-07, "loss": 0.0519, "step": 40246 }, { "epoch": 0.8868542971568968, "grad_norm": 0.7085462808609009, "learning_rate": 9.956990376838381e-07, "loss": 0.0636, "step": 40247 }, { "epoch": 0.886876332446413, "grad_norm": 0.28155264258384705, "learning_rate": 9.953155383998775e-07, "loss": 0.0318, "step": 40248 }, { "epoch": 0.8868983677359291, "grad_norm": 0.7723642587661743, "learning_rate": 9.94932110449286e-07, "loss": 0.0682, "step": 40249 }, { "epoch": 0.8869204030254453, "grad_norm": 0.581378698348999, "learning_rate": 9.94548753834012e-07, "loss": 0.086, "step": 40250 }, { "epoch": 0.8869424383149614, "grad_norm": 0.3259853422641754, "learning_rate": 9.94165468556016e-07, "loss": 0.0562, "step": 40251 }, { "epoch": 0.8869644736044776, "grad_norm": 0.7106927633285522, "learning_rate": 9.937822546172443e-07, "loss": 0.0495, "step": 40252 }, { "epoch": 0.8869865088939938, "grad_norm": 0.5335684418678284, "learning_rate": 9.933991120196489e-07, "loss": 0.099, "step": 40253 }, { "epoch": 0.8870085441835099, "grad_norm": 0.760915219783783, "learning_rate": 9.93016040765185e-07, "loss": 0.07, "step": 40254 }, { "epoch": 0.8870305794730261, "grad_norm": 0.4071323573589325, "learning_rate": 9.92633040855801e-07, "loss": 0.0347, "step": 40255 }, { "epoch": 0.8870526147625422, "grad_norm": 0.598759651184082, "learning_rate": 9.922501122934485e-07, "loss": 0.0702, "step": 40256 }, { "epoch": 0.8870746500520583, "grad_norm": 0.6566972136497498, "learning_rate": 9.918672550800746e-07, "loss": 0.0583, "step": 40257 }, { "epoch": 0.8870966853415745, "grad_norm": 0.34532859921455383, "learning_rate": 9.914844692176356e-07, "loss": 0.0617, "step": 40258 }, { "epoch": 0.8871187206310907, "grad_norm": 0.4547961354255676, "learning_rate": 9.91101754708077e-07, "loss": 0.0395, "step": 40259 }, { "epoch": 0.8871407559206068, "grad_norm": 0.8658309578895569, "learning_rate": 9.90719111553352e-07, "loss": 0.0867, "step": 40260 }, { "epoch": 0.887162791210123, "grad_norm": 0.8591775894165039, "learning_rate": 9.903365397554009e-07, "loss": 0.0683, "step": 40261 }, { "epoch": 0.8871848264996391, "grad_norm": 0.4587731957435608, "learning_rate": 9.899540393161854e-07, "loss": 0.0425, "step": 40262 }, { "epoch": 0.8872068617891553, "grad_norm": 0.6955423355102539, "learning_rate": 9.895716102376422e-07, "loss": 0.0644, "step": 40263 }, { "epoch": 0.8872288970786715, "grad_norm": 0.6788517236709595, "learning_rate": 9.891892525217267e-07, "loss": 0.0701, "step": 40264 }, { "epoch": 0.8872509323681876, "grad_norm": 0.6644066572189331, "learning_rate": 9.88806966170382e-07, "loss": 0.0801, "step": 40265 }, { "epoch": 0.8872729676577038, "grad_norm": 0.4054338037967682, "learning_rate": 9.884247511855566e-07, "loss": 0.0621, "step": 40266 }, { "epoch": 0.88729500294722, "grad_norm": 0.5535939931869507, "learning_rate": 9.88042607569199e-07, "loss": 0.0508, "step": 40267 }, { "epoch": 0.8873170382367361, "grad_norm": 0.1463625282049179, "learning_rate": 9.876605353232525e-07, "loss": 0.0348, "step": 40268 }, { "epoch": 0.8873390735262523, "grad_norm": 0.9085543751716614, "learning_rate": 9.872785344496643e-07, "loss": 0.0579, "step": 40269 }, { "epoch": 0.8873611088157685, "grad_norm": 0.5245285630226135, "learning_rate": 9.86896604950381e-07, "loss": 0.0507, "step": 40270 }, { "epoch": 0.8873831441052846, "grad_norm": 0.7051073312759399, "learning_rate": 9.865147468273506e-07, "loss": 0.1063, "step": 40271 }, { "epoch": 0.8874051793948008, "grad_norm": 0.75241619348526, "learning_rate": 9.86132960082512e-07, "loss": 0.0421, "step": 40272 }, { "epoch": 0.887427214684317, "grad_norm": 0.39049893617630005, "learning_rate": 9.857512447178135e-07, "loss": 0.0466, "step": 40273 }, { "epoch": 0.8874492499738331, "grad_norm": 0.5380799174308777, "learning_rate": 9.85369600735202e-07, "loss": 0.0728, "step": 40274 }, { "epoch": 0.8874712852633493, "grad_norm": 0.6774395108222961, "learning_rate": 9.849880281366142e-07, "loss": 0.059, "step": 40275 }, { "epoch": 0.8874933205528655, "grad_norm": 0.8460888266563416, "learning_rate": 9.84606526924e-07, "loss": 0.0903, "step": 40276 }, { "epoch": 0.8875153558423816, "grad_norm": 0.5570636987686157, "learning_rate": 9.842250970992982e-07, "loss": 0.044, "step": 40277 }, { "epoch": 0.8875373911318978, "grad_norm": 0.42797136306762695, "learning_rate": 9.838437386644589e-07, "loss": 0.0466, "step": 40278 }, { "epoch": 0.8875594264214139, "grad_norm": 0.6197579503059387, "learning_rate": 9.834624516214152e-07, "loss": 0.0503, "step": 40279 }, { "epoch": 0.8875814617109301, "grad_norm": 0.43752893805503845, "learning_rate": 9.830812359721143e-07, "loss": 0.0628, "step": 40280 }, { "epoch": 0.8876034970004462, "grad_norm": 0.3116395175457001, "learning_rate": 9.827000917184992e-07, "loss": 0.0382, "step": 40281 }, { "epoch": 0.8876255322899623, "grad_norm": 0.3237353563308716, "learning_rate": 9.823190188625053e-07, "loss": 0.0508, "step": 40282 }, { "epoch": 0.8876475675794785, "grad_norm": 0.7094661593437195, "learning_rate": 9.819380174060827e-07, "loss": 0.0561, "step": 40283 }, { "epoch": 0.8876696028689947, "grad_norm": 0.5272808074951172, "learning_rate": 9.815570873511599e-07, "loss": 0.0754, "step": 40284 }, { "epoch": 0.8876916381585108, "grad_norm": 0.5233811736106873, "learning_rate": 9.8117622869969e-07, "loss": 0.0755, "step": 40285 }, { "epoch": 0.887713673448027, "grad_norm": 0.5006498694419861, "learning_rate": 9.807954414536036e-07, "loss": 0.0345, "step": 40286 }, { "epoch": 0.8877357087375431, "grad_norm": 0.5340704321861267, "learning_rate": 9.804147256148472e-07, "loss": 0.0594, "step": 40287 }, { "epoch": 0.8877577440270593, "grad_norm": 0.6301851272583008, "learning_rate": 9.80034081185354e-07, "loss": 0.0635, "step": 40288 }, { "epoch": 0.8877797793165755, "grad_norm": 0.4286089837551117, "learning_rate": 9.796535081670648e-07, "loss": 0.0293, "step": 40289 }, { "epoch": 0.8878018146060916, "grad_norm": 0.7362740635871887, "learning_rate": 9.792730065619227e-07, "loss": 0.062, "step": 40290 }, { "epoch": 0.8878238498956078, "grad_norm": 0.9546219706535339, "learning_rate": 9.788925763718577e-07, "loss": 0.0588, "step": 40291 }, { "epoch": 0.887845885185124, "grad_norm": 0.4228942394256592, "learning_rate": 9.785122175988114e-07, "loss": 0.0548, "step": 40292 }, { "epoch": 0.8878679204746401, "grad_norm": 0.3866861164569855, "learning_rate": 9.781319302447212e-07, "loss": 0.0546, "step": 40293 }, { "epoch": 0.8878899557641563, "grad_norm": 0.8025438189506531, "learning_rate": 9.777517143115266e-07, "loss": 0.0618, "step": 40294 }, { "epoch": 0.8879119910536725, "grad_norm": 0.3985683023929596, "learning_rate": 9.773715698011598e-07, "loss": 0.0283, "step": 40295 }, { "epoch": 0.8879340263431886, "grad_norm": 0.6324963569641113, "learning_rate": 9.769914967155607e-07, "loss": 0.0667, "step": 40296 }, { "epoch": 0.8879560616327048, "grad_norm": 0.49669238924980164, "learning_rate": 9.766114950566646e-07, "loss": 0.0437, "step": 40297 }, { "epoch": 0.887978096922221, "grad_norm": 0.3339281678199768, "learning_rate": 9.76231564826403e-07, "loss": 0.0459, "step": 40298 }, { "epoch": 0.8880001322117371, "grad_norm": 0.3776094615459442, "learning_rate": 9.758517060267148e-07, "loss": 0.0422, "step": 40299 }, { "epoch": 0.8880221675012533, "grad_norm": 0.27366873621940613, "learning_rate": 9.754719186595346e-07, "loss": 0.0467, "step": 40300 }, { "epoch": 0.8880442027907695, "grad_norm": 0.6660369634628296, "learning_rate": 9.750922027267978e-07, "loss": 0.0557, "step": 40301 }, { "epoch": 0.8880662380802856, "grad_norm": 0.7879725694656372, "learning_rate": 9.747125582304361e-07, "loss": 0.0949, "step": 40302 }, { "epoch": 0.8880882733698018, "grad_norm": 0.5231934785842896, "learning_rate": 9.743329851723831e-07, "loss": 0.0488, "step": 40303 }, { "epoch": 0.888110308659318, "grad_norm": 0.6102381944656372, "learning_rate": 9.73953483554577e-07, "loss": 0.0557, "step": 40304 }, { "epoch": 0.8881323439488341, "grad_norm": 0.5932514667510986, "learning_rate": 9.735740533789434e-07, "loss": 0.0728, "step": 40305 }, { "epoch": 0.8881543792383502, "grad_norm": 0.4900470972061157, "learning_rate": 9.731946946474219e-07, "loss": 0.0603, "step": 40306 }, { "epoch": 0.8881764145278663, "grad_norm": 1.0510375499725342, "learning_rate": 9.72815407361936e-07, "loss": 0.091, "step": 40307 }, { "epoch": 0.8881984498173825, "grad_norm": 0.49002620577812195, "learning_rate": 9.724361915244295e-07, "loss": 0.0441, "step": 40308 }, { "epoch": 0.8882204851068987, "grad_norm": 0.7711312770843506, "learning_rate": 9.72057047136824e-07, "loss": 0.0819, "step": 40309 }, { "epoch": 0.8882425203964148, "grad_norm": 0.9873337149620056, "learning_rate": 9.716779742010578e-07, "loss": 0.0623, "step": 40310 }, { "epoch": 0.888264555685931, "grad_norm": 0.45142504572868347, "learning_rate": 9.712989727190547e-07, "loss": 0.0886, "step": 40311 }, { "epoch": 0.8882865909754472, "grad_norm": 0.37931326031684875, "learning_rate": 9.709200426927495e-07, "loss": 0.0433, "step": 40312 }, { "epoch": 0.8883086262649633, "grad_norm": 0.6016141772270203, "learning_rate": 9.705411841240742e-07, "loss": 0.0585, "step": 40313 }, { "epoch": 0.8883306615544795, "grad_norm": 0.9267488121986389, "learning_rate": 9.70162397014952e-07, "loss": 0.0866, "step": 40314 }, { "epoch": 0.8883526968439956, "grad_norm": 0.5618589520454407, "learning_rate": 9.697836813673184e-07, "loss": 0.0571, "step": 40315 }, { "epoch": 0.8883747321335118, "grad_norm": 0.37439897656440735, "learning_rate": 9.694050371830982e-07, "loss": 0.0551, "step": 40316 }, { "epoch": 0.888396767423028, "grad_norm": 0.5695803165435791, "learning_rate": 9.69026464464225e-07, "loss": 0.0488, "step": 40317 }, { "epoch": 0.8884188027125441, "grad_norm": 0.6108794212341309, "learning_rate": 9.686479632126205e-07, "loss": 0.0611, "step": 40318 }, { "epoch": 0.8884408380020603, "grad_norm": 0.44921404123306274, "learning_rate": 9.682695334302182e-07, "loss": 0.0509, "step": 40319 }, { "epoch": 0.8884628732915765, "grad_norm": 0.6771364212036133, "learning_rate": 9.678911751189452e-07, "loss": 0.0637, "step": 40320 }, { "epoch": 0.8884849085810926, "grad_norm": 0.6165186762809753, "learning_rate": 9.675128882807243e-07, "loss": 0.0488, "step": 40321 }, { "epoch": 0.8885069438706088, "grad_norm": 0.5608129501342773, "learning_rate": 9.671346729174845e-07, "loss": 0.0574, "step": 40322 }, { "epoch": 0.888528979160125, "grad_norm": 0.36623501777648926, "learning_rate": 9.667565290311543e-07, "loss": 0.062, "step": 40323 }, { "epoch": 0.8885510144496411, "grad_norm": 0.4970342814922333, "learning_rate": 9.663784566236582e-07, "loss": 0.0407, "step": 40324 }, { "epoch": 0.8885730497391573, "grad_norm": 0.5236628651618958, "learning_rate": 9.66000455696922e-07, "loss": 0.0454, "step": 40325 }, { "epoch": 0.8885950850286735, "grad_norm": 0.6999045610427856, "learning_rate": 9.656225262528706e-07, "loss": 0.0696, "step": 40326 }, { "epoch": 0.8886171203181896, "grad_norm": 0.5280085206031799, "learning_rate": 9.652446682934325e-07, "loss": 0.0753, "step": 40327 }, { "epoch": 0.8886391556077058, "grad_norm": 0.5661389827728271, "learning_rate": 9.648668818205258e-07, "loss": 0.0654, "step": 40328 }, { "epoch": 0.888661190897222, "grad_norm": 0.8899165391921997, "learning_rate": 9.644891668360812e-07, "loss": 0.0567, "step": 40329 }, { "epoch": 0.888683226186738, "grad_norm": 0.5970689654350281, "learning_rate": 9.64111523342015e-07, "loss": 0.0731, "step": 40330 }, { "epoch": 0.8887052614762542, "grad_norm": 0.9588093757629395, "learning_rate": 9.637339513402593e-07, "loss": 0.0656, "step": 40331 }, { "epoch": 0.8887272967657703, "grad_norm": 0.7679499983787537, "learning_rate": 9.633564508327324e-07, "loss": 0.0787, "step": 40332 }, { "epoch": 0.8887493320552865, "grad_norm": 0.34191253781318665, "learning_rate": 9.629790218213598e-07, "loss": 0.0444, "step": 40333 }, { "epoch": 0.8887713673448027, "grad_norm": 0.7294052243232727, "learning_rate": 9.626016643080593e-07, "loss": 0.0546, "step": 40334 }, { "epoch": 0.8887934026343188, "grad_norm": 0.7929800152778625, "learning_rate": 9.622243782947564e-07, "loss": 0.0749, "step": 40335 }, { "epoch": 0.888815437923835, "grad_norm": 0.6547341346740723, "learning_rate": 9.618471637833764e-07, "loss": 0.0633, "step": 40336 }, { "epoch": 0.8888374732133512, "grad_norm": 0.18337395787239075, "learning_rate": 9.614700207758325e-07, "loss": 0.0412, "step": 40337 }, { "epoch": 0.8888595085028673, "grad_norm": 0.6300336122512817, "learning_rate": 9.6109294927405e-07, "loss": 0.042, "step": 40338 }, { "epoch": 0.8888815437923835, "grad_norm": 0.4907005727291107, "learning_rate": 9.607159492799506e-07, "loss": 0.0422, "step": 40339 }, { "epoch": 0.8889035790818997, "grad_norm": 0.38151609897613525, "learning_rate": 9.603390207954544e-07, "loss": 0.0316, "step": 40340 }, { "epoch": 0.8889256143714158, "grad_norm": 0.47226959466934204, "learning_rate": 9.59962163822478e-07, "loss": 0.0682, "step": 40341 }, { "epoch": 0.888947649660932, "grad_norm": 0.6726353764533997, "learning_rate": 9.595853783629439e-07, "loss": 0.0709, "step": 40342 }, { "epoch": 0.8889696849504481, "grad_norm": 0.8205662369728088, "learning_rate": 9.592086644187713e-07, "loss": 0.0726, "step": 40343 }, { "epoch": 0.8889917202399643, "grad_norm": 0.5246363878250122, "learning_rate": 9.588320219918777e-07, "loss": 0.0565, "step": 40344 }, { "epoch": 0.8890137555294805, "grad_norm": 0.5167638063430786, "learning_rate": 9.584554510841814e-07, "loss": 0.0473, "step": 40345 }, { "epoch": 0.8890357908189966, "grad_norm": 0.5664283633232117, "learning_rate": 9.580789516976006e-07, "loss": 0.051, "step": 40346 }, { "epoch": 0.8890578261085128, "grad_norm": 0.6715623140335083, "learning_rate": 9.577025238340559e-07, "loss": 0.0395, "step": 40347 }, { "epoch": 0.889079861398029, "grad_norm": 0.832748293876648, "learning_rate": 9.57326167495462e-07, "loss": 0.0659, "step": 40348 }, { "epoch": 0.8891018966875451, "grad_norm": 0.6729204058647156, "learning_rate": 9.56949882683734e-07, "loss": 0.0537, "step": 40349 }, { "epoch": 0.8891239319770613, "grad_norm": 0.5638089776039124, "learning_rate": 9.565736694007944e-07, "loss": 0.0658, "step": 40350 }, { "epoch": 0.8891459672665775, "grad_norm": 0.7295308113098145, "learning_rate": 9.561975276485525e-07, "loss": 0.0597, "step": 40351 }, { "epoch": 0.8891680025560936, "grad_norm": 0.14459975063800812, "learning_rate": 9.558214574289303e-07, "loss": 0.0228, "step": 40352 }, { "epoch": 0.8891900378456098, "grad_norm": 0.5259133577346802, "learning_rate": 9.554454587438348e-07, "loss": 0.0525, "step": 40353 }, { "epoch": 0.889212073135126, "grad_norm": 0.4339018762111664, "learning_rate": 9.55069531595193e-07, "loss": 0.0429, "step": 40354 }, { "epoch": 0.889234108424642, "grad_norm": 0.6694586873054504, "learning_rate": 9.546936759849112e-07, "loss": 0.0489, "step": 40355 }, { "epoch": 0.8892561437141582, "grad_norm": 0.49596071243286133, "learning_rate": 9.543178919149064e-07, "loss": 0.0604, "step": 40356 }, { "epoch": 0.8892781790036743, "grad_norm": 0.43631502985954285, "learning_rate": 9.539421793870923e-07, "loss": 0.0384, "step": 40357 }, { "epoch": 0.8893002142931905, "grad_norm": 0.7840155959129333, "learning_rate": 9.53566538403382e-07, "loss": 0.0616, "step": 40358 }, { "epoch": 0.8893222495827067, "grad_norm": 0.5363372564315796, "learning_rate": 9.531909689656926e-07, "loss": 0.0518, "step": 40359 }, { "epoch": 0.8893442848722228, "grad_norm": 0.967261016368866, "learning_rate": 9.528154710759307e-07, "loss": 0.0656, "step": 40360 }, { "epoch": 0.889366320161739, "grad_norm": 0.9377014636993408, "learning_rate": 9.524400447360132e-07, "loss": 0.0811, "step": 40361 }, { "epoch": 0.8893883554512552, "grad_norm": 0.34098130464553833, "learning_rate": 9.520646899478519e-07, "loss": 0.0707, "step": 40362 }, { "epoch": 0.8894103907407713, "grad_norm": 0.7731648683547974, "learning_rate": 9.516894067133586e-07, "loss": 0.063, "step": 40363 }, { "epoch": 0.8894324260302875, "grad_norm": 0.4298709034919739, "learning_rate": 9.513141950344418e-07, "loss": 0.0349, "step": 40364 }, { "epoch": 0.8894544613198037, "grad_norm": 0.767894446849823, "learning_rate": 9.509390549130198e-07, "loss": 0.0536, "step": 40365 }, { "epoch": 0.8894764966093198, "grad_norm": 0.5780150294303894, "learning_rate": 9.505639863509979e-07, "loss": 0.0825, "step": 40366 }, { "epoch": 0.889498531898836, "grad_norm": 0.4143141508102417, "learning_rate": 9.501889893502847e-07, "loss": 0.0281, "step": 40367 }, { "epoch": 0.8895205671883522, "grad_norm": 0.5971593856811523, "learning_rate": 9.49814063912795e-07, "loss": 0.0754, "step": 40368 }, { "epoch": 0.8895426024778683, "grad_norm": 0.4098856747150421, "learning_rate": 9.494392100404359e-07, "loss": 0.0717, "step": 40369 }, { "epoch": 0.8895646377673845, "grad_norm": 0.5276702642440796, "learning_rate": 9.49064427735119e-07, "loss": 0.039, "step": 40370 }, { "epoch": 0.8895866730569006, "grad_norm": 0.8400309085845947, "learning_rate": 9.486897169987497e-07, "loss": 0.0496, "step": 40371 }, { "epoch": 0.8896087083464168, "grad_norm": 0.8392817378044128, "learning_rate": 9.483150778332378e-07, "loss": 0.0732, "step": 40372 }, { "epoch": 0.889630743635933, "grad_norm": 0.4392993152141571, "learning_rate": 9.479405102404937e-07, "loss": 0.051, "step": 40373 }, { "epoch": 0.8896527789254491, "grad_norm": 0.3458056151866913, "learning_rate": 9.475660142224258e-07, "loss": 0.0476, "step": 40374 }, { "epoch": 0.8896748142149653, "grad_norm": 0.46661999821662903, "learning_rate": 9.471915897809391e-07, "loss": 0.0408, "step": 40375 }, { "epoch": 0.8896968495044815, "grad_norm": 0.42468592524528503, "learning_rate": 9.468172369179373e-07, "loss": 0.0696, "step": 40376 }, { "epoch": 0.8897188847939976, "grad_norm": 0.5703416466712952, "learning_rate": 9.464429556353354e-07, "loss": 0.0616, "step": 40377 }, { "epoch": 0.8897409200835138, "grad_norm": 0.7304624319076538, "learning_rate": 9.460687459350337e-07, "loss": 0.0657, "step": 40378 }, { "epoch": 0.88976295537303, "grad_norm": 0.631517767906189, "learning_rate": 9.456946078189421e-07, "loss": 0.0632, "step": 40379 }, { "epoch": 0.889784990662546, "grad_norm": 1.1493160724639893, "learning_rate": 9.453205412889609e-07, "loss": 0.0889, "step": 40380 }, { "epoch": 0.8898070259520622, "grad_norm": 0.8265681266784668, "learning_rate": 9.44946546347002e-07, "loss": 0.0538, "step": 40381 }, { "epoch": 0.8898290612415783, "grad_norm": 0.793404221534729, "learning_rate": 9.445726229949653e-07, "loss": 0.048, "step": 40382 }, { "epoch": 0.8898510965310945, "grad_norm": 0.6336402297019958, "learning_rate": 9.441987712347611e-07, "loss": 0.0467, "step": 40383 }, { "epoch": 0.8898731318206107, "grad_norm": 0.3325898051261902, "learning_rate": 9.438249910682861e-07, "loss": 0.0377, "step": 40384 }, { "epoch": 0.8898951671101268, "grad_norm": 0.3638414144515991, "learning_rate": 9.434512824974489e-07, "loss": 0.0256, "step": 40385 }, { "epoch": 0.889917202399643, "grad_norm": 0.6436260342597961, "learning_rate": 9.43077645524153e-07, "loss": 0.0699, "step": 40386 }, { "epoch": 0.8899392376891592, "grad_norm": 0.7361051440238953, "learning_rate": 9.427040801502968e-07, "loss": 0.0643, "step": 40387 }, { "epoch": 0.8899612729786753, "grad_norm": 0.4947415292263031, "learning_rate": 9.42330586377792e-07, "loss": 0.0514, "step": 40388 }, { "epoch": 0.8899833082681915, "grad_norm": 0.5343862175941467, "learning_rate": 9.419571642085323e-07, "loss": 0.0462, "step": 40389 }, { "epoch": 0.8900053435577077, "grad_norm": 0.5753626823425293, "learning_rate": 9.41583813644426e-07, "loss": 0.052, "step": 40390 }, { "epoch": 0.8900273788472238, "grad_norm": 0.6308355927467346, "learning_rate": 9.412105346873701e-07, "loss": 0.0664, "step": 40391 }, { "epoch": 0.89004941413674, "grad_norm": 0.6469904780387878, "learning_rate": 9.408373273392679e-07, "loss": 0.0839, "step": 40392 }, { "epoch": 0.8900714494262562, "grad_norm": 0.7052419185638428, "learning_rate": 9.404641916020229e-07, "loss": 0.0421, "step": 40393 }, { "epoch": 0.8900934847157723, "grad_norm": 1.3543459177017212, "learning_rate": 9.400911274775304e-07, "loss": 0.0882, "step": 40394 }, { "epoch": 0.8901155200052885, "grad_norm": 0.6092272996902466, "learning_rate": 9.397181349676936e-07, "loss": 0.0608, "step": 40395 }, { "epoch": 0.8901375552948046, "grad_norm": 0.8039512038230896, "learning_rate": 9.393452140744113e-07, "loss": 0.0509, "step": 40396 }, { "epoch": 0.8901595905843208, "grad_norm": 0.38120177388191223, "learning_rate": 9.389723647995868e-07, "loss": 0.0579, "step": 40397 }, { "epoch": 0.890181625873837, "grad_norm": 0.2158495932817459, "learning_rate": 9.385995871451137e-07, "loss": 0.0708, "step": 40398 }, { "epoch": 0.8902036611633531, "grad_norm": 0.7414199709892273, "learning_rate": 9.382268811128919e-07, "loss": 0.0563, "step": 40399 }, { "epoch": 0.8902256964528693, "grad_norm": 1.0507868528366089, "learning_rate": 9.378542467048251e-07, "loss": 0.0897, "step": 40400 }, { "epoch": 0.8902477317423855, "grad_norm": 0.45930325984954834, "learning_rate": 9.374816839228051e-07, "loss": 0.0348, "step": 40401 }, { "epoch": 0.8902697670319016, "grad_norm": 0.8346366882324219, "learning_rate": 9.371091927687319e-07, "loss": 0.0469, "step": 40402 }, { "epoch": 0.8902918023214178, "grad_norm": 0.3663993179798126, "learning_rate": 9.367367732445009e-07, "loss": 0.0387, "step": 40403 }, { "epoch": 0.8903138376109339, "grad_norm": 0.25408098101615906, "learning_rate": 9.363644253520137e-07, "loss": 0.0569, "step": 40404 }, { "epoch": 0.89033587290045, "grad_norm": 0.629327654838562, "learning_rate": 9.359921490931622e-07, "loss": 0.0477, "step": 40405 }, { "epoch": 0.8903579081899662, "grad_norm": 0.9093498587608337, "learning_rate": 9.356199444698449e-07, "loss": 0.0641, "step": 40406 }, { "epoch": 0.8903799434794823, "grad_norm": 0.9351924657821655, "learning_rate": 9.352478114839586e-07, "loss": 0.0593, "step": 40407 }, { "epoch": 0.8904019787689985, "grad_norm": 0.7556235790252686, "learning_rate": 9.34875750137395e-07, "loss": 0.069, "step": 40408 }, { "epoch": 0.8904240140585147, "grad_norm": 0.6291766166687012, "learning_rate": 9.345037604320545e-07, "loss": 0.0673, "step": 40409 }, { "epoch": 0.8904460493480308, "grad_norm": 0.6592180728912354, "learning_rate": 9.341318423698237e-07, "loss": 0.0728, "step": 40410 }, { "epoch": 0.890468084637547, "grad_norm": 0.6894164085388184, "learning_rate": 9.337599959526078e-07, "loss": 0.0527, "step": 40411 }, { "epoch": 0.8904901199270632, "grad_norm": 0.6636808514595032, "learning_rate": 9.333882211822919e-07, "loss": 0.0553, "step": 40412 }, { "epoch": 0.8905121552165793, "grad_norm": 0.8209129571914673, "learning_rate": 9.330165180607747e-07, "loss": 0.0829, "step": 40413 }, { "epoch": 0.8905341905060955, "grad_norm": 0.34546393156051636, "learning_rate": 9.326448865899462e-07, "loss": 0.0693, "step": 40414 }, { "epoch": 0.8905562257956117, "grad_norm": 0.5888131260871887, "learning_rate": 9.322733267717015e-07, "loss": 0.0419, "step": 40415 }, { "epoch": 0.8905782610851278, "grad_norm": 0.6659184098243713, "learning_rate": 9.319018386079342e-07, "loss": 0.0644, "step": 40416 }, { "epoch": 0.890600296374644, "grad_norm": 0.49254336953163147, "learning_rate": 9.315304221005311e-07, "loss": 0.0744, "step": 40417 }, { "epoch": 0.8906223316641602, "grad_norm": 0.4940357804298401, "learning_rate": 9.31159077251389e-07, "loss": 0.0488, "step": 40418 }, { "epoch": 0.8906443669536763, "grad_norm": 1.0120446681976318, "learning_rate": 9.307878040623979e-07, "loss": 0.098, "step": 40419 }, { "epoch": 0.8906664022431925, "grad_norm": 0.32192564010620117, "learning_rate": 9.304166025354516e-07, "loss": 0.0447, "step": 40420 }, { "epoch": 0.8906884375327087, "grad_norm": 0.34461623430252075, "learning_rate": 9.30045472672435e-07, "loss": 0.0564, "step": 40421 }, { "epoch": 0.8907104728222248, "grad_norm": 0.5395466685295105, "learning_rate": 9.296744144752434e-07, "loss": 0.0465, "step": 40422 }, { "epoch": 0.890732508111741, "grad_norm": 0.619719922542572, "learning_rate": 9.293034279457652e-07, "loss": 0.063, "step": 40423 }, { "epoch": 0.8907545434012571, "grad_norm": 0.7375975847244263, "learning_rate": 9.289325130858889e-07, "loss": 0.0796, "step": 40424 }, { "epoch": 0.8907765786907733, "grad_norm": 0.33767303824424744, "learning_rate": 9.28561669897508e-07, "loss": 0.033, "step": 40425 }, { "epoch": 0.8907986139802895, "grad_norm": 0.4875556230545044, "learning_rate": 9.28190898382501e-07, "loss": 0.0419, "step": 40426 }, { "epoch": 0.8908206492698056, "grad_norm": 0.35389962792396545, "learning_rate": 9.278201985427714e-07, "loss": 0.0384, "step": 40427 }, { "epoch": 0.8908426845593218, "grad_norm": 0.5364969968795776, "learning_rate": 9.274495703801944e-07, "loss": 0.0643, "step": 40428 }, { "epoch": 0.8908647198488379, "grad_norm": 0.8158592581748962, "learning_rate": 9.27079013896665e-07, "loss": 0.0748, "step": 40429 }, { "epoch": 0.890886755138354, "grad_norm": 0.8013381958007812, "learning_rate": 9.267085290940702e-07, "loss": 0.0542, "step": 40430 }, { "epoch": 0.8909087904278702, "grad_norm": 0.9739236235618591, "learning_rate": 9.263381159742934e-07, "loss": 0.0522, "step": 40431 }, { "epoch": 0.8909308257173864, "grad_norm": 0.9699791669845581, "learning_rate": 9.259677745392265e-07, "loss": 0.0608, "step": 40432 }, { "epoch": 0.8909528610069025, "grad_norm": 0.6589037775993347, "learning_rate": 9.255975047907477e-07, "loss": 0.0449, "step": 40433 }, { "epoch": 0.8909748962964187, "grad_norm": 0.4926866292953491, "learning_rate": 9.252273067307526e-07, "loss": 0.0664, "step": 40434 }, { "epoch": 0.8909969315859348, "grad_norm": 0.4013286232948303, "learning_rate": 9.248571803611194e-07, "loss": 0.045, "step": 40435 }, { "epoch": 0.891018966875451, "grad_norm": 0.6272935271263123, "learning_rate": 9.244871256837384e-07, "loss": 0.0623, "step": 40436 }, { "epoch": 0.8910410021649672, "grad_norm": 0.8338149189949036, "learning_rate": 9.241171427004913e-07, "loss": 0.0555, "step": 40437 }, { "epoch": 0.8910630374544833, "grad_norm": 0.7798957824707031, "learning_rate": 9.237472314132633e-07, "loss": 0.069, "step": 40438 }, { "epoch": 0.8910850727439995, "grad_norm": 0.6972697973251343, "learning_rate": 9.233773918239413e-07, "loss": 0.0549, "step": 40439 }, { "epoch": 0.8911071080335157, "grad_norm": 0.6748725771903992, "learning_rate": 9.230076239344038e-07, "loss": 0.0473, "step": 40440 }, { "epoch": 0.8911291433230318, "grad_norm": 0.5881654024124146, "learning_rate": 9.226379277465375e-07, "loss": 0.049, "step": 40441 }, { "epoch": 0.891151178612548, "grad_norm": 0.43845558166503906, "learning_rate": 9.222683032622259e-07, "loss": 0.0482, "step": 40442 }, { "epoch": 0.8911732139020642, "grad_norm": 0.6010318994522095, "learning_rate": 9.218987504833526e-07, "loss": 0.0796, "step": 40443 }, { "epoch": 0.8911952491915803, "grad_norm": 0.8721207976341248, "learning_rate": 9.21529269411796e-07, "loss": 0.0878, "step": 40444 }, { "epoch": 0.8912172844810965, "grad_norm": 0.6145010590553284, "learning_rate": 9.211598600494398e-07, "loss": 0.0491, "step": 40445 }, { "epoch": 0.8912393197706127, "grad_norm": 0.47919759154319763, "learning_rate": 9.207905223981689e-07, "loss": 0.0596, "step": 40446 }, { "epoch": 0.8912613550601288, "grad_norm": 0.5081390738487244, "learning_rate": 9.204212564598586e-07, "loss": 0.0481, "step": 40447 }, { "epoch": 0.891283390349645, "grad_norm": 0.5644693970680237, "learning_rate": 9.200520622363956e-07, "loss": 0.0706, "step": 40448 }, { "epoch": 0.8913054256391612, "grad_norm": 0.6839885711669922, "learning_rate": 9.196829397296536e-07, "loss": 0.0796, "step": 40449 }, { "epoch": 0.8913274609286773, "grad_norm": 0.21573872864246368, "learning_rate": 9.193138889415226e-07, "loss": 0.0543, "step": 40450 }, { "epoch": 0.8913494962181935, "grad_norm": 0.45753738284111023, "learning_rate": 9.189449098738728e-07, "loss": 0.0315, "step": 40451 }, { "epoch": 0.8913715315077096, "grad_norm": 0.3761118948459625, "learning_rate": 9.185760025285878e-07, "loss": 0.0646, "step": 40452 }, { "epoch": 0.8913935667972258, "grad_norm": 0.5249577760696411, "learning_rate": 9.182071669075475e-07, "loss": 0.0768, "step": 40453 }, { "epoch": 0.8914156020867419, "grad_norm": 0.4594508409500122, "learning_rate": 9.178384030126275e-07, "loss": 0.036, "step": 40454 }, { "epoch": 0.891437637376258, "grad_norm": 0.6588287353515625, "learning_rate": 9.174697108457109e-07, "loss": 0.0474, "step": 40455 }, { "epoch": 0.8914596726657742, "grad_norm": 0.6714159250259399, "learning_rate": 9.17101090408668e-07, "loss": 0.0626, "step": 40456 }, { "epoch": 0.8914817079552904, "grad_norm": 0.45823848247528076, "learning_rate": 9.167325417033856e-07, "loss": 0.0639, "step": 40457 }, { "epoch": 0.8915037432448065, "grad_norm": 0.4617314636707306, "learning_rate": 9.16364064731734e-07, "loss": 0.0428, "step": 40458 }, { "epoch": 0.8915257785343227, "grad_norm": 0.6309138536453247, "learning_rate": 9.159956594955949e-07, "loss": 0.0551, "step": 40459 }, { "epoch": 0.8915478138238389, "grad_norm": 0.38685810565948486, "learning_rate": 9.1562732599684e-07, "loss": 0.0611, "step": 40460 }, { "epoch": 0.891569849113355, "grad_norm": 0.18578433990478516, "learning_rate": 9.152590642373482e-07, "loss": 0.0606, "step": 40461 }, { "epoch": 0.8915918844028712, "grad_norm": 0.44688311219215393, "learning_rate": 9.148908742189977e-07, "loss": 0.0491, "step": 40462 }, { "epoch": 0.8916139196923873, "grad_norm": 0.8420103192329407, "learning_rate": 9.14522755943657e-07, "loss": 0.0676, "step": 40463 }, { "epoch": 0.8916359549819035, "grad_norm": 0.30269676446914673, "learning_rate": 9.141547094132064e-07, "loss": 0.0304, "step": 40464 }, { "epoch": 0.8916579902714197, "grad_norm": 0.9279307126998901, "learning_rate": 9.137867346295192e-07, "loss": 0.0755, "step": 40465 }, { "epoch": 0.8916800255609358, "grad_norm": 0.5940970182418823, "learning_rate": 9.134188315944708e-07, "loss": 0.0512, "step": 40466 }, { "epoch": 0.891702060850452, "grad_norm": 0.4150421619415283, "learning_rate": 9.13051000309933e-07, "loss": 0.0558, "step": 40467 }, { "epoch": 0.8917240961399682, "grad_norm": 0.45973655581474304, "learning_rate": 9.126832407777807e-07, "loss": 0.0442, "step": 40468 }, { "epoch": 0.8917461314294843, "grad_norm": 0.3075418770313263, "learning_rate": 9.123155529998878e-07, "loss": 0.0583, "step": 40469 }, { "epoch": 0.8917681667190005, "grad_norm": 0.5167632102966309, "learning_rate": 9.119479369781258e-07, "loss": 0.0293, "step": 40470 }, { "epoch": 0.8917902020085167, "grad_norm": 0.2224847972393036, "learning_rate": 9.115803927143684e-07, "loss": 0.0663, "step": 40471 }, { "epoch": 0.8918122372980328, "grad_norm": 0.5402266979217529, "learning_rate": 9.112129202104824e-07, "loss": 0.057, "step": 40472 }, { "epoch": 0.891834272587549, "grad_norm": 0.3852149546146393, "learning_rate": 9.108455194683496e-07, "loss": 0.032, "step": 40473 }, { "epoch": 0.8918563078770652, "grad_norm": 0.514244794845581, "learning_rate": 9.104781904898318e-07, "loss": 0.0554, "step": 40474 }, { "epoch": 0.8918783431665813, "grad_norm": 0.46268871426582336, "learning_rate": 9.10110933276806e-07, "loss": 0.0602, "step": 40475 }, { "epoch": 0.8919003784560975, "grad_norm": 0.7931711673736572, "learning_rate": 9.097437478311388e-07, "loss": 0.0663, "step": 40476 }, { "epoch": 0.8919224137456137, "grad_norm": 0.37332451343536377, "learning_rate": 9.093766341547055e-07, "loss": 0.0371, "step": 40477 }, { "epoch": 0.8919444490351297, "grad_norm": 0.3665350079536438, "learning_rate": 9.090095922493729e-07, "loss": 0.0526, "step": 40478 }, { "epoch": 0.8919664843246459, "grad_norm": 0.704360842704773, "learning_rate": 9.08642622117008e-07, "loss": 0.058, "step": 40479 }, { "epoch": 0.891988519614162, "grad_norm": 0.49607327580451965, "learning_rate": 9.082757237594841e-07, "loss": 0.0494, "step": 40480 }, { "epoch": 0.8920105549036782, "grad_norm": 0.6120463013648987, "learning_rate": 9.07908897178668e-07, "loss": 0.0476, "step": 40481 }, { "epoch": 0.8920325901931944, "grad_norm": 0.594068169593811, "learning_rate": 9.075421423764319e-07, "loss": 0.0521, "step": 40482 }, { "epoch": 0.8920546254827105, "grad_norm": 0.5866595506668091, "learning_rate": 9.071754593546338e-07, "loss": 0.0359, "step": 40483 }, { "epoch": 0.8920766607722267, "grad_norm": 0.566109299659729, "learning_rate": 9.068088481151543e-07, "loss": 0.059, "step": 40484 }, { "epoch": 0.8920986960617429, "grad_norm": 0.720691978931427, "learning_rate": 9.064423086598517e-07, "loss": 0.058, "step": 40485 }, { "epoch": 0.892120731351259, "grad_norm": 0.5121973156929016, "learning_rate": 9.060758409905994e-07, "loss": 0.0623, "step": 40486 }, { "epoch": 0.8921427666407752, "grad_norm": 0.6795024871826172, "learning_rate": 9.057094451092563e-07, "loss": 0.0607, "step": 40487 }, { "epoch": 0.8921648019302914, "grad_norm": 0.8369323015213013, "learning_rate": 9.053431210176954e-07, "loss": 0.0613, "step": 40488 }, { "epoch": 0.8921868372198075, "grad_norm": 0.5399128198623657, "learning_rate": 9.049768687177807e-07, "loss": 0.0386, "step": 40489 }, { "epoch": 0.8922088725093237, "grad_norm": 0.4753173887729645, "learning_rate": 9.046106882113753e-07, "loss": 0.0432, "step": 40490 }, { "epoch": 0.8922309077988398, "grad_norm": 0.4701920449733734, "learning_rate": 9.042445795003463e-07, "loss": 0.0471, "step": 40491 }, { "epoch": 0.892252943088356, "grad_norm": 0.7811453342437744, "learning_rate": 9.038785425865587e-07, "loss": 0.0632, "step": 40492 }, { "epoch": 0.8922749783778722, "grad_norm": 0.6128518581390381, "learning_rate": 9.035125774718777e-07, "loss": 0.0567, "step": 40493 }, { "epoch": 0.8922970136673883, "grad_norm": 0.4919723868370056, "learning_rate": 9.031466841581653e-07, "loss": 0.0541, "step": 40494 }, { "epoch": 0.8923190489569045, "grad_norm": 0.5615733861923218, "learning_rate": 9.02780862647285e-07, "loss": 0.0596, "step": 40495 }, { "epoch": 0.8923410842464207, "grad_norm": 0.22786730527877808, "learning_rate": 9.024151129411052e-07, "loss": 0.0254, "step": 40496 }, { "epoch": 0.8923631195359368, "grad_norm": 0.4487367570400238, "learning_rate": 9.02049435041481e-07, "loss": 0.0617, "step": 40497 }, { "epoch": 0.892385154825453, "grad_norm": 0.4388209283351898, "learning_rate": 9.016838289502794e-07, "loss": 0.0458, "step": 40498 }, { "epoch": 0.8924071901149692, "grad_norm": 0.4867664873600006, "learning_rate": 9.013182946693638e-07, "loss": 0.0599, "step": 40499 }, { "epoch": 0.8924292254044853, "grad_norm": 0.5273475050926208, "learning_rate": 9.009528322005945e-07, "loss": 0.068, "step": 40500 }, { "epoch": 0.8924512606940015, "grad_norm": 0.7242641448974609, "learning_rate": 9.005874415458315e-07, "loss": 0.063, "step": 40501 }, { "epoch": 0.8924732959835177, "grad_norm": 0.6423023343086243, "learning_rate": 9.002221227069385e-07, "loss": 0.0382, "step": 40502 }, { "epoch": 0.8924953312730337, "grad_norm": 0.4153206944465637, "learning_rate": 8.998568756857756e-07, "loss": 0.0348, "step": 40503 }, { "epoch": 0.8925173665625499, "grad_norm": 0.5401370525360107, "learning_rate": 8.994917004842013e-07, "loss": 0.0384, "step": 40504 }, { "epoch": 0.892539401852066, "grad_norm": 0.5026758313179016, "learning_rate": 8.991265971040791e-07, "loss": 0.0642, "step": 40505 }, { "epoch": 0.8925614371415822, "grad_norm": 0.6532633304595947, "learning_rate": 8.987615655472625e-07, "loss": 0.0456, "step": 40506 }, { "epoch": 0.8925834724310984, "grad_norm": 0.6756393909454346, "learning_rate": 8.983966058156184e-07, "loss": 0.0577, "step": 40507 }, { "epoch": 0.8926055077206145, "grad_norm": 0.6217902302742004, "learning_rate": 8.98031717911002e-07, "loss": 0.038, "step": 40508 }, { "epoch": 0.8926275430101307, "grad_norm": 0.5196256041526794, "learning_rate": 8.976669018352718e-07, "loss": 0.0536, "step": 40509 }, { "epoch": 0.8926495782996469, "grad_norm": 0.7111953496932983, "learning_rate": 8.973021575902862e-07, "loss": 0.0492, "step": 40510 }, { "epoch": 0.892671613589163, "grad_norm": 0.4116534888744354, "learning_rate": 8.969374851779022e-07, "loss": 0.053, "step": 40511 }, { "epoch": 0.8926936488786792, "grad_norm": 0.5883955955505371, "learning_rate": 8.965728845999815e-07, "loss": 0.0574, "step": 40512 }, { "epoch": 0.8927156841681954, "grad_norm": 0.5113368630409241, "learning_rate": 8.962083558583744e-07, "loss": 0.0397, "step": 40513 }, { "epoch": 0.8927377194577115, "grad_norm": 0.4719415605068207, "learning_rate": 8.958438989549411e-07, "loss": 0.0516, "step": 40514 }, { "epoch": 0.8927597547472277, "grad_norm": 0.6060146689414978, "learning_rate": 8.9547951389154e-07, "loss": 0.0565, "step": 40515 }, { "epoch": 0.8927817900367438, "grad_norm": 0.7457990050315857, "learning_rate": 8.951152006700248e-07, "loss": 0.0719, "step": 40516 }, { "epoch": 0.89280382532626, "grad_norm": 0.6795760989189148, "learning_rate": 8.947509592922521e-07, "loss": 0.0612, "step": 40517 }, { "epoch": 0.8928258606157762, "grad_norm": 1.1924289464950562, "learning_rate": 8.943867897600738e-07, "loss": 0.078, "step": 40518 }, { "epoch": 0.8928478959052923, "grad_norm": 0.3363058269023895, "learning_rate": 8.940226920753519e-07, "loss": 0.0431, "step": 40519 }, { "epoch": 0.8928699311948085, "grad_norm": 0.36779165267944336, "learning_rate": 8.936586662399332e-07, "loss": 0.0433, "step": 40520 }, { "epoch": 0.8928919664843247, "grad_norm": 0.28063058853149414, "learning_rate": 8.932947122556761e-07, "loss": 0.0626, "step": 40521 }, { "epoch": 0.8929140017738408, "grad_norm": 0.5500650405883789, "learning_rate": 8.929308301244326e-07, "loss": 0.0535, "step": 40522 }, { "epoch": 0.892936037063357, "grad_norm": 0.5293506979942322, "learning_rate": 8.925670198480612e-07, "loss": 0.0552, "step": 40523 }, { "epoch": 0.8929580723528732, "grad_norm": 0.6733638644218445, "learning_rate": 8.92203281428407e-07, "loss": 0.0709, "step": 40524 }, { "epoch": 0.8929801076423893, "grad_norm": 0.5866627097129822, "learning_rate": 8.918396148673286e-07, "loss": 0.0688, "step": 40525 }, { "epoch": 0.8930021429319055, "grad_norm": 0.5325837135314941, "learning_rate": 8.914760201666761e-07, "loss": 0.0688, "step": 40526 }, { "epoch": 0.8930241782214217, "grad_norm": 0.845366895198822, "learning_rate": 8.911124973283013e-07, "loss": 0.0777, "step": 40527 }, { "epoch": 0.8930462135109377, "grad_norm": 0.51921546459198, "learning_rate": 8.90749046354058e-07, "loss": 0.0594, "step": 40528 }, { "epoch": 0.8930682488004539, "grad_norm": 0.6700257658958435, "learning_rate": 8.903856672457927e-07, "loss": 0.0606, "step": 40529 }, { "epoch": 0.89309028408997, "grad_norm": 0.7669796943664551, "learning_rate": 8.900223600053625e-07, "loss": 0.0395, "step": 40530 }, { "epoch": 0.8931123193794862, "grad_norm": 0.3048626184463501, "learning_rate": 8.896591246346125e-07, "loss": 0.0453, "step": 40531 }, { "epoch": 0.8931343546690024, "grad_norm": 0.4230625331401825, "learning_rate": 8.892959611353979e-07, "loss": 0.0525, "step": 40532 }, { "epoch": 0.8931563899585185, "grad_norm": 0.6673117876052856, "learning_rate": 8.889328695095622e-07, "loss": 0.0488, "step": 40533 }, { "epoch": 0.8931784252480347, "grad_norm": 0.4438532590866089, "learning_rate": 8.885698497589606e-07, "loss": 0.0563, "step": 40534 }, { "epoch": 0.8932004605375509, "grad_norm": 0.4913763105869293, "learning_rate": 8.882069018854399e-07, "loss": 0.053, "step": 40535 }, { "epoch": 0.893222495827067, "grad_norm": 0.4292173385620117, "learning_rate": 8.878440258908488e-07, "loss": 0.0477, "step": 40536 }, { "epoch": 0.8932445311165832, "grad_norm": 0.7083340883255005, "learning_rate": 8.87481221777034e-07, "loss": 0.053, "step": 40537 }, { "epoch": 0.8932665664060994, "grad_norm": 0.4841914772987366, "learning_rate": 8.871184895458456e-07, "loss": 0.0549, "step": 40538 }, { "epoch": 0.8932886016956155, "grad_norm": 0.5375396609306335, "learning_rate": 8.867558291991324e-07, "loss": 0.0697, "step": 40539 }, { "epoch": 0.8933106369851317, "grad_norm": 0.5631681680679321, "learning_rate": 8.863932407387376e-07, "loss": 0.0527, "step": 40540 }, { "epoch": 0.8933326722746479, "grad_norm": 0.3211129605770111, "learning_rate": 8.860307241665117e-07, "loss": 0.0475, "step": 40541 }, { "epoch": 0.893354707564164, "grad_norm": 0.7288151979446411, "learning_rate": 8.856682794842996e-07, "loss": 0.0888, "step": 40542 }, { "epoch": 0.8933767428536802, "grad_norm": 0.6252390742301941, "learning_rate": 8.853059066939484e-07, "loss": 0.0553, "step": 40543 }, { "epoch": 0.8933987781431963, "grad_norm": 0.6891068816184998, "learning_rate": 8.849436057973015e-07, "loss": 0.0691, "step": 40544 }, { "epoch": 0.8934208134327125, "grad_norm": 0.5484171509742737, "learning_rate": 8.845813767962057e-07, "loss": 0.0632, "step": 40545 }, { "epoch": 0.8934428487222287, "grad_norm": 0.8704528212547302, "learning_rate": 8.842192196925081e-07, "loss": 0.0747, "step": 40546 }, { "epoch": 0.8934648840117448, "grad_norm": 0.4502962529659271, "learning_rate": 8.838571344880503e-07, "loss": 0.0658, "step": 40547 }, { "epoch": 0.893486919301261, "grad_norm": 0.433028906583786, "learning_rate": 8.834951211846776e-07, "loss": 0.0543, "step": 40548 }, { "epoch": 0.8935089545907772, "grad_norm": 0.3732738792896271, "learning_rate": 8.831331797842368e-07, "loss": 0.0374, "step": 40549 }, { "epoch": 0.8935309898802933, "grad_norm": 0.49392661452293396, "learning_rate": 8.827713102885648e-07, "loss": 0.0421, "step": 40550 }, { "epoch": 0.8935530251698095, "grad_norm": 0.7532873749732971, "learning_rate": 8.824095126995136e-07, "loss": 0.0621, "step": 40551 }, { "epoch": 0.8935750604593257, "grad_norm": 0.9770199656486511, "learning_rate": 8.820477870189148e-07, "loss": 0.0516, "step": 40552 }, { "epoch": 0.8935970957488417, "grad_norm": 0.5575651526451111, "learning_rate": 8.81686133248622e-07, "loss": 0.0616, "step": 40553 }, { "epoch": 0.8936191310383579, "grad_norm": 0.637239933013916, "learning_rate": 8.813245513904705e-07, "loss": 0.0539, "step": 40554 }, { "epoch": 0.893641166327874, "grad_norm": 0.5995641350746155, "learning_rate": 8.809630414463072e-07, "loss": 0.0439, "step": 40555 }, { "epoch": 0.8936632016173902, "grad_norm": 0.7888379096984863, "learning_rate": 8.80601603417967e-07, "loss": 0.0499, "step": 40556 }, { "epoch": 0.8936852369069064, "grad_norm": 1.0086381435394287, "learning_rate": 8.802402373072937e-07, "loss": 0.0647, "step": 40557 }, { "epoch": 0.8937072721964225, "grad_norm": 0.6499627828598022, "learning_rate": 8.798789431161324e-07, "loss": 0.0532, "step": 40558 }, { "epoch": 0.8937293074859387, "grad_norm": 0.8733798265457153, "learning_rate": 8.795177208463168e-07, "loss": 0.0848, "step": 40559 }, { "epoch": 0.8937513427754549, "grad_norm": 0.4210017919540405, "learning_rate": 8.791565704996884e-07, "loss": 0.059, "step": 40560 }, { "epoch": 0.893773378064971, "grad_norm": 0.25453272461891174, "learning_rate": 8.787954920780894e-07, "loss": 0.0578, "step": 40561 }, { "epoch": 0.8937954133544872, "grad_norm": 0.9270135760307312, "learning_rate": 8.784344855833581e-07, "loss": 0.0708, "step": 40562 }, { "epoch": 0.8938174486440034, "grad_norm": 0.7252129316329956, "learning_rate": 8.780735510173316e-07, "loss": 0.0703, "step": 40563 }, { "epoch": 0.8938394839335195, "grad_norm": 0.34438613057136536, "learning_rate": 8.777126883818481e-07, "loss": 0.0426, "step": 40564 }, { "epoch": 0.8938615192230357, "grad_norm": 0.2851445972919464, "learning_rate": 8.773518976787498e-07, "loss": 0.0851, "step": 40565 }, { "epoch": 0.8938835545125519, "grad_norm": 0.39110466837882996, "learning_rate": 8.769911789098684e-07, "loss": 0.0551, "step": 40566 }, { "epoch": 0.893905589802068, "grad_norm": 0.5824528336524963, "learning_rate": 8.76630532077044e-07, "loss": 0.0675, "step": 40567 }, { "epoch": 0.8939276250915842, "grad_norm": 0.4556012749671936, "learning_rate": 8.762699571821153e-07, "loss": 0.0724, "step": 40568 }, { "epoch": 0.8939496603811004, "grad_norm": 0.7123983502388, "learning_rate": 8.759094542269191e-07, "loss": 0.0636, "step": 40569 }, { "epoch": 0.8939716956706165, "grad_norm": 0.4315214157104492, "learning_rate": 8.755490232132873e-07, "loss": 0.0628, "step": 40570 }, { "epoch": 0.8939937309601327, "grad_norm": 0.8488679528236389, "learning_rate": 8.751886641430601e-07, "loss": 0.0989, "step": 40571 }, { "epoch": 0.8940157662496488, "grad_norm": 0.49604833126068115, "learning_rate": 8.748283770180709e-07, "loss": 0.05, "step": 40572 }, { "epoch": 0.894037801539165, "grad_norm": 0.3292478024959564, "learning_rate": 8.744681618401551e-07, "loss": 0.0447, "step": 40573 }, { "epoch": 0.8940598368286812, "grad_norm": 0.8197581171989441, "learning_rate": 8.741080186111494e-07, "loss": 0.0739, "step": 40574 }, { "epoch": 0.8940818721181973, "grad_norm": 0.595822811126709, "learning_rate": 8.737479473328824e-07, "loss": 0.0564, "step": 40575 }, { "epoch": 0.8941039074077135, "grad_norm": 0.48622560501098633, "learning_rate": 8.733879480071961e-07, "loss": 0.0547, "step": 40576 }, { "epoch": 0.8941259426972296, "grad_norm": 0.4386483132839203, "learning_rate": 8.730280206359187e-07, "loss": 0.0344, "step": 40577 }, { "epoch": 0.8941479779867457, "grad_norm": 0.3225800693035126, "learning_rate": 8.726681652208857e-07, "loss": 0.0466, "step": 40578 }, { "epoch": 0.8941700132762619, "grad_norm": 0.46093830466270447, "learning_rate": 8.723083817639288e-07, "loss": 0.0706, "step": 40579 }, { "epoch": 0.894192048565778, "grad_norm": 0.47841280698776245, "learning_rate": 8.7194867026688e-07, "loss": 0.0536, "step": 40580 }, { "epoch": 0.8942140838552942, "grad_norm": 0.6801332831382751, "learning_rate": 8.715890307315761e-07, "loss": 0.0493, "step": 40581 }, { "epoch": 0.8942361191448104, "grad_norm": 0.3283679187297821, "learning_rate": 8.712294631598422e-07, "loss": 0.0343, "step": 40582 }, { "epoch": 0.8942581544343265, "grad_norm": 0.9096767902374268, "learning_rate": 8.708699675535137e-07, "loss": 0.1098, "step": 40583 }, { "epoch": 0.8942801897238427, "grad_norm": 0.573698878288269, "learning_rate": 8.705105439144207e-07, "loss": 0.0539, "step": 40584 }, { "epoch": 0.8943022250133589, "grad_norm": 0.9109762907028198, "learning_rate": 8.701511922443967e-07, "loss": 0.0568, "step": 40585 }, { "epoch": 0.894324260302875, "grad_norm": 0.536475658416748, "learning_rate": 8.697919125452653e-07, "loss": 0.0396, "step": 40586 }, { "epoch": 0.8943462955923912, "grad_norm": 0.556524395942688, "learning_rate": 8.694327048188666e-07, "loss": 0.0522, "step": 40587 }, { "epoch": 0.8943683308819074, "grad_norm": 0.4552716016769409, "learning_rate": 8.69073569067021e-07, "loss": 0.0453, "step": 40588 }, { "epoch": 0.8943903661714235, "grad_norm": 0.23101554811000824, "learning_rate": 8.687145052915635e-07, "loss": 0.0783, "step": 40589 }, { "epoch": 0.8944124014609397, "grad_norm": 0.4078533947467804, "learning_rate": 8.683555134943194e-07, "loss": 0.0506, "step": 40590 }, { "epoch": 0.8944344367504559, "grad_norm": 0.4488108456134796, "learning_rate": 8.679965936771172e-07, "loss": 0.0566, "step": 40591 }, { "epoch": 0.894456472039972, "grad_norm": 0.969647228717804, "learning_rate": 8.676377458417905e-07, "loss": 0.0708, "step": 40592 }, { "epoch": 0.8944785073294882, "grad_norm": 0.40709030628204346, "learning_rate": 8.672789699901596e-07, "loss": 0.0389, "step": 40593 }, { "epoch": 0.8945005426190044, "grad_norm": 0.43366169929504395, "learning_rate": 8.669202661240561e-07, "loss": 0.0426, "step": 40594 }, { "epoch": 0.8945225779085205, "grad_norm": 0.7018899321556091, "learning_rate": 8.665616342453053e-07, "loss": 0.0801, "step": 40595 }, { "epoch": 0.8945446131980367, "grad_norm": 0.49764496088027954, "learning_rate": 8.662030743557392e-07, "loss": 0.0491, "step": 40596 }, { "epoch": 0.8945666484875529, "grad_norm": 0.5981922745704651, "learning_rate": 8.658445864571763e-07, "loss": 0.0452, "step": 40597 }, { "epoch": 0.894588683777069, "grad_norm": 0.8561241626739502, "learning_rate": 8.654861705514466e-07, "loss": 0.084, "step": 40598 }, { "epoch": 0.8946107190665852, "grad_norm": 0.6880499720573425, "learning_rate": 8.65127826640379e-07, "loss": 0.0441, "step": 40599 }, { "epoch": 0.8946327543561013, "grad_norm": 0.6181364059448242, "learning_rate": 8.6476955472579e-07, "loss": 0.0388, "step": 40600 }, { "epoch": 0.8946547896456175, "grad_norm": 0.8486590385437012, "learning_rate": 8.644113548095133e-07, "loss": 0.0637, "step": 40601 }, { "epoch": 0.8946768249351336, "grad_norm": 0.3767712116241455, "learning_rate": 8.640532268933659e-07, "loss": 0.0695, "step": 40602 }, { "epoch": 0.8946988602246497, "grad_norm": 0.5815805792808533, "learning_rate": 8.636951709791797e-07, "loss": 0.0562, "step": 40603 }, { "epoch": 0.8947208955141659, "grad_norm": 0.6150246262550354, "learning_rate": 8.633371870687729e-07, "loss": 0.0578, "step": 40604 }, { "epoch": 0.8947429308036821, "grad_norm": 0.19540083408355713, "learning_rate": 8.629792751639709e-07, "loss": 0.0503, "step": 40605 }, { "epoch": 0.8947649660931982, "grad_norm": 0.38462647795677185, "learning_rate": 8.626214352665956e-07, "loss": 0.041, "step": 40606 }, { "epoch": 0.8947870013827144, "grad_norm": 0.9068487882614136, "learning_rate": 8.622636673784706e-07, "loss": 0.0624, "step": 40607 }, { "epoch": 0.8948090366722306, "grad_norm": 0.7077671885490417, "learning_rate": 8.619059715014194e-07, "loss": 0.0521, "step": 40608 }, { "epoch": 0.8948310719617467, "grad_norm": 0.5815064311027527, "learning_rate": 8.615483476372571e-07, "loss": 0.06, "step": 40609 }, { "epoch": 0.8948531072512629, "grad_norm": 0.5841301083564758, "learning_rate": 8.611907957878174e-07, "loss": 0.049, "step": 40610 }, { "epoch": 0.894875142540779, "grad_norm": 0.22758391499519348, "learning_rate": 8.608333159549103e-07, "loss": 0.0255, "step": 40611 }, { "epoch": 0.8948971778302952, "grad_norm": 0.5225110650062561, "learning_rate": 8.604759081403646e-07, "loss": 0.0461, "step": 40612 }, { "epoch": 0.8949192131198114, "grad_norm": 0.6895120143890381, "learning_rate": 8.601185723459953e-07, "loss": 0.0494, "step": 40613 }, { "epoch": 0.8949412484093275, "grad_norm": 0.36835259199142456, "learning_rate": 8.597613085736244e-07, "loss": 0.0498, "step": 40614 }, { "epoch": 0.8949632836988437, "grad_norm": 0.5245707035064697, "learning_rate": 8.594041168250738e-07, "loss": 0.0427, "step": 40615 }, { "epoch": 0.8949853189883599, "grad_norm": 0.9750903248786926, "learning_rate": 8.590469971021586e-07, "loss": 0.0546, "step": 40616 }, { "epoch": 0.895007354277876, "grad_norm": 0.446992427110672, "learning_rate": 8.586899494067007e-07, "loss": 0.0414, "step": 40617 }, { "epoch": 0.8950293895673922, "grad_norm": 0.4980408549308777, "learning_rate": 8.583329737405171e-07, "loss": 0.0559, "step": 40618 }, { "epoch": 0.8950514248569084, "grad_norm": 0.7124349474906921, "learning_rate": 8.579760701054296e-07, "loss": 0.0486, "step": 40619 }, { "epoch": 0.8950734601464245, "grad_norm": 0.9216049313545227, "learning_rate": 8.576192385032517e-07, "loss": 0.0682, "step": 40620 }, { "epoch": 0.8950954954359407, "grad_norm": 0.6094744205474854, "learning_rate": 8.572624789358019e-07, "loss": 0.0569, "step": 40621 }, { "epoch": 0.8951175307254569, "grad_norm": 0.9328989386558533, "learning_rate": 8.569057914049005e-07, "loss": 0.0813, "step": 40622 }, { "epoch": 0.895139566014973, "grad_norm": 0.39137518405914307, "learning_rate": 8.565491759123612e-07, "loss": 0.0465, "step": 40623 }, { "epoch": 0.8951616013044892, "grad_norm": 0.6863608360290527, "learning_rate": 8.561926324600022e-07, "loss": 0.0737, "step": 40624 }, { "epoch": 0.8951836365940053, "grad_norm": 0.4695926904678345, "learning_rate": 8.55836161049634e-07, "loss": 0.0511, "step": 40625 }, { "epoch": 0.8952056718835215, "grad_norm": 0.3635428845882416, "learning_rate": 8.554797616830817e-07, "loss": 0.0646, "step": 40626 }, { "epoch": 0.8952277071730376, "grad_norm": 0.2891903519630432, "learning_rate": 8.551234343621539e-07, "loss": 0.0462, "step": 40627 }, { "epoch": 0.8952497424625537, "grad_norm": 0.4283227026462555, "learning_rate": 8.547671790886707e-07, "loss": 0.064, "step": 40628 }, { "epoch": 0.8952717777520699, "grad_norm": 0.45499187707901, "learning_rate": 8.544109958644392e-07, "loss": 0.0499, "step": 40629 }, { "epoch": 0.8952938130415861, "grad_norm": 0.3247271776199341, "learning_rate": 8.540548846912794e-07, "loss": 0.0698, "step": 40630 }, { "epoch": 0.8953158483311022, "grad_norm": 0.7700778841972351, "learning_rate": 8.536988455710065e-07, "loss": 0.0633, "step": 40631 }, { "epoch": 0.8953378836206184, "grad_norm": 0.5765864849090576, "learning_rate": 8.533428785054259e-07, "loss": 0.0563, "step": 40632 }, { "epoch": 0.8953599189101346, "grad_norm": 0.6868124604225159, "learning_rate": 8.529869834963594e-07, "loss": 0.0556, "step": 40633 }, { "epoch": 0.8953819541996507, "grad_norm": 0.7069836258888245, "learning_rate": 8.526311605456155e-07, "loss": 0.0892, "step": 40634 }, { "epoch": 0.8954039894891669, "grad_norm": 0.66366046667099, "learning_rate": 8.522754096550095e-07, "loss": 0.0597, "step": 40635 }, { "epoch": 0.895426024778683, "grad_norm": 0.5915088057518005, "learning_rate": 8.519197308263499e-07, "loss": 0.0616, "step": 40636 }, { "epoch": 0.8954480600681992, "grad_norm": 0.916945219039917, "learning_rate": 8.515641240614485e-07, "loss": 0.0895, "step": 40637 }, { "epoch": 0.8954700953577154, "grad_norm": 0.49559590220451355, "learning_rate": 8.512085893621208e-07, "loss": 0.0825, "step": 40638 }, { "epoch": 0.8954921306472315, "grad_norm": 0.5024163126945496, "learning_rate": 8.508531267301733e-07, "loss": 0.0589, "step": 40639 }, { "epoch": 0.8955141659367477, "grad_norm": 0.5240179300308228, "learning_rate": 8.504977361674182e-07, "loss": 0.0509, "step": 40640 }, { "epoch": 0.8955362012262639, "grad_norm": 0.21495187282562256, "learning_rate": 8.501424176756656e-07, "loss": 0.0499, "step": 40641 }, { "epoch": 0.89555823651578, "grad_norm": 0.6296984553337097, "learning_rate": 8.497871712567274e-07, "loss": 0.0823, "step": 40642 }, { "epoch": 0.8955802718052962, "grad_norm": 0.0946623831987381, "learning_rate": 8.494319969124087e-07, "loss": 0.0529, "step": 40643 }, { "epoch": 0.8956023070948124, "grad_norm": 0.5028354525566101, "learning_rate": 8.490768946445215e-07, "loss": 0.0881, "step": 40644 }, { "epoch": 0.8956243423843285, "grad_norm": 0.9014475345611572, "learning_rate": 8.487218644548744e-07, "loss": 0.081, "step": 40645 }, { "epoch": 0.8956463776738447, "grad_norm": 0.8175125122070312, "learning_rate": 8.48366906345276e-07, "loss": 0.0627, "step": 40646 }, { "epoch": 0.8956684129633609, "grad_norm": 0.7019486427307129, "learning_rate": 8.480120203175329e-07, "loss": 0.0565, "step": 40647 }, { "epoch": 0.895690448252877, "grad_norm": 0.6247421503067017, "learning_rate": 8.476572063734506e-07, "loss": 0.0707, "step": 40648 }, { "epoch": 0.8957124835423932, "grad_norm": 0.4623419940471649, "learning_rate": 8.473024645148442e-07, "loss": 0.0355, "step": 40649 }, { "epoch": 0.8957345188319094, "grad_norm": 0.7881325483322144, "learning_rate": 8.469477947435122e-07, "loss": 0.0536, "step": 40650 }, { "epoch": 0.8957565541214254, "grad_norm": 0.38394004106521606, "learning_rate": 8.465931970612684e-07, "loss": 0.0718, "step": 40651 }, { "epoch": 0.8957785894109416, "grad_norm": 0.5993161797523499, "learning_rate": 8.46238671469911e-07, "loss": 0.0543, "step": 40652 }, { "epoch": 0.8958006247004577, "grad_norm": 0.7933624386787415, "learning_rate": 8.458842179712523e-07, "loss": 0.0366, "step": 40653 }, { "epoch": 0.8958226599899739, "grad_norm": 0.4648593068122864, "learning_rate": 8.455298365670955e-07, "loss": 0.0697, "step": 40654 }, { "epoch": 0.8958446952794901, "grad_norm": 0.5857874751091003, "learning_rate": 8.451755272592409e-07, "loss": 0.0515, "step": 40655 }, { "epoch": 0.8958667305690062, "grad_norm": 0.42889946699142456, "learning_rate": 8.448212900495039e-07, "loss": 0.0457, "step": 40656 }, { "epoch": 0.8958887658585224, "grad_norm": 0.4008822441101074, "learning_rate": 8.444671249396796e-07, "loss": 0.0641, "step": 40657 }, { "epoch": 0.8959108011480386, "grad_norm": 0.6960236430168152, "learning_rate": 8.441130319315765e-07, "loss": 0.0533, "step": 40658 }, { "epoch": 0.8959328364375547, "grad_norm": 0.6672174334526062, "learning_rate": 8.437590110269949e-07, "loss": 0.0719, "step": 40659 }, { "epoch": 0.8959548717270709, "grad_norm": 0.6474840641021729, "learning_rate": 8.4340506222774e-07, "loss": 0.0654, "step": 40660 }, { "epoch": 0.895976907016587, "grad_norm": 0.8264411687850952, "learning_rate": 8.430511855356171e-07, "loss": 0.089, "step": 40661 }, { "epoch": 0.8959989423061032, "grad_norm": 0.7777050733566284, "learning_rate": 8.42697380952423e-07, "loss": 0.0552, "step": 40662 }, { "epoch": 0.8960209775956194, "grad_norm": 0.5802596807479858, "learning_rate": 8.42343648479963e-07, "loss": 0.0744, "step": 40663 }, { "epoch": 0.8960430128851355, "grad_norm": 0.769683301448822, "learning_rate": 8.419899881200388e-07, "loss": 0.0297, "step": 40664 }, { "epoch": 0.8960650481746517, "grad_norm": 0.8720152378082275, "learning_rate": 8.416363998744542e-07, "loss": 0.0559, "step": 40665 }, { "epoch": 0.8960870834641679, "grad_norm": 0.8072909712791443, "learning_rate": 8.41282883745006e-07, "loss": 0.0687, "step": 40666 }, { "epoch": 0.896109118753684, "grad_norm": 0.6277305483818054, "learning_rate": 8.40929439733496e-07, "loss": 0.0482, "step": 40667 }, { "epoch": 0.8961311540432002, "grad_norm": 0.8375217914581299, "learning_rate": 8.405760678417279e-07, "loss": 0.0696, "step": 40668 }, { "epoch": 0.8961531893327164, "grad_norm": 0.5890603065490723, "learning_rate": 8.402227680714952e-07, "loss": 0.0811, "step": 40669 }, { "epoch": 0.8961752246222325, "grad_norm": 0.7884078025817871, "learning_rate": 8.398695404246048e-07, "loss": 0.0536, "step": 40670 }, { "epoch": 0.8961972599117487, "grad_norm": 0.6519362330436707, "learning_rate": 8.39516384902847e-07, "loss": 0.0553, "step": 40671 }, { "epoch": 0.8962192952012649, "grad_norm": 0.3678010106086731, "learning_rate": 8.391633015080303e-07, "loss": 0.0575, "step": 40672 }, { "epoch": 0.896241330490781, "grad_norm": 0.6759974956512451, "learning_rate": 8.388102902419465e-07, "loss": 0.0302, "step": 40673 }, { "epoch": 0.8962633657802972, "grad_norm": 0.46484243869781494, "learning_rate": 8.384573511063942e-07, "loss": 0.0672, "step": 40674 }, { "epoch": 0.8962854010698134, "grad_norm": 0.45302391052246094, "learning_rate": 8.381044841031771e-07, "loss": 0.0557, "step": 40675 }, { "epoch": 0.8963074363593294, "grad_norm": 0.6165039539337158, "learning_rate": 8.377516892340836e-07, "loss": 0.0579, "step": 40676 }, { "epoch": 0.8963294716488456, "grad_norm": 0.3270898461341858, "learning_rate": 8.373989665009191e-07, "loss": 0.0404, "step": 40677 }, { "epoch": 0.8963515069383617, "grad_norm": 0.5483009815216064, "learning_rate": 8.370463159054719e-07, "loss": 0.0444, "step": 40678 }, { "epoch": 0.8963735422278779, "grad_norm": 0.2280607521533966, "learning_rate": 8.366937374495459e-07, "loss": 0.0493, "step": 40679 }, { "epoch": 0.8963955775173941, "grad_norm": 0.42351600527763367, "learning_rate": 8.363412311349311e-07, "loss": 0.0579, "step": 40680 }, { "epoch": 0.8964176128069102, "grad_norm": 0.61348557472229, "learning_rate": 8.359887969634295e-07, "loss": 0.0786, "step": 40681 }, { "epoch": 0.8964396480964264, "grad_norm": 0.5530741810798645, "learning_rate": 8.356364349368295e-07, "loss": 0.066, "step": 40682 }, { "epoch": 0.8964616833859426, "grad_norm": 0.48757556080818176, "learning_rate": 8.352841450569282e-07, "loss": 0.0473, "step": 40683 }, { "epoch": 0.8964837186754587, "grad_norm": 0.5352250933647156, "learning_rate": 8.349319273255224e-07, "loss": 0.0569, "step": 40684 }, { "epoch": 0.8965057539649749, "grad_norm": 0.77719646692276, "learning_rate": 8.345797817444023e-07, "loss": 0.0908, "step": 40685 }, { "epoch": 0.8965277892544911, "grad_norm": 0.8572896122932434, "learning_rate": 8.342277083153632e-07, "loss": 0.0433, "step": 40686 }, { "epoch": 0.8965498245440072, "grad_norm": 0.7804409265518188, "learning_rate": 8.338757070402003e-07, "loss": 0.0589, "step": 40687 }, { "epoch": 0.8965718598335234, "grad_norm": 0.5235251188278198, "learning_rate": 8.335237779207055e-07, "loss": 0.0749, "step": 40688 }, { "epoch": 0.8965938951230396, "grad_norm": 0.38899990916252136, "learning_rate": 8.331719209586692e-07, "loss": 0.091, "step": 40689 }, { "epoch": 0.8966159304125557, "grad_norm": 0.600475013256073, "learning_rate": 8.328201361558846e-07, "loss": 0.05, "step": 40690 }, { "epoch": 0.8966379657020719, "grad_norm": 0.16720382869243622, "learning_rate": 8.324684235141455e-07, "loss": 0.0457, "step": 40691 }, { "epoch": 0.896660000991588, "grad_norm": 0.611353874206543, "learning_rate": 8.321167830352422e-07, "loss": 0.106, "step": 40692 }, { "epoch": 0.8966820362811042, "grad_norm": 0.589448869228363, "learning_rate": 8.317652147209664e-07, "loss": 0.0639, "step": 40693 }, { "epoch": 0.8967040715706204, "grad_norm": 0.5695894956588745, "learning_rate": 8.314137185731035e-07, "loss": 0.0626, "step": 40694 }, { "epoch": 0.8967261068601365, "grad_norm": 0.466189980506897, "learning_rate": 8.31062294593452e-07, "loss": 0.0733, "step": 40695 }, { "epoch": 0.8967481421496527, "grad_norm": 0.45947566628456116, "learning_rate": 8.307109427837972e-07, "loss": 0.0494, "step": 40696 }, { "epoch": 0.8967701774391689, "grad_norm": 0.6007534861564636, "learning_rate": 8.303596631459293e-07, "loss": 0.0507, "step": 40697 }, { "epoch": 0.896792212728685, "grad_norm": 0.2551019787788391, "learning_rate": 8.300084556816384e-07, "loss": 0.0608, "step": 40698 }, { "epoch": 0.8968142480182012, "grad_norm": 0.6514926552772522, "learning_rate": 8.296573203927149e-07, "loss": 0.0596, "step": 40699 }, { "epoch": 0.8968362833077174, "grad_norm": 0.6463236808776855, "learning_rate": 8.29306257280944e-07, "loss": 0.0741, "step": 40700 }, { "epoch": 0.8968583185972334, "grad_norm": 0.5222799777984619, "learning_rate": 8.289552663481126e-07, "loss": 0.0602, "step": 40701 }, { "epoch": 0.8968803538867496, "grad_norm": 0.670574426651001, "learning_rate": 8.286043475960159e-07, "loss": 0.0652, "step": 40702 }, { "epoch": 0.8969023891762657, "grad_norm": 0.547774612903595, "learning_rate": 8.282535010264325e-07, "loss": 0.0508, "step": 40703 }, { "epoch": 0.8969244244657819, "grad_norm": 0.6043246984481812, "learning_rate": 8.279027266411577e-07, "loss": 0.0495, "step": 40704 }, { "epoch": 0.8969464597552981, "grad_norm": 0.3650645613670349, "learning_rate": 8.275520244419682e-07, "loss": 0.063, "step": 40705 }, { "epoch": 0.8969684950448142, "grad_norm": 0.6624302268028259, "learning_rate": 8.272013944306612e-07, "loss": 0.0584, "step": 40706 }, { "epoch": 0.8969905303343304, "grad_norm": 0.5837598443031311, "learning_rate": 8.268508366090149e-07, "loss": 0.0419, "step": 40707 }, { "epoch": 0.8970125656238466, "grad_norm": 0.5198169946670532, "learning_rate": 8.265003509788199e-07, "loss": 0.0551, "step": 40708 }, { "epoch": 0.8970346009133627, "grad_norm": 0.4525599479675293, "learning_rate": 8.26149937541858e-07, "loss": 0.0459, "step": 40709 }, { "epoch": 0.8970566362028789, "grad_norm": 0.40589597821235657, "learning_rate": 8.257995962999143e-07, "loss": 0.0704, "step": 40710 }, { "epoch": 0.8970786714923951, "grad_norm": 0.5369834303855896, "learning_rate": 8.254493272547758e-07, "loss": 0.0563, "step": 40711 }, { "epoch": 0.8971007067819112, "grad_norm": 0.7348031997680664, "learning_rate": 8.250991304082228e-07, "loss": 0.0559, "step": 40712 }, { "epoch": 0.8971227420714274, "grad_norm": 0.4045811593532562, "learning_rate": 8.247490057620405e-07, "loss": 0.0462, "step": 40713 }, { "epoch": 0.8971447773609436, "grad_norm": 0.7490021586418152, "learning_rate": 8.243989533180141e-07, "loss": 0.0646, "step": 40714 }, { "epoch": 0.8971668126504597, "grad_norm": 0.49905842542648315, "learning_rate": 8.240489730779272e-07, "loss": 0.0696, "step": 40715 }, { "epoch": 0.8971888479399759, "grad_norm": 0.598088800907135, "learning_rate": 8.236990650435583e-07, "loss": 0.0686, "step": 40716 }, { "epoch": 0.897210883229492, "grad_norm": 0.5456354022026062, "learning_rate": 8.233492292166911e-07, "loss": 0.0493, "step": 40717 }, { "epoch": 0.8972329185190082, "grad_norm": 0.4654265344142914, "learning_rate": 8.229994655991124e-07, "loss": 0.0598, "step": 40718 }, { "epoch": 0.8972549538085244, "grad_norm": 0.6615853309631348, "learning_rate": 8.226497741925959e-07, "loss": 0.0695, "step": 40719 }, { "epoch": 0.8972769890980405, "grad_norm": 0.513739824295044, "learning_rate": 8.223001549989284e-07, "loss": 0.0669, "step": 40720 }, { "epoch": 0.8972990243875567, "grad_norm": 0.5024479627609253, "learning_rate": 8.219506080198869e-07, "loss": 0.0756, "step": 40721 }, { "epoch": 0.8973210596770729, "grad_norm": 0.540738582611084, "learning_rate": 8.216011332572582e-07, "loss": 0.0366, "step": 40722 }, { "epoch": 0.897343094966589, "grad_norm": 0.8934340476989746, "learning_rate": 8.212517307128142e-07, "loss": 0.0678, "step": 40723 }, { "epoch": 0.8973651302561052, "grad_norm": 0.5341708064079285, "learning_rate": 8.209024003883386e-07, "loss": 0.0439, "step": 40724 }, { "epoch": 0.8973871655456213, "grad_norm": 0.5829967260360718, "learning_rate": 8.205531422856133e-07, "loss": 0.028, "step": 40725 }, { "epoch": 0.8974092008351374, "grad_norm": 0.6028907895088196, "learning_rate": 8.202039564064118e-07, "loss": 0.0584, "step": 40726 }, { "epoch": 0.8974312361246536, "grad_norm": 0.5311353206634521, "learning_rate": 8.19854842752516e-07, "loss": 0.0382, "step": 40727 }, { "epoch": 0.8974532714141698, "grad_norm": 0.7470443248748779, "learning_rate": 8.195058013257011e-07, "loss": 0.0658, "step": 40728 }, { "epoch": 0.8974753067036859, "grad_norm": 0.8091164827346802, "learning_rate": 8.191568321277509e-07, "loss": 0.0562, "step": 40729 }, { "epoch": 0.8974973419932021, "grad_norm": 0.22760620713233948, "learning_rate": 8.188079351604388e-07, "loss": 0.0376, "step": 40730 }, { "epoch": 0.8975193772827182, "grad_norm": 0.810642421245575, "learning_rate": 8.184591104255434e-07, "loss": 0.0434, "step": 40731 }, { "epoch": 0.8975414125722344, "grad_norm": 0.47099149227142334, "learning_rate": 8.181103579248384e-07, "loss": 0.0567, "step": 40732 }, { "epoch": 0.8975634478617506, "grad_norm": 0.4915846884250641, "learning_rate": 8.177616776601021e-07, "loss": 0.0656, "step": 40733 }, { "epoch": 0.8975854831512667, "grad_norm": 0.40774041414260864, "learning_rate": 8.174130696331117e-07, "loss": 0.0556, "step": 40734 }, { "epoch": 0.8976075184407829, "grad_norm": 0.42327237129211426, "learning_rate": 8.170645338456406e-07, "loss": 0.0904, "step": 40735 }, { "epoch": 0.8976295537302991, "grad_norm": 0.4958799481391907, "learning_rate": 8.167160702994659e-07, "loss": 0.0467, "step": 40736 }, { "epoch": 0.8976515890198152, "grad_norm": 0.737305223941803, "learning_rate": 8.163676789963625e-07, "loss": 0.05, "step": 40737 }, { "epoch": 0.8976736243093314, "grad_norm": 0.8819200992584229, "learning_rate": 8.160193599381061e-07, "loss": 0.0828, "step": 40738 }, { "epoch": 0.8976956595988476, "grad_norm": 0.6309894323348999, "learning_rate": 8.156711131264666e-07, "loss": 0.043, "step": 40739 }, { "epoch": 0.8977176948883637, "grad_norm": 0.3973095417022705, "learning_rate": 8.153229385632194e-07, "loss": 0.0516, "step": 40740 }, { "epoch": 0.8977397301778799, "grad_norm": 0.6030462980270386, "learning_rate": 8.149748362501413e-07, "loss": 0.0612, "step": 40741 }, { "epoch": 0.8977617654673961, "grad_norm": 0.8331339955329895, "learning_rate": 8.146268061890027e-07, "loss": 0.0759, "step": 40742 }, { "epoch": 0.8977838007569122, "grad_norm": 0.8140267729759216, "learning_rate": 8.142788483815755e-07, "loss": 0.0741, "step": 40743 }, { "epoch": 0.8978058360464284, "grad_norm": 0.5482480525970459, "learning_rate": 8.139309628296332e-07, "loss": 0.0628, "step": 40744 }, { "epoch": 0.8978278713359445, "grad_norm": 0.6871162056922913, "learning_rate": 8.135831495349494e-07, "loss": 0.0407, "step": 40745 }, { "epoch": 0.8978499066254607, "grad_norm": 0.9429118633270264, "learning_rate": 8.13235408499291e-07, "loss": 0.0989, "step": 40746 }, { "epoch": 0.8978719419149769, "grad_norm": 0.3902120292186737, "learning_rate": 8.128877397244333e-07, "loss": 0.0335, "step": 40747 }, { "epoch": 0.897893977204493, "grad_norm": 0.38930588960647583, "learning_rate": 8.125401432121482e-07, "loss": 0.0436, "step": 40748 }, { "epoch": 0.8979160124940092, "grad_norm": 0.3911765217781067, "learning_rate": 8.121926189642026e-07, "loss": 0.049, "step": 40749 }, { "epoch": 0.8979380477835253, "grad_norm": 0.4662109315395355, "learning_rate": 8.118451669823701e-07, "loss": 0.0574, "step": 40750 }, { "epoch": 0.8979600830730414, "grad_norm": 0.5988982319831848, "learning_rate": 8.114977872684126e-07, "loss": 0.0454, "step": 40751 }, { "epoch": 0.8979821183625576, "grad_norm": 0.5968878269195557, "learning_rate": 8.111504798241104e-07, "loss": 0.0601, "step": 40752 }, { "epoch": 0.8980041536520738, "grad_norm": 0.19813697040081024, "learning_rate": 8.10803244651227e-07, "loss": 0.0284, "step": 40753 }, { "epoch": 0.8980261889415899, "grad_norm": 0.7569100856781006, "learning_rate": 8.104560817515311e-07, "loss": 0.0858, "step": 40754 }, { "epoch": 0.8980482242311061, "grad_norm": 0.6617090702056885, "learning_rate": 8.101089911267912e-07, "loss": 0.0745, "step": 40755 }, { "epoch": 0.8980702595206222, "grad_norm": 0.45425719022750854, "learning_rate": 8.097619727787741e-07, "loss": 0.0447, "step": 40756 }, { "epoch": 0.8980922948101384, "grad_norm": 0.29656097292900085, "learning_rate": 8.09415026709252e-07, "loss": 0.0509, "step": 40757 }, { "epoch": 0.8981143300996546, "grad_norm": 0.5618170499801636, "learning_rate": 8.09068152919985e-07, "loss": 0.0431, "step": 40758 }, { "epoch": 0.8981363653891707, "grad_norm": 0.30278280377388, "learning_rate": 8.087213514127451e-07, "loss": 0.0559, "step": 40759 }, { "epoch": 0.8981584006786869, "grad_norm": 0.6628549098968506, "learning_rate": 8.083746221892974e-07, "loss": 0.0764, "step": 40760 }, { "epoch": 0.8981804359682031, "grad_norm": 0.4508671164512634, "learning_rate": 8.080279652514088e-07, "loss": 0.0629, "step": 40761 }, { "epoch": 0.8982024712577192, "grad_norm": 0.49744322896003723, "learning_rate": 8.076813806008432e-07, "loss": 0.0637, "step": 40762 }, { "epoch": 0.8982245065472354, "grad_norm": 0.5001713633537292, "learning_rate": 8.073348682393655e-07, "loss": 0.0375, "step": 40763 }, { "epoch": 0.8982465418367516, "grad_norm": 0.3587161898612976, "learning_rate": 8.069884281687461e-07, "loss": 0.0451, "step": 40764 }, { "epoch": 0.8982685771262677, "grad_norm": 0.464503675699234, "learning_rate": 8.06642060390742e-07, "loss": 0.0509, "step": 40765 }, { "epoch": 0.8982906124157839, "grad_norm": 0.528755784034729, "learning_rate": 8.062957649071218e-07, "loss": 0.0531, "step": 40766 }, { "epoch": 0.8983126477053001, "grad_norm": 0.671570360660553, "learning_rate": 8.059495417196471e-07, "loss": 0.0646, "step": 40767 }, { "epoch": 0.8983346829948162, "grad_norm": 0.4700498878955841, "learning_rate": 8.05603390830087e-07, "loss": 0.0454, "step": 40768 }, { "epoch": 0.8983567182843324, "grad_norm": 0.4807067811489105, "learning_rate": 8.052573122401963e-07, "loss": 0.0485, "step": 40769 }, { "epoch": 0.8983787535738486, "grad_norm": 0.4757445752620697, "learning_rate": 8.049113059517437e-07, "loss": 0.0368, "step": 40770 }, { "epoch": 0.8984007888633647, "grad_norm": 0.31774628162384033, "learning_rate": 8.045653719664913e-07, "loss": 0.0373, "step": 40771 }, { "epoch": 0.8984228241528809, "grad_norm": 0.46189308166503906, "learning_rate": 8.04219510286196e-07, "loss": 0.0535, "step": 40772 }, { "epoch": 0.898444859442397, "grad_norm": 0.7962791919708252, "learning_rate": 8.038737209126279e-07, "loss": 0.066, "step": 40773 }, { "epoch": 0.8984668947319132, "grad_norm": 0.5833404660224915, "learning_rate": 8.035280038475373e-07, "loss": 0.0508, "step": 40774 }, { "epoch": 0.8984889300214293, "grad_norm": 0.8284891247749329, "learning_rate": 8.031823590926962e-07, "loss": 0.1024, "step": 40775 }, { "epoch": 0.8985109653109454, "grad_norm": 1.0697993040084839, "learning_rate": 8.028367866498582e-07, "loss": 0.0691, "step": 40776 }, { "epoch": 0.8985330006004616, "grad_norm": 0.6524771451950073, "learning_rate": 8.024912865207885e-07, "loss": 0.0619, "step": 40777 }, { "epoch": 0.8985550358899778, "grad_norm": 0.635398268699646, "learning_rate": 8.021458587072406e-07, "loss": 0.0469, "step": 40778 }, { "epoch": 0.8985770711794939, "grad_norm": 0.6785695552825928, "learning_rate": 8.018005032109782e-07, "loss": 0.0789, "step": 40779 }, { "epoch": 0.8985991064690101, "grad_norm": 0.5353991985321045, "learning_rate": 8.014552200337599e-07, "loss": 0.0715, "step": 40780 }, { "epoch": 0.8986211417585263, "grad_norm": 0.8190686702728271, "learning_rate": 8.011100091773444e-07, "loss": 0.054, "step": 40781 }, { "epoch": 0.8986431770480424, "grad_norm": 0.5405558347702026, "learning_rate": 8.007648706434884e-07, "loss": 0.0563, "step": 40782 }, { "epoch": 0.8986652123375586, "grad_norm": 0.6060584783554077, "learning_rate": 8.004198044339506e-07, "loss": 0.0605, "step": 40783 }, { "epoch": 0.8986872476270747, "grad_norm": 0.2671452760696411, "learning_rate": 8.000748105504929e-07, "loss": 0.0377, "step": 40784 }, { "epoch": 0.8987092829165909, "grad_norm": 0.46838095784187317, "learning_rate": 7.997298889948657e-07, "loss": 0.069, "step": 40785 }, { "epoch": 0.8987313182061071, "grad_norm": 0.6271266937255859, "learning_rate": 7.99385039768829e-07, "loss": 0.0766, "step": 40786 }, { "epoch": 0.8987533534956232, "grad_norm": 0.40822815895080566, "learning_rate": 7.9904026287414e-07, "loss": 0.0651, "step": 40787 }, { "epoch": 0.8987753887851394, "grad_norm": 1.164623737335205, "learning_rate": 7.986955583125538e-07, "loss": 0.0782, "step": 40788 }, { "epoch": 0.8987974240746556, "grad_norm": 0.7298611402511597, "learning_rate": 7.983509260858257e-07, "loss": 0.0624, "step": 40789 }, { "epoch": 0.8988194593641717, "grad_norm": 0.44241219758987427, "learning_rate": 7.980063661957126e-07, "loss": 0.0647, "step": 40790 }, { "epoch": 0.8988414946536879, "grad_norm": 0.8883689641952515, "learning_rate": 7.976618786439699e-07, "loss": 0.0662, "step": 40791 }, { "epoch": 0.8988635299432041, "grad_norm": 0.6269840598106384, "learning_rate": 7.97317463432351e-07, "loss": 0.0681, "step": 40792 }, { "epoch": 0.8988855652327202, "grad_norm": 0.5855679512023926, "learning_rate": 7.969731205626079e-07, "loss": 0.0636, "step": 40793 }, { "epoch": 0.8989076005222364, "grad_norm": 0.6810000538825989, "learning_rate": 7.966288500365009e-07, "loss": 0.0635, "step": 40794 }, { "epoch": 0.8989296358117526, "grad_norm": 0.2826575040817261, "learning_rate": 7.96284651855777e-07, "loss": 0.0433, "step": 40795 }, { "epoch": 0.8989516711012687, "grad_norm": 0.7025699615478516, "learning_rate": 7.95940526022193e-07, "loss": 0.0515, "step": 40796 }, { "epoch": 0.8989737063907849, "grad_norm": 0.3670128285884857, "learning_rate": 7.955964725374975e-07, "loss": 0.0733, "step": 40797 }, { "epoch": 0.898995741680301, "grad_norm": 0.6541225910186768, "learning_rate": 7.952524914034509e-07, "loss": 0.0332, "step": 40798 }, { "epoch": 0.8990177769698172, "grad_norm": 0.4657687246799469, "learning_rate": 7.949085826217966e-07, "loss": 0.0759, "step": 40799 }, { "epoch": 0.8990398122593333, "grad_norm": 0.6347038149833679, "learning_rate": 7.945647461942935e-07, "loss": 0.0668, "step": 40800 }, { "epoch": 0.8990618475488494, "grad_norm": 0.8442801237106323, "learning_rate": 7.942209821226864e-07, "loss": 0.0497, "step": 40801 }, { "epoch": 0.8990838828383656, "grad_norm": 0.445364385843277, "learning_rate": 7.938772904087327e-07, "loss": 0.0362, "step": 40802 }, { "epoch": 0.8991059181278818, "grad_norm": 0.592121422290802, "learning_rate": 7.935336710541807e-07, "loss": 0.0574, "step": 40803 }, { "epoch": 0.8991279534173979, "grad_norm": 0.6565823554992676, "learning_rate": 7.931901240607758e-07, "loss": 0.0725, "step": 40804 }, { "epoch": 0.8991499887069141, "grad_norm": 0.6809330582618713, "learning_rate": 7.928466494302733e-07, "loss": 0.0709, "step": 40805 }, { "epoch": 0.8991720239964303, "grad_norm": 0.9081587791442871, "learning_rate": 7.925032471644217e-07, "loss": 0.0567, "step": 40806 }, { "epoch": 0.8991940592859464, "grad_norm": 0.5356836915016174, "learning_rate": 7.921599172649713e-07, "loss": 0.0542, "step": 40807 }, { "epoch": 0.8992160945754626, "grad_norm": 0.5620399713516235, "learning_rate": 7.918166597336657e-07, "loss": 0.0644, "step": 40808 }, { "epoch": 0.8992381298649788, "grad_norm": 0.4511605501174927, "learning_rate": 7.914734745722602e-07, "loss": 0.0487, "step": 40809 }, { "epoch": 0.8992601651544949, "grad_norm": 0.17236925661563873, "learning_rate": 7.911303617824983e-07, "loss": 0.0269, "step": 40810 }, { "epoch": 0.8992822004440111, "grad_norm": 0.6721319556236267, "learning_rate": 7.907873213661304e-07, "loss": 0.06, "step": 40811 }, { "epoch": 0.8993042357335272, "grad_norm": 0.7778006792068481, "learning_rate": 7.904443533249001e-07, "loss": 0.0652, "step": 40812 }, { "epoch": 0.8993262710230434, "grad_norm": 0.9507145285606384, "learning_rate": 7.901014576605558e-07, "loss": 0.0767, "step": 40813 }, { "epoch": 0.8993483063125596, "grad_norm": 0.4918815791606903, "learning_rate": 7.897586343748481e-07, "loss": 0.0574, "step": 40814 }, { "epoch": 0.8993703416020757, "grad_norm": 0.16545817255973816, "learning_rate": 7.89415883469517e-07, "loss": 0.0456, "step": 40815 }, { "epoch": 0.8993923768915919, "grad_norm": 0.9976992607116699, "learning_rate": 7.890732049463112e-07, "loss": 0.0615, "step": 40816 }, { "epoch": 0.8994144121811081, "grad_norm": 1.0128560066223145, "learning_rate": 7.887305988069759e-07, "loss": 0.092, "step": 40817 }, { "epoch": 0.8994364474706242, "grad_norm": 0.6194179058074951, "learning_rate": 7.883880650532599e-07, "loss": 0.0727, "step": 40818 }, { "epoch": 0.8994584827601404, "grad_norm": 0.5678518414497375, "learning_rate": 7.880456036868999e-07, "loss": 0.0649, "step": 40819 }, { "epoch": 0.8994805180496566, "grad_norm": 0.8224127292633057, "learning_rate": 7.877032147096463e-07, "loss": 0.072, "step": 40820 }, { "epoch": 0.8995025533391727, "grad_norm": 0.4515085816383362, "learning_rate": 7.873608981232428e-07, "loss": 0.0661, "step": 40821 }, { "epoch": 0.8995245886286889, "grad_norm": 0.6589831709861755, "learning_rate": 7.870186539294294e-07, "loss": 0.0545, "step": 40822 }, { "epoch": 0.8995466239182051, "grad_norm": 1.0136066675186157, "learning_rate": 7.866764821299549e-07, "loss": 0.0649, "step": 40823 }, { "epoch": 0.8995686592077211, "grad_norm": 0.5983794331550598, "learning_rate": 7.863343827265546e-07, "loss": 0.0512, "step": 40824 }, { "epoch": 0.8995906944972373, "grad_norm": 0.42607343196868896, "learning_rate": 7.859923557209786e-07, "loss": 0.0569, "step": 40825 }, { "epoch": 0.8996127297867534, "grad_norm": 0.1466132402420044, "learning_rate": 7.85650401114964e-07, "loss": 0.039, "step": 40826 }, { "epoch": 0.8996347650762696, "grad_norm": 0.34505072236061096, "learning_rate": 7.85308518910256e-07, "loss": 0.0424, "step": 40827 }, { "epoch": 0.8996568003657858, "grad_norm": 0.4503195285797119, "learning_rate": 7.849667091085916e-07, "loss": 0.0505, "step": 40828 }, { "epoch": 0.8996788356553019, "grad_norm": 0.7618197202682495, "learning_rate": 7.84624971711716e-07, "loss": 0.0749, "step": 40829 }, { "epoch": 0.8997008709448181, "grad_norm": 0.7419021725654602, "learning_rate": 7.842833067213695e-07, "loss": 0.0446, "step": 40830 }, { "epoch": 0.8997229062343343, "grad_norm": 0.30504363775253296, "learning_rate": 7.839417141392874e-07, "loss": 0.0433, "step": 40831 }, { "epoch": 0.8997449415238504, "grad_norm": 0.8197515606880188, "learning_rate": 7.836001939672166e-07, "loss": 0.0609, "step": 40832 }, { "epoch": 0.8997669768133666, "grad_norm": 0.9233116507530212, "learning_rate": 7.832587462068924e-07, "loss": 0.0719, "step": 40833 }, { "epoch": 0.8997890121028828, "grad_norm": 0.5221589803695679, "learning_rate": 7.829173708600568e-07, "loss": 0.0426, "step": 40834 }, { "epoch": 0.8998110473923989, "grad_norm": 0.4723815321922302, "learning_rate": 7.825760679284449e-07, "loss": 0.0543, "step": 40835 }, { "epoch": 0.8998330826819151, "grad_norm": 0.7154502868652344, "learning_rate": 7.822348374137956e-07, "loss": 0.0554, "step": 40836 }, { "epoch": 0.8998551179714313, "grad_norm": 0.42220544815063477, "learning_rate": 7.818936793178522e-07, "loss": 0.0545, "step": 40837 }, { "epoch": 0.8998771532609474, "grad_norm": 0.4378337860107422, "learning_rate": 7.815525936423467e-07, "loss": 0.0619, "step": 40838 }, { "epoch": 0.8998991885504636, "grad_norm": 0.6835461854934692, "learning_rate": 7.81211580389018e-07, "loss": 0.065, "step": 40839 }, { "epoch": 0.8999212238399797, "grad_norm": 0.432892382144928, "learning_rate": 7.808706395596027e-07, "loss": 0.0642, "step": 40840 }, { "epoch": 0.8999432591294959, "grad_norm": 0.5974481105804443, "learning_rate": 7.805297711558412e-07, "loss": 0.0553, "step": 40841 }, { "epoch": 0.8999652944190121, "grad_norm": 0.5139012336730957, "learning_rate": 7.801889751794638e-07, "loss": 0.0706, "step": 40842 }, { "epoch": 0.8999873297085282, "grad_norm": 0.8029728531837463, "learning_rate": 7.798482516322092e-07, "loss": 0.0452, "step": 40843 }, { "epoch": 0.9000093649980444, "grad_norm": 0.6823333501815796, "learning_rate": 7.795076005158158e-07, "loss": 0.0673, "step": 40844 }, { "epoch": 0.9000314002875606, "grad_norm": 0.23116546869277954, "learning_rate": 7.791670218320124e-07, "loss": 0.0731, "step": 40845 }, { "epoch": 0.9000534355770767, "grad_norm": 0.4683573842048645, "learning_rate": 7.788265155825408e-07, "loss": 0.0727, "step": 40846 }, { "epoch": 0.9000754708665929, "grad_norm": 0.30648332834243774, "learning_rate": 7.784860817691264e-07, "loss": 0.0386, "step": 40847 }, { "epoch": 0.9000975061561091, "grad_norm": 0.5289652943611145, "learning_rate": 7.781457203935144e-07, "loss": 0.0834, "step": 40848 }, { "epoch": 0.9001195414456251, "grad_norm": 0.6785809397697449, "learning_rate": 7.778054314574284e-07, "loss": 0.0554, "step": 40849 }, { "epoch": 0.9001415767351413, "grad_norm": 0.38008370995521545, "learning_rate": 7.774652149626088e-07, "loss": 0.0347, "step": 40850 }, { "epoch": 0.9001636120246574, "grad_norm": 0.5972163081169128, "learning_rate": 7.771250709107841e-07, "loss": 0.0635, "step": 40851 }, { "epoch": 0.9001856473141736, "grad_norm": 0.5084574222564697, "learning_rate": 7.767849993036879e-07, "loss": 0.0498, "step": 40852 }, { "epoch": 0.9002076826036898, "grad_norm": 0.48152053356170654, "learning_rate": 7.764450001430557e-07, "loss": 0.0389, "step": 40853 }, { "epoch": 0.9002297178932059, "grad_norm": 0.24129165709018707, "learning_rate": 7.761050734306107e-07, "loss": 0.0365, "step": 40854 }, { "epoch": 0.9002517531827221, "grad_norm": 0.49869048595428467, "learning_rate": 7.757652191680953e-07, "loss": 0.0596, "step": 40855 }, { "epoch": 0.9002737884722383, "grad_norm": 0.40113383531570435, "learning_rate": 7.754254373572328e-07, "loss": 0.0478, "step": 40856 }, { "epoch": 0.9002958237617544, "grad_norm": 0.8077455163002014, "learning_rate": 7.750857279997586e-07, "loss": 0.0617, "step": 40857 }, { "epoch": 0.9003178590512706, "grad_norm": 0.4449658691883087, "learning_rate": 7.747460910973981e-07, "loss": 0.0423, "step": 40858 }, { "epoch": 0.9003398943407868, "grad_norm": 0.6616484522819519, "learning_rate": 7.744065266518846e-07, "loss": 0.0819, "step": 40859 }, { "epoch": 0.9003619296303029, "grad_norm": 0.5388533473014832, "learning_rate": 7.740670346649487e-07, "loss": 0.0573, "step": 40860 }, { "epoch": 0.9003839649198191, "grad_norm": 0.7018915414810181, "learning_rate": 7.737276151383155e-07, "loss": 0.0598, "step": 40861 }, { "epoch": 0.9004060002093353, "grad_norm": 0.5000811815261841, "learning_rate": 7.733882680737169e-07, "loss": 0.0577, "step": 40862 }, { "epoch": 0.9004280354988514, "grad_norm": 0.5646853446960449, "learning_rate": 7.730489934728802e-07, "loss": 0.0534, "step": 40863 }, { "epoch": 0.9004500707883676, "grad_norm": 0.4672441780567169, "learning_rate": 7.72709791337537e-07, "loss": 0.0482, "step": 40864 }, { "epoch": 0.9004721060778837, "grad_norm": 0.5858240127563477, "learning_rate": 7.723706616694093e-07, "loss": 0.0594, "step": 40865 }, { "epoch": 0.9004941413673999, "grad_norm": 0.526324987411499, "learning_rate": 7.720316044702291e-07, "loss": 0.0695, "step": 40866 }, { "epoch": 0.9005161766569161, "grad_norm": 0.5169084668159485, "learning_rate": 7.716926197417217e-07, "loss": 0.0503, "step": 40867 }, { "epoch": 0.9005382119464322, "grad_norm": 0.5808753371238708, "learning_rate": 7.713537074856125e-07, "loss": 0.0778, "step": 40868 }, { "epoch": 0.9005602472359484, "grad_norm": 0.7448748350143433, "learning_rate": 7.710148677036299e-07, "loss": 0.068, "step": 40869 }, { "epoch": 0.9005822825254646, "grad_norm": 0.749830961227417, "learning_rate": 7.706761003974944e-07, "loss": 0.0509, "step": 40870 }, { "epoch": 0.9006043178149807, "grad_norm": 0.5929139852523804, "learning_rate": 7.70337405568941e-07, "loss": 0.0841, "step": 40871 }, { "epoch": 0.9006263531044969, "grad_norm": 0.4066498279571533, "learning_rate": 7.699987832196887e-07, "loss": 0.0461, "step": 40872 }, { "epoch": 0.9006483883940131, "grad_norm": 0.46399274468421936, "learning_rate": 7.696602333514641e-07, "loss": 0.0379, "step": 40873 }, { "epoch": 0.9006704236835291, "grad_norm": 0.409808486700058, "learning_rate": 7.693217559659893e-07, "loss": 0.0431, "step": 40874 }, { "epoch": 0.9006924589730453, "grad_norm": 0.3181808888912201, "learning_rate": 7.68983351064988e-07, "loss": 0.0434, "step": 40875 }, { "epoch": 0.9007144942625614, "grad_norm": 0.6033471822738647, "learning_rate": 7.686450186501903e-07, "loss": 0.0463, "step": 40876 }, { "epoch": 0.9007365295520776, "grad_norm": 0.609671950340271, "learning_rate": 7.6830675872331e-07, "loss": 0.077, "step": 40877 }, { "epoch": 0.9007585648415938, "grad_norm": 1.0683963298797607, "learning_rate": 7.67968571286079e-07, "loss": 0.08, "step": 40878 }, { "epoch": 0.9007806001311099, "grad_norm": 0.5119858980178833, "learning_rate": 7.676304563402142e-07, "loss": 0.0374, "step": 40879 }, { "epoch": 0.9008026354206261, "grad_norm": 1.0788462162017822, "learning_rate": 7.67292413887441e-07, "loss": 0.0611, "step": 40880 }, { "epoch": 0.9008246707101423, "grad_norm": 0.5699794888496399, "learning_rate": 7.669544439294779e-07, "loss": 0.0582, "step": 40881 }, { "epoch": 0.9008467059996584, "grad_norm": 0.3531484305858612, "learning_rate": 7.666165464680469e-07, "loss": 0.0529, "step": 40882 }, { "epoch": 0.9008687412891746, "grad_norm": 0.4670238792896271, "learning_rate": 7.66278721504875e-07, "loss": 0.0522, "step": 40883 }, { "epoch": 0.9008907765786908, "grad_norm": 0.3800147771835327, "learning_rate": 7.659409690416741e-07, "loss": 0.0721, "step": 40884 }, { "epoch": 0.9009128118682069, "grad_norm": 0.7241026163101196, "learning_rate": 7.656032890801695e-07, "loss": 0.0612, "step": 40885 }, { "epoch": 0.9009348471577231, "grad_norm": 0.5822217464447021, "learning_rate": 7.652656816220799e-07, "loss": 0.0348, "step": 40886 }, { "epoch": 0.9009568824472393, "grad_norm": 0.3788871169090271, "learning_rate": 7.649281466691288e-07, "loss": 0.0875, "step": 40887 }, { "epoch": 0.9009789177367554, "grad_norm": 0.48982352018356323, "learning_rate": 7.6459068422303e-07, "loss": 0.058, "step": 40888 }, { "epoch": 0.9010009530262716, "grad_norm": 0.6967909932136536, "learning_rate": 7.642532942855035e-07, "loss": 0.0476, "step": 40889 }, { "epoch": 0.9010229883157878, "grad_norm": 0.5311026573181152, "learning_rate": 7.639159768582715e-07, "loss": 0.0603, "step": 40890 }, { "epoch": 0.9010450236053039, "grad_norm": 0.49572885036468506, "learning_rate": 7.635787319430459e-07, "loss": 0.0555, "step": 40891 }, { "epoch": 0.9010670588948201, "grad_norm": 0.825739860534668, "learning_rate": 7.63241559541552e-07, "loss": 0.0726, "step": 40892 }, { "epoch": 0.9010890941843362, "grad_norm": 0.5380788445472717, "learning_rate": 7.629044596554985e-07, "loss": 0.0757, "step": 40893 }, { "epoch": 0.9011111294738524, "grad_norm": 0.3361254632472992, "learning_rate": 7.625674322866106e-07, "loss": 0.0403, "step": 40894 }, { "epoch": 0.9011331647633686, "grad_norm": 0.6475170850753784, "learning_rate": 7.622304774366002e-07, "loss": 0.0637, "step": 40895 }, { "epoch": 0.9011552000528847, "grad_norm": 0.777044415473938, "learning_rate": 7.618935951071859e-07, "loss": 0.0476, "step": 40896 }, { "epoch": 0.9011772353424009, "grad_norm": 0.5323143601417542, "learning_rate": 7.615567853000815e-07, "loss": 0.053, "step": 40897 }, { "epoch": 0.901199270631917, "grad_norm": 0.5252022743225098, "learning_rate": 7.61220048017004e-07, "loss": 0.0554, "step": 40898 }, { "epoch": 0.9012213059214331, "grad_norm": 0.7765496969223022, "learning_rate": 7.608833832596684e-07, "loss": 0.065, "step": 40899 }, { "epoch": 0.9012433412109493, "grad_norm": 0.5500044226646423, "learning_rate": 7.605467910297869e-07, "loss": 0.0485, "step": 40900 }, { "epoch": 0.9012653765004655, "grad_norm": 0.5019104480743408, "learning_rate": 7.602102713290799e-07, "loss": 0.0579, "step": 40901 }, { "epoch": 0.9012874117899816, "grad_norm": 0.7828665971755981, "learning_rate": 7.598738241592574e-07, "loss": 0.0623, "step": 40902 }, { "epoch": 0.9013094470794978, "grad_norm": 0.5932464003562927, "learning_rate": 7.595374495220331e-07, "loss": 0.054, "step": 40903 }, { "epoch": 0.901331482369014, "grad_norm": 0.8238381743431091, "learning_rate": 7.592011474191207e-07, "loss": 0.0632, "step": 40904 }, { "epoch": 0.9013535176585301, "grad_norm": 0.501194953918457, "learning_rate": 7.588649178522322e-07, "loss": 0.0536, "step": 40905 }, { "epoch": 0.9013755529480463, "grad_norm": 0.5943055748939514, "learning_rate": 7.585287608230845e-07, "loss": 0.0379, "step": 40906 }, { "epoch": 0.9013975882375624, "grad_norm": 0.4532982110977173, "learning_rate": 7.581926763333847e-07, "loss": 0.077, "step": 40907 }, { "epoch": 0.9014196235270786, "grad_norm": 0.9437491297721863, "learning_rate": 7.578566643848461e-07, "loss": 0.076, "step": 40908 }, { "epoch": 0.9014416588165948, "grad_norm": 0.6256609559059143, "learning_rate": 7.57520724979181e-07, "loss": 0.0472, "step": 40909 }, { "epoch": 0.9014636941061109, "grad_norm": 0.50254887342453, "learning_rate": 7.571848581181012e-07, "loss": 0.0583, "step": 40910 }, { "epoch": 0.9014857293956271, "grad_norm": 0.5423140525817871, "learning_rate": 7.568490638033155e-07, "loss": 0.0319, "step": 40911 }, { "epoch": 0.9015077646851433, "grad_norm": 0.6470816731452942, "learning_rate": 7.565133420365356e-07, "loss": 0.0628, "step": 40912 }, { "epoch": 0.9015297999746594, "grad_norm": 0.40917035937309265, "learning_rate": 7.561776928194719e-07, "loss": 0.0875, "step": 40913 }, { "epoch": 0.9015518352641756, "grad_norm": 0.5812205076217651, "learning_rate": 7.558421161538332e-07, "loss": 0.0425, "step": 40914 }, { "epoch": 0.9015738705536918, "grad_norm": 0.5509210228919983, "learning_rate": 7.555066120413312e-07, "loss": 0.0434, "step": 40915 }, { "epoch": 0.9015959058432079, "grad_norm": 0.6641420722007751, "learning_rate": 7.551711804836665e-07, "loss": 0.0404, "step": 40916 }, { "epoch": 0.9016179411327241, "grad_norm": 0.7653916478157043, "learning_rate": 7.54835821482559e-07, "loss": 0.0659, "step": 40917 }, { "epoch": 0.9016399764222403, "grad_norm": 0.35910341143608093, "learning_rate": 7.545005350397094e-07, "loss": 0.048, "step": 40918 }, { "epoch": 0.9016620117117564, "grad_norm": 0.5924205780029297, "learning_rate": 7.541653211568261e-07, "loss": 0.0807, "step": 40919 }, { "epoch": 0.9016840470012726, "grad_norm": 0.7631568908691406, "learning_rate": 7.538301798356195e-07, "loss": 0.0556, "step": 40920 }, { "epoch": 0.9017060822907887, "grad_norm": 0.5449965000152588, "learning_rate": 7.534951110777966e-07, "loss": 0.0711, "step": 40921 }, { "epoch": 0.9017281175803049, "grad_norm": 0.3610876500606537, "learning_rate": 7.531601148850609e-07, "loss": 0.034, "step": 40922 }, { "epoch": 0.901750152869821, "grad_norm": 0.7019131779670715, "learning_rate": 7.528251912591211e-07, "loss": 0.0728, "step": 40923 }, { "epoch": 0.9017721881593371, "grad_norm": 0.7819012999534607, "learning_rate": 7.524903402016842e-07, "loss": 0.0661, "step": 40924 }, { "epoch": 0.9017942234488533, "grad_norm": 0.4072102904319763, "learning_rate": 7.521555617144521e-07, "loss": 0.0636, "step": 40925 }, { "epoch": 0.9018162587383695, "grad_norm": 0.4479454457759857, "learning_rate": 7.518208557991335e-07, "loss": 0.0717, "step": 40926 }, { "epoch": 0.9018382940278856, "grad_norm": 0.4418596625328064, "learning_rate": 7.514862224574287e-07, "loss": 0.0441, "step": 40927 }, { "epoch": 0.9018603293174018, "grad_norm": 0.6768081188201904, "learning_rate": 7.51151661691048e-07, "loss": 0.0844, "step": 40928 }, { "epoch": 0.901882364606918, "grad_norm": 0.6973114013671875, "learning_rate": 7.508171735016917e-07, "loss": 0.0605, "step": 40929 }, { "epoch": 0.9019043998964341, "grad_norm": 0.587178111076355, "learning_rate": 7.504827578910667e-07, "loss": 0.0459, "step": 40930 }, { "epoch": 0.9019264351859503, "grad_norm": 0.7452039122581482, "learning_rate": 7.501484148608717e-07, "loss": 0.0719, "step": 40931 }, { "epoch": 0.9019484704754664, "grad_norm": 0.517175018787384, "learning_rate": 7.498141444128121e-07, "loss": 0.0604, "step": 40932 }, { "epoch": 0.9019705057649826, "grad_norm": 0.704712986946106, "learning_rate": 7.494799465485929e-07, "loss": 0.0627, "step": 40933 }, { "epoch": 0.9019925410544988, "grad_norm": 0.4800303876399994, "learning_rate": 7.491458212699115e-07, "loss": 0.0686, "step": 40934 }, { "epoch": 0.9020145763440149, "grad_norm": 0.4399438500404358, "learning_rate": 7.488117685784728e-07, "loss": 0.0616, "step": 40935 }, { "epoch": 0.9020366116335311, "grad_norm": 0.8403156399726868, "learning_rate": 7.484777884759775e-07, "loss": 0.0589, "step": 40936 }, { "epoch": 0.9020586469230473, "grad_norm": 0.38732704520225525, "learning_rate": 7.481438809641305e-07, "loss": 0.0626, "step": 40937 }, { "epoch": 0.9020806822125634, "grad_norm": 0.48265203833580017, "learning_rate": 7.478100460446258e-07, "loss": 0.0751, "step": 40938 }, { "epoch": 0.9021027175020796, "grad_norm": 0.48130831122398376, "learning_rate": 7.474762837191667e-07, "loss": 0.0605, "step": 40939 }, { "epoch": 0.9021247527915958, "grad_norm": 0.3125644326210022, "learning_rate": 7.471425939894571e-07, "loss": 0.0528, "step": 40940 }, { "epoch": 0.9021467880811119, "grad_norm": 0.4022246301174164, "learning_rate": 7.468089768571906e-07, "loss": 0.0411, "step": 40941 }, { "epoch": 0.9021688233706281, "grad_norm": 0.7562380433082581, "learning_rate": 7.464754323240691e-07, "loss": 0.0455, "step": 40942 }, { "epoch": 0.9021908586601443, "grad_norm": 0.748936653137207, "learning_rate": 7.461419603917913e-07, "loss": 0.0427, "step": 40943 }, { "epoch": 0.9022128939496604, "grad_norm": 0.5667014718055725, "learning_rate": 7.458085610620591e-07, "loss": 0.0435, "step": 40944 }, { "epoch": 0.9022349292391766, "grad_norm": 0.4077293574810028, "learning_rate": 7.454752343365645e-07, "loss": 0.0545, "step": 40945 }, { "epoch": 0.9022569645286928, "grad_norm": 0.431090384721756, "learning_rate": 7.451419802170095e-07, "loss": 0.048, "step": 40946 }, { "epoch": 0.9022789998182089, "grad_norm": 0.7572010159492493, "learning_rate": 7.448087987050911e-07, "loss": 0.0511, "step": 40947 }, { "epoch": 0.902301035107725, "grad_norm": 0.5090966820716858, "learning_rate": 7.444756898025046e-07, "loss": 0.0845, "step": 40948 }, { "epoch": 0.9023230703972411, "grad_norm": 0.46963396668434143, "learning_rate": 7.441426535109502e-07, "loss": 0.0691, "step": 40949 }, { "epoch": 0.9023451056867573, "grad_norm": 0.4286404848098755, "learning_rate": 7.438096898321168e-07, "loss": 0.0271, "step": 40950 }, { "epoch": 0.9023671409762735, "grad_norm": 0.6057199239730835, "learning_rate": 7.434767987677093e-07, "loss": 0.0389, "step": 40951 }, { "epoch": 0.9023891762657896, "grad_norm": 0.22126556932926178, "learning_rate": 7.431439803194185e-07, "loss": 0.0332, "step": 40952 }, { "epoch": 0.9024112115553058, "grad_norm": 0.5970455408096313, "learning_rate": 7.428112344889426e-07, "loss": 0.0493, "step": 40953 }, { "epoch": 0.902433246844822, "grad_norm": 0.7330735325813293, "learning_rate": 7.424785612779722e-07, "loss": 0.0666, "step": 40954 }, { "epoch": 0.9024552821343381, "grad_norm": 0.60341477394104, "learning_rate": 7.421459606882025e-07, "loss": 0.0484, "step": 40955 }, { "epoch": 0.9024773174238543, "grad_norm": 0.6353824138641357, "learning_rate": 7.418134327213322e-07, "loss": 0.0566, "step": 40956 }, { "epoch": 0.9024993527133705, "grad_norm": 0.49973028898239136, "learning_rate": 7.414809773790498e-07, "loss": 0.0573, "step": 40957 }, { "epoch": 0.9025213880028866, "grad_norm": 0.7954757213592529, "learning_rate": 7.411485946630508e-07, "loss": 0.0674, "step": 40958 }, { "epoch": 0.9025434232924028, "grad_norm": 0.22361622750759125, "learning_rate": 7.408162845750288e-07, "loss": 0.0469, "step": 40959 }, { "epoch": 0.902565458581919, "grad_norm": 0.5270561575889587, "learning_rate": 7.404840471166774e-07, "loss": 0.0546, "step": 40960 }, { "epoch": 0.9025874938714351, "grad_norm": 0.6617404222488403, "learning_rate": 7.401518822896852e-07, "loss": 0.0795, "step": 40961 }, { "epoch": 0.9026095291609513, "grad_norm": 0.4507431089878082, "learning_rate": 7.39819790095746e-07, "loss": 0.0738, "step": 40962 }, { "epoch": 0.9026315644504674, "grad_norm": 0.783116340637207, "learning_rate": 7.394877705365549e-07, "loss": 0.0621, "step": 40963 }, { "epoch": 0.9026535997399836, "grad_norm": 0.9630681276321411, "learning_rate": 7.391558236137958e-07, "loss": 0.0692, "step": 40964 }, { "epoch": 0.9026756350294998, "grad_norm": 0.42681705951690674, "learning_rate": 7.388239493291654e-07, "loss": 0.0258, "step": 40965 }, { "epoch": 0.9026976703190159, "grad_norm": 0.6906366944313049, "learning_rate": 7.384921476843493e-07, "loss": 0.069, "step": 40966 }, { "epoch": 0.9027197056085321, "grad_norm": 0.5616395473480225, "learning_rate": 7.381604186810442e-07, "loss": 0.0502, "step": 40967 }, { "epoch": 0.9027417408980483, "grad_norm": 0.45168250799179077, "learning_rate": 7.37828762320934e-07, "loss": 0.0383, "step": 40968 }, { "epoch": 0.9027637761875644, "grad_norm": 0.6565889716148376, "learning_rate": 7.374971786057089e-07, "loss": 0.0684, "step": 40969 }, { "epoch": 0.9027858114770806, "grad_norm": 0.4073326587677002, "learning_rate": 7.371656675370625e-07, "loss": 0.0424, "step": 40970 }, { "epoch": 0.9028078467665968, "grad_norm": 0.48688289523124695, "learning_rate": 7.368342291166768e-07, "loss": 0.045, "step": 40971 }, { "epoch": 0.9028298820561129, "grad_norm": 1.0072766542434692, "learning_rate": 7.36502863346244e-07, "loss": 0.0752, "step": 40972 }, { "epoch": 0.902851917345629, "grad_norm": 0.7990519404411316, "learning_rate": 7.361715702274474e-07, "loss": 0.0881, "step": 40973 }, { "epoch": 0.9028739526351451, "grad_norm": 0.4702088236808777, "learning_rate": 7.358403497619825e-07, "loss": 0.0657, "step": 40974 }, { "epoch": 0.9028959879246613, "grad_norm": 0.5162016153335571, "learning_rate": 7.35509201951528e-07, "loss": 0.0564, "step": 40975 }, { "epoch": 0.9029180232141775, "grad_norm": 0.7634730339050293, "learning_rate": 7.351781267977775e-07, "loss": 0.0826, "step": 40976 }, { "epoch": 0.9029400585036936, "grad_norm": 0.4734092950820923, "learning_rate": 7.34847124302413e-07, "loss": 0.0368, "step": 40977 }, { "epoch": 0.9029620937932098, "grad_norm": 0.3708478808403015, "learning_rate": 7.345161944671214e-07, "loss": 0.0925, "step": 40978 }, { "epoch": 0.902984129082726, "grad_norm": 0.5658046007156372, "learning_rate": 7.341853372935897e-07, "loss": 0.0621, "step": 40979 }, { "epoch": 0.9030061643722421, "grad_norm": 0.516176164150238, "learning_rate": 7.338545527835e-07, "loss": 0.0653, "step": 40980 }, { "epoch": 0.9030281996617583, "grad_norm": 0.7170766592025757, "learning_rate": 7.335238409385392e-07, "loss": 0.0682, "step": 40981 }, { "epoch": 0.9030502349512745, "grad_norm": 0.3907663822174072, "learning_rate": 7.331932017603926e-07, "loss": 0.0312, "step": 40982 }, { "epoch": 0.9030722702407906, "grad_norm": 0.7339919805526733, "learning_rate": 7.328626352507456e-07, "loss": 0.064, "step": 40983 }, { "epoch": 0.9030943055303068, "grad_norm": 0.35624876618385315, "learning_rate": 7.325321414112767e-07, "loss": 0.0255, "step": 40984 }, { "epoch": 0.903116340819823, "grad_norm": 0.7354521751403809, "learning_rate": 7.322017202436731e-07, "loss": 0.0769, "step": 40985 }, { "epoch": 0.9031383761093391, "grad_norm": 0.9117373824119568, "learning_rate": 7.318713717496201e-07, "loss": 0.0792, "step": 40986 }, { "epoch": 0.9031604113988553, "grad_norm": 0.5435613393783569, "learning_rate": 7.315410959307944e-07, "loss": 0.035, "step": 40987 }, { "epoch": 0.9031824466883714, "grad_norm": 0.5705593824386597, "learning_rate": 7.3121089278888e-07, "loss": 0.0674, "step": 40988 }, { "epoch": 0.9032044819778876, "grad_norm": 0.8611945509910583, "learning_rate": 7.30880762325562e-07, "loss": 0.0492, "step": 40989 }, { "epoch": 0.9032265172674038, "grad_norm": 0.724360466003418, "learning_rate": 7.305507045425208e-07, "loss": 0.075, "step": 40990 }, { "epoch": 0.9032485525569199, "grad_norm": 0.7074700593948364, "learning_rate": 7.302207194414351e-07, "loss": 0.0878, "step": 40991 }, { "epoch": 0.9032705878464361, "grad_norm": 0.7637224793434143, "learning_rate": 7.298908070239885e-07, "loss": 0.0685, "step": 40992 }, { "epoch": 0.9032926231359523, "grad_norm": 1.1124930381774902, "learning_rate": 7.295609672918612e-07, "loss": 0.0838, "step": 40993 }, { "epoch": 0.9033146584254684, "grad_norm": 0.4753939211368561, "learning_rate": 7.292312002467289e-07, "loss": 0.0521, "step": 40994 }, { "epoch": 0.9033366937149846, "grad_norm": 0.6757237911224365, "learning_rate": 7.289015058902782e-07, "loss": 0.0507, "step": 40995 }, { "epoch": 0.9033587290045008, "grad_norm": 0.3766099512577057, "learning_rate": 7.285718842241812e-07, "loss": 0.0577, "step": 40996 }, { "epoch": 0.9033807642940168, "grad_norm": 0.6802752614021301, "learning_rate": 7.282423352501249e-07, "loss": 0.0503, "step": 40997 }, { "epoch": 0.903402799583533, "grad_norm": 0.6226682066917419, "learning_rate": 7.279128589697798e-07, "loss": 0.0553, "step": 40998 }, { "epoch": 0.9034248348730491, "grad_norm": 0.4363618791103363, "learning_rate": 7.275834553848309e-07, "loss": 0.0461, "step": 40999 }, { "epoch": 0.9034468701625653, "grad_norm": 0.6572767496109009, "learning_rate": 7.272541244969505e-07, "loss": 0.0479, "step": 41000 }, { "epoch": 0.9034689054520815, "grad_norm": 0.6551676392555237, "learning_rate": 7.269248663078187e-07, "loss": 0.0389, "step": 41001 }, { "epoch": 0.9034909407415976, "grad_norm": 0.42005372047424316, "learning_rate": 7.26595680819116e-07, "loss": 0.0468, "step": 41002 }, { "epoch": 0.9035129760311138, "grad_norm": 0.40837037563323975, "learning_rate": 7.262665680325109e-07, "loss": 0.0542, "step": 41003 }, { "epoch": 0.90353501132063, "grad_norm": 0.3823428750038147, "learning_rate": 7.259375279496855e-07, "loss": 0.0443, "step": 41004 }, { "epoch": 0.9035570466101461, "grad_norm": 0.5427545309066772, "learning_rate": 7.256085605723151e-07, "loss": 0.0764, "step": 41005 }, { "epoch": 0.9035790818996623, "grad_norm": 0.5365109443664551, "learning_rate": 7.252796659020766e-07, "loss": 0.0532, "step": 41006 }, { "epoch": 0.9036011171891785, "grad_norm": 0.534963071346283, "learning_rate": 7.249508439406405e-07, "loss": 0.0561, "step": 41007 }, { "epoch": 0.9036231524786946, "grad_norm": 0.8590648770332336, "learning_rate": 7.246220946896854e-07, "loss": 0.077, "step": 41008 }, { "epoch": 0.9036451877682108, "grad_norm": 0.3019438683986664, "learning_rate": 7.242934181508865e-07, "loss": 0.0567, "step": 41009 }, { "epoch": 0.903667223057727, "grad_norm": 0.4322936534881592, "learning_rate": 7.239648143259159e-07, "loss": 0.053, "step": 41010 }, { "epoch": 0.9036892583472431, "grad_norm": 0.6647979617118835, "learning_rate": 7.236362832164456e-07, "loss": 0.0601, "step": 41011 }, { "epoch": 0.9037112936367593, "grad_norm": 0.5661143064498901, "learning_rate": 7.233078248241543e-07, "loss": 0.0717, "step": 41012 }, { "epoch": 0.9037333289262754, "grad_norm": 0.6570031046867371, "learning_rate": 7.229794391507122e-07, "loss": 0.0916, "step": 41013 }, { "epoch": 0.9037553642157916, "grad_norm": 0.6238263845443726, "learning_rate": 7.226511261977897e-07, "loss": 0.0434, "step": 41014 }, { "epoch": 0.9037773995053078, "grad_norm": 0.4378677010536194, "learning_rate": 7.223228859670605e-07, "loss": 0.0667, "step": 41015 }, { "epoch": 0.9037994347948239, "grad_norm": 0.5184228420257568, "learning_rate": 7.219947184601999e-07, "loss": 0.0576, "step": 41016 }, { "epoch": 0.9038214700843401, "grad_norm": 0.508499264717102, "learning_rate": 7.216666236788749e-07, "loss": 0.0582, "step": 41017 }, { "epoch": 0.9038435053738563, "grad_norm": 0.9957579374313354, "learning_rate": 7.213386016247608e-07, "loss": 0.0741, "step": 41018 }, { "epoch": 0.9038655406633724, "grad_norm": 0.44544658064842224, "learning_rate": 7.210106522995214e-07, "loss": 0.033, "step": 41019 }, { "epoch": 0.9038875759528886, "grad_norm": 0.5431433916091919, "learning_rate": 7.206827757048351e-07, "loss": 0.0575, "step": 41020 }, { "epoch": 0.9039096112424048, "grad_norm": 0.5622338652610779, "learning_rate": 7.203549718423674e-07, "loss": 0.0406, "step": 41021 }, { "epoch": 0.9039316465319208, "grad_norm": 0.4360109567642212, "learning_rate": 7.20027240713792e-07, "loss": 0.0432, "step": 41022 }, { "epoch": 0.903953681821437, "grad_norm": 0.5817738175392151, "learning_rate": 7.196995823207708e-07, "loss": 0.0589, "step": 41023 }, { "epoch": 0.9039757171109531, "grad_norm": 0.7978906035423279, "learning_rate": 7.193719966649809e-07, "loss": 0.0688, "step": 41024 }, { "epoch": 0.9039977524004693, "grad_norm": 0.6605454683303833, "learning_rate": 7.190444837480876e-07, "loss": 0.0657, "step": 41025 }, { "epoch": 0.9040197876899855, "grad_norm": 0.7402865886688232, "learning_rate": 7.18717043571756e-07, "loss": 0.0724, "step": 41026 }, { "epoch": 0.9040418229795016, "grad_norm": 0.6882727742195129, "learning_rate": 7.183896761376585e-07, "loss": 0.0777, "step": 41027 }, { "epoch": 0.9040638582690178, "grad_norm": 0.5627460479736328, "learning_rate": 7.180623814474585e-07, "loss": 0.0532, "step": 41028 }, { "epoch": 0.904085893558534, "grad_norm": 0.6973477602005005, "learning_rate": 7.177351595028298e-07, "loss": 0.0824, "step": 41029 }, { "epoch": 0.9041079288480501, "grad_norm": 0.7393231391906738, "learning_rate": 7.174080103054309e-07, "loss": 0.0708, "step": 41030 }, { "epoch": 0.9041299641375663, "grad_norm": 0.41884762048721313, "learning_rate": 7.170809338569323e-07, "loss": 0.0517, "step": 41031 }, { "epoch": 0.9041519994270825, "grad_norm": 0.6926143169403076, "learning_rate": 7.167539301589992e-07, "loss": 0.0564, "step": 41032 }, { "epoch": 0.9041740347165986, "grad_norm": 0.36042988300323486, "learning_rate": 7.164269992133005e-07, "loss": 0.0319, "step": 41033 }, { "epoch": 0.9041960700061148, "grad_norm": 0.6231669187545776, "learning_rate": 7.161001410214962e-07, "loss": 0.0617, "step": 41034 }, { "epoch": 0.904218105295631, "grad_norm": 0.6565308570861816, "learning_rate": 7.157733555852536e-07, "loss": 0.0725, "step": 41035 }, { "epoch": 0.9042401405851471, "grad_norm": 0.6559463739395142, "learning_rate": 7.154466429062396e-07, "loss": 0.057, "step": 41036 }, { "epoch": 0.9042621758746633, "grad_norm": 0.448293536901474, "learning_rate": 7.151200029861127e-07, "loss": 0.0415, "step": 41037 }, { "epoch": 0.9042842111641795, "grad_norm": 0.9336470365524292, "learning_rate": 7.147934358265418e-07, "loss": 0.0905, "step": 41038 }, { "epoch": 0.9043062464536956, "grad_norm": 0.721817672252655, "learning_rate": 7.144669414291855e-07, "loss": 0.063, "step": 41039 }, { "epoch": 0.9043282817432118, "grad_norm": 0.6870113611221313, "learning_rate": 7.141405197957141e-07, "loss": 0.0572, "step": 41040 }, { "epoch": 0.904350317032728, "grad_norm": 0.928003191947937, "learning_rate": 7.138141709277829e-07, "loss": 0.072, "step": 41041 }, { "epoch": 0.9043723523222441, "grad_norm": 0.45864570140838623, "learning_rate": 7.134878948270556e-07, "loss": 0.0738, "step": 41042 }, { "epoch": 0.9043943876117603, "grad_norm": 0.4939996600151062, "learning_rate": 7.131616914951993e-07, "loss": 0.0554, "step": 41043 }, { "epoch": 0.9044164229012764, "grad_norm": 0.6604880094528198, "learning_rate": 7.128355609338694e-07, "loss": 0.0839, "step": 41044 }, { "epoch": 0.9044384581907926, "grad_norm": 0.799281120300293, "learning_rate": 7.125095031447293e-07, "loss": 0.0653, "step": 41045 }, { "epoch": 0.9044604934803088, "grad_norm": 0.7610433101654053, "learning_rate": 7.121835181294378e-07, "loss": 0.0556, "step": 41046 }, { "epoch": 0.9044825287698248, "grad_norm": 0.5495494604110718, "learning_rate": 7.11857605889662e-07, "loss": 0.0474, "step": 41047 }, { "epoch": 0.904504564059341, "grad_norm": 0.5958801507949829, "learning_rate": 7.115317664270537e-07, "loss": 0.066, "step": 41048 }, { "epoch": 0.9045265993488572, "grad_norm": 0.48201826214790344, "learning_rate": 7.112059997432785e-07, "loss": 0.0359, "step": 41049 }, { "epoch": 0.9045486346383733, "grad_norm": 0.5903002023696899, "learning_rate": 7.108803058399932e-07, "loss": 0.0496, "step": 41050 }, { "epoch": 0.9045706699278895, "grad_norm": 0.7223563194274902, "learning_rate": 7.105546847188549e-07, "loss": 0.0622, "step": 41051 }, { "epoch": 0.9045927052174056, "grad_norm": 0.42284148931503296, "learning_rate": 7.102291363815272e-07, "loss": 0.0513, "step": 41052 }, { "epoch": 0.9046147405069218, "grad_norm": 0.340091735124588, "learning_rate": 7.099036608296605e-07, "loss": 0.0524, "step": 41053 }, { "epoch": 0.904636775796438, "grad_norm": 0.5786680579185486, "learning_rate": 7.095782580649219e-07, "loss": 0.064, "step": 41054 }, { "epoch": 0.9046588110859541, "grad_norm": 0.34521180391311646, "learning_rate": 7.092529280889631e-07, "loss": 0.0463, "step": 41055 }, { "epoch": 0.9046808463754703, "grad_norm": 0.6945970058441162, "learning_rate": 7.089276709034431e-07, "loss": 0.083, "step": 41056 }, { "epoch": 0.9047028816649865, "grad_norm": 0.512704074382782, "learning_rate": 7.086024865100172e-07, "loss": 0.0703, "step": 41057 }, { "epoch": 0.9047249169545026, "grad_norm": 0.7977209687232971, "learning_rate": 7.082773749103406e-07, "loss": 0.0504, "step": 41058 }, { "epoch": 0.9047469522440188, "grad_norm": 0.6553464531898499, "learning_rate": 7.079523361060752e-07, "loss": 0.0733, "step": 41059 }, { "epoch": 0.904768987533535, "grad_norm": 0.5471223592758179, "learning_rate": 7.0762737009887e-07, "loss": 0.0676, "step": 41060 }, { "epoch": 0.9047910228230511, "grad_norm": 0.5528028607368469, "learning_rate": 7.073024768903818e-07, "loss": 0.046, "step": 41061 }, { "epoch": 0.9048130581125673, "grad_norm": 0.8628400564193726, "learning_rate": 7.069776564822678e-07, "loss": 0.0723, "step": 41062 }, { "epoch": 0.9048350934020835, "grad_norm": 0.3786916136741638, "learning_rate": 7.066529088761814e-07, "loss": 0.0596, "step": 41063 }, { "epoch": 0.9048571286915996, "grad_norm": 0.44765180349349976, "learning_rate": 7.063282340737764e-07, "loss": 0.0651, "step": 41064 }, { "epoch": 0.9048791639811158, "grad_norm": 0.7440064549446106, "learning_rate": 7.060036320767049e-07, "loss": 0.0557, "step": 41065 }, { "epoch": 0.904901199270632, "grad_norm": 1.1439480781555176, "learning_rate": 7.056791028866255e-07, "loss": 0.0595, "step": 41066 }, { "epoch": 0.9049232345601481, "grad_norm": 0.8506394624710083, "learning_rate": 7.053546465051835e-07, "loss": 0.0799, "step": 41067 }, { "epoch": 0.9049452698496643, "grad_norm": 0.6804932355880737, "learning_rate": 7.050302629340393e-07, "loss": 0.0585, "step": 41068 }, { "epoch": 0.9049673051391804, "grad_norm": 0.22801651060581207, "learning_rate": 7.047059521748384e-07, "loss": 0.0694, "step": 41069 }, { "epoch": 0.9049893404286966, "grad_norm": 0.5814152956008911, "learning_rate": 7.043817142292374e-07, "loss": 0.0738, "step": 41070 }, { "epoch": 0.9050113757182127, "grad_norm": 0.5454685688018799, "learning_rate": 7.040575490988871e-07, "loss": 0.057, "step": 41071 }, { "epoch": 0.9050334110077288, "grad_norm": 0.7498335242271423, "learning_rate": 7.037334567854375e-07, "loss": 0.0778, "step": 41072 }, { "epoch": 0.905055446297245, "grad_norm": 0.30407291650772095, "learning_rate": 7.034094372905392e-07, "loss": 0.0289, "step": 41073 }, { "epoch": 0.9050774815867612, "grad_norm": 0.6126869916915894, "learning_rate": 7.030854906158424e-07, "loss": 0.0679, "step": 41074 }, { "epoch": 0.9050995168762773, "grad_norm": 0.5546298027038574, "learning_rate": 7.027616167629991e-07, "loss": 0.0707, "step": 41075 }, { "epoch": 0.9051215521657935, "grad_norm": 0.6546347737312317, "learning_rate": 7.024378157336547e-07, "loss": 0.057, "step": 41076 }, { "epoch": 0.9051435874553097, "grad_norm": 0.46874985098838806, "learning_rate": 7.021140875294646e-07, "loss": 0.0597, "step": 41077 }, { "epoch": 0.9051656227448258, "grad_norm": 0.43655017018318176, "learning_rate": 7.017904321520724e-07, "loss": 0.0333, "step": 41078 }, { "epoch": 0.905187658034342, "grad_norm": 0.7415719032287598, "learning_rate": 7.014668496031318e-07, "loss": 0.0573, "step": 41079 }, { "epoch": 0.9052096933238581, "grad_norm": 0.38664862513542175, "learning_rate": 7.011433398842864e-07, "loss": 0.0419, "step": 41080 }, { "epoch": 0.9052317286133743, "grad_norm": 0.4233431816101074, "learning_rate": 7.008199029971851e-07, "loss": 0.0514, "step": 41081 }, { "epoch": 0.9052537639028905, "grad_norm": 0.6152915358543396, "learning_rate": 7.004965389434764e-07, "loss": 0.0512, "step": 41082 }, { "epoch": 0.9052757991924066, "grad_norm": 0.6462274193763733, "learning_rate": 7.001732477248074e-07, "loss": 0.0766, "step": 41083 }, { "epoch": 0.9052978344819228, "grad_norm": 0.9022625684738159, "learning_rate": 6.998500293428217e-07, "loss": 0.0576, "step": 41084 }, { "epoch": 0.905319869771439, "grad_norm": 0.4660114347934723, "learning_rate": 6.995268837991697e-07, "loss": 0.0772, "step": 41085 }, { "epoch": 0.9053419050609551, "grad_norm": 0.6324649453163147, "learning_rate": 6.992038110954985e-07, "loss": 0.0507, "step": 41086 }, { "epoch": 0.9053639403504713, "grad_norm": 0.4140923023223877, "learning_rate": 6.988808112334466e-07, "loss": 0.0427, "step": 41087 }, { "epoch": 0.9053859756399875, "grad_norm": 0.25737136602401733, "learning_rate": 6.985578842146662e-07, "loss": 0.0435, "step": 41088 }, { "epoch": 0.9054080109295036, "grad_norm": 0.43774735927581787, "learning_rate": 6.982350300407991e-07, "loss": 0.0566, "step": 41089 }, { "epoch": 0.9054300462190198, "grad_norm": 0.6802552342414856, "learning_rate": 6.979122487134909e-07, "loss": 0.0672, "step": 41090 }, { "epoch": 0.905452081508536, "grad_norm": 0.5946816802024841, "learning_rate": 6.975895402343852e-07, "loss": 0.0754, "step": 41091 }, { "epoch": 0.9054741167980521, "grad_norm": 0.5710075497627258, "learning_rate": 6.972669046051222e-07, "loss": 0.0373, "step": 41092 }, { "epoch": 0.9054961520875683, "grad_norm": 0.42820408940315247, "learning_rate": 6.969443418273508e-07, "loss": 0.0489, "step": 41093 }, { "epoch": 0.9055181873770844, "grad_norm": 0.5688280463218689, "learning_rate": 6.966218519027112e-07, "loss": 0.0506, "step": 41094 }, { "epoch": 0.9055402226666006, "grad_norm": 0.4876216650009155, "learning_rate": 6.962994348328488e-07, "loss": 0.0766, "step": 41095 }, { "epoch": 0.9055622579561167, "grad_norm": 0.9215225577354431, "learning_rate": 6.959770906194024e-07, "loss": 0.0899, "step": 41096 }, { "epoch": 0.9055842932456328, "grad_norm": 0.6192151308059692, "learning_rate": 6.956548192640139e-07, "loss": 0.0571, "step": 41097 }, { "epoch": 0.905606328535149, "grad_norm": 0.5700811147689819, "learning_rate": 6.953326207683285e-07, "loss": 0.0816, "step": 41098 }, { "epoch": 0.9056283638246652, "grad_norm": 0.4520500600337982, "learning_rate": 6.950104951339803e-07, "loss": 0.0442, "step": 41099 }, { "epoch": 0.9056503991141813, "grad_norm": 0.4179568588733673, "learning_rate": 6.946884423626194e-07, "loss": 0.0728, "step": 41100 }, { "epoch": 0.9056724344036975, "grad_norm": 0.3721925914287567, "learning_rate": 6.943664624558777e-07, "loss": 0.0464, "step": 41101 }, { "epoch": 0.9056944696932137, "grad_norm": 0.6054614186286926, "learning_rate": 6.940445554154023e-07, "loss": 0.0375, "step": 41102 }, { "epoch": 0.9057165049827298, "grad_norm": 0.7297281622886658, "learning_rate": 6.93722721242827e-07, "loss": 0.0749, "step": 41103 }, { "epoch": 0.905738540272246, "grad_norm": 0.9753071665763855, "learning_rate": 6.93400959939794e-07, "loss": 0.0806, "step": 41104 }, { "epoch": 0.9057605755617621, "grad_norm": 0.3997596502304077, "learning_rate": 6.930792715079432e-07, "loss": 0.0525, "step": 41105 }, { "epoch": 0.9057826108512783, "grad_norm": 0.7547625303268433, "learning_rate": 6.927576559489101e-07, "loss": 0.0714, "step": 41106 }, { "epoch": 0.9058046461407945, "grad_norm": 1.1609784364700317, "learning_rate": 6.924361132643336e-07, "loss": 0.0745, "step": 41107 }, { "epoch": 0.9058266814303106, "grad_norm": 0.6329809427261353, "learning_rate": 6.921146434558539e-07, "loss": 0.0528, "step": 41108 }, { "epoch": 0.9058487167198268, "grad_norm": 0.6513953804969788, "learning_rate": 6.917932465251081e-07, "loss": 0.0579, "step": 41109 }, { "epoch": 0.905870752009343, "grad_norm": 0.6227496266365051, "learning_rate": 6.914719224737298e-07, "loss": 0.0603, "step": 41110 }, { "epoch": 0.9058927872988591, "grad_norm": 0.9118697643280029, "learning_rate": 6.911506713033577e-07, "loss": 0.0692, "step": 41111 }, { "epoch": 0.9059148225883753, "grad_norm": 0.7196918725967407, "learning_rate": 6.908294930156289e-07, "loss": 0.0729, "step": 41112 }, { "epoch": 0.9059368578778915, "grad_norm": 0.7428920269012451, "learning_rate": 6.905083876121787e-07, "loss": 0.0615, "step": 41113 }, { "epoch": 0.9059588931674076, "grad_norm": 0.47675132751464844, "learning_rate": 6.901873550946441e-07, "loss": 0.0455, "step": 41114 }, { "epoch": 0.9059809284569238, "grad_norm": 0.6644411683082581, "learning_rate": 6.898663954646539e-07, "loss": 0.0641, "step": 41115 }, { "epoch": 0.90600296374644, "grad_norm": 0.8629118800163269, "learning_rate": 6.895455087238533e-07, "loss": 0.0748, "step": 41116 }, { "epoch": 0.9060249990359561, "grad_norm": 0.8715038299560547, "learning_rate": 6.892246948738695e-07, "loss": 0.0465, "step": 41117 }, { "epoch": 0.9060470343254723, "grad_norm": 0.2924690544605255, "learning_rate": 6.889039539163394e-07, "loss": 0.0488, "step": 41118 }, { "epoch": 0.9060690696149885, "grad_norm": 0.9571830034255981, "learning_rate": 6.885832858528935e-07, "loss": 0.0533, "step": 41119 }, { "epoch": 0.9060911049045046, "grad_norm": 0.3976043164730072, "learning_rate": 6.882626906851669e-07, "loss": 0.0355, "step": 41120 }, { "epoch": 0.9061131401940207, "grad_norm": 0.6222071051597595, "learning_rate": 6.879421684147969e-07, "loss": 0.0779, "step": 41121 }, { "epoch": 0.9061351754835368, "grad_norm": 0.6319140791893005, "learning_rate": 6.876217190434069e-07, "loss": 0.075, "step": 41122 }, { "epoch": 0.906157210773053, "grad_norm": 0.629230797290802, "learning_rate": 6.873013425726376e-07, "loss": 0.0617, "step": 41123 }, { "epoch": 0.9061792460625692, "grad_norm": 0.48879197239875793, "learning_rate": 6.869810390041159e-07, "loss": 0.0382, "step": 41124 }, { "epoch": 0.9062012813520853, "grad_norm": 0.7206112146377563, "learning_rate": 6.866608083394788e-07, "loss": 0.0544, "step": 41125 }, { "epoch": 0.9062233166416015, "grad_norm": 0.6620700359344482, "learning_rate": 6.863406505803482e-07, "loss": 0.079, "step": 41126 }, { "epoch": 0.9062453519311177, "grad_norm": 0.2780761122703552, "learning_rate": 6.860205657283647e-07, "loss": 0.0533, "step": 41127 }, { "epoch": 0.9062673872206338, "grad_norm": 0.369882196187973, "learning_rate": 6.857005537851535e-07, "loss": 0.0695, "step": 41128 }, { "epoch": 0.90628942251015, "grad_norm": 0.45627373456954956, "learning_rate": 6.853806147523434e-07, "loss": 0.057, "step": 41129 }, { "epoch": 0.9063114577996662, "grad_norm": 0.8502672910690308, "learning_rate": 6.850607486315663e-07, "loss": 0.09, "step": 41130 }, { "epoch": 0.9063334930891823, "grad_norm": 0.6194559335708618, "learning_rate": 6.847409554244511e-07, "loss": 0.0485, "step": 41131 }, { "epoch": 0.9063555283786985, "grad_norm": 0.7647504210472107, "learning_rate": 6.844212351326296e-07, "loss": 0.0748, "step": 41132 }, { "epoch": 0.9063775636682146, "grad_norm": 0.564359188079834, "learning_rate": 6.841015877577239e-07, "loss": 0.0326, "step": 41133 }, { "epoch": 0.9063995989577308, "grad_norm": 0.5218809247016907, "learning_rate": 6.837820133013662e-07, "loss": 0.0569, "step": 41134 }, { "epoch": 0.906421634247247, "grad_norm": 0.5632066130638123, "learning_rate": 6.83462511765185e-07, "loss": 0.0968, "step": 41135 }, { "epoch": 0.9064436695367631, "grad_norm": 0.5990208983421326, "learning_rate": 6.831430831508056e-07, "loss": 0.0533, "step": 41136 }, { "epoch": 0.9064657048262793, "grad_norm": 0.7748122811317444, "learning_rate": 6.82823727459857e-07, "loss": 0.0476, "step": 41137 }, { "epoch": 0.9064877401157955, "grad_norm": 0.8042939305305481, "learning_rate": 6.82504444693961e-07, "loss": 0.0652, "step": 41138 }, { "epoch": 0.9065097754053116, "grad_norm": 0.5763826370239258, "learning_rate": 6.821852348547513e-07, "loss": 0.0595, "step": 41139 }, { "epoch": 0.9065318106948278, "grad_norm": 0.7132328748703003, "learning_rate": 6.818660979438484e-07, "loss": 0.0622, "step": 41140 }, { "epoch": 0.906553845984344, "grad_norm": 0.49835243821144104, "learning_rate": 6.81547033962881e-07, "loss": 0.0707, "step": 41141 }, { "epoch": 0.9065758812738601, "grad_norm": 0.577333390712738, "learning_rate": 6.812280429134692e-07, "loss": 0.0876, "step": 41142 }, { "epoch": 0.9065979165633763, "grad_norm": 0.4464934766292572, "learning_rate": 6.809091247972454e-07, "loss": 0.039, "step": 41143 }, { "epoch": 0.9066199518528925, "grad_norm": 0.8376131653785706, "learning_rate": 6.805902796158281e-07, "loss": 0.0619, "step": 41144 }, { "epoch": 0.9066419871424085, "grad_norm": 0.28494638204574585, "learning_rate": 6.802715073708426e-07, "loss": 0.0473, "step": 41145 }, { "epoch": 0.9066640224319247, "grad_norm": 0.49575668573379517, "learning_rate": 6.799528080639144e-07, "loss": 0.0481, "step": 41146 }, { "epoch": 0.9066860577214408, "grad_norm": 0.5855593681335449, "learning_rate": 6.796341816966639e-07, "loss": 0.04, "step": 41147 }, { "epoch": 0.906708093010957, "grad_norm": 0.8421683311462402, "learning_rate": 6.79315628270718e-07, "loss": 0.0764, "step": 41148 }, { "epoch": 0.9067301283004732, "grad_norm": 0.4806078374385834, "learning_rate": 6.789971477876938e-07, "loss": 0.0757, "step": 41149 }, { "epoch": 0.9067521635899893, "grad_norm": 0.5681993961334229, "learning_rate": 6.786787402492201e-07, "loss": 0.0825, "step": 41150 }, { "epoch": 0.9067741988795055, "grad_norm": 0.7571909427642822, "learning_rate": 6.783604056569137e-07, "loss": 0.0496, "step": 41151 }, { "epoch": 0.9067962341690217, "grad_norm": 0.796764612197876, "learning_rate": 6.780421440123985e-07, "loss": 0.0697, "step": 41152 }, { "epoch": 0.9068182694585378, "grad_norm": 0.5572689175605774, "learning_rate": 6.777239553172931e-07, "loss": 0.0445, "step": 41153 }, { "epoch": 0.906840304748054, "grad_norm": 0.26206377148628235, "learning_rate": 6.774058395732197e-07, "loss": 0.0608, "step": 41154 }, { "epoch": 0.9068623400375702, "grad_norm": 0.6145594716072083, "learning_rate": 6.770877967818018e-07, "loss": 0.0693, "step": 41155 }, { "epoch": 0.9068843753270863, "grad_norm": 0.4404701590538025, "learning_rate": 6.767698269446531e-07, "loss": 0.059, "step": 41156 }, { "epoch": 0.9069064106166025, "grad_norm": 0.42640605568885803, "learning_rate": 6.764519300633959e-07, "loss": 0.0617, "step": 41157 }, { "epoch": 0.9069284459061187, "grad_norm": 0.8027660250663757, "learning_rate": 6.761341061396519e-07, "loss": 0.0802, "step": 41158 }, { "epoch": 0.9069504811956348, "grad_norm": 0.4088588058948517, "learning_rate": 6.758163551750385e-07, "loss": 0.0347, "step": 41159 }, { "epoch": 0.906972516485151, "grad_norm": 0.5541253685951233, "learning_rate": 6.754986771711724e-07, "loss": 0.0581, "step": 41160 }, { "epoch": 0.9069945517746671, "grad_norm": 0.2837711274623871, "learning_rate": 6.751810721296726e-07, "loss": 0.0368, "step": 41161 }, { "epoch": 0.9070165870641833, "grad_norm": 0.4574016332626343, "learning_rate": 6.748635400521591e-07, "loss": 0.0606, "step": 41162 }, { "epoch": 0.9070386223536995, "grad_norm": 0.4304547607898712, "learning_rate": 6.745460809402443e-07, "loss": 0.042, "step": 41163 }, { "epoch": 0.9070606576432156, "grad_norm": 0.7708497643470764, "learning_rate": 6.742286947955517e-07, "loss": 0.0693, "step": 41164 }, { "epoch": 0.9070826929327318, "grad_norm": 0.9979637265205383, "learning_rate": 6.739113816196902e-07, "loss": 0.1011, "step": 41165 }, { "epoch": 0.907104728222248, "grad_norm": 0.7795000076293945, "learning_rate": 6.735941414142849e-07, "loss": 0.0415, "step": 41166 }, { "epoch": 0.9071267635117641, "grad_norm": 0.49860432744026184, "learning_rate": 6.732769741809431e-07, "loss": 0.0647, "step": 41167 }, { "epoch": 0.9071487988012803, "grad_norm": 0.41828036308288574, "learning_rate": 6.729598799212866e-07, "loss": 0.0475, "step": 41168 }, { "epoch": 0.9071708340907965, "grad_norm": 0.5127784013748169, "learning_rate": 6.726428586369293e-07, "loss": 0.0644, "step": 41169 }, { "epoch": 0.9071928693803125, "grad_norm": 0.32356593012809753, "learning_rate": 6.723259103294815e-07, "loss": 0.0484, "step": 41170 }, { "epoch": 0.9072149046698287, "grad_norm": 0.5814305543899536, "learning_rate": 6.720090350005653e-07, "loss": 0.0394, "step": 41171 }, { "epoch": 0.9072369399593448, "grad_norm": 0.6874579787254333, "learning_rate": 6.716922326517844e-07, "loss": 0.1185, "step": 41172 }, { "epoch": 0.907258975248861, "grad_norm": 0.5539840459823608, "learning_rate": 6.71375503284764e-07, "loss": 0.0589, "step": 41173 }, { "epoch": 0.9072810105383772, "grad_norm": 0.6074809432029724, "learning_rate": 6.710588469011081e-07, "loss": 0.0883, "step": 41174 }, { "epoch": 0.9073030458278933, "grad_norm": 0.9192906618118286, "learning_rate": 6.707422635024368e-07, "loss": 0.0625, "step": 41175 }, { "epoch": 0.9073250811174095, "grad_norm": 0.6097303628921509, "learning_rate": 6.704257530903557e-07, "loss": 0.0756, "step": 41176 }, { "epoch": 0.9073471164069257, "grad_norm": 1.2184510231018066, "learning_rate": 6.701093156664817e-07, "loss": 0.115, "step": 41177 }, { "epoch": 0.9073691516964418, "grad_norm": 0.8154317140579224, "learning_rate": 6.697929512324269e-07, "loss": 0.0651, "step": 41178 }, { "epoch": 0.907391186985958, "grad_norm": 0.5251997113227844, "learning_rate": 6.694766597897983e-07, "loss": 0.0658, "step": 41179 }, { "epoch": 0.9074132222754742, "grad_norm": 0.9178847074508667, "learning_rate": 6.691604413402097e-07, "loss": 0.0747, "step": 41180 }, { "epoch": 0.9074352575649903, "grad_norm": 0.4574790596961975, "learning_rate": 6.688442958852731e-07, "loss": 0.0519, "step": 41181 }, { "epoch": 0.9074572928545065, "grad_norm": 0.5443161725997925, "learning_rate": 6.685282234265989e-07, "loss": 0.0705, "step": 41182 }, { "epoch": 0.9074793281440227, "grad_norm": 0.46896180510520935, "learning_rate": 6.682122239657923e-07, "loss": 0.0494, "step": 41183 }, { "epoch": 0.9075013634335388, "grad_norm": 0.5985278487205505, "learning_rate": 6.678962975044672e-07, "loss": 0.0763, "step": 41184 }, { "epoch": 0.907523398723055, "grad_norm": 0.7667736411094666, "learning_rate": 6.67580444044234e-07, "loss": 0.0586, "step": 41185 }, { "epoch": 0.9075454340125712, "grad_norm": 0.46591824293136597, "learning_rate": 6.67264663586698e-07, "loss": 0.0678, "step": 41186 }, { "epoch": 0.9075674693020873, "grad_norm": 0.4026847183704376, "learning_rate": 6.66948956133468e-07, "loss": 0.0601, "step": 41187 }, { "epoch": 0.9075895045916035, "grad_norm": 0.6524162888526917, "learning_rate": 6.666333216861525e-07, "loss": 0.0627, "step": 41188 }, { "epoch": 0.9076115398811196, "grad_norm": 0.47185051441192627, "learning_rate": 6.663177602463621e-07, "loss": 0.0582, "step": 41189 }, { "epoch": 0.9076335751706358, "grad_norm": 0.705636739730835, "learning_rate": 6.660022718156988e-07, "loss": 0.0399, "step": 41190 }, { "epoch": 0.907655610460152, "grad_norm": 0.4587395489215851, "learning_rate": 6.65686856395773e-07, "loss": 0.0747, "step": 41191 }, { "epoch": 0.9076776457496681, "grad_norm": 0.5112444162368774, "learning_rate": 6.653715139881933e-07, "loss": 0.0594, "step": 41192 }, { "epoch": 0.9076996810391843, "grad_norm": 0.5377997756004333, "learning_rate": 6.650562445945618e-07, "loss": 0.0486, "step": 41193 }, { "epoch": 0.9077217163287005, "grad_norm": 0.6978014707565308, "learning_rate": 6.647410482164856e-07, "loss": 0.0567, "step": 41194 }, { "epoch": 0.9077437516182165, "grad_norm": 0.5062078833580017, "learning_rate": 6.644259248555685e-07, "loss": 0.0284, "step": 41195 }, { "epoch": 0.9077657869077327, "grad_norm": 1.0866745710372925, "learning_rate": 6.641108745134205e-07, "loss": 0.0796, "step": 41196 }, { "epoch": 0.9077878221972489, "grad_norm": 0.5000510811805725, "learning_rate": 6.637958971916424e-07, "loss": 0.046, "step": 41197 }, { "epoch": 0.907809857486765, "grad_norm": 1.0308936834335327, "learning_rate": 6.634809928918412e-07, "loss": 0.0501, "step": 41198 }, { "epoch": 0.9078318927762812, "grad_norm": 0.5979297757148743, "learning_rate": 6.631661616156154e-07, "loss": 0.0442, "step": 41199 }, { "epoch": 0.9078539280657973, "grad_norm": 0.6872717142105103, "learning_rate": 6.628514033645738e-07, "loss": 0.0492, "step": 41200 }, { "epoch": 0.9078759633553135, "grad_norm": 0.9354525804519653, "learning_rate": 6.625367181403202e-07, "loss": 0.0575, "step": 41201 }, { "epoch": 0.9078979986448297, "grad_norm": 0.5194363594055176, "learning_rate": 6.622221059444533e-07, "loss": 0.053, "step": 41202 }, { "epoch": 0.9079200339343458, "grad_norm": 0.6734944581985474, "learning_rate": 6.619075667785785e-07, "loss": 0.0891, "step": 41203 }, { "epoch": 0.907942069223862, "grad_norm": 0.43894028663635254, "learning_rate": 6.61593100644296e-07, "loss": 0.0798, "step": 41204 }, { "epoch": 0.9079641045133782, "grad_norm": 0.5199000835418701, "learning_rate": 6.612787075432114e-07, "loss": 0.0236, "step": 41205 }, { "epoch": 0.9079861398028943, "grad_norm": 0.5375951528549194, "learning_rate": 6.6096438747692e-07, "loss": 0.0683, "step": 41206 }, { "epoch": 0.9080081750924105, "grad_norm": 0.47310808300971985, "learning_rate": 6.606501404470289e-07, "loss": 0.0615, "step": 41207 }, { "epoch": 0.9080302103819267, "grad_norm": 0.6994590163230896, "learning_rate": 6.60335966455135e-07, "loss": 0.0709, "step": 41208 }, { "epoch": 0.9080522456714428, "grad_norm": 0.4923039674758911, "learning_rate": 6.600218655028389e-07, "loss": 0.0814, "step": 41209 }, { "epoch": 0.908074280960959, "grad_norm": 0.4934120774269104, "learning_rate": 6.597078375917426e-07, "loss": 0.0744, "step": 41210 }, { "epoch": 0.9080963162504752, "grad_norm": 0.6639313101768494, "learning_rate": 6.593938827234413e-07, "loss": 0.0831, "step": 41211 }, { "epoch": 0.9081183515399913, "grad_norm": 0.7083591818809509, "learning_rate": 6.590800008995407e-07, "loss": 0.0561, "step": 41212 }, { "epoch": 0.9081403868295075, "grad_norm": 1.1319777965545654, "learning_rate": 6.587661921216343e-07, "loss": 0.0899, "step": 41213 }, { "epoch": 0.9081624221190236, "grad_norm": 0.5640087127685547, "learning_rate": 6.584524563913208e-07, "loss": 0.0815, "step": 41214 }, { "epoch": 0.9081844574085398, "grad_norm": 0.6439996957778931, "learning_rate": 6.581387937102024e-07, "loss": 0.0476, "step": 41215 }, { "epoch": 0.908206492698056, "grad_norm": 0.1839897781610489, "learning_rate": 6.57825204079871e-07, "loss": 0.0454, "step": 41216 }, { "epoch": 0.9082285279875721, "grad_norm": 0.7562687397003174, "learning_rate": 6.575116875019288e-07, "loss": 0.0549, "step": 41217 }, { "epoch": 0.9082505632770883, "grad_norm": 0.606478750705719, "learning_rate": 6.571982439779678e-07, "loss": 0.0857, "step": 41218 }, { "epoch": 0.9082725985666045, "grad_norm": 0.5036521553993225, "learning_rate": 6.5688487350959e-07, "loss": 0.0542, "step": 41219 }, { "epoch": 0.9082946338561205, "grad_norm": 0.6373855471611023, "learning_rate": 6.565715760983875e-07, "loss": 0.0613, "step": 41220 }, { "epoch": 0.9083166691456367, "grad_norm": 0.7570253014564514, "learning_rate": 6.562583517459608e-07, "loss": 0.069, "step": 41221 }, { "epoch": 0.9083387044351529, "grad_norm": 0.33317530155181885, "learning_rate": 6.559452004538985e-07, "loss": 0.0302, "step": 41222 }, { "epoch": 0.908360739724669, "grad_norm": 0.3281251788139343, "learning_rate": 6.556321222237993e-07, "loss": 0.0743, "step": 41223 }, { "epoch": 0.9083827750141852, "grad_norm": 0.5259449481964111, "learning_rate": 6.553191170572604e-07, "loss": 0.0327, "step": 41224 }, { "epoch": 0.9084048103037013, "grad_norm": 0.6058501601219177, "learning_rate": 6.550061849558703e-07, "loss": 0.0605, "step": 41225 }, { "epoch": 0.9084268455932175, "grad_norm": 0.6468759179115295, "learning_rate": 6.546933259212262e-07, "loss": 0.0671, "step": 41226 }, { "epoch": 0.9084488808827337, "grad_norm": 0.4641161262989044, "learning_rate": 6.543805399549218e-07, "loss": 0.0485, "step": 41227 }, { "epoch": 0.9084709161722498, "grad_norm": 0.7743275761604309, "learning_rate": 6.540678270585526e-07, "loss": 0.0694, "step": 41228 }, { "epoch": 0.908492951461766, "grad_norm": 0.6445423364639282, "learning_rate": 6.537551872337071e-07, "loss": 0.057, "step": 41229 }, { "epoch": 0.9085149867512822, "grad_norm": 0.43546977639198303, "learning_rate": 6.534426204819793e-07, "loss": 0.0492, "step": 41230 }, { "epoch": 0.9085370220407983, "grad_norm": 0.7205911874771118, "learning_rate": 6.531301268049628e-07, "loss": 0.0588, "step": 41231 }, { "epoch": 0.9085590573303145, "grad_norm": 0.6770471930503845, "learning_rate": 6.528177062042462e-07, "loss": 0.0363, "step": 41232 }, { "epoch": 0.9085810926198307, "grad_norm": 0.6401680111885071, "learning_rate": 6.525053586814233e-07, "loss": 0.0641, "step": 41233 }, { "epoch": 0.9086031279093468, "grad_norm": 0.45720571279525757, "learning_rate": 6.521930842380829e-07, "loss": 0.0453, "step": 41234 }, { "epoch": 0.908625163198863, "grad_norm": 0.7817442417144775, "learning_rate": 6.518808828758205e-07, "loss": 0.0795, "step": 41235 }, { "epoch": 0.9086471984883792, "grad_norm": 0.3808217942714691, "learning_rate": 6.515687545962212e-07, "loss": 0.0451, "step": 41236 }, { "epoch": 0.9086692337778953, "grad_norm": 0.4173769950866699, "learning_rate": 6.512566994008757e-07, "loss": 0.0559, "step": 41237 }, { "epoch": 0.9086912690674115, "grad_norm": 0.9085289239883423, "learning_rate": 6.50944717291374e-07, "loss": 0.073, "step": 41238 }, { "epoch": 0.9087133043569277, "grad_norm": 1.150557041168213, "learning_rate": 6.506328082693069e-07, "loss": 0.0762, "step": 41239 }, { "epoch": 0.9087353396464438, "grad_norm": 0.7870787978172302, "learning_rate": 6.503209723362613e-07, "loss": 0.0558, "step": 41240 }, { "epoch": 0.90875737493596, "grad_norm": 0.6011865735054016, "learning_rate": 6.500092094938242e-07, "loss": 0.0615, "step": 41241 }, { "epoch": 0.9087794102254761, "grad_norm": 0.6389840841293335, "learning_rate": 6.496975197435878e-07, "loss": 0.0389, "step": 41242 }, { "epoch": 0.9088014455149923, "grad_norm": 0.4784621000289917, "learning_rate": 6.49385903087134e-07, "loss": 0.049, "step": 41243 }, { "epoch": 0.9088234808045084, "grad_norm": 0.6224386692047119, "learning_rate": 6.490743595260568e-07, "loss": 0.0485, "step": 41244 }, { "epoch": 0.9088455160940245, "grad_norm": 0.5749048590660095, "learning_rate": 6.487628890619363e-07, "loss": 0.0441, "step": 41245 }, { "epoch": 0.9088675513835407, "grad_norm": 0.41744494438171387, "learning_rate": 6.484514916963647e-07, "loss": 0.0514, "step": 41246 }, { "epoch": 0.9088895866730569, "grad_norm": 0.4609641134738922, "learning_rate": 6.48140167430924e-07, "loss": 0.0635, "step": 41247 }, { "epoch": 0.908911621962573, "grad_norm": 0.49694961309432983, "learning_rate": 6.478289162672029e-07, "loss": 0.0678, "step": 41248 }, { "epoch": 0.9089336572520892, "grad_norm": 0.8350515961647034, "learning_rate": 6.475177382067837e-07, "loss": 0.0631, "step": 41249 }, { "epoch": 0.9089556925416054, "grad_norm": 0.757794976234436, "learning_rate": 6.472066332512533e-07, "loss": 0.0727, "step": 41250 }, { "epoch": 0.9089777278311215, "grad_norm": 0.762273371219635, "learning_rate": 6.468956014021987e-07, "loss": 0.0632, "step": 41251 }, { "epoch": 0.9089997631206377, "grad_norm": 0.6152172088623047, "learning_rate": 6.465846426611988e-07, "loss": 0.0653, "step": 41252 }, { "epoch": 0.9090217984101538, "grad_norm": 0.7506477236747742, "learning_rate": 6.462737570298405e-07, "loss": 0.053, "step": 41253 }, { "epoch": 0.90904383369967, "grad_norm": 0.6324487328529358, "learning_rate": 6.45962944509706e-07, "loss": 0.0529, "step": 41254 }, { "epoch": 0.9090658689891862, "grad_norm": 0.6887124180793762, "learning_rate": 6.456522051023822e-07, "loss": 0.0608, "step": 41255 }, { "epoch": 0.9090879042787023, "grad_norm": 0.5102815628051758, "learning_rate": 6.45341538809448e-07, "loss": 0.0529, "step": 41256 }, { "epoch": 0.9091099395682185, "grad_norm": 0.649841845035553, "learning_rate": 6.450309456324854e-07, "loss": 0.0513, "step": 41257 }, { "epoch": 0.9091319748577347, "grad_norm": 0.22024013102054596, "learning_rate": 6.447204255730816e-07, "loss": 0.0503, "step": 41258 }, { "epoch": 0.9091540101472508, "grad_norm": 0.6945667862892151, "learning_rate": 6.444099786328117e-07, "loss": 0.0742, "step": 41259 }, { "epoch": 0.909176045436767, "grad_norm": 0.43922755122184753, "learning_rate": 6.440996048132597e-07, "loss": 0.0286, "step": 41260 }, { "epoch": 0.9091980807262832, "grad_norm": 0.6599730253219604, "learning_rate": 6.437893041160059e-07, "loss": 0.0498, "step": 41261 }, { "epoch": 0.9092201160157993, "grad_norm": 0.4266549348831177, "learning_rate": 6.434790765426341e-07, "loss": 0.0506, "step": 41262 }, { "epoch": 0.9092421513053155, "grad_norm": 0.6237727999687195, "learning_rate": 6.431689220947212e-07, "loss": 0.0586, "step": 41263 }, { "epoch": 0.9092641865948317, "grad_norm": 0.5381751656532288, "learning_rate": 6.428588407738461e-07, "loss": 0.0428, "step": 41264 }, { "epoch": 0.9092862218843478, "grad_norm": 0.4126816391944885, "learning_rate": 6.425488325815926e-07, "loss": 0.0442, "step": 41265 }, { "epoch": 0.909308257173864, "grad_norm": 0.4711243808269501, "learning_rate": 6.42238897519536e-07, "loss": 0.0383, "step": 41266 }, { "epoch": 0.9093302924633802, "grad_norm": 0.6220154762268066, "learning_rate": 6.419290355892565e-07, "loss": 0.0528, "step": 41267 }, { "epoch": 0.9093523277528963, "grad_norm": 0.47046366333961487, "learning_rate": 6.416192467923282e-07, "loss": 0.055, "step": 41268 }, { "epoch": 0.9093743630424124, "grad_norm": 0.564675509929657, "learning_rate": 6.413095311303363e-07, "loss": 0.0598, "step": 41269 }, { "epoch": 0.9093963983319285, "grad_norm": 0.6734050512313843, "learning_rate": 6.409998886048529e-07, "loss": 0.0422, "step": 41270 }, { "epoch": 0.9094184336214447, "grad_norm": 0.7996972799301147, "learning_rate": 6.406903192174601e-07, "loss": 0.0707, "step": 41271 }, { "epoch": 0.9094404689109609, "grad_norm": 0.6042285561561584, "learning_rate": 6.403808229697284e-07, "loss": 0.0708, "step": 41272 }, { "epoch": 0.909462504200477, "grad_norm": 0.4567015469074249, "learning_rate": 6.400713998632379e-07, "loss": 0.0675, "step": 41273 }, { "epoch": 0.9094845394899932, "grad_norm": 0.7356481552124023, "learning_rate": 6.39762049899566e-07, "loss": 0.0729, "step": 41274 }, { "epoch": 0.9095065747795094, "grad_norm": 0.2645614445209503, "learning_rate": 6.394527730802829e-07, "loss": 0.0329, "step": 41275 }, { "epoch": 0.9095286100690255, "grad_norm": 0.6342121958732605, "learning_rate": 6.391435694069692e-07, "loss": 0.0656, "step": 41276 }, { "epoch": 0.9095506453585417, "grad_norm": 0.5715845227241516, "learning_rate": 6.388344388811985e-07, "loss": 0.0434, "step": 41277 }, { "epoch": 0.9095726806480579, "grad_norm": 0.6013768315315247, "learning_rate": 6.38525381504546e-07, "loss": 0.0576, "step": 41278 }, { "epoch": 0.909594715937574, "grad_norm": 0.7332448363304138, "learning_rate": 6.382163972785842e-07, "loss": 0.0326, "step": 41279 }, { "epoch": 0.9096167512270902, "grad_norm": 0.6614199280738831, "learning_rate": 6.379074862048867e-07, "loss": 0.0801, "step": 41280 }, { "epoch": 0.9096387865166063, "grad_norm": 0.5431474447250366, "learning_rate": 6.375986482850304e-07, "loss": 0.05, "step": 41281 }, { "epoch": 0.9096608218061225, "grad_norm": 0.674504280090332, "learning_rate": 6.372898835205843e-07, "loss": 0.0725, "step": 41282 }, { "epoch": 0.9096828570956387, "grad_norm": 0.3437063992023468, "learning_rate": 6.369811919131219e-07, "loss": 0.0662, "step": 41283 }, { "epoch": 0.9097048923851548, "grad_norm": 0.5638325810432434, "learning_rate": 6.366725734642171e-07, "loss": 0.0381, "step": 41284 }, { "epoch": 0.909726927674671, "grad_norm": 0.506119430065155, "learning_rate": 6.363640281754435e-07, "loss": 0.0583, "step": 41285 }, { "epoch": 0.9097489629641872, "grad_norm": 0.40577125549316406, "learning_rate": 6.360555560483666e-07, "loss": 0.0751, "step": 41286 }, { "epoch": 0.9097709982537033, "grad_norm": 0.47180846333503723, "learning_rate": 6.357471570845635e-07, "loss": 0.0571, "step": 41287 }, { "epoch": 0.9097930335432195, "grad_norm": 0.35746461153030396, "learning_rate": 6.35438831285603e-07, "loss": 0.0401, "step": 41288 }, { "epoch": 0.9098150688327357, "grad_norm": 0.5200883746147156, "learning_rate": 6.351305786530536e-07, "loss": 0.0565, "step": 41289 }, { "epoch": 0.9098371041222518, "grad_norm": 0.4085940420627594, "learning_rate": 6.348223991884894e-07, "loss": 0.0641, "step": 41290 }, { "epoch": 0.909859139411768, "grad_norm": 0.5060986876487732, "learning_rate": 6.345142928934755e-07, "loss": 0.0252, "step": 41291 }, { "epoch": 0.9098811747012842, "grad_norm": 1.054945468902588, "learning_rate": 6.342062597695858e-07, "loss": 0.073, "step": 41292 }, { "epoch": 0.9099032099908003, "grad_norm": 0.948409914970398, "learning_rate": 6.338982998183856e-07, "loss": 0.0543, "step": 41293 }, { "epoch": 0.9099252452803164, "grad_norm": 0.3425229787826538, "learning_rate": 6.335904130414471e-07, "loss": 0.0319, "step": 41294 }, { "epoch": 0.9099472805698325, "grad_norm": 0.8525256514549255, "learning_rate": 6.33282599440334e-07, "loss": 0.0827, "step": 41295 }, { "epoch": 0.9099693158593487, "grad_norm": 0.4019516408443451, "learning_rate": 6.329748590166184e-07, "loss": 0.0573, "step": 41296 }, { "epoch": 0.9099913511488649, "grad_norm": 0.5139709115028381, "learning_rate": 6.326671917718673e-07, "loss": 0.0499, "step": 41297 }, { "epoch": 0.910013386438381, "grad_norm": 0.45917990803718567, "learning_rate": 6.323595977076429e-07, "loss": 0.0315, "step": 41298 }, { "epoch": 0.9100354217278972, "grad_norm": 0.2945774793624878, "learning_rate": 6.320520768255172e-07, "loss": 0.0716, "step": 41299 }, { "epoch": 0.9100574570174134, "grad_norm": 0.5087344646453857, "learning_rate": 6.317446291270557e-07, "loss": 0.0556, "step": 41300 }, { "epoch": 0.9100794923069295, "grad_norm": 0.5952342748641968, "learning_rate": 6.314372546138236e-07, "loss": 0.0528, "step": 41301 }, { "epoch": 0.9101015275964457, "grad_norm": 0.7240443229675293, "learning_rate": 6.311299532873865e-07, "loss": 0.0677, "step": 41302 }, { "epoch": 0.9101235628859619, "grad_norm": 0.5165095329284668, "learning_rate": 6.308227251493082e-07, "loss": 0.0592, "step": 41303 }, { "epoch": 0.910145598175478, "grad_norm": 0.7163882851600647, "learning_rate": 6.305155702011589e-07, "loss": 0.059, "step": 41304 }, { "epoch": 0.9101676334649942, "grad_norm": 0.6251655220985413, "learning_rate": 6.302084884444959e-07, "loss": 0.0469, "step": 41305 }, { "epoch": 0.9101896687545104, "grad_norm": 0.765114963054657, "learning_rate": 6.299014798808861e-07, "loss": 0.071, "step": 41306 }, { "epoch": 0.9102117040440265, "grad_norm": 0.7428812980651855, "learning_rate": 6.29594544511895e-07, "loss": 0.0649, "step": 41307 }, { "epoch": 0.9102337393335427, "grad_norm": 0.6308498382568359, "learning_rate": 6.292876823390881e-07, "loss": 0.0776, "step": 41308 }, { "epoch": 0.9102557746230588, "grad_norm": 0.9514975547790527, "learning_rate": 6.289808933640206e-07, "loss": 0.0913, "step": 41309 }, { "epoch": 0.910277809912575, "grad_norm": 0.6075811386108398, "learning_rate": 6.286741775882615e-07, "loss": 0.05, "step": 41310 }, { "epoch": 0.9102998452020912, "grad_norm": 0.6377310156822205, "learning_rate": 6.283675350133727e-07, "loss": 0.0684, "step": 41311 }, { "epoch": 0.9103218804916073, "grad_norm": 0.863936722278595, "learning_rate": 6.280609656409131e-07, "loss": 0.0747, "step": 41312 }, { "epoch": 0.9103439157811235, "grad_norm": 0.5367920398712158, "learning_rate": 6.277544694724479e-07, "loss": 0.0442, "step": 41313 }, { "epoch": 0.9103659510706397, "grad_norm": 0.5821807980537415, "learning_rate": 6.274480465095328e-07, "loss": 0.0522, "step": 41314 }, { "epoch": 0.9103879863601558, "grad_norm": 0.8326097726821899, "learning_rate": 6.271416967537347e-07, "loss": 0.0526, "step": 41315 }, { "epoch": 0.910410021649672, "grad_norm": 0.6137572526931763, "learning_rate": 6.268354202066106e-07, "loss": 0.0579, "step": 41316 }, { "epoch": 0.9104320569391882, "grad_norm": 0.5961829423904419, "learning_rate": 6.265292168697212e-07, "loss": 0.0485, "step": 41317 }, { "epoch": 0.9104540922287042, "grad_norm": 0.695706307888031, "learning_rate": 6.262230867446267e-07, "loss": 0.0619, "step": 41318 }, { "epoch": 0.9104761275182204, "grad_norm": 0.5910059809684753, "learning_rate": 6.259170298328843e-07, "loss": 0.0363, "step": 41319 }, { "epoch": 0.9104981628077365, "grad_norm": 0.38064759969711304, "learning_rate": 6.256110461360559e-07, "loss": 0.0616, "step": 41320 }, { "epoch": 0.9105201980972527, "grad_norm": 0.5901778936386108, "learning_rate": 6.253051356556955e-07, "loss": 0.0655, "step": 41321 }, { "epoch": 0.9105422333867689, "grad_norm": 0.36970794200897217, "learning_rate": 6.249992983933683e-07, "loss": 0.0573, "step": 41322 }, { "epoch": 0.910564268676285, "grad_norm": 0.9892854690551758, "learning_rate": 6.246935343506266e-07, "loss": 0.0635, "step": 41323 }, { "epoch": 0.9105863039658012, "grad_norm": 0.4006727933883667, "learning_rate": 6.243878435290307e-07, "loss": 0.0583, "step": 41324 }, { "epoch": 0.9106083392553174, "grad_norm": 0.4721331298351288, "learning_rate": 6.240822259301343e-07, "loss": 0.0569, "step": 41325 }, { "epoch": 0.9106303745448335, "grad_norm": 0.8061052560806274, "learning_rate": 6.237766815554962e-07, "loss": 0.0566, "step": 41326 }, { "epoch": 0.9106524098343497, "grad_norm": 0.11211660504341125, "learning_rate": 6.234712104066753e-07, "loss": 0.0493, "step": 41327 }, { "epoch": 0.9106744451238659, "grad_norm": 0.5625097155570984, "learning_rate": 6.231658124852219e-07, "loss": 0.0687, "step": 41328 }, { "epoch": 0.910696480413382, "grad_norm": 0.6565241813659668, "learning_rate": 6.228604877926946e-07, "loss": 0.0534, "step": 41329 }, { "epoch": 0.9107185157028982, "grad_norm": 0.6609076261520386, "learning_rate": 6.225552363306492e-07, "loss": 0.0528, "step": 41330 }, { "epoch": 0.9107405509924144, "grad_norm": 0.4663519561290741, "learning_rate": 6.222500581006407e-07, "loss": 0.0362, "step": 41331 }, { "epoch": 0.9107625862819305, "grad_norm": 0.8898695111274719, "learning_rate": 6.219449531042215e-07, "loss": 0.0727, "step": 41332 }, { "epoch": 0.9107846215714467, "grad_norm": 0.2175973802804947, "learning_rate": 6.216399213429469e-07, "loss": 0.0505, "step": 41333 }, { "epoch": 0.9108066568609628, "grad_norm": 0.9851136803627014, "learning_rate": 6.213349628183707e-07, "loss": 0.0484, "step": 41334 }, { "epoch": 0.910828692150479, "grad_norm": 0.8902252912521362, "learning_rate": 6.210300775320465e-07, "loss": 0.0812, "step": 41335 }, { "epoch": 0.9108507274399952, "grad_norm": 0.591912031173706, "learning_rate": 6.207252654855267e-07, "loss": 0.0545, "step": 41336 }, { "epoch": 0.9108727627295113, "grad_norm": 0.8031384348869324, "learning_rate": 6.204205266803614e-07, "loss": 0.0504, "step": 41337 }, { "epoch": 0.9108947980190275, "grad_norm": 0.6209788918495178, "learning_rate": 6.201158611181079e-07, "loss": 0.0621, "step": 41338 }, { "epoch": 0.9109168333085437, "grad_norm": 1.4238924980163574, "learning_rate": 6.198112688003149e-07, "loss": 0.0535, "step": 41339 }, { "epoch": 0.9109388685980598, "grad_norm": 0.41605207324028015, "learning_rate": 6.195067497285362e-07, "loss": 0.0485, "step": 41340 }, { "epoch": 0.910960903887576, "grad_norm": 0.4564274847507477, "learning_rate": 6.192023039043171e-07, "loss": 0.0501, "step": 41341 }, { "epoch": 0.9109829391770922, "grad_norm": 0.5617774724960327, "learning_rate": 6.188979313292148e-07, "loss": 0.0736, "step": 41342 }, { "epoch": 0.9110049744666082, "grad_norm": 0.7959338426589966, "learning_rate": 6.18593632004778e-07, "loss": 0.0813, "step": 41343 }, { "epoch": 0.9110270097561244, "grad_norm": 0.49596139788627625, "learning_rate": 6.182894059325522e-07, "loss": 0.0631, "step": 41344 }, { "epoch": 0.9110490450456405, "grad_norm": 0.4940144121646881, "learning_rate": 6.179852531140928e-07, "loss": 0.0355, "step": 41345 }, { "epoch": 0.9110710803351567, "grad_norm": 0.6998546123504639, "learning_rate": 6.176811735509469e-07, "loss": 0.0568, "step": 41346 }, { "epoch": 0.9110931156246729, "grad_norm": 0.8996942639350891, "learning_rate": 6.173771672446632e-07, "loss": 0.0496, "step": 41347 }, { "epoch": 0.911115150914189, "grad_norm": 0.4713624119758606, "learning_rate": 6.170732341967888e-07, "loss": 0.074, "step": 41348 }, { "epoch": 0.9111371862037052, "grad_norm": 0.5180858373641968, "learning_rate": 6.167693744088743e-07, "loss": 0.0522, "step": 41349 }, { "epoch": 0.9111592214932214, "grad_norm": 0.4818393886089325, "learning_rate": 6.164655878824682e-07, "loss": 0.0292, "step": 41350 }, { "epoch": 0.9111812567827375, "grad_norm": 0.3321091830730438, "learning_rate": 6.161618746191128e-07, "loss": 0.0497, "step": 41351 }, { "epoch": 0.9112032920722537, "grad_norm": 0.603987455368042, "learning_rate": 6.158582346203584e-07, "loss": 0.0485, "step": 41352 }, { "epoch": 0.9112253273617699, "grad_norm": 0.6413713097572327, "learning_rate": 6.155546678877521e-07, "loss": 0.0811, "step": 41353 }, { "epoch": 0.911247362651286, "grad_norm": 0.6267648339271545, "learning_rate": 6.152511744228412e-07, "loss": 0.0555, "step": 41354 }, { "epoch": 0.9112693979408022, "grad_norm": 0.9711702466011047, "learning_rate": 6.149477542271675e-07, "loss": 0.0618, "step": 41355 }, { "epoch": 0.9112914332303184, "grad_norm": 0.32212212681770325, "learning_rate": 6.1464440730228e-07, "loss": 0.0445, "step": 41356 }, { "epoch": 0.9113134685198345, "grad_norm": 0.42215144634246826, "learning_rate": 6.143411336497207e-07, "loss": 0.0382, "step": 41357 }, { "epoch": 0.9113355038093507, "grad_norm": 0.4043949246406555, "learning_rate": 6.140379332710383e-07, "loss": 0.064, "step": 41358 }, { "epoch": 0.9113575390988669, "grad_norm": 0.573658287525177, "learning_rate": 6.13734806167775e-07, "loss": 0.0658, "step": 41359 }, { "epoch": 0.911379574388383, "grad_norm": 0.9429768919944763, "learning_rate": 6.134317523414729e-07, "loss": 0.0905, "step": 41360 }, { "epoch": 0.9114016096778992, "grad_norm": 0.6346355676651001, "learning_rate": 6.131287717936806e-07, "loss": 0.0567, "step": 41361 }, { "epoch": 0.9114236449674153, "grad_norm": 0.8087946772575378, "learning_rate": 6.128258645259355e-07, "loss": 0.0556, "step": 41362 }, { "epoch": 0.9114456802569315, "grad_norm": 0.7840694785118103, "learning_rate": 6.125230305397861e-07, "loss": 0.0785, "step": 41363 }, { "epoch": 0.9114677155464477, "grad_norm": 0.4787752628326416, "learning_rate": 6.12220269836768e-07, "loss": 0.0477, "step": 41364 }, { "epoch": 0.9114897508359638, "grad_norm": 0.46840783953666687, "learning_rate": 6.119175824184315e-07, "loss": 0.0498, "step": 41365 }, { "epoch": 0.91151178612548, "grad_norm": 0.6127579212188721, "learning_rate": 6.11614968286312e-07, "loss": 0.0786, "step": 41366 }, { "epoch": 0.9115338214149962, "grad_norm": 0.4615955948829651, "learning_rate": 6.113124274419535e-07, "loss": 0.0408, "step": 41367 }, { "epoch": 0.9115558567045122, "grad_norm": 0.8558708429336548, "learning_rate": 6.110099598868979e-07, "loss": 0.0428, "step": 41368 }, { "epoch": 0.9115778919940284, "grad_norm": 0.6694774031639099, "learning_rate": 6.107075656226824e-07, "loss": 0.0706, "step": 41369 }, { "epoch": 0.9115999272835446, "grad_norm": 0.48698392510414124, "learning_rate": 6.104052446508524e-07, "loss": 0.058, "step": 41370 }, { "epoch": 0.9116219625730607, "grad_norm": 0.8094778060913086, "learning_rate": 6.101029969729399e-07, "loss": 0.0665, "step": 41371 }, { "epoch": 0.9116439978625769, "grad_norm": 0.7088784575462341, "learning_rate": 6.098008225904939e-07, "loss": 0.0456, "step": 41372 }, { "epoch": 0.911666033152093, "grad_norm": 0.5933710932731628, "learning_rate": 6.094987215050462e-07, "loss": 0.0547, "step": 41373 }, { "epoch": 0.9116880684416092, "grad_norm": 0.299049437046051, "learning_rate": 6.091966937181409e-07, "loss": 0.0516, "step": 41374 }, { "epoch": 0.9117101037311254, "grad_norm": 0.6988309025764465, "learning_rate": 6.088947392313115e-07, "loss": 0.0534, "step": 41375 }, { "epoch": 0.9117321390206415, "grad_norm": 0.5360057353973389, "learning_rate": 6.085928580460986e-07, "loss": 0.0591, "step": 41376 }, { "epoch": 0.9117541743101577, "grad_norm": 0.6364514231681824, "learning_rate": 6.082910501640426e-07, "loss": 0.0902, "step": 41377 }, { "epoch": 0.9117762095996739, "grad_norm": 0.44298359751701355, "learning_rate": 6.079893155866739e-07, "loss": 0.0681, "step": 41378 }, { "epoch": 0.91179824488919, "grad_norm": 0.858886182308197, "learning_rate": 6.076876543155346e-07, "loss": 0.0703, "step": 41379 }, { "epoch": 0.9118202801787062, "grad_norm": 0.46623486280441284, "learning_rate": 6.073860663521586e-07, "loss": 0.072, "step": 41380 }, { "epoch": 0.9118423154682224, "grad_norm": 0.4471556544303894, "learning_rate": 6.070845516980861e-07, "loss": 0.0472, "step": 41381 }, { "epoch": 0.9118643507577385, "grad_norm": 0.561044454574585, "learning_rate": 6.067831103548477e-07, "loss": 0.0466, "step": 41382 }, { "epoch": 0.9118863860472547, "grad_norm": 0.6143766045570374, "learning_rate": 6.064817423239821e-07, "loss": 0.0593, "step": 41383 }, { "epoch": 0.9119084213367709, "grad_norm": 0.697166919708252, "learning_rate": 6.061804476070232e-07, "loss": 0.048, "step": 41384 }, { "epoch": 0.911930456626287, "grad_norm": 0.3670375347137451, "learning_rate": 6.058792262055063e-07, "loss": 0.0366, "step": 41385 }, { "epoch": 0.9119524919158032, "grad_norm": 0.5977451801300049, "learning_rate": 6.055780781209653e-07, "loss": 0.0815, "step": 41386 }, { "epoch": 0.9119745272053194, "grad_norm": 0.8351966738700867, "learning_rate": 6.052770033549321e-07, "loss": 0.1056, "step": 41387 }, { "epoch": 0.9119965624948355, "grad_norm": 0.40713876485824585, "learning_rate": 6.04976001908944e-07, "loss": 0.0447, "step": 41388 }, { "epoch": 0.9120185977843517, "grad_norm": 0.3055843412876129, "learning_rate": 6.04675073784533e-07, "loss": 0.046, "step": 41389 }, { "epoch": 0.9120406330738678, "grad_norm": 0.716857373714447, "learning_rate": 6.043742189832297e-07, "loss": 0.0605, "step": 41390 }, { "epoch": 0.912062668363384, "grad_norm": 0.6095912456512451, "learning_rate": 6.040734375065693e-07, "loss": 0.0717, "step": 41391 }, { "epoch": 0.9120847036529002, "grad_norm": 1.1057822704315186, "learning_rate": 6.037727293560807e-07, "loss": 0.0685, "step": 41392 }, { "epoch": 0.9121067389424162, "grad_norm": 0.756662130355835, "learning_rate": 6.034720945333011e-07, "loss": 0.0762, "step": 41393 }, { "epoch": 0.9121287742319324, "grad_norm": 0.9921894669532776, "learning_rate": 6.031715330397525e-07, "loss": 0.0693, "step": 41394 }, { "epoch": 0.9121508095214486, "grad_norm": 0.7885181903839111, "learning_rate": 6.02871044876977e-07, "loss": 0.0519, "step": 41395 }, { "epoch": 0.9121728448109647, "grad_norm": 0.686040997505188, "learning_rate": 6.025706300464967e-07, "loss": 0.0442, "step": 41396 }, { "epoch": 0.9121948801004809, "grad_norm": 0.24011369049549103, "learning_rate": 6.022702885498454e-07, "loss": 0.0387, "step": 41397 }, { "epoch": 0.912216915389997, "grad_norm": 0.5847232937812805, "learning_rate": 6.01970020388552e-07, "loss": 0.0299, "step": 41398 }, { "epoch": 0.9122389506795132, "grad_norm": 0.4036369323730469, "learning_rate": 6.016698255641467e-07, "loss": 0.0399, "step": 41399 }, { "epoch": 0.9122609859690294, "grad_norm": 0.6411815881729126, "learning_rate": 6.013697040781585e-07, "loss": 0.0672, "step": 41400 }, { "epoch": 0.9122830212585455, "grad_norm": 0.6778431534767151, "learning_rate": 6.010696559321144e-07, "loss": 0.0531, "step": 41401 }, { "epoch": 0.9123050565480617, "grad_norm": 0.3964217007160187, "learning_rate": 6.007696811275432e-07, "loss": 0.0544, "step": 41402 }, { "epoch": 0.9123270918375779, "grad_norm": 0.9128100872039795, "learning_rate": 6.004697796659736e-07, "loss": 0.0642, "step": 41403 }, { "epoch": 0.912349127127094, "grad_norm": 0.5395764112472534, "learning_rate": 6.001699515489345e-07, "loss": 0.0435, "step": 41404 }, { "epoch": 0.9123711624166102, "grad_norm": 0.8185807466506958, "learning_rate": 5.998701967779513e-07, "loss": 0.0429, "step": 41405 }, { "epoch": 0.9123931977061264, "grad_norm": 0.4992223083972931, "learning_rate": 5.995705153545494e-07, "loss": 0.0515, "step": 41406 }, { "epoch": 0.9124152329956425, "grad_norm": 0.7694149613380432, "learning_rate": 5.992709072802593e-07, "loss": 0.0874, "step": 41407 }, { "epoch": 0.9124372682851587, "grad_norm": 0.763477623462677, "learning_rate": 5.989713725566032e-07, "loss": 0.0499, "step": 41408 }, { "epoch": 0.9124593035746749, "grad_norm": 0.7211759686470032, "learning_rate": 5.986719111851114e-07, "loss": 0.0525, "step": 41409 }, { "epoch": 0.912481338864191, "grad_norm": 0.4087577164173126, "learning_rate": 5.98372523167301e-07, "loss": 0.0521, "step": 41410 }, { "epoch": 0.9125033741537072, "grad_norm": 0.7072274088859558, "learning_rate": 5.980732085047058e-07, "loss": 0.0552, "step": 41411 }, { "epoch": 0.9125254094432234, "grad_norm": 0.4479898512363434, "learning_rate": 5.977739671988447e-07, "loss": 0.0522, "step": 41412 }, { "epoch": 0.9125474447327395, "grad_norm": 0.4667559862136841, "learning_rate": 5.974747992512447e-07, "loss": 0.073, "step": 41413 }, { "epoch": 0.9125694800222557, "grad_norm": 0.5856842398643494, "learning_rate": 5.971757046634296e-07, "loss": 0.0556, "step": 41414 }, { "epoch": 0.9125915153117719, "grad_norm": 1.0085690021514893, "learning_rate": 5.968766834369216e-07, "loss": 0.0638, "step": 41415 }, { "epoch": 0.912613550601288, "grad_norm": 1.0660046339035034, "learning_rate": 5.96577735573246e-07, "loss": 0.0587, "step": 41416 }, { "epoch": 0.9126355858908041, "grad_norm": 0.6374170780181885, "learning_rate": 5.962788610739183e-07, "loss": 0.067, "step": 41417 }, { "epoch": 0.9126576211803202, "grad_norm": 0.784080445766449, "learning_rate": 5.959800599404708e-07, "loss": 0.0481, "step": 41418 }, { "epoch": 0.9126796564698364, "grad_norm": 0.30593281984329224, "learning_rate": 5.956813321744203e-07, "loss": 0.051, "step": 41419 }, { "epoch": 0.9127016917593526, "grad_norm": 0.5596378445625305, "learning_rate": 5.953826777772892e-07, "loss": 0.0606, "step": 41420 }, { "epoch": 0.9127237270488687, "grad_norm": 0.7787410616874695, "learning_rate": 5.950840967505978e-07, "loss": 0.0723, "step": 41421 }, { "epoch": 0.9127457623383849, "grad_norm": 0.6202200055122375, "learning_rate": 5.947855890958665e-07, "loss": 0.0547, "step": 41422 }, { "epoch": 0.9127677976279011, "grad_norm": 0.8271389603614807, "learning_rate": 5.944871548146191e-07, "loss": 0.0955, "step": 41423 }, { "epoch": 0.9127898329174172, "grad_norm": 0.4089002013206482, "learning_rate": 5.941887939083712e-07, "loss": 0.0466, "step": 41424 }, { "epoch": 0.9128118682069334, "grad_norm": 0.5931822061538696, "learning_rate": 5.938905063786465e-07, "loss": 0.0477, "step": 41425 }, { "epoch": 0.9128339034964496, "grad_norm": 0.5236233472824097, "learning_rate": 5.935922922269605e-07, "loss": 0.0351, "step": 41426 }, { "epoch": 0.9128559387859657, "grad_norm": 0.4171489179134369, "learning_rate": 5.932941514548384e-07, "loss": 0.0664, "step": 41427 }, { "epoch": 0.9128779740754819, "grad_norm": 0.6066768765449524, "learning_rate": 5.929960840637911e-07, "loss": 0.0863, "step": 41428 }, { "epoch": 0.912900009364998, "grad_norm": 0.555530846118927, "learning_rate": 5.926980900553403e-07, "loss": 0.0672, "step": 41429 }, { "epoch": 0.9129220446545142, "grad_norm": 0.21605300903320312, "learning_rate": 5.924001694310066e-07, "loss": 0.0323, "step": 41430 }, { "epoch": 0.9129440799440304, "grad_norm": 0.8140695691108704, "learning_rate": 5.921023221923022e-07, "loss": 0.071, "step": 41431 }, { "epoch": 0.9129661152335465, "grad_norm": 0.5565714240074158, "learning_rate": 5.918045483407492e-07, "loss": 0.0595, "step": 41432 }, { "epoch": 0.9129881505230627, "grad_norm": 0.7473786473274231, "learning_rate": 5.915068478778579e-07, "loss": 0.0717, "step": 41433 }, { "epoch": 0.9130101858125789, "grad_norm": 0.9547047019004822, "learning_rate": 5.91209220805154e-07, "loss": 0.0574, "step": 41434 }, { "epoch": 0.913032221102095, "grad_norm": 0.5827768445014954, "learning_rate": 5.909116671241443e-07, "loss": 0.0594, "step": 41435 }, { "epoch": 0.9130542563916112, "grad_norm": 0.6073786616325378, "learning_rate": 5.906141868363496e-07, "loss": 0.0636, "step": 41436 }, { "epoch": 0.9130762916811274, "grad_norm": 0.2914079427719116, "learning_rate": 5.903167799432835e-07, "loss": 0.06, "step": 41437 }, { "epoch": 0.9130983269706435, "grad_norm": 0.7506868243217468, "learning_rate": 5.900194464464614e-07, "loss": 0.0547, "step": 41438 }, { "epoch": 0.9131203622601597, "grad_norm": 0.6827535033226013, "learning_rate": 5.89722186347399e-07, "loss": 0.0725, "step": 41439 }, { "epoch": 0.9131423975496759, "grad_norm": 0.590227484703064, "learning_rate": 5.894249996476047e-07, "loss": 0.0266, "step": 41440 }, { "epoch": 0.913164432839192, "grad_norm": 0.6399306058883667, "learning_rate": 5.891278863485994e-07, "loss": 0.0487, "step": 41441 }, { "epoch": 0.9131864681287081, "grad_norm": 0.3704201877117157, "learning_rate": 5.888308464518932e-07, "loss": 0.0683, "step": 41442 }, { "epoch": 0.9132085034182242, "grad_norm": 0.6038900017738342, "learning_rate": 5.885338799590001e-07, "loss": 0.0567, "step": 41443 }, { "epoch": 0.9132305387077404, "grad_norm": 0.8615712523460388, "learning_rate": 5.882369868714304e-07, "loss": 0.0804, "step": 41444 }, { "epoch": 0.9132525739972566, "grad_norm": 0.8197994232177734, "learning_rate": 5.879401671906981e-07, "loss": 0.0872, "step": 41445 }, { "epoch": 0.9132746092867727, "grad_norm": 0.4321417808532715, "learning_rate": 5.876434209183168e-07, "loss": 0.061, "step": 41446 }, { "epoch": 0.9132966445762889, "grad_norm": 0.5316075682640076, "learning_rate": 5.873467480557954e-07, "loss": 0.08, "step": 41447 }, { "epoch": 0.9133186798658051, "grad_norm": 0.5622576475143433, "learning_rate": 5.870501486046443e-07, "loss": 0.0567, "step": 41448 }, { "epoch": 0.9133407151553212, "grad_norm": 0.7559342384338379, "learning_rate": 5.867536225663755e-07, "loss": 0.1033, "step": 41449 }, { "epoch": 0.9133627504448374, "grad_norm": 0.9585675001144409, "learning_rate": 5.864571699425015e-07, "loss": 0.0707, "step": 41450 }, { "epoch": 0.9133847857343536, "grad_norm": 0.8487691283226013, "learning_rate": 5.86160790734529e-07, "loss": 0.0719, "step": 41451 }, { "epoch": 0.9134068210238697, "grad_norm": 0.7564540505409241, "learning_rate": 5.858644849439671e-07, "loss": 0.1041, "step": 41452 }, { "epoch": 0.9134288563133859, "grad_norm": 0.8241108059883118, "learning_rate": 5.85568252572331e-07, "loss": 0.069, "step": 41453 }, { "epoch": 0.913450891602902, "grad_norm": 0.45541736483573914, "learning_rate": 5.852720936211231e-07, "loss": 0.0344, "step": 41454 }, { "epoch": 0.9134729268924182, "grad_norm": 0.44387003779411316, "learning_rate": 5.849760080918537e-07, "loss": 0.0688, "step": 41455 }, { "epoch": 0.9134949621819344, "grad_norm": 0.8420665264129639, "learning_rate": 5.846799959860316e-07, "loss": 0.0811, "step": 41456 }, { "epoch": 0.9135169974714505, "grad_norm": 0.6387863755226135, "learning_rate": 5.843840573051657e-07, "loss": 0.0486, "step": 41457 }, { "epoch": 0.9135390327609667, "grad_norm": 0.3780747056007385, "learning_rate": 5.840881920507613e-07, "loss": 0.08, "step": 41458 }, { "epoch": 0.9135610680504829, "grad_norm": 0.35928189754486084, "learning_rate": 5.837924002243239e-07, "loss": 0.0509, "step": 41459 }, { "epoch": 0.913583103339999, "grad_norm": 0.7175140380859375, "learning_rate": 5.83496681827364e-07, "loss": 0.0583, "step": 41460 }, { "epoch": 0.9136051386295152, "grad_norm": 0.7217336297035217, "learning_rate": 5.832010368613888e-07, "loss": 0.0572, "step": 41461 }, { "epoch": 0.9136271739190314, "grad_norm": 0.5681540369987488, "learning_rate": 5.829054653279003e-07, "loss": 0.0392, "step": 41462 }, { "epoch": 0.9136492092085475, "grad_norm": 0.8308674097061157, "learning_rate": 5.826099672284024e-07, "loss": 0.0551, "step": 41463 }, { "epoch": 0.9136712444980637, "grad_norm": 0.6385757327079773, "learning_rate": 5.823145425644072e-07, "loss": 0.0604, "step": 41464 }, { "epoch": 0.9136932797875799, "grad_norm": 0.4299928545951843, "learning_rate": 5.820191913374134e-07, "loss": 0.0454, "step": 41465 }, { "epoch": 0.913715315077096, "grad_norm": 0.5464329123497009, "learning_rate": 5.817239135489283e-07, "loss": 0.0566, "step": 41466 }, { "epoch": 0.9137373503666121, "grad_norm": 0.36921533942222595, "learning_rate": 5.814287092004522e-07, "loss": 0.0515, "step": 41467 }, { "epoch": 0.9137593856561282, "grad_norm": 0.5405840873718262, "learning_rate": 5.811335782934956e-07, "loss": 0.0324, "step": 41468 }, { "epoch": 0.9137814209456444, "grad_norm": 0.44329023361206055, "learning_rate": 5.808385208295541e-07, "loss": 0.0647, "step": 41469 }, { "epoch": 0.9138034562351606, "grad_norm": 0.7190123796463013, "learning_rate": 5.80543536810138e-07, "loss": 0.0572, "step": 41470 }, { "epoch": 0.9138254915246767, "grad_norm": 0.6826494932174683, "learning_rate": 5.802486262367429e-07, "loss": 0.0407, "step": 41471 }, { "epoch": 0.9138475268141929, "grad_norm": 1.1114939451217651, "learning_rate": 5.799537891108741e-07, "loss": 0.0812, "step": 41472 }, { "epoch": 0.9138695621037091, "grad_norm": 0.5845606923103333, "learning_rate": 5.796590254340356e-07, "loss": 0.0528, "step": 41473 }, { "epoch": 0.9138915973932252, "grad_norm": 0.16990846395492554, "learning_rate": 5.793643352077244e-07, "loss": 0.0568, "step": 41474 }, { "epoch": 0.9139136326827414, "grad_norm": 0.6363173723220825, "learning_rate": 5.790697184334443e-07, "loss": 0.0454, "step": 41475 }, { "epoch": 0.9139356679722576, "grad_norm": 0.4752141535282135, "learning_rate": 5.78775175112694e-07, "loss": 0.0658, "step": 41476 }, { "epoch": 0.9139577032617737, "grad_norm": 0.7161213755607605, "learning_rate": 5.784807052469776e-07, "loss": 0.0743, "step": 41477 }, { "epoch": 0.9139797385512899, "grad_norm": 0.7552381753921509, "learning_rate": 5.781863088377904e-07, "loss": 0.0611, "step": 41478 }, { "epoch": 0.914001773840806, "grad_norm": 0.921833336353302, "learning_rate": 5.778919858866344e-07, "loss": 0.1005, "step": 41479 }, { "epoch": 0.9140238091303222, "grad_norm": 0.7621638774871826, "learning_rate": 5.775977363950086e-07, "loss": 0.0477, "step": 41480 }, { "epoch": 0.9140458444198384, "grad_norm": 0.5228559970855713, "learning_rate": 5.773035603644116e-07, "loss": 0.0437, "step": 41481 }, { "epoch": 0.9140678797093545, "grad_norm": 0.6843950152397156, "learning_rate": 5.770094577963408e-07, "loss": 0.0474, "step": 41482 }, { "epoch": 0.9140899149988707, "grad_norm": 0.7488090991973877, "learning_rate": 5.767154286922948e-07, "loss": 0.0533, "step": 41483 }, { "epoch": 0.9141119502883869, "grad_norm": 0.5123916268348694, "learning_rate": 5.764214730537726e-07, "loss": 0.0312, "step": 41484 }, { "epoch": 0.914133985577903, "grad_norm": 0.5158678293228149, "learning_rate": 5.761275908822694e-07, "loss": 0.0635, "step": 41485 }, { "epoch": 0.9141560208674192, "grad_norm": 0.5918833613395691, "learning_rate": 5.758337821792842e-07, "loss": 0.0601, "step": 41486 }, { "epoch": 0.9141780561569354, "grad_norm": 0.47194960713386536, "learning_rate": 5.755400469463123e-07, "loss": 0.0702, "step": 41487 }, { "epoch": 0.9142000914464515, "grad_norm": 0.39054033160209656, "learning_rate": 5.752463851848494e-07, "loss": 0.0548, "step": 41488 }, { "epoch": 0.9142221267359677, "grad_norm": 0.3841741979122162, "learning_rate": 5.749527968963925e-07, "loss": 0.0636, "step": 41489 }, { "epoch": 0.9142441620254839, "grad_norm": 0.8141777515411377, "learning_rate": 5.746592820824336e-07, "loss": 0.077, "step": 41490 }, { "epoch": 0.9142661973149999, "grad_norm": 0.554675817489624, "learning_rate": 5.743658407444735e-07, "loss": 0.0444, "step": 41491 }, { "epoch": 0.9142882326045161, "grad_norm": 0.4279114305973053, "learning_rate": 5.740724728840025e-07, "loss": 0.0636, "step": 41492 }, { "epoch": 0.9143102678940322, "grad_norm": 0.5421402454376221, "learning_rate": 5.737791785025176e-07, "loss": 0.0553, "step": 41493 }, { "epoch": 0.9143323031835484, "grad_norm": 0.583018958568573, "learning_rate": 5.734859576015111e-07, "loss": 0.0569, "step": 41494 }, { "epoch": 0.9143543384730646, "grad_norm": 0.538987934589386, "learning_rate": 5.731928101824752e-07, "loss": 0.0525, "step": 41495 }, { "epoch": 0.9143763737625807, "grad_norm": 0.2852783203125, "learning_rate": 5.728997362469069e-07, "loss": 0.0687, "step": 41496 }, { "epoch": 0.9143984090520969, "grad_norm": 0.42834585905075073, "learning_rate": 5.726067357962933e-07, "loss": 0.046, "step": 41497 }, { "epoch": 0.9144204443416131, "grad_norm": 0.5380094051361084, "learning_rate": 5.723138088321316e-07, "loss": 0.0596, "step": 41498 }, { "epoch": 0.9144424796311292, "grad_norm": 0.7300912141799927, "learning_rate": 5.720209553559125e-07, "loss": 0.0554, "step": 41499 }, { "epoch": 0.9144645149206454, "grad_norm": 0.7298390865325928, "learning_rate": 5.717281753691294e-07, "loss": 0.0622, "step": 41500 }, { "epoch": 0.9144865502101616, "grad_norm": 0.427974134683609, "learning_rate": 5.714354688732699e-07, "loss": 0.0375, "step": 41501 }, { "epoch": 0.9145085854996777, "grad_norm": 0.785599946975708, "learning_rate": 5.711428358698256e-07, "loss": 0.0534, "step": 41502 }, { "epoch": 0.9145306207891939, "grad_norm": 0.3769310712814331, "learning_rate": 5.708502763602907e-07, "loss": 0.0401, "step": 41503 }, { "epoch": 0.9145526560787101, "grad_norm": 0.6146177649497986, "learning_rate": 5.705577903461523e-07, "loss": 0.0481, "step": 41504 }, { "epoch": 0.9145746913682262, "grad_norm": 0.7020168900489807, "learning_rate": 5.702653778288991e-07, "loss": 0.0604, "step": 41505 }, { "epoch": 0.9145967266577424, "grad_norm": 0.7645794153213501, "learning_rate": 5.699730388100233e-07, "loss": 0.0652, "step": 41506 }, { "epoch": 0.9146187619472586, "grad_norm": 0.6211815476417542, "learning_rate": 5.696807732910137e-07, "loss": 0.0603, "step": 41507 }, { "epoch": 0.9146407972367747, "grad_norm": 1.0015275478363037, "learning_rate": 5.693885812733573e-07, "loss": 0.048, "step": 41508 }, { "epoch": 0.9146628325262909, "grad_norm": 0.7623594999313354, "learning_rate": 5.690964627585416e-07, "loss": 0.0741, "step": 41509 }, { "epoch": 0.914684867815807, "grad_norm": 0.4031670093536377, "learning_rate": 5.688044177480584e-07, "loss": 0.0488, "step": 41510 }, { "epoch": 0.9147069031053232, "grad_norm": 0.3931124210357666, "learning_rate": 5.685124462433916e-07, "loss": 0.0358, "step": 41511 }, { "epoch": 0.9147289383948394, "grad_norm": 0.5559506416320801, "learning_rate": 5.682205482460317e-07, "loss": 0.0451, "step": 41512 }, { "epoch": 0.9147509736843555, "grad_norm": 0.6317620873451233, "learning_rate": 5.679287237574593e-07, "loss": 0.0355, "step": 41513 }, { "epoch": 0.9147730089738717, "grad_norm": 0.45043128728866577, "learning_rate": 5.676369727791697e-07, "loss": 0.0738, "step": 41514 }, { "epoch": 0.9147950442633879, "grad_norm": 0.3907306492328644, "learning_rate": 5.673452953126418e-07, "loss": 0.0341, "step": 41515 }, { "epoch": 0.9148170795529039, "grad_norm": 0.8364898562431335, "learning_rate": 5.670536913593644e-07, "loss": 0.0702, "step": 41516 }, { "epoch": 0.9148391148424201, "grad_norm": 0.7049005627632141, "learning_rate": 5.667621609208212e-07, "loss": 0.0793, "step": 41517 }, { "epoch": 0.9148611501319363, "grad_norm": 0.5985690355300903, "learning_rate": 5.664707039984995e-07, "loss": 0.0533, "step": 41518 }, { "epoch": 0.9148831854214524, "grad_norm": 0.45663073658943176, "learning_rate": 5.661793205938814e-07, "loss": 0.0618, "step": 41519 }, { "epoch": 0.9149052207109686, "grad_norm": 0.37640517950057983, "learning_rate": 5.658880107084524e-07, "loss": 0.0626, "step": 41520 }, { "epoch": 0.9149272560004847, "grad_norm": 0.31106284260749817, "learning_rate": 5.655967743436946e-07, "loss": 0.0443, "step": 41521 }, { "epoch": 0.9149492912900009, "grad_norm": 0.42823365330696106, "learning_rate": 5.653056115010935e-07, "loss": 0.0582, "step": 41522 }, { "epoch": 0.9149713265795171, "grad_norm": 0.4209776222705841, "learning_rate": 5.650145221821329e-07, "loss": 0.0541, "step": 41523 }, { "epoch": 0.9149933618690332, "grad_norm": 0.4314127266407013, "learning_rate": 5.647235063882932e-07, "loss": 0.0528, "step": 41524 }, { "epoch": 0.9150153971585494, "grad_norm": 0.526002049446106, "learning_rate": 5.644325641210568e-07, "loss": 0.054, "step": 41525 }, { "epoch": 0.9150374324480656, "grad_norm": 0.2737414836883545, "learning_rate": 5.641416953819073e-07, "loss": 0.0493, "step": 41526 }, { "epoch": 0.9150594677375817, "grad_norm": 0.42047154903411865, "learning_rate": 5.638509001723252e-07, "loss": 0.0526, "step": 41527 }, { "epoch": 0.9150815030270979, "grad_norm": 0.5361923575401306, "learning_rate": 5.635601784937894e-07, "loss": 0.0601, "step": 41528 }, { "epoch": 0.9151035383166141, "grad_norm": 0.48249736428260803, "learning_rate": 5.632695303477852e-07, "loss": 0.0526, "step": 41529 }, { "epoch": 0.9151255736061302, "grad_norm": 0.4931281507015228, "learning_rate": 5.629789557357917e-07, "loss": 0.0734, "step": 41530 }, { "epoch": 0.9151476088956464, "grad_norm": 1.0554108619689941, "learning_rate": 5.626884546592858e-07, "loss": 0.0776, "step": 41531 }, { "epoch": 0.9151696441851626, "grad_norm": 0.8246681690216064, "learning_rate": 5.623980271197499e-07, "loss": 0.0624, "step": 41532 }, { "epoch": 0.9151916794746787, "grad_norm": 0.7461547255516052, "learning_rate": 5.621076731186658e-07, "loss": 0.0701, "step": 41533 }, { "epoch": 0.9152137147641949, "grad_norm": 0.4841082990169525, "learning_rate": 5.61817392657506e-07, "loss": 0.0484, "step": 41534 }, { "epoch": 0.915235750053711, "grad_norm": 0.9548581838607788, "learning_rate": 5.615271857377557e-07, "loss": 0.0485, "step": 41535 }, { "epoch": 0.9152577853432272, "grad_norm": 0.47975483536720276, "learning_rate": 5.612370523608857e-07, "loss": 0.0303, "step": 41536 }, { "epoch": 0.9152798206327434, "grad_norm": 0.5662665963172913, "learning_rate": 5.609469925283828e-07, "loss": 0.0657, "step": 41537 }, { "epoch": 0.9153018559222595, "grad_norm": 0.5066026449203491, "learning_rate": 5.60657006241716e-07, "loss": 0.0484, "step": 41538 }, { "epoch": 0.9153238912117757, "grad_norm": 0.47569239139556885, "learning_rate": 5.603670935023691e-07, "loss": 0.0459, "step": 41539 }, { "epoch": 0.9153459265012919, "grad_norm": 0.520775318145752, "learning_rate": 5.600772543118143e-07, "loss": 0.0548, "step": 41540 }, { "epoch": 0.9153679617908079, "grad_norm": 0.35952043533325195, "learning_rate": 5.597874886715287e-07, "loss": 0.0336, "step": 41541 }, { "epoch": 0.9153899970803241, "grad_norm": 0.5858431458473206, "learning_rate": 5.594977965829895e-07, "loss": 0.0552, "step": 41542 }, { "epoch": 0.9154120323698403, "grad_norm": 0.5032472610473633, "learning_rate": 5.592081780476721e-07, "loss": 0.0604, "step": 41543 }, { "epoch": 0.9154340676593564, "grad_norm": 0.5325306057929993, "learning_rate": 5.589186330670504e-07, "loss": 0.0567, "step": 41544 }, { "epoch": 0.9154561029488726, "grad_norm": 0.840566873550415, "learning_rate": 5.586291616425998e-07, "loss": 0.073, "step": 41545 }, { "epoch": 0.9154781382383888, "grad_norm": 0.27704286575317383, "learning_rate": 5.583397637757958e-07, "loss": 0.0531, "step": 41546 }, { "epoch": 0.9155001735279049, "grad_norm": 0.8616597056388855, "learning_rate": 5.580504394681107e-07, "loss": 0.0473, "step": 41547 }, { "epoch": 0.9155222088174211, "grad_norm": 0.5319915413856506, "learning_rate": 5.577611887210199e-07, "loss": 0.078, "step": 41548 }, { "epoch": 0.9155442441069372, "grad_norm": 0.4616481363773346, "learning_rate": 5.574720115359971e-07, "loss": 0.0482, "step": 41549 }, { "epoch": 0.9155662793964534, "grad_norm": 0.5872772932052612, "learning_rate": 5.571829079145113e-07, "loss": 0.0662, "step": 41550 }, { "epoch": 0.9155883146859696, "grad_norm": 0.7379633784294128, "learning_rate": 5.56893877858038e-07, "loss": 0.106, "step": 41551 }, { "epoch": 0.9156103499754857, "grad_norm": 0.951457679271698, "learning_rate": 5.566049213680507e-07, "loss": 0.0816, "step": 41552 }, { "epoch": 0.9156323852650019, "grad_norm": 0.6780899167060852, "learning_rate": 5.563160384460203e-07, "loss": 0.0766, "step": 41553 }, { "epoch": 0.9156544205545181, "grad_norm": 0.6498970985412598, "learning_rate": 5.560272290934154e-07, "loss": 0.076, "step": 41554 }, { "epoch": 0.9156764558440342, "grad_norm": 0.3495299816131592, "learning_rate": 5.557384933117115e-07, "loss": 0.0337, "step": 41555 }, { "epoch": 0.9156984911335504, "grad_norm": 0.5429393649101257, "learning_rate": 5.554498311023759e-07, "loss": 0.0523, "step": 41556 }, { "epoch": 0.9157205264230666, "grad_norm": 0.6741225719451904, "learning_rate": 5.551612424668807e-07, "loss": 0.0742, "step": 41557 }, { "epoch": 0.9157425617125827, "grad_norm": 0.5845943093299866, "learning_rate": 5.548727274066961e-07, "loss": 0.0443, "step": 41558 }, { "epoch": 0.9157645970020989, "grad_norm": 1.0066782236099243, "learning_rate": 5.545842859232864e-07, "loss": 0.0761, "step": 41559 }, { "epoch": 0.915786632291615, "grad_norm": 0.16738800704479218, "learning_rate": 5.5429591801813e-07, "loss": 0.0423, "step": 41560 }, { "epoch": 0.9158086675811312, "grad_norm": 0.5645983815193176, "learning_rate": 5.540076236926894e-07, "loss": 0.0637, "step": 41561 }, { "epoch": 0.9158307028706474, "grad_norm": 0.5149860382080078, "learning_rate": 5.537194029484349e-07, "loss": 0.0697, "step": 41562 }, { "epoch": 0.9158527381601635, "grad_norm": 0.40826737880706787, "learning_rate": 5.534312557868337e-07, "loss": 0.0871, "step": 41563 }, { "epoch": 0.9158747734496797, "grad_norm": 0.7042061686515808, "learning_rate": 5.53143182209353e-07, "loss": 0.053, "step": 41564 }, { "epoch": 0.9158968087391958, "grad_norm": 0.5123149156570435, "learning_rate": 5.528551822174632e-07, "loss": 0.0431, "step": 41565 }, { "epoch": 0.9159188440287119, "grad_norm": 0.9719827771186829, "learning_rate": 5.525672558126249e-07, "loss": 0.066, "step": 41566 }, { "epoch": 0.9159408793182281, "grad_norm": 0.4179104268550873, "learning_rate": 5.522794029963135e-07, "loss": 0.0498, "step": 41567 }, { "epoch": 0.9159629146077443, "grad_norm": 0.3694973886013031, "learning_rate": 5.519916237699895e-07, "loss": 0.0523, "step": 41568 }, { "epoch": 0.9159849498972604, "grad_norm": 0.46072423458099365, "learning_rate": 5.517039181351203e-07, "loss": 0.039, "step": 41569 }, { "epoch": 0.9160069851867766, "grad_norm": 0.5346167087554932, "learning_rate": 5.514162860931676e-07, "loss": 0.0667, "step": 41570 }, { "epoch": 0.9160290204762928, "grad_norm": 0.7141921520233154, "learning_rate": 5.511287276456023e-07, "loss": 0.0704, "step": 41571 }, { "epoch": 0.9160510557658089, "grad_norm": 0.5721578001976013, "learning_rate": 5.508412427938864e-07, "loss": 0.0379, "step": 41572 }, { "epoch": 0.9160730910553251, "grad_norm": 0.6447412371635437, "learning_rate": 5.505538315394853e-07, "loss": 0.0595, "step": 41573 }, { "epoch": 0.9160951263448412, "grad_norm": 0.5256136655807495, "learning_rate": 5.502664938838598e-07, "loss": 0.0362, "step": 41574 }, { "epoch": 0.9161171616343574, "grad_norm": 0.7870875597000122, "learning_rate": 5.499792298284767e-07, "loss": 0.0669, "step": 41575 }, { "epoch": 0.9161391969238736, "grad_norm": 0.4657837450504303, "learning_rate": 5.496920393747983e-07, "loss": 0.0466, "step": 41576 }, { "epoch": 0.9161612322133897, "grad_norm": 0.40082547068595886, "learning_rate": 5.49404922524287e-07, "loss": 0.0354, "step": 41577 }, { "epoch": 0.9161832675029059, "grad_norm": 0.577272891998291, "learning_rate": 5.491178792784046e-07, "loss": 0.0737, "step": 41578 }, { "epoch": 0.9162053027924221, "grad_norm": 0.6416456699371338, "learning_rate": 5.488309096386134e-07, "loss": 0.037, "step": 41579 }, { "epoch": 0.9162273380819382, "grad_norm": 0.8246563076972961, "learning_rate": 5.48544013606379e-07, "loss": 0.0738, "step": 41580 }, { "epoch": 0.9162493733714544, "grad_norm": 0.7322677373886108, "learning_rate": 5.482571911831568e-07, "loss": 0.0737, "step": 41581 }, { "epoch": 0.9162714086609706, "grad_norm": 0.49987706542015076, "learning_rate": 5.47970442370409e-07, "loss": 0.0556, "step": 41582 }, { "epoch": 0.9162934439504867, "grad_norm": 0.7684990167617798, "learning_rate": 5.476837671696011e-07, "loss": 0.0764, "step": 41583 }, { "epoch": 0.9163154792400029, "grad_norm": 0.9445611238479614, "learning_rate": 5.473971655821885e-07, "loss": 0.0542, "step": 41584 }, { "epoch": 0.9163375145295191, "grad_norm": 0.2357259839773178, "learning_rate": 5.471106376096319e-07, "loss": 0.0343, "step": 41585 }, { "epoch": 0.9163595498190352, "grad_norm": 0.24581414461135864, "learning_rate": 5.468241832533899e-07, "loss": 0.0575, "step": 41586 }, { "epoch": 0.9163815851085514, "grad_norm": 0.5631428360939026, "learning_rate": 5.465378025149248e-07, "loss": 0.0822, "step": 41587 }, { "epoch": 0.9164036203980676, "grad_norm": 0.7970180511474609, "learning_rate": 5.462514953956921e-07, "loss": 0.0517, "step": 41588 }, { "epoch": 0.9164256556875837, "grad_norm": 0.267956405878067, "learning_rate": 5.459652618971505e-07, "loss": 0.0501, "step": 41589 }, { "epoch": 0.9164476909770998, "grad_norm": 0.7351487874984741, "learning_rate": 5.456791020207608e-07, "loss": 0.0876, "step": 41590 }, { "epoch": 0.9164697262666159, "grad_norm": 0.6677447557449341, "learning_rate": 5.453930157679765e-07, "loss": 0.0461, "step": 41591 }, { "epoch": 0.9164917615561321, "grad_norm": 0.3022845983505249, "learning_rate": 5.451070031402582e-07, "loss": 0.0579, "step": 41592 }, { "epoch": 0.9165137968456483, "grad_norm": 0.6576614379882812, "learning_rate": 5.4482106413906e-07, "loss": 0.059, "step": 41593 }, { "epoch": 0.9165358321351644, "grad_norm": 0.6154637932777405, "learning_rate": 5.445351987658403e-07, "loss": 0.057, "step": 41594 }, { "epoch": 0.9165578674246806, "grad_norm": 0.8432979583740234, "learning_rate": 5.442494070220549e-07, "loss": 0.0728, "step": 41595 }, { "epoch": 0.9165799027141968, "grad_norm": 0.6283755302429199, "learning_rate": 5.439636889091593e-07, "loss": 0.0507, "step": 41596 }, { "epoch": 0.9166019380037129, "grad_norm": 0.4007701575756073, "learning_rate": 5.436780444286071e-07, "loss": 0.0424, "step": 41597 }, { "epoch": 0.9166239732932291, "grad_norm": 0.6563659310340881, "learning_rate": 5.433924735818541e-07, "loss": 0.0559, "step": 41598 }, { "epoch": 0.9166460085827453, "grad_norm": 0.8624966144561768, "learning_rate": 5.431069763703588e-07, "loss": 0.0805, "step": 41599 }, { "epoch": 0.9166680438722614, "grad_norm": 0.5077862739562988, "learning_rate": 5.428215527955688e-07, "loss": 0.0451, "step": 41600 }, { "epoch": 0.9166900791617776, "grad_norm": 0.5799659490585327, "learning_rate": 5.425362028589426e-07, "loss": 0.0361, "step": 41601 }, { "epoch": 0.9167121144512937, "grad_norm": 0.3523062467575073, "learning_rate": 5.422509265619307e-07, "loss": 0.0269, "step": 41602 }, { "epoch": 0.9167341497408099, "grad_norm": 0.4205075800418854, "learning_rate": 5.419657239059905e-07, "loss": 0.0699, "step": 41603 }, { "epoch": 0.9167561850303261, "grad_norm": 0.18276375532150269, "learning_rate": 5.416805948925708e-07, "loss": 0.05, "step": 41604 }, { "epoch": 0.9167782203198422, "grad_norm": 0.5681104063987732, "learning_rate": 5.413955395231235e-07, "loss": 0.0483, "step": 41605 }, { "epoch": 0.9168002556093584, "grad_norm": 0.9714841842651367, "learning_rate": 5.411105577991044e-07, "loss": 0.061, "step": 41606 }, { "epoch": 0.9168222908988746, "grad_norm": 0.4582805633544922, "learning_rate": 5.408256497219621e-07, "loss": 0.0636, "step": 41607 }, { "epoch": 0.9168443261883907, "grad_norm": 0.8232606053352356, "learning_rate": 5.40540815293149e-07, "loss": 0.0764, "step": 41608 }, { "epoch": 0.9168663614779069, "grad_norm": 0.7011126279830933, "learning_rate": 5.402560545141105e-07, "loss": 0.0645, "step": 41609 }, { "epoch": 0.9168883967674231, "grad_norm": 0.6955760717391968, "learning_rate": 5.399713673863072e-07, "loss": 0.0519, "step": 41610 }, { "epoch": 0.9169104320569392, "grad_norm": 0.39495357871055603, "learning_rate": 5.396867539111811e-07, "loss": 0.0406, "step": 41611 }, { "epoch": 0.9169324673464554, "grad_norm": 0.6561723351478577, "learning_rate": 5.394022140901861e-07, "loss": 0.092, "step": 41612 }, { "epoch": 0.9169545026359716, "grad_norm": 0.5680203437805176, "learning_rate": 5.391177479247695e-07, "loss": 0.044, "step": 41613 }, { "epoch": 0.9169765379254877, "grad_norm": 0.7501038312911987, "learning_rate": 5.388333554163799e-07, "loss": 0.0396, "step": 41614 }, { "epoch": 0.9169985732150038, "grad_norm": 0.4016716778278351, "learning_rate": 5.385490365664697e-07, "loss": 0.0267, "step": 41615 }, { "epoch": 0.9170206085045199, "grad_norm": 0.5714150667190552, "learning_rate": 5.382647913764793e-07, "loss": 0.0613, "step": 41616 }, { "epoch": 0.9170426437940361, "grad_norm": 0.8182896971702576, "learning_rate": 5.37980619847866e-07, "loss": 0.0473, "step": 41617 }, { "epoch": 0.9170646790835523, "grad_norm": 0.44307249784469604, "learning_rate": 5.376965219820685e-07, "loss": 0.073, "step": 41618 }, { "epoch": 0.9170867143730684, "grad_norm": 0.3706280589103699, "learning_rate": 5.374124977805423e-07, "loss": 0.048, "step": 41619 }, { "epoch": 0.9171087496625846, "grad_norm": 0.3345682919025421, "learning_rate": 5.371285472447263e-07, "loss": 0.0396, "step": 41620 }, { "epoch": 0.9171307849521008, "grad_norm": 0.42942512035369873, "learning_rate": 5.36844670376071e-07, "loss": 0.0397, "step": 41621 }, { "epoch": 0.9171528202416169, "grad_norm": 0.49245408177375793, "learning_rate": 5.365608671760219e-07, "loss": 0.0288, "step": 41622 }, { "epoch": 0.9171748555311331, "grad_norm": 0.2874037027359009, "learning_rate": 5.362771376460246e-07, "loss": 0.0452, "step": 41623 }, { "epoch": 0.9171968908206493, "grad_norm": 0.70591801404953, "learning_rate": 5.359934817875228e-07, "loss": 0.0728, "step": 41624 }, { "epoch": 0.9172189261101654, "grad_norm": 0.4607701599597931, "learning_rate": 5.357098996019621e-07, "loss": 0.0713, "step": 41625 }, { "epoch": 0.9172409613996816, "grad_norm": 0.4756571054458618, "learning_rate": 5.354263910907898e-07, "loss": 0.0652, "step": 41626 }, { "epoch": 0.9172629966891978, "grad_norm": 0.8536120057106018, "learning_rate": 5.351429562554444e-07, "loss": 0.0972, "step": 41627 }, { "epoch": 0.9172850319787139, "grad_norm": 0.5717814564704895, "learning_rate": 5.348595950973733e-07, "loss": 0.0427, "step": 41628 }, { "epoch": 0.9173070672682301, "grad_norm": 0.4892677366733551, "learning_rate": 5.345763076180221e-07, "loss": 0.0464, "step": 41629 }, { "epoch": 0.9173291025577462, "grad_norm": 0.47729259729385376, "learning_rate": 5.342930938188279e-07, "loss": 0.0467, "step": 41630 }, { "epoch": 0.9173511378472624, "grad_norm": 0.43243923783302307, "learning_rate": 5.340099537012377e-07, "loss": 0.0359, "step": 41631 }, { "epoch": 0.9173731731367786, "grad_norm": 0.6225056052207947, "learning_rate": 5.337268872666889e-07, "loss": 0.0804, "step": 41632 }, { "epoch": 0.9173952084262947, "grad_norm": 0.7004187107086182, "learning_rate": 5.334438945166287e-07, "loss": 0.0788, "step": 41633 }, { "epoch": 0.9174172437158109, "grad_norm": 0.8493190407752991, "learning_rate": 5.331609754524957e-07, "loss": 0.0482, "step": 41634 }, { "epoch": 0.9174392790053271, "grad_norm": 0.6949160695075989, "learning_rate": 5.328781300757324e-07, "loss": 0.0563, "step": 41635 }, { "epoch": 0.9174613142948432, "grad_norm": 0.39150574803352356, "learning_rate": 5.325953583877791e-07, "loss": 0.0526, "step": 41636 }, { "epoch": 0.9174833495843594, "grad_norm": 0.19249895215034485, "learning_rate": 5.323126603900746e-07, "loss": 0.074, "step": 41637 }, { "epoch": 0.9175053848738756, "grad_norm": 0.6124556064605713, "learning_rate": 5.320300360840613e-07, "loss": 0.0875, "step": 41638 }, { "epoch": 0.9175274201633917, "grad_norm": 0.4714086949825287, "learning_rate": 5.317474854711746e-07, "loss": 0.0442, "step": 41639 }, { "epoch": 0.9175494554529078, "grad_norm": 0.821851372718811, "learning_rate": 5.314650085528583e-07, "loss": 0.0564, "step": 41640 }, { "epoch": 0.917571490742424, "grad_norm": 0.5492498874664307, "learning_rate": 5.311826053305496e-07, "loss": 0.0646, "step": 41641 }, { "epoch": 0.9175935260319401, "grad_norm": 0.8067814111709595, "learning_rate": 5.309002758056875e-07, "loss": 0.0571, "step": 41642 }, { "epoch": 0.9176155613214563, "grad_norm": 0.2184799313545227, "learning_rate": 5.306180199797073e-07, "loss": 0.0464, "step": 41643 }, { "epoch": 0.9176375966109724, "grad_norm": 0.5139013528823853, "learning_rate": 5.30335837854048e-07, "loss": 0.0565, "step": 41644 }, { "epoch": 0.9176596319004886, "grad_norm": 0.7273232340812683, "learning_rate": 5.300537294301516e-07, "loss": 0.0479, "step": 41645 }, { "epoch": 0.9176816671900048, "grad_norm": 0.5626949071884155, "learning_rate": 5.297716947094472e-07, "loss": 0.0749, "step": 41646 }, { "epoch": 0.9177037024795209, "grad_norm": 0.7898055911064148, "learning_rate": 5.294897336933751e-07, "loss": 0.0723, "step": 41647 }, { "epoch": 0.9177257377690371, "grad_norm": 0.28310781717300415, "learning_rate": 5.292078463833728e-07, "loss": 0.0452, "step": 41648 }, { "epoch": 0.9177477730585533, "grad_norm": 1.0094965696334839, "learning_rate": 5.289260327808754e-07, "loss": 0.0424, "step": 41649 }, { "epoch": 0.9177698083480694, "grad_norm": 0.6519292593002319, "learning_rate": 5.286442928873169e-07, "loss": 0.0643, "step": 41650 }, { "epoch": 0.9177918436375856, "grad_norm": 0.819097101688385, "learning_rate": 5.283626267041313e-07, "loss": 0.0882, "step": 41651 }, { "epoch": 0.9178138789271018, "grad_norm": 0.5675016641616821, "learning_rate": 5.280810342327591e-07, "loss": 0.0574, "step": 41652 }, { "epoch": 0.9178359142166179, "grad_norm": 0.5058205723762512, "learning_rate": 5.277995154746274e-07, "loss": 0.0746, "step": 41653 }, { "epoch": 0.9178579495061341, "grad_norm": 0.47308996319770813, "learning_rate": 5.275180704311767e-07, "loss": 0.0479, "step": 41654 }, { "epoch": 0.9178799847956503, "grad_norm": 0.7132311463356018, "learning_rate": 5.272366991038324e-07, "loss": 0.064, "step": 41655 }, { "epoch": 0.9179020200851664, "grad_norm": 0.5569157600402832, "learning_rate": 5.269554014940353e-07, "loss": 0.0588, "step": 41656 }, { "epoch": 0.9179240553746826, "grad_norm": 0.6911221742630005, "learning_rate": 5.266741776032158e-07, "loss": 0.0352, "step": 41657 }, { "epoch": 0.9179460906641987, "grad_norm": 0.624992847442627, "learning_rate": 5.263930274328044e-07, "loss": 0.0393, "step": 41658 }, { "epoch": 0.9179681259537149, "grad_norm": 0.4943951964378357, "learning_rate": 5.261119509842366e-07, "loss": 0.0619, "step": 41659 }, { "epoch": 0.9179901612432311, "grad_norm": 0.8720166087150574, "learning_rate": 5.258309482589413e-07, "loss": 0.0686, "step": 41660 }, { "epoch": 0.9180121965327472, "grad_norm": 0.7461374998092651, "learning_rate": 5.255500192583523e-07, "loss": 0.061, "step": 41661 }, { "epoch": 0.9180342318222634, "grad_norm": 0.7106285095214844, "learning_rate": 5.252691639838936e-07, "loss": 0.076, "step": 41662 }, { "epoch": 0.9180562671117796, "grad_norm": 0.28828614950180054, "learning_rate": 5.249883824370055e-07, "loss": 0.0252, "step": 41663 }, { "epoch": 0.9180783024012956, "grad_norm": 0.8179900050163269, "learning_rate": 5.247076746191121e-07, "loss": 0.0769, "step": 41664 }, { "epoch": 0.9181003376908118, "grad_norm": 0.520857036113739, "learning_rate": 5.244270405316471e-07, "loss": 0.0574, "step": 41665 }, { "epoch": 0.918122372980328, "grad_norm": 0.621403157711029, "learning_rate": 5.241464801760359e-07, "loss": 0.0671, "step": 41666 }, { "epoch": 0.9181444082698441, "grad_norm": 0.693709671497345, "learning_rate": 5.238659935537077e-07, "loss": 0.054, "step": 41667 }, { "epoch": 0.9181664435593603, "grad_norm": 0.632593035697937, "learning_rate": 5.235855806660961e-07, "loss": 0.0561, "step": 41668 }, { "epoch": 0.9181884788488764, "grad_norm": 0.8211840987205505, "learning_rate": 5.233052415146233e-07, "loss": 0.0669, "step": 41669 }, { "epoch": 0.9182105141383926, "grad_norm": 0.4448718726634979, "learning_rate": 5.230249761007199e-07, "loss": 0.0621, "step": 41670 }, { "epoch": 0.9182325494279088, "grad_norm": 0.4283614754676819, "learning_rate": 5.227447844258148e-07, "loss": 0.0566, "step": 41671 }, { "epoch": 0.9182545847174249, "grad_norm": 0.5805302858352661, "learning_rate": 5.224646664913351e-07, "loss": 0.051, "step": 41672 }, { "epoch": 0.9182766200069411, "grad_norm": 0.8697357773780823, "learning_rate": 5.221846222987032e-07, "loss": 0.056, "step": 41673 }, { "epoch": 0.9182986552964573, "grad_norm": 0.4746817946434021, "learning_rate": 5.219046518493492e-07, "loss": 0.0552, "step": 41674 }, { "epoch": 0.9183206905859734, "grad_norm": 0.6178449392318726, "learning_rate": 5.216247551447007e-07, "loss": 0.048, "step": 41675 }, { "epoch": 0.9183427258754896, "grad_norm": 0.6560752391815186, "learning_rate": 5.213449321861797e-07, "loss": 0.0487, "step": 41676 }, { "epoch": 0.9183647611650058, "grad_norm": 1.1114431619644165, "learning_rate": 5.21065182975215e-07, "loss": 0.0679, "step": 41677 }, { "epoch": 0.9183867964545219, "grad_norm": 0.5877528190612793, "learning_rate": 5.207855075132256e-07, "loss": 0.0535, "step": 41678 }, { "epoch": 0.9184088317440381, "grad_norm": 0.6301151514053345, "learning_rate": 5.205059058016437e-07, "loss": 0.037, "step": 41679 }, { "epoch": 0.9184308670335543, "grad_norm": 0.5168156623840332, "learning_rate": 5.202263778418865e-07, "loss": 0.0487, "step": 41680 }, { "epoch": 0.9184529023230704, "grad_norm": 0.4300811290740967, "learning_rate": 5.199469236353827e-07, "loss": 0.0273, "step": 41681 }, { "epoch": 0.9184749376125866, "grad_norm": 0.4645267426967621, "learning_rate": 5.19667543183553e-07, "loss": 0.0426, "step": 41682 }, { "epoch": 0.9184969729021027, "grad_norm": 0.595872700214386, "learning_rate": 5.193882364878244e-07, "loss": 0.0452, "step": 41683 }, { "epoch": 0.9185190081916189, "grad_norm": 0.4749840795993805, "learning_rate": 5.191090035496144e-07, "loss": 0.0423, "step": 41684 }, { "epoch": 0.9185410434811351, "grad_norm": 0.5819030404090881, "learning_rate": 5.188298443703482e-07, "loss": 0.0693, "step": 41685 }, { "epoch": 0.9185630787706512, "grad_norm": 0.30780836939811707, "learning_rate": 5.185507589514466e-07, "loss": 0.0586, "step": 41686 }, { "epoch": 0.9185851140601674, "grad_norm": 0.6083502769470215, "learning_rate": 5.182717472943315e-07, "loss": 0.0744, "step": 41687 }, { "epoch": 0.9186071493496836, "grad_norm": 0.39311787486076355, "learning_rate": 5.179928094004255e-07, "loss": 0.054, "step": 41688 }, { "epoch": 0.9186291846391996, "grad_norm": 0.5960447788238525, "learning_rate": 5.177139452711454e-07, "loss": 0.0745, "step": 41689 }, { "epoch": 0.9186512199287158, "grad_norm": 0.35262367129325867, "learning_rate": 5.17435154907917e-07, "loss": 0.0567, "step": 41690 }, { "epoch": 0.918673255218232, "grad_norm": 0.6313897371292114, "learning_rate": 5.171564383121557e-07, "loss": 0.0592, "step": 41691 }, { "epoch": 0.9186952905077481, "grad_norm": 0.6392266750335693, "learning_rate": 5.168777954852838e-07, "loss": 0.0628, "step": 41692 }, { "epoch": 0.9187173257972643, "grad_norm": 0.3809407651424408, "learning_rate": 5.165992264287184e-07, "loss": 0.0464, "step": 41693 }, { "epoch": 0.9187393610867804, "grad_norm": 0.6504279971122742, "learning_rate": 5.1632073114388e-07, "loss": 0.0705, "step": 41694 }, { "epoch": 0.9187613963762966, "grad_norm": 0.5322034955024719, "learning_rate": 5.160423096321893e-07, "loss": 0.0415, "step": 41695 }, { "epoch": 0.9187834316658128, "grad_norm": 0.4120228886604309, "learning_rate": 5.1576396189506e-07, "loss": 0.0576, "step": 41696 }, { "epoch": 0.9188054669553289, "grad_norm": 0.7870598435401917, "learning_rate": 5.154856879339109e-07, "loss": 0.0518, "step": 41697 }, { "epoch": 0.9188275022448451, "grad_norm": 0.6694153547286987, "learning_rate": 5.152074877501628e-07, "loss": 0.0573, "step": 41698 }, { "epoch": 0.9188495375343613, "grad_norm": 0.4243057668209076, "learning_rate": 5.149293613452294e-07, "loss": 0.0394, "step": 41699 }, { "epoch": 0.9188715728238774, "grad_norm": 0.6507843732833862, "learning_rate": 5.146513087205279e-07, "loss": 0.0569, "step": 41700 }, { "epoch": 0.9188936081133936, "grad_norm": 0.6546043753623962, "learning_rate": 5.143733298774755e-07, "loss": 0.0477, "step": 41701 }, { "epoch": 0.9189156434029098, "grad_norm": 0.5403856039047241, "learning_rate": 5.140954248174878e-07, "loss": 0.0542, "step": 41702 }, { "epoch": 0.9189376786924259, "grad_norm": 0.4885750412940979, "learning_rate": 5.138175935419786e-07, "loss": 0.0642, "step": 41703 }, { "epoch": 0.9189597139819421, "grad_norm": 0.9784310460090637, "learning_rate": 5.135398360523652e-07, "loss": 0.0817, "step": 41704 }, { "epoch": 0.9189817492714583, "grad_norm": 0.35462018847465515, "learning_rate": 5.132621523500613e-07, "loss": 0.0626, "step": 41705 }, { "epoch": 0.9190037845609744, "grad_norm": 0.7573084235191345, "learning_rate": 5.129845424364826e-07, "loss": 0.0693, "step": 41706 }, { "epoch": 0.9190258198504906, "grad_norm": 0.8367031812667847, "learning_rate": 5.127070063130412e-07, "loss": 0.0746, "step": 41707 }, { "epoch": 0.9190478551400068, "grad_norm": 0.554355800151825, "learning_rate": 5.124295439811528e-07, "loss": 0.0481, "step": 41708 }, { "epoch": 0.9190698904295229, "grad_norm": 0.6698145866394043, "learning_rate": 5.12152155442231e-07, "loss": 0.0604, "step": 41709 }, { "epoch": 0.9190919257190391, "grad_norm": 0.9650831818580627, "learning_rate": 5.118748406976848e-07, "loss": 0.1073, "step": 41710 }, { "epoch": 0.9191139610085552, "grad_norm": 0.5841494798660278, "learning_rate": 5.115975997489297e-07, "loss": 0.0611, "step": 41711 }, { "epoch": 0.9191359962980714, "grad_norm": 0.6156141757965088, "learning_rate": 5.113204325973764e-07, "loss": 0.0405, "step": 41712 }, { "epoch": 0.9191580315875876, "grad_norm": 0.6982807517051697, "learning_rate": 5.110433392444386e-07, "loss": 0.0667, "step": 41713 }, { "epoch": 0.9191800668771036, "grad_norm": 0.3936799168586731, "learning_rate": 5.107663196915269e-07, "loss": 0.0313, "step": 41714 }, { "epoch": 0.9192021021666198, "grad_norm": 0.48879727721214294, "learning_rate": 5.104893739400535e-07, "loss": 0.0615, "step": 41715 }, { "epoch": 0.919224137456136, "grad_norm": 0.569148600101471, "learning_rate": 5.102125019914239e-07, "loss": 0.0698, "step": 41716 }, { "epoch": 0.9192461727456521, "grad_norm": 0.5862075686454773, "learning_rate": 5.099357038470537e-07, "loss": 0.0488, "step": 41717 }, { "epoch": 0.9192682080351683, "grad_norm": 0.5903484225273132, "learning_rate": 5.096589795083534e-07, "loss": 0.0424, "step": 41718 }, { "epoch": 0.9192902433246845, "grad_norm": 0.5519334077835083, "learning_rate": 5.093823289767285e-07, "loss": 0.0403, "step": 41719 }, { "epoch": 0.9193122786142006, "grad_norm": 0.5256152749061584, "learning_rate": 5.09105752253588e-07, "loss": 0.0665, "step": 41720 }, { "epoch": 0.9193343139037168, "grad_norm": 0.6139671802520752, "learning_rate": 5.08829249340344e-07, "loss": 0.0523, "step": 41721 }, { "epoch": 0.919356349193233, "grad_norm": 0.8480370044708252, "learning_rate": 5.085528202384054e-07, "loss": 0.0413, "step": 41722 }, { "epoch": 0.9193783844827491, "grad_norm": 0.7630393505096436, "learning_rate": 5.082764649491762e-07, "loss": 0.0665, "step": 41723 }, { "epoch": 0.9194004197722653, "grad_norm": 0.8770413994789124, "learning_rate": 5.080001834740649e-07, "loss": 0.0556, "step": 41724 }, { "epoch": 0.9194224550617814, "grad_norm": 0.5423702001571655, "learning_rate": 5.077239758144825e-07, "loss": 0.0791, "step": 41725 }, { "epoch": 0.9194444903512976, "grad_norm": 0.7062689661979675, "learning_rate": 5.074478419718326e-07, "loss": 0.0453, "step": 41726 }, { "epoch": 0.9194665256408138, "grad_norm": 0.5019423961639404, "learning_rate": 5.071717819475208e-07, "loss": 0.0508, "step": 41727 }, { "epoch": 0.9194885609303299, "grad_norm": 0.25632718205451965, "learning_rate": 5.06895795742956e-07, "loss": 0.0521, "step": 41728 }, { "epoch": 0.9195105962198461, "grad_norm": 0.4420451521873474, "learning_rate": 5.066198833595436e-07, "loss": 0.05, "step": 41729 }, { "epoch": 0.9195326315093623, "grad_norm": 0.5054109692573547, "learning_rate": 5.063440447986861e-07, "loss": 0.0669, "step": 41730 }, { "epoch": 0.9195546667988784, "grad_norm": 0.34422069787979126, "learning_rate": 5.060682800617922e-07, "loss": 0.0756, "step": 41731 }, { "epoch": 0.9195767020883946, "grad_norm": 0.4590836465358734, "learning_rate": 5.057925891502641e-07, "loss": 0.0431, "step": 41732 }, { "epoch": 0.9195987373779108, "grad_norm": 0.3542667329311371, "learning_rate": 5.055169720655073e-07, "loss": 0.0447, "step": 41733 }, { "epoch": 0.9196207726674269, "grad_norm": 0.6865270137786865, "learning_rate": 5.052414288089258e-07, "loss": 0.0451, "step": 41734 }, { "epoch": 0.9196428079569431, "grad_norm": 0.3453318178653717, "learning_rate": 5.049659593819184e-07, "loss": 0.0704, "step": 41735 }, { "epoch": 0.9196648432464593, "grad_norm": 0.24476122856140137, "learning_rate": 5.046905637858956e-07, "loss": 0.0495, "step": 41736 }, { "epoch": 0.9196868785359754, "grad_norm": 0.2435288280248642, "learning_rate": 5.044152420222564e-07, "loss": 0.0179, "step": 41737 }, { "epoch": 0.9197089138254915, "grad_norm": 0.653967559337616, "learning_rate": 5.041399940924046e-07, "loss": 0.069, "step": 41738 }, { "epoch": 0.9197309491150076, "grad_norm": 0.7424222230911255, "learning_rate": 5.038648199977392e-07, "loss": 0.0652, "step": 41739 }, { "epoch": 0.9197529844045238, "grad_norm": 0.7942312359809875, "learning_rate": 5.035897197396638e-07, "loss": 0.0696, "step": 41740 }, { "epoch": 0.91977501969404, "grad_norm": 0.765734076499939, "learning_rate": 5.033146933195809e-07, "loss": 0.0814, "step": 41741 }, { "epoch": 0.9197970549835561, "grad_norm": 0.7944257855415344, "learning_rate": 5.030397407388892e-07, "loss": 0.0603, "step": 41742 }, { "epoch": 0.9198190902730723, "grad_norm": 0.33627772331237793, "learning_rate": 5.027648619989894e-07, "loss": 0.0528, "step": 41743 }, { "epoch": 0.9198411255625885, "grad_norm": 0.6122217774391174, "learning_rate": 5.024900571012819e-07, "loss": 0.0568, "step": 41744 }, { "epoch": 0.9198631608521046, "grad_norm": 0.7173078656196594, "learning_rate": 5.02215326047169e-07, "loss": 0.0575, "step": 41745 }, { "epoch": 0.9198851961416208, "grad_norm": 0.3077422082424164, "learning_rate": 5.019406688380462e-07, "loss": 0.0408, "step": 41746 }, { "epoch": 0.919907231431137, "grad_norm": 0.835828423500061, "learning_rate": 5.016660854753141e-07, "loss": 0.073, "step": 41747 }, { "epoch": 0.9199292667206531, "grad_norm": 0.5939775705337524, "learning_rate": 5.013915759603716e-07, "loss": 0.0727, "step": 41748 }, { "epoch": 0.9199513020101693, "grad_norm": 0.814154863357544, "learning_rate": 5.011171402946157e-07, "loss": 0.079, "step": 41749 }, { "epoch": 0.9199733372996854, "grad_norm": 0.7430562973022461, "learning_rate": 5.008427784794456e-07, "loss": 0.0736, "step": 41750 }, { "epoch": 0.9199953725892016, "grad_norm": 0.6075496077537537, "learning_rate": 5.005684905162583e-07, "loss": 0.0521, "step": 41751 }, { "epoch": 0.9200174078787178, "grad_norm": 0.7534224987030029, "learning_rate": 5.002942764064528e-07, "loss": 0.0716, "step": 41752 }, { "epoch": 0.9200394431682339, "grad_norm": 1.3229655027389526, "learning_rate": 5.000201361514212e-07, "loss": 0.0724, "step": 41753 }, { "epoch": 0.9200614784577501, "grad_norm": 0.3800828158855438, "learning_rate": 4.997460697525624e-07, "loss": 0.0502, "step": 41754 }, { "epoch": 0.9200835137472663, "grad_norm": 0.6813405156135559, "learning_rate": 4.994720772112754e-07, "loss": 0.0548, "step": 41755 }, { "epoch": 0.9201055490367824, "grad_norm": 0.6685683727264404, "learning_rate": 4.991981585289507e-07, "loss": 0.0729, "step": 41756 }, { "epoch": 0.9201275843262986, "grad_norm": 0.40135398507118225, "learning_rate": 4.989243137069871e-07, "loss": 0.0645, "step": 41757 }, { "epoch": 0.9201496196158148, "grad_norm": 0.6769269704818726, "learning_rate": 4.986505427467752e-07, "loss": 0.0799, "step": 41758 }, { "epoch": 0.9201716549053309, "grad_norm": 0.5057958364486694, "learning_rate": 4.983768456497156e-07, "loss": 0.051, "step": 41759 }, { "epoch": 0.9201936901948471, "grad_norm": 0.8178426027297974, "learning_rate": 4.981032224171972e-07, "loss": 0.0575, "step": 41760 }, { "epoch": 0.9202157254843633, "grad_norm": 0.5924693942070007, "learning_rate": 4.978296730506171e-07, "loss": 0.0643, "step": 41761 }, { "epoch": 0.9202377607738794, "grad_norm": 0.48612770438194275, "learning_rate": 4.97556197551366e-07, "loss": 0.0485, "step": 41762 }, { "epoch": 0.9202597960633955, "grad_norm": 0.5394911170005798, "learning_rate": 4.972827959208376e-07, "loss": 0.038, "step": 41763 }, { "epoch": 0.9202818313529116, "grad_norm": 0.7604281902313232, "learning_rate": 4.970094681604259e-07, "loss": 0.0663, "step": 41764 }, { "epoch": 0.9203038666424278, "grad_norm": 0.2841455042362213, "learning_rate": 4.967362142715215e-07, "loss": 0.0692, "step": 41765 }, { "epoch": 0.920325901931944, "grad_norm": 0.560387372970581, "learning_rate": 4.964630342555166e-07, "loss": 0.0416, "step": 41766 }, { "epoch": 0.9203479372214601, "grad_norm": 0.6698645353317261, "learning_rate": 4.961899281138032e-07, "loss": 0.0479, "step": 41767 }, { "epoch": 0.9203699725109763, "grad_norm": 0.7829599380493164, "learning_rate": 4.959168958477739e-07, "loss": 0.0528, "step": 41768 }, { "epoch": 0.9203920078004925, "grad_norm": 0.6971178650856018, "learning_rate": 4.95643937458814e-07, "loss": 0.0394, "step": 41769 }, { "epoch": 0.9204140430900086, "grad_norm": 0.516590416431427, "learning_rate": 4.953710529483191e-07, "loss": 0.052, "step": 41770 }, { "epoch": 0.9204360783795248, "grad_norm": 0.5467920303344727, "learning_rate": 4.950982423176797e-07, "loss": 0.1034, "step": 41771 }, { "epoch": 0.920458113669041, "grad_norm": 0.4403109550476074, "learning_rate": 4.948255055682799e-07, "loss": 0.0455, "step": 41772 }, { "epoch": 0.9204801489585571, "grad_norm": 0.8342901468276978, "learning_rate": 4.945528427015134e-07, "loss": 0.0597, "step": 41773 }, { "epoch": 0.9205021842480733, "grad_norm": 0.895348072052002, "learning_rate": 4.942802537187657e-07, "loss": 0.0639, "step": 41774 }, { "epoch": 0.9205242195375894, "grad_norm": 0.5744693875312805, "learning_rate": 4.940077386214309e-07, "loss": 0.0791, "step": 41775 }, { "epoch": 0.9205462548271056, "grad_norm": 0.4885644316673279, "learning_rate": 4.937352974108911e-07, "loss": 0.0775, "step": 41776 }, { "epoch": 0.9205682901166218, "grad_norm": 0.4721863567829132, "learning_rate": 4.934629300885369e-07, "loss": 0.0551, "step": 41777 }, { "epoch": 0.9205903254061379, "grad_norm": 0.5587785840034485, "learning_rate": 4.931906366557553e-07, "loss": 0.0646, "step": 41778 }, { "epoch": 0.9206123606956541, "grad_norm": 0.6237377524375916, "learning_rate": 4.929184171139323e-07, "loss": 0.0458, "step": 41779 }, { "epoch": 0.9206343959851703, "grad_norm": 0.847256600856781, "learning_rate": 4.926462714644564e-07, "loss": 0.0728, "step": 41780 }, { "epoch": 0.9206564312746864, "grad_norm": 0.6144159436225891, "learning_rate": 4.923741997087084e-07, "loss": 0.0806, "step": 41781 }, { "epoch": 0.9206784665642026, "grad_norm": 0.8271404504776001, "learning_rate": 4.921022018480836e-07, "loss": 0.0572, "step": 41782 }, { "epoch": 0.9207005018537188, "grad_norm": 0.6161588430404663, "learning_rate": 4.918302778839579e-07, "loss": 0.075, "step": 41783 }, { "epoch": 0.9207225371432349, "grad_norm": 0.4155879616737366, "learning_rate": 4.915584278177248e-07, "loss": 0.0808, "step": 41784 }, { "epoch": 0.9207445724327511, "grad_norm": 0.7778478264808655, "learning_rate": 4.912866516507586e-07, "loss": 0.0521, "step": 41785 }, { "epoch": 0.9207666077222673, "grad_norm": 0.5740969181060791, "learning_rate": 4.910149493844546e-07, "loss": 0.0415, "step": 41786 }, { "epoch": 0.9207886430117834, "grad_norm": 0.20281417667865753, "learning_rate": 4.907433210201917e-07, "loss": 0.034, "step": 41787 }, { "epoch": 0.9208106783012995, "grad_norm": 0.49166107177734375, "learning_rate": 4.904717665593523e-07, "loss": 0.0497, "step": 41788 }, { "epoch": 0.9208327135908156, "grad_norm": 0.6900886297225952, "learning_rate": 4.9020028600332e-07, "loss": 0.0571, "step": 41789 }, { "epoch": 0.9208547488803318, "grad_norm": 0.4526091516017914, "learning_rate": 4.899288793534789e-07, "loss": 0.0366, "step": 41790 }, { "epoch": 0.920876784169848, "grad_norm": 0.8533313870429993, "learning_rate": 4.896575466112129e-07, "loss": 0.0469, "step": 41791 }, { "epoch": 0.9208988194593641, "grad_norm": 0.9347274303436279, "learning_rate": 4.893862877778976e-07, "loss": 0.0705, "step": 41792 }, { "epoch": 0.9209208547488803, "grad_norm": 0.8711991906166077, "learning_rate": 4.891151028549234e-07, "loss": 0.0672, "step": 41793 }, { "epoch": 0.9209428900383965, "grad_norm": 0.5333174467086792, "learning_rate": 4.88843991843666e-07, "loss": 0.0416, "step": 41794 }, { "epoch": 0.9209649253279126, "grad_norm": 0.35047000646591187, "learning_rate": 4.885729547455076e-07, "loss": 0.0439, "step": 41795 }, { "epoch": 0.9209869606174288, "grad_norm": 0.7709947228431702, "learning_rate": 4.883019915618287e-07, "loss": 0.0674, "step": 41796 }, { "epoch": 0.921008995906945, "grad_norm": 0.6231696605682373, "learning_rate": 4.880311022940082e-07, "loss": 0.0849, "step": 41797 }, { "epoch": 0.9210310311964611, "grad_norm": 0.504177987575531, "learning_rate": 4.8776028694343e-07, "loss": 0.0742, "step": 41798 }, { "epoch": 0.9210530664859773, "grad_norm": 0.5683296322822571, "learning_rate": 4.874895455114697e-07, "loss": 0.0652, "step": 41799 }, { "epoch": 0.9210751017754935, "grad_norm": 0.8182330131530762, "learning_rate": 4.872188779995062e-07, "loss": 0.0751, "step": 41800 }, { "epoch": 0.9210971370650096, "grad_norm": 0.46358296275138855, "learning_rate": 4.869482844089202e-07, "loss": 0.074, "step": 41801 }, { "epoch": 0.9211191723545258, "grad_norm": 0.4723820388317108, "learning_rate": 4.866777647410903e-07, "loss": 0.0562, "step": 41802 }, { "epoch": 0.921141207644042, "grad_norm": 0.6230228543281555, "learning_rate": 4.864073189973906e-07, "loss": 0.0532, "step": 41803 }, { "epoch": 0.9211632429335581, "grad_norm": 0.5760181546211243, "learning_rate": 4.861369471792015e-07, "loss": 0.0485, "step": 41804 }, { "epoch": 0.9211852782230743, "grad_norm": 0.4107123613357544, "learning_rate": 4.858666492879022e-07, "loss": 0.0514, "step": 41805 }, { "epoch": 0.9212073135125904, "grad_norm": 0.4454192519187927, "learning_rate": 4.855964253248663e-07, "loss": 0.0478, "step": 41806 }, { "epoch": 0.9212293488021066, "grad_norm": 0.5485970377922058, "learning_rate": 4.853262752914711e-07, "loss": 0.0574, "step": 41807 }, { "epoch": 0.9212513840916228, "grad_norm": 0.22151033580303192, "learning_rate": 4.850561991890906e-07, "loss": 0.0492, "step": 41808 }, { "epoch": 0.9212734193811389, "grad_norm": 0.8314432501792908, "learning_rate": 4.847861970191053e-07, "loss": 0.0822, "step": 41809 }, { "epoch": 0.9212954546706551, "grad_norm": 0.5331825613975525, "learning_rate": 4.845162687828858e-07, "loss": 0.0469, "step": 41810 }, { "epoch": 0.9213174899601713, "grad_norm": 0.664476215839386, "learning_rate": 4.842464144818093e-07, "loss": 0.0607, "step": 41811 }, { "epoch": 0.9213395252496873, "grad_norm": 0.7845157384872437, "learning_rate": 4.839766341172497e-07, "loss": 0.0899, "step": 41812 }, { "epoch": 0.9213615605392035, "grad_norm": 0.8299367427825928, "learning_rate": 4.837069276905809e-07, "loss": 0.0606, "step": 41813 }, { "epoch": 0.9213835958287196, "grad_norm": 0.7031581997871399, "learning_rate": 4.834372952031785e-07, "loss": 0.0612, "step": 41814 }, { "epoch": 0.9214056311182358, "grad_norm": 0.5366895198822021, "learning_rate": 4.831677366564097e-07, "loss": 0.0886, "step": 41815 }, { "epoch": 0.921427666407752, "grad_norm": 0.697216272354126, "learning_rate": 4.828982520516567e-07, "loss": 0.079, "step": 41816 }, { "epoch": 0.9214497016972681, "grad_norm": 0.8825953006744385, "learning_rate": 4.826288413902868e-07, "loss": 0.084, "step": 41817 }, { "epoch": 0.9214717369867843, "grad_norm": 0.643482506275177, "learning_rate": 4.82359504673674e-07, "loss": 0.0873, "step": 41818 }, { "epoch": 0.9214937722763005, "grad_norm": 0.869408369064331, "learning_rate": 4.82090241903187e-07, "loss": 0.079, "step": 41819 }, { "epoch": 0.9215158075658166, "grad_norm": 0.7619772553443909, "learning_rate": 4.818210530801997e-07, "loss": 0.0693, "step": 41820 }, { "epoch": 0.9215378428553328, "grad_norm": 0.7433004379272461, "learning_rate": 4.815519382060846e-07, "loss": 0.0529, "step": 41821 }, { "epoch": 0.921559878144849, "grad_norm": 0.708322286605835, "learning_rate": 4.812828972822103e-07, "loss": 0.0398, "step": 41822 }, { "epoch": 0.9215819134343651, "grad_norm": 0.5313596129417419, "learning_rate": 4.810139303099475e-07, "loss": 0.0612, "step": 41823 }, { "epoch": 0.9216039487238813, "grad_norm": 0.5454323291778564, "learning_rate": 4.807450372906652e-07, "loss": 0.0302, "step": 41824 }, { "epoch": 0.9216259840133975, "grad_norm": 0.2127160280942917, "learning_rate": 4.804762182257372e-07, "loss": 0.0382, "step": 41825 }, { "epoch": 0.9216480193029136, "grad_norm": 0.6405824422836304, "learning_rate": 4.802074731165274e-07, "loss": 0.0594, "step": 41826 }, { "epoch": 0.9216700545924298, "grad_norm": 0.5074835419654846, "learning_rate": 4.799388019644063e-07, "loss": 0.0582, "step": 41827 }, { "epoch": 0.921692089881946, "grad_norm": 0.6836254596710205, "learning_rate": 4.796702047707463e-07, "loss": 0.0436, "step": 41828 }, { "epoch": 0.9217141251714621, "grad_norm": 0.8167877197265625, "learning_rate": 4.794016815369112e-07, "loss": 0.0875, "step": 41829 }, { "epoch": 0.9217361604609783, "grad_norm": 0.45012331008911133, "learning_rate": 4.7913323226427e-07, "loss": 0.0661, "step": 41830 }, { "epoch": 0.9217581957504944, "grad_norm": 0.5465439558029175, "learning_rate": 4.788648569541865e-07, "loss": 0.0578, "step": 41831 }, { "epoch": 0.9217802310400106, "grad_norm": 0.48285719752311707, "learning_rate": 4.785965556080363e-07, "loss": 0.056, "step": 41832 }, { "epoch": 0.9218022663295268, "grad_norm": 0.36322757601737976, "learning_rate": 4.783283282271767e-07, "loss": 0.0694, "step": 41833 }, { "epoch": 0.9218243016190429, "grad_norm": 0.29085567593574524, "learning_rate": 4.780601748129798e-07, "loss": 0.0376, "step": 41834 }, { "epoch": 0.9218463369085591, "grad_norm": 1.035144329071045, "learning_rate": 4.777920953668113e-07, "loss": 0.0764, "step": 41835 }, { "epoch": 0.9218683721980753, "grad_norm": 0.6875364780426025, "learning_rate": 4.775240898900335e-07, "loss": 0.0604, "step": 41836 }, { "epoch": 0.9218904074875913, "grad_norm": 1.0353213548660278, "learning_rate": 4.772561583840135e-07, "loss": 0.0506, "step": 41837 }, { "epoch": 0.9219124427771075, "grad_norm": 0.8060516715049744, "learning_rate": 4.769883008501136e-07, "loss": 0.0777, "step": 41838 }, { "epoch": 0.9219344780666237, "grad_norm": 0.7558438777923584, "learning_rate": 4.7672051728970265e-07, "loss": 0.036, "step": 41839 }, { "epoch": 0.9219565133561398, "grad_norm": 0.6047167778015137, "learning_rate": 4.764528077041413e-07, "loss": 0.0298, "step": 41840 }, { "epoch": 0.921978548645656, "grad_norm": 0.8564267754554749, "learning_rate": 4.7618517209479515e-07, "loss": 0.0726, "step": 41841 }, { "epoch": 0.9220005839351721, "grad_norm": 0.7412589192390442, "learning_rate": 4.7591761046302474e-07, "loss": 0.0817, "step": 41842 }, { "epoch": 0.9220226192246883, "grad_norm": 0.6450191140174866, "learning_rate": 4.756501228101939e-07, "loss": 0.056, "step": 41843 }, { "epoch": 0.9220446545142045, "grad_norm": 0.5235415101051331, "learning_rate": 4.7538270913766667e-07, "loss": 0.0575, "step": 41844 }, { "epoch": 0.9220666898037206, "grad_norm": 1.1127996444702148, "learning_rate": 4.7511536944680355e-07, "loss": 0.0923, "step": 41845 }, { "epoch": 0.9220887250932368, "grad_norm": 0.8576556444168091, "learning_rate": 4.7484810373896514e-07, "loss": 0.0447, "step": 41846 }, { "epoch": 0.922110760382753, "grad_norm": 0.5876525640487671, "learning_rate": 4.74580912015517e-07, "loss": 0.0647, "step": 41847 }, { "epoch": 0.9221327956722691, "grad_norm": 0.2419256567955017, "learning_rate": 4.743137942778164e-07, "loss": 0.0471, "step": 41848 }, { "epoch": 0.9221548309617853, "grad_norm": 0.4818410873413086, "learning_rate": 4.7404675052722555e-07, "loss": 0.0605, "step": 41849 }, { "epoch": 0.9221768662513015, "grad_norm": 0.7051114439964294, "learning_rate": 4.737797807651018e-07, "loss": 0.0544, "step": 41850 }, { "epoch": 0.9221989015408176, "grad_norm": 0.6150321960449219, "learning_rate": 4.7351288499281054e-07, "loss": 0.0573, "step": 41851 }, { "epoch": 0.9222209368303338, "grad_norm": 0.8478207588195801, "learning_rate": 4.732460632117058e-07, "loss": 0.0752, "step": 41852 }, { "epoch": 0.92224297211985, "grad_norm": 0.8360603451728821, "learning_rate": 4.7297931542314986e-07, "loss": 0.0651, "step": 41853 }, { "epoch": 0.9222650074093661, "grad_norm": 0.4575102627277374, "learning_rate": 4.727126416284966e-07, "loss": 0.0458, "step": 41854 }, { "epoch": 0.9222870426988823, "grad_norm": 0.7334572076797485, "learning_rate": 4.7244604182911323e-07, "loss": 0.0832, "step": 41855 }, { "epoch": 0.9223090779883985, "grad_norm": 0.7595484256744385, "learning_rate": 4.721795160263487e-07, "loss": 0.0454, "step": 41856 }, { "epoch": 0.9223311132779146, "grad_norm": 0.852630615234375, "learning_rate": 4.7191306422156523e-07, "loss": 0.0559, "step": 41857 }, { "epoch": 0.9223531485674308, "grad_norm": 0.7915183305740356, "learning_rate": 4.7164668641612173e-07, "loss": 0.0931, "step": 41858 }, { "epoch": 0.922375183856947, "grad_norm": 0.6501772403717041, "learning_rate": 4.7138038261136883e-07, "loss": 0.0739, "step": 41859 }, { "epoch": 0.9223972191464631, "grad_norm": 0.6375786066055298, "learning_rate": 4.711141528086704e-07, "loss": 0.055, "step": 41860 }, { "epoch": 0.9224192544359793, "grad_norm": 0.5570273399353027, "learning_rate": 4.7084799700937374e-07, "loss": 0.0861, "step": 41861 }, { "epoch": 0.9224412897254953, "grad_norm": 0.5431959629058838, "learning_rate": 4.7058191521484274e-07, "loss": 0.043, "step": 41862 }, { "epoch": 0.9224633250150115, "grad_norm": 0.6620861887931824, "learning_rate": 4.7031590742642796e-07, "loss": 0.0497, "step": 41863 }, { "epoch": 0.9224853603045277, "grad_norm": 0.8601168990135193, "learning_rate": 4.700499736454866e-07, "loss": 0.0507, "step": 41864 }, { "epoch": 0.9225073955940438, "grad_norm": 0.7366585731506348, "learning_rate": 4.6978411387337107e-07, "loss": 0.0662, "step": 41865 }, { "epoch": 0.92252943088356, "grad_norm": 0.31769460439682007, "learning_rate": 4.6951832811143683e-07, "loss": 0.0463, "step": 41866 }, { "epoch": 0.9225514661730762, "grad_norm": 0.8264835476875305, "learning_rate": 4.6925261636103944e-07, "loss": 0.0789, "step": 41867 }, { "epoch": 0.9225735014625923, "grad_norm": 0.7862602472305298, "learning_rate": 4.6898697862352956e-07, "loss": 0.0704, "step": 41868 }, { "epoch": 0.9225955367521085, "grad_norm": 0.9169415235519409, "learning_rate": 4.687214149002611e-07, "loss": 0.0752, "step": 41869 }, { "epoch": 0.9226175720416246, "grad_norm": 0.9127082824707031, "learning_rate": 4.6845592519258463e-07, "loss": 0.0644, "step": 41870 }, { "epoch": 0.9226396073311408, "grad_norm": 0.7144006490707397, "learning_rate": 4.6819050950185903e-07, "loss": 0.05, "step": 41871 }, { "epoch": 0.922661642620657, "grad_norm": 0.39344048500061035, "learning_rate": 4.679251678294283e-07, "loss": 0.0347, "step": 41872 }, { "epoch": 0.9226836779101731, "grad_norm": 0.5538260340690613, "learning_rate": 4.6765990017664793e-07, "loss": 0.0472, "step": 41873 }, { "epoch": 0.9227057131996893, "grad_norm": 0.45960620045661926, "learning_rate": 4.673947065448703e-07, "loss": 0.0599, "step": 41874 }, { "epoch": 0.9227277484892055, "grad_norm": 0.5769316554069519, "learning_rate": 4.671295869354425e-07, "loss": 0.056, "step": 41875 }, { "epoch": 0.9227497837787216, "grad_norm": 0.791168749332428, "learning_rate": 4.6686454134971856e-07, "loss": 0.0899, "step": 41876 }, { "epoch": 0.9227718190682378, "grad_norm": 0.641232430934906, "learning_rate": 4.665995697890424e-07, "loss": 0.06, "step": 41877 }, { "epoch": 0.922793854357754, "grad_norm": 0.46829232573509216, "learning_rate": 4.663346722547729e-07, "loss": 0.0619, "step": 41878 }, { "epoch": 0.9228158896472701, "grad_norm": 0.5996441841125488, "learning_rate": 4.660698487482523e-07, "loss": 0.0728, "step": 41879 }, { "epoch": 0.9228379249367863, "grad_norm": 0.8566173911094666, "learning_rate": 4.6580509927083293e-07, "loss": 0.0861, "step": 41880 }, { "epoch": 0.9228599602263025, "grad_norm": 0.8972233533859253, "learning_rate": 4.6554042382386196e-07, "loss": 0.0545, "step": 41881 }, { "epoch": 0.9228819955158186, "grad_norm": 0.5310994982719421, "learning_rate": 4.652758224086867e-07, "loss": 0.0323, "step": 41882 }, { "epoch": 0.9229040308053348, "grad_norm": 0.6353192329406738, "learning_rate": 4.6501129502665776e-07, "loss": 0.0696, "step": 41883 }, { "epoch": 0.922926066094851, "grad_norm": 0.8437648415565491, "learning_rate": 4.647468416791156e-07, "loss": 0.0504, "step": 41884 }, { "epoch": 0.9229481013843671, "grad_norm": 0.6808183193206787, "learning_rate": 4.6448246236741766e-07, "loss": 0.0542, "step": 41885 }, { "epoch": 0.9229701366738833, "grad_norm": 0.5589200258255005, "learning_rate": 4.6421815709290273e-07, "loss": 0.0576, "step": 41886 }, { "epoch": 0.9229921719633993, "grad_norm": 0.6356754302978516, "learning_rate": 4.6395392585691975e-07, "loss": 0.0568, "step": 41887 }, { "epoch": 0.9230142072529155, "grad_norm": 0.4956967532634735, "learning_rate": 4.6368976866081434e-07, "loss": 0.0519, "step": 41888 }, { "epoch": 0.9230362425424317, "grad_norm": 0.7802345156669617, "learning_rate": 4.6342568550593043e-07, "loss": 0.0505, "step": 41889 }, { "epoch": 0.9230582778319478, "grad_norm": 0.8914209604263306, "learning_rate": 4.631616763936186e-07, "loss": 0.1028, "step": 41890 }, { "epoch": 0.923080313121464, "grad_norm": 0.6192325949668884, "learning_rate": 4.6289774132521614e-07, "loss": 0.0506, "step": 41891 }, { "epoch": 0.9231023484109802, "grad_norm": 0.7875891327857971, "learning_rate": 4.6263388030207185e-07, "loss": 0.0619, "step": 41892 }, { "epoch": 0.9231243837004963, "grad_norm": 0.8900039196014404, "learning_rate": 4.6237009332552815e-07, "loss": 0.0687, "step": 41893 }, { "epoch": 0.9231464189900125, "grad_norm": 0.2541700005531311, "learning_rate": 4.6210638039693054e-07, "loss": 0.0384, "step": 41894 }, { "epoch": 0.9231684542795286, "grad_norm": 0.37304365634918213, "learning_rate": 4.618427415176196e-07, "loss": 0.0666, "step": 41895 }, { "epoch": 0.9231904895690448, "grad_norm": 0.33293265104293823, "learning_rate": 4.6157917668893934e-07, "loss": 0.0508, "step": 41896 }, { "epoch": 0.923212524858561, "grad_norm": 0.7130694389343262, "learning_rate": 4.613156859122336e-07, "loss": 0.0575, "step": 41897 }, { "epoch": 0.9232345601480771, "grad_norm": 0.7962637543678284, "learning_rate": 4.61052269188843e-07, "loss": 0.0608, "step": 41898 }, { "epoch": 0.9232565954375933, "grad_norm": 0.37228769063949585, "learning_rate": 4.607889265201115e-07, "loss": 0.0566, "step": 41899 }, { "epoch": 0.9232786307271095, "grad_norm": 0.7937090992927551, "learning_rate": 4.6052565790737465e-07, "loss": 0.0791, "step": 41900 }, { "epoch": 0.9233006660166256, "grad_norm": 0.9240401983261108, "learning_rate": 4.6026246335197973e-07, "loss": 0.0632, "step": 41901 }, { "epoch": 0.9233227013061418, "grad_norm": 0.5709457397460938, "learning_rate": 4.5999934285526403e-07, "loss": 0.0468, "step": 41902 }, { "epoch": 0.923344736595658, "grad_norm": 0.4052061140537262, "learning_rate": 4.597362964185697e-07, "loss": 0.0574, "step": 41903 }, { "epoch": 0.9233667718851741, "grad_norm": 0.7797490954399109, "learning_rate": 4.5947332404323416e-07, "loss": 0.0758, "step": 41904 }, { "epoch": 0.9233888071746903, "grad_norm": 0.6094074249267578, "learning_rate": 4.592104257305996e-07, "loss": 0.0823, "step": 41905 }, { "epoch": 0.9234108424642065, "grad_norm": 0.5875452160835266, "learning_rate": 4.589476014820032e-07, "loss": 0.0663, "step": 41906 }, { "epoch": 0.9234328777537226, "grad_norm": 0.6806384921073914, "learning_rate": 4.5868485129878404e-07, "loss": 0.0645, "step": 41907 }, { "epoch": 0.9234549130432388, "grad_norm": 0.516481339931488, "learning_rate": 4.584221751822809e-07, "loss": 0.0563, "step": 41908 }, { "epoch": 0.923476948332755, "grad_norm": 0.523374080657959, "learning_rate": 4.5815957313383116e-07, "loss": 0.0665, "step": 41909 }, { "epoch": 0.9234989836222711, "grad_norm": 0.8763672709465027, "learning_rate": 4.578970451547737e-07, "loss": 0.0625, "step": 41910 }, { "epoch": 0.9235210189117872, "grad_norm": 0.9060510993003845, "learning_rate": 4.5763459124644247e-07, "loss": 0.078, "step": 41911 }, { "epoch": 0.9235430542013033, "grad_norm": 0.5837112069129944, "learning_rate": 4.5737221141017803e-07, "loss": 0.0722, "step": 41912 }, { "epoch": 0.9235650894908195, "grad_norm": 0.5996651649475098, "learning_rate": 4.5710990564731435e-07, "loss": 0.0495, "step": 41913 }, { "epoch": 0.9235871247803357, "grad_norm": 0.5568810701370239, "learning_rate": 4.568476739591904e-07, "loss": 0.0559, "step": 41914 }, { "epoch": 0.9236091600698518, "grad_norm": 0.41450896859169006, "learning_rate": 4.565855163471383e-07, "loss": 0.0591, "step": 41915 }, { "epoch": 0.923631195359368, "grad_norm": 0.7281115055084229, "learning_rate": 4.5632343281249545e-07, "loss": 0.0579, "step": 41916 }, { "epoch": 0.9236532306488842, "grad_norm": 0.6661636233329773, "learning_rate": 4.5606142335659737e-07, "loss": 0.0461, "step": 41917 }, { "epoch": 0.9236752659384003, "grad_norm": 0.39483219385147095, "learning_rate": 4.557994879807748e-07, "loss": 0.05, "step": 41918 }, { "epoch": 0.9236973012279165, "grad_norm": 0.29767170548439026, "learning_rate": 4.5553762668636645e-07, "loss": 0.0457, "step": 41919 }, { "epoch": 0.9237193365174327, "grad_norm": 0.7905954122543335, "learning_rate": 4.552758394747031e-07, "loss": 0.0567, "step": 41920 }, { "epoch": 0.9237413718069488, "grad_norm": 0.8010256290435791, "learning_rate": 4.550141263471219e-07, "loss": 0.0502, "step": 41921 }, { "epoch": 0.923763407096465, "grad_norm": 0.5220596790313721, "learning_rate": 4.547524873049519e-07, "loss": 0.042, "step": 41922 }, { "epoch": 0.9237854423859811, "grad_norm": 0.5590133666992188, "learning_rate": 4.544909223495269e-07, "loss": 0.0391, "step": 41923 }, { "epoch": 0.9238074776754973, "grad_norm": 0.8431592583656311, "learning_rate": 4.54229431482181e-07, "loss": 0.0816, "step": 41924 }, { "epoch": 0.9238295129650135, "grad_norm": 0.6974703669548035, "learning_rate": 4.5396801470424297e-07, "loss": 0.0575, "step": 41925 }, { "epoch": 0.9238515482545296, "grad_norm": 0.6409667134284973, "learning_rate": 4.5370667201704516e-07, "loss": 0.0455, "step": 41926 }, { "epoch": 0.9238735835440458, "grad_norm": 0.3558247983455658, "learning_rate": 4.5344540342192154e-07, "loss": 0.0649, "step": 41927 }, { "epoch": 0.923895618833562, "grad_norm": 0.5252104997634888, "learning_rate": 4.53184208920201e-07, "loss": 0.0601, "step": 41928 }, { "epoch": 0.9239176541230781, "grad_norm": 0.8446199297904968, "learning_rate": 4.529230885132124e-07, "loss": 0.0612, "step": 41929 }, { "epoch": 0.9239396894125943, "grad_norm": 0.7192697525024414, "learning_rate": 4.526620422022881e-07, "loss": 0.0631, "step": 41930 }, { "epoch": 0.9239617247021105, "grad_norm": 0.4930805563926697, "learning_rate": 4.524010699887587e-07, "loss": 0.046, "step": 41931 }, { "epoch": 0.9239837599916266, "grad_norm": 0.5899662375450134, "learning_rate": 4.5214017187394986e-07, "loss": 0.0331, "step": 41932 }, { "epoch": 0.9240057952811428, "grad_norm": 0.7769657969474792, "learning_rate": 4.5187934785919206e-07, "loss": 0.0697, "step": 41933 }, { "epoch": 0.924027830570659, "grad_norm": 0.41395917534828186, "learning_rate": 4.516185979458126e-07, "loss": 0.0432, "step": 41934 }, { "epoch": 0.9240498658601751, "grad_norm": 0.8813086748123169, "learning_rate": 4.513579221351438e-07, "loss": 0.0561, "step": 41935 }, { "epoch": 0.9240719011496912, "grad_norm": 0.715386688709259, "learning_rate": 4.510973204285096e-07, "loss": 0.062, "step": 41936 }, { "epoch": 0.9240939364392073, "grad_norm": 0.7525579333305359, "learning_rate": 4.5083679282724053e-07, "loss": 0.0669, "step": 41937 }, { "epoch": 0.9241159717287235, "grad_norm": 0.7660996913909912, "learning_rate": 4.5057633933265896e-07, "loss": 0.0737, "step": 41938 }, { "epoch": 0.9241380070182397, "grad_norm": 0.6608009338378906, "learning_rate": 4.5031595994609544e-07, "loss": 0.0777, "step": 41939 }, { "epoch": 0.9241600423077558, "grad_norm": 0.590177059173584, "learning_rate": 4.5005565466887556e-07, "loss": 0.0607, "step": 41940 }, { "epoch": 0.924182077597272, "grad_norm": 0.4833541214466095, "learning_rate": 4.497954235023216e-07, "loss": 0.0438, "step": 41941 }, { "epoch": 0.9242041128867882, "grad_norm": 0.5138635039329529, "learning_rate": 4.4953526644776424e-07, "loss": 0.036, "step": 41942 }, { "epoch": 0.9242261481763043, "grad_norm": 0.5877645015716553, "learning_rate": 4.4927518350652566e-07, "loss": 0.0417, "step": 41943 }, { "epoch": 0.9242481834658205, "grad_norm": 0.6694796085357666, "learning_rate": 4.4901517467993145e-07, "loss": 0.0502, "step": 41944 }, { "epoch": 0.9242702187553367, "grad_norm": 1.0213567018508911, "learning_rate": 4.487552399693057e-07, "loss": 0.085, "step": 41945 }, { "epoch": 0.9242922540448528, "grad_norm": 0.6571388244628906, "learning_rate": 4.484953793759722e-07, "loss": 0.0674, "step": 41946 }, { "epoch": 0.924314289334369, "grad_norm": 0.49964019656181335, "learning_rate": 4.482355929012566e-07, "loss": 0.0591, "step": 41947 }, { "epoch": 0.9243363246238852, "grad_norm": 0.829727053642273, "learning_rate": 4.479758805464795e-07, "loss": 0.0747, "step": 41948 }, { "epoch": 0.9243583599134013, "grad_norm": 0.7687138915061951, "learning_rate": 4.477162423129633e-07, "loss": 0.0456, "step": 41949 }, { "epoch": 0.9243803952029175, "grad_norm": 0.5820419788360596, "learning_rate": 4.4745667820203174e-07, "loss": 0.0549, "step": 41950 }, { "epoch": 0.9244024304924336, "grad_norm": 0.1812395602464676, "learning_rate": 4.471971882150089e-07, "loss": 0.0639, "step": 41951 }, { "epoch": 0.9244244657819498, "grad_norm": 0.5865002274513245, "learning_rate": 4.4693777235321366e-07, "loss": 0.0407, "step": 41952 }, { "epoch": 0.924446501071466, "grad_norm": 0.6613875031471252, "learning_rate": 4.466784306179683e-07, "loss": 0.0589, "step": 41953 }, { "epoch": 0.9244685363609821, "grad_norm": 0.8570960164070129, "learning_rate": 4.4641916301059514e-07, "loss": 0.0812, "step": 41954 }, { "epoch": 0.9244905716504983, "grad_norm": 0.4044259488582611, "learning_rate": 4.4615996953241147e-07, "loss": 0.0331, "step": 41955 }, { "epoch": 0.9245126069400145, "grad_norm": 0.44087138772010803, "learning_rate": 4.459008501847411e-07, "loss": 0.0443, "step": 41956 }, { "epoch": 0.9245346422295306, "grad_norm": 0.6711385250091553, "learning_rate": 4.4564180496889986e-07, "loss": 0.0908, "step": 41957 }, { "epoch": 0.9245566775190468, "grad_norm": 0.6441682577133179, "learning_rate": 4.453828338862115e-07, "loss": 0.0735, "step": 41958 }, { "epoch": 0.924578712808563, "grad_norm": 0.23698505759239197, "learning_rate": 4.4512393693799337e-07, "loss": 0.0447, "step": 41959 }, { "epoch": 0.9246007480980791, "grad_norm": 0.6005080938339233, "learning_rate": 4.448651141255644e-07, "loss": 0.0579, "step": 41960 }, { "epoch": 0.9246227833875952, "grad_norm": 0.8228248357772827, "learning_rate": 4.4460636545024026e-07, "loss": 0.0632, "step": 41961 }, { "epoch": 0.9246448186771113, "grad_norm": 0.44717341661453247, "learning_rate": 4.443476909133415e-07, "loss": 0.0789, "step": 41962 }, { "epoch": 0.9246668539666275, "grad_norm": 0.6323304176330566, "learning_rate": 4.4408909051618874e-07, "loss": 0.0622, "step": 41963 }, { "epoch": 0.9246888892561437, "grad_norm": 0.6847199201583862, "learning_rate": 4.438305642600926e-07, "loss": 0.0595, "step": 41964 }, { "epoch": 0.9247109245456598, "grad_norm": 0.5618323683738708, "learning_rate": 4.4357211214637204e-07, "loss": 0.0802, "step": 41965 }, { "epoch": 0.924732959835176, "grad_norm": 0.49671316146850586, "learning_rate": 4.43313734176346e-07, "loss": 0.0542, "step": 41966 }, { "epoch": 0.9247549951246922, "grad_norm": 0.22406457364559174, "learning_rate": 4.430554303513301e-07, "loss": 0.0268, "step": 41967 }, { "epoch": 0.9247770304142083, "grad_norm": 0.4509296119213104, "learning_rate": 4.427972006726366e-07, "loss": 0.0496, "step": 41968 }, { "epoch": 0.9247990657037245, "grad_norm": 0.6330062747001648, "learning_rate": 4.425390451415845e-07, "loss": 0.0382, "step": 41969 }, { "epoch": 0.9248211009932407, "grad_norm": 0.4002845883369446, "learning_rate": 4.4228096375948766e-07, "loss": 0.0463, "step": 41970 }, { "epoch": 0.9248431362827568, "grad_norm": 0.8615458607673645, "learning_rate": 4.4202295652766014e-07, "loss": 0.0805, "step": 41971 }, { "epoch": 0.924865171572273, "grad_norm": 0.6044681072235107, "learning_rate": 4.4176502344741574e-07, "loss": 0.0498, "step": 41972 }, { "epoch": 0.9248872068617892, "grad_norm": 0.5771293640136719, "learning_rate": 4.4150716452006856e-07, "loss": 0.0733, "step": 41973 }, { "epoch": 0.9249092421513053, "grad_norm": 0.3036426305770874, "learning_rate": 4.412493797469341e-07, "loss": 0.0412, "step": 41974 }, { "epoch": 0.9249312774408215, "grad_norm": 0.3940361738204956, "learning_rate": 4.4099166912932144e-07, "loss": 0.0451, "step": 41975 }, { "epoch": 0.9249533127303377, "grad_norm": 0.9667608141899109, "learning_rate": 4.407340326685461e-07, "loss": 0.07, "step": 41976 }, { "epoch": 0.9249753480198538, "grad_norm": 0.8216466307640076, "learning_rate": 4.40476470365922e-07, "loss": 0.0608, "step": 41977 }, { "epoch": 0.92499738330937, "grad_norm": 1.0691529512405396, "learning_rate": 4.402189822227548e-07, "loss": 0.0567, "step": 41978 }, { "epoch": 0.9250194185988861, "grad_norm": 0.9980533123016357, "learning_rate": 4.399615682403635e-07, "loss": 0.0601, "step": 41979 }, { "epoch": 0.9250414538884023, "grad_norm": 0.8881833553314209, "learning_rate": 4.39704228420052e-07, "loss": 0.0781, "step": 41980 }, { "epoch": 0.9250634891779185, "grad_norm": 0.41294044256210327, "learning_rate": 4.3944696276313754e-07, "loss": 0.0382, "step": 41981 }, { "epoch": 0.9250855244674346, "grad_norm": 0.45050927996635437, "learning_rate": 4.3918977127092584e-07, "loss": 0.0796, "step": 41982 }, { "epoch": 0.9251075597569508, "grad_norm": 0.8942344784736633, "learning_rate": 4.389326539447308e-07, "loss": 0.047, "step": 41983 }, { "epoch": 0.925129595046467, "grad_norm": 0.4628966152667999, "learning_rate": 4.386756107858581e-07, "loss": 0.0422, "step": 41984 }, { "epoch": 0.925151630335983, "grad_norm": 0.9172213077545166, "learning_rate": 4.3841864179561987e-07, "loss": 0.1053, "step": 41985 }, { "epoch": 0.9251736656254992, "grad_norm": 0.543548047542572, "learning_rate": 4.3816174697532353e-07, "loss": 0.0524, "step": 41986 }, { "epoch": 0.9251957009150154, "grad_norm": 0.509993851184845, "learning_rate": 4.37904926326278e-07, "loss": 0.0483, "step": 41987 }, { "epoch": 0.9252177362045315, "grad_norm": 0.6616377830505371, "learning_rate": 4.3764817984979223e-07, "loss": 0.078, "step": 41988 }, { "epoch": 0.9252397714940477, "grad_norm": 1.0034973621368408, "learning_rate": 4.3739150754717184e-07, "loss": 0.0739, "step": 41989 }, { "epoch": 0.9252618067835638, "grad_norm": 0.4769657254219055, "learning_rate": 4.3713490941972745e-07, "loss": 0.0629, "step": 41990 }, { "epoch": 0.92528384207308, "grad_norm": 0.5123116970062256, "learning_rate": 4.36878385468763e-07, "loss": 0.0703, "step": 41991 }, { "epoch": 0.9253058773625962, "grad_norm": 0.48186179995536804, "learning_rate": 4.366219356955858e-07, "loss": 0.0582, "step": 41992 }, { "epoch": 0.9253279126521123, "grad_norm": 0.9859275817871094, "learning_rate": 4.3636556010150485e-07, "loss": 0.0695, "step": 41993 }, { "epoch": 0.9253499479416285, "grad_norm": 0.4793069660663605, "learning_rate": 4.3610925868782237e-07, "loss": 0.0616, "step": 41994 }, { "epoch": 0.9253719832311447, "grad_norm": 0.6096899509429932, "learning_rate": 4.35853031455844e-07, "loss": 0.0842, "step": 41995 }, { "epoch": 0.9253940185206608, "grad_norm": 0.5014609098434448, "learning_rate": 4.35596878406877e-07, "loss": 0.0695, "step": 41996 }, { "epoch": 0.925416053810177, "grad_norm": 0.9905334711074829, "learning_rate": 4.3534079954222705e-07, "loss": 0.0832, "step": 41997 }, { "epoch": 0.9254380890996932, "grad_norm": 0.4842982590198517, "learning_rate": 4.3508479486319473e-07, "loss": 0.0407, "step": 41998 }, { "epoch": 0.9254601243892093, "grad_norm": 0.37759125232696533, "learning_rate": 4.3482886437108407e-07, "loss": 0.0598, "step": 41999 }, { "epoch": 0.9254821596787255, "grad_norm": 0.9580662250518799, "learning_rate": 4.345730080672039e-07, "loss": 0.1147, "step": 42000 }, { "epoch": 0.9255041949682417, "grad_norm": 0.5021623373031616, "learning_rate": 4.3431722595285164e-07, "loss": 0.0503, "step": 42001 }, { "epoch": 0.9255262302577578, "grad_norm": 0.6823281049728394, "learning_rate": 4.3406151802933446e-07, "loss": 0.0531, "step": 42002 }, { "epoch": 0.925548265547274, "grad_norm": 0.6483376622200012, "learning_rate": 4.3380588429794976e-07, "loss": 0.0522, "step": 42003 }, { "epoch": 0.9255703008367901, "grad_norm": 0.7643381953239441, "learning_rate": 4.335503247600064e-07, "loss": 0.073, "step": 42004 }, { "epoch": 0.9255923361263063, "grad_norm": 0.5185021758079529, "learning_rate": 4.3329483941680015e-07, "loss": 0.0614, "step": 42005 }, { "epoch": 0.9256143714158225, "grad_norm": 0.663818359375, "learning_rate": 4.330394282696365e-07, "loss": 0.0527, "step": 42006 }, { "epoch": 0.9256364067053386, "grad_norm": 0.3279555141925812, "learning_rate": 4.327840913198111e-07, "loss": 0.0441, "step": 42007 }, { "epoch": 0.9256584419948548, "grad_norm": 0.3943403661251068, "learning_rate": 4.325288285686313e-07, "loss": 0.033, "step": 42008 }, { "epoch": 0.925680477284371, "grad_norm": 0.9046619534492493, "learning_rate": 4.32273640017391e-07, "loss": 0.0395, "step": 42009 }, { "epoch": 0.925702512573887, "grad_norm": 0.3316882848739624, "learning_rate": 4.3201852566739586e-07, "loss": 0.0612, "step": 42010 }, { "epoch": 0.9257245478634032, "grad_norm": 0.7595177292823792, "learning_rate": 4.3176348551993983e-07, "loss": 0.0756, "step": 42011 }, { "epoch": 0.9257465831529194, "grad_norm": 0.4910758435726166, "learning_rate": 4.315085195763252e-07, "loss": 0.055, "step": 42012 }, { "epoch": 0.9257686184424355, "grad_norm": 0.6140666007995605, "learning_rate": 4.3125362783785093e-07, "loss": 0.0675, "step": 42013 }, { "epoch": 0.9257906537319517, "grad_norm": 0.5062790513038635, "learning_rate": 4.309988103058127e-07, "loss": 0.0648, "step": 42014 }, { "epoch": 0.9258126890214678, "grad_norm": 0.48460811376571655, "learning_rate": 4.30744066981511e-07, "loss": 0.0794, "step": 42015 }, { "epoch": 0.925834724310984, "grad_norm": 0.45249664783477783, "learning_rate": 4.3048939786624166e-07, "loss": 0.0475, "step": 42016 }, { "epoch": 0.9258567596005002, "grad_norm": 0.7981896996498108, "learning_rate": 4.302348029613051e-07, "loss": 0.0508, "step": 42017 }, { "epoch": 0.9258787948900163, "grad_norm": 0.40725576877593994, "learning_rate": 4.299802822679938e-07, "loss": 0.0738, "step": 42018 }, { "epoch": 0.9259008301795325, "grad_norm": 0.4278942942619324, "learning_rate": 4.2972583578760496e-07, "loss": 0.0361, "step": 42019 }, { "epoch": 0.9259228654690487, "grad_norm": 0.40777361392974854, "learning_rate": 4.294714635214375e-07, "loss": 0.0426, "step": 42020 }, { "epoch": 0.9259449007585648, "grad_norm": 1.0168228149414062, "learning_rate": 4.2921716547078384e-07, "loss": 0.0772, "step": 42021 }, { "epoch": 0.925966936048081, "grad_norm": 0.48104625940322876, "learning_rate": 4.2896294163694117e-07, "loss": 0.052, "step": 42022 }, { "epoch": 0.9259889713375972, "grad_norm": 0.7447473406791687, "learning_rate": 4.287087920212035e-07, "loss": 0.0734, "step": 42023 }, { "epoch": 0.9260110066271133, "grad_norm": 0.6770941019058228, "learning_rate": 4.284547166248665e-07, "loss": 0.0633, "step": 42024 }, { "epoch": 0.9260330419166295, "grad_norm": 0.5551449060440063, "learning_rate": 4.2820071544922237e-07, "loss": 0.0475, "step": 42025 }, { "epoch": 0.9260550772061457, "grad_norm": 0.7746178507804871, "learning_rate": 4.279467884955668e-07, "loss": 0.045, "step": 42026 }, { "epoch": 0.9260771124956618, "grad_norm": 0.5408285856246948, "learning_rate": 4.2769293576519373e-07, "loss": 0.0532, "step": 42027 }, { "epoch": 0.926099147785178, "grad_norm": 0.5848895311355591, "learning_rate": 4.274391572593922e-07, "loss": 0.0383, "step": 42028 }, { "epoch": 0.9261211830746942, "grad_norm": 0.3532765507698059, "learning_rate": 4.271854529794594e-07, "loss": 0.0547, "step": 42029 }, { "epoch": 0.9261432183642103, "grad_norm": 0.931901752948761, "learning_rate": 4.2693182292668275e-07, "loss": 0.067, "step": 42030 }, { "epoch": 0.9261652536537265, "grad_norm": 0.1883084625005722, "learning_rate": 4.2667826710236115e-07, "loss": 0.0529, "step": 42031 }, { "epoch": 0.9261872889432426, "grad_norm": 0.6515451669692993, "learning_rate": 4.264247855077802e-07, "loss": 0.0484, "step": 42032 }, { "epoch": 0.9262093242327588, "grad_norm": 0.6642662882804871, "learning_rate": 4.261713781442322e-07, "loss": 0.0611, "step": 42033 }, { "epoch": 0.926231359522275, "grad_norm": 0.4937841594219208, "learning_rate": 4.2591804501300957e-07, "loss": 0.0713, "step": 42034 }, { "epoch": 0.926253394811791, "grad_norm": 0.3234509527683258, "learning_rate": 4.256647861153995e-07, "loss": 0.0604, "step": 42035 }, { "epoch": 0.9262754301013072, "grad_norm": 0.571525514125824, "learning_rate": 4.2541160145269764e-07, "loss": 0.0519, "step": 42036 }, { "epoch": 0.9262974653908234, "grad_norm": 0.3762475848197937, "learning_rate": 4.2515849102618466e-07, "loss": 0.0531, "step": 42037 }, { "epoch": 0.9263195006803395, "grad_norm": 0.5090567469596863, "learning_rate": 4.249054548371595e-07, "loss": 0.0386, "step": 42038 }, { "epoch": 0.9263415359698557, "grad_norm": 0.525841474533081, "learning_rate": 4.2465249288690445e-07, "loss": 0.0628, "step": 42039 }, { "epoch": 0.9263635712593719, "grad_norm": 0.5856566429138184, "learning_rate": 4.243996051767135e-07, "loss": 0.0479, "step": 42040 }, { "epoch": 0.926385606548888, "grad_norm": 0.5066328048706055, "learning_rate": 4.241467917078673e-07, "loss": 0.0505, "step": 42041 }, { "epoch": 0.9264076418384042, "grad_norm": 0.37957483530044556, "learning_rate": 4.238940524816598e-07, "loss": 0.033, "step": 42042 }, { "epoch": 0.9264296771279203, "grad_norm": 0.753481388092041, "learning_rate": 4.2364138749937664e-07, "loss": 0.0727, "step": 42043 }, { "epoch": 0.9264517124174365, "grad_norm": 0.5405764579772949, "learning_rate": 4.2338879676230346e-07, "loss": 0.0305, "step": 42044 }, { "epoch": 0.9264737477069527, "grad_norm": 0.6728359460830688, "learning_rate": 4.2313628027172756e-07, "loss": 0.0631, "step": 42045 }, { "epoch": 0.9264957829964688, "grad_norm": 0.6366368532180786, "learning_rate": 4.228838380289346e-07, "loss": 0.0843, "step": 42046 }, { "epoch": 0.926517818285985, "grad_norm": 0.7038540840148926, "learning_rate": 4.226314700352135e-07, "loss": 0.0579, "step": 42047 }, { "epoch": 0.9265398535755012, "grad_norm": 0.7066494822502136, "learning_rate": 4.2237917629184497e-07, "loss": 0.0657, "step": 42048 }, { "epoch": 0.9265618888650173, "grad_norm": 0.3966289758682251, "learning_rate": 4.221269568001179e-07, "loss": 0.0715, "step": 42049 }, { "epoch": 0.9265839241545335, "grad_norm": 0.9085825681686401, "learning_rate": 4.2187481156131633e-07, "loss": 0.0799, "step": 42050 }, { "epoch": 0.9266059594440497, "grad_norm": 0.7773251533508301, "learning_rate": 4.216227405767209e-07, "loss": 0.0549, "step": 42051 }, { "epoch": 0.9266279947335658, "grad_norm": 0.7527346611022949, "learning_rate": 4.213707438476222e-07, "loss": 0.0488, "step": 42052 }, { "epoch": 0.926650030023082, "grad_norm": 0.6729098558425903, "learning_rate": 4.211188213752942e-07, "loss": 0.059, "step": 42053 }, { "epoch": 0.9266720653125982, "grad_norm": 0.7236770987510681, "learning_rate": 4.2086697316103093e-07, "loss": 0.0746, "step": 42054 }, { "epoch": 0.9266941006021143, "grad_norm": 0.4908192753791809, "learning_rate": 4.20615199206108e-07, "loss": 0.049, "step": 42055 }, { "epoch": 0.9267161358916305, "grad_norm": 0.4207886755466461, "learning_rate": 4.2036349951181275e-07, "loss": 0.0508, "step": 42056 }, { "epoch": 0.9267381711811467, "grad_norm": 0.4695318639278412, "learning_rate": 4.2011187407942076e-07, "loss": 0.0508, "step": 42057 }, { "epoch": 0.9267602064706628, "grad_norm": 0.5461178421974182, "learning_rate": 4.198603229102194e-07, "loss": 0.0639, "step": 42058 }, { "epoch": 0.926782241760179, "grad_norm": 0.33945193886756897, "learning_rate": 4.1960884600548754e-07, "loss": 0.0624, "step": 42059 }, { "epoch": 0.926804277049695, "grad_norm": 0.4466497004032135, "learning_rate": 4.193574433665043e-07, "loss": 0.0533, "step": 42060 }, { "epoch": 0.9268263123392112, "grad_norm": 0.7742688655853271, "learning_rate": 4.191061149945552e-07, "loss": 0.044, "step": 42061 }, { "epoch": 0.9268483476287274, "grad_norm": 0.8600077629089355, "learning_rate": 4.1885486089091595e-07, "loss": 0.0424, "step": 42062 }, { "epoch": 0.9268703829182435, "grad_norm": 0.591781497001648, "learning_rate": 4.1860368105687055e-07, "loss": 0.0611, "step": 42063 }, { "epoch": 0.9268924182077597, "grad_norm": 0.5178589224815369, "learning_rate": 4.183525754936929e-07, "loss": 0.0556, "step": 42064 }, { "epoch": 0.9269144534972759, "grad_norm": 0.468561589717865, "learning_rate": 4.1810154420266535e-07, "loss": 0.0487, "step": 42065 }, { "epoch": 0.926936488786792, "grad_norm": 0.6780340075492859, "learning_rate": 4.1785058718506686e-07, "loss": 0.0574, "step": 42066 }, { "epoch": 0.9269585240763082, "grad_norm": 0.4906289875507355, "learning_rate": 4.175997044421748e-07, "loss": 0.0435, "step": 42067 }, { "epoch": 0.9269805593658244, "grad_norm": 0.4881037175655365, "learning_rate": 4.1734889597526635e-07, "loss": 0.06, "step": 42068 }, { "epoch": 0.9270025946553405, "grad_norm": 0.7965572476387024, "learning_rate": 4.1709816178562067e-07, "loss": 0.0451, "step": 42069 }, { "epoch": 0.9270246299448567, "grad_norm": 0.6011744141578674, "learning_rate": 4.1684750187451495e-07, "loss": 0.0447, "step": 42070 }, { "epoch": 0.9270466652343728, "grad_norm": 0.5888426899909973, "learning_rate": 4.165969162432232e-07, "loss": 0.0634, "step": 42071 }, { "epoch": 0.927068700523889, "grad_norm": 0.4498428702354431, "learning_rate": 4.163464048930227e-07, "loss": 0.041, "step": 42072 }, { "epoch": 0.9270907358134052, "grad_norm": 0.78469318151474, "learning_rate": 4.160959678251924e-07, "loss": 0.0815, "step": 42073 }, { "epoch": 0.9271127711029213, "grad_norm": 0.5733848810195923, "learning_rate": 4.1584560504100476e-07, "loss": 0.0625, "step": 42074 }, { "epoch": 0.9271348063924375, "grad_norm": 0.3772623538970947, "learning_rate": 4.1559531654173865e-07, "loss": 0.0349, "step": 42075 }, { "epoch": 0.9271568416819537, "grad_norm": 0.4006408154964447, "learning_rate": 4.1534510232866143e-07, "loss": 0.051, "step": 42076 }, { "epoch": 0.9271788769714698, "grad_norm": 0.4204023778438568, "learning_rate": 4.15094962403057e-07, "loss": 0.0613, "step": 42077 }, { "epoch": 0.927200912260986, "grad_norm": 0.5144542455673218, "learning_rate": 4.1484489676619276e-07, "loss": 0.0492, "step": 42078 }, { "epoch": 0.9272229475505022, "grad_norm": 0.600788414478302, "learning_rate": 4.1459490541934596e-07, "loss": 0.055, "step": 42079 }, { "epoch": 0.9272449828400183, "grad_norm": 0.3315027356147766, "learning_rate": 4.14344988363789e-07, "loss": 0.0367, "step": 42080 }, { "epoch": 0.9272670181295345, "grad_norm": 0.5970927476882935, "learning_rate": 4.140951456007924e-07, "loss": 0.056, "step": 42081 }, { "epoch": 0.9272890534190507, "grad_norm": 1.0045225620269775, "learning_rate": 4.1384537713163197e-07, "loss": 0.0894, "step": 42082 }, { "epoch": 0.9273110887085668, "grad_norm": 0.5513004660606384, "learning_rate": 4.135956829575766e-07, "loss": 0.0516, "step": 42083 }, { "epoch": 0.9273331239980829, "grad_norm": 0.40730586647987366, "learning_rate": 4.133460630799035e-07, "loss": 0.0437, "step": 42084 }, { "epoch": 0.927355159287599, "grad_norm": 0.7615055441856384, "learning_rate": 4.130965174998802e-07, "loss": 0.0542, "step": 42085 }, { "epoch": 0.9273771945771152, "grad_norm": 0.5068759322166443, "learning_rate": 4.128470462187789e-07, "loss": 0.0532, "step": 42086 }, { "epoch": 0.9273992298666314, "grad_norm": 0.5727839469909668, "learning_rate": 4.125976492378686e-07, "loss": 0.0574, "step": 42087 }, { "epoch": 0.9274212651561475, "grad_norm": 0.3596741259098053, "learning_rate": 4.1234832655841994e-07, "loss": 0.0348, "step": 42088 }, { "epoch": 0.9274433004456637, "grad_norm": 0.5971947312355042, "learning_rate": 4.120990781817052e-07, "loss": 0.0628, "step": 42089 }, { "epoch": 0.9274653357351799, "grad_norm": 0.8510838150978088, "learning_rate": 4.118499041089918e-07, "loss": 0.0644, "step": 42090 }, { "epoch": 0.927487371024696, "grad_norm": 0.6107050776481628, "learning_rate": 4.116008043415503e-07, "loss": 0.0672, "step": 42091 }, { "epoch": 0.9275094063142122, "grad_norm": 0.5517033338546753, "learning_rate": 4.113517788806481e-07, "loss": 0.0432, "step": 42092 }, { "epoch": 0.9275314416037284, "grad_norm": 0.8591353893280029, "learning_rate": 4.1110282772755583e-07, "loss": 0.0543, "step": 42093 }, { "epoch": 0.9275534768932445, "grad_norm": 0.49798527359962463, "learning_rate": 4.1085395088353905e-07, "loss": 0.0682, "step": 42094 }, { "epoch": 0.9275755121827607, "grad_norm": 0.4408951997756958, "learning_rate": 4.106051483498652e-07, "loss": 0.0216, "step": 42095 }, { "epoch": 0.9275975474722769, "grad_norm": 0.7614092230796814, "learning_rate": 4.1035642012780484e-07, "loss": 0.0912, "step": 42096 }, { "epoch": 0.927619582761793, "grad_norm": 0.8409495949745178, "learning_rate": 4.10107766218622e-07, "loss": 0.0494, "step": 42097 }, { "epoch": 0.9276416180513092, "grad_norm": 0.38102421164512634, "learning_rate": 4.0985918662358566e-07, "loss": 0.0543, "step": 42098 }, { "epoch": 0.9276636533408253, "grad_norm": 0.8246488571166992, "learning_rate": 4.0961068134395653e-07, "loss": 0.0869, "step": 42099 }, { "epoch": 0.9276856886303415, "grad_norm": 0.49537861347198486, "learning_rate": 4.093622503810068e-07, "loss": 0.0865, "step": 42100 }, { "epoch": 0.9277077239198577, "grad_norm": 0.6059728264808655, "learning_rate": 4.091138937359989e-07, "loss": 0.0439, "step": 42101 }, { "epoch": 0.9277297592093738, "grad_norm": 0.6271867156028748, "learning_rate": 4.088656114101968e-07, "loss": 0.0806, "step": 42102 }, { "epoch": 0.92775179449889, "grad_norm": 0.31747251749038696, "learning_rate": 4.0861740340486954e-07, "loss": 0.055, "step": 42103 }, { "epoch": 0.9277738297884062, "grad_norm": 0.5374951958656311, "learning_rate": 4.0836926972127597e-07, "loss": 0.0685, "step": 42104 }, { "epoch": 0.9277958650779223, "grad_norm": 0.823310911655426, "learning_rate": 4.081212103606835e-07, "loss": 0.0614, "step": 42105 }, { "epoch": 0.9278179003674385, "grad_norm": 0.3930615186691284, "learning_rate": 4.078732253243528e-07, "loss": 0.0724, "step": 42106 }, { "epoch": 0.9278399356569547, "grad_norm": 0.5011114478111267, "learning_rate": 4.076253146135511e-07, "loss": 0.0642, "step": 42107 }, { "epoch": 0.9278619709464708, "grad_norm": 0.752743124961853, "learning_rate": 4.0737747822953587e-07, "loss": 0.0502, "step": 42108 }, { "epoch": 0.9278840062359869, "grad_norm": 0.35685431957244873, "learning_rate": 4.0712971617357595e-07, "loss": 0.046, "step": 42109 }, { "epoch": 0.927906041525503, "grad_norm": 0.5661906599998474, "learning_rate": 4.0688202844692545e-07, "loss": 0.0686, "step": 42110 }, { "epoch": 0.9279280768150192, "grad_norm": 0.6416738033294678, "learning_rate": 4.0663441505085333e-07, "loss": 0.0466, "step": 42111 }, { "epoch": 0.9279501121045354, "grad_norm": 0.5960641503334045, "learning_rate": 4.063868759866185e-07, "loss": 0.0442, "step": 42112 }, { "epoch": 0.9279721473940515, "grad_norm": 0.39378586411476135, "learning_rate": 4.0613941125547836e-07, "loss": 0.0498, "step": 42113 }, { "epoch": 0.9279941826835677, "grad_norm": 0.8099272847175598, "learning_rate": 4.058920208586969e-07, "loss": 0.0661, "step": 42114 }, { "epoch": 0.9280162179730839, "grad_norm": 0.4208109676837921, "learning_rate": 4.056447047975331e-07, "loss": 0.0673, "step": 42115 }, { "epoch": 0.9280382532626, "grad_norm": 0.7234671115875244, "learning_rate": 4.053974630732477e-07, "loss": 0.0762, "step": 42116 }, { "epoch": 0.9280602885521162, "grad_norm": 0.7532421350479126, "learning_rate": 4.051502956870995e-07, "loss": 0.077, "step": 42117 }, { "epoch": 0.9280823238416324, "grad_norm": 0.711792528629303, "learning_rate": 4.0490320264034595e-07, "loss": 0.0614, "step": 42118 }, { "epoch": 0.9281043591311485, "grad_norm": 0.3196318745613098, "learning_rate": 4.0465618393424774e-07, "loss": 0.0361, "step": 42119 }, { "epoch": 0.9281263944206647, "grad_norm": 0.508330225944519, "learning_rate": 4.0440923957006217e-07, "loss": 0.0528, "step": 42120 }, { "epoch": 0.9281484297101809, "grad_norm": 0.6375804543495178, "learning_rate": 4.041623695490465e-07, "loss": 0.0509, "step": 42121 }, { "epoch": 0.928170464999697, "grad_norm": 1.0798581838607788, "learning_rate": 4.0391557387245983e-07, "loss": 0.0735, "step": 42122 }, { "epoch": 0.9281925002892132, "grad_norm": 0.5581551790237427, "learning_rate": 4.0366885254155775e-07, "loss": 0.0447, "step": 42123 }, { "epoch": 0.9282145355787293, "grad_norm": 0.11552027612924576, "learning_rate": 4.0342220555759755e-07, "loss": 0.0305, "step": 42124 }, { "epoch": 0.9282365708682455, "grad_norm": 0.5337674617767334, "learning_rate": 4.0317563292183503e-07, "loss": 0.0361, "step": 42125 }, { "epoch": 0.9282586061577617, "grad_norm": 0.6444417834281921, "learning_rate": 4.029291346355257e-07, "loss": 0.056, "step": 42126 }, { "epoch": 0.9282806414472778, "grad_norm": 0.6904199719429016, "learning_rate": 4.0268271069992703e-07, "loss": 0.0774, "step": 42127 }, { "epoch": 0.928302676736794, "grad_norm": 0.5972409248352051, "learning_rate": 4.024363611162929e-07, "loss": 0.0589, "step": 42128 }, { "epoch": 0.9283247120263102, "grad_norm": 0.4867396354675293, "learning_rate": 4.0219008588587737e-07, "loss": 0.0524, "step": 42129 }, { "epoch": 0.9283467473158263, "grad_norm": 0.46753212809562683, "learning_rate": 4.0194388500993773e-07, "loss": 0.0656, "step": 42130 }, { "epoch": 0.9283687826053425, "grad_norm": 0.431296706199646, "learning_rate": 4.0169775848972467e-07, "loss": 0.0457, "step": 42131 }, { "epoch": 0.9283908178948587, "grad_norm": 0.9036409854888916, "learning_rate": 4.0145170632649386e-07, "loss": 0.0751, "step": 42132 }, { "epoch": 0.9284128531843748, "grad_norm": 0.9424900412559509, "learning_rate": 4.012057285214943e-07, "loss": 0.0725, "step": 42133 }, { "epoch": 0.9284348884738909, "grad_norm": 0.8619340658187866, "learning_rate": 4.009598250759866e-07, "loss": 0.0629, "step": 42134 }, { "epoch": 0.928456923763407, "grad_norm": 0.5597470998764038, "learning_rate": 4.0071399599121816e-07, "loss": 0.0524, "step": 42135 }, { "epoch": 0.9284789590529232, "grad_norm": 0.7500157952308655, "learning_rate": 4.0046824126844296e-07, "loss": 0.0593, "step": 42136 }, { "epoch": 0.9285009943424394, "grad_norm": 0.4657168686389923, "learning_rate": 4.0022256090890996e-07, "loss": 0.0619, "step": 42137 }, { "epoch": 0.9285230296319555, "grad_norm": 0.7125430703163147, "learning_rate": 3.999769549138732e-07, "loss": 0.0447, "step": 42138 }, { "epoch": 0.9285450649214717, "grad_norm": 0.38410574197769165, "learning_rate": 3.9973142328458334e-07, "loss": 0.0421, "step": 42139 }, { "epoch": 0.9285671002109879, "grad_norm": 0.9474286437034607, "learning_rate": 3.9948596602228937e-07, "loss": 0.0455, "step": 42140 }, { "epoch": 0.928589135500504, "grad_norm": 0.42773473262786865, "learning_rate": 3.992405831282436e-07, "loss": 0.0478, "step": 42141 }, { "epoch": 0.9286111707900202, "grad_norm": 0.40386703610420227, "learning_rate": 3.989952746036951e-07, "loss": 0.0551, "step": 42142 }, { "epoch": 0.9286332060795364, "grad_norm": 0.9511014223098755, "learning_rate": 3.987500404498945e-07, "loss": 0.0555, "step": 42143 }, { "epoch": 0.9286552413690525, "grad_norm": 0.8621142506599426, "learning_rate": 3.9850488066808744e-07, "loss": 0.0603, "step": 42144 }, { "epoch": 0.9286772766585687, "grad_norm": 0.39554741978645325, "learning_rate": 3.9825979525952625e-07, "loss": 0.0708, "step": 42145 }, { "epoch": 0.9286993119480849, "grad_norm": 0.3954044282436371, "learning_rate": 3.9801478422545835e-07, "loss": 0.0378, "step": 42146 }, { "epoch": 0.928721347237601, "grad_norm": 0.31885531544685364, "learning_rate": 3.97769847567131e-07, "loss": 0.0542, "step": 42147 }, { "epoch": 0.9287433825271172, "grad_norm": 0.8773308992385864, "learning_rate": 3.975249852857915e-07, "loss": 0.0728, "step": 42148 }, { "epoch": 0.9287654178166334, "grad_norm": 0.2929416298866272, "learning_rate": 3.9728019738268726e-07, "loss": 0.0493, "step": 42149 }, { "epoch": 0.9287874531061495, "grad_norm": 0.44592568278312683, "learning_rate": 3.9703548385906895e-07, "loss": 0.0532, "step": 42150 }, { "epoch": 0.9288094883956657, "grad_norm": 0.3119995594024658, "learning_rate": 3.9679084471617555e-07, "loss": 0.0499, "step": 42151 }, { "epoch": 0.9288315236851818, "grad_norm": 0.8049126267433167, "learning_rate": 3.965462799552594e-07, "loss": 0.076, "step": 42152 }, { "epoch": 0.928853558974698, "grad_norm": 0.9556389451026917, "learning_rate": 3.9630178957756445e-07, "loss": 0.0573, "step": 42153 }, { "epoch": 0.9288755942642142, "grad_norm": 0.9250341653823853, "learning_rate": 3.960573735843348e-07, "loss": 0.0692, "step": 42154 }, { "epoch": 0.9288976295537303, "grad_norm": 0.28876304626464844, "learning_rate": 3.9581303197681775e-07, "loss": 0.0571, "step": 42155 }, { "epoch": 0.9289196648432465, "grad_norm": 0.6181818246841431, "learning_rate": 3.9556876475625225e-07, "loss": 0.0519, "step": 42156 }, { "epoch": 0.9289417001327627, "grad_norm": 0.3832695484161377, "learning_rate": 3.953245719238907e-07, "loss": 0.0385, "step": 42157 }, { "epoch": 0.9289637354222787, "grad_norm": 0.40933361649513245, "learning_rate": 3.950804534809704e-07, "loss": 0.0548, "step": 42158 }, { "epoch": 0.9289857707117949, "grad_norm": 0.8848361968994141, "learning_rate": 3.9483640942873875e-07, "loss": 0.0649, "step": 42159 }, { "epoch": 0.929007806001311, "grad_norm": 0.4743114113807678, "learning_rate": 3.9459243976843474e-07, "loss": 0.0289, "step": 42160 }, { "epoch": 0.9290298412908272, "grad_norm": 0.4288340210914612, "learning_rate": 3.9434854450130564e-07, "loss": 0.065, "step": 42161 }, { "epoch": 0.9290518765803434, "grad_norm": 0.6813847422599792, "learning_rate": 3.941047236285922e-07, "loss": 0.0436, "step": 42162 }, { "epoch": 0.9290739118698595, "grad_norm": 0.5151041746139526, "learning_rate": 3.9386097715153335e-07, "loss": 0.0636, "step": 42163 }, { "epoch": 0.9290959471593757, "grad_norm": 0.5934016704559326, "learning_rate": 3.936173050713732e-07, "loss": 0.0662, "step": 42164 }, { "epoch": 0.9291179824488919, "grad_norm": 0.49084195494651794, "learning_rate": 3.93373707389354e-07, "loss": 0.0358, "step": 42165 }, { "epoch": 0.929140017738408, "grad_norm": 0.62950599193573, "learning_rate": 3.931301841067148e-07, "loss": 0.0579, "step": 42166 }, { "epoch": 0.9291620530279242, "grad_norm": 0.3494236171245575, "learning_rate": 3.928867352246962e-07, "loss": 0.056, "step": 42167 }, { "epoch": 0.9291840883174404, "grad_norm": 0.45078107714653015, "learning_rate": 3.92643360744539e-07, "loss": 0.071, "step": 42168 }, { "epoch": 0.9292061236069565, "grad_norm": 0.46517372131347656, "learning_rate": 3.9240006066748213e-07, "loss": 0.068, "step": 42169 }, { "epoch": 0.9292281588964727, "grad_norm": 0.6082862019538879, "learning_rate": 3.9215683499476464e-07, "loss": 0.0516, "step": 42170 }, { "epoch": 0.9292501941859889, "grad_norm": 0.6217883825302124, "learning_rate": 3.919136837276255e-07, "loss": 0.041, "step": 42171 }, { "epoch": 0.929272229475505, "grad_norm": 0.9521903395652771, "learning_rate": 3.916706068673037e-07, "loss": 0.0906, "step": 42172 }, { "epoch": 0.9292942647650212, "grad_norm": 0.5252739191055298, "learning_rate": 3.9142760441503666e-07, "loss": 0.0424, "step": 42173 }, { "epoch": 0.9293163000545374, "grad_norm": 0.6934124231338501, "learning_rate": 3.9118467637206336e-07, "loss": 0.0516, "step": 42174 }, { "epoch": 0.9293383353440535, "grad_norm": 0.512411892414093, "learning_rate": 3.909418227396194e-07, "loss": 0.0399, "step": 42175 }, { "epoch": 0.9293603706335697, "grad_norm": 0.591596245765686, "learning_rate": 3.9069904351894394e-07, "loss": 0.0444, "step": 42176 }, { "epoch": 0.9293824059230859, "grad_norm": 0.4129488468170166, "learning_rate": 3.904563387112725e-07, "loss": 0.0729, "step": 42177 }, { "epoch": 0.929404441212602, "grad_norm": 0.26842883229255676, "learning_rate": 3.9021370831784086e-07, "loss": 0.0767, "step": 42178 }, { "epoch": 0.9294264765021182, "grad_norm": 0.3375196158885956, "learning_rate": 3.89971152339883e-07, "loss": 0.0402, "step": 42179 }, { "epoch": 0.9294485117916343, "grad_norm": 0.8709284663200378, "learning_rate": 3.897286707786396e-07, "loss": 0.0706, "step": 42180 }, { "epoch": 0.9294705470811505, "grad_norm": 0.5524513125419617, "learning_rate": 3.8948626363534133e-07, "loss": 0.0483, "step": 42181 }, { "epoch": 0.9294925823706667, "grad_norm": 0.3408738672733307, "learning_rate": 3.892439309112256e-07, "loss": 0.0604, "step": 42182 }, { "epoch": 0.9295146176601827, "grad_norm": 0.49997514486312866, "learning_rate": 3.890016726075229e-07, "loss": 0.0638, "step": 42183 }, { "epoch": 0.9295366529496989, "grad_norm": 0.646330714225769, "learning_rate": 3.8875948872547085e-07, "loss": 0.0362, "step": 42184 }, { "epoch": 0.9295586882392151, "grad_norm": 0.5814125537872314, "learning_rate": 3.8851737926630325e-07, "loss": 0.0814, "step": 42185 }, { "epoch": 0.9295807235287312, "grad_norm": 0.5117394924163818, "learning_rate": 3.882753442312509e-07, "loss": 0.0655, "step": 42186 }, { "epoch": 0.9296027588182474, "grad_norm": 0.48431119322776794, "learning_rate": 3.880333836215477e-07, "loss": 0.0394, "step": 42187 }, { "epoch": 0.9296247941077636, "grad_norm": 0.5464111566543579, "learning_rate": 3.8779149743842445e-07, "loss": 0.0602, "step": 42188 }, { "epoch": 0.9296468293972797, "grad_norm": 0.6381893754005432, "learning_rate": 3.8754968568311844e-07, "loss": 0.0721, "step": 42189 }, { "epoch": 0.9296688646867959, "grad_norm": 0.8201759457588196, "learning_rate": 3.8730794835685536e-07, "loss": 0.0684, "step": 42190 }, { "epoch": 0.929690899976312, "grad_norm": 0.7618167996406555, "learning_rate": 3.870662854608675e-07, "loss": 0.073, "step": 42191 }, { "epoch": 0.9297129352658282, "grad_norm": 0.5406531691551208, "learning_rate": 3.8682469699639064e-07, "loss": 0.064, "step": 42192 }, { "epoch": 0.9297349705553444, "grad_norm": 0.7226631045341492, "learning_rate": 3.865831829646488e-07, "loss": 0.046, "step": 42193 }, { "epoch": 0.9297570058448605, "grad_norm": 0.5062714219093323, "learning_rate": 3.863417433668759e-07, "loss": 0.0387, "step": 42194 }, { "epoch": 0.9297790411343767, "grad_norm": 0.7867134809494019, "learning_rate": 3.86100378204301e-07, "loss": 0.0817, "step": 42195 }, { "epoch": 0.9298010764238929, "grad_norm": 0.4578326344490051, "learning_rate": 3.8585908747815477e-07, "loss": 0.0464, "step": 42196 }, { "epoch": 0.929823111713409, "grad_norm": 0.5895786285400391, "learning_rate": 3.8561787118966295e-07, "loss": 0.0621, "step": 42197 }, { "epoch": 0.9298451470029252, "grad_norm": 0.425746351480484, "learning_rate": 3.8537672934005617e-07, "loss": 0.0482, "step": 42198 }, { "epoch": 0.9298671822924414, "grad_norm": 1.0282533168792725, "learning_rate": 3.8513566193056517e-07, "loss": 0.0684, "step": 42199 }, { "epoch": 0.9298892175819575, "grad_norm": 0.6705611348152161, "learning_rate": 3.8489466896241386e-07, "loss": 0.0366, "step": 42200 }, { "epoch": 0.9299112528714737, "grad_norm": 0.4257580041885376, "learning_rate": 3.846537504368314e-07, "loss": 0.0467, "step": 42201 }, { "epoch": 0.9299332881609899, "grad_norm": 0.8449110984802246, "learning_rate": 3.8441290635504333e-07, "loss": 0.0572, "step": 42202 }, { "epoch": 0.929955323450506, "grad_norm": 0.8169903755187988, "learning_rate": 3.8417213671828044e-07, "loss": 0.0628, "step": 42203 }, { "epoch": 0.9299773587400222, "grad_norm": 0.4641599655151367, "learning_rate": 3.8393144152776506e-07, "loss": 0.0624, "step": 42204 }, { "epoch": 0.9299993940295384, "grad_norm": 0.5290840864181519, "learning_rate": 3.836908207847245e-07, "loss": 0.0655, "step": 42205 }, { "epoch": 0.9300214293190545, "grad_norm": 0.8761743903160095, "learning_rate": 3.834502744903845e-07, "loss": 0.0917, "step": 42206 }, { "epoch": 0.9300434646085707, "grad_norm": 0.5425640344619751, "learning_rate": 3.832098026459707e-07, "loss": 0.0692, "step": 42207 }, { "epoch": 0.9300654998980867, "grad_norm": 0.4467603862285614, "learning_rate": 3.829694052527072e-07, "loss": 0.0514, "step": 42208 }, { "epoch": 0.9300875351876029, "grad_norm": 0.47967323660850525, "learning_rate": 3.827290823118179e-07, "loss": 0.0518, "step": 42209 }, { "epoch": 0.9301095704771191, "grad_norm": 0.361270934343338, "learning_rate": 3.824888338245286e-07, "loss": 0.0409, "step": 42210 }, { "epoch": 0.9301316057666352, "grad_norm": 1.0029079914093018, "learning_rate": 3.822486597920616e-07, "loss": 0.1082, "step": 42211 }, { "epoch": 0.9301536410561514, "grad_norm": 0.5028069615364075, "learning_rate": 3.8200856021564255e-07, "loss": 0.0386, "step": 42212 }, { "epoch": 0.9301756763456676, "grad_norm": 0.5857383012771606, "learning_rate": 3.8176853509649224e-07, "loss": 0.0472, "step": 42213 }, { "epoch": 0.9301977116351837, "grad_norm": 0.8135754466056824, "learning_rate": 3.815285844358329e-07, "loss": 0.0842, "step": 42214 }, { "epoch": 0.9302197469246999, "grad_norm": 0.7207797169685364, "learning_rate": 3.812887082348887e-07, "loss": 0.0521, "step": 42215 }, { "epoch": 0.930241782214216, "grad_norm": 0.4495892822742462, "learning_rate": 3.8104890649487857e-07, "loss": 0.0542, "step": 42216 }, { "epoch": 0.9302638175037322, "grad_norm": 0.3825010359287262, "learning_rate": 3.8080917921702654e-07, "loss": 0.0491, "step": 42217 }, { "epoch": 0.9302858527932484, "grad_norm": 0.7334785461425781, "learning_rate": 3.805695264025516e-07, "loss": 0.0503, "step": 42218 }, { "epoch": 0.9303078880827645, "grad_norm": 0.37985658645629883, "learning_rate": 3.8032994805267787e-07, "loss": 0.0231, "step": 42219 }, { "epoch": 0.9303299233722807, "grad_norm": 0.5516656041145325, "learning_rate": 3.8009044416862095e-07, "loss": 0.0579, "step": 42220 }, { "epoch": 0.9303519586617969, "grad_norm": 0.724345862865448, "learning_rate": 3.7985101475160487e-07, "loss": 0.0642, "step": 42221 }, { "epoch": 0.930373993951313, "grad_norm": 0.6553949117660522, "learning_rate": 3.796116598028471e-07, "loss": 0.0532, "step": 42222 }, { "epoch": 0.9303960292408292, "grad_norm": 0.43477872014045715, "learning_rate": 3.7937237932356816e-07, "loss": 0.059, "step": 42223 }, { "epoch": 0.9304180645303454, "grad_norm": 0.6363983750343323, "learning_rate": 3.7913317331498556e-07, "loss": 0.0547, "step": 42224 }, { "epoch": 0.9304400998198615, "grad_norm": 0.7198565602302551, "learning_rate": 3.788940417783149e-07, "loss": 0.0739, "step": 42225 }, { "epoch": 0.9304621351093777, "grad_norm": 0.6477773189544678, "learning_rate": 3.786549847147819e-07, "loss": 0.0845, "step": 42226 }, { "epoch": 0.9304841703988939, "grad_norm": 0.5238853096961975, "learning_rate": 3.784160021255972e-07, "loss": 0.0667, "step": 42227 }, { "epoch": 0.93050620568841, "grad_norm": 0.7612841725349426, "learning_rate": 3.7817709401198166e-07, "loss": 0.068, "step": 42228 }, { "epoch": 0.9305282409779262, "grad_norm": 0.6123030781745911, "learning_rate": 3.779382603751491e-07, "loss": 0.0573, "step": 42229 }, { "epoch": 0.9305502762674424, "grad_norm": 0.6443435549736023, "learning_rate": 3.7769950121632035e-07, "loss": 0.0495, "step": 42230 }, { "epoch": 0.9305723115569585, "grad_norm": 0.5945156812667847, "learning_rate": 3.774608165367077e-07, "loss": 0.0454, "step": 42231 }, { "epoch": 0.9305943468464746, "grad_norm": 0.5672129988670349, "learning_rate": 3.772222063375286e-07, "loss": 0.0557, "step": 42232 }, { "epoch": 0.9306163821359907, "grad_norm": 0.4775916337966919, "learning_rate": 3.7698367061999863e-07, "loss": 0.047, "step": 42233 }, { "epoch": 0.9306384174255069, "grad_norm": 0.7515520453453064, "learning_rate": 3.767452093853302e-07, "loss": 0.0429, "step": 42234 }, { "epoch": 0.9306604527150231, "grad_norm": 0.34253308176994324, "learning_rate": 3.7650682263474403e-07, "loss": 0.0543, "step": 42235 }, { "epoch": 0.9306824880045392, "grad_norm": 0.5343729257583618, "learning_rate": 3.7626851036944577e-07, "loss": 0.0485, "step": 42236 }, { "epoch": 0.9307045232940554, "grad_norm": 0.6385188698768616, "learning_rate": 3.7603027259065613e-07, "loss": 0.0877, "step": 42237 }, { "epoch": 0.9307265585835716, "grad_norm": 0.40469154715538025, "learning_rate": 3.7579210929958584e-07, "loss": 0.0615, "step": 42238 }, { "epoch": 0.9307485938730877, "grad_norm": 0.657812237739563, "learning_rate": 3.7555402049745056e-07, "loss": 0.0662, "step": 42239 }, { "epoch": 0.9307706291626039, "grad_norm": 0.48215922713279724, "learning_rate": 3.7531600618545937e-07, "loss": 0.1043, "step": 42240 }, { "epoch": 0.93079266445212, "grad_norm": 0.8917111754417419, "learning_rate": 3.750780663648246e-07, "loss": 0.0553, "step": 42241 }, { "epoch": 0.9308146997416362, "grad_norm": 0.5051928758621216, "learning_rate": 3.7484020103676184e-07, "loss": 0.0519, "step": 42242 }, { "epoch": 0.9308367350311524, "grad_norm": 0.7877606153488159, "learning_rate": 3.746024102024803e-07, "loss": 0.0523, "step": 42243 }, { "epoch": 0.9308587703206685, "grad_norm": 0.6909894347190857, "learning_rate": 3.7436469386319063e-07, "loss": 0.0923, "step": 42244 }, { "epoch": 0.9308808056101847, "grad_norm": 1.195516586303711, "learning_rate": 3.741270520201051e-07, "loss": 0.0998, "step": 42245 }, { "epoch": 0.9309028408997009, "grad_norm": 0.5506912469863892, "learning_rate": 3.7388948467443283e-07, "loss": 0.0512, "step": 42246 }, { "epoch": 0.930924876189217, "grad_norm": 0.5703266859054565, "learning_rate": 3.736519918273845e-07, "loss": 0.0356, "step": 42247 }, { "epoch": 0.9309469114787332, "grad_norm": 0.4951777756214142, "learning_rate": 3.7341457348017083e-07, "loss": 0.0514, "step": 42248 }, { "epoch": 0.9309689467682494, "grad_norm": 0.2733492851257324, "learning_rate": 3.731772296340008e-07, "loss": 0.0254, "step": 42249 }, { "epoch": 0.9309909820577655, "grad_norm": 0.32658058404922485, "learning_rate": 3.7293996029008016e-07, "loss": 0.0332, "step": 42250 }, { "epoch": 0.9310130173472817, "grad_norm": 1.1361980438232422, "learning_rate": 3.727027654496229e-07, "loss": 0.1028, "step": 42251 }, { "epoch": 0.9310350526367979, "grad_norm": 0.4452306628227234, "learning_rate": 3.724656451138314e-07, "loss": 0.0725, "step": 42252 }, { "epoch": 0.931057087926314, "grad_norm": 0.6626614332199097, "learning_rate": 3.722285992839181e-07, "loss": 0.0599, "step": 42253 }, { "epoch": 0.9310791232158302, "grad_norm": 0.8988470435142517, "learning_rate": 3.719916279610869e-07, "loss": 0.0627, "step": 42254 }, { "epoch": 0.9311011585053464, "grad_norm": 0.6073031425476074, "learning_rate": 3.7175473114654854e-07, "loss": 0.0692, "step": 42255 }, { "epoch": 0.9311231937948625, "grad_norm": 0.5801507830619812, "learning_rate": 3.715179088415072e-07, "loss": 0.0584, "step": 42256 }, { "epoch": 0.9311452290843786, "grad_norm": 0.3839346766471863, "learning_rate": 3.7128116104716836e-07, "loss": 0.0861, "step": 42257 }, { "epoch": 0.9311672643738947, "grad_norm": 0.3545217216014862, "learning_rate": 3.710444877647412e-07, "loss": 0.0377, "step": 42258 }, { "epoch": 0.9311892996634109, "grad_norm": 0.7146065831184387, "learning_rate": 3.7080788899542474e-07, "loss": 0.0703, "step": 42259 }, { "epoch": 0.9312113349529271, "grad_norm": 0.5909585356712341, "learning_rate": 3.7057136474043294e-07, "loss": 0.0528, "step": 42260 }, { "epoch": 0.9312333702424432, "grad_norm": 0.8562866449356079, "learning_rate": 3.703349150009649e-07, "loss": 0.069, "step": 42261 }, { "epoch": 0.9312554055319594, "grad_norm": 0.579814612865448, "learning_rate": 3.700985397782264e-07, "loss": 0.0612, "step": 42262 }, { "epoch": 0.9312774408214756, "grad_norm": 0.9102751016616821, "learning_rate": 3.698622390734196e-07, "loss": 0.0908, "step": 42263 }, { "epoch": 0.9312994761109917, "grad_norm": 0.6147626042366028, "learning_rate": 3.6962601288775045e-07, "loss": 0.067, "step": 42264 }, { "epoch": 0.9313215114005079, "grad_norm": 0.3576795160770416, "learning_rate": 3.693898612224228e-07, "loss": 0.0362, "step": 42265 }, { "epoch": 0.9313435466900241, "grad_norm": 0.7821642160415649, "learning_rate": 3.6915378407863744e-07, "loss": 0.0826, "step": 42266 }, { "epoch": 0.9313655819795402, "grad_norm": 0.7153089642524719, "learning_rate": 3.689177814575967e-07, "loss": 0.0729, "step": 42267 }, { "epoch": 0.9313876172690564, "grad_norm": 0.7740236520767212, "learning_rate": 3.68681853360503e-07, "loss": 0.0647, "step": 42268 }, { "epoch": 0.9314096525585726, "grad_norm": 0.994129478931427, "learning_rate": 3.68445999788562e-07, "loss": 0.0798, "step": 42269 }, { "epoch": 0.9314316878480887, "grad_norm": 0.4717579782009125, "learning_rate": 3.6821022074296774e-07, "loss": 0.0466, "step": 42270 }, { "epoch": 0.9314537231376049, "grad_norm": 0.651714563369751, "learning_rate": 3.6797451622492595e-07, "loss": 0.0506, "step": 42271 }, { "epoch": 0.931475758427121, "grad_norm": 0.4847295582294464, "learning_rate": 3.67738886235639e-07, "loss": 0.0638, "step": 42272 }, { "epoch": 0.9314977937166372, "grad_norm": 0.37436679005622864, "learning_rate": 3.675033307763009e-07, "loss": 0.0475, "step": 42273 }, { "epoch": 0.9315198290061534, "grad_norm": 0.9036223888397217, "learning_rate": 3.672678498481175e-07, "loss": 0.0612, "step": 42274 }, { "epoch": 0.9315418642956695, "grad_norm": 0.6903297305107117, "learning_rate": 3.670324434522826e-07, "loss": 0.052, "step": 42275 }, { "epoch": 0.9315638995851857, "grad_norm": 0.459041953086853, "learning_rate": 3.667971115900004e-07, "loss": 0.0606, "step": 42276 }, { "epoch": 0.9315859348747019, "grad_norm": 0.2747197151184082, "learning_rate": 3.665618542624666e-07, "loss": 0.0333, "step": 42277 }, { "epoch": 0.931607970164218, "grad_norm": 0.7131180763244629, "learning_rate": 3.6632667147088183e-07, "loss": 0.0665, "step": 42278 }, { "epoch": 0.9316300054537342, "grad_norm": 0.768887996673584, "learning_rate": 3.660915632164419e-07, "loss": 0.0623, "step": 42279 }, { "epoch": 0.9316520407432504, "grad_norm": 0.3951259255409241, "learning_rate": 3.658565295003441e-07, "loss": 0.0436, "step": 42280 }, { "epoch": 0.9316740760327665, "grad_norm": 0.5510576367378235, "learning_rate": 3.656215703237875e-07, "loss": 0.0813, "step": 42281 }, { "epoch": 0.9316961113222826, "grad_norm": 0.5078368782997131, "learning_rate": 3.653866856879645e-07, "loss": 0.0441, "step": 42282 }, { "epoch": 0.9317181466117987, "grad_norm": 0.7073560357093811, "learning_rate": 3.6515187559407914e-07, "loss": 0.0631, "step": 42283 }, { "epoch": 0.9317401819013149, "grad_norm": 0.5099357962608337, "learning_rate": 3.6491714004332046e-07, "loss": 0.054, "step": 42284 }, { "epoch": 0.9317622171908311, "grad_norm": 0.8445284962654114, "learning_rate": 3.6468247903688913e-07, "loss": 0.0643, "step": 42285 }, { "epoch": 0.9317842524803472, "grad_norm": 0.2582099139690399, "learning_rate": 3.644478925759759e-07, "loss": 0.0595, "step": 42286 }, { "epoch": 0.9318062877698634, "grad_norm": 0.8720759749412537, "learning_rate": 3.6421338066177814e-07, "loss": 0.0638, "step": 42287 }, { "epoch": 0.9318283230593796, "grad_norm": 0.6545811891555786, "learning_rate": 3.639789432954915e-07, "loss": 0.0645, "step": 42288 }, { "epoch": 0.9318503583488957, "grad_norm": 0.8261605501174927, "learning_rate": 3.6374458047830684e-07, "loss": 0.0791, "step": 42289 }, { "epoch": 0.9318723936384119, "grad_norm": 0.2059607207775116, "learning_rate": 3.6351029221141975e-07, "loss": 0.0473, "step": 42290 }, { "epoch": 0.9318944289279281, "grad_norm": 0.5005276203155518, "learning_rate": 3.6327607849602426e-07, "loss": 0.0605, "step": 42291 }, { "epoch": 0.9319164642174442, "grad_norm": 0.6984808444976807, "learning_rate": 3.6304193933331286e-07, "loss": 0.0659, "step": 42292 }, { "epoch": 0.9319384995069604, "grad_norm": 0.5882987976074219, "learning_rate": 3.628078747244762e-07, "loss": 0.051, "step": 42293 }, { "epoch": 0.9319605347964766, "grad_norm": 0.5343425869941711, "learning_rate": 3.6257388467070997e-07, "loss": 0.0375, "step": 42294 }, { "epoch": 0.9319825700859927, "grad_norm": 0.2739580571651459, "learning_rate": 3.623399691732032e-07, "loss": 0.0552, "step": 42295 }, { "epoch": 0.9320046053755089, "grad_norm": 0.6608701944351196, "learning_rate": 3.6210612823314835e-07, "loss": 0.0601, "step": 42296 }, { "epoch": 0.932026640665025, "grad_norm": 0.41117143630981445, "learning_rate": 3.618723618517378e-07, "loss": 0.042, "step": 42297 }, { "epoch": 0.9320486759545412, "grad_norm": 0.7238300442695618, "learning_rate": 3.6163867003015717e-07, "loss": 0.0633, "step": 42298 }, { "epoch": 0.9320707112440574, "grad_norm": 0.5721544027328491, "learning_rate": 3.6140505276960387e-07, "loss": 0.0466, "step": 42299 }, { "epoch": 0.9320927465335735, "grad_norm": 0.43817952275276184, "learning_rate": 3.61171510071262e-07, "loss": 0.0544, "step": 42300 }, { "epoch": 0.9321147818230897, "grad_norm": 0.512454628944397, "learning_rate": 3.609380419363273e-07, "loss": 0.0554, "step": 42301 }, { "epoch": 0.9321368171126059, "grad_norm": 0.5132095217704773, "learning_rate": 3.6070464836598205e-07, "loss": 0.0604, "step": 42302 }, { "epoch": 0.932158852402122, "grad_norm": 0.5179974436759949, "learning_rate": 3.604713293614187e-07, "loss": 0.0856, "step": 42303 }, { "epoch": 0.9321808876916382, "grad_norm": 0.6896418333053589, "learning_rate": 3.6023808492382626e-07, "loss": 0.0751, "step": 42304 }, { "epoch": 0.9322029229811544, "grad_norm": 0.6256599426269531, "learning_rate": 3.6000491505439046e-07, "loss": 0.0678, "step": 42305 }, { "epoch": 0.9322249582706705, "grad_norm": 0.49769240617752075, "learning_rate": 3.597718197543004e-07, "loss": 0.0486, "step": 42306 }, { "epoch": 0.9322469935601866, "grad_norm": 0.6356514096260071, "learning_rate": 3.595387990247434e-07, "loss": 0.0816, "step": 42307 }, { "epoch": 0.9322690288497028, "grad_norm": 0.59033203125, "learning_rate": 3.593058528669085e-07, "loss": 0.0487, "step": 42308 }, { "epoch": 0.9322910641392189, "grad_norm": 0.9168450832366943, "learning_rate": 3.5907298128197654e-07, "loss": 0.0697, "step": 42309 }, { "epoch": 0.9323130994287351, "grad_norm": 0.4598192274570465, "learning_rate": 3.588401842711381e-07, "loss": 0.0415, "step": 42310 }, { "epoch": 0.9323351347182512, "grad_norm": 0.6336793899536133, "learning_rate": 3.586074618355789e-07, "loss": 0.0506, "step": 42311 }, { "epoch": 0.9323571700077674, "grad_norm": 0.39436599612236023, "learning_rate": 3.5837481397648143e-07, "loss": 0.0313, "step": 42312 }, { "epoch": 0.9323792052972836, "grad_norm": 0.8007344007492065, "learning_rate": 3.5814224069503297e-07, "loss": 0.0676, "step": 42313 }, { "epoch": 0.9324012405867997, "grad_norm": 0.5310509204864502, "learning_rate": 3.5790974199241765e-07, "loss": 0.0681, "step": 42314 }, { "epoch": 0.9324232758763159, "grad_norm": 0.5943204164505005, "learning_rate": 3.5767731786982117e-07, "loss": 0.0523, "step": 42315 }, { "epoch": 0.9324453111658321, "grad_norm": 0.6105213761329651, "learning_rate": 3.5744496832842424e-07, "loss": 0.0487, "step": 42316 }, { "epoch": 0.9324673464553482, "grad_norm": 0.7967960238456726, "learning_rate": 3.5721269336941254e-07, "loss": 0.0757, "step": 42317 }, { "epoch": 0.9324893817448644, "grad_norm": 0.7046473026275635, "learning_rate": 3.569804929939702e-07, "loss": 0.0572, "step": 42318 }, { "epoch": 0.9325114170343806, "grad_norm": 0.6233252882957458, "learning_rate": 3.567483672032762e-07, "loss": 0.0476, "step": 42319 }, { "epoch": 0.9325334523238967, "grad_norm": 0.4686313271522522, "learning_rate": 3.565163159985163e-07, "loss": 0.0483, "step": 42320 }, { "epoch": 0.9325554876134129, "grad_norm": 0.2690710425376892, "learning_rate": 3.5628433938086956e-07, "loss": 0.0508, "step": 42321 }, { "epoch": 0.9325775229029291, "grad_norm": 0.6936659812927246, "learning_rate": 3.560524373515217e-07, "loss": 0.0724, "step": 42322 }, { "epoch": 0.9325995581924452, "grad_norm": 0.48112717270851135, "learning_rate": 3.5582060991165013e-07, "loss": 0.0494, "step": 42323 }, { "epoch": 0.9326215934819614, "grad_norm": 0.6261571049690247, "learning_rate": 3.5558885706243713e-07, "loss": 0.0377, "step": 42324 }, { "epoch": 0.9326436287714776, "grad_norm": 0.46231961250305176, "learning_rate": 3.553571788050636e-07, "loss": 0.0425, "step": 42325 }, { "epoch": 0.9326656640609937, "grad_norm": 0.7295989990234375, "learning_rate": 3.5512557514070844e-07, "loss": 0.0678, "step": 42326 }, { "epoch": 0.9326876993505099, "grad_norm": 0.4132307171821594, "learning_rate": 3.5489404607055243e-07, "loss": 0.0392, "step": 42327 }, { "epoch": 0.932709734640026, "grad_norm": 0.8599073886871338, "learning_rate": 3.5466259159577296e-07, "loss": 0.0697, "step": 42328 }, { "epoch": 0.9327317699295422, "grad_norm": 0.8319354057312012, "learning_rate": 3.5443121171755245e-07, "loss": 0.0836, "step": 42329 }, { "epoch": 0.9327538052190584, "grad_norm": 0.5338850021362305, "learning_rate": 3.541999064370649e-07, "loss": 0.0374, "step": 42330 }, { "epoch": 0.9327758405085744, "grad_norm": 0.5501319766044617, "learning_rate": 3.539686757554944e-07, "loss": 0.0615, "step": 42331 }, { "epoch": 0.9327978757980906, "grad_norm": 0.37918809056282043, "learning_rate": 3.5373751967401005e-07, "loss": 0.0504, "step": 42332 }, { "epoch": 0.9328199110876068, "grad_norm": 0.47244611382484436, "learning_rate": 3.535064381937991e-07, "loss": 0.0635, "step": 42333 }, { "epoch": 0.9328419463771229, "grad_norm": 0.7255577445030212, "learning_rate": 3.532754313160325e-07, "loss": 0.0672, "step": 42334 }, { "epoch": 0.9328639816666391, "grad_norm": 0.7396807670593262, "learning_rate": 3.5304449904188907e-07, "loss": 0.0543, "step": 42335 }, { "epoch": 0.9328860169561553, "grad_norm": 0.5522669553756714, "learning_rate": 3.52813641372543e-07, "loss": 0.067, "step": 42336 }, { "epoch": 0.9329080522456714, "grad_norm": 0.5903763175010681, "learning_rate": 3.525828583091717e-07, "loss": 0.0552, "step": 42337 }, { "epoch": 0.9329300875351876, "grad_norm": 0.7247342467308044, "learning_rate": 3.523521498529525e-07, "loss": 0.0914, "step": 42338 }, { "epoch": 0.9329521228247037, "grad_norm": 0.7187944650650024, "learning_rate": 3.5212151600505615e-07, "loss": 0.0453, "step": 42339 }, { "epoch": 0.9329741581142199, "grad_norm": 0.6598560214042664, "learning_rate": 3.5189095676666004e-07, "loss": 0.052, "step": 42340 }, { "epoch": 0.9329961934037361, "grad_norm": 0.8329393863677979, "learning_rate": 3.5166047213893826e-07, "loss": 0.0624, "step": 42341 }, { "epoch": 0.9330182286932522, "grad_norm": 0.5357841849327087, "learning_rate": 3.514300621230648e-07, "loss": 0.0372, "step": 42342 }, { "epoch": 0.9330402639827684, "grad_norm": 1.0444070100784302, "learning_rate": 3.5119972672021383e-07, "loss": 0.0645, "step": 42343 }, { "epoch": 0.9330622992722846, "grad_norm": 0.4200437068939209, "learning_rate": 3.5096946593155767e-07, "loss": 0.0601, "step": 42344 }, { "epoch": 0.9330843345618007, "grad_norm": 0.6190097332000732, "learning_rate": 3.5073927975827036e-07, "loss": 0.0635, "step": 42345 }, { "epoch": 0.9331063698513169, "grad_norm": 0.5682071447372437, "learning_rate": 3.5050916820152103e-07, "loss": 0.0641, "step": 42346 }, { "epoch": 0.9331284051408331, "grad_norm": 0.46298667788505554, "learning_rate": 3.5027913126248535e-07, "loss": 0.0363, "step": 42347 }, { "epoch": 0.9331504404303492, "grad_norm": 0.6195507645606995, "learning_rate": 3.500491689423341e-07, "loss": 0.0528, "step": 42348 }, { "epoch": 0.9331724757198654, "grad_norm": 0.8748758435249329, "learning_rate": 3.498192812422396e-07, "loss": 0.0503, "step": 42349 }, { "epoch": 0.9331945110093816, "grad_norm": 0.7298899292945862, "learning_rate": 3.4958946816336933e-07, "loss": 0.0609, "step": 42350 }, { "epoch": 0.9332165462988977, "grad_norm": 1.0150853395462036, "learning_rate": 3.4935972970689735e-07, "loss": 0.0785, "step": 42351 }, { "epoch": 0.9332385815884139, "grad_norm": 0.6960801482200623, "learning_rate": 3.4913006587399266e-07, "loss": 0.0459, "step": 42352 }, { "epoch": 0.93326061687793, "grad_norm": 0.6104487776756287, "learning_rate": 3.4890047666582437e-07, "loss": 0.0587, "step": 42353 }, { "epoch": 0.9332826521674462, "grad_norm": 0.7098932862281799, "learning_rate": 3.4867096208356484e-07, "loss": 0.0562, "step": 42354 }, { "epoch": 0.9333046874569624, "grad_norm": 0.42923521995544434, "learning_rate": 3.4844152212837656e-07, "loss": 0.048, "step": 42355 }, { "epoch": 0.9333267227464784, "grad_norm": 0.6470106840133667, "learning_rate": 3.4821215680143516e-07, "loss": 0.0546, "step": 42356 }, { "epoch": 0.9333487580359946, "grad_norm": 0.5278635621070862, "learning_rate": 3.4798286610390473e-07, "loss": 0.0383, "step": 42357 }, { "epoch": 0.9333707933255108, "grad_norm": 0.7753425240516663, "learning_rate": 3.4775365003695767e-07, "loss": 0.0514, "step": 42358 }, { "epoch": 0.9333928286150269, "grad_norm": 0.3773012161254883, "learning_rate": 3.475245086017548e-07, "loss": 0.0569, "step": 42359 }, { "epoch": 0.9334148639045431, "grad_norm": 0.6845206618309021, "learning_rate": 3.4729544179946837e-07, "loss": 0.0557, "step": 42360 }, { "epoch": 0.9334368991940593, "grad_norm": 0.8998578190803528, "learning_rate": 3.470664496312642e-07, "loss": 0.0632, "step": 42361 }, { "epoch": 0.9334589344835754, "grad_norm": 0.7703036665916443, "learning_rate": 3.468375320983064e-07, "loss": 0.0603, "step": 42362 }, { "epoch": 0.9334809697730916, "grad_norm": 0.7686632871627808, "learning_rate": 3.466086892017639e-07, "loss": 0.0568, "step": 42363 }, { "epoch": 0.9335030050626077, "grad_norm": 0.24390718340873718, "learning_rate": 3.463799209428009e-07, "loss": 0.0521, "step": 42364 }, { "epoch": 0.9335250403521239, "grad_norm": 0.5872990489006042, "learning_rate": 3.461512273225831e-07, "loss": 0.0762, "step": 42365 }, { "epoch": 0.9335470756416401, "grad_norm": 0.5530937314033508, "learning_rate": 3.4592260834227453e-07, "loss": 0.0372, "step": 42366 }, { "epoch": 0.9335691109311562, "grad_norm": 0.6977860331535339, "learning_rate": 3.4569406400304095e-07, "loss": 0.0486, "step": 42367 }, { "epoch": 0.9335911462206724, "grad_norm": 0.7651246190071106, "learning_rate": 3.454655943060464e-07, "loss": 0.067, "step": 42368 }, { "epoch": 0.9336131815101886, "grad_norm": 0.6630116105079651, "learning_rate": 3.4523719925245334e-07, "loss": 0.0601, "step": 42369 }, { "epoch": 0.9336352167997047, "grad_norm": 0.4759981632232666, "learning_rate": 3.4500887884342416e-07, "loss": 0.0346, "step": 42370 }, { "epoch": 0.9336572520892209, "grad_norm": 0.201028510928154, "learning_rate": 3.447806330801245e-07, "loss": 0.0317, "step": 42371 }, { "epoch": 0.9336792873787371, "grad_norm": 0.5951736569404602, "learning_rate": 3.445524619637169e-07, "loss": 0.0643, "step": 42372 }, { "epoch": 0.9337013226682532, "grad_norm": 0.7646483778953552, "learning_rate": 3.443243654953604e-07, "loss": 0.0627, "step": 42373 }, { "epoch": 0.9337233579577694, "grad_norm": 0.8553526401519775, "learning_rate": 3.440963436762207e-07, "loss": 0.0994, "step": 42374 }, { "epoch": 0.9337453932472856, "grad_norm": 0.6024706363677979, "learning_rate": 3.4386839650745683e-07, "loss": 0.0618, "step": 42375 }, { "epoch": 0.9337674285368017, "grad_norm": 0.7999944686889648, "learning_rate": 3.436405239902313e-07, "loss": 0.075, "step": 42376 }, { "epoch": 0.9337894638263179, "grad_norm": 0.7565144896507263, "learning_rate": 3.434127261257031e-07, "loss": 0.0831, "step": 42377 }, { "epoch": 0.933811499115834, "grad_norm": 0.5850280523300171, "learning_rate": 3.431850029150313e-07, "loss": 0.0577, "step": 42378 }, { "epoch": 0.9338335344053502, "grad_norm": 0.7071510553359985, "learning_rate": 3.429573543593817e-07, "loss": 0.0547, "step": 42379 }, { "epoch": 0.9338555696948664, "grad_norm": 0.5693602561950684, "learning_rate": 3.4272978045990834e-07, "loss": 0.0548, "step": 42380 }, { "epoch": 0.9338776049843824, "grad_norm": 0.25397005677223206, "learning_rate": 3.425022812177719e-07, "loss": 0.0505, "step": 42381 }, { "epoch": 0.9338996402738986, "grad_norm": 0.5045250654220581, "learning_rate": 3.422748566341316e-07, "loss": 0.0597, "step": 42382 }, { "epoch": 0.9339216755634148, "grad_norm": 0.578629195690155, "learning_rate": 3.420475067101447e-07, "loss": 0.0453, "step": 42383 }, { "epoch": 0.9339437108529309, "grad_norm": 0.6434034705162048, "learning_rate": 3.4182023144697205e-07, "loss": 0.0603, "step": 42384 }, { "epoch": 0.9339657461424471, "grad_norm": 0.4097043573856354, "learning_rate": 3.4159303084576765e-07, "loss": 0.0499, "step": 42385 }, { "epoch": 0.9339877814319633, "grad_norm": 0.6966468095779419, "learning_rate": 3.413659049076906e-07, "loss": 0.0631, "step": 42386 }, { "epoch": 0.9340098167214794, "grad_norm": 0.9464907646179199, "learning_rate": 3.411388536338966e-07, "loss": 0.082, "step": 42387 }, { "epoch": 0.9340318520109956, "grad_norm": 1.188087821006775, "learning_rate": 3.4091187702554485e-07, "loss": 0.0715, "step": 42388 }, { "epoch": 0.9340538873005118, "grad_norm": 0.6156164407730103, "learning_rate": 3.4068497508378924e-07, "loss": 0.0577, "step": 42389 }, { "epoch": 0.9340759225900279, "grad_norm": 0.3663586378097534, "learning_rate": 3.404581478097857e-07, "loss": 0.0745, "step": 42390 }, { "epoch": 0.9340979578795441, "grad_norm": 0.7537418603897095, "learning_rate": 3.402313952046915e-07, "loss": 0.073, "step": 42391 }, { "epoch": 0.9341199931690602, "grad_norm": 0.4156443476676941, "learning_rate": 3.400047172696574e-07, "loss": 0.0547, "step": 42392 }, { "epoch": 0.9341420284585764, "grad_norm": 0.8516354560852051, "learning_rate": 3.397781140058409e-07, "loss": 0.0669, "step": 42393 }, { "epoch": 0.9341640637480926, "grad_norm": 0.8426395654678345, "learning_rate": 3.39551585414396e-07, "loss": 0.0746, "step": 42394 }, { "epoch": 0.9341860990376087, "grad_norm": 0.4797476828098297, "learning_rate": 3.393251314964785e-07, "loss": 0.0489, "step": 42395 }, { "epoch": 0.9342081343271249, "grad_norm": 0.9291186928749084, "learning_rate": 3.3909875225323907e-07, "loss": 0.0656, "step": 42396 }, { "epoch": 0.9342301696166411, "grad_norm": 0.7089559435844421, "learning_rate": 3.3887244768583014e-07, "loss": 0.0553, "step": 42397 }, { "epoch": 0.9342522049061572, "grad_norm": 0.8986509442329407, "learning_rate": 3.386462177954075e-07, "loss": 0.0503, "step": 42398 }, { "epoch": 0.9342742401956734, "grad_norm": 0.903326690196991, "learning_rate": 3.384200625831202e-07, "loss": 0.0645, "step": 42399 }, { "epoch": 0.9342962754851896, "grad_norm": 0.7129490375518799, "learning_rate": 3.381939820501223e-07, "loss": 0.0356, "step": 42400 }, { "epoch": 0.9343183107747057, "grad_norm": 0.394161581993103, "learning_rate": 3.3796797619756294e-07, "loss": 0.0659, "step": 42401 }, { "epoch": 0.9343403460642219, "grad_norm": 0.9329366683959961, "learning_rate": 3.377420450265978e-07, "loss": 0.0681, "step": 42402 }, { "epoch": 0.9343623813537381, "grad_norm": 0.5259028077125549, "learning_rate": 3.375161885383743e-07, "loss": 0.0501, "step": 42403 }, { "epoch": 0.9343844166432542, "grad_norm": 0.672894299030304, "learning_rate": 3.372904067340432e-07, "loss": 0.0468, "step": 42404 }, { "epoch": 0.9344064519327703, "grad_norm": 0.9838550686836243, "learning_rate": 3.3706469961475526e-07, "loss": 0.0617, "step": 42405 }, { "epoch": 0.9344284872222864, "grad_norm": 0.5017509460449219, "learning_rate": 3.368390671816579e-07, "loss": 0.0344, "step": 42406 }, { "epoch": 0.9344505225118026, "grad_norm": 0.3472244441509247, "learning_rate": 3.366135094359052e-07, "loss": 0.0577, "step": 42407 }, { "epoch": 0.9344725578013188, "grad_norm": 0.7405363321304321, "learning_rate": 3.363880263786412e-07, "loss": 0.0826, "step": 42408 }, { "epoch": 0.9344945930908349, "grad_norm": 1.0191738605499268, "learning_rate": 3.36162618011015e-07, "loss": 0.0451, "step": 42409 }, { "epoch": 0.9345166283803511, "grad_norm": 0.4861299395561218, "learning_rate": 3.3593728433417734e-07, "loss": 0.0467, "step": 42410 }, { "epoch": 0.9345386636698673, "grad_norm": 0.4043191969394684, "learning_rate": 3.357120253492757e-07, "loss": 0.0621, "step": 42411 }, { "epoch": 0.9345606989593834, "grad_norm": 0.6936149597167969, "learning_rate": 3.354868410574541e-07, "loss": 0.0461, "step": 42412 }, { "epoch": 0.9345827342488996, "grad_norm": 0.3949744999408722, "learning_rate": 3.3526173145986327e-07, "loss": 0.0563, "step": 42413 }, { "epoch": 0.9346047695384158, "grad_norm": 0.5978462100028992, "learning_rate": 3.35036696557649e-07, "loss": 0.0929, "step": 42414 }, { "epoch": 0.9346268048279319, "grad_norm": 0.5346124768257141, "learning_rate": 3.348117363519554e-07, "loss": 0.0696, "step": 42415 }, { "epoch": 0.9346488401174481, "grad_norm": 0.9474238753318787, "learning_rate": 3.3458685084392814e-07, "loss": 0.0685, "step": 42416 }, { "epoch": 0.9346708754069643, "grad_norm": 0.23344102501869202, "learning_rate": 3.3436204003471636e-07, "loss": 0.0605, "step": 42417 }, { "epoch": 0.9346929106964804, "grad_norm": 0.6176032423973083, "learning_rate": 3.3413730392546413e-07, "loss": 0.0436, "step": 42418 }, { "epoch": 0.9347149459859966, "grad_norm": 0.7810982465744019, "learning_rate": 3.339126425173139e-07, "loss": 0.0512, "step": 42419 }, { "epoch": 0.9347369812755127, "grad_norm": 0.47605547308921814, "learning_rate": 3.336880558114097e-07, "loss": 0.0364, "step": 42420 }, { "epoch": 0.9347590165650289, "grad_norm": 0.4914194345474243, "learning_rate": 3.33463543808899e-07, "loss": 0.0695, "step": 42421 }, { "epoch": 0.9347810518545451, "grad_norm": 0.9027988314628601, "learning_rate": 3.3323910651092246e-07, "loss": 0.0637, "step": 42422 }, { "epoch": 0.9348030871440612, "grad_norm": 0.5267850756645203, "learning_rate": 3.330147439186243e-07, "loss": 0.0636, "step": 42423 }, { "epoch": 0.9348251224335774, "grad_norm": 0.7123528718948364, "learning_rate": 3.3279045603314516e-07, "loss": 0.0628, "step": 42424 }, { "epoch": 0.9348471577230936, "grad_norm": 0.5509799122810364, "learning_rate": 3.3256624285563255e-07, "loss": 0.0377, "step": 42425 }, { "epoch": 0.9348691930126097, "grad_norm": 0.8584052324295044, "learning_rate": 3.323421043872238e-07, "loss": 0.0663, "step": 42426 }, { "epoch": 0.9348912283021259, "grad_norm": 0.490515798330307, "learning_rate": 3.32118040629063e-07, "loss": 0.0607, "step": 42427 }, { "epoch": 0.9349132635916421, "grad_norm": 0.7310190200805664, "learning_rate": 3.318940515822894e-07, "loss": 0.0561, "step": 42428 }, { "epoch": 0.9349352988811582, "grad_norm": 0.3810572624206543, "learning_rate": 3.3167013724804515e-07, "loss": 0.065, "step": 42429 }, { "epoch": 0.9349573341706743, "grad_norm": 0.6475512385368347, "learning_rate": 3.3144629762747294e-07, "loss": 0.0412, "step": 42430 }, { "epoch": 0.9349793694601904, "grad_norm": 0.4806130528450012, "learning_rate": 3.3122253272170833e-07, "loss": 0.0492, "step": 42431 }, { "epoch": 0.9350014047497066, "grad_norm": 0.37469983100891113, "learning_rate": 3.3099884253189383e-07, "loss": 0.0401, "step": 42432 }, { "epoch": 0.9350234400392228, "grad_norm": 0.7451303601264954, "learning_rate": 3.3077522705916684e-07, "loss": 0.0849, "step": 42433 }, { "epoch": 0.9350454753287389, "grad_norm": 0.6610162854194641, "learning_rate": 3.305516863046715e-07, "loss": 0.0569, "step": 42434 }, { "epoch": 0.9350675106182551, "grad_norm": 1.074946641921997, "learning_rate": 3.3032822026954015e-07, "loss": 0.0864, "step": 42435 }, { "epoch": 0.9350895459077713, "grad_norm": 0.39278873801231384, "learning_rate": 3.3010482895491357e-07, "loss": 0.0471, "step": 42436 }, { "epoch": 0.9351115811972874, "grad_norm": 0.5268728137016296, "learning_rate": 3.2988151236193084e-07, "loss": 0.0495, "step": 42437 }, { "epoch": 0.9351336164868036, "grad_norm": 0.7787057161331177, "learning_rate": 3.296582704917278e-07, "loss": 0.0596, "step": 42438 }, { "epoch": 0.9351556517763198, "grad_norm": 0.3774147629737854, "learning_rate": 3.294351033454418e-07, "loss": 0.0509, "step": 42439 }, { "epoch": 0.9351776870658359, "grad_norm": 0.7869269251823425, "learning_rate": 3.2921201092421025e-07, "loss": 0.0625, "step": 42440 }, { "epoch": 0.9351997223553521, "grad_norm": 0.8824833035469055, "learning_rate": 3.28988993229169e-07, "loss": 0.0542, "step": 42441 }, { "epoch": 0.9352217576448683, "grad_norm": 0.5550054907798767, "learning_rate": 3.287660502614537e-07, "loss": 0.0855, "step": 42442 }, { "epoch": 0.9352437929343844, "grad_norm": 0.5254932045936584, "learning_rate": 3.285431820222001e-07, "loss": 0.0624, "step": 42443 }, { "epoch": 0.9352658282239006, "grad_norm": 0.8874894976615906, "learning_rate": 3.283203885125441e-07, "loss": 0.0591, "step": 42444 }, { "epoch": 0.9352878635134168, "grad_norm": 0.703428328037262, "learning_rate": 3.280976697336213e-07, "loss": 0.0565, "step": 42445 }, { "epoch": 0.9353098988029329, "grad_norm": 0.3797910213470459, "learning_rate": 3.2787502568656256e-07, "loss": 0.0618, "step": 42446 }, { "epoch": 0.9353319340924491, "grad_norm": 0.6137276291847229, "learning_rate": 3.276524563725036e-07, "loss": 0.0525, "step": 42447 }, { "epoch": 0.9353539693819652, "grad_norm": 0.7062071561813354, "learning_rate": 3.274299617925819e-07, "loss": 0.0671, "step": 42448 }, { "epoch": 0.9353760046714814, "grad_norm": 0.4167250692844391, "learning_rate": 3.272075419479248e-07, "loss": 0.0593, "step": 42449 }, { "epoch": 0.9353980399609976, "grad_norm": 0.7368252277374268, "learning_rate": 3.269851968396681e-07, "loss": 0.0642, "step": 42450 }, { "epoch": 0.9354200752505137, "grad_norm": 0.5537042617797852, "learning_rate": 3.2676292646894255e-07, "loss": 0.0471, "step": 42451 }, { "epoch": 0.9354421105400299, "grad_norm": 0.6116830706596375, "learning_rate": 3.265407308368856e-07, "loss": 0.0462, "step": 42452 }, { "epoch": 0.9354641458295461, "grad_norm": 0.6167735457420349, "learning_rate": 3.26318609944623e-07, "loss": 0.0547, "step": 42453 }, { "epoch": 0.9354861811190622, "grad_norm": 0.6590536832809448, "learning_rate": 3.260965637932889e-07, "loss": 0.0579, "step": 42454 }, { "epoch": 0.9355082164085783, "grad_norm": 1.1844793558120728, "learning_rate": 3.2587459238401395e-07, "loss": 0.0931, "step": 42455 }, { "epoch": 0.9355302516980945, "grad_norm": 0.3657669126987457, "learning_rate": 3.2565269571792733e-07, "loss": 0.0242, "step": 42456 }, { "epoch": 0.9355522869876106, "grad_norm": 0.8931936025619507, "learning_rate": 3.2543087379616307e-07, "loss": 0.0892, "step": 42457 }, { "epoch": 0.9355743222771268, "grad_norm": 0.5653123259544373, "learning_rate": 3.2520912661984536e-07, "loss": 0.0571, "step": 42458 }, { "epoch": 0.9355963575666429, "grad_norm": 0.9092668294906616, "learning_rate": 3.249874541901082e-07, "loss": 0.0782, "step": 42459 }, { "epoch": 0.9356183928561591, "grad_norm": 0.7085195779800415, "learning_rate": 3.247658565080791e-07, "loss": 0.0674, "step": 42460 }, { "epoch": 0.9356404281456753, "grad_norm": 0.6386174559593201, "learning_rate": 3.245443335748888e-07, "loss": 0.0573, "step": 42461 }, { "epoch": 0.9356624634351914, "grad_norm": 0.5841710567474365, "learning_rate": 3.2432288539166133e-07, "loss": 0.0663, "step": 42462 }, { "epoch": 0.9356844987247076, "grad_norm": 0.4281246066093445, "learning_rate": 3.241015119595275e-07, "loss": 0.0327, "step": 42463 }, { "epoch": 0.9357065340142238, "grad_norm": 0.5453546643257141, "learning_rate": 3.2388021327961647e-07, "loss": 0.0509, "step": 42464 }, { "epoch": 0.9357285693037399, "grad_norm": 0.8293541669845581, "learning_rate": 3.236589893530523e-07, "loss": 0.0684, "step": 42465 }, { "epoch": 0.9357506045932561, "grad_norm": 0.6213458180427551, "learning_rate": 3.2343784018096233e-07, "loss": 0.0616, "step": 42466 }, { "epoch": 0.9357726398827723, "grad_norm": 0.563612699508667, "learning_rate": 3.232167657644741e-07, "loss": 0.0917, "step": 42467 }, { "epoch": 0.9357946751722884, "grad_norm": 0.6243429780006409, "learning_rate": 3.2299576610471335e-07, "loss": 0.0527, "step": 42468 }, { "epoch": 0.9358167104618046, "grad_norm": 0.9413971900939941, "learning_rate": 3.2277484120280585e-07, "loss": 0.0663, "step": 42469 }, { "epoch": 0.9358387457513208, "grad_norm": 0.7287212610244751, "learning_rate": 3.225539910598757e-07, "loss": 0.0703, "step": 42470 }, { "epoch": 0.9358607810408369, "grad_norm": 0.5207265615463257, "learning_rate": 3.2233321567705034e-07, "loss": 0.0648, "step": 42471 }, { "epoch": 0.9358828163303531, "grad_norm": 0.5621873736381531, "learning_rate": 3.2211251505545046e-07, "loss": 0.1005, "step": 42472 }, { "epoch": 0.9359048516198692, "grad_norm": 0.5632802248001099, "learning_rate": 3.218918891962053e-07, "loss": 0.0465, "step": 42473 }, { "epoch": 0.9359268869093854, "grad_norm": 0.5703805088996887, "learning_rate": 3.2167133810043214e-07, "loss": 0.036, "step": 42474 }, { "epoch": 0.9359489221989016, "grad_norm": 0.7105011343955994, "learning_rate": 3.214508617692602e-07, "loss": 0.0723, "step": 42475 }, { "epoch": 0.9359709574884177, "grad_norm": 0.5362110733985901, "learning_rate": 3.212304602038102e-07, "loss": 0.0406, "step": 42476 }, { "epoch": 0.9359929927779339, "grad_norm": 0.6795896291732788, "learning_rate": 3.2101013340520626e-07, "loss": 0.0607, "step": 42477 }, { "epoch": 0.9360150280674501, "grad_norm": 0.5786995887756348, "learning_rate": 3.207898813745658e-07, "loss": 0.0493, "step": 42478 }, { "epoch": 0.9360370633569661, "grad_norm": 0.5222312211990356, "learning_rate": 3.205697041130162e-07, "loss": 0.0385, "step": 42479 }, { "epoch": 0.9360590986464823, "grad_norm": 0.4622616171836853, "learning_rate": 3.2034960162167836e-07, "loss": 0.0715, "step": 42480 }, { "epoch": 0.9360811339359985, "grad_norm": 0.3438657224178314, "learning_rate": 3.20129573901668e-07, "loss": 0.1015, "step": 42481 }, { "epoch": 0.9361031692255146, "grad_norm": 0.4519757330417633, "learning_rate": 3.199096209541125e-07, "loss": 0.0511, "step": 42482 }, { "epoch": 0.9361252045150308, "grad_norm": 0.9206674098968506, "learning_rate": 3.1968974278012766e-07, "loss": 0.064, "step": 42483 }, { "epoch": 0.936147239804547, "grad_norm": 0.21169768273830414, "learning_rate": 3.194699393808376e-07, "loss": 0.0236, "step": 42484 }, { "epoch": 0.9361692750940631, "grad_norm": 0.6408786773681641, "learning_rate": 3.1925021075735807e-07, "loss": 0.0646, "step": 42485 }, { "epoch": 0.9361913103835793, "grad_norm": 0.44625914096832275, "learning_rate": 3.190305569108082e-07, "loss": 0.052, "step": 42486 }, { "epoch": 0.9362133456730954, "grad_norm": 0.5193206667900085, "learning_rate": 3.188109778423104e-07, "loss": 0.0458, "step": 42487 }, { "epoch": 0.9362353809626116, "grad_norm": 0.5780375003814697, "learning_rate": 3.185914735529805e-07, "loss": 0.0769, "step": 42488 }, { "epoch": 0.9362574162521278, "grad_norm": 0.49306872487068176, "learning_rate": 3.183720440439358e-07, "loss": 0.0761, "step": 42489 }, { "epoch": 0.9362794515416439, "grad_norm": 0.8810708522796631, "learning_rate": 3.181526893162956e-07, "loss": 0.0923, "step": 42490 }, { "epoch": 0.9363014868311601, "grad_norm": 0.5033226013183594, "learning_rate": 3.1793340937117886e-07, "loss": 0.0744, "step": 42491 }, { "epoch": 0.9363235221206763, "grad_norm": 0.4740745425224304, "learning_rate": 3.177142042096981e-07, "loss": 0.052, "step": 42492 }, { "epoch": 0.9363455574101924, "grad_norm": 1.010536789894104, "learning_rate": 3.1749507383297236e-07, "loss": 0.0678, "step": 42493 }, { "epoch": 0.9363675926997086, "grad_norm": 0.4079546630382538, "learning_rate": 3.172760182421192e-07, "loss": 0.057, "step": 42494 }, { "epoch": 0.9363896279892248, "grad_norm": 0.8973246812820435, "learning_rate": 3.170570374382525e-07, "loss": 0.075, "step": 42495 }, { "epoch": 0.9364116632787409, "grad_norm": 0.8239725232124329, "learning_rate": 3.168381314224883e-07, "loss": 0.0561, "step": 42496 }, { "epoch": 0.9364336985682571, "grad_norm": 0.5026432871818542, "learning_rate": 3.1661930019593724e-07, "loss": 0.0401, "step": 42497 }, { "epoch": 0.9364557338577733, "grad_norm": 0.6900593042373657, "learning_rate": 3.164005437597217e-07, "loss": 0.0605, "step": 42498 }, { "epoch": 0.9364777691472894, "grad_norm": 0.6326711177825928, "learning_rate": 3.1618186211495094e-07, "loss": 0.0418, "step": 42499 }, { "epoch": 0.9364998044368056, "grad_norm": 0.3976828157901764, "learning_rate": 3.159632552627423e-07, "loss": 0.0826, "step": 42500 }, { "epoch": 0.9365218397263217, "grad_norm": 0.7389234900474548, "learning_rate": 3.157447232042049e-07, "loss": 0.0684, "step": 42501 }, { "epoch": 0.9365438750158379, "grad_norm": 0.39810073375701904, "learning_rate": 3.1552626594045287e-07, "loss": 0.0812, "step": 42502 }, { "epoch": 0.9365659103053541, "grad_norm": 0.5131168961524963, "learning_rate": 3.1530788347260196e-07, "loss": 0.0498, "step": 42503 }, { "epoch": 0.9365879455948701, "grad_norm": 0.7161620259284973, "learning_rate": 3.1508957580175966e-07, "loss": 0.0505, "step": 42504 }, { "epoch": 0.9366099808843863, "grad_norm": 0.6997950673103333, "learning_rate": 3.1487134292904507e-07, "loss": 0.0788, "step": 42505 }, { "epoch": 0.9366320161739025, "grad_norm": 0.5202924013137817, "learning_rate": 3.146531848555623e-07, "loss": 0.0595, "step": 42506 }, { "epoch": 0.9366540514634186, "grad_norm": 0.6840320229530334, "learning_rate": 3.144351015824287e-07, "loss": 0.0465, "step": 42507 }, { "epoch": 0.9366760867529348, "grad_norm": 0.8236808180809021, "learning_rate": 3.142170931107502e-07, "loss": 0.0676, "step": 42508 }, { "epoch": 0.936698122042451, "grad_norm": 0.4297591745853424, "learning_rate": 3.1399915944163917e-07, "loss": 0.0489, "step": 42509 }, { "epoch": 0.9367201573319671, "grad_norm": 0.33862626552581787, "learning_rate": 3.1378130057620634e-07, "loss": 0.0407, "step": 42510 }, { "epoch": 0.9367421926214833, "grad_norm": 0.849018394947052, "learning_rate": 3.1356351651555926e-07, "loss": 0.0752, "step": 42511 }, { "epoch": 0.9367642279109994, "grad_norm": 0.5639540553092957, "learning_rate": 3.133458072608103e-07, "loss": 0.0547, "step": 42512 }, { "epoch": 0.9367862632005156, "grad_norm": 0.37573671340942383, "learning_rate": 3.1312817281306527e-07, "loss": 0.0618, "step": 42513 }, { "epoch": 0.9368082984900318, "grad_norm": 0.620692253112793, "learning_rate": 3.1291061317343663e-07, "loss": 0.0558, "step": 42514 }, { "epoch": 0.9368303337795479, "grad_norm": 0.7767339944839478, "learning_rate": 3.126931283430268e-07, "loss": 0.0585, "step": 42515 }, { "epoch": 0.9368523690690641, "grad_norm": 0.5561264753341675, "learning_rate": 3.124757183229482e-07, "loss": 0.0552, "step": 42516 }, { "epoch": 0.9368744043585803, "grad_norm": 0.6598705649375916, "learning_rate": 3.1225838311430834e-07, "loss": 0.0435, "step": 42517 }, { "epoch": 0.9368964396480964, "grad_norm": 0.4518791735172272, "learning_rate": 3.120411227182113e-07, "loss": 0.05, "step": 42518 }, { "epoch": 0.9369184749376126, "grad_norm": 0.6954549551010132, "learning_rate": 3.118239371357662e-07, "loss": 0.0703, "step": 42519 }, { "epoch": 0.9369405102271288, "grad_norm": 0.3126147389411926, "learning_rate": 3.1160682636807546e-07, "loss": 0.0838, "step": 42520 }, { "epoch": 0.9369625455166449, "grad_norm": 0.6672912240028381, "learning_rate": 3.113897904162516e-07, "loss": 0.0669, "step": 42521 }, { "epoch": 0.9369845808061611, "grad_norm": 0.32848188281059265, "learning_rate": 3.1117282928139365e-07, "loss": 0.0494, "step": 42522 }, { "epoch": 0.9370066160956773, "grad_norm": 0.37607327103614807, "learning_rate": 3.1095594296461083e-07, "loss": 0.0549, "step": 42523 }, { "epoch": 0.9370286513851934, "grad_norm": 0.8185001611709595, "learning_rate": 3.1073913146700385e-07, "loss": 0.0898, "step": 42524 }, { "epoch": 0.9370506866747096, "grad_norm": 0.7576407194137573, "learning_rate": 3.105223947896818e-07, "loss": 0.0477, "step": 42525 }, { "epoch": 0.9370727219642258, "grad_norm": 0.3414023220539093, "learning_rate": 3.103057329337455e-07, "loss": 0.0522, "step": 42526 }, { "epoch": 0.9370947572537419, "grad_norm": 0.8327124714851379, "learning_rate": 3.1008914590029746e-07, "loss": 0.0565, "step": 42527 }, { "epoch": 0.9371167925432581, "grad_norm": 0.4716503620147705, "learning_rate": 3.098726336904467e-07, "loss": 0.0342, "step": 42528 }, { "epoch": 0.9371388278327741, "grad_norm": 0.4867643713951111, "learning_rate": 3.096561963052891e-07, "loss": 0.0516, "step": 42529 }, { "epoch": 0.9371608631222903, "grad_norm": 0.6159260869026184, "learning_rate": 3.0943983374593197e-07, "loss": 0.0429, "step": 42530 }, { "epoch": 0.9371828984118065, "grad_norm": 0.6044394373893738, "learning_rate": 3.092235460134729e-07, "loss": 0.0815, "step": 42531 }, { "epoch": 0.9372049337013226, "grad_norm": 0.8249768018722534, "learning_rate": 3.0900733310901764e-07, "loss": 0.074, "step": 42532 }, { "epoch": 0.9372269689908388, "grad_norm": 0.5609961748123169, "learning_rate": 3.087911950336669e-07, "loss": 0.0366, "step": 42533 }, { "epoch": 0.937249004280355, "grad_norm": 0.48002558946609497, "learning_rate": 3.085751317885199e-07, "loss": 0.0668, "step": 42534 }, { "epoch": 0.9372710395698711, "grad_norm": 0.670178234577179, "learning_rate": 3.083591433746774e-07, "loss": 0.0711, "step": 42535 }, { "epoch": 0.9372930748593873, "grad_norm": 0.5692465305328369, "learning_rate": 3.081432297932402e-07, "loss": 0.0549, "step": 42536 }, { "epoch": 0.9373151101489035, "grad_norm": 0.49640458822250366, "learning_rate": 3.07927391045309e-07, "loss": 0.0575, "step": 42537 }, { "epoch": 0.9373371454384196, "grad_norm": 0.41735538840293884, "learning_rate": 3.077116271319813e-07, "loss": 0.0415, "step": 42538 }, { "epoch": 0.9373591807279358, "grad_norm": 0.47946232557296753, "learning_rate": 3.074959380543563e-07, "loss": 0.046, "step": 42539 }, { "epoch": 0.937381216017452, "grad_norm": 0.9685561656951904, "learning_rate": 3.0728032381353464e-07, "loss": 0.0798, "step": 42540 }, { "epoch": 0.9374032513069681, "grad_norm": 0.7759098410606384, "learning_rate": 3.0706478441061223e-07, "loss": 0.0559, "step": 42541 }, { "epoch": 0.9374252865964843, "grad_norm": 0.5067890882492065, "learning_rate": 3.068493198466882e-07, "loss": 0.074, "step": 42542 }, { "epoch": 0.9374473218860004, "grad_norm": 0.46091970801353455, "learning_rate": 3.066339301228582e-07, "loss": 0.0656, "step": 42543 }, { "epoch": 0.9374693571755166, "grad_norm": 0.29660001397132874, "learning_rate": 3.0641861524022317e-07, "loss": 0.0496, "step": 42544 }, { "epoch": 0.9374913924650328, "grad_norm": 0.6832867860794067, "learning_rate": 3.062033751998755e-07, "loss": 0.0694, "step": 42545 }, { "epoch": 0.9375134277545489, "grad_norm": 0.5571551322937012, "learning_rate": 3.05988210002916e-07, "loss": 0.0425, "step": 42546 }, { "epoch": 0.9375354630440651, "grad_norm": 0.5171622037887573, "learning_rate": 3.0577311965043374e-07, "loss": 0.0569, "step": 42547 }, { "epoch": 0.9375574983335813, "grad_norm": 0.3351913392543793, "learning_rate": 3.055581041435329e-07, "loss": 0.0572, "step": 42548 }, { "epoch": 0.9375795336230974, "grad_norm": 1.1489614248275757, "learning_rate": 3.053431634833026e-07, "loss": 0.056, "step": 42549 }, { "epoch": 0.9376015689126136, "grad_norm": 0.4310435652732849, "learning_rate": 3.051282976708386e-07, "loss": 0.0514, "step": 42550 }, { "epoch": 0.9376236042021298, "grad_norm": 0.24350370466709137, "learning_rate": 3.0491350670723837e-07, "loss": 0.023, "step": 42551 }, { "epoch": 0.9376456394916459, "grad_norm": 0.2998313009738922, "learning_rate": 3.046987905935927e-07, "loss": 0.0395, "step": 42552 }, { "epoch": 0.9376676747811621, "grad_norm": 0.8908583521842957, "learning_rate": 3.0448414933099745e-07, "loss": 0.0763, "step": 42553 }, { "epoch": 0.9376897100706781, "grad_norm": 0.7491734027862549, "learning_rate": 3.042695829205416e-07, "loss": 0.0438, "step": 42554 }, { "epoch": 0.9377117453601943, "grad_norm": 0.3017905354499817, "learning_rate": 3.040550913633244e-07, "loss": 0.0695, "step": 42555 }, { "epoch": 0.9377337806497105, "grad_norm": 0.8393460512161255, "learning_rate": 3.038406746604333e-07, "loss": 0.0603, "step": 42556 }, { "epoch": 0.9377558159392266, "grad_norm": 0.5586693286895752, "learning_rate": 3.03626332812964e-07, "loss": 0.0649, "step": 42557 }, { "epoch": 0.9377778512287428, "grad_norm": 0.6563255190849304, "learning_rate": 3.0341206582200573e-07, "loss": 0.0651, "step": 42558 }, { "epoch": 0.937799886518259, "grad_norm": 0.7126755714416504, "learning_rate": 3.0319787368865094e-07, "loss": 0.0746, "step": 42559 }, { "epoch": 0.9378219218077751, "grad_norm": 0.5984966158866882, "learning_rate": 3.0298375641399037e-07, "loss": 0.0788, "step": 42560 }, { "epoch": 0.9378439570972913, "grad_norm": 0.43292751908302307, "learning_rate": 3.0276971399911477e-07, "loss": 0.0508, "step": 42561 }, { "epoch": 0.9378659923868075, "grad_norm": 0.38477373123168945, "learning_rate": 3.0255574644511344e-07, "loss": 0.046, "step": 42562 }, { "epoch": 0.9378880276763236, "grad_norm": 0.5130716562271118, "learning_rate": 3.0234185375307697e-07, "loss": 0.0735, "step": 42563 }, { "epoch": 0.9379100629658398, "grad_norm": 0.6667675971984863, "learning_rate": 3.021280359240963e-07, "loss": 0.0493, "step": 42564 }, { "epoch": 0.937932098255356, "grad_norm": 0.6014944314956665, "learning_rate": 3.0191429295925887e-07, "loss": 0.0417, "step": 42565 }, { "epoch": 0.9379541335448721, "grad_norm": 0.551914632320404, "learning_rate": 3.017006248596538e-07, "loss": 0.0445, "step": 42566 }, { "epoch": 0.9379761688343883, "grad_norm": 1.0470539331436157, "learning_rate": 3.014870316263685e-07, "loss": 0.0752, "step": 42567 }, { "epoch": 0.9379982041239044, "grad_norm": 0.49101075530052185, "learning_rate": 3.0127351326049215e-07, "loss": 0.0592, "step": 42568 }, { "epoch": 0.9380202394134206, "grad_norm": 0.6781541705131531, "learning_rate": 3.0106006976311386e-07, "loss": 0.0455, "step": 42569 }, { "epoch": 0.9380422747029368, "grad_norm": 0.4512655436992645, "learning_rate": 3.008467011353144e-07, "loss": 0.0481, "step": 42570 }, { "epoch": 0.9380643099924529, "grad_norm": 0.3674379587173462, "learning_rate": 3.00633407378188e-07, "loss": 0.0461, "step": 42571 }, { "epoch": 0.9380863452819691, "grad_norm": 0.43071848154067993, "learning_rate": 3.0042018849281704e-07, "loss": 0.0527, "step": 42572 }, { "epoch": 0.9381083805714853, "grad_norm": 0.39365333318710327, "learning_rate": 3.00207044480289e-07, "loss": 0.0413, "step": 42573 }, { "epoch": 0.9381304158610014, "grad_norm": 0.5301886796951294, "learning_rate": 2.999939753416914e-07, "loss": 0.0549, "step": 42574 }, { "epoch": 0.9381524511505176, "grad_norm": 0.5588898658752441, "learning_rate": 2.997809810781033e-07, "loss": 0.0717, "step": 42575 }, { "epoch": 0.9381744864400338, "grad_norm": 0.4328831434249878, "learning_rate": 2.995680616906171e-07, "loss": 0.0547, "step": 42576 }, { "epoch": 0.9381965217295499, "grad_norm": 0.6423357129096985, "learning_rate": 2.9935521718031046e-07, "loss": 0.0391, "step": 42577 }, { "epoch": 0.938218557019066, "grad_norm": 0.2823737859725952, "learning_rate": 2.9914244754827236e-07, "loss": 0.0302, "step": 42578 }, { "epoch": 0.9382405923085821, "grad_norm": 0.9089555740356445, "learning_rate": 2.989297527955853e-07, "loss": 0.0751, "step": 42579 }, { "epoch": 0.9382626275980983, "grad_norm": 0.6813265085220337, "learning_rate": 2.9871713292333345e-07, "loss": 0.0604, "step": 42580 }, { "epoch": 0.9382846628876145, "grad_norm": 0.40043774247169495, "learning_rate": 2.985045879325959e-07, "loss": 0.0672, "step": 42581 }, { "epoch": 0.9383066981771306, "grad_norm": 0.2744886577129364, "learning_rate": 2.982921178244602e-07, "loss": 0.0345, "step": 42582 }, { "epoch": 0.9383287334666468, "grad_norm": 0.18091914057731628, "learning_rate": 2.980797226000054e-07, "loss": 0.0454, "step": 42583 }, { "epoch": 0.938350768756163, "grad_norm": 0.5828894972801208, "learning_rate": 2.9786740226031393e-07, "loss": 0.0487, "step": 42584 }, { "epoch": 0.9383728040456791, "grad_norm": 0.5109524726867676, "learning_rate": 2.9765515680646836e-07, "loss": 0.0545, "step": 42585 }, { "epoch": 0.9383948393351953, "grad_norm": 0.5718520283699036, "learning_rate": 2.9744298623954947e-07, "loss": 0.0382, "step": 42586 }, { "epoch": 0.9384168746247115, "grad_norm": 0.5110287070274353, "learning_rate": 2.97230890560638e-07, "loss": 0.0648, "step": 42587 }, { "epoch": 0.9384389099142276, "grad_norm": 0.48493263125419617, "learning_rate": 2.970188697708132e-07, "loss": 0.0692, "step": 42588 }, { "epoch": 0.9384609452037438, "grad_norm": 0.6211231350898743, "learning_rate": 2.9680692387115404e-07, "loss": 0.0516, "step": 42589 }, { "epoch": 0.93848298049326, "grad_norm": 0.6717410087585449, "learning_rate": 2.965950528627448e-07, "loss": 0.0791, "step": 42590 }, { "epoch": 0.9385050157827761, "grad_norm": 0.6170109510421753, "learning_rate": 2.9638325674665956e-07, "loss": 0.0587, "step": 42591 }, { "epoch": 0.9385270510722923, "grad_norm": 0.4675248861312866, "learning_rate": 2.961715355239808e-07, "loss": 0.0379, "step": 42592 }, { "epoch": 0.9385490863618084, "grad_norm": 1.1986843347549438, "learning_rate": 2.95959889195781e-07, "loss": 0.0789, "step": 42593 }, { "epoch": 0.9385711216513246, "grad_norm": 0.4302644431591034, "learning_rate": 2.957483177631476e-07, "loss": 0.0654, "step": 42594 }, { "epoch": 0.9385931569408408, "grad_norm": 0.7090768218040466, "learning_rate": 2.9553682122714975e-07, "loss": 0.0563, "step": 42595 }, { "epoch": 0.9386151922303569, "grad_norm": 0.6555679440498352, "learning_rate": 2.953253995888683e-07, "loss": 0.0444, "step": 42596 }, { "epoch": 0.9386372275198731, "grad_norm": 0.4254462420940399, "learning_rate": 2.9511405284938233e-07, "loss": 0.0617, "step": 42597 }, { "epoch": 0.9386592628093893, "grad_norm": 0.2707462012767792, "learning_rate": 2.9490278100976264e-07, "loss": 0.0536, "step": 42598 }, { "epoch": 0.9386812980989054, "grad_norm": 0.7647690176963806, "learning_rate": 2.9469158407109177e-07, "loss": 0.0422, "step": 42599 }, { "epoch": 0.9387033333884216, "grad_norm": 0.4047802686691284, "learning_rate": 2.944804620344388e-07, "loss": 0.0649, "step": 42600 }, { "epoch": 0.9387253686779378, "grad_norm": 0.48286905884742737, "learning_rate": 2.9426941490088624e-07, "loss": 0.0729, "step": 42601 }, { "epoch": 0.9387474039674539, "grad_norm": 0.6366367340087891, "learning_rate": 2.9405844267150314e-07, "loss": 0.0435, "step": 42602 }, { "epoch": 0.93876943925697, "grad_norm": 0.44874584674835205, "learning_rate": 2.9384754534736714e-07, "loss": 0.0689, "step": 42603 }, { "epoch": 0.9387914745464861, "grad_norm": 0.663754403591156, "learning_rate": 2.936367229295506e-07, "loss": 0.0576, "step": 42604 }, { "epoch": 0.9388135098360023, "grad_norm": 0.7040610909461975, "learning_rate": 2.934259754191293e-07, "loss": 0.065, "step": 42605 }, { "epoch": 0.9388355451255185, "grad_norm": 0.48513486981391907, "learning_rate": 2.932153028171758e-07, "loss": 0.0544, "step": 42606 }, { "epoch": 0.9388575804150346, "grad_norm": 0.777691125869751, "learning_rate": 2.930047051247625e-07, "loss": 0.0571, "step": 42607 }, { "epoch": 0.9388796157045508, "grad_norm": 0.3454480469226837, "learning_rate": 2.9279418234296363e-07, "loss": 0.045, "step": 42608 }, { "epoch": 0.938901650994067, "grad_norm": 0.5029017925262451, "learning_rate": 2.9258373447284993e-07, "loss": 0.054, "step": 42609 }, { "epoch": 0.9389236862835831, "grad_norm": 0.6510087847709656, "learning_rate": 2.9237336151549386e-07, "loss": 0.0547, "step": 42610 }, { "epoch": 0.9389457215730993, "grad_norm": 0.35198187828063965, "learning_rate": 2.9216306347196796e-07, "loss": 0.0426, "step": 42611 }, { "epoch": 0.9389677568626155, "grad_norm": 0.9051622748374939, "learning_rate": 2.9195284034334136e-07, "loss": 0.0683, "step": 42612 }, { "epoch": 0.9389897921521316, "grad_norm": 0.2972254157066345, "learning_rate": 2.9174269213068815e-07, "loss": 0.026, "step": 42613 }, { "epoch": 0.9390118274416478, "grad_norm": 0.6631323099136353, "learning_rate": 2.915326188350742e-07, "loss": 0.0696, "step": 42614 }, { "epoch": 0.939033862731164, "grad_norm": 0.4930627942085266, "learning_rate": 2.913226204575736e-07, "loss": 0.0921, "step": 42615 }, { "epoch": 0.9390558980206801, "grad_norm": 0.3380172848701477, "learning_rate": 2.9111269699925224e-07, "loss": 0.0564, "step": 42616 }, { "epoch": 0.9390779333101963, "grad_norm": 0.6814164519309998, "learning_rate": 2.909028484611842e-07, "loss": 0.0663, "step": 42617 }, { "epoch": 0.9390999685997125, "grad_norm": 0.35653313994407654, "learning_rate": 2.906930748444336e-07, "loss": 0.0603, "step": 42618 }, { "epoch": 0.9391220038892286, "grad_norm": 0.7281560897827148, "learning_rate": 2.904833761500714e-07, "loss": 0.0628, "step": 42619 }, { "epoch": 0.9391440391787448, "grad_norm": 0.4347023367881775, "learning_rate": 2.9027375237916654e-07, "loss": 0.0473, "step": 42620 }, { "epoch": 0.939166074468261, "grad_norm": 0.4515841603279114, "learning_rate": 2.900642035327833e-07, "loss": 0.0504, "step": 42621 }, { "epoch": 0.9391881097577771, "grad_norm": 0.7114903926849365, "learning_rate": 2.898547296119941e-07, "loss": 0.0711, "step": 42622 }, { "epoch": 0.9392101450472933, "grad_norm": 0.5292455554008484, "learning_rate": 2.8964533061785813e-07, "loss": 0.0423, "step": 42623 }, { "epoch": 0.9392321803368094, "grad_norm": 0.929649293422699, "learning_rate": 2.894360065514512e-07, "loss": 0.0659, "step": 42624 }, { "epoch": 0.9392542156263256, "grad_norm": 0.4884199798107147, "learning_rate": 2.892267574138341e-07, "loss": 0.0496, "step": 42625 }, { "epoch": 0.9392762509158418, "grad_norm": 0.523278534412384, "learning_rate": 2.8901758320607263e-07, "loss": 0.0544, "step": 42626 }, { "epoch": 0.9392982862053579, "grad_norm": 0.56761634349823, "learning_rate": 2.888084839292343e-07, "loss": 0.0742, "step": 42627 }, { "epoch": 0.939320321494874, "grad_norm": 0.3169481158256531, "learning_rate": 2.885994595843816e-07, "loss": 0.044, "step": 42628 }, { "epoch": 0.9393423567843902, "grad_norm": 0.5896102786064148, "learning_rate": 2.8839051017258365e-07, "loss": 0.0502, "step": 42629 }, { "epoch": 0.9393643920739063, "grad_norm": 0.5346083641052246, "learning_rate": 2.881816356948996e-07, "loss": 0.0532, "step": 42630 }, { "epoch": 0.9393864273634225, "grad_norm": 0.5922929048538208, "learning_rate": 2.8797283615239533e-07, "loss": 0.0611, "step": 42631 }, { "epoch": 0.9394084626529386, "grad_norm": 0.6814523339271545, "learning_rate": 2.8776411154613494e-07, "loss": 0.046, "step": 42632 }, { "epoch": 0.9394304979424548, "grad_norm": 0.8303328156471252, "learning_rate": 2.875554618771825e-07, "loss": 0.0976, "step": 42633 }, { "epoch": 0.939452533231971, "grad_norm": 0.4201752841472626, "learning_rate": 2.87346887146599e-07, "loss": 0.0492, "step": 42634 }, { "epoch": 0.9394745685214871, "grad_norm": 0.4146396517753601, "learning_rate": 2.871383873554451e-07, "loss": 0.0465, "step": 42635 }, { "epoch": 0.9394966038110033, "grad_norm": 0.7257052063941956, "learning_rate": 2.869299625047883e-07, "loss": 0.0581, "step": 42636 }, { "epoch": 0.9395186391005195, "grad_norm": 0.7550345063209534, "learning_rate": 2.8672161259568454e-07, "loss": 0.0599, "step": 42637 }, { "epoch": 0.9395406743900356, "grad_norm": 0.49749475717544556, "learning_rate": 2.865133376291995e-07, "loss": 0.048, "step": 42638 }, { "epoch": 0.9395627096795518, "grad_norm": 0.7856515049934387, "learning_rate": 2.8630513760639074e-07, "loss": 0.0916, "step": 42639 }, { "epoch": 0.939584744969068, "grad_norm": 0.7447065711021423, "learning_rate": 2.860970125283208e-07, "loss": 0.0657, "step": 42640 }, { "epoch": 0.9396067802585841, "grad_norm": 0.941207766532898, "learning_rate": 2.8588896239604865e-07, "loss": 0.0505, "step": 42641 }, { "epoch": 0.9396288155481003, "grad_norm": 0.3447802662849426, "learning_rate": 2.856809872106336e-07, "loss": 0.0594, "step": 42642 }, { "epoch": 0.9396508508376165, "grad_norm": 0.47494110465049744, "learning_rate": 2.8547308697313646e-07, "loss": 0.0569, "step": 42643 }, { "epoch": 0.9396728861271326, "grad_norm": 0.3988049626350403, "learning_rate": 2.852652616846146e-07, "loss": 0.0782, "step": 42644 }, { "epoch": 0.9396949214166488, "grad_norm": 0.8392660617828369, "learning_rate": 2.8505751134613065e-07, "loss": 0.0872, "step": 42645 }, { "epoch": 0.939716956706165, "grad_norm": 0.480627179145813, "learning_rate": 2.8484983595873536e-07, "loss": 0.0687, "step": 42646 }, { "epoch": 0.9397389919956811, "grad_norm": 0.8903558254241943, "learning_rate": 2.846422355234929e-07, "loss": 0.0633, "step": 42647 }, { "epoch": 0.9397610272851973, "grad_norm": 0.4956477880477905, "learning_rate": 2.844347100414574e-07, "loss": 0.0443, "step": 42648 }, { "epoch": 0.9397830625747134, "grad_norm": 0.576403021812439, "learning_rate": 2.8422725951368965e-07, "loss": 0.0576, "step": 42649 }, { "epoch": 0.9398050978642296, "grad_norm": 0.6669777035713196, "learning_rate": 2.8401988394124056e-07, "loss": 0.0599, "step": 42650 }, { "epoch": 0.9398271331537458, "grad_norm": 0.3661148250102997, "learning_rate": 2.838125833251709e-07, "loss": 0.0466, "step": 42651 }, { "epoch": 0.9398491684432618, "grad_norm": 0.9561212062835693, "learning_rate": 2.8360535766653486e-07, "loss": 0.0947, "step": 42652 }, { "epoch": 0.939871203732778, "grad_norm": 0.6748526096343994, "learning_rate": 2.8339820696638816e-07, "loss": 0.0494, "step": 42653 }, { "epoch": 0.9398932390222942, "grad_norm": 0.6956541538238525, "learning_rate": 2.8319113122578675e-07, "loss": 0.048, "step": 42654 }, { "epoch": 0.9399152743118103, "grad_norm": 0.5154237747192383, "learning_rate": 2.829841304457831e-07, "loss": 0.0508, "step": 42655 }, { "epoch": 0.9399373096013265, "grad_norm": 0.6909140348434448, "learning_rate": 2.827772046274363e-07, "loss": 0.062, "step": 42656 }, { "epoch": 0.9399593448908427, "grad_norm": 0.6119771599769592, "learning_rate": 2.8257035377179554e-07, "loss": 0.0711, "step": 42657 }, { "epoch": 0.9399813801803588, "grad_norm": 0.4336490035057068, "learning_rate": 2.82363577879915e-07, "loss": 0.0395, "step": 42658 }, { "epoch": 0.940003415469875, "grad_norm": 0.7201542258262634, "learning_rate": 2.8215687695284873e-07, "loss": 0.038, "step": 42659 }, { "epoch": 0.9400254507593911, "grad_norm": 0.6511837840080261, "learning_rate": 2.819502509916527e-07, "loss": 0.0599, "step": 42660 }, { "epoch": 0.9400474860489073, "grad_norm": 0.37852445244789124, "learning_rate": 2.8174369999737437e-07, "loss": 0.0518, "step": 42661 }, { "epoch": 0.9400695213384235, "grad_norm": 0.20924869179725647, "learning_rate": 2.8153722397106785e-07, "loss": 0.0359, "step": 42662 }, { "epoch": 0.9400915566279396, "grad_norm": 0.8365451693534851, "learning_rate": 2.8133082291378566e-07, "loss": 0.0724, "step": 42663 }, { "epoch": 0.9401135919174558, "grad_norm": 0.6988082528114319, "learning_rate": 2.811244968265769e-07, "loss": 0.0495, "step": 42664 }, { "epoch": 0.940135627206972, "grad_norm": 0.4481409788131714, "learning_rate": 2.8091824571049583e-07, "loss": 0.0754, "step": 42665 }, { "epoch": 0.9401576624964881, "grad_norm": 0.3122333586215973, "learning_rate": 2.807120695665899e-07, "loss": 0.038, "step": 42666 }, { "epoch": 0.9401796977860043, "grad_norm": 0.3640887141227722, "learning_rate": 2.8050596839591156e-07, "loss": 0.0454, "step": 42667 }, { "epoch": 0.9402017330755205, "grad_norm": 0.734197199344635, "learning_rate": 2.802999421995084e-07, "loss": 0.0776, "step": 42668 }, { "epoch": 0.9402237683650366, "grad_norm": 0.30567923188209534, "learning_rate": 2.8009399097843114e-07, "loss": 0.0361, "step": 42669 }, { "epoch": 0.9402458036545528, "grad_norm": 0.5876330137252808, "learning_rate": 2.798881147337307e-07, "loss": 0.0653, "step": 42670 }, { "epoch": 0.940267838944069, "grad_norm": 0.5628772974014282, "learning_rate": 2.7968231346645114e-07, "loss": 0.0611, "step": 42671 }, { "epoch": 0.9402898742335851, "grad_norm": 0.3976626694202423, "learning_rate": 2.79476587177645e-07, "loss": 0.0223, "step": 42672 }, { "epoch": 0.9403119095231013, "grad_norm": 0.6785714626312256, "learning_rate": 2.792709358683565e-07, "loss": 0.0623, "step": 42673 }, { "epoch": 0.9403339448126175, "grad_norm": 0.7923830151557922, "learning_rate": 2.790653595396364e-07, "loss": 0.0757, "step": 42674 }, { "epoch": 0.9403559801021336, "grad_norm": 0.6946297883987427, "learning_rate": 2.788598581925289e-07, "loss": 0.0444, "step": 42675 }, { "epoch": 0.9403780153916498, "grad_norm": 0.5968562960624695, "learning_rate": 2.786544318280848e-07, "loss": 0.0596, "step": 42676 }, { "epoch": 0.9404000506811658, "grad_norm": 0.93932044506073, "learning_rate": 2.7844908044734495e-07, "loss": 0.0548, "step": 42677 }, { "epoch": 0.940422085970682, "grad_norm": 0.342630535364151, "learning_rate": 2.782438040513602e-07, "loss": 0.0506, "step": 42678 }, { "epoch": 0.9404441212601982, "grad_norm": 0.517362654209137, "learning_rate": 2.7803860264117464e-07, "loss": 0.0656, "step": 42679 }, { "epoch": 0.9404661565497143, "grad_norm": 0.5406438708305359, "learning_rate": 2.7783347621783083e-07, "loss": 0.0268, "step": 42680 }, { "epoch": 0.9404881918392305, "grad_norm": 0.48628857731819153, "learning_rate": 2.7762842478237795e-07, "loss": 0.0507, "step": 42681 }, { "epoch": 0.9405102271287467, "grad_norm": 0.9398189783096313, "learning_rate": 2.7742344833585674e-07, "loss": 0.0807, "step": 42682 }, { "epoch": 0.9405322624182628, "grad_norm": 0.6277885437011719, "learning_rate": 2.772185468793131e-07, "loss": 0.0699, "step": 42683 }, { "epoch": 0.940554297707779, "grad_norm": 0.6984260678291321, "learning_rate": 2.770137204137912e-07, "loss": 0.048, "step": 42684 }, { "epoch": 0.9405763329972952, "grad_norm": 0.6535294651985168, "learning_rate": 2.768089689403319e-07, "loss": 0.0639, "step": 42685 }, { "epoch": 0.9405983682868113, "grad_norm": 0.6653132438659668, "learning_rate": 2.7660429245998265e-07, "loss": 0.0744, "step": 42686 }, { "epoch": 0.9406204035763275, "grad_norm": 0.5341086983680725, "learning_rate": 2.7639969097378095e-07, "loss": 0.0677, "step": 42687 }, { "epoch": 0.9406424388658436, "grad_norm": 0.4672849774360657, "learning_rate": 2.7619516448276927e-07, "loss": 0.0497, "step": 42688 }, { "epoch": 0.9406644741553598, "grad_norm": 0.4755292236804962, "learning_rate": 2.759907129879935e-07, "loss": 0.0578, "step": 42689 }, { "epoch": 0.940686509444876, "grad_norm": 0.5932561159133911, "learning_rate": 2.757863364904928e-07, "loss": 0.0556, "step": 42690 }, { "epoch": 0.9407085447343921, "grad_norm": 0.8610290288925171, "learning_rate": 2.7558203499130797e-07, "loss": 0.0508, "step": 42691 }, { "epoch": 0.9407305800239083, "grad_norm": 1.0902924537658691, "learning_rate": 2.753778084914782e-07, "loss": 0.0764, "step": 42692 }, { "epoch": 0.9407526153134245, "grad_norm": 0.44351938366889954, "learning_rate": 2.7517365699204766e-07, "loss": 0.0874, "step": 42693 }, { "epoch": 0.9407746506029406, "grad_norm": 0.8896011710166931, "learning_rate": 2.749695804940522e-07, "loss": 0.0524, "step": 42694 }, { "epoch": 0.9407966858924568, "grad_norm": 1.0149072408676147, "learning_rate": 2.747655789985343e-07, "loss": 0.067, "step": 42695 }, { "epoch": 0.940818721181973, "grad_norm": 0.6120278835296631, "learning_rate": 2.745616525065281e-07, "loss": 0.0639, "step": 42696 }, { "epoch": 0.9408407564714891, "grad_norm": 1.0724977254867554, "learning_rate": 2.7435780101907946e-07, "loss": 0.1028, "step": 42697 }, { "epoch": 0.9408627917610053, "grad_norm": 0.9908068180084229, "learning_rate": 2.7415402453722095e-07, "loss": 0.0654, "step": 42698 }, { "epoch": 0.9408848270505215, "grad_norm": 0.44857171177864075, "learning_rate": 2.739503230619933e-07, "loss": 0.0401, "step": 42699 }, { "epoch": 0.9409068623400376, "grad_norm": 0.5060260891914368, "learning_rate": 2.7374669659443407e-07, "loss": 0.0438, "step": 42700 }, { "epoch": 0.9409288976295538, "grad_norm": 0.4317608177661896, "learning_rate": 2.735431451355791e-07, "loss": 0.0415, "step": 42701 }, { "epoch": 0.9409509329190698, "grad_norm": 0.5027376413345337, "learning_rate": 2.733396686864659e-07, "loss": 0.0471, "step": 42702 }, { "epoch": 0.940972968208586, "grad_norm": 0.543168306350708, "learning_rate": 2.7313626724813024e-07, "loss": 0.0887, "step": 42703 }, { "epoch": 0.9409950034981022, "grad_norm": 0.9576412439346313, "learning_rate": 2.729329408216097e-07, "loss": 0.0654, "step": 42704 }, { "epoch": 0.9410170387876183, "grad_norm": 0.854602575302124, "learning_rate": 2.727296894079384e-07, "loss": 0.0451, "step": 42705 }, { "epoch": 0.9410390740771345, "grad_norm": 0.684167742729187, "learning_rate": 2.7252651300815224e-07, "loss": 0.056, "step": 42706 }, { "epoch": 0.9410611093666507, "grad_norm": 0.6717785596847534, "learning_rate": 2.7232341162328536e-07, "loss": 0.0483, "step": 42707 }, { "epoch": 0.9410831446561668, "grad_norm": 0.4314245283603668, "learning_rate": 2.721203852543736e-07, "loss": 0.0478, "step": 42708 }, { "epoch": 0.941105179945683, "grad_norm": 0.7199608683586121, "learning_rate": 2.7191743390245105e-07, "loss": 0.0823, "step": 42709 }, { "epoch": 0.9411272152351992, "grad_norm": 0.5769091248512268, "learning_rate": 2.7171455756855033e-07, "loss": 0.0846, "step": 42710 }, { "epoch": 0.9411492505247153, "grad_norm": 0.6489315032958984, "learning_rate": 2.7151175625370563e-07, "loss": 0.0449, "step": 42711 }, { "epoch": 0.9411712858142315, "grad_norm": 0.3056422173976898, "learning_rate": 2.7130902995894933e-07, "loss": 0.0498, "step": 42712 }, { "epoch": 0.9411933211037476, "grad_norm": 0.42322924733161926, "learning_rate": 2.7110637868531407e-07, "loss": 0.0403, "step": 42713 }, { "epoch": 0.9412153563932638, "grad_norm": 0.33400899171829224, "learning_rate": 2.709038024338323e-07, "loss": 0.0349, "step": 42714 }, { "epoch": 0.94123739168278, "grad_norm": 0.2963625490665436, "learning_rate": 2.707013012055365e-07, "loss": 0.0442, "step": 42715 }, { "epoch": 0.9412594269722961, "grad_norm": 0.48533979058265686, "learning_rate": 2.704988750014575e-07, "loss": 0.0564, "step": 42716 }, { "epoch": 0.9412814622618123, "grad_norm": 0.5858173370361328, "learning_rate": 2.702965238226246e-07, "loss": 0.0586, "step": 42717 }, { "epoch": 0.9413034975513285, "grad_norm": 0.7197014093399048, "learning_rate": 2.7009424767007184e-07, "loss": 0.0745, "step": 42718 }, { "epoch": 0.9413255328408446, "grad_norm": 0.5228981971740723, "learning_rate": 2.698920465448268e-07, "loss": 0.0456, "step": 42719 }, { "epoch": 0.9413475681303608, "grad_norm": 0.4603077173233032, "learning_rate": 2.6968992044792193e-07, "loss": 0.0566, "step": 42720 }, { "epoch": 0.941369603419877, "grad_norm": 0.5518080592155457, "learning_rate": 2.694878693803832e-07, "loss": 0.0435, "step": 42721 }, { "epoch": 0.9413916387093931, "grad_norm": 0.46175652742385864, "learning_rate": 2.692858933432446e-07, "loss": 0.079, "step": 42722 }, { "epoch": 0.9414136739989093, "grad_norm": 0.8039831519126892, "learning_rate": 2.6908399233753045e-07, "loss": 0.0623, "step": 42723 }, { "epoch": 0.9414357092884255, "grad_norm": 0.5437328219413757, "learning_rate": 2.688821663642699e-07, "loss": 0.0377, "step": 42724 }, { "epoch": 0.9414577445779416, "grad_norm": 0.6169230341911316, "learning_rate": 2.686804154244954e-07, "loss": 0.0206, "step": 42725 }, { "epoch": 0.9414797798674578, "grad_norm": 1.1091628074645996, "learning_rate": 2.6847873951922787e-07, "loss": 0.0808, "step": 42726 }, { "epoch": 0.9415018151569738, "grad_norm": 0.7863502502441406, "learning_rate": 2.682771386494981e-07, "loss": 0.0603, "step": 42727 }, { "epoch": 0.94152385044649, "grad_norm": 0.7840026617050171, "learning_rate": 2.6807561281633363e-07, "loss": 0.0849, "step": 42728 }, { "epoch": 0.9415458857360062, "grad_norm": 0.7745034694671631, "learning_rate": 2.6787416202076034e-07, "loss": 0.048, "step": 42729 }, { "epoch": 0.9415679210255223, "grad_norm": 0.6931154727935791, "learning_rate": 2.6767278626380233e-07, "loss": 0.0692, "step": 42730 }, { "epoch": 0.9415899563150385, "grad_norm": 0.4370598793029785, "learning_rate": 2.6747148554648717e-07, "loss": 0.0661, "step": 42731 }, { "epoch": 0.9416119916045547, "grad_norm": 0.5594799518585205, "learning_rate": 2.6727025986984235e-07, "loss": 0.051, "step": 42732 }, { "epoch": 0.9416340268940708, "grad_norm": 0.6898141503334045, "learning_rate": 2.670691092348887e-07, "loss": 0.0594, "step": 42733 }, { "epoch": 0.941656062183587, "grad_norm": 1.0978225469589233, "learning_rate": 2.668680336426521e-07, "loss": 0.067, "step": 42734 }, { "epoch": 0.9416780974731032, "grad_norm": 0.47082751989364624, "learning_rate": 2.6666703309415674e-07, "loss": 0.0489, "step": 42735 }, { "epoch": 0.9417001327626193, "grad_norm": 0.4756937325000763, "learning_rate": 2.6646610759042843e-07, "loss": 0.0582, "step": 42736 }, { "epoch": 0.9417221680521355, "grad_norm": 0.47161149978637695, "learning_rate": 2.6626525713248804e-07, "loss": 0.0494, "step": 42737 }, { "epoch": 0.9417442033416517, "grad_norm": 0.596215009689331, "learning_rate": 2.6606448172135813e-07, "loss": 0.069, "step": 42738 }, { "epoch": 0.9417662386311678, "grad_norm": 0.58438640832901, "learning_rate": 2.658637813580661e-07, "loss": 0.0726, "step": 42739 }, { "epoch": 0.941788273920684, "grad_norm": 0.3867173194885254, "learning_rate": 2.656631560436279e-07, "loss": 0.0534, "step": 42740 }, { "epoch": 0.9418103092102001, "grad_norm": 1.194576621055603, "learning_rate": 2.65462605779071e-07, "loss": 0.075, "step": 42741 }, { "epoch": 0.9418323444997163, "grad_norm": 0.7076263427734375, "learning_rate": 2.6526213056541125e-07, "loss": 0.0631, "step": 42742 }, { "epoch": 0.9418543797892325, "grad_norm": 0.5564820766448975, "learning_rate": 2.650617304036762e-07, "loss": 0.086, "step": 42743 }, { "epoch": 0.9418764150787486, "grad_norm": 0.6121113896369934, "learning_rate": 2.6486140529488003e-07, "loss": 0.0619, "step": 42744 }, { "epoch": 0.9418984503682648, "grad_norm": 0.6471995711326599, "learning_rate": 2.646611552400485e-07, "loss": 0.063, "step": 42745 }, { "epoch": 0.941920485657781, "grad_norm": 0.25473034381866455, "learning_rate": 2.6446098024019926e-07, "loss": 0.0394, "step": 42746 }, { "epoch": 0.9419425209472971, "grad_norm": 0.5262701511383057, "learning_rate": 2.6426088029635144e-07, "loss": 0.0538, "step": 42747 }, { "epoch": 0.9419645562368133, "grad_norm": 0.6050902605056763, "learning_rate": 2.6406085540952585e-07, "loss": 0.0521, "step": 42748 }, { "epoch": 0.9419865915263295, "grad_norm": 0.6113150715827942, "learning_rate": 2.6386090558073837e-07, "loss": 0.0532, "step": 42749 }, { "epoch": 0.9420086268158456, "grad_norm": 0.7855425477027893, "learning_rate": 2.6366103081101154e-07, "loss": 0.0789, "step": 42750 }, { "epoch": 0.9420306621053617, "grad_norm": 0.5714878439903259, "learning_rate": 2.6346123110135953e-07, "loss": 0.0489, "step": 42751 }, { "epoch": 0.9420526973948778, "grad_norm": 0.7856023907661438, "learning_rate": 2.632615064528032e-07, "loss": 0.0731, "step": 42752 }, { "epoch": 0.942074732684394, "grad_norm": 0.4824594259262085, "learning_rate": 2.630618568663584e-07, "loss": 0.0404, "step": 42753 }, { "epoch": 0.9420967679739102, "grad_norm": 0.4615572690963745, "learning_rate": 2.62862282343041e-07, "loss": 0.0425, "step": 42754 }, { "epoch": 0.9421188032634263, "grad_norm": 0.8593016862869263, "learning_rate": 2.626627828838701e-07, "loss": 0.0563, "step": 42755 }, { "epoch": 0.9421408385529425, "grad_norm": 0.6419416069984436, "learning_rate": 2.6246335848986005e-07, "loss": 0.0615, "step": 42756 }, { "epoch": 0.9421628738424587, "grad_norm": 0.4628085196018219, "learning_rate": 2.6226400916202655e-07, "loss": 0.0486, "step": 42757 }, { "epoch": 0.9421849091319748, "grad_norm": 0.3134613037109375, "learning_rate": 2.620647349013855e-07, "loss": 0.0331, "step": 42758 }, { "epoch": 0.942206944421491, "grad_norm": 0.4653148949146271, "learning_rate": 2.6186553570895276e-07, "loss": 0.0564, "step": 42759 }, { "epoch": 0.9422289797110072, "grad_norm": 0.48641273379325867, "learning_rate": 2.616664115857409e-07, "loss": 0.0311, "step": 42760 }, { "epoch": 0.9422510150005233, "grad_norm": 0.9520990252494812, "learning_rate": 2.614673625327657e-07, "loss": 0.0892, "step": 42761 }, { "epoch": 0.9422730502900395, "grad_norm": 0.6544394493103027, "learning_rate": 2.612683885510414e-07, "loss": 0.0376, "step": 42762 }, { "epoch": 0.9422950855795557, "grad_norm": 0.5105916857719421, "learning_rate": 2.6106948964157885e-07, "loss": 0.0523, "step": 42763 }, { "epoch": 0.9423171208690718, "grad_norm": 0.8074313402175903, "learning_rate": 2.608706658053955e-07, "loss": 0.0775, "step": 42764 }, { "epoch": 0.942339156158588, "grad_norm": 0.3900488615036011, "learning_rate": 2.6067191704349904e-07, "loss": 0.0539, "step": 42765 }, { "epoch": 0.9423611914481042, "grad_norm": 0.49221429228782654, "learning_rate": 2.604732433569068e-07, "loss": 0.075, "step": 42766 }, { "epoch": 0.9423832267376203, "grad_norm": 0.3795376420021057, "learning_rate": 2.6027464474662645e-07, "loss": 0.0683, "step": 42767 }, { "epoch": 0.9424052620271365, "grad_norm": 0.76158207654953, "learning_rate": 2.600761212136721e-07, "loss": 0.0628, "step": 42768 }, { "epoch": 0.9424272973166526, "grad_norm": 0.3743063807487488, "learning_rate": 2.5987767275905295e-07, "loss": 0.0484, "step": 42769 }, { "epoch": 0.9424493326061688, "grad_norm": 0.38668641448020935, "learning_rate": 2.596792993837832e-07, "loss": 0.0475, "step": 42770 }, { "epoch": 0.942471367895685, "grad_norm": 0.48708099126815796, "learning_rate": 2.594810010888704e-07, "loss": 0.035, "step": 42771 }, { "epoch": 0.9424934031852011, "grad_norm": 0.5455057621002197, "learning_rate": 2.592827778753254e-07, "loss": 0.0732, "step": 42772 }, { "epoch": 0.9425154384747173, "grad_norm": 0.6974309682846069, "learning_rate": 2.590846297441574e-07, "loss": 0.0758, "step": 42773 }, { "epoch": 0.9425374737642335, "grad_norm": 0.7009844183921814, "learning_rate": 2.588865566963755e-07, "loss": 0.0664, "step": 42774 }, { "epoch": 0.9425595090537496, "grad_norm": 0.5292479991912842, "learning_rate": 2.5868855873299235e-07, "loss": 0.0524, "step": 42775 }, { "epoch": 0.9425815443432657, "grad_norm": 0.5729939937591553, "learning_rate": 2.584906358550088e-07, "loss": 0.0532, "step": 42776 }, { "epoch": 0.9426035796327819, "grad_norm": 0.4886886775493622, "learning_rate": 2.5829278806344056e-07, "loss": 0.0528, "step": 42777 }, { "epoch": 0.942625614922298, "grad_norm": 0.5206166505813599, "learning_rate": 2.5809501535929034e-07, "loss": 0.0708, "step": 42778 }, { "epoch": 0.9426476502118142, "grad_norm": 0.44948163628578186, "learning_rate": 2.5789731774357063e-07, "loss": 0.0684, "step": 42779 }, { "epoch": 0.9426696855013303, "grad_norm": 0.47500282526016235, "learning_rate": 2.576996952172822e-07, "loss": 0.0538, "step": 42780 }, { "epoch": 0.9426917207908465, "grad_norm": 0.6874436140060425, "learning_rate": 2.5750214778143435e-07, "loss": 0.0618, "step": 42781 }, { "epoch": 0.9427137560803627, "grad_norm": 0.6251645088195801, "learning_rate": 2.5730467543703615e-07, "loss": 0.0654, "step": 42782 }, { "epoch": 0.9427357913698788, "grad_norm": 0.2893829941749573, "learning_rate": 2.571072781850886e-07, "loss": 0.0504, "step": 42783 }, { "epoch": 0.942757826659395, "grad_norm": 0.32707351446151733, "learning_rate": 2.5690995602660085e-07, "loss": 0.0538, "step": 42784 }, { "epoch": 0.9427798619489112, "grad_norm": 0.6540440320968628, "learning_rate": 2.567127089625754e-07, "loss": 0.0796, "step": 42785 }, { "epoch": 0.9428018972384273, "grad_norm": 1.051012396812439, "learning_rate": 2.5651553699401977e-07, "loss": 0.0512, "step": 42786 }, { "epoch": 0.9428239325279435, "grad_norm": 0.48961544036865234, "learning_rate": 2.5631844012193485e-07, "loss": 0.0772, "step": 42787 }, { "epoch": 0.9428459678174597, "grad_norm": 0.4166134297847748, "learning_rate": 2.5612141834732814e-07, "loss": 0.0395, "step": 42788 }, { "epoch": 0.9428680031069758, "grad_norm": 0.7093847990036011, "learning_rate": 2.559244716712006e-07, "loss": 0.0574, "step": 42789 }, { "epoch": 0.942890038396492, "grad_norm": 0.4845496118068695, "learning_rate": 2.5572760009455466e-07, "loss": 0.0706, "step": 42790 }, { "epoch": 0.9429120736860082, "grad_norm": 1.8652677536010742, "learning_rate": 2.5553080361839786e-07, "loss": 0.0523, "step": 42791 }, { "epoch": 0.9429341089755243, "grad_norm": 0.5365993976593018, "learning_rate": 2.5533408224372445e-07, "loss": 0.0618, "step": 42792 }, { "epoch": 0.9429561442650405, "grad_norm": 0.8545747399330139, "learning_rate": 2.5513743597154526e-07, "loss": 0.0625, "step": 42793 }, { "epoch": 0.9429781795545567, "grad_norm": 0.5800147652626038, "learning_rate": 2.5494086480285615e-07, "loss": 0.0607, "step": 42794 }, { "epoch": 0.9430002148440728, "grad_norm": 0.40888333320617676, "learning_rate": 2.547443687386597e-07, "loss": 0.073, "step": 42795 }, { "epoch": 0.943022250133589, "grad_norm": 0.3899190127849579, "learning_rate": 2.5454794777996004e-07, "loss": 0.0678, "step": 42796 }, { "epoch": 0.9430442854231051, "grad_norm": 0.3120137155056, "learning_rate": 2.543516019277531e-07, "loss": 0.0378, "step": 42797 }, { "epoch": 0.9430663207126213, "grad_norm": 1.2725023031234741, "learning_rate": 2.5415533118304133e-07, "loss": 0.0588, "step": 42798 }, { "epoch": 0.9430883560021375, "grad_norm": 0.6445040702819824, "learning_rate": 2.539591355468207e-07, "loss": 0.0643, "step": 42799 }, { "epoch": 0.9431103912916536, "grad_norm": 0.41179388761520386, "learning_rate": 2.53763015020097e-07, "loss": 0.066, "step": 42800 }, { "epoch": 0.9431324265811697, "grad_norm": 0.5024402737617493, "learning_rate": 2.535669696038645e-07, "loss": 0.0565, "step": 42801 }, { "epoch": 0.9431544618706859, "grad_norm": 0.625344455242157, "learning_rate": 2.53370999299124e-07, "loss": 0.0427, "step": 42802 }, { "epoch": 0.943176497160202, "grad_norm": 0.26837509870529175, "learning_rate": 2.531751041068714e-07, "loss": 0.0564, "step": 42803 }, { "epoch": 0.9431985324497182, "grad_norm": 0.8138318657875061, "learning_rate": 2.529792840281059e-07, "loss": 0.0584, "step": 42804 }, { "epoch": 0.9432205677392344, "grad_norm": 0.6821615099906921, "learning_rate": 2.527835390638267e-07, "loss": 0.0953, "step": 42805 }, { "epoch": 0.9432426030287505, "grad_norm": 0.85215163230896, "learning_rate": 2.5258786921502796e-07, "loss": 0.082, "step": 42806 }, { "epoch": 0.9432646383182667, "grad_norm": 0.5669378638267517, "learning_rate": 2.523922744827073e-07, "loss": 0.076, "step": 42807 }, { "epoch": 0.9432866736077828, "grad_norm": 0.6566801071166992, "learning_rate": 2.5219675486786056e-07, "loss": 0.0732, "step": 42808 }, { "epoch": 0.943308708897299, "grad_norm": 0.40149372816085815, "learning_rate": 2.520013103714852e-07, "loss": 0.0339, "step": 42809 }, { "epoch": 0.9433307441868152, "grad_norm": 0.501621663570404, "learning_rate": 2.5180594099457557e-07, "loss": 0.0498, "step": 42810 }, { "epoch": 0.9433527794763313, "grad_norm": 0.5466710329055786, "learning_rate": 2.516106467381257e-07, "loss": 0.0585, "step": 42811 }, { "epoch": 0.9433748147658475, "grad_norm": 0.6465400457382202, "learning_rate": 2.514154276031333e-07, "loss": 0.0609, "step": 42812 }, { "epoch": 0.9433968500553637, "grad_norm": 0.6579834222793579, "learning_rate": 2.5122028359058913e-07, "loss": 0.0464, "step": 42813 }, { "epoch": 0.9434188853448798, "grad_norm": 0.6812095642089844, "learning_rate": 2.5102521470149076e-07, "loss": 0.0565, "step": 42814 }, { "epoch": 0.943440920634396, "grad_norm": 0.7992191910743713, "learning_rate": 2.5083022093682737e-07, "loss": 0.062, "step": 42815 }, { "epoch": 0.9434629559239122, "grad_norm": 0.6637953519821167, "learning_rate": 2.5063530229759823e-07, "loss": 0.0564, "step": 42816 }, { "epoch": 0.9434849912134283, "grad_norm": 0.9770626425743103, "learning_rate": 2.5044045878479083e-07, "loss": 0.0722, "step": 42817 }, { "epoch": 0.9435070265029445, "grad_norm": 0.7422732710838318, "learning_rate": 2.5024569039939937e-07, "loss": 0.0881, "step": 42818 }, { "epoch": 0.9435290617924607, "grad_norm": 0.38348233699798584, "learning_rate": 2.500509971424164e-07, "loss": 0.0583, "step": 42819 }, { "epoch": 0.9435510970819768, "grad_norm": 0.48367711901664734, "learning_rate": 2.498563790148328e-07, "loss": 0.0805, "step": 42820 }, { "epoch": 0.943573132371493, "grad_norm": 0.31211569905281067, "learning_rate": 2.496618360176411e-07, "loss": 0.0412, "step": 42821 }, { "epoch": 0.9435951676610091, "grad_norm": 0.45434433221817017, "learning_rate": 2.4946736815183055e-07, "loss": 0.0451, "step": 42822 }, { "epoch": 0.9436172029505253, "grad_norm": 0.3687710165977478, "learning_rate": 2.4927297541839367e-07, "loss": 0.0456, "step": 42823 }, { "epoch": 0.9436392382400415, "grad_norm": 0.2510409653186798, "learning_rate": 2.4907865781831795e-07, "loss": 0.0502, "step": 42824 }, { "epoch": 0.9436612735295575, "grad_norm": 0.8829854726791382, "learning_rate": 2.4888441535259765e-07, "loss": 0.0681, "step": 42825 }, { "epoch": 0.9436833088190737, "grad_norm": 0.808716356754303, "learning_rate": 2.486902480222153e-07, "loss": 0.05, "step": 42826 }, { "epoch": 0.9437053441085899, "grad_norm": 0.6264341473579407, "learning_rate": 2.484961558281668e-07, "loss": 0.0835, "step": 42827 }, { "epoch": 0.943727379398106, "grad_norm": 0.5446615815162659, "learning_rate": 2.4830213877143626e-07, "loss": 0.0648, "step": 42828 }, { "epoch": 0.9437494146876222, "grad_norm": 1.470656394958496, "learning_rate": 2.481081968530147e-07, "loss": 0.0812, "step": 42829 }, { "epoch": 0.9437714499771384, "grad_norm": 0.7987490892410278, "learning_rate": 2.479143300738862e-07, "loss": 0.0701, "step": 42830 }, { "epoch": 0.9437934852666545, "grad_norm": 0.8654690980911255, "learning_rate": 2.477205384350417e-07, "loss": 0.0608, "step": 42831 }, { "epoch": 0.9438155205561707, "grad_norm": 0.48820728063583374, "learning_rate": 2.475268219374688e-07, "loss": 0.0476, "step": 42832 }, { "epoch": 0.9438375558456868, "grad_norm": 0.7708948254585266, "learning_rate": 2.473331805821516e-07, "loss": 0.068, "step": 42833 }, { "epoch": 0.943859591135203, "grad_norm": 0.849234402179718, "learning_rate": 2.471396143700777e-07, "loss": 0.0764, "step": 42834 }, { "epoch": 0.9438816264247192, "grad_norm": 0.5658863186836243, "learning_rate": 2.4694612330223463e-07, "loss": 0.0374, "step": 42835 }, { "epoch": 0.9439036617142353, "grad_norm": 0.8424452543258667, "learning_rate": 2.467527073796033e-07, "loss": 0.0529, "step": 42836 }, { "epoch": 0.9439256970037515, "grad_norm": 0.6460265517234802, "learning_rate": 2.465593666031729e-07, "loss": 0.0555, "step": 42837 }, { "epoch": 0.9439477322932677, "grad_norm": 0.43365028500556946, "learning_rate": 2.46366100973926e-07, "loss": 0.0447, "step": 42838 }, { "epoch": 0.9439697675827838, "grad_norm": 0.5874930024147034, "learning_rate": 2.4617291049285005e-07, "loss": 0.0531, "step": 42839 }, { "epoch": 0.9439918028723, "grad_norm": 0.5347579121589661, "learning_rate": 2.459797951609261e-07, "loss": 0.0581, "step": 42840 }, { "epoch": 0.9440138381618162, "grad_norm": 0.7115095257759094, "learning_rate": 2.457867549791382e-07, "loss": 0.0577, "step": 42841 }, { "epoch": 0.9440358734513323, "grad_norm": 0.5999478101730347, "learning_rate": 2.4559378994847223e-07, "loss": 0.0568, "step": 42842 }, { "epoch": 0.9440579087408485, "grad_norm": 0.4898589849472046, "learning_rate": 2.454009000699076e-07, "loss": 0.0436, "step": 42843 }, { "epoch": 0.9440799440303647, "grad_norm": 0.6169763207435608, "learning_rate": 2.4520808534442826e-07, "loss": 0.0609, "step": 42844 }, { "epoch": 0.9441019793198808, "grad_norm": 0.5387537479400635, "learning_rate": 2.450153457730153e-07, "loss": 0.0449, "step": 42845 }, { "epoch": 0.944124014609397, "grad_norm": 0.33026114106178284, "learning_rate": 2.448226813566529e-07, "loss": 0.0401, "step": 42846 }, { "epoch": 0.9441460498989132, "grad_norm": 0.39840444922447205, "learning_rate": 2.4463009209632014e-07, "loss": 0.0514, "step": 42847 }, { "epoch": 0.9441680851884293, "grad_norm": 0.5058321356773376, "learning_rate": 2.444375779929997e-07, "loss": 0.0601, "step": 42848 }, { "epoch": 0.9441901204779455, "grad_norm": 0.3010009825229645, "learning_rate": 2.442451390476708e-07, "loss": 0.0609, "step": 42849 }, { "epoch": 0.9442121557674615, "grad_norm": 0.5108833909034729, "learning_rate": 2.4405277526131255e-07, "loss": 0.0601, "step": 42850 }, { "epoch": 0.9442341910569777, "grad_norm": 0.6274996399879456, "learning_rate": 2.4386048663490925e-07, "loss": 0.0567, "step": 42851 }, { "epoch": 0.9442562263464939, "grad_norm": 0.6280292272567749, "learning_rate": 2.4366827316943506e-07, "loss": 0.0679, "step": 42852 }, { "epoch": 0.94427826163601, "grad_norm": 0.7620991468429565, "learning_rate": 2.4347613486587097e-07, "loss": 0.0609, "step": 42853 }, { "epoch": 0.9443002969255262, "grad_norm": 0.6353538036346436, "learning_rate": 2.4328407172519773e-07, "loss": 0.0508, "step": 42854 }, { "epoch": 0.9443223322150424, "grad_norm": 0.4509308934211731, "learning_rate": 2.4309208374839133e-07, "loss": 0.0508, "step": 42855 }, { "epoch": 0.9443443675045585, "grad_norm": 0.42348867654800415, "learning_rate": 2.4290017093642925e-07, "loss": 0.0482, "step": 42856 }, { "epoch": 0.9443664027940747, "grad_norm": 0.8742665648460388, "learning_rate": 2.4270833329029076e-07, "loss": 0.0562, "step": 42857 }, { "epoch": 0.9443884380835909, "grad_norm": 0.8804855942726135, "learning_rate": 2.4251657081095334e-07, "loss": 0.0459, "step": 42858 }, { "epoch": 0.944410473373107, "grad_norm": 0.5375016331672668, "learning_rate": 2.423248834993913e-07, "loss": 0.0416, "step": 42859 }, { "epoch": 0.9444325086626232, "grad_norm": 0.6217046976089478, "learning_rate": 2.4213327135658215e-07, "loss": 0.0608, "step": 42860 }, { "epoch": 0.9444545439521393, "grad_norm": 0.7590651512145996, "learning_rate": 2.4194173438350174e-07, "loss": 0.0497, "step": 42861 }, { "epoch": 0.9444765792416555, "grad_norm": 0.5350049138069153, "learning_rate": 2.417502725811277e-07, "loss": 0.0328, "step": 42862 }, { "epoch": 0.9444986145311717, "grad_norm": 0.4926360845565796, "learning_rate": 2.415588859504325e-07, "loss": 0.073, "step": 42863 }, { "epoch": 0.9445206498206878, "grad_norm": 0.45502686500549316, "learning_rate": 2.413675744923921e-07, "loss": 0.0318, "step": 42864 }, { "epoch": 0.944542685110204, "grad_norm": 0.48759013414382935, "learning_rate": 2.411763382079807e-07, "loss": 0.0606, "step": 42865 }, { "epoch": 0.9445647203997202, "grad_norm": 0.15940654277801514, "learning_rate": 2.4098517709817246e-07, "loss": 0.0335, "step": 42866 }, { "epoch": 0.9445867556892363, "grad_norm": 0.5465648174285889, "learning_rate": 2.407940911639417e-07, "loss": 0.0513, "step": 42867 }, { "epoch": 0.9446087909787525, "grad_norm": 0.7101436257362366, "learning_rate": 2.406030804062592e-07, "loss": 0.0457, "step": 42868 }, { "epoch": 0.9446308262682687, "grad_norm": 0.5102372169494629, "learning_rate": 2.404121448261026e-07, "loss": 0.06, "step": 42869 }, { "epoch": 0.9446528615577848, "grad_norm": 0.8883495926856995, "learning_rate": 2.4022128442444113e-07, "loss": 0.0618, "step": 42870 }, { "epoch": 0.944674896847301, "grad_norm": 0.29089322686195374, "learning_rate": 2.400304992022473e-07, "loss": 0.0429, "step": 42871 }, { "epoch": 0.9446969321368172, "grad_norm": 0.7571743726730347, "learning_rate": 2.3983978916049033e-07, "loss": 0.0414, "step": 42872 }, { "epoch": 0.9447189674263333, "grad_norm": 0.3210725486278534, "learning_rate": 2.3964915430014777e-07, "loss": 0.0592, "step": 42873 }, { "epoch": 0.9447410027158495, "grad_norm": 0.3978828489780426, "learning_rate": 2.3945859462218724e-07, "loss": 0.0248, "step": 42874 }, { "epoch": 0.9447630380053655, "grad_norm": 0.3604346811771393, "learning_rate": 2.392681101275762e-07, "loss": 0.046, "step": 42875 }, { "epoch": 0.9447850732948817, "grad_norm": 0.5360692739486694, "learning_rate": 2.390777008172906e-07, "loss": 0.0614, "step": 42876 }, { "epoch": 0.9448071085843979, "grad_norm": 0.6771990656852722, "learning_rate": 2.388873666922964e-07, "loss": 0.0488, "step": 42877 }, { "epoch": 0.944829143873914, "grad_norm": 0.6576488614082336, "learning_rate": 2.38697107753566e-07, "loss": 0.0507, "step": 42878 }, { "epoch": 0.9448511791634302, "grad_norm": 0.7684285640716553, "learning_rate": 2.3850692400206543e-07, "loss": 0.0531, "step": 42879 }, { "epoch": 0.9448732144529464, "grad_norm": 1.3070482015609741, "learning_rate": 2.3831681543876383e-07, "loss": 0.0969, "step": 42880 }, { "epoch": 0.9448952497424625, "grad_norm": 0.6238648891448975, "learning_rate": 2.381267820646321e-07, "loss": 0.0656, "step": 42881 }, { "epoch": 0.9449172850319787, "grad_norm": 0.5401925444602966, "learning_rate": 2.3793682388063786e-07, "loss": 0.0345, "step": 42882 }, { "epoch": 0.9449393203214949, "grad_norm": 0.6962074637413025, "learning_rate": 2.3774694088774528e-07, "loss": 0.0652, "step": 42883 }, { "epoch": 0.944961355611011, "grad_norm": 0.6667433977127075, "learning_rate": 2.3755713308692363e-07, "loss": 0.0686, "step": 42884 }, { "epoch": 0.9449833909005272, "grad_norm": 0.6610879898071289, "learning_rate": 2.3736740047914206e-07, "loss": 0.0409, "step": 42885 }, { "epoch": 0.9450054261900434, "grad_norm": 0.7199327349662781, "learning_rate": 2.3717774306536156e-07, "loss": 0.095, "step": 42886 }, { "epoch": 0.9450274614795595, "grad_norm": 0.6534123420715332, "learning_rate": 2.3698816084655295e-07, "loss": 0.0521, "step": 42887 }, { "epoch": 0.9450494967690757, "grad_norm": 0.3488873839378357, "learning_rate": 2.367986538236805e-07, "loss": 0.0376, "step": 42888 }, { "epoch": 0.9450715320585918, "grad_norm": 0.634638786315918, "learning_rate": 2.3660922199770841e-07, "loss": 0.0685, "step": 42889 }, { "epoch": 0.945093567348108, "grad_norm": 0.710055947303772, "learning_rate": 2.3641986536960257e-07, "loss": 0.0723, "step": 42890 }, { "epoch": 0.9451156026376242, "grad_norm": 0.6109797358512878, "learning_rate": 2.3623058394032726e-07, "loss": 0.0653, "step": 42891 }, { "epoch": 0.9451376379271403, "grad_norm": 0.5649046301841736, "learning_rate": 2.3604137771084665e-07, "loss": 0.0589, "step": 42892 }, { "epoch": 0.9451596732166565, "grad_norm": 0.5336541533470154, "learning_rate": 2.35852246682125e-07, "loss": 0.0433, "step": 42893 }, { "epoch": 0.9451817085061727, "grad_norm": 0.23521175980567932, "learning_rate": 2.3566319085512488e-07, "loss": 0.0588, "step": 42894 }, { "epoch": 0.9452037437956888, "grad_norm": 0.6397950649261475, "learning_rate": 2.3547421023080716e-07, "loss": 0.0575, "step": 42895 }, { "epoch": 0.945225779085205, "grad_norm": 0.7637844681739807, "learning_rate": 2.3528530481013944e-07, "loss": 0.0501, "step": 42896 }, { "epoch": 0.9452478143747212, "grad_norm": 0.5259038805961609, "learning_rate": 2.3509647459407924e-07, "loss": 0.0648, "step": 42897 }, { "epoch": 0.9452698496642373, "grad_norm": 0.7222614884376526, "learning_rate": 2.349077195835908e-07, "loss": 0.0475, "step": 42898 }, { "epoch": 0.9452918849537534, "grad_norm": 0.5074080228805542, "learning_rate": 2.3471903977963505e-07, "loss": 0.0656, "step": 42899 }, { "epoch": 0.9453139202432695, "grad_norm": 0.3343150019645691, "learning_rate": 2.3453043518317285e-07, "loss": 0.0442, "step": 42900 }, { "epoch": 0.9453359555327857, "grad_norm": 0.6353058218955994, "learning_rate": 2.3434190579516679e-07, "loss": 0.0569, "step": 42901 }, { "epoch": 0.9453579908223019, "grad_norm": 1.0614432096481323, "learning_rate": 2.3415345161657277e-07, "loss": 0.0704, "step": 42902 }, { "epoch": 0.945380026111818, "grad_norm": 0.7692437767982483, "learning_rate": 2.3396507264835498e-07, "loss": 0.0727, "step": 42903 }, { "epoch": 0.9454020614013342, "grad_norm": 0.5159347057342529, "learning_rate": 2.3377676889146938e-07, "loss": 0.0404, "step": 42904 }, { "epoch": 0.9454240966908504, "grad_norm": 0.575317919254303, "learning_rate": 2.3358854034688015e-07, "loss": 0.0653, "step": 42905 }, { "epoch": 0.9454461319803665, "grad_norm": 0.34768831729888916, "learning_rate": 2.334003870155399e-07, "loss": 0.039, "step": 42906 }, { "epoch": 0.9454681672698827, "grad_norm": 0.6105165481567383, "learning_rate": 2.3321230889841118e-07, "loss": 0.0385, "step": 42907 }, { "epoch": 0.9454902025593989, "grad_norm": 0.7669717073440552, "learning_rate": 2.3302430599645152e-07, "loss": 0.0698, "step": 42908 }, { "epoch": 0.945512237848915, "grad_norm": 1.2721599340438843, "learning_rate": 2.3283637831061688e-07, "loss": 0.0937, "step": 42909 }, { "epoch": 0.9455342731384312, "grad_norm": 0.4641076922416687, "learning_rate": 2.326485258418648e-07, "loss": 0.0787, "step": 42910 }, { "epoch": 0.9455563084279474, "grad_norm": 0.5714580416679382, "learning_rate": 2.3246074859115286e-07, "loss": 0.0616, "step": 42911 }, { "epoch": 0.9455783437174635, "grad_norm": 0.7763760685920715, "learning_rate": 2.322730465594386e-07, "loss": 0.0419, "step": 42912 }, { "epoch": 0.9456003790069797, "grad_norm": 0.5512899160385132, "learning_rate": 2.3208541974767462e-07, "loss": 0.0371, "step": 42913 }, { "epoch": 0.9456224142964959, "grad_norm": 0.6193036437034607, "learning_rate": 2.3189786815682013e-07, "loss": 0.0764, "step": 42914 }, { "epoch": 0.945644449586012, "grad_norm": 0.25121572613716125, "learning_rate": 2.3171039178782938e-07, "loss": 0.0718, "step": 42915 }, { "epoch": 0.9456664848755282, "grad_norm": 0.8144117593765259, "learning_rate": 2.3152299064165662e-07, "loss": 0.0719, "step": 42916 }, { "epoch": 0.9456885201650443, "grad_norm": 0.5179982781410217, "learning_rate": 2.313356647192577e-07, "loss": 0.0611, "step": 42917 }, { "epoch": 0.9457105554545605, "grad_norm": 0.6728894114494324, "learning_rate": 2.311484140215836e-07, "loss": 0.0435, "step": 42918 }, { "epoch": 0.9457325907440767, "grad_norm": 0.4302442967891693, "learning_rate": 2.3096123854959184e-07, "loss": 0.0646, "step": 42919 }, { "epoch": 0.9457546260335928, "grad_norm": 0.44432005286216736, "learning_rate": 2.3077413830423332e-07, "loss": 0.0645, "step": 42920 }, { "epoch": 0.945776661323109, "grad_norm": 0.5541589856147766, "learning_rate": 2.30587113286464e-07, "loss": 0.0619, "step": 42921 }, { "epoch": 0.9457986966126252, "grad_norm": 0.5058351755142212, "learning_rate": 2.3040016349723302e-07, "loss": 0.0786, "step": 42922 }, { "epoch": 0.9458207319021413, "grad_norm": 0.7466254234313965, "learning_rate": 2.3021328893749306e-07, "loss": 0.0949, "step": 42923 }, { "epoch": 0.9458427671916574, "grad_norm": 0.54632967710495, "learning_rate": 2.3002648960819994e-07, "loss": 0.0818, "step": 42924 }, { "epoch": 0.9458648024811735, "grad_norm": 1.08633291721344, "learning_rate": 2.2983976551029962e-07, "loss": 0.0858, "step": 42925 }, { "epoch": 0.9458868377706897, "grad_norm": 0.16410847008228302, "learning_rate": 2.296531166447463e-07, "loss": 0.0482, "step": 42926 }, { "epoch": 0.9459088730602059, "grad_norm": 0.8097141981124878, "learning_rate": 2.294665430124909e-07, "loss": 0.0602, "step": 42927 }, { "epoch": 0.945930908349722, "grad_norm": 0.7736412882804871, "learning_rate": 2.292800446144827e-07, "loss": 0.0721, "step": 42928 }, { "epoch": 0.9459529436392382, "grad_norm": 0.6172462105751038, "learning_rate": 2.290936214516709e-07, "loss": 0.0671, "step": 42929 }, { "epoch": 0.9459749789287544, "grad_norm": 0.3730292320251465, "learning_rate": 2.289072735250064e-07, "loss": 0.0502, "step": 42930 }, { "epoch": 0.9459970142182705, "grad_norm": 0.5503851175308228, "learning_rate": 2.287210008354401e-07, "loss": 0.0467, "step": 42931 }, { "epoch": 0.9460190495077867, "grad_norm": 0.7008771896362305, "learning_rate": 2.285348033839163e-07, "loss": 0.0577, "step": 42932 }, { "epoch": 0.9460410847973029, "grad_norm": 0.6487745642662048, "learning_rate": 2.2834868117138752e-07, "loss": 0.063, "step": 42933 }, { "epoch": 0.946063120086819, "grad_norm": 0.5293341279029846, "learning_rate": 2.2816263419879802e-07, "loss": 0.0604, "step": 42934 }, { "epoch": 0.9460851553763352, "grad_norm": 0.6618198752403259, "learning_rate": 2.2797666246710036e-07, "loss": 0.0629, "step": 42935 }, { "epoch": 0.9461071906658514, "grad_norm": 0.6903921961784363, "learning_rate": 2.2779076597723713e-07, "loss": 0.0776, "step": 42936 }, { "epoch": 0.9461292259553675, "grad_norm": 0.550520122051239, "learning_rate": 2.2760494473015591e-07, "loss": 0.0688, "step": 42937 }, { "epoch": 0.9461512612448837, "grad_norm": 0.5534846782684326, "learning_rate": 2.2741919872680762e-07, "loss": 0.0684, "step": 42938 }, { "epoch": 0.9461732965343999, "grad_norm": 0.5964480638504028, "learning_rate": 2.2723352796813146e-07, "loss": 0.0652, "step": 42939 }, { "epoch": 0.946195331823916, "grad_norm": 0.5294179320335388, "learning_rate": 2.2704793245508005e-07, "loss": 0.0535, "step": 42940 }, { "epoch": 0.9462173671134322, "grad_norm": 0.8788111209869385, "learning_rate": 2.2686241218859093e-07, "loss": 0.0875, "step": 42941 }, { "epoch": 0.9462394024029483, "grad_norm": 0.41762417554855347, "learning_rate": 2.266769671696184e-07, "loss": 0.0589, "step": 42942 }, { "epoch": 0.9462614376924645, "grad_norm": 0.37926799058914185, "learning_rate": 2.2649159739909996e-07, "loss": 0.0554, "step": 42943 }, { "epoch": 0.9462834729819807, "grad_norm": 0.5309264063835144, "learning_rate": 2.263063028779816e-07, "loss": 0.0338, "step": 42944 }, { "epoch": 0.9463055082714968, "grad_norm": 0.870334804058075, "learning_rate": 2.261210836072075e-07, "loss": 0.0911, "step": 42945 }, { "epoch": 0.946327543561013, "grad_norm": 0.8614656329154968, "learning_rate": 2.2593593958772197e-07, "loss": 0.0546, "step": 42946 }, { "epoch": 0.9463495788505292, "grad_norm": 0.6137224435806274, "learning_rate": 2.2575087082046753e-07, "loss": 0.0795, "step": 42947 }, { "epoch": 0.9463716141400453, "grad_norm": 0.6710336804389954, "learning_rate": 2.255658773063851e-07, "loss": 0.0715, "step": 42948 }, { "epoch": 0.9463936494295614, "grad_norm": 1.2743569612503052, "learning_rate": 2.2538095904641898e-07, "loss": 0.0965, "step": 42949 }, { "epoch": 0.9464156847190776, "grad_norm": 0.6108224391937256, "learning_rate": 2.2519611604151004e-07, "loss": 0.0595, "step": 42950 }, { "epoch": 0.9464377200085937, "grad_norm": 0.7108964323997498, "learning_rate": 2.2501134829260083e-07, "loss": 0.0671, "step": 42951 }, { "epoch": 0.9464597552981099, "grad_norm": 0.6794322729110718, "learning_rate": 2.2482665580063232e-07, "loss": 0.0659, "step": 42952 }, { "epoch": 0.946481790587626, "grad_norm": 0.8732867240905762, "learning_rate": 2.2464203856654374e-07, "loss": 0.0718, "step": 42953 }, { "epoch": 0.9465038258771422, "grad_norm": 0.37943366169929504, "learning_rate": 2.2445749659127768e-07, "loss": 0.0389, "step": 42954 }, { "epoch": 0.9465258611666584, "grad_norm": 0.6244279742240906, "learning_rate": 2.2427302987577003e-07, "loss": 0.0646, "step": 42955 }, { "epoch": 0.9465478964561745, "grad_norm": 0.6827749609947205, "learning_rate": 2.2408863842096505e-07, "loss": 0.078, "step": 42956 }, { "epoch": 0.9465699317456907, "grad_norm": 0.6683164238929749, "learning_rate": 2.2390432222780032e-07, "loss": 0.0247, "step": 42957 }, { "epoch": 0.9465919670352069, "grad_norm": 0.2843016982078552, "learning_rate": 2.237200812972151e-07, "loss": 0.0467, "step": 42958 }, { "epoch": 0.946614002324723, "grad_norm": 0.7373642921447754, "learning_rate": 2.235359156301453e-07, "loss": 0.0688, "step": 42959 }, { "epoch": 0.9466360376142392, "grad_norm": 0.49444714188575745, "learning_rate": 2.2335182522753184e-07, "loss": 0.0818, "step": 42960 }, { "epoch": 0.9466580729037554, "grad_norm": 0.8122887015342712, "learning_rate": 2.231678100903123e-07, "loss": 0.068, "step": 42961 }, { "epoch": 0.9466801081932715, "grad_norm": 0.8371286392211914, "learning_rate": 2.229838702194209e-07, "loss": 0.0594, "step": 42962 }, { "epoch": 0.9467021434827877, "grad_norm": 0.5833749175071716, "learning_rate": 2.2280000561579862e-07, "loss": 0.0631, "step": 42963 }, { "epoch": 0.9467241787723039, "grad_norm": 1.0890733003616333, "learning_rate": 2.22616216280378e-07, "loss": 0.0667, "step": 42964 }, { "epoch": 0.94674621406182, "grad_norm": 0.2206396609544754, "learning_rate": 2.2243250221409995e-07, "loss": 0.0807, "step": 42965 }, { "epoch": 0.9467682493513362, "grad_norm": 0.637175440788269, "learning_rate": 2.222488634178954e-07, "loss": 0.0587, "step": 42966 }, { "epoch": 0.9467902846408524, "grad_norm": 0.4748746156692505, "learning_rate": 2.2206529989270364e-07, "loss": 0.0784, "step": 42967 }, { "epoch": 0.9468123199303685, "grad_norm": 0.5812698602676392, "learning_rate": 2.218818116394572e-07, "loss": 0.0507, "step": 42968 }, { "epoch": 0.9468343552198847, "grad_norm": 0.549339234828949, "learning_rate": 2.2169839865909036e-07, "loss": 0.0445, "step": 42969 }, { "epoch": 0.9468563905094008, "grad_norm": 0.39038026332855225, "learning_rate": 2.215150609525407e-07, "loss": 0.0488, "step": 42970 }, { "epoch": 0.946878425798917, "grad_norm": 0.61881422996521, "learning_rate": 2.2133179852073748e-07, "loss": 0.0406, "step": 42971 }, { "epoch": 0.9469004610884332, "grad_norm": 0.3145851194858551, "learning_rate": 2.2114861136461662e-07, "loss": 0.0541, "step": 42972 }, { "epoch": 0.9469224963779493, "grad_norm": 0.7828335762023926, "learning_rate": 2.209654994851107e-07, "loss": 0.0662, "step": 42973 }, { "epoch": 0.9469445316674654, "grad_norm": 0.534710168838501, "learning_rate": 2.2078246288315396e-07, "loss": 0.0726, "step": 42974 }, { "epoch": 0.9469665669569816, "grad_norm": 0.2801021635532379, "learning_rate": 2.2059950155967566e-07, "loss": 0.067, "step": 42975 }, { "epoch": 0.9469886022464977, "grad_norm": 0.5554687976837158, "learning_rate": 2.204166155156101e-07, "loss": 0.0521, "step": 42976 }, { "epoch": 0.9470106375360139, "grad_norm": 0.4970192611217499, "learning_rate": 2.202338047518898e-07, "loss": 0.0504, "step": 42977 }, { "epoch": 0.94703267282553, "grad_norm": 0.5922719836235046, "learning_rate": 2.200510692694424e-07, "loss": 0.0651, "step": 42978 }, { "epoch": 0.9470547081150462, "grad_norm": 0.364582359790802, "learning_rate": 2.1986840906920047e-07, "loss": 0.0521, "step": 42979 }, { "epoch": 0.9470767434045624, "grad_norm": 0.5653742551803589, "learning_rate": 2.1968582415209493e-07, "loss": 0.0881, "step": 42980 }, { "epoch": 0.9470987786940785, "grad_norm": 0.8537386059761047, "learning_rate": 2.1950331451905504e-07, "loss": 0.08, "step": 42981 }, { "epoch": 0.9471208139835947, "grad_norm": 0.8200358152389526, "learning_rate": 2.1932088017101171e-07, "loss": 0.0601, "step": 42982 }, { "epoch": 0.9471428492731109, "grad_norm": 0.7895739078521729, "learning_rate": 2.1913852110889254e-07, "loss": 0.0796, "step": 42983 }, { "epoch": 0.947164884562627, "grad_norm": 0.89638352394104, "learning_rate": 2.189562373336268e-07, "loss": 0.0752, "step": 42984 }, { "epoch": 0.9471869198521432, "grad_norm": 0.8037312030792236, "learning_rate": 2.1877402884614371e-07, "loss": 0.057, "step": 42985 }, { "epoch": 0.9472089551416594, "grad_norm": 0.7349008917808533, "learning_rate": 2.1859189564737092e-07, "loss": 0.0428, "step": 42986 }, { "epoch": 0.9472309904311755, "grad_norm": 0.5561025142669678, "learning_rate": 2.1840983773823432e-07, "loss": 0.066, "step": 42987 }, { "epoch": 0.9472530257206917, "grad_norm": 0.6781643629074097, "learning_rate": 2.182278551196648e-07, "loss": 0.0791, "step": 42988 }, { "epoch": 0.9472750610102079, "grad_norm": 0.7120988368988037, "learning_rate": 2.180459477925867e-07, "loss": 0.064, "step": 42989 }, { "epoch": 0.947297096299724, "grad_norm": 0.6152094006538391, "learning_rate": 2.1786411575792753e-07, "loss": 0.063, "step": 42990 }, { "epoch": 0.9473191315892402, "grad_norm": 0.4614799916744232, "learning_rate": 2.176823590166116e-07, "loss": 0.0609, "step": 42991 }, { "epoch": 0.9473411668787564, "grad_norm": 0.5119220018386841, "learning_rate": 2.175006775695698e-07, "loss": 0.0435, "step": 42992 }, { "epoch": 0.9473632021682725, "grad_norm": 0.6047932505607605, "learning_rate": 2.1731907141772145e-07, "loss": 0.0569, "step": 42993 }, { "epoch": 0.9473852374577887, "grad_norm": 1.077417254447937, "learning_rate": 2.1713754056199407e-07, "loss": 0.0728, "step": 42994 }, { "epoch": 0.9474072727473049, "grad_norm": 0.48062995076179504, "learning_rate": 2.1695608500331366e-07, "loss": 0.0323, "step": 42995 }, { "epoch": 0.947429308036821, "grad_norm": 1.3545557260513306, "learning_rate": 2.1677470474260275e-07, "loss": 0.0825, "step": 42996 }, { "epoch": 0.9474513433263372, "grad_norm": 0.5782691240310669, "learning_rate": 2.1659339978078563e-07, "loss": 0.0657, "step": 42997 }, { "epoch": 0.9474733786158532, "grad_norm": 0.5409330129623413, "learning_rate": 2.1641217011878488e-07, "loss": 0.0513, "step": 42998 }, { "epoch": 0.9474954139053694, "grad_norm": 0.8383802771568298, "learning_rate": 2.1623101575752647e-07, "loss": 0.1065, "step": 42999 }, { "epoch": 0.9475174491948856, "grad_norm": 0.9903904795646667, "learning_rate": 2.1604993669792962e-07, "loss": 0.0621, "step": 43000 }, { "epoch": 0.9475394844844017, "grad_norm": 0.6653733849525452, "learning_rate": 2.1586893294092025e-07, "loss": 0.0404, "step": 43001 }, { "epoch": 0.9475615197739179, "grad_norm": 0.34550583362579346, "learning_rate": 2.1568800448741598e-07, "loss": 0.0438, "step": 43002 }, { "epoch": 0.9475835550634341, "grad_norm": 0.582095205783844, "learning_rate": 2.1550715133834108e-07, "loss": 0.0425, "step": 43003 }, { "epoch": 0.9476055903529502, "grad_norm": 0.4502773880958557, "learning_rate": 2.1532637349461815e-07, "loss": 0.0431, "step": 43004 }, { "epoch": 0.9476276256424664, "grad_norm": 0.531403660774231, "learning_rate": 2.151456709571631e-07, "loss": 0.0634, "step": 43005 }, { "epoch": 0.9476496609319826, "grad_norm": 0.5037654042243958, "learning_rate": 2.1496504372690185e-07, "loss": 0.0553, "step": 43006 }, { "epoch": 0.9476716962214987, "grad_norm": 0.6548571586608887, "learning_rate": 2.1478449180475034e-07, "loss": 0.042, "step": 43007 }, { "epoch": 0.9476937315110149, "grad_norm": 0.4978049099445343, "learning_rate": 2.146040151916312e-07, "loss": 0.0476, "step": 43008 }, { "epoch": 0.947715766800531, "grad_norm": 0.3583994507789612, "learning_rate": 2.1442361388846198e-07, "loss": 0.0434, "step": 43009 }, { "epoch": 0.9477378020900472, "grad_norm": 0.8246048092842102, "learning_rate": 2.14243287896162e-07, "loss": 0.0345, "step": 43010 }, { "epoch": 0.9477598373795634, "grad_norm": 0.9250581860542297, "learning_rate": 2.1406303721565046e-07, "loss": 0.0811, "step": 43011 }, { "epoch": 0.9477818726690795, "grad_norm": 0.7747259736061096, "learning_rate": 2.1388286184784333e-07, "loss": 0.0519, "step": 43012 }, { "epoch": 0.9478039079585957, "grad_norm": 0.44276851415634155, "learning_rate": 2.1370276179366156e-07, "loss": 0.0357, "step": 43013 }, { "epoch": 0.9478259432481119, "grad_norm": 0.35225021839141846, "learning_rate": 2.135227370540177e-07, "loss": 0.0355, "step": 43014 }, { "epoch": 0.947847978537628, "grad_norm": 0.7884371876716614, "learning_rate": 2.1334278762983274e-07, "loss": 0.0611, "step": 43015 }, { "epoch": 0.9478700138271442, "grad_norm": 0.6711758971214294, "learning_rate": 2.1316291352202256e-07, "loss": 0.0516, "step": 43016 }, { "epoch": 0.9478920491166604, "grad_norm": 0.5281103253364563, "learning_rate": 2.1298311473150312e-07, "loss": 0.0753, "step": 43017 }, { "epoch": 0.9479140844061765, "grad_norm": 0.5847329497337341, "learning_rate": 2.1280339125919202e-07, "loss": 0.0498, "step": 43018 }, { "epoch": 0.9479361196956927, "grad_norm": 0.4039912223815918, "learning_rate": 2.126237431060002e-07, "loss": 0.0489, "step": 43019 }, { "epoch": 0.9479581549852089, "grad_norm": 0.7147493362426758, "learning_rate": 2.1244417027284692e-07, "loss": 0.108, "step": 43020 }, { "epoch": 0.947980190274725, "grad_norm": 0.640357494354248, "learning_rate": 2.122646727606431e-07, "loss": 0.0577, "step": 43021 }, { "epoch": 0.9480022255642412, "grad_norm": 0.7642323970794678, "learning_rate": 2.1208525057030637e-07, "loss": 0.0614, "step": 43022 }, { "epoch": 0.9480242608537572, "grad_norm": 0.5846445560455322, "learning_rate": 2.119059037027493e-07, "loss": 0.0615, "step": 43023 }, { "epoch": 0.9480462961432734, "grad_norm": 0.6685633659362793, "learning_rate": 2.1172663215888622e-07, "loss": 0.0579, "step": 43024 }, { "epoch": 0.9480683314327896, "grad_norm": 0.4781157374382019, "learning_rate": 2.11547435939628e-07, "loss": 0.0578, "step": 43025 }, { "epoch": 0.9480903667223057, "grad_norm": 0.42062193155288696, "learning_rate": 2.113683150458906e-07, "loss": 0.0688, "step": 43026 }, { "epoch": 0.9481124020118219, "grad_norm": 0.6342803835868835, "learning_rate": 2.111892694785833e-07, "loss": 0.0519, "step": 43027 }, { "epoch": 0.9481344373013381, "grad_norm": 1.0883814096450806, "learning_rate": 2.1101029923862036e-07, "loss": 0.0731, "step": 43028 }, { "epoch": 0.9481564725908542, "grad_norm": 0.7679498791694641, "learning_rate": 2.1083140432691107e-07, "loss": 0.0567, "step": 43029 }, { "epoch": 0.9481785078803704, "grad_norm": 0.48214420676231384, "learning_rate": 2.10652584744368e-07, "loss": 0.0492, "step": 43030 }, { "epoch": 0.9482005431698866, "grad_norm": 0.6656879186630249, "learning_rate": 2.1047384049190378e-07, "loss": 0.0537, "step": 43031 }, { "epoch": 0.9482225784594027, "grad_norm": 0.6536292433738708, "learning_rate": 2.10295171570426e-07, "loss": 0.0597, "step": 43032 }, { "epoch": 0.9482446137489189, "grad_norm": 0.5012789368629456, "learning_rate": 2.101165779808456e-07, "loss": 0.0594, "step": 43033 }, { "epoch": 0.948266649038435, "grad_norm": 0.7262697815895081, "learning_rate": 2.0993805972407353e-07, "loss": 0.0376, "step": 43034 }, { "epoch": 0.9482886843279512, "grad_norm": 0.39493894577026367, "learning_rate": 2.097596168010174e-07, "loss": 0.0434, "step": 43035 }, { "epoch": 0.9483107196174674, "grad_norm": 0.3497539758682251, "learning_rate": 2.0958124921258815e-07, "loss": 0.0402, "step": 43036 }, { "epoch": 0.9483327549069835, "grad_norm": 0.35945382714271545, "learning_rate": 2.0940295695969003e-07, "loss": 0.0856, "step": 43037 }, { "epoch": 0.9483547901964997, "grad_norm": 0.5450636744499207, "learning_rate": 2.0922474004323566e-07, "loss": 0.0376, "step": 43038 }, { "epoch": 0.9483768254860159, "grad_norm": 0.47638431191444397, "learning_rate": 2.0904659846412932e-07, "loss": 0.0607, "step": 43039 }, { "epoch": 0.948398860775532, "grad_norm": 0.4049592912197113, "learning_rate": 2.0886853222328195e-07, "loss": 0.0483, "step": 43040 }, { "epoch": 0.9484208960650482, "grad_norm": 0.37076452374458313, "learning_rate": 2.0869054132159949e-07, "loss": 0.0423, "step": 43041 }, { "epoch": 0.9484429313545644, "grad_norm": 0.960541307926178, "learning_rate": 2.085126257599862e-07, "loss": 0.0709, "step": 43042 }, { "epoch": 0.9484649666440805, "grad_norm": 0.628032922744751, "learning_rate": 2.0833478553935136e-07, "loss": 0.0408, "step": 43043 }, { "epoch": 0.9484870019335967, "grad_norm": 0.5831080675125122, "learning_rate": 2.0815702066059594e-07, "loss": 0.0507, "step": 43044 }, { "epoch": 0.9485090372231129, "grad_norm": 0.7952646017074585, "learning_rate": 2.0797933112463253e-07, "loss": 0.064, "step": 43045 }, { "epoch": 0.948531072512629, "grad_norm": 0.680242121219635, "learning_rate": 2.0780171693236204e-07, "loss": 0.0931, "step": 43046 }, { "epoch": 0.9485531078021452, "grad_norm": 0.6786580681800842, "learning_rate": 2.076241780846888e-07, "loss": 0.0501, "step": 43047 }, { "epoch": 0.9485751430916612, "grad_norm": 0.6018152832984924, "learning_rate": 2.0744671458251873e-07, "loss": 0.0698, "step": 43048 }, { "epoch": 0.9485971783811774, "grad_norm": 0.833837628364563, "learning_rate": 2.072693264267528e-07, "loss": 0.0764, "step": 43049 }, { "epoch": 0.9486192136706936, "grad_norm": 0.4305514097213745, "learning_rate": 2.0709201361829854e-07, "loss": 0.0539, "step": 43050 }, { "epoch": 0.9486412489602097, "grad_norm": 0.660926342010498, "learning_rate": 2.06914776158057e-07, "loss": 0.0566, "step": 43051 }, { "epoch": 0.9486632842497259, "grad_norm": 0.43932998180389404, "learning_rate": 2.067376140469307e-07, "loss": 0.0623, "step": 43052 }, { "epoch": 0.9486853195392421, "grad_norm": 0.6195969581604004, "learning_rate": 2.0656052728582232e-07, "loss": 0.0674, "step": 43053 }, { "epoch": 0.9487073548287582, "grad_norm": 0.6803669929504395, "learning_rate": 2.0638351587563608e-07, "loss": 0.0702, "step": 43054 }, { "epoch": 0.9487293901182744, "grad_norm": 0.3369102478027344, "learning_rate": 2.0620657981726964e-07, "loss": 0.0443, "step": 43055 }, { "epoch": 0.9487514254077906, "grad_norm": 0.4800693392753601, "learning_rate": 2.0602971911162561e-07, "loss": 0.0414, "step": 43056 }, { "epoch": 0.9487734606973067, "grad_norm": 0.6074110865592957, "learning_rate": 2.0585293375960655e-07, "loss": 0.0629, "step": 43057 }, { "epoch": 0.9487954959868229, "grad_norm": 0.7229354977607727, "learning_rate": 2.0567622376211182e-07, "loss": 0.0857, "step": 43058 }, { "epoch": 0.948817531276339, "grad_norm": 0.7226477265357971, "learning_rate": 2.054995891200423e-07, "loss": 0.0577, "step": 43059 }, { "epoch": 0.9488395665658552, "grad_norm": 0.42612186074256897, "learning_rate": 2.0532302983429396e-07, "loss": 0.072, "step": 43060 }, { "epoch": 0.9488616018553714, "grad_norm": 0.5546046495437622, "learning_rate": 2.0514654590577108e-07, "loss": 0.041, "step": 43061 }, { "epoch": 0.9488836371448875, "grad_norm": 0.4407677948474884, "learning_rate": 2.0497013733536963e-07, "loss": 0.04, "step": 43062 }, { "epoch": 0.9489056724344037, "grad_norm": 0.3413545489311218, "learning_rate": 2.0479380412398885e-07, "loss": 0.0552, "step": 43063 }, { "epoch": 0.9489277077239199, "grad_norm": 0.7005051970481873, "learning_rate": 2.0461754627252804e-07, "loss": 0.0593, "step": 43064 }, { "epoch": 0.948949743013436, "grad_norm": 0.621289849281311, "learning_rate": 2.0444136378188316e-07, "loss": 0.0739, "step": 43065 }, { "epoch": 0.9489717783029522, "grad_norm": 0.8063599467277527, "learning_rate": 2.0426525665295516e-07, "loss": 0.0578, "step": 43066 }, { "epoch": 0.9489938135924684, "grad_norm": 0.5926001071929932, "learning_rate": 2.0408922488663329e-07, "loss": 0.0447, "step": 43067 }, { "epoch": 0.9490158488819845, "grad_norm": 0.6759797930717468, "learning_rate": 2.0391326848382353e-07, "loss": 0.0582, "step": 43068 }, { "epoch": 0.9490378841715007, "grad_norm": 0.5982341170310974, "learning_rate": 2.0373738744541515e-07, "loss": 0.0461, "step": 43069 }, { "epoch": 0.9490599194610169, "grad_norm": 0.47690120339393616, "learning_rate": 2.035615817723091e-07, "loss": 0.0581, "step": 43070 }, { "epoch": 0.949081954750533, "grad_norm": 0.7318919897079468, "learning_rate": 2.0338585146539633e-07, "loss": 0.0658, "step": 43071 }, { "epoch": 0.9491039900400491, "grad_norm": 0.6086507439613342, "learning_rate": 2.032101965255745e-07, "loss": 0.0778, "step": 43072 }, { "epoch": 0.9491260253295652, "grad_norm": 0.5181049704551697, "learning_rate": 2.0303461695373782e-07, "loss": 0.0571, "step": 43073 }, { "epoch": 0.9491480606190814, "grad_norm": 0.5372576713562012, "learning_rate": 2.028591127507806e-07, "loss": 0.0637, "step": 43074 }, { "epoch": 0.9491700959085976, "grad_norm": 0.7935894727706909, "learning_rate": 2.026836839175955e-07, "loss": 0.0834, "step": 43075 }, { "epoch": 0.9491921311981137, "grad_norm": 0.7901080250740051, "learning_rate": 2.0250833045507844e-07, "loss": 0.0617, "step": 43076 }, { "epoch": 0.9492141664876299, "grad_norm": 0.6268485188484192, "learning_rate": 2.0233305236412368e-07, "loss": 0.0631, "step": 43077 }, { "epoch": 0.9492362017771461, "grad_norm": 0.381957471370697, "learning_rate": 2.021578496456189e-07, "loss": 0.042, "step": 43078 }, { "epoch": 0.9492582370666622, "grad_norm": 0.389830082654953, "learning_rate": 2.0198272230046e-07, "loss": 0.0438, "step": 43079 }, { "epoch": 0.9492802723561784, "grad_norm": 0.4619137644767761, "learning_rate": 2.0180767032953962e-07, "loss": 0.0526, "step": 43080 }, { "epoch": 0.9493023076456946, "grad_norm": 0.5360146760940552, "learning_rate": 2.0163269373374704e-07, "loss": 0.0527, "step": 43081 }, { "epoch": 0.9493243429352107, "grad_norm": 0.8703258633613586, "learning_rate": 2.0145779251397656e-07, "loss": 0.0758, "step": 43082 }, { "epoch": 0.9493463782247269, "grad_norm": 0.2928774356842041, "learning_rate": 2.0128296667111413e-07, "loss": 0.0494, "step": 43083 }, { "epoch": 0.9493684135142431, "grad_norm": 0.5633123517036438, "learning_rate": 2.0110821620605402e-07, "loss": 0.055, "step": 43084 }, { "epoch": 0.9493904488037592, "grad_norm": 0.20517827570438385, "learning_rate": 2.009335411196872e-07, "loss": 0.0235, "step": 43085 }, { "epoch": 0.9494124840932754, "grad_norm": 0.4894367456436157, "learning_rate": 2.0075894141289963e-07, "loss": 0.0627, "step": 43086 }, { "epoch": 0.9494345193827916, "grad_norm": 0.5931201577186584, "learning_rate": 2.0058441708658558e-07, "loss": 0.0463, "step": 43087 }, { "epoch": 0.9494565546723077, "grad_norm": 0.7870681285858154, "learning_rate": 2.0040996814162772e-07, "loss": 0.0545, "step": 43088 }, { "epoch": 0.9494785899618239, "grad_norm": 0.8002670407295227, "learning_rate": 2.0023559457892193e-07, "loss": 0.0619, "step": 43089 }, { "epoch": 0.94950062525134, "grad_norm": 0.6310071349143982, "learning_rate": 2.0006129639934922e-07, "loss": 0.0631, "step": 43090 }, { "epoch": 0.9495226605408562, "grad_norm": 0.6138166785240173, "learning_rate": 1.9988707360380222e-07, "loss": 0.0304, "step": 43091 }, { "epoch": 0.9495446958303724, "grad_norm": 0.56392902135849, "learning_rate": 1.9971292619316518e-07, "loss": 0.077, "step": 43092 }, { "epoch": 0.9495667311198885, "grad_norm": 0.4998040199279785, "learning_rate": 1.9953885416832906e-07, "loss": 0.0698, "step": 43093 }, { "epoch": 0.9495887664094047, "grad_norm": 0.800205647945404, "learning_rate": 1.9936485753017653e-07, "loss": 0.0599, "step": 43094 }, { "epoch": 0.9496108016989209, "grad_norm": 0.42298170924186707, "learning_rate": 1.9919093627959685e-07, "loss": 0.0452, "step": 43095 }, { "epoch": 0.949632836988437, "grad_norm": 0.8208634257316589, "learning_rate": 1.9901709041747263e-07, "loss": 0.0488, "step": 43096 }, { "epoch": 0.9496548722779531, "grad_norm": 0.787488579750061, "learning_rate": 1.9884331994469317e-07, "loss": 0.05, "step": 43097 }, { "epoch": 0.9496769075674693, "grad_norm": 0.616300106048584, "learning_rate": 1.9866962486214113e-07, "loss": 0.0437, "step": 43098 }, { "epoch": 0.9496989428569854, "grad_norm": 0.3208116590976715, "learning_rate": 1.9849600517070244e-07, "loss": 0.0318, "step": 43099 }, { "epoch": 0.9497209781465016, "grad_norm": 0.4679831862449646, "learning_rate": 1.9832246087125973e-07, "loss": 0.0703, "step": 43100 }, { "epoch": 0.9497430134360177, "grad_norm": 0.34291326999664307, "learning_rate": 1.9814899196469893e-07, "loss": 0.0372, "step": 43101 }, { "epoch": 0.9497650487255339, "grad_norm": 0.598770022392273, "learning_rate": 1.9797559845190105e-07, "loss": 0.0491, "step": 43102 }, { "epoch": 0.9497870840150501, "grad_norm": 0.6013936996459961, "learning_rate": 1.9780228033375202e-07, "loss": 0.0594, "step": 43103 }, { "epoch": 0.9498091193045662, "grad_norm": 0.7347452640533447, "learning_rate": 1.976290376111345e-07, "loss": 0.0627, "step": 43104 }, { "epoch": 0.9498311545940824, "grad_norm": 0.5089107751846313, "learning_rate": 1.9745587028492772e-07, "loss": 0.0605, "step": 43105 }, { "epoch": 0.9498531898835986, "grad_norm": 0.7055551409721375, "learning_rate": 1.972827783560177e-07, "loss": 0.0621, "step": 43106 }, { "epoch": 0.9498752251731147, "grad_norm": 0.642940104007721, "learning_rate": 1.9710976182528374e-07, "loss": 0.0518, "step": 43107 }, { "epoch": 0.9498972604626309, "grad_norm": 0.6510463356971741, "learning_rate": 1.969368206936084e-07, "loss": 0.0827, "step": 43108 }, { "epoch": 0.9499192957521471, "grad_norm": 0.6606161594390869, "learning_rate": 1.9676395496187105e-07, "loss": 0.0426, "step": 43109 }, { "epoch": 0.9499413310416632, "grad_norm": 0.9494019150733948, "learning_rate": 1.9659116463095262e-07, "loss": 0.0646, "step": 43110 }, { "epoch": 0.9499633663311794, "grad_norm": 0.730324923992157, "learning_rate": 1.9641844970173405e-07, "loss": 0.0511, "step": 43111 }, { "epoch": 0.9499854016206956, "grad_norm": 0.7440062761306763, "learning_rate": 1.9624581017509468e-07, "loss": 0.0758, "step": 43112 }, { "epoch": 0.9500074369102117, "grad_norm": 0.5542445778846741, "learning_rate": 1.9607324605191378e-07, "loss": 0.0504, "step": 43113 }, { "epoch": 0.9500294721997279, "grad_norm": 0.7567926049232483, "learning_rate": 1.9590075733307066e-07, "loss": 0.0623, "step": 43114 }, { "epoch": 0.950051507489244, "grad_norm": 0.545511782169342, "learning_rate": 1.9572834401944462e-07, "loss": 0.0621, "step": 43115 }, { "epoch": 0.9500735427787602, "grad_norm": 0.4794572591781616, "learning_rate": 1.955560061119116e-07, "loss": 0.0272, "step": 43116 }, { "epoch": 0.9500955780682764, "grad_norm": 0.5906600952148438, "learning_rate": 1.9538374361134925e-07, "loss": 0.0593, "step": 43117 }, { "epoch": 0.9501176133577925, "grad_norm": 0.4804651439189911, "learning_rate": 1.9521155651863853e-07, "loss": 0.0461, "step": 43118 }, { "epoch": 0.9501396486473087, "grad_norm": 0.787164032459259, "learning_rate": 1.9503944483465208e-07, "loss": 0.0682, "step": 43119 }, { "epoch": 0.9501616839368249, "grad_norm": 0.46528416872024536, "learning_rate": 1.9486740856027086e-07, "loss": 0.0637, "step": 43120 }, { "epoch": 0.950183719226341, "grad_norm": 0.5218616127967834, "learning_rate": 1.9469544769636915e-07, "loss": 0.0503, "step": 43121 }, { "epoch": 0.9502057545158571, "grad_norm": 0.5867910385131836, "learning_rate": 1.945235622438213e-07, "loss": 0.0731, "step": 43122 }, { "epoch": 0.9502277898053733, "grad_norm": 0.5116632580757141, "learning_rate": 1.9435175220350487e-07, "loss": 0.0872, "step": 43123 }, { "epoch": 0.9502498250948894, "grad_norm": 0.5611277222633362, "learning_rate": 1.941800175762942e-07, "loss": 0.0538, "step": 43124 }, { "epoch": 0.9502718603844056, "grad_norm": 0.5608580112457275, "learning_rate": 1.940083583630653e-07, "loss": 0.0664, "step": 43125 }, { "epoch": 0.9502938956739218, "grad_norm": 0.7550959587097168, "learning_rate": 1.9383677456468906e-07, "loss": 0.0641, "step": 43126 }, { "epoch": 0.9503159309634379, "grad_norm": 0.8978824019432068, "learning_rate": 1.9366526618204484e-07, "loss": 0.0707, "step": 43127 }, { "epoch": 0.9503379662529541, "grad_norm": 0.6572155952453613, "learning_rate": 1.934938332160019e-07, "loss": 0.0728, "step": 43128 }, { "epoch": 0.9503600015424702, "grad_norm": 0.5156523585319519, "learning_rate": 1.9332247566743456e-07, "loss": 0.0483, "step": 43129 }, { "epoch": 0.9503820368319864, "grad_norm": 0.6487200260162354, "learning_rate": 1.9315119353721545e-07, "loss": 0.0533, "step": 43130 }, { "epoch": 0.9504040721215026, "grad_norm": 0.4124745726585388, "learning_rate": 1.929799868262172e-07, "loss": 0.0529, "step": 43131 }, { "epoch": 0.9504261074110187, "grad_norm": 0.611957311630249, "learning_rate": 1.9280885553531245e-07, "loss": 0.0799, "step": 43132 }, { "epoch": 0.9504481427005349, "grad_norm": 1.1907094717025757, "learning_rate": 1.926377996653722e-07, "loss": 0.0826, "step": 43133 }, { "epoch": 0.9504701779900511, "grad_norm": 0.3579102158546448, "learning_rate": 1.9246681921726906e-07, "loss": 0.0439, "step": 43134 }, { "epoch": 0.9504922132795672, "grad_norm": 0.9968783855438232, "learning_rate": 1.922959141918723e-07, "loss": 0.0887, "step": 43135 }, { "epoch": 0.9505142485690834, "grad_norm": 0.6502405405044556, "learning_rate": 1.9212508459005297e-07, "loss": 0.0614, "step": 43136 }, { "epoch": 0.9505362838585996, "grad_norm": 0.32991012930870056, "learning_rate": 1.9195433041268195e-07, "loss": 0.0386, "step": 43137 }, { "epoch": 0.9505583191481157, "grad_norm": 0.8745673298835754, "learning_rate": 1.9178365166062696e-07, "loss": 0.0669, "step": 43138 }, { "epoch": 0.9505803544376319, "grad_norm": 0.5333910584449768, "learning_rate": 1.916130483347589e-07, "loss": 0.0473, "step": 43139 }, { "epoch": 0.9506023897271481, "grad_norm": 0.572176992893219, "learning_rate": 1.9144252043594545e-07, "loss": 0.0572, "step": 43140 }, { "epoch": 0.9506244250166642, "grad_norm": 0.8325896263122559, "learning_rate": 1.912720679650576e-07, "loss": 0.0533, "step": 43141 }, { "epoch": 0.9506464603061804, "grad_norm": 0.48869240283966064, "learning_rate": 1.9110169092296125e-07, "loss": 0.0454, "step": 43142 }, { "epoch": 0.9506684955956965, "grad_norm": 0.6650786995887756, "learning_rate": 1.9093138931052745e-07, "loss": 0.0582, "step": 43143 }, { "epoch": 0.9506905308852127, "grad_norm": 0.7655796408653259, "learning_rate": 1.907611631286188e-07, "loss": 0.0823, "step": 43144 }, { "epoch": 0.9507125661747289, "grad_norm": 0.5991787910461426, "learning_rate": 1.9059101237810628e-07, "loss": 0.0553, "step": 43145 }, { "epoch": 0.950734601464245, "grad_norm": 0.48485898971557617, "learning_rate": 1.904209370598542e-07, "loss": 0.0607, "step": 43146 }, { "epoch": 0.9507566367537611, "grad_norm": 0.7723761796951294, "learning_rate": 1.9025093717473018e-07, "loss": 0.0455, "step": 43147 }, { "epoch": 0.9507786720432773, "grad_norm": 0.6454999446868896, "learning_rate": 1.9008101272360025e-07, "loss": 0.0607, "step": 43148 }, { "epoch": 0.9508007073327934, "grad_norm": 0.5084917545318604, "learning_rate": 1.8991116370732865e-07, "loss": 0.0813, "step": 43149 }, { "epoch": 0.9508227426223096, "grad_norm": 0.7344123721122742, "learning_rate": 1.8974139012678137e-07, "loss": 0.0491, "step": 43150 }, { "epoch": 0.9508447779118258, "grad_norm": 0.9592769742012024, "learning_rate": 1.8957169198282441e-07, "loss": 0.0711, "step": 43151 }, { "epoch": 0.9508668132013419, "grad_norm": 1.1344571113586426, "learning_rate": 1.8940206927631875e-07, "loss": 0.0633, "step": 43152 }, { "epoch": 0.9508888484908581, "grad_norm": 0.552609920501709, "learning_rate": 1.8923252200813202e-07, "loss": 0.0338, "step": 43153 }, { "epoch": 0.9509108837803742, "grad_norm": 0.6859228610992432, "learning_rate": 1.8906305017912518e-07, "loss": 0.0527, "step": 43154 }, { "epoch": 0.9509329190698904, "grad_norm": 0.8574332594871521, "learning_rate": 1.8889365379016256e-07, "loss": 0.0682, "step": 43155 }, { "epoch": 0.9509549543594066, "grad_norm": 0.6483252644538879, "learning_rate": 1.8872433284210677e-07, "loss": 0.0792, "step": 43156 }, { "epoch": 0.9509769896489227, "grad_norm": 0.3523779809474945, "learning_rate": 1.8855508733582215e-07, "loss": 0.0678, "step": 43157 }, { "epoch": 0.9509990249384389, "grad_norm": 0.5963972806930542, "learning_rate": 1.8838591727216804e-07, "loss": 0.0554, "step": 43158 }, { "epoch": 0.9510210602279551, "grad_norm": 0.44387492537498474, "learning_rate": 1.8821682265200534e-07, "loss": 0.0452, "step": 43159 }, { "epoch": 0.9510430955174712, "grad_norm": 0.8576247096061707, "learning_rate": 1.880478034762001e-07, "loss": 0.0675, "step": 43160 }, { "epoch": 0.9510651308069874, "grad_norm": 0.3973507583141327, "learning_rate": 1.8787885974560825e-07, "loss": 0.0621, "step": 43161 }, { "epoch": 0.9510871660965036, "grad_norm": 0.9224099516868591, "learning_rate": 1.8770999146109414e-07, "loss": 0.0848, "step": 43162 }, { "epoch": 0.9511092013860197, "grad_norm": 0.7183550596237183, "learning_rate": 1.8754119862351205e-07, "loss": 0.0545, "step": 43163 }, { "epoch": 0.9511312366755359, "grad_norm": 0.3711816668510437, "learning_rate": 1.8737248123372963e-07, "loss": 0.0633, "step": 43164 }, { "epoch": 0.9511532719650521, "grad_norm": 0.4639951288700104, "learning_rate": 1.8720383929259955e-07, "loss": 0.0448, "step": 43165 }, { "epoch": 0.9511753072545682, "grad_norm": 0.6153334975242615, "learning_rate": 1.8703527280098608e-07, "loss": 0.0652, "step": 43166 }, { "epoch": 0.9511973425440844, "grad_norm": 0.5564680099487305, "learning_rate": 1.8686678175974358e-07, "loss": 0.0589, "step": 43167 }, { "epoch": 0.9512193778336006, "grad_norm": 0.8589546084403992, "learning_rate": 1.8669836616973301e-07, "loss": 0.0578, "step": 43168 }, { "epoch": 0.9512414131231167, "grad_norm": 0.521250307559967, "learning_rate": 1.8653002603181035e-07, "loss": 0.0637, "step": 43169 }, { "epoch": 0.9512634484126329, "grad_norm": 0.6686290502548218, "learning_rate": 1.8636176134683492e-07, "loss": 0.0622, "step": 43170 }, { "epoch": 0.9512854837021489, "grad_norm": 1.1434237957000732, "learning_rate": 1.8619357211566101e-07, "loss": 0.0793, "step": 43171 }, { "epoch": 0.9513075189916651, "grad_norm": 0.6218320727348328, "learning_rate": 1.8602545833914796e-07, "loss": 0.0663, "step": 43172 }, { "epoch": 0.9513295542811813, "grad_norm": 0.7422053813934326, "learning_rate": 1.8585742001815177e-07, "loss": 0.0431, "step": 43173 }, { "epoch": 0.9513515895706974, "grad_norm": 1.0231248140335083, "learning_rate": 1.8568945715352669e-07, "loss": 0.0737, "step": 43174 }, { "epoch": 0.9513736248602136, "grad_norm": 0.6728469729423523, "learning_rate": 1.8552156974613045e-07, "loss": 0.0712, "step": 43175 }, { "epoch": 0.9513956601497298, "grad_norm": 0.7343281507492065, "learning_rate": 1.8535375779681562e-07, "loss": 0.05, "step": 43176 }, { "epoch": 0.9514176954392459, "grad_norm": 0.3951411545276642, "learning_rate": 1.8518602130643992e-07, "loss": 0.0359, "step": 43177 }, { "epoch": 0.9514397307287621, "grad_norm": 1.021177887916565, "learning_rate": 1.8501836027585428e-07, "loss": 0.0744, "step": 43178 }, { "epoch": 0.9514617660182783, "grad_norm": 0.3844291567802429, "learning_rate": 1.848507747059147e-07, "loss": 0.0695, "step": 43179 }, { "epoch": 0.9514838013077944, "grad_norm": 1.0781995058059692, "learning_rate": 1.846832645974772e-07, "loss": 0.0842, "step": 43180 }, { "epoch": 0.9515058365973106, "grad_norm": 0.7079887986183167, "learning_rate": 1.845158299513894e-07, "loss": 0.055, "step": 43181 }, { "epoch": 0.9515278718868267, "grad_norm": 0.648684024810791, "learning_rate": 1.843484707685089e-07, "loss": 0.0503, "step": 43182 }, { "epoch": 0.9515499071763429, "grad_norm": 0.8612909913063049, "learning_rate": 1.8418118704968678e-07, "loss": 0.103, "step": 43183 }, { "epoch": 0.9515719424658591, "grad_norm": 0.22443385422229767, "learning_rate": 1.8401397879577398e-07, "loss": 0.0464, "step": 43184 }, { "epoch": 0.9515939777553752, "grad_norm": 0.6286749839782715, "learning_rate": 1.8384684600762313e-07, "loss": 0.0584, "step": 43185 }, { "epoch": 0.9516160130448914, "grad_norm": 0.3742391765117645, "learning_rate": 1.836797886860836e-07, "loss": 0.0326, "step": 43186 }, { "epoch": 0.9516380483344076, "grad_norm": 0.3781905472278595, "learning_rate": 1.8351280683201132e-07, "loss": 0.055, "step": 43187 }, { "epoch": 0.9516600836239237, "grad_norm": 0.8376027345657349, "learning_rate": 1.8334590044625067e-07, "loss": 0.0645, "step": 43188 }, { "epoch": 0.9516821189134399, "grad_norm": 0.6141937971115112, "learning_rate": 1.8317906952965758e-07, "loss": 0.054, "step": 43189 }, { "epoch": 0.9517041542029561, "grad_norm": 0.35086148977279663, "learning_rate": 1.8301231408307638e-07, "loss": 0.0698, "step": 43190 }, { "epoch": 0.9517261894924722, "grad_norm": 0.4154853820800781, "learning_rate": 1.8284563410735978e-07, "loss": 0.0362, "step": 43191 }, { "epoch": 0.9517482247819884, "grad_norm": 0.8225875496864319, "learning_rate": 1.8267902960335538e-07, "loss": 0.0491, "step": 43192 }, { "epoch": 0.9517702600715046, "grad_norm": 0.6042379140853882, "learning_rate": 1.825125005719125e-07, "loss": 0.0553, "step": 43193 }, { "epoch": 0.9517922953610207, "grad_norm": 0.17621289193630219, "learning_rate": 1.8234604701388046e-07, "loss": 0.0534, "step": 43194 }, { "epoch": 0.9518143306505369, "grad_norm": 0.7980808615684509, "learning_rate": 1.8217966893010363e-07, "loss": 0.0573, "step": 43195 }, { "epoch": 0.9518363659400529, "grad_norm": 0.5436545610427856, "learning_rate": 1.820133663214346e-07, "loss": 0.0395, "step": 43196 }, { "epoch": 0.9518584012295691, "grad_norm": 0.5895307660102844, "learning_rate": 1.8184713918871442e-07, "loss": 0.0599, "step": 43197 }, { "epoch": 0.9518804365190853, "grad_norm": 0.4434468150138855, "learning_rate": 1.8168098753279572e-07, "loss": 0.049, "step": 43198 }, { "epoch": 0.9519024718086014, "grad_norm": 0.3233889043331146, "learning_rate": 1.8151491135452113e-07, "loss": 0.0455, "step": 43199 }, { "epoch": 0.9519245070981176, "grad_norm": 0.584099292755127, "learning_rate": 1.8134891065473668e-07, "loss": 0.0568, "step": 43200 }, { "epoch": 0.9519465423876338, "grad_norm": 0.5496185421943665, "learning_rate": 1.8118298543428834e-07, "loss": 0.0503, "step": 43201 }, { "epoch": 0.9519685776771499, "grad_norm": 0.4154147803783417, "learning_rate": 1.810171356940221e-07, "loss": 0.0384, "step": 43202 }, { "epoch": 0.9519906129666661, "grad_norm": 0.7064488530158997, "learning_rate": 1.8085136143478232e-07, "loss": 0.0679, "step": 43203 }, { "epoch": 0.9520126482561823, "grad_norm": 0.38725265860557556, "learning_rate": 1.8068566265741327e-07, "loss": 0.0622, "step": 43204 }, { "epoch": 0.9520346835456984, "grad_norm": 0.5249006748199463, "learning_rate": 1.8052003936275762e-07, "loss": 0.0578, "step": 43205 }, { "epoch": 0.9520567188352146, "grad_norm": 0.9719640612602234, "learning_rate": 1.803544915516614e-07, "loss": 0.0622, "step": 43206 }, { "epoch": 0.9520787541247308, "grad_norm": 0.5963620543479919, "learning_rate": 1.8018901922496723e-07, "loss": 0.0376, "step": 43207 }, { "epoch": 0.9521007894142469, "grad_norm": 0.5053498148918152, "learning_rate": 1.8002362238351778e-07, "loss": 0.0542, "step": 43208 }, { "epoch": 0.9521228247037631, "grad_norm": 1.057934045791626, "learning_rate": 1.7985830102815236e-07, "loss": 0.061, "step": 43209 }, { "epoch": 0.9521448599932792, "grad_norm": 0.5848400592803955, "learning_rate": 1.7969305515971868e-07, "loss": 0.0576, "step": 43210 }, { "epoch": 0.9521668952827954, "grad_norm": 0.5945714116096497, "learning_rate": 1.7952788477905437e-07, "loss": 0.0616, "step": 43211 }, { "epoch": 0.9521889305723116, "grad_norm": 0.6184104681015015, "learning_rate": 1.7936278988700204e-07, "loss": 0.0713, "step": 43212 }, { "epoch": 0.9522109658618277, "grad_norm": 0.68960040807724, "learning_rate": 1.7919777048440112e-07, "loss": 0.0414, "step": 43213 }, { "epoch": 0.9522330011513439, "grad_norm": 0.531453013420105, "learning_rate": 1.7903282657209586e-07, "loss": 0.0611, "step": 43214 }, { "epoch": 0.9522550364408601, "grad_norm": 0.45046308636665344, "learning_rate": 1.7886795815092228e-07, "loss": 0.0539, "step": 43215 }, { "epoch": 0.9522770717303762, "grad_norm": 0.8565431237220764, "learning_rate": 1.7870316522172302e-07, "loss": 0.0568, "step": 43216 }, { "epoch": 0.9522991070198924, "grad_norm": 0.6742684245109558, "learning_rate": 1.785384477853358e-07, "loss": 0.0672, "step": 43217 }, { "epoch": 0.9523211423094086, "grad_norm": 1.0446375608444214, "learning_rate": 1.7837380584259988e-07, "loss": 0.0639, "step": 43218 }, { "epoch": 0.9523431775989247, "grad_norm": 0.5604135990142822, "learning_rate": 1.7820923939435464e-07, "loss": 0.0736, "step": 43219 }, { "epoch": 0.9523652128884409, "grad_norm": 0.5582895874977112, "learning_rate": 1.780447484414377e-07, "loss": 0.0538, "step": 43220 }, { "epoch": 0.952387248177957, "grad_norm": 0.6196373701095581, "learning_rate": 1.7788033298468675e-07, "loss": 0.0638, "step": 43221 }, { "epoch": 0.9524092834674731, "grad_norm": 0.7391279935836792, "learning_rate": 1.7771599302493946e-07, "loss": 0.0498, "step": 43222 }, { "epoch": 0.9524313187569893, "grad_norm": 0.7617339491844177, "learning_rate": 1.775517285630335e-07, "loss": 0.0432, "step": 43223 }, { "epoch": 0.9524533540465054, "grad_norm": 0.6757375597953796, "learning_rate": 1.773875395998048e-07, "loss": 0.0471, "step": 43224 }, { "epoch": 0.9524753893360216, "grad_norm": 0.8552606105804443, "learning_rate": 1.7722342613608943e-07, "loss": 0.0439, "step": 43225 }, { "epoch": 0.9524974246255378, "grad_norm": 0.6193491220474243, "learning_rate": 1.7705938817272505e-07, "loss": 0.073, "step": 43226 }, { "epoch": 0.9525194599150539, "grad_norm": 0.5885290503501892, "learning_rate": 1.7689542571054595e-07, "loss": 0.0678, "step": 43227 }, { "epoch": 0.9525414952045701, "grad_norm": 0.4269940257072449, "learning_rate": 1.7673153875038484e-07, "loss": 0.071, "step": 43228 }, { "epoch": 0.9525635304940863, "grad_norm": 0.860706090927124, "learning_rate": 1.76567727293081e-07, "loss": 0.0663, "step": 43229 }, { "epoch": 0.9525855657836024, "grad_norm": 0.7069069743156433, "learning_rate": 1.764039913394655e-07, "loss": 0.05, "step": 43230 }, { "epoch": 0.9526076010731186, "grad_norm": 0.7619704008102417, "learning_rate": 1.7624033089037427e-07, "loss": 0.0687, "step": 43231 }, { "epoch": 0.9526296363626348, "grad_norm": 0.8405818939208984, "learning_rate": 1.7607674594664002e-07, "loss": 0.0614, "step": 43232 }, { "epoch": 0.9526516716521509, "grad_norm": 0.49631428718566895, "learning_rate": 1.759132365090971e-07, "loss": 0.0403, "step": 43233 }, { "epoch": 0.9526737069416671, "grad_norm": 0.6819259524345398, "learning_rate": 1.7574980257857477e-07, "loss": 0.0684, "step": 43234 }, { "epoch": 0.9526957422311833, "grad_norm": 0.9273648262023926, "learning_rate": 1.7558644415591075e-07, "loss": 0.0889, "step": 43235 }, { "epoch": 0.9527177775206994, "grad_norm": 0.9423943161964417, "learning_rate": 1.754231612419327e-07, "loss": 0.086, "step": 43236 }, { "epoch": 0.9527398128102156, "grad_norm": 0.581746518611908, "learning_rate": 1.7525995383747496e-07, "loss": 0.0517, "step": 43237 }, { "epoch": 0.9527618480997317, "grad_norm": 0.9012511968612671, "learning_rate": 1.7509682194336686e-07, "loss": 0.1015, "step": 43238 }, { "epoch": 0.9527838833892479, "grad_norm": 0.8042105436325073, "learning_rate": 1.749337655604427e-07, "loss": 0.0441, "step": 43239 }, { "epoch": 0.9528059186787641, "grad_norm": 1.013182520866394, "learning_rate": 1.7477078468952855e-07, "loss": 0.0705, "step": 43240 }, { "epoch": 0.9528279539682802, "grad_norm": 0.4665723145008087, "learning_rate": 1.7460787933145705e-07, "loss": 0.0417, "step": 43241 }, { "epoch": 0.9528499892577964, "grad_norm": 0.5805286765098572, "learning_rate": 1.7444504948705918e-07, "loss": 0.0732, "step": 43242 }, { "epoch": 0.9528720245473126, "grad_norm": 0.687267005443573, "learning_rate": 1.7428229515715932e-07, "loss": 0.0831, "step": 43243 }, { "epoch": 0.9528940598368287, "grad_norm": 0.43975719809532166, "learning_rate": 1.7411961634259344e-07, "loss": 0.036, "step": 43244 }, { "epoch": 0.9529160951263448, "grad_norm": 0.5906179547309875, "learning_rate": 1.739570130441842e-07, "loss": 0.0381, "step": 43245 }, { "epoch": 0.952938130415861, "grad_norm": 0.7291503548622131, "learning_rate": 1.7379448526276432e-07, "loss": 0.0584, "step": 43246 }, { "epoch": 0.9529601657053771, "grad_norm": 0.6298401355743408, "learning_rate": 1.736320329991581e-07, "loss": 0.0586, "step": 43247 }, { "epoch": 0.9529822009948933, "grad_norm": 0.8248572945594788, "learning_rate": 1.7346965625419485e-07, "loss": 0.1101, "step": 43248 }, { "epoch": 0.9530042362844094, "grad_norm": 0.48466625809669495, "learning_rate": 1.7330735502870232e-07, "loss": 0.0496, "step": 43249 }, { "epoch": 0.9530262715739256, "grad_norm": 0.5937530398368835, "learning_rate": 1.731451293235048e-07, "loss": 0.0598, "step": 43250 }, { "epoch": 0.9530483068634418, "grad_norm": 0.5121186971664429, "learning_rate": 1.7298297913942995e-07, "loss": 0.0466, "step": 43251 }, { "epoch": 0.9530703421529579, "grad_norm": 0.20348066091537476, "learning_rate": 1.728209044773038e-07, "loss": 0.0506, "step": 43252 }, { "epoch": 0.9530923774424741, "grad_norm": 0.7150408625602722, "learning_rate": 1.7265890533795237e-07, "loss": 0.0488, "step": 43253 }, { "epoch": 0.9531144127319903, "grad_norm": 0.5152971148490906, "learning_rate": 1.7249698172219995e-07, "loss": 0.0718, "step": 43254 }, { "epoch": 0.9531364480215064, "grad_norm": 0.6588892936706543, "learning_rate": 1.7233513363087095e-07, "loss": 0.0478, "step": 43255 }, { "epoch": 0.9531584833110226, "grad_norm": 0.6439567804336548, "learning_rate": 1.7217336106479133e-07, "loss": 0.0423, "step": 43256 }, { "epoch": 0.9531805186005388, "grad_norm": 0.5370121002197266, "learning_rate": 1.7201166402478375e-07, "loss": 0.0473, "step": 43257 }, { "epoch": 0.9532025538900549, "grad_norm": 0.3602222800254822, "learning_rate": 1.7185004251167258e-07, "loss": 0.0639, "step": 43258 }, { "epoch": 0.9532245891795711, "grad_norm": 0.41117966175079346, "learning_rate": 1.7168849652627882e-07, "loss": 0.0501, "step": 43259 }, { "epoch": 0.9532466244690873, "grad_norm": 0.439618319272995, "learning_rate": 1.715270260694285e-07, "loss": 0.0384, "step": 43260 }, { "epoch": 0.9532686597586034, "grad_norm": 1.0184319019317627, "learning_rate": 1.713656311419426e-07, "loss": 0.0655, "step": 43261 }, { "epoch": 0.9532906950481196, "grad_norm": 0.46786797046661377, "learning_rate": 1.7120431174464214e-07, "loss": 0.0733, "step": 43262 }, { "epoch": 0.9533127303376357, "grad_norm": 0.09029605984687805, "learning_rate": 1.710430678783531e-07, "loss": 0.0362, "step": 43263 }, { "epoch": 0.9533347656271519, "grad_norm": 0.9675895571708679, "learning_rate": 1.7088189954388987e-07, "loss": 0.075, "step": 43264 }, { "epoch": 0.9533568009166681, "grad_norm": 0.289269357919693, "learning_rate": 1.7072080674208013e-07, "loss": 0.0513, "step": 43265 }, { "epoch": 0.9533788362061842, "grad_norm": 0.8329217433929443, "learning_rate": 1.7055978947373818e-07, "loss": 0.0689, "step": 43266 }, { "epoch": 0.9534008714957004, "grad_norm": 0.6378668546676636, "learning_rate": 1.7039884773969007e-07, "loss": 0.0747, "step": 43267 }, { "epoch": 0.9534229067852166, "grad_norm": 0.8007285594940186, "learning_rate": 1.7023798154075177e-07, "loss": 0.0657, "step": 43268 }, { "epoch": 0.9534449420747327, "grad_norm": 0.9433923959732056, "learning_rate": 1.70077190877746e-07, "loss": 0.0662, "step": 43269 }, { "epoch": 0.9534669773642488, "grad_norm": 0.27744805812835693, "learning_rate": 1.6991647575148705e-07, "loss": 0.0489, "step": 43270 }, { "epoch": 0.953489012653765, "grad_norm": 0.5797786712646484, "learning_rate": 1.69755836162796e-07, "loss": 0.0454, "step": 43271 }, { "epoch": 0.9535110479432811, "grad_norm": 0.5483287572860718, "learning_rate": 1.6959527211249215e-07, "loss": 0.064, "step": 43272 }, { "epoch": 0.9535330832327973, "grad_norm": 0.6249915957450867, "learning_rate": 1.6943478360139153e-07, "loss": 0.064, "step": 43273 }, { "epoch": 0.9535551185223134, "grad_norm": 0.4163910448551178, "learning_rate": 1.6927437063031182e-07, "loss": 0.0408, "step": 43274 }, { "epoch": 0.9535771538118296, "grad_norm": 0.43788230419158936, "learning_rate": 1.6911403320007236e-07, "loss": 0.0622, "step": 43275 }, { "epoch": 0.9535991891013458, "grad_norm": 0.6164421439170837, "learning_rate": 1.6895377131148748e-07, "loss": 0.0599, "step": 43276 }, { "epoch": 0.9536212243908619, "grad_norm": 0.6608526110649109, "learning_rate": 1.6879358496537324e-07, "loss": 0.077, "step": 43277 }, { "epoch": 0.9536432596803781, "grad_norm": 0.40040016174316406, "learning_rate": 1.686334741625456e-07, "loss": 0.0506, "step": 43278 }, { "epoch": 0.9536652949698943, "grad_norm": 0.36864691972732544, "learning_rate": 1.6847343890382394e-07, "loss": 0.0587, "step": 43279 }, { "epoch": 0.9536873302594104, "grad_norm": 1.019426703453064, "learning_rate": 1.683134791900176e-07, "loss": 0.0624, "step": 43280 }, { "epoch": 0.9537093655489266, "grad_norm": 0.8386793732643127, "learning_rate": 1.6815359502194593e-07, "loss": 0.074, "step": 43281 }, { "epoch": 0.9537314008384428, "grad_norm": 0.5357176661491394, "learning_rate": 1.6799378640041996e-07, "loss": 0.0744, "step": 43282 }, { "epoch": 0.9537534361279589, "grad_norm": 0.4571516513824463, "learning_rate": 1.6783405332625568e-07, "loss": 0.0391, "step": 43283 }, { "epoch": 0.9537754714174751, "grad_norm": 0.6328397989273071, "learning_rate": 1.6767439580026577e-07, "loss": 0.0505, "step": 43284 }, { "epoch": 0.9537975067069913, "grad_norm": 0.6895573735237122, "learning_rate": 1.675148138232646e-07, "loss": 0.0595, "step": 43285 }, { "epoch": 0.9538195419965074, "grad_norm": 0.6965829133987427, "learning_rate": 1.6735530739606486e-07, "loss": 0.0748, "step": 43286 }, { "epoch": 0.9538415772860236, "grad_norm": 0.8265507817268372, "learning_rate": 1.6719587651947753e-07, "loss": 0.0497, "step": 43287 }, { "epoch": 0.9538636125755398, "grad_norm": 0.8886640071868896, "learning_rate": 1.67036521194317e-07, "loss": 0.103, "step": 43288 }, { "epoch": 0.9538856478650559, "grad_norm": 0.6716558337211609, "learning_rate": 1.6687724142139094e-07, "loss": 0.0789, "step": 43289 }, { "epoch": 0.9539076831545721, "grad_norm": 0.5483391880989075, "learning_rate": 1.6671803720151536e-07, "loss": 0.0386, "step": 43290 }, { "epoch": 0.9539297184440882, "grad_norm": 0.4583407938480377, "learning_rate": 1.6655890853549792e-07, "loss": 0.0415, "step": 43291 }, { "epoch": 0.9539517537336044, "grad_norm": 0.5568239688873291, "learning_rate": 1.6639985542415136e-07, "loss": 0.057, "step": 43292 }, { "epoch": 0.9539737890231206, "grad_norm": 0.9569032192230225, "learning_rate": 1.6624087786828335e-07, "loss": 0.0647, "step": 43293 }, { "epoch": 0.9539958243126367, "grad_norm": 0.6018470525741577, "learning_rate": 1.6608197586870487e-07, "loss": 0.0655, "step": 43294 }, { "epoch": 0.9540178596021528, "grad_norm": 0.6824538707733154, "learning_rate": 1.6592314942622699e-07, "loss": 0.0472, "step": 43295 }, { "epoch": 0.954039894891669, "grad_norm": 0.3431165814399719, "learning_rate": 1.6576439854165736e-07, "loss": 0.0436, "step": 43296 }, { "epoch": 0.9540619301811851, "grad_norm": 0.2052861899137497, "learning_rate": 1.656057232158037e-07, "loss": 0.0483, "step": 43297 }, { "epoch": 0.9540839654707013, "grad_norm": 0.5954704880714417, "learning_rate": 1.6544712344947365e-07, "loss": 0.0528, "step": 43298 }, { "epoch": 0.9541060007602175, "grad_norm": 0.5968533754348755, "learning_rate": 1.652885992434783e-07, "loss": 0.0638, "step": 43299 }, { "epoch": 0.9541280360497336, "grad_norm": 0.6515912413597107, "learning_rate": 1.651301505986219e-07, "loss": 0.0559, "step": 43300 }, { "epoch": 0.9541500713392498, "grad_norm": 0.8949208855628967, "learning_rate": 1.6497177751571224e-07, "loss": 0.0831, "step": 43301 }, { "epoch": 0.954172106628766, "grad_norm": 0.46654656529426575, "learning_rate": 1.648134799955586e-07, "loss": 0.042, "step": 43302 }, { "epoch": 0.9541941419182821, "grad_norm": 0.564786434173584, "learning_rate": 1.6465525803896375e-07, "loss": 0.0427, "step": 43303 }, { "epoch": 0.9542161772077983, "grad_norm": 0.7060444951057434, "learning_rate": 1.644971116467353e-07, "loss": 0.0738, "step": 43304 }, { "epoch": 0.9542382124973144, "grad_norm": 0.6266140341758728, "learning_rate": 1.6433904081967766e-07, "loss": 0.0408, "step": 43305 }, { "epoch": 0.9542602477868306, "grad_norm": 0.668623149394989, "learning_rate": 1.6418104555859847e-07, "loss": 0.0441, "step": 43306 }, { "epoch": 0.9542822830763468, "grad_norm": 0.205050528049469, "learning_rate": 1.640231258642988e-07, "loss": 0.072, "step": 43307 }, { "epoch": 0.9543043183658629, "grad_norm": 0.8435755372047424, "learning_rate": 1.638652817375863e-07, "loss": 0.0731, "step": 43308 }, { "epoch": 0.9543263536553791, "grad_norm": 0.8618462085723877, "learning_rate": 1.637075131792637e-07, "loss": 0.0608, "step": 43309 }, { "epoch": 0.9543483889448953, "grad_norm": 0.7599037289619446, "learning_rate": 1.6354982019013366e-07, "loss": 0.0497, "step": 43310 }, { "epoch": 0.9543704242344114, "grad_norm": 0.6025205254554749, "learning_rate": 1.633922027710022e-07, "loss": 0.0588, "step": 43311 }, { "epoch": 0.9543924595239276, "grad_norm": 0.32630589604377747, "learning_rate": 1.632346609226687e-07, "loss": 0.0502, "step": 43312 }, { "epoch": 0.9544144948134438, "grad_norm": 0.5434841513633728, "learning_rate": 1.6307719464593752e-07, "loss": 0.0761, "step": 43313 }, { "epoch": 0.9544365301029599, "grad_norm": 0.413590669631958, "learning_rate": 1.6291980394160966e-07, "loss": 0.0384, "step": 43314 }, { "epoch": 0.9544585653924761, "grad_norm": 0.6191744208335876, "learning_rate": 1.627624888104895e-07, "loss": 0.0571, "step": 43315 }, { "epoch": 0.9544806006819923, "grad_norm": 0.4522727429866791, "learning_rate": 1.6260524925337307e-07, "loss": 0.0345, "step": 43316 }, { "epoch": 0.9545026359715084, "grad_norm": 0.46304941177368164, "learning_rate": 1.6244808527106803e-07, "loss": 0.0799, "step": 43317 }, { "epoch": 0.9545246712610246, "grad_norm": 0.42786699533462524, "learning_rate": 1.6229099686437043e-07, "loss": 0.0628, "step": 43318 }, { "epoch": 0.9545467065505406, "grad_norm": 0.6365559697151184, "learning_rate": 1.621339840340813e-07, "loss": 0.0836, "step": 43319 }, { "epoch": 0.9545687418400568, "grad_norm": 0.5236538648605347, "learning_rate": 1.61977046781e-07, "loss": 0.0582, "step": 43320 }, { "epoch": 0.954590777129573, "grad_norm": 0.5669316053390503, "learning_rate": 1.6182018510592588e-07, "loss": 0.0562, "step": 43321 }, { "epoch": 0.9546128124190891, "grad_norm": 0.5281146764755249, "learning_rate": 1.6166339900966e-07, "loss": 0.0688, "step": 43322 }, { "epoch": 0.9546348477086053, "grad_norm": 0.5283325910568237, "learning_rate": 1.6150668849299832e-07, "loss": 0.0471, "step": 43323 }, { "epoch": 0.9546568829981215, "grad_norm": 0.5069051384925842, "learning_rate": 1.613500535567386e-07, "loss": 0.0667, "step": 43324 }, { "epoch": 0.9546789182876376, "grad_norm": 0.4784506857395172, "learning_rate": 1.6119349420168183e-07, "loss": 0.0514, "step": 43325 }, { "epoch": 0.9547009535771538, "grad_norm": 0.4730891287326813, "learning_rate": 1.6103701042862405e-07, "loss": 0.0451, "step": 43326 }, { "epoch": 0.95472298886667, "grad_norm": 0.37321528792381287, "learning_rate": 1.6088060223836133e-07, "loss": 0.0366, "step": 43327 }, { "epoch": 0.9547450241561861, "grad_norm": 0.8682130575180054, "learning_rate": 1.6072426963169128e-07, "loss": 0.0651, "step": 43328 }, { "epoch": 0.9547670594457023, "grad_norm": 0.6683102250099182, "learning_rate": 1.6056801260941167e-07, "loss": 0.0424, "step": 43329 }, { "epoch": 0.9547890947352184, "grad_norm": 0.6342572569847107, "learning_rate": 1.6041183117231516e-07, "loss": 0.092, "step": 43330 }, { "epoch": 0.9548111300247346, "grad_norm": 0.5457188487052917, "learning_rate": 1.6025572532119782e-07, "loss": 0.0494, "step": 43331 }, { "epoch": 0.9548331653142508, "grad_norm": 0.7764597535133362, "learning_rate": 1.600996950568573e-07, "loss": 0.0554, "step": 43332 }, { "epoch": 0.9548552006037669, "grad_norm": 0.6136751770973206, "learning_rate": 1.599437403800863e-07, "loss": 0.0656, "step": 43333 }, { "epoch": 0.9548772358932831, "grad_norm": 0.33668211102485657, "learning_rate": 1.597878612916792e-07, "loss": 0.0457, "step": 43334 }, { "epoch": 0.9548992711827993, "grad_norm": 0.4749230146408081, "learning_rate": 1.5963205779243207e-07, "loss": 0.075, "step": 43335 }, { "epoch": 0.9549213064723154, "grad_norm": 0.5732996463775635, "learning_rate": 1.5947632988313587e-07, "loss": 0.0571, "step": 43336 }, { "epoch": 0.9549433417618316, "grad_norm": 0.3272932469844818, "learning_rate": 1.5932067756458501e-07, "loss": 0.0381, "step": 43337 }, { "epoch": 0.9549653770513478, "grad_norm": 0.4515675902366638, "learning_rate": 1.5916510083757217e-07, "loss": 0.0525, "step": 43338 }, { "epoch": 0.9549874123408639, "grad_norm": 0.14572739601135254, "learning_rate": 1.5900959970288842e-07, "loss": 0.0327, "step": 43339 }, { "epoch": 0.9550094476303801, "grad_norm": 0.7856770753860474, "learning_rate": 1.588541741613281e-07, "loss": 0.0953, "step": 43340 }, { "epoch": 0.9550314829198963, "grad_norm": 0.5968710780143738, "learning_rate": 1.586988242136822e-07, "loss": 0.0325, "step": 43341 }, { "epoch": 0.9550535182094124, "grad_norm": 1.3629425764083862, "learning_rate": 1.5854354986074183e-07, "loss": 0.0784, "step": 43342 }, { "epoch": 0.9550755534989286, "grad_norm": 0.6908714175224304, "learning_rate": 1.5838835110329798e-07, "loss": 0.0518, "step": 43343 }, { "epoch": 0.9550975887884446, "grad_norm": 0.4533047676086426, "learning_rate": 1.5823322794214002e-07, "loss": 0.0326, "step": 43344 }, { "epoch": 0.9551196240779608, "grad_norm": 0.6579781174659729, "learning_rate": 1.5807818037806065e-07, "loss": 0.0582, "step": 43345 }, { "epoch": 0.955141659367477, "grad_norm": 0.6929402351379395, "learning_rate": 1.5792320841184594e-07, "loss": 0.064, "step": 43346 }, { "epoch": 0.9551636946569931, "grad_norm": 0.536883533000946, "learning_rate": 1.5776831204428855e-07, "loss": 0.0354, "step": 43347 }, { "epoch": 0.9551857299465093, "grad_norm": 0.5402938723564148, "learning_rate": 1.5761349127617618e-07, "loss": 0.0605, "step": 43348 }, { "epoch": 0.9552077652360255, "grad_norm": 0.3562909960746765, "learning_rate": 1.574587461082966e-07, "loss": 0.0585, "step": 43349 }, { "epoch": 0.9552298005255416, "grad_norm": 0.5276519060134888, "learning_rate": 1.5730407654143908e-07, "loss": 0.0687, "step": 43350 }, { "epoch": 0.9552518358150578, "grad_norm": 0.47980284690856934, "learning_rate": 1.571494825763914e-07, "loss": 0.0578, "step": 43351 }, { "epoch": 0.955273871104574, "grad_norm": 0.4944310188293457, "learning_rate": 1.569949642139412e-07, "loss": 0.0766, "step": 43352 }, { "epoch": 0.9552959063940901, "grad_norm": 0.4639131426811218, "learning_rate": 1.568405214548746e-07, "loss": 0.0505, "step": 43353 }, { "epoch": 0.9553179416836063, "grad_norm": 0.6242453455924988, "learning_rate": 1.5668615429997924e-07, "loss": 0.0492, "step": 43354 }, { "epoch": 0.9553399769731225, "grad_norm": 0.5826020836830139, "learning_rate": 1.565318627500395e-07, "loss": 0.0589, "step": 43355 }, { "epoch": 0.9553620122626386, "grad_norm": 0.5581095814704895, "learning_rate": 1.5637764680584476e-07, "loss": 0.0724, "step": 43356 }, { "epoch": 0.9553840475521548, "grad_norm": 0.7226517796516418, "learning_rate": 1.5622350646817773e-07, "loss": 0.053, "step": 43357 }, { "epoch": 0.955406082841671, "grad_norm": 0.8075532913208008, "learning_rate": 1.5606944173782277e-07, "loss": 0.0502, "step": 43358 }, { "epoch": 0.9554281181311871, "grad_norm": 0.5123738050460815, "learning_rate": 1.5591545261556762e-07, "loss": 0.0381, "step": 43359 }, { "epoch": 0.9554501534207033, "grad_norm": 0.5960427522659302, "learning_rate": 1.5576153910219493e-07, "loss": 0.058, "step": 43360 }, { "epoch": 0.9554721887102194, "grad_norm": 0.6626535654067993, "learning_rate": 1.556077011984891e-07, "loss": 0.0754, "step": 43361 }, { "epoch": 0.9554942239997356, "grad_norm": 0.780378520488739, "learning_rate": 1.5545393890523118e-07, "loss": 0.0607, "step": 43362 }, { "epoch": 0.9555162592892518, "grad_norm": 0.7924335598945618, "learning_rate": 1.5530025222321053e-07, "loss": 0.0962, "step": 43363 }, { "epoch": 0.9555382945787679, "grad_norm": 0.6807505488395691, "learning_rate": 1.551466411532032e-07, "loss": 0.0707, "step": 43364 }, { "epoch": 0.9555603298682841, "grad_norm": 0.4599282145500183, "learning_rate": 1.5499310569599522e-07, "loss": 0.0506, "step": 43365 }, { "epoch": 0.9555823651578003, "grad_norm": 0.6032112836837769, "learning_rate": 1.5483964585236765e-07, "loss": 0.0627, "step": 43366 }, { "epoch": 0.9556044004473164, "grad_norm": 0.6249759197235107, "learning_rate": 1.5468626162310317e-07, "loss": 0.0632, "step": 43367 }, { "epoch": 0.9556264357368326, "grad_norm": 0.39191490411758423, "learning_rate": 1.5453295300898118e-07, "loss": 0.0384, "step": 43368 }, { "epoch": 0.9556484710263486, "grad_norm": 0.5773789882659912, "learning_rate": 1.5437972001078437e-07, "loss": 0.0631, "step": 43369 }, { "epoch": 0.9556705063158648, "grad_norm": 0.4477504789829254, "learning_rate": 1.5422656262929214e-07, "loss": 0.0547, "step": 43370 }, { "epoch": 0.955692541605381, "grad_norm": 0.8560897707939148, "learning_rate": 1.5407348086528383e-07, "loss": 0.0579, "step": 43371 }, { "epoch": 0.9557145768948971, "grad_norm": 0.289301335811615, "learning_rate": 1.5392047471954217e-07, "loss": 0.0494, "step": 43372 }, { "epoch": 0.9557366121844133, "grad_norm": 0.7568029165267944, "learning_rate": 1.5376754419284488e-07, "loss": 0.0452, "step": 43373 }, { "epoch": 0.9557586474739295, "grad_norm": 0.4604901671409607, "learning_rate": 1.5361468928596968e-07, "loss": 0.0501, "step": 43374 }, { "epoch": 0.9557806827634456, "grad_norm": 0.5933638215065002, "learning_rate": 1.5346190999969588e-07, "loss": 0.0476, "step": 43375 }, { "epoch": 0.9558027180529618, "grad_norm": 0.4600529372692108, "learning_rate": 1.5330920633480295e-07, "loss": 0.0401, "step": 43376 }, { "epoch": 0.955824753342478, "grad_norm": 0.41405344009399414, "learning_rate": 1.5315657829206686e-07, "loss": 0.0685, "step": 43377 }, { "epoch": 0.9558467886319941, "grad_norm": 0.6536244750022888, "learning_rate": 1.5300402587226537e-07, "loss": 0.062, "step": 43378 }, { "epoch": 0.9558688239215103, "grad_norm": 0.6193851232528687, "learning_rate": 1.5285154907617616e-07, "loss": 0.0604, "step": 43379 }, { "epoch": 0.9558908592110265, "grad_norm": 0.35628581047058105, "learning_rate": 1.5269914790457696e-07, "loss": 0.0877, "step": 43380 }, { "epoch": 0.9559128945005426, "grad_norm": 0.7357917428016663, "learning_rate": 1.5254682235824047e-07, "loss": 0.0608, "step": 43381 }, { "epoch": 0.9559349297900588, "grad_norm": 1.0791118144989014, "learning_rate": 1.5239457243794774e-07, "loss": 0.0895, "step": 43382 }, { "epoch": 0.955956965079575, "grad_norm": 0.7246226072311401, "learning_rate": 1.5224239814446984e-07, "loss": 0.0319, "step": 43383 }, { "epoch": 0.9559790003690911, "grad_norm": 0.5456292629241943, "learning_rate": 1.5209029947858278e-07, "loss": 0.0499, "step": 43384 }, { "epoch": 0.9560010356586073, "grad_norm": 0.5049870014190674, "learning_rate": 1.5193827644106262e-07, "loss": 0.0475, "step": 43385 }, { "epoch": 0.9560230709481234, "grad_norm": 0.4672853946685791, "learning_rate": 1.5178632903268375e-07, "loss": 0.0336, "step": 43386 }, { "epoch": 0.9560451062376396, "grad_norm": 0.648722767829895, "learning_rate": 1.516344572542172e-07, "loss": 0.0669, "step": 43387 }, { "epoch": 0.9560671415271558, "grad_norm": 0.39819586277008057, "learning_rate": 1.5148266110644072e-07, "loss": 0.0639, "step": 43388 }, { "epoch": 0.9560891768166719, "grad_norm": 0.8184367418289185, "learning_rate": 1.513309405901253e-07, "loss": 0.0724, "step": 43389 }, { "epoch": 0.9561112121061881, "grad_norm": 0.5195939540863037, "learning_rate": 1.5117929570604205e-07, "loss": 0.0511, "step": 43390 }, { "epoch": 0.9561332473957043, "grad_norm": 0.8054875731468201, "learning_rate": 1.51027726454967e-07, "loss": 0.0968, "step": 43391 }, { "epoch": 0.9561552826852204, "grad_norm": 0.6856769323348999, "learning_rate": 1.508762328376695e-07, "loss": 0.0731, "step": 43392 }, { "epoch": 0.9561773179747366, "grad_norm": 0.4751790463924408, "learning_rate": 1.507248148549223e-07, "loss": 0.065, "step": 43393 }, { "epoch": 0.9561993532642526, "grad_norm": 0.4796886444091797, "learning_rate": 1.5057347250749643e-07, "loss": 0.0667, "step": 43394 }, { "epoch": 0.9562213885537688, "grad_norm": 0.6289635300636292, "learning_rate": 1.5042220579616462e-07, "loss": 0.0723, "step": 43395 }, { "epoch": 0.956243423843285, "grad_norm": 0.6692684292793274, "learning_rate": 1.502710147216929e-07, "loss": 0.0592, "step": 43396 }, { "epoch": 0.9562654591328011, "grad_norm": 0.5773337483406067, "learning_rate": 1.5011989928485403e-07, "loss": 0.0743, "step": 43397 }, { "epoch": 0.9562874944223173, "grad_norm": 0.7487989068031311, "learning_rate": 1.4996885948641902e-07, "loss": 0.0491, "step": 43398 }, { "epoch": 0.9563095297118335, "grad_norm": 0.31020280718803406, "learning_rate": 1.498178953271556e-07, "loss": 0.0428, "step": 43399 }, { "epoch": 0.9563315650013496, "grad_norm": 0.8471426367759705, "learning_rate": 1.4966700680783153e-07, "loss": 0.059, "step": 43400 }, { "epoch": 0.9563536002908658, "grad_norm": 0.6088483929634094, "learning_rate": 1.495161939292178e-07, "loss": 0.0394, "step": 43401 }, { "epoch": 0.956375635580382, "grad_norm": 0.40937259793281555, "learning_rate": 1.4936545669208211e-07, "loss": 0.0478, "step": 43402 }, { "epoch": 0.9563976708698981, "grad_norm": 0.4067492187023163, "learning_rate": 1.4921479509719227e-07, "loss": 0.042, "step": 43403 }, { "epoch": 0.9564197061594143, "grad_norm": 0.6257854700088501, "learning_rate": 1.490642091453126e-07, "loss": 0.045, "step": 43404 }, { "epoch": 0.9564417414489305, "grad_norm": 0.5021976828575134, "learning_rate": 1.4891369883721583e-07, "loss": 0.0518, "step": 43405 }, { "epoch": 0.9564637767384466, "grad_norm": 0.692248523235321, "learning_rate": 1.4876326417366303e-07, "loss": 0.061, "step": 43406 }, { "epoch": 0.9564858120279628, "grad_norm": 0.26271846890449524, "learning_rate": 1.486129051554236e-07, "loss": 0.049, "step": 43407 }, { "epoch": 0.956507847317479, "grad_norm": 0.5827811360359192, "learning_rate": 1.4846262178326187e-07, "loss": 0.0716, "step": 43408 }, { "epoch": 0.9565298826069951, "grad_norm": 0.678956925868988, "learning_rate": 1.4831241405794394e-07, "loss": 0.0604, "step": 43409 }, { "epoch": 0.9565519178965113, "grad_norm": 0.26991888880729675, "learning_rate": 1.4816228198023585e-07, "loss": 0.0646, "step": 43410 }, { "epoch": 0.9565739531860274, "grad_norm": 0.4189591109752655, "learning_rate": 1.4801222555090198e-07, "loss": 0.0815, "step": 43411 }, { "epoch": 0.9565959884755436, "grad_norm": 0.44566547870635986, "learning_rate": 1.478622447707051e-07, "loss": 0.0583, "step": 43412 }, { "epoch": 0.9566180237650598, "grad_norm": 0.7590870261192322, "learning_rate": 1.4771233964040953e-07, "loss": 0.062, "step": 43413 }, { "epoch": 0.9566400590545759, "grad_norm": 0.4169113337993622, "learning_rate": 1.4756251016078136e-07, "loss": 0.0724, "step": 43414 }, { "epoch": 0.9566620943440921, "grad_norm": 0.4685870409011841, "learning_rate": 1.4741275633258e-07, "loss": 0.0433, "step": 43415 }, { "epoch": 0.9566841296336083, "grad_norm": 0.6864864230155945, "learning_rate": 1.472630781565698e-07, "loss": 0.0552, "step": 43416 }, { "epoch": 0.9567061649231244, "grad_norm": 0.8638514876365662, "learning_rate": 1.471134756335152e-07, "loss": 0.0588, "step": 43417 }, { "epoch": 0.9567282002126405, "grad_norm": 0.5249537825584412, "learning_rate": 1.469639487641755e-07, "loss": 0.1025, "step": 43418 }, { "epoch": 0.9567502355021567, "grad_norm": 1.1276862621307373, "learning_rate": 1.4681449754931186e-07, "loss": 0.0704, "step": 43419 }, { "epoch": 0.9567722707916728, "grad_norm": 0.674929141998291, "learning_rate": 1.4666512198969027e-07, "loss": 0.0505, "step": 43420 }, { "epoch": 0.956794306081189, "grad_norm": 0.5713210105895996, "learning_rate": 1.4651582208606517e-07, "loss": 0.0656, "step": 43421 }, { "epoch": 0.9568163413707051, "grad_norm": 0.8142650723457336, "learning_rate": 1.4636659783920257e-07, "loss": 0.056, "step": 43422 }, { "epoch": 0.9568383766602213, "grad_norm": 0.822265625, "learning_rate": 1.4621744924985858e-07, "loss": 0.0797, "step": 43423 }, { "epoch": 0.9568604119497375, "grad_norm": 0.2351427525281906, "learning_rate": 1.460683763187942e-07, "loss": 0.0597, "step": 43424 }, { "epoch": 0.9568824472392536, "grad_norm": 0.2555263042449951, "learning_rate": 1.459193790467689e-07, "loss": 0.0588, "step": 43425 }, { "epoch": 0.9569044825287698, "grad_norm": 0.6390237212181091, "learning_rate": 1.45770457434542e-07, "loss": 0.0665, "step": 43426 }, { "epoch": 0.956926517818286, "grad_norm": 0.3477901816368103, "learning_rate": 1.4562161148287122e-07, "loss": 0.063, "step": 43427 }, { "epoch": 0.9569485531078021, "grad_norm": 0.6877687573432922, "learning_rate": 1.4547284119251436e-07, "loss": 0.059, "step": 43428 }, { "epoch": 0.9569705883973183, "grad_norm": 0.46680697798728943, "learning_rate": 1.4532414656423243e-07, "loss": 0.0467, "step": 43429 }, { "epoch": 0.9569926236868345, "grad_norm": 0.5042773485183716, "learning_rate": 1.4517552759877816e-07, "loss": 0.0331, "step": 43430 }, { "epoch": 0.9570146589763506, "grad_norm": 0.566403865814209, "learning_rate": 1.4502698429691098e-07, "loss": 0.0575, "step": 43431 }, { "epoch": 0.9570366942658668, "grad_norm": 0.7168583869934082, "learning_rate": 1.4487851665938855e-07, "loss": 0.0498, "step": 43432 }, { "epoch": 0.957058729555383, "grad_norm": 0.9779677391052246, "learning_rate": 1.4473012468696368e-07, "loss": 0.0974, "step": 43433 }, { "epoch": 0.9570807648448991, "grad_norm": 0.4682480990886688, "learning_rate": 1.4458180838039737e-07, "loss": 0.034, "step": 43434 }, { "epoch": 0.9571028001344153, "grad_norm": 0.24340036511421204, "learning_rate": 1.4443356774043904e-07, "loss": 0.0421, "step": 43435 }, { "epoch": 0.9571248354239315, "grad_norm": 0.7757869362831116, "learning_rate": 1.4428540276784807e-07, "loss": 0.0547, "step": 43436 }, { "epoch": 0.9571468707134476, "grad_norm": 0.827858567237854, "learning_rate": 1.4413731346337722e-07, "loss": 0.0794, "step": 43437 }, { "epoch": 0.9571689060029638, "grad_norm": 0.6265518665313721, "learning_rate": 1.4398929982778251e-07, "loss": 0.0719, "step": 43438 }, { "epoch": 0.95719094129248, "grad_norm": 0.8432803750038147, "learning_rate": 1.4384136186181506e-07, "loss": 0.0672, "step": 43439 }, { "epoch": 0.9572129765819961, "grad_norm": 0.8842309713363647, "learning_rate": 1.436934995662309e-07, "loss": 0.0726, "step": 43440 }, { "epoch": 0.9572350118715123, "grad_norm": 0.8987396955490112, "learning_rate": 1.4354571294178276e-07, "loss": 0.0628, "step": 43441 }, { "epoch": 0.9572570471610284, "grad_norm": 0.3611205816268921, "learning_rate": 1.433980019892217e-07, "loss": 0.0445, "step": 43442 }, { "epoch": 0.9572790824505445, "grad_norm": 0.591398298740387, "learning_rate": 1.432503667093038e-07, "loss": 0.0717, "step": 43443 }, { "epoch": 0.9573011177400607, "grad_norm": 0.4961196780204773, "learning_rate": 1.4310280710277678e-07, "loss": 0.0463, "step": 43444 }, { "epoch": 0.9573231530295768, "grad_norm": 0.42506054043769836, "learning_rate": 1.4295532317039506e-07, "loss": 0.0733, "step": 43445 }, { "epoch": 0.957345188319093, "grad_norm": 0.5146645903587341, "learning_rate": 1.4280791491290802e-07, "loss": 0.0853, "step": 43446 }, { "epoch": 0.9573672236086092, "grad_norm": 0.8771411776542664, "learning_rate": 1.4266058233106672e-07, "loss": 0.068, "step": 43447 }, { "epoch": 0.9573892588981253, "grad_norm": 0.6795648336410522, "learning_rate": 1.425133254256239e-07, "loss": 0.0807, "step": 43448 }, { "epoch": 0.9574112941876415, "grad_norm": 0.468331903219223, "learning_rate": 1.4236614419732563e-07, "loss": 0.0432, "step": 43449 }, { "epoch": 0.9574333294771576, "grad_norm": 0.8040450811386108, "learning_rate": 1.4221903864692466e-07, "loss": 0.0569, "step": 43450 }, { "epoch": 0.9574553647666738, "grad_norm": 0.6881949305534363, "learning_rate": 1.4207200877517036e-07, "loss": 0.0499, "step": 43451 }, { "epoch": 0.95747740005619, "grad_norm": 0.6464508175849915, "learning_rate": 1.4192505458281214e-07, "loss": 0.062, "step": 43452 }, { "epoch": 0.9574994353457061, "grad_norm": 0.5174723863601685, "learning_rate": 1.417781760705944e-07, "loss": 0.0998, "step": 43453 }, { "epoch": 0.9575214706352223, "grad_norm": 0.376786470413208, "learning_rate": 1.4163137323926823e-07, "loss": 0.053, "step": 43454 }, { "epoch": 0.9575435059247385, "grad_norm": 0.48157191276550293, "learning_rate": 1.41484646089583e-07, "loss": 0.0395, "step": 43455 }, { "epoch": 0.9575655412142546, "grad_norm": 0.9117558002471924, "learning_rate": 1.4133799462228313e-07, "loss": 0.0796, "step": 43456 }, { "epoch": 0.9575875765037708, "grad_norm": 1.0178033113479614, "learning_rate": 1.4119141883811805e-07, "loss": 0.0484, "step": 43457 }, { "epoch": 0.957609611793287, "grad_norm": 0.5404222011566162, "learning_rate": 1.4104491873783044e-07, "loss": 0.0554, "step": 43458 }, { "epoch": 0.9576316470828031, "grad_norm": 0.694334089756012, "learning_rate": 1.4089849432217138e-07, "loss": 0.0644, "step": 43459 }, { "epoch": 0.9576536823723193, "grad_norm": 0.7437916398048401, "learning_rate": 1.40752145591882e-07, "loss": 0.0633, "step": 43460 }, { "epoch": 0.9576757176618355, "grad_norm": 0.5522574186325073, "learning_rate": 1.4060587254771328e-07, "loss": 0.0513, "step": 43461 }, { "epoch": 0.9576977529513516, "grad_norm": 0.571111798286438, "learning_rate": 1.404596751904047e-07, "loss": 0.0618, "step": 43462 }, { "epoch": 0.9577197882408678, "grad_norm": 0.7894278168678284, "learning_rate": 1.4031355352070396e-07, "loss": 0.0662, "step": 43463 }, { "epoch": 0.957741823530384, "grad_norm": 0.29152652621269226, "learning_rate": 1.4016750753935548e-07, "loss": 0.0556, "step": 43464 }, { "epoch": 0.9577638588199001, "grad_norm": 0.3507654666900635, "learning_rate": 1.400215372471003e-07, "loss": 0.0353, "step": 43465 }, { "epoch": 0.9577858941094163, "grad_norm": 0.6081802845001221, "learning_rate": 1.3987564264468456e-07, "loss": 0.037, "step": 43466 }, { "epoch": 0.9578079293989324, "grad_norm": 0.6610892415046692, "learning_rate": 1.397298237328526e-07, "loss": 0.069, "step": 43467 }, { "epoch": 0.9578299646884485, "grad_norm": 0.4152245819568634, "learning_rate": 1.3958408051234383e-07, "loss": 0.0548, "step": 43468 }, { "epoch": 0.9578519999779647, "grad_norm": 0.4769200384616852, "learning_rate": 1.394384129839027e-07, "loss": 0.0553, "step": 43469 }, { "epoch": 0.9578740352674808, "grad_norm": 0.2879483699798584, "learning_rate": 1.3929282114827025e-07, "loss": 0.0362, "step": 43470 }, { "epoch": 0.957896070556997, "grad_norm": 0.5744537115097046, "learning_rate": 1.391473050061892e-07, "loss": 0.0475, "step": 43471 }, { "epoch": 0.9579181058465132, "grad_norm": 0.32653945684432983, "learning_rate": 1.3900186455839903e-07, "loss": 0.0577, "step": 43472 }, { "epoch": 0.9579401411360293, "grad_norm": 0.5506091117858887, "learning_rate": 1.3885649980564074e-07, "loss": 0.0484, "step": 43473 }, { "epoch": 0.9579621764255455, "grad_norm": 0.6866458654403687, "learning_rate": 1.387112107486571e-07, "loss": 0.058, "step": 43474 }, { "epoch": 0.9579842117150617, "grad_norm": 0.6315982937812805, "learning_rate": 1.3856599738818587e-07, "loss": 0.0714, "step": 43475 }, { "epoch": 0.9580062470045778, "grad_norm": 1.0523320436477661, "learning_rate": 1.384208597249681e-07, "loss": 0.0812, "step": 43476 }, { "epoch": 0.958028282294094, "grad_norm": 1.1701983213424683, "learning_rate": 1.3827579775973987e-07, "loss": 0.0983, "step": 43477 }, { "epoch": 0.9580503175836101, "grad_norm": 0.5752395391464233, "learning_rate": 1.3813081149324559e-07, "loss": 0.0441, "step": 43478 }, { "epoch": 0.9580723528731263, "grad_norm": 0.37286075949668884, "learning_rate": 1.3798590092621966e-07, "loss": 0.0298, "step": 43479 }, { "epoch": 0.9580943881626425, "grad_norm": 0.35289105772972107, "learning_rate": 1.378410660594015e-07, "loss": 0.0585, "step": 43480 }, { "epoch": 0.9581164234521586, "grad_norm": 0.5483177304267883, "learning_rate": 1.3769630689352552e-07, "loss": 0.0692, "step": 43481 }, { "epoch": 0.9581384587416748, "grad_norm": 0.7272781729698181, "learning_rate": 1.3755162342933614e-07, "loss": 0.0617, "step": 43482 }, { "epoch": 0.958160494031191, "grad_norm": 0.5346018075942993, "learning_rate": 1.3740701566756274e-07, "loss": 0.0771, "step": 43483 }, { "epoch": 0.9581825293207071, "grad_norm": 0.38806065917015076, "learning_rate": 1.372624836089481e-07, "loss": 0.0599, "step": 43484 }, { "epoch": 0.9582045646102233, "grad_norm": 0.7803398370742798, "learning_rate": 1.3711802725422496e-07, "loss": 0.0702, "step": 43485 }, { "epoch": 0.9582265998997395, "grad_norm": 0.532232403755188, "learning_rate": 1.3697364660412937e-07, "loss": 0.0772, "step": 43486 }, { "epoch": 0.9582486351892556, "grad_norm": 0.7663856148719788, "learning_rate": 1.3682934165939742e-07, "loss": 0.065, "step": 43487 }, { "epoch": 0.9582706704787718, "grad_norm": 0.5453106164932251, "learning_rate": 1.3668511242076186e-07, "loss": 0.0478, "step": 43488 }, { "epoch": 0.958292705768288, "grad_norm": 0.31056901812553406, "learning_rate": 1.3654095888896212e-07, "loss": 0.0473, "step": 43489 }, { "epoch": 0.9583147410578041, "grad_norm": 0.3410686254501343, "learning_rate": 1.3639688106472924e-07, "loss": 0.0567, "step": 43490 }, { "epoch": 0.9583367763473203, "grad_norm": 0.49850353598594666, "learning_rate": 1.3625287894879769e-07, "loss": 0.0452, "step": 43491 }, { "epoch": 0.9583588116368363, "grad_norm": 0.7000944018363953, "learning_rate": 1.3610895254190016e-07, "loss": 0.042, "step": 43492 }, { "epoch": 0.9583808469263525, "grad_norm": 0.7113586664199829, "learning_rate": 1.359651018447694e-07, "loss": 0.0603, "step": 43493 }, { "epoch": 0.9584028822158687, "grad_norm": 0.47741472721099854, "learning_rate": 1.3582132685814152e-07, "loss": 0.0636, "step": 43494 }, { "epoch": 0.9584249175053848, "grad_norm": 0.7015045285224915, "learning_rate": 1.3567762758274427e-07, "loss": 0.0731, "step": 43495 }, { "epoch": 0.958446952794901, "grad_norm": 0.3475227653980255, "learning_rate": 1.3553400401931205e-07, "loss": 0.0474, "step": 43496 }, { "epoch": 0.9584689880844172, "grad_norm": 0.7031893730163574, "learning_rate": 1.3539045616857592e-07, "loss": 0.0482, "step": 43497 }, { "epoch": 0.9584910233739333, "grad_norm": 0.5802984833717346, "learning_rate": 1.3524698403126866e-07, "loss": 0.065, "step": 43498 }, { "epoch": 0.9585130586634495, "grad_norm": 0.4538869261741638, "learning_rate": 1.3510358760811968e-07, "loss": 0.0532, "step": 43499 }, { "epoch": 0.9585350939529657, "grad_norm": 0.5768043994903564, "learning_rate": 1.3496026689985842e-07, "loss": 0.0624, "step": 43500 }, { "epoch": 0.9585571292424818, "grad_norm": 0.5108864903450012, "learning_rate": 1.348170219072159e-07, "loss": 0.068, "step": 43501 }, { "epoch": 0.958579164531998, "grad_norm": 0.5562801361083984, "learning_rate": 1.3467385263092157e-07, "loss": 0.0477, "step": 43502 }, { "epoch": 0.9586011998215141, "grad_norm": 0.6160704493522644, "learning_rate": 1.3453075907170486e-07, "loss": 0.0667, "step": 43503 }, { "epoch": 0.9586232351110303, "grad_norm": 0.5648950338363647, "learning_rate": 1.3438774123029353e-07, "loss": 0.0472, "step": 43504 }, { "epoch": 0.9586452704005465, "grad_norm": 0.4069773554801941, "learning_rate": 1.342447991074186e-07, "loss": 0.0595, "step": 43505 }, { "epoch": 0.9586673056900626, "grad_norm": 0.41163384914398193, "learning_rate": 1.3410193270380622e-07, "loss": 0.0525, "step": 43506 }, { "epoch": 0.9586893409795788, "grad_norm": 0.47614553570747375, "learning_rate": 1.3395914202018244e-07, "loss": 0.0406, "step": 43507 }, { "epoch": 0.958711376269095, "grad_norm": 0.6857022047042847, "learning_rate": 1.3381642705728002e-07, "loss": 0.0442, "step": 43508 }, { "epoch": 0.9587334115586111, "grad_norm": 1.0520743131637573, "learning_rate": 1.3367378781582008e-07, "loss": 0.0778, "step": 43509 }, { "epoch": 0.9587554468481273, "grad_norm": 0.630720317363739, "learning_rate": 1.3353122429653197e-07, "loss": 0.0619, "step": 43510 }, { "epoch": 0.9587774821376435, "grad_norm": 0.9435610175132751, "learning_rate": 1.3338873650014016e-07, "loss": 0.0694, "step": 43511 }, { "epoch": 0.9587995174271596, "grad_norm": 0.5559087991714478, "learning_rate": 1.3324632442737406e-07, "loss": 0.0552, "step": 43512 }, { "epoch": 0.9588215527166758, "grad_norm": 0.3543044328689575, "learning_rate": 1.3310398807895473e-07, "loss": 0.0354, "step": 43513 }, { "epoch": 0.958843588006192, "grad_norm": 0.5390484929084778, "learning_rate": 1.3296172745560998e-07, "loss": 0.0736, "step": 43514 }, { "epoch": 0.9588656232957081, "grad_norm": 0.6083904504776001, "learning_rate": 1.3281954255806416e-07, "loss": 0.0521, "step": 43515 }, { "epoch": 0.9588876585852243, "grad_norm": 0.31975656747817993, "learning_rate": 1.3267743338703842e-07, "loss": 0.0394, "step": 43516 }, { "epoch": 0.9589096938747403, "grad_norm": 1.3094547986984253, "learning_rate": 1.3253539994326214e-07, "loss": 0.0815, "step": 43517 }, { "epoch": 0.9589317291642565, "grad_norm": 0.45507460832595825, "learning_rate": 1.323934422274531e-07, "loss": 0.0504, "step": 43518 }, { "epoch": 0.9589537644537727, "grad_norm": 0.5481246709823608, "learning_rate": 1.3225156024033734e-07, "loss": 0.0531, "step": 43519 }, { "epoch": 0.9589757997432888, "grad_norm": 0.3907032907009125, "learning_rate": 1.321097539826377e-07, "loss": 0.0405, "step": 43520 }, { "epoch": 0.958997835032805, "grad_norm": 0.5070392489433289, "learning_rate": 1.3196802345507519e-07, "loss": 0.0474, "step": 43521 }, { "epoch": 0.9590198703223212, "grad_norm": 0.4312087595462799, "learning_rate": 1.3182636865837427e-07, "loss": 0.0502, "step": 43522 }, { "epoch": 0.9590419056118373, "grad_norm": 0.7030991315841675, "learning_rate": 1.316847895932527e-07, "loss": 0.0836, "step": 43523 }, { "epoch": 0.9590639409013535, "grad_norm": 0.5428321957588196, "learning_rate": 1.3154328626043487e-07, "loss": 0.0373, "step": 43524 }, { "epoch": 0.9590859761908697, "grad_norm": 0.5998291969299316, "learning_rate": 1.314018586606386e-07, "loss": 0.0888, "step": 43525 }, { "epoch": 0.9591080114803858, "grad_norm": 0.7875792384147644, "learning_rate": 1.3126050679458655e-07, "loss": 0.0596, "step": 43526 }, { "epoch": 0.959130046769902, "grad_norm": 0.35613662004470825, "learning_rate": 1.3111923066299658e-07, "loss": 0.0463, "step": 43527 }, { "epoch": 0.9591520820594182, "grad_norm": 0.7682051658630371, "learning_rate": 1.3097803026659306e-07, "loss": 0.0911, "step": 43528 }, { "epoch": 0.9591741173489343, "grad_norm": 0.5738713145256042, "learning_rate": 1.3083690560608873e-07, "loss": 0.0881, "step": 43529 }, { "epoch": 0.9591961526384505, "grad_norm": 0.7641163468360901, "learning_rate": 1.306958566822064e-07, "loss": 0.074, "step": 43530 }, { "epoch": 0.9592181879279666, "grad_norm": 0.46657195687294006, "learning_rate": 1.305548834956638e-07, "loss": 0.0583, "step": 43531 }, { "epoch": 0.9592402232174828, "grad_norm": 0.7336331605911255, "learning_rate": 1.3041398604718036e-07, "loss": 0.0673, "step": 43532 }, { "epoch": 0.959262258506999, "grad_norm": 0.341237336397171, "learning_rate": 1.302731643374705e-07, "loss": 0.0659, "step": 43533 }, { "epoch": 0.9592842937965151, "grad_norm": 0.7458656430244446, "learning_rate": 1.3013241836725366e-07, "loss": 0.0755, "step": 43534 }, { "epoch": 0.9593063290860313, "grad_norm": 0.40844300389289856, "learning_rate": 1.2999174813724592e-07, "loss": 0.0394, "step": 43535 }, { "epoch": 0.9593283643755475, "grad_norm": 0.3421969711780548, "learning_rate": 1.2985115364816336e-07, "loss": 0.0578, "step": 43536 }, { "epoch": 0.9593503996650636, "grad_norm": 0.7397823333740234, "learning_rate": 1.2971063490072542e-07, "loss": 0.0446, "step": 43537 }, { "epoch": 0.9593724349545798, "grad_norm": 0.5743356943130493, "learning_rate": 1.2957019189564323e-07, "loss": 0.0377, "step": 43538 }, { "epoch": 0.959394470244096, "grad_norm": 0.3940502405166626, "learning_rate": 1.294298246336345e-07, "loss": 0.044, "step": 43539 }, { "epoch": 0.9594165055336121, "grad_norm": 0.5358322858810425, "learning_rate": 1.292895331154137e-07, "loss": 0.0567, "step": 43540 }, { "epoch": 0.9594385408231283, "grad_norm": 0.7340461611747742, "learning_rate": 1.2914931734169688e-07, "loss": 0.0624, "step": 43541 }, { "epoch": 0.9594605761126443, "grad_norm": 0.5770173668861389, "learning_rate": 1.2900917731319516e-07, "loss": 0.0615, "step": 43542 }, { "epoch": 0.9594826114021605, "grad_norm": 0.6018082499504089, "learning_rate": 1.2886911303062466e-07, "loss": 0.0611, "step": 43543 }, { "epoch": 0.9595046466916767, "grad_norm": 0.8774965405464172, "learning_rate": 1.2872912449469976e-07, "loss": 0.0677, "step": 43544 }, { "epoch": 0.9595266819811928, "grad_norm": 0.36914294958114624, "learning_rate": 1.2858921170613157e-07, "loss": 0.0418, "step": 43545 }, { "epoch": 0.959548717270709, "grad_norm": 0.6038103699684143, "learning_rate": 1.2844937466563123e-07, "loss": 0.065, "step": 43546 }, { "epoch": 0.9595707525602252, "grad_norm": 0.5650309920310974, "learning_rate": 1.2830961337391477e-07, "loss": 0.0603, "step": 43547 }, { "epoch": 0.9595927878497413, "grad_norm": 0.21080276370048523, "learning_rate": 1.2816992783169335e-07, "loss": 0.0521, "step": 43548 }, { "epoch": 0.9596148231392575, "grad_norm": 0.8620054721832275, "learning_rate": 1.2803031803967468e-07, "loss": 0.0594, "step": 43549 }, { "epoch": 0.9596368584287737, "grad_norm": 0.46234098076820374, "learning_rate": 1.2789078399857323e-07, "loss": 0.045, "step": 43550 }, { "epoch": 0.9596588937182898, "grad_norm": 0.30891168117523193, "learning_rate": 1.2775132570910176e-07, "loss": 0.0434, "step": 43551 }, { "epoch": 0.959680929007806, "grad_norm": 0.8792246580123901, "learning_rate": 1.2761194317196468e-07, "loss": 0.0479, "step": 43552 }, { "epoch": 0.9597029642973222, "grad_norm": 0.44844719767570496, "learning_rate": 1.2747263638787642e-07, "loss": 0.0315, "step": 43553 }, { "epoch": 0.9597249995868383, "grad_norm": 0.6396197080612183, "learning_rate": 1.273334053575448e-07, "loss": 0.052, "step": 43554 }, { "epoch": 0.9597470348763545, "grad_norm": 0.6953320503234863, "learning_rate": 1.2719425008168084e-07, "loss": 0.0474, "step": 43555 }, { "epoch": 0.9597690701658707, "grad_norm": 0.49646326899528503, "learning_rate": 1.270551705609907e-07, "loss": 0.0629, "step": 43556 }, { "epoch": 0.9597911054553868, "grad_norm": 0.7989304065704346, "learning_rate": 1.2691616679618378e-07, "loss": 0.0495, "step": 43557 }, { "epoch": 0.959813140744903, "grad_norm": 0.5575583577156067, "learning_rate": 1.2677723878796954e-07, "loss": 0.0514, "step": 43558 }, { "epoch": 0.9598351760344191, "grad_norm": 0.6097791790962219, "learning_rate": 1.2663838653705406e-07, "loss": 0.0605, "step": 43559 }, { "epoch": 0.9598572113239353, "grad_norm": 0.7169109582901001, "learning_rate": 1.264996100441468e-07, "loss": 0.0526, "step": 43560 }, { "epoch": 0.9598792466134515, "grad_norm": 0.4192865192890167, "learning_rate": 1.263609093099488e-07, "loss": 0.0562, "step": 43561 }, { "epoch": 0.9599012819029676, "grad_norm": 0.7107470631599426, "learning_rate": 1.2622228433517457e-07, "loss": 0.0675, "step": 43562 }, { "epoch": 0.9599233171924838, "grad_norm": 0.6927095651626587, "learning_rate": 1.2608373512052352e-07, "loss": 0.064, "step": 43563 }, { "epoch": 0.959945352482, "grad_norm": 0.5647760033607483, "learning_rate": 1.2594526166670506e-07, "loss": 0.0346, "step": 43564 }, { "epoch": 0.9599673877715161, "grad_norm": 0.6837620735168457, "learning_rate": 1.2580686397442365e-07, "loss": 0.0591, "step": 43565 }, { "epoch": 0.9599894230610322, "grad_norm": 0.6307904720306396, "learning_rate": 1.2566854204438372e-07, "loss": 0.0414, "step": 43566 }, { "epoch": 0.9600114583505484, "grad_norm": 0.6474208235740662, "learning_rate": 1.255302958772897e-07, "loss": 0.0633, "step": 43567 }, { "epoch": 0.9600334936400645, "grad_norm": 0.7044186592102051, "learning_rate": 1.2539212547384772e-07, "loss": 0.0963, "step": 43568 }, { "epoch": 0.9600555289295807, "grad_norm": 0.3726913630962372, "learning_rate": 1.2525403083475883e-07, "loss": 0.0512, "step": 43569 }, { "epoch": 0.9600775642190968, "grad_norm": 0.5683183670043945, "learning_rate": 1.2511601196072754e-07, "loss": 0.0545, "step": 43570 }, { "epoch": 0.960099599508613, "grad_norm": 0.5998185276985168, "learning_rate": 1.2497806885245656e-07, "loss": 0.0466, "step": 43571 }, { "epoch": 0.9601216347981292, "grad_norm": 0.45447149872779846, "learning_rate": 1.2484020151065034e-07, "loss": 0.0556, "step": 43572 }, { "epoch": 0.9601436700876453, "grad_norm": 0.8340998888015747, "learning_rate": 1.2470240993600833e-07, "loss": 0.0388, "step": 43573 }, { "epoch": 0.9601657053771615, "grad_norm": 0.9488447308540344, "learning_rate": 1.24564694129235e-07, "loss": 0.0942, "step": 43574 }, { "epoch": 0.9601877406666777, "grad_norm": 0.55121910572052, "learning_rate": 1.2442705409102972e-07, "loss": 0.0794, "step": 43575 }, { "epoch": 0.9602097759561938, "grad_norm": 0.5788576006889343, "learning_rate": 1.2428948982209366e-07, "loss": 0.0775, "step": 43576 }, { "epoch": 0.96023181124571, "grad_norm": 0.6895453929901123, "learning_rate": 1.2415200132312953e-07, "loss": 0.0644, "step": 43577 }, { "epoch": 0.9602538465352262, "grad_norm": 0.48760032653808594, "learning_rate": 1.2401458859483683e-07, "loss": 0.0479, "step": 43578 }, { "epoch": 0.9602758818247423, "grad_norm": 0.5568763613700867, "learning_rate": 1.238772516379133e-07, "loss": 0.0492, "step": 43579 }, { "epoch": 0.9602979171142585, "grad_norm": 0.6495417356491089, "learning_rate": 1.2373999045306007e-07, "loss": 0.0439, "step": 43580 }, { "epoch": 0.9603199524037747, "grad_norm": 0.6246696710586548, "learning_rate": 1.2360280504097822e-07, "loss": 0.057, "step": 43581 }, { "epoch": 0.9603419876932908, "grad_norm": 0.3615058362483978, "learning_rate": 1.2346569540236218e-07, "loss": 0.0299, "step": 43582 }, { "epoch": 0.960364022982807, "grad_norm": 0.5651617646217346, "learning_rate": 1.2332866153791478e-07, "loss": 0.0552, "step": 43583 }, { "epoch": 0.9603860582723232, "grad_norm": 0.37531977891921997, "learning_rate": 1.2319170344833043e-07, "loss": 0.0416, "step": 43584 }, { "epoch": 0.9604080935618393, "grad_norm": 0.5667033195495605, "learning_rate": 1.230548211343102e-07, "loss": 0.0608, "step": 43585 }, { "epoch": 0.9604301288513555, "grad_norm": 0.5754082798957825, "learning_rate": 1.2291801459654862e-07, "loss": 0.0512, "step": 43586 }, { "epoch": 0.9604521641408716, "grad_norm": 0.802757978439331, "learning_rate": 1.227812838357434e-07, "loss": 0.0749, "step": 43587 }, { "epoch": 0.9604741994303878, "grad_norm": 0.6676303148269653, "learning_rate": 1.2264462885259065e-07, "loss": 0.0712, "step": 43588 }, { "epoch": 0.960496234719904, "grad_norm": 0.5313281416893005, "learning_rate": 1.225080496477865e-07, "loss": 0.0543, "step": 43589 }, { "epoch": 0.9605182700094201, "grad_norm": 1.0229569673538208, "learning_rate": 1.2237154622202706e-07, "loss": 0.071, "step": 43590 }, { "epoch": 0.9605403052989362, "grad_norm": 0.6398667097091675, "learning_rate": 1.2223511857600677e-07, "loss": 0.0523, "step": 43591 }, { "epoch": 0.9605623405884524, "grad_norm": 0.427482932806015, "learning_rate": 1.2209876671042174e-07, "loss": 0.057, "step": 43592 }, { "epoch": 0.9605843758779685, "grad_norm": 0.5390678644180298, "learning_rate": 1.2196249062596475e-07, "loss": 0.062, "step": 43593 }, { "epoch": 0.9606064111674847, "grad_norm": 0.6097081899642944, "learning_rate": 1.218262903233336e-07, "loss": 0.0561, "step": 43594 }, { "epoch": 0.9606284464570009, "grad_norm": 0.810034990310669, "learning_rate": 1.21690165803216e-07, "loss": 0.0502, "step": 43595 }, { "epoch": 0.960650481746517, "grad_norm": 0.8920298218727112, "learning_rate": 1.215541170663115e-07, "loss": 0.0714, "step": 43596 }, { "epoch": 0.9606725170360332, "grad_norm": 0.19236013293266296, "learning_rate": 1.2141814411330943e-07, "loss": 0.034, "step": 43597 }, { "epoch": 0.9606945523255493, "grad_norm": 0.5808913111686707, "learning_rate": 1.2128224694490263e-07, "loss": 0.0322, "step": 43598 }, { "epoch": 0.9607165876150655, "grad_norm": 0.566803514957428, "learning_rate": 1.2114642556178557e-07, "loss": 0.0326, "step": 43599 }, { "epoch": 0.9607386229045817, "grad_norm": 0.7031686902046204, "learning_rate": 1.2101067996464598e-07, "loss": 0.0841, "step": 43600 }, { "epoch": 0.9607606581940978, "grad_norm": 1.091961145401001, "learning_rate": 1.2087501015418002e-07, "loss": 0.0843, "step": 43601 }, { "epoch": 0.960782693483614, "grad_norm": 0.6394469738006592, "learning_rate": 1.2073941613107542e-07, "loss": 0.0371, "step": 43602 }, { "epoch": 0.9608047287731302, "grad_norm": 0.44261661171913147, "learning_rate": 1.20603897896025e-07, "loss": 0.0724, "step": 43603 }, { "epoch": 0.9608267640626463, "grad_norm": 0.37683308124542236, "learning_rate": 1.2046845544971653e-07, "loss": 0.0657, "step": 43604 }, { "epoch": 0.9608487993521625, "grad_norm": 0.5507234334945679, "learning_rate": 1.203330887928411e-07, "loss": 0.0457, "step": 43605 }, { "epoch": 0.9608708346416787, "grad_norm": 0.33168455958366394, "learning_rate": 1.2019779792608987e-07, "loss": 0.0398, "step": 43606 }, { "epoch": 0.9608928699311948, "grad_norm": 0.5191468000411987, "learning_rate": 1.2006258285014892e-07, "loss": 0.0537, "step": 43607 }, { "epoch": 0.960914905220711, "grad_norm": 0.5034289956092834, "learning_rate": 1.1992744356570938e-07, "loss": 0.0527, "step": 43608 }, { "epoch": 0.9609369405102272, "grad_norm": 0.531474769115448, "learning_rate": 1.1979238007345906e-07, "loss": 0.0744, "step": 43609 }, { "epoch": 0.9609589757997433, "grad_norm": 0.9837220311164856, "learning_rate": 1.1965739237408569e-07, "loss": 0.0613, "step": 43610 }, { "epoch": 0.9609810110892595, "grad_norm": 0.5817426443099976, "learning_rate": 1.195224804682754e-07, "loss": 0.0564, "step": 43611 }, { "epoch": 0.9610030463787756, "grad_norm": 0.7081535458564758, "learning_rate": 1.19387644356716e-07, "loss": 0.0582, "step": 43612 }, { "epoch": 0.9610250816682918, "grad_norm": 0.64735347032547, "learning_rate": 1.1925288404009694e-07, "loss": 0.0357, "step": 43613 }, { "epoch": 0.961047116957808, "grad_norm": 0.9133273959159851, "learning_rate": 1.1911819951910264e-07, "loss": 0.0989, "step": 43614 }, { "epoch": 0.9610691522473241, "grad_norm": 0.5438570380210876, "learning_rate": 1.1898359079441756e-07, "loss": 0.0785, "step": 43615 }, { "epoch": 0.9610911875368402, "grad_norm": 0.48799991607666016, "learning_rate": 1.1884905786672951e-07, "loss": 0.0949, "step": 43616 }, { "epoch": 0.9611132228263564, "grad_norm": 0.5138927102088928, "learning_rate": 1.1871460073672291e-07, "loss": 0.0459, "step": 43617 }, { "epoch": 0.9611352581158725, "grad_norm": 0.336257666349411, "learning_rate": 1.185802194050839e-07, "loss": 0.0405, "step": 43618 }, { "epoch": 0.9611572934053887, "grad_norm": 0.6979674100875854, "learning_rate": 1.1844591387249526e-07, "loss": 0.0554, "step": 43619 }, { "epoch": 0.9611793286949049, "grad_norm": 0.7495318651199341, "learning_rate": 1.1831168413964144e-07, "loss": 0.0407, "step": 43620 }, { "epoch": 0.961201363984421, "grad_norm": 0.7495535016059875, "learning_rate": 1.181775302072069e-07, "loss": 0.0662, "step": 43621 }, { "epoch": 0.9612233992739372, "grad_norm": 0.7726919054985046, "learning_rate": 1.1804345207587274e-07, "loss": 0.0395, "step": 43622 }, { "epoch": 0.9612454345634533, "grad_norm": 0.517058789730072, "learning_rate": 1.1790944974632511e-07, "loss": 0.0599, "step": 43623 }, { "epoch": 0.9612674698529695, "grad_norm": 0.523228645324707, "learning_rate": 1.1777552321924512e-07, "loss": 0.0501, "step": 43624 }, { "epoch": 0.9612895051424857, "grad_norm": 0.25450974702835083, "learning_rate": 1.1764167249531554e-07, "loss": 0.0493, "step": 43625 }, { "epoch": 0.9613115404320018, "grad_norm": 0.8169158101081848, "learning_rate": 1.1750789757521585e-07, "loss": 0.0796, "step": 43626 }, { "epoch": 0.961333575721518, "grad_norm": 0.5821756720542908, "learning_rate": 1.1737419845963048e-07, "loss": 0.0576, "step": 43627 }, { "epoch": 0.9613556110110342, "grad_norm": 0.9605945348739624, "learning_rate": 1.1724057514923891e-07, "loss": 0.0766, "step": 43628 }, { "epoch": 0.9613776463005503, "grad_norm": 0.9231349229812622, "learning_rate": 1.1710702764472059e-07, "loss": 0.0554, "step": 43629 }, { "epoch": 0.9613996815900665, "grad_norm": 0.6845636367797852, "learning_rate": 1.1697355594675662e-07, "loss": 0.0677, "step": 43630 }, { "epoch": 0.9614217168795827, "grad_norm": 0.2962937355041504, "learning_rate": 1.1684016005602982e-07, "loss": 0.0366, "step": 43631 }, { "epoch": 0.9614437521690988, "grad_norm": 0.973469078540802, "learning_rate": 1.1670683997321462e-07, "loss": 0.0946, "step": 43632 }, { "epoch": 0.961465787458615, "grad_norm": 0.5278419852256775, "learning_rate": 1.1657359569899384e-07, "loss": 0.0267, "step": 43633 }, { "epoch": 0.9614878227481312, "grad_norm": 0.639324963092804, "learning_rate": 1.1644042723404358e-07, "loss": 0.0643, "step": 43634 }, { "epoch": 0.9615098580376473, "grad_norm": 0.38673195242881775, "learning_rate": 1.1630733457904496e-07, "loss": 0.0438, "step": 43635 }, { "epoch": 0.9615318933271635, "grad_norm": 1.1795977354049683, "learning_rate": 1.1617431773467579e-07, "loss": 0.0719, "step": 43636 }, { "epoch": 0.9615539286166797, "grad_norm": 0.824643075466156, "learning_rate": 1.1604137670160885e-07, "loss": 0.101, "step": 43637 }, { "epoch": 0.9615759639061958, "grad_norm": 0.44942301511764526, "learning_rate": 1.1590851148052528e-07, "loss": 0.0571, "step": 43638 }, { "epoch": 0.961597999195712, "grad_norm": 0.48717740178108215, "learning_rate": 1.1577572207210285e-07, "loss": 0.0577, "step": 43639 }, { "epoch": 0.9616200344852281, "grad_norm": 0.6029077172279358, "learning_rate": 1.1564300847701436e-07, "loss": 0.0475, "step": 43640 }, { "epoch": 0.9616420697747442, "grad_norm": 0.33402466773986816, "learning_rate": 1.1551037069593762e-07, "loss": 0.0316, "step": 43641 }, { "epoch": 0.9616641050642604, "grad_norm": 0.389131098985672, "learning_rate": 1.153778087295504e-07, "loss": 0.062, "step": 43642 }, { "epoch": 0.9616861403537765, "grad_norm": 0.46316060423851013, "learning_rate": 1.1524532257852382e-07, "loss": 0.0593, "step": 43643 }, { "epoch": 0.9617081756432927, "grad_norm": 1.1132855415344238, "learning_rate": 1.151129122435357e-07, "loss": 0.0627, "step": 43644 }, { "epoch": 0.9617302109328089, "grad_norm": 0.7746042609214783, "learning_rate": 1.1498057772525882e-07, "loss": 0.0583, "step": 43645 }, { "epoch": 0.961752246222325, "grad_norm": 1.0203248262405396, "learning_rate": 1.1484831902436766e-07, "loss": 0.0644, "step": 43646 }, { "epoch": 0.9617742815118412, "grad_norm": 0.8232386708259583, "learning_rate": 1.1471613614153665e-07, "loss": 0.0642, "step": 43647 }, { "epoch": 0.9617963168013574, "grad_norm": 0.4354958236217499, "learning_rate": 1.1458402907743859e-07, "loss": 0.0567, "step": 43648 }, { "epoch": 0.9618183520908735, "grad_norm": 0.5217306017875671, "learning_rate": 1.144519978327463e-07, "loss": 0.0312, "step": 43649 }, { "epoch": 0.9618403873803897, "grad_norm": 0.5409786701202393, "learning_rate": 1.1432004240813088e-07, "loss": 0.0628, "step": 43650 }, { "epoch": 0.9618624226699058, "grad_norm": 0.6682128310203552, "learning_rate": 1.1418816280426847e-07, "loss": 0.0512, "step": 43651 }, { "epoch": 0.961884457959422, "grad_norm": 0.5158334374427795, "learning_rate": 1.1405635902182521e-07, "loss": 0.0539, "step": 43652 }, { "epoch": 0.9619064932489382, "grad_norm": 0.3454892933368683, "learning_rate": 1.1392463106147721e-07, "loss": 0.0575, "step": 43653 }, { "epoch": 0.9619285285384543, "grad_norm": 0.8196315765380859, "learning_rate": 1.1379297892389395e-07, "loss": 0.0638, "step": 43654 }, { "epoch": 0.9619505638279705, "grad_norm": 0.676301896572113, "learning_rate": 1.1366140260974322e-07, "loss": 0.0505, "step": 43655 }, { "epoch": 0.9619725991174867, "grad_norm": 0.5424256324768066, "learning_rate": 1.1352990211970116e-07, "loss": 0.036, "step": 43656 }, { "epoch": 0.9619946344070028, "grad_norm": 0.5634292364120483, "learning_rate": 1.1339847745443055e-07, "loss": 0.0349, "step": 43657 }, { "epoch": 0.962016669696519, "grad_norm": 0.4519115090370178, "learning_rate": 1.1326712861460587e-07, "loss": 0.0511, "step": 43658 }, { "epoch": 0.9620387049860352, "grad_norm": 0.8597380518913269, "learning_rate": 1.131358556008949e-07, "loss": 0.0627, "step": 43659 }, { "epoch": 0.9620607402755513, "grad_norm": 0.8245787620544434, "learning_rate": 1.1300465841396545e-07, "loss": 0.0613, "step": 43660 }, { "epoch": 0.9620827755650675, "grad_norm": 0.6788153648376465, "learning_rate": 1.1287353705448699e-07, "loss": 0.058, "step": 43661 }, { "epoch": 0.9621048108545837, "grad_norm": 0.5240857005119324, "learning_rate": 1.1274249152312565e-07, "loss": 0.0566, "step": 43662 }, { "epoch": 0.9621268461440998, "grad_norm": 0.6781543493270874, "learning_rate": 1.126115218205509e-07, "loss": 0.0662, "step": 43663 }, { "epoch": 0.962148881433616, "grad_norm": 0.5708624720573425, "learning_rate": 1.1248062794742886e-07, "loss": 0.0464, "step": 43664 }, { "epoch": 0.962170916723132, "grad_norm": 0.8701967000961304, "learning_rate": 1.1234980990442567e-07, "loss": 0.0798, "step": 43665 }, { "epoch": 0.9621929520126482, "grad_norm": 0.7815808057785034, "learning_rate": 1.1221906769220913e-07, "loss": 0.0852, "step": 43666 }, { "epoch": 0.9622149873021644, "grad_norm": 0.8886178731918335, "learning_rate": 1.1208840131144371e-07, "loss": 0.0732, "step": 43667 }, { "epoch": 0.9622370225916805, "grad_norm": 0.5760577321052551, "learning_rate": 1.1195781076279555e-07, "loss": 0.0471, "step": 43668 }, { "epoch": 0.9622590578811967, "grad_norm": 0.9822525382041931, "learning_rate": 1.1182729604693076e-07, "loss": 0.0605, "step": 43669 }, { "epoch": 0.9622810931707129, "grad_norm": 0.6279971599578857, "learning_rate": 1.1169685716451383e-07, "loss": 0.0639, "step": 43670 }, { "epoch": 0.962303128460229, "grad_norm": 0.9810724258422852, "learning_rate": 1.1156649411620589e-07, "loss": 0.0582, "step": 43671 }, { "epoch": 0.9623251637497452, "grad_norm": 0.545399010181427, "learning_rate": 1.1143620690267475e-07, "loss": 0.0379, "step": 43672 }, { "epoch": 0.9623471990392614, "grad_norm": 0.5030651688575745, "learning_rate": 1.113059955245832e-07, "loss": 0.0701, "step": 43673 }, { "epoch": 0.9623692343287775, "grad_norm": 0.5892648696899414, "learning_rate": 1.1117585998259572e-07, "loss": 0.0501, "step": 43674 }, { "epoch": 0.9623912696182937, "grad_norm": 0.8382057547569275, "learning_rate": 1.1104580027737177e-07, "loss": 0.0524, "step": 43675 }, { "epoch": 0.9624133049078099, "grad_norm": 0.33645814657211304, "learning_rate": 1.1091581640957416e-07, "loss": 0.0351, "step": 43676 }, { "epoch": 0.962435340197326, "grad_norm": 0.42852985858917236, "learning_rate": 1.1078590837986902e-07, "loss": 0.0576, "step": 43677 }, { "epoch": 0.9624573754868422, "grad_norm": 0.6928203105926514, "learning_rate": 1.1065607618891416e-07, "loss": 0.0656, "step": 43678 }, { "epoch": 0.9624794107763583, "grad_norm": 0.7746129631996155, "learning_rate": 1.1052631983737404e-07, "loss": 0.0853, "step": 43679 }, { "epoch": 0.9625014460658745, "grad_norm": 0.7401357889175415, "learning_rate": 1.1039663932590483e-07, "loss": 0.0551, "step": 43680 }, { "epoch": 0.9625234813553907, "grad_norm": 0.3613639175891876, "learning_rate": 1.1026703465517262e-07, "loss": 0.0522, "step": 43681 }, { "epoch": 0.9625455166449068, "grad_norm": 0.3681636154651642, "learning_rate": 1.101375058258336e-07, "loss": 0.0554, "step": 43682 }, { "epoch": 0.962567551934423, "grad_norm": 0.37798014283180237, "learning_rate": 1.1000805283854887e-07, "loss": 0.04, "step": 43683 }, { "epoch": 0.9625895872239392, "grad_norm": 0.7197871208190918, "learning_rate": 1.0987867569397792e-07, "loss": 0.0597, "step": 43684 }, { "epoch": 0.9626116225134553, "grad_norm": 0.6812973022460938, "learning_rate": 1.0974937439278021e-07, "loss": 0.0675, "step": 43685 }, { "epoch": 0.9626336578029715, "grad_norm": 0.5449576377868652, "learning_rate": 1.0962014893561357e-07, "loss": 0.0618, "step": 43686 }, { "epoch": 0.9626556930924877, "grad_norm": 0.5368725061416626, "learning_rate": 1.0949099932313412e-07, "loss": 0.0538, "step": 43687 }, { "epoch": 0.9626777283820038, "grad_norm": 0.36289182305336, "learning_rate": 1.0936192555600466e-07, "loss": 0.049, "step": 43688 }, { "epoch": 0.96269976367152, "grad_norm": 0.4800150394439697, "learning_rate": 1.0923292763487968e-07, "loss": 0.0502, "step": 43689 }, { "epoch": 0.962721798961036, "grad_norm": 0.5657861232757568, "learning_rate": 1.0910400556041533e-07, "loss": 0.053, "step": 43690 }, { "epoch": 0.9627438342505522, "grad_norm": 0.4612613320350647, "learning_rate": 1.0897515933327107e-07, "loss": 0.0578, "step": 43691 }, { "epoch": 0.9627658695400684, "grad_norm": 1.0025039911270142, "learning_rate": 1.0884638895409971e-07, "loss": 0.0841, "step": 43692 }, { "epoch": 0.9627879048295845, "grad_norm": 0.4204823672771454, "learning_rate": 1.0871769442355906e-07, "loss": 0.0341, "step": 43693 }, { "epoch": 0.9628099401191007, "grad_norm": 0.6181886792182922, "learning_rate": 1.0858907574230525e-07, "loss": 0.0446, "step": 43694 }, { "epoch": 0.9628319754086169, "grad_norm": 0.4984009563922882, "learning_rate": 1.0846053291099278e-07, "loss": 0.0434, "step": 43695 }, { "epoch": 0.962854010698133, "grad_norm": 0.30206388235092163, "learning_rate": 1.0833206593027612e-07, "loss": 0.0402, "step": 43696 }, { "epoch": 0.9628760459876492, "grad_norm": 0.36786967515945435, "learning_rate": 1.0820367480080973e-07, "loss": 0.0519, "step": 43697 }, { "epoch": 0.9628980812771654, "grad_norm": 0.5664316415786743, "learning_rate": 1.0807535952324809e-07, "loss": 0.0636, "step": 43698 }, { "epoch": 0.9629201165666815, "grad_norm": 0.45009127259254456, "learning_rate": 1.0794712009824237e-07, "loss": 0.0435, "step": 43699 }, { "epoch": 0.9629421518561977, "grad_norm": 0.6573516130447388, "learning_rate": 1.0781895652645035e-07, "loss": 0.0883, "step": 43700 }, { "epoch": 0.9629641871457139, "grad_norm": 0.6811717748641968, "learning_rate": 1.0769086880851986e-07, "loss": 0.0606, "step": 43701 }, { "epoch": 0.96298622243523, "grad_norm": 0.45034128427505493, "learning_rate": 1.075628569451087e-07, "loss": 0.0602, "step": 43702 }, { "epoch": 0.9630082577247462, "grad_norm": 0.4074399769306183, "learning_rate": 1.0743492093686302e-07, "loss": 0.0342, "step": 43703 }, { "epoch": 0.9630302930142624, "grad_norm": 0.40484756231307983, "learning_rate": 1.0730706078443897e-07, "loss": 0.0321, "step": 43704 }, { "epoch": 0.9630523283037785, "grad_norm": 0.37473905086517334, "learning_rate": 1.0717927648848436e-07, "loss": 0.0476, "step": 43705 }, { "epoch": 0.9630743635932947, "grad_norm": 0.6360859870910645, "learning_rate": 1.0705156804965366e-07, "loss": 0.0676, "step": 43706 }, { "epoch": 0.9630963988828108, "grad_norm": 0.2996117174625397, "learning_rate": 1.069239354685947e-07, "loss": 0.0758, "step": 43707 }, { "epoch": 0.963118434172327, "grad_norm": 0.7142114639282227, "learning_rate": 1.067963787459586e-07, "loss": 0.0521, "step": 43708 }, { "epoch": 0.9631404694618432, "grad_norm": 0.7038160562515259, "learning_rate": 1.0666889788239486e-07, "loss": 0.0722, "step": 43709 }, { "epoch": 0.9631625047513593, "grad_norm": 0.6753973364830017, "learning_rate": 1.0654149287855297e-07, "loss": 0.0402, "step": 43710 }, { "epoch": 0.9631845400408755, "grad_norm": 0.6332895159721375, "learning_rate": 1.0641416373508239e-07, "loss": 0.049, "step": 43711 }, { "epoch": 0.9632065753303917, "grad_norm": 0.3563450872898102, "learning_rate": 1.0628691045262928e-07, "loss": 0.0277, "step": 43712 }, { "epoch": 0.9632286106199078, "grad_norm": 0.8511371612548828, "learning_rate": 1.0615973303184479e-07, "loss": 0.0628, "step": 43713 }, { "epoch": 0.963250645909424, "grad_norm": 0.5766655206680298, "learning_rate": 1.0603263147337505e-07, "loss": 0.0663, "step": 43714 }, { "epoch": 0.96327268119894, "grad_norm": 0.6146103143692017, "learning_rate": 1.059056057778679e-07, "loss": 0.0444, "step": 43715 }, { "epoch": 0.9632947164884562, "grad_norm": 0.5755698084831238, "learning_rate": 1.057786559459728e-07, "loss": 0.0512, "step": 43716 }, { "epoch": 0.9633167517779724, "grad_norm": 0.4982481300830841, "learning_rate": 1.0565178197833091e-07, "loss": 0.05, "step": 43717 }, { "epoch": 0.9633387870674885, "grad_norm": 0.6867384314537048, "learning_rate": 1.0552498387559173e-07, "loss": 0.0653, "step": 43718 }, { "epoch": 0.9633608223570047, "grad_norm": 0.4555497169494629, "learning_rate": 1.0539826163840138e-07, "loss": 0.0274, "step": 43719 }, { "epoch": 0.9633828576465209, "grad_norm": 0.5401705503463745, "learning_rate": 1.0527161526740604e-07, "loss": 0.0391, "step": 43720 }, { "epoch": 0.963404892936037, "grad_norm": 0.46559545397758484, "learning_rate": 1.0514504476324849e-07, "loss": 0.0669, "step": 43721 }, { "epoch": 0.9634269282255532, "grad_norm": 0.7322515249252319, "learning_rate": 1.0501855012657325e-07, "loss": 0.0713, "step": 43722 }, { "epoch": 0.9634489635150694, "grad_norm": 0.6944125294685364, "learning_rate": 1.048921313580281e-07, "loss": 0.0452, "step": 43723 }, { "epoch": 0.9634709988045855, "grad_norm": 0.6477504372596741, "learning_rate": 1.0476578845825258e-07, "loss": 0.0509, "step": 43724 }, { "epoch": 0.9634930340941017, "grad_norm": 0.7567312717437744, "learning_rate": 1.0463952142789445e-07, "loss": 0.0928, "step": 43725 }, { "epoch": 0.9635150693836179, "grad_norm": 0.5047565698623657, "learning_rate": 1.0451333026759324e-07, "loss": 0.0526, "step": 43726 }, { "epoch": 0.963537104673134, "grad_norm": 0.5625789761543274, "learning_rate": 1.0438721497799342e-07, "loss": 0.0546, "step": 43727 }, { "epoch": 0.9635591399626502, "grad_norm": 0.8309123516082764, "learning_rate": 1.042611755597378e-07, "loss": 0.0456, "step": 43728 }, { "epoch": 0.9635811752521664, "grad_norm": 0.5688436627388, "learning_rate": 1.0413521201346755e-07, "loss": 0.0502, "step": 43729 }, { "epoch": 0.9636032105416825, "grad_norm": 0.6362161636352539, "learning_rate": 1.040093243398238e-07, "loss": 0.0565, "step": 43730 }, { "epoch": 0.9636252458311987, "grad_norm": 0.7318708896636963, "learning_rate": 1.0388351253944939e-07, "loss": 0.0619, "step": 43731 }, { "epoch": 0.9636472811207148, "grad_norm": 0.6034145951271057, "learning_rate": 1.037577766129838e-07, "loss": 0.07, "step": 43732 }, { "epoch": 0.963669316410231, "grad_norm": 1.2482022047042847, "learning_rate": 1.0363211656106652e-07, "loss": 0.0764, "step": 43733 }, { "epoch": 0.9636913516997472, "grad_norm": 0.4627016484737396, "learning_rate": 1.0350653238434204e-07, "loss": 0.0613, "step": 43734 }, { "epoch": 0.9637133869892633, "grad_norm": 0.7551842331886292, "learning_rate": 1.0338102408344485e-07, "loss": 0.0763, "step": 43735 }, { "epoch": 0.9637354222787795, "grad_norm": 0.6166927814483643, "learning_rate": 1.032555916590161e-07, "loss": 0.0667, "step": 43736 }, { "epoch": 0.9637574575682957, "grad_norm": 0.7173905372619629, "learning_rate": 1.0313023511169529e-07, "loss": 0.093, "step": 43737 }, { "epoch": 0.9637794928578118, "grad_norm": 0.7912085652351379, "learning_rate": 1.0300495444212021e-07, "loss": 0.064, "step": 43738 }, { "epoch": 0.9638015281473279, "grad_norm": 0.7562050223350525, "learning_rate": 1.0287974965093039e-07, "loss": 0.075, "step": 43739 }, { "epoch": 0.963823563436844, "grad_norm": 0.500204861164093, "learning_rate": 1.0275462073876196e-07, "loss": 0.0286, "step": 43740 }, { "epoch": 0.9638455987263602, "grad_norm": 0.70575350522995, "learning_rate": 1.0262956770625275e-07, "loss": 0.061, "step": 43741 }, { "epoch": 0.9638676340158764, "grad_norm": 0.4721590578556061, "learning_rate": 1.0250459055403893e-07, "loss": 0.0453, "step": 43742 }, { "epoch": 0.9638896693053925, "grad_norm": 0.2011099010705948, "learning_rate": 1.0237968928275832e-07, "loss": 0.0314, "step": 43743 }, { "epoch": 0.9639117045949087, "grad_norm": 0.6921731233596802, "learning_rate": 1.0225486389304706e-07, "loss": 0.0521, "step": 43744 }, { "epoch": 0.9639337398844249, "grad_norm": 0.46731847524642944, "learning_rate": 1.0213011438553965e-07, "loss": 0.0572, "step": 43745 }, { "epoch": 0.963955775173941, "grad_norm": 0.5996291041374207, "learning_rate": 1.0200544076087392e-07, "loss": 0.068, "step": 43746 }, { "epoch": 0.9639778104634572, "grad_norm": 0.545243501663208, "learning_rate": 1.018808430196827e-07, "loss": 0.0813, "step": 43747 }, { "epoch": 0.9639998457529734, "grad_norm": 0.4858958125114441, "learning_rate": 1.0175632116260214e-07, "loss": 0.0583, "step": 43748 }, { "epoch": 0.9640218810424895, "grad_norm": 0.8298025727272034, "learning_rate": 1.016318751902634e-07, "loss": 0.099, "step": 43749 }, { "epoch": 0.9640439163320057, "grad_norm": 1.1345630884170532, "learning_rate": 1.0150750510330598e-07, "loss": 0.0573, "step": 43750 }, { "epoch": 0.9640659516215219, "grad_norm": 0.6393956542015076, "learning_rate": 1.013832109023577e-07, "loss": 0.0913, "step": 43751 }, { "epoch": 0.964087986911038, "grad_norm": 0.39764848351478577, "learning_rate": 1.012589925880547e-07, "loss": 0.0627, "step": 43752 }, { "epoch": 0.9641100222005542, "grad_norm": 0.5109153985977173, "learning_rate": 1.0113485016102819e-07, "loss": 0.0557, "step": 43753 }, { "epoch": 0.9641320574900704, "grad_norm": 0.42767348885536194, "learning_rate": 1.0101078362191263e-07, "loss": 0.0554, "step": 43754 }, { "epoch": 0.9641540927795865, "grad_norm": 0.772611141204834, "learning_rate": 1.0088679297133752e-07, "loss": 0.0504, "step": 43755 }, { "epoch": 0.9641761280691027, "grad_norm": 0.4570479691028595, "learning_rate": 1.0076287820993568e-07, "loss": 0.0666, "step": 43756 }, { "epoch": 0.9641981633586189, "grad_norm": 0.4417707026004791, "learning_rate": 1.0063903933833829e-07, "loss": 0.0438, "step": 43757 }, { "epoch": 0.964220198648135, "grad_norm": 0.5723497867584229, "learning_rate": 1.005152763571765e-07, "loss": 0.049, "step": 43758 }, { "epoch": 0.9642422339376512, "grad_norm": 0.5462985038757324, "learning_rate": 1.003915892670798e-07, "loss": 0.0684, "step": 43759 }, { "epoch": 0.9642642692271673, "grad_norm": 0.6310727000236511, "learning_rate": 1.0026797806867772e-07, "loss": 0.058, "step": 43760 }, { "epoch": 0.9642863045166835, "grad_norm": 0.9085718989372253, "learning_rate": 1.0014444276260138e-07, "loss": 0.0676, "step": 43761 }, { "epoch": 0.9643083398061997, "grad_norm": 0.5038352012634277, "learning_rate": 1.0002098334948029e-07, "loss": 0.0536, "step": 43762 }, { "epoch": 0.9643303750957158, "grad_norm": 0.3829778730869293, "learning_rate": 9.989759982994062e-08, "loss": 0.0326, "step": 43763 }, { "epoch": 0.9643524103852319, "grad_norm": 1.0803788900375366, "learning_rate": 9.977429220461353e-08, "loss": 0.0709, "step": 43764 }, { "epoch": 0.9643744456747481, "grad_norm": 0.6721540689468384, "learning_rate": 9.96510604741252e-08, "loss": 0.0452, "step": 43765 }, { "epoch": 0.9643964809642642, "grad_norm": 0.5972801446914673, "learning_rate": 9.952790463910677e-08, "loss": 0.0417, "step": 43766 }, { "epoch": 0.9644185162537804, "grad_norm": 0.8065767288208008, "learning_rate": 9.940482470018109e-08, "loss": 0.0903, "step": 43767 }, { "epoch": 0.9644405515432966, "grad_norm": 0.6423446536064148, "learning_rate": 9.928182065797597e-08, "loss": 0.0692, "step": 43768 }, { "epoch": 0.9644625868328127, "grad_norm": 0.5964951515197754, "learning_rate": 9.915889251311927e-08, "loss": 0.0764, "step": 43769 }, { "epoch": 0.9644846221223289, "grad_norm": 0.6265059113502502, "learning_rate": 9.90360402662388e-08, "loss": 0.0747, "step": 43770 }, { "epoch": 0.964506657411845, "grad_norm": 0.44074809551239014, "learning_rate": 9.891326391795574e-08, "loss": 0.08, "step": 43771 }, { "epoch": 0.9645286927013612, "grad_norm": 0.6397741436958313, "learning_rate": 9.879056346889959e-08, "loss": 0.075, "step": 43772 }, { "epoch": 0.9645507279908774, "grad_norm": 0.5319888591766357, "learning_rate": 9.866793891969483e-08, "loss": 0.0328, "step": 43773 }, { "epoch": 0.9645727632803935, "grad_norm": 0.7494503259658813, "learning_rate": 9.854539027096265e-08, "loss": 0.0932, "step": 43774 }, { "epoch": 0.9645947985699097, "grad_norm": 0.7374230623245239, "learning_rate": 9.842291752333089e-08, "loss": 0.0559, "step": 43775 }, { "epoch": 0.9646168338594259, "grad_norm": 0.7180691361427307, "learning_rate": 9.830052067742068e-08, "loss": 0.06, "step": 43776 }, { "epoch": 0.964638869148942, "grad_norm": 0.6871384382247925, "learning_rate": 9.817819973385822e-08, "loss": 0.069, "step": 43777 }, { "epoch": 0.9646609044384582, "grad_norm": 0.628795862197876, "learning_rate": 9.805595469326467e-08, "loss": 0.0567, "step": 43778 }, { "epoch": 0.9646829397279744, "grad_norm": 0.3020581305027008, "learning_rate": 9.793378555626453e-08, "loss": 0.0498, "step": 43779 }, { "epoch": 0.9647049750174905, "grad_norm": 0.643825352191925, "learning_rate": 9.781169232347731e-08, "loss": 0.0506, "step": 43780 }, { "epoch": 0.9647270103070067, "grad_norm": 0.3036198019981384, "learning_rate": 9.768967499552584e-08, "loss": 0.0508, "step": 43781 }, { "epoch": 0.9647490455965229, "grad_norm": 0.2527559995651245, "learning_rate": 9.756773357303461e-08, "loss": 0.0447, "step": 43782 }, { "epoch": 0.964771080886039, "grad_norm": 0.5629783272743225, "learning_rate": 9.744586805661815e-08, "loss": 0.0829, "step": 43783 }, { "epoch": 0.9647931161755552, "grad_norm": 0.5608732104301453, "learning_rate": 9.732407844690427e-08, "loss": 0.0763, "step": 43784 }, { "epoch": 0.9648151514650714, "grad_norm": 0.4813903272151947, "learning_rate": 9.720236474451083e-08, "loss": 0.05, "step": 43785 }, { "epoch": 0.9648371867545875, "grad_norm": 0.6034409403800964, "learning_rate": 9.708072695005565e-08, "loss": 0.061, "step": 43786 }, { "epoch": 0.9648592220441037, "grad_norm": 0.5250982642173767, "learning_rate": 9.695916506415992e-08, "loss": 0.0555, "step": 43787 }, { "epoch": 0.9648812573336198, "grad_norm": 0.46560782194137573, "learning_rate": 9.683767908744313e-08, "loss": 0.047, "step": 43788 }, { "epoch": 0.9649032926231359, "grad_norm": 0.34918251633644104, "learning_rate": 9.671626902052477e-08, "loss": 0.0559, "step": 43789 }, { "epoch": 0.9649253279126521, "grad_norm": 0.7381215691566467, "learning_rate": 9.659493486402105e-08, "loss": 0.0497, "step": 43790 }, { "epoch": 0.9649473632021682, "grad_norm": 0.578262209892273, "learning_rate": 9.647367661855144e-08, "loss": 0.06, "step": 43791 }, { "epoch": 0.9649693984916844, "grad_norm": 0.5724576711654663, "learning_rate": 9.635249428473214e-08, "loss": 0.0603, "step": 43792 }, { "epoch": 0.9649914337812006, "grad_norm": 0.6090586185455322, "learning_rate": 9.62313878631843e-08, "loss": 0.0664, "step": 43793 }, { "epoch": 0.9650134690707167, "grad_norm": 0.5151852965354919, "learning_rate": 9.611035735451912e-08, "loss": 0.0554, "step": 43794 }, { "epoch": 0.9650355043602329, "grad_norm": 0.31504637002944946, "learning_rate": 9.598940275935775e-08, "loss": 0.047, "step": 43795 }, { "epoch": 0.965057539649749, "grad_norm": 0.4345397353172302, "learning_rate": 9.58685240783147e-08, "loss": 0.0507, "step": 43796 }, { "epoch": 0.9650795749392652, "grad_norm": 0.7209631204605103, "learning_rate": 9.574772131200449e-08, "loss": 0.0766, "step": 43797 }, { "epoch": 0.9651016102287814, "grad_norm": 0.4327332079410553, "learning_rate": 9.562699446104328e-08, "loss": 0.0601, "step": 43798 }, { "epoch": 0.9651236455182975, "grad_norm": 0.334690660238266, "learning_rate": 9.550634352604725e-08, "loss": 0.0455, "step": 43799 }, { "epoch": 0.9651456808078137, "grad_norm": 0.42444664239883423, "learning_rate": 9.538576850762925e-08, "loss": 0.0346, "step": 43800 }, { "epoch": 0.9651677160973299, "grad_norm": 1.2060152292251587, "learning_rate": 9.526526940640378e-08, "loss": 0.0915, "step": 43801 }, { "epoch": 0.965189751386846, "grad_norm": 0.635453999042511, "learning_rate": 9.514484622298536e-08, "loss": 0.0494, "step": 43802 }, { "epoch": 0.9652117866763622, "grad_norm": 0.5198597311973572, "learning_rate": 9.502449895798681e-08, "loss": 0.0479, "step": 43803 }, { "epoch": 0.9652338219658784, "grad_norm": 0.5187544822692871, "learning_rate": 9.4904227612021e-08, "loss": 0.0393, "step": 43804 }, { "epoch": 0.9652558572553945, "grad_norm": 1.0427731275558472, "learning_rate": 9.478403218570076e-08, "loss": 0.0779, "step": 43805 }, { "epoch": 0.9652778925449107, "grad_norm": 0.37656474113464355, "learning_rate": 9.466391267963726e-08, "loss": 0.0541, "step": 43806 }, { "epoch": 0.9652999278344269, "grad_norm": 0.7388699054718018, "learning_rate": 9.454386909444501e-08, "loss": 0.0637, "step": 43807 }, { "epoch": 0.965321963123943, "grad_norm": 0.6725478172302246, "learning_rate": 9.442390143073187e-08, "loss": 0.0733, "step": 43808 }, { "epoch": 0.9653439984134592, "grad_norm": 0.26686087250709534, "learning_rate": 9.430400968911235e-08, "loss": 0.0601, "step": 43809 }, { "epoch": 0.9653660337029754, "grad_norm": 1.0609685182571411, "learning_rate": 9.418419387019594e-08, "loss": 0.0703, "step": 43810 }, { "epoch": 0.9653880689924915, "grad_norm": 0.462589830160141, "learning_rate": 9.406445397459051e-08, "loss": 0.0445, "step": 43811 }, { "epoch": 0.9654101042820077, "grad_norm": 0.9114285111427307, "learning_rate": 9.394479000291057e-08, "loss": 0.0638, "step": 43812 }, { "epoch": 0.9654321395715239, "grad_norm": 0.9467686414718628, "learning_rate": 9.382520195576061e-08, "loss": 0.0571, "step": 43813 }, { "epoch": 0.9654541748610399, "grad_norm": 0.719849169254303, "learning_rate": 9.37056898337535e-08, "loss": 0.0482, "step": 43814 }, { "epoch": 0.9654762101505561, "grad_norm": 0.5604057908058167, "learning_rate": 9.358625363749707e-08, "loss": 0.0325, "step": 43815 }, { "epoch": 0.9654982454400722, "grad_norm": 0.7075391411781311, "learning_rate": 9.346689336759916e-08, "loss": 0.0991, "step": 43816 }, { "epoch": 0.9655202807295884, "grad_norm": 0.5870949625968933, "learning_rate": 9.334760902466766e-08, "loss": 0.0653, "step": 43817 }, { "epoch": 0.9655423160191046, "grad_norm": 0.7719472050666809, "learning_rate": 9.32284006093087e-08, "loss": 0.0669, "step": 43818 }, { "epoch": 0.9655643513086207, "grad_norm": 0.4644147455692291, "learning_rate": 9.310926812213349e-08, "loss": 0.0719, "step": 43819 }, { "epoch": 0.9655863865981369, "grad_norm": 0.3655228614807129, "learning_rate": 9.299021156374654e-08, "loss": 0.0498, "step": 43820 }, { "epoch": 0.9656084218876531, "grad_norm": 0.5134537220001221, "learning_rate": 9.287123093475237e-08, "loss": 0.053, "step": 43821 }, { "epoch": 0.9656304571771692, "grad_norm": 0.5997912883758545, "learning_rate": 9.27523262357588e-08, "loss": 0.0623, "step": 43822 }, { "epoch": 0.9656524924666854, "grad_norm": 0.8337410092353821, "learning_rate": 9.26334974673737e-08, "loss": 0.0943, "step": 43823 }, { "epoch": 0.9656745277562016, "grad_norm": 0.8380781412124634, "learning_rate": 9.251474463019827e-08, "loss": 0.0668, "step": 43824 }, { "epoch": 0.9656965630457177, "grad_norm": 0.3722188174724579, "learning_rate": 9.239606772484033e-08, "loss": 0.042, "step": 43825 }, { "epoch": 0.9657185983352339, "grad_norm": 0.7162798643112183, "learning_rate": 9.227746675190441e-08, "loss": 0.0638, "step": 43826 }, { "epoch": 0.96574063362475, "grad_norm": 0.531423807144165, "learning_rate": 9.215894171199168e-08, "loss": 0.0335, "step": 43827 }, { "epoch": 0.9657626689142662, "grad_norm": 0.7402945756912231, "learning_rate": 9.204049260570834e-08, "loss": 0.078, "step": 43828 }, { "epoch": 0.9657847042037824, "grad_norm": 0.504292368888855, "learning_rate": 9.192211943365558e-08, "loss": 0.0645, "step": 43829 }, { "epoch": 0.9658067394932985, "grad_norm": 0.3070543110370636, "learning_rate": 9.180382219643956e-08, "loss": 0.0423, "step": 43830 }, { "epoch": 0.9658287747828147, "grad_norm": 0.7455883026123047, "learning_rate": 9.168560089465983e-08, "loss": 0.0668, "step": 43831 }, { "epoch": 0.9658508100723309, "grad_norm": 0.7874462604522705, "learning_rate": 9.156745552892087e-08, "loss": 0.0805, "step": 43832 }, { "epoch": 0.965872845361847, "grad_norm": 0.3813452422618866, "learning_rate": 9.144938609982389e-08, "loss": 0.066, "step": 43833 }, { "epoch": 0.9658948806513632, "grad_norm": 0.6630708575248718, "learning_rate": 9.133139260796841e-08, "loss": 0.0557, "step": 43834 }, { "epoch": 0.9659169159408794, "grad_norm": 0.4146158695220947, "learning_rate": 9.121347505395727e-08, "loss": 0.0294, "step": 43835 }, { "epoch": 0.9659389512303955, "grad_norm": 0.658531129360199, "learning_rate": 9.109563343839167e-08, "loss": 0.0613, "step": 43836 }, { "epoch": 0.9659609865199117, "grad_norm": 0.710915207862854, "learning_rate": 9.097786776186945e-08, "loss": 0.0599, "step": 43837 }, { "epoch": 0.9659830218094277, "grad_norm": 0.7052910923957825, "learning_rate": 9.08601780249918e-08, "loss": 0.0541, "step": 43838 }, { "epoch": 0.9660050570989439, "grad_norm": 0.6615838408470154, "learning_rate": 9.074256422835824e-08, "loss": 0.0642, "step": 43839 }, { "epoch": 0.9660270923884601, "grad_norm": 0.36346521973609924, "learning_rate": 9.062502637256831e-08, "loss": 0.0529, "step": 43840 }, { "epoch": 0.9660491276779762, "grad_norm": 0.692820131778717, "learning_rate": 9.050756445821984e-08, "loss": 0.0599, "step": 43841 }, { "epoch": 0.9660711629674924, "grad_norm": 0.3690232038497925, "learning_rate": 9.039017848591235e-08, "loss": 0.0525, "step": 43842 }, { "epoch": 0.9660931982570086, "grad_norm": 0.7075117230415344, "learning_rate": 9.027286845624206e-08, "loss": 0.0614, "step": 43843 }, { "epoch": 0.9661152335465247, "grad_norm": 0.49514493346214294, "learning_rate": 9.015563436980679e-08, "loss": 0.0524, "step": 43844 }, { "epoch": 0.9661372688360409, "grad_norm": 0.6221346259117126, "learning_rate": 9.003847622720441e-08, "loss": 0.0513, "step": 43845 }, { "epoch": 0.9661593041255571, "grad_norm": 0.45429879426956177, "learning_rate": 8.992139402903277e-08, "loss": 0.0731, "step": 43846 }, { "epoch": 0.9661813394150732, "grad_norm": 0.25286924839019775, "learning_rate": 8.980438777588474e-08, "loss": 0.052, "step": 43847 }, { "epoch": 0.9662033747045894, "grad_norm": 0.6246949434280396, "learning_rate": 8.968745746835982e-08, "loss": 0.0524, "step": 43848 }, { "epoch": 0.9662254099941056, "grad_norm": 0.685025691986084, "learning_rate": 8.957060310705257e-08, "loss": 0.086, "step": 43849 }, { "epoch": 0.9662474452836217, "grad_norm": 0.33925962448120117, "learning_rate": 8.945382469255747e-08, "loss": 0.0544, "step": 43850 }, { "epoch": 0.9662694805731379, "grad_norm": 0.47460851073265076, "learning_rate": 8.933712222546909e-08, "loss": 0.0541, "step": 43851 }, { "epoch": 0.966291515862654, "grad_norm": 0.5892448425292969, "learning_rate": 8.92204957063819e-08, "loss": 0.0569, "step": 43852 }, { "epoch": 0.9663135511521702, "grad_norm": 0.6967950463294983, "learning_rate": 8.910394513589216e-08, "loss": 0.0461, "step": 43853 }, { "epoch": 0.9663355864416864, "grad_norm": 1.1721864938735962, "learning_rate": 8.898747051459099e-08, "loss": 0.1037, "step": 43854 }, { "epoch": 0.9663576217312025, "grad_norm": 0.2903309464454651, "learning_rate": 8.887107184307297e-08, "loss": 0.0581, "step": 43855 }, { "epoch": 0.9663796570207187, "grad_norm": 0.9406908750534058, "learning_rate": 8.875474912192927e-08, "loss": 0.0637, "step": 43856 }, { "epoch": 0.9664016923102349, "grad_norm": 0.3006199896335602, "learning_rate": 8.863850235175608e-08, "loss": 0.0831, "step": 43857 }, { "epoch": 0.966423727599751, "grad_norm": 0.5403556823730469, "learning_rate": 8.852233153314126e-08, "loss": 0.0543, "step": 43858 }, { "epoch": 0.9664457628892672, "grad_norm": 0.5058090686798096, "learning_rate": 8.840623666667936e-08, "loss": 0.0406, "step": 43859 }, { "epoch": 0.9664677981787834, "grad_norm": 0.39905306696891785, "learning_rate": 8.829021775295986e-08, "loss": 0.031, "step": 43860 }, { "epoch": 0.9664898334682995, "grad_norm": 0.544439435005188, "learning_rate": 8.8174274792574e-08, "loss": 0.0712, "step": 43861 }, { "epoch": 0.9665118687578157, "grad_norm": 0.3435298502445221, "learning_rate": 8.805840778611463e-08, "loss": 0.0565, "step": 43862 }, { "epoch": 0.9665339040473317, "grad_norm": 0.8940375447273254, "learning_rate": 8.794261673416958e-08, "loss": 0.0614, "step": 43863 }, { "epoch": 0.9665559393368479, "grad_norm": 0.9760146141052246, "learning_rate": 8.782690163732843e-08, "loss": 0.0921, "step": 43864 }, { "epoch": 0.9665779746263641, "grad_norm": 0.47419869899749756, "learning_rate": 8.771126249618233e-08, "loss": 0.0619, "step": 43865 }, { "epoch": 0.9666000099158802, "grad_norm": 0.7074621915817261, "learning_rate": 8.759569931131917e-08, "loss": 0.0751, "step": 43866 }, { "epoch": 0.9666220452053964, "grad_norm": 0.6347688436508179, "learning_rate": 8.748021208332846e-08, "loss": 0.0429, "step": 43867 }, { "epoch": 0.9666440804949126, "grad_norm": 0.6090587377548218, "learning_rate": 8.736480081279641e-08, "loss": 0.0541, "step": 43868 }, { "epoch": 0.9666661157844287, "grad_norm": 0.6431059241294861, "learning_rate": 8.724946550031254e-08, "loss": 0.0622, "step": 43869 }, { "epoch": 0.9666881510739449, "grad_norm": 0.7835000157356262, "learning_rate": 8.713420614646473e-08, "loss": 0.0489, "step": 43870 }, { "epoch": 0.9667101863634611, "grad_norm": 0.41154488921165466, "learning_rate": 8.701902275183916e-08, "loss": 0.0654, "step": 43871 }, { "epoch": 0.9667322216529772, "grad_norm": 1.1267597675323486, "learning_rate": 8.690391531702202e-08, "loss": 0.0927, "step": 43872 }, { "epoch": 0.9667542569424934, "grad_norm": 0.43234509229660034, "learning_rate": 8.67888838426012e-08, "loss": 0.0323, "step": 43873 }, { "epoch": 0.9667762922320096, "grad_norm": 0.6365852355957031, "learning_rate": 8.667392832916121e-08, "loss": 0.0671, "step": 43874 }, { "epoch": 0.9667983275215257, "grad_norm": 0.2536536157131195, "learning_rate": 8.65590487772866e-08, "loss": 0.0315, "step": 43875 }, { "epoch": 0.9668203628110419, "grad_norm": 0.5558400750160217, "learning_rate": 8.644424518756689e-08, "loss": 0.0562, "step": 43876 }, { "epoch": 0.966842398100558, "grad_norm": 0.8255943059921265, "learning_rate": 8.63295175605816e-08, "loss": 0.0664, "step": 43877 }, { "epoch": 0.9668644333900742, "grad_norm": 0.7882505059242249, "learning_rate": 8.621486589691863e-08, "loss": 0.0558, "step": 43878 }, { "epoch": 0.9668864686795904, "grad_norm": 0.40667545795440674, "learning_rate": 8.610029019715915e-08, "loss": 0.0559, "step": 43879 }, { "epoch": 0.9669085039691065, "grad_norm": 0.612485408782959, "learning_rate": 8.598579046189103e-08, "loss": 0.0384, "step": 43880 }, { "epoch": 0.9669305392586227, "grad_norm": 0.47479361295700073, "learning_rate": 8.587136669169215e-08, "loss": 0.0802, "step": 43881 }, { "epoch": 0.9669525745481389, "grad_norm": 0.5032934546470642, "learning_rate": 8.575701888715038e-08, "loss": 0.0588, "step": 43882 }, { "epoch": 0.966974609837655, "grad_norm": 0.7101609110832214, "learning_rate": 8.564274704884356e-08, "loss": 0.0507, "step": 43883 }, { "epoch": 0.9669966451271712, "grad_norm": 0.6657691597938538, "learning_rate": 8.552855117735792e-08, "loss": 0.0839, "step": 43884 }, { "epoch": 0.9670186804166874, "grad_norm": 0.31526967883110046, "learning_rate": 8.541443127327297e-08, "loss": 0.0209, "step": 43885 }, { "epoch": 0.9670407157062035, "grad_norm": 0.6526105999946594, "learning_rate": 8.530038733716827e-08, "loss": 0.0614, "step": 43886 }, { "epoch": 0.9670627509957197, "grad_norm": 0.5229630470275879, "learning_rate": 8.518641936962835e-08, "loss": 0.056, "step": 43887 }, { "epoch": 0.9670847862852358, "grad_norm": 0.6050742864608765, "learning_rate": 8.507252737123105e-08, "loss": 0.0905, "step": 43888 }, { "epoch": 0.9671068215747519, "grad_norm": 0.5586788058280945, "learning_rate": 8.495871134255762e-08, "loss": 0.0593, "step": 43889 }, { "epoch": 0.9671288568642681, "grad_norm": 0.6285315752029419, "learning_rate": 8.484497128418755e-08, "loss": 0.0676, "step": 43890 }, { "epoch": 0.9671508921537842, "grad_norm": 0.3359288275241852, "learning_rate": 8.473130719670041e-08, "loss": 0.0516, "step": 43891 }, { "epoch": 0.9671729274433004, "grad_norm": 0.5433021783828735, "learning_rate": 8.461771908067406e-08, "loss": 0.0653, "step": 43892 }, { "epoch": 0.9671949627328166, "grad_norm": 0.36979255080223083, "learning_rate": 8.450420693668803e-08, "loss": 0.0469, "step": 43893 }, { "epoch": 0.9672169980223327, "grad_norm": 0.6373438239097595, "learning_rate": 8.439077076532187e-08, "loss": 0.0946, "step": 43894 }, { "epoch": 0.9672390333118489, "grad_norm": 0.44314801692962646, "learning_rate": 8.42774105671501e-08, "loss": 0.0515, "step": 43895 }, { "epoch": 0.9672610686013651, "grad_norm": 0.8225698471069336, "learning_rate": 8.416412634275394e-08, "loss": 0.0874, "step": 43896 }, { "epoch": 0.9672831038908812, "grad_norm": 0.45801395177841187, "learning_rate": 8.405091809270627e-08, "loss": 0.0611, "step": 43897 }, { "epoch": 0.9673051391803974, "grad_norm": 0.6197053790092468, "learning_rate": 8.39377858175866e-08, "loss": 0.0766, "step": 43898 }, { "epoch": 0.9673271744699136, "grad_norm": 0.5299835205078125, "learning_rate": 8.382472951797115e-08, "loss": 0.055, "step": 43899 }, { "epoch": 0.9673492097594297, "grad_norm": 0.5767003893852234, "learning_rate": 8.371174919443447e-08, "loss": 0.0531, "step": 43900 }, { "epoch": 0.9673712450489459, "grad_norm": 0.9032854437828064, "learning_rate": 8.359884484755276e-08, "loss": 0.0584, "step": 43901 }, { "epoch": 0.9673932803384621, "grad_norm": 0.7446801662445068, "learning_rate": 8.348601647790056e-08, "loss": 0.0559, "step": 43902 }, { "epoch": 0.9674153156279782, "grad_norm": 0.7166922092437744, "learning_rate": 8.337326408605405e-08, "loss": 0.084, "step": 43903 }, { "epoch": 0.9674373509174944, "grad_norm": 0.3295964002609253, "learning_rate": 8.326058767258449e-08, "loss": 0.0554, "step": 43904 }, { "epoch": 0.9674593862070106, "grad_norm": 0.41836491227149963, "learning_rate": 8.314798723806972e-08, "loss": 0.0347, "step": 43905 }, { "epoch": 0.9674814214965267, "grad_norm": 0.6216239929199219, "learning_rate": 8.303546278308094e-08, "loss": 0.0682, "step": 43906 }, { "epoch": 0.9675034567860429, "grad_norm": 0.5262773633003235, "learning_rate": 8.292301430819105e-08, "loss": 0.0612, "step": 43907 }, { "epoch": 0.967525492075559, "grad_norm": 0.9423561692237854, "learning_rate": 8.281064181397291e-08, "loss": 0.0727, "step": 43908 }, { "epoch": 0.9675475273650752, "grad_norm": 0.6740956902503967, "learning_rate": 8.26983453009994e-08, "loss": 0.065, "step": 43909 }, { "epoch": 0.9675695626545914, "grad_norm": 0.4554683566093445, "learning_rate": 8.25861247698434e-08, "loss": 0.0455, "step": 43910 }, { "epoch": 0.9675915979441075, "grad_norm": 0.47644171118736267, "learning_rate": 8.247398022107611e-08, "loss": 0.0721, "step": 43911 }, { "epoch": 0.9676136332336236, "grad_norm": 0.5591546893119812, "learning_rate": 8.236191165526707e-08, "loss": 0.0757, "step": 43912 }, { "epoch": 0.9676356685231398, "grad_norm": 0.4155890643596649, "learning_rate": 8.224991907298918e-08, "loss": 0.0653, "step": 43913 }, { "epoch": 0.9676577038126559, "grad_norm": 0.8131802082061768, "learning_rate": 8.213800247481195e-08, "loss": 0.0411, "step": 43914 }, { "epoch": 0.9676797391021721, "grad_norm": 0.8514087200164795, "learning_rate": 8.202616186130496e-08, "loss": 0.0606, "step": 43915 }, { "epoch": 0.9677017743916883, "grad_norm": 0.8455402851104736, "learning_rate": 8.191439723303773e-08, "loss": 0.063, "step": 43916 }, { "epoch": 0.9677238096812044, "grad_norm": 0.9528350234031677, "learning_rate": 8.180270859058147e-08, "loss": 0.0804, "step": 43917 }, { "epoch": 0.9677458449707206, "grad_norm": 1.0475579500198364, "learning_rate": 8.169109593450241e-08, "loss": 0.093, "step": 43918 }, { "epoch": 0.9677678802602367, "grad_norm": 0.6915740370750427, "learning_rate": 8.157955926537175e-08, "loss": 0.0594, "step": 43919 }, { "epoch": 0.9677899155497529, "grad_norm": 0.7217418551445007, "learning_rate": 8.14680985837557e-08, "loss": 0.0393, "step": 43920 }, { "epoch": 0.9678119508392691, "grad_norm": 0.6017570495605469, "learning_rate": 8.135671389022215e-08, "loss": 0.0618, "step": 43921 }, { "epoch": 0.9678339861287852, "grad_norm": 0.660930871963501, "learning_rate": 8.124540518534063e-08, "loss": 0.0377, "step": 43922 }, { "epoch": 0.9678560214183014, "grad_norm": 0.4688143730163574, "learning_rate": 8.113417246967403e-08, "loss": 0.0415, "step": 43923 }, { "epoch": 0.9678780567078176, "grad_norm": 0.6911453008651733, "learning_rate": 8.102301574379357e-08, "loss": 0.0504, "step": 43924 }, { "epoch": 0.9679000919973337, "grad_norm": 0.5783019661903381, "learning_rate": 8.091193500826044e-08, "loss": 0.0616, "step": 43925 }, { "epoch": 0.9679221272868499, "grad_norm": 0.7428401708602905, "learning_rate": 8.080093026364421e-08, "loss": 0.1028, "step": 43926 }, { "epoch": 0.9679441625763661, "grad_norm": 0.1860736906528473, "learning_rate": 8.069000151050942e-08, "loss": 0.0469, "step": 43927 }, { "epoch": 0.9679661978658822, "grad_norm": 0.5028406977653503, "learning_rate": 8.057914874942229e-08, "loss": 0.0554, "step": 43928 }, { "epoch": 0.9679882331553984, "grad_norm": 0.36919066309928894, "learning_rate": 8.046837198094404e-08, "loss": 0.0682, "step": 43929 }, { "epoch": 0.9680102684449146, "grad_norm": 0.49804362654685974, "learning_rate": 8.035767120564085e-08, "loss": 0.0497, "step": 43930 }, { "epoch": 0.9680323037344307, "grad_norm": 1.0165444612503052, "learning_rate": 8.024704642407731e-08, "loss": 0.0892, "step": 43931 }, { "epoch": 0.9680543390239469, "grad_norm": 0.451491117477417, "learning_rate": 8.013649763681463e-08, "loss": 0.0714, "step": 43932 }, { "epoch": 0.968076374313463, "grad_norm": 0.7879946231842041, "learning_rate": 8.002602484441902e-08, "loss": 0.0651, "step": 43933 }, { "epoch": 0.9680984096029792, "grad_norm": 0.5905119776725769, "learning_rate": 7.991562804745167e-08, "loss": 0.0465, "step": 43934 }, { "epoch": 0.9681204448924954, "grad_norm": 0.5862550735473633, "learning_rate": 7.980530724647384e-08, "loss": 0.0582, "step": 43935 }, { "epoch": 0.9681424801820115, "grad_norm": 0.37956222891807556, "learning_rate": 7.969506244205004e-08, "loss": 0.0894, "step": 43936 }, { "epoch": 0.9681645154715276, "grad_norm": 0.9133708477020264, "learning_rate": 7.95848936347382e-08, "loss": 0.0874, "step": 43937 }, { "epoch": 0.9681865507610438, "grad_norm": 0.641768217086792, "learning_rate": 7.947480082510283e-08, "loss": 0.0522, "step": 43938 }, { "epoch": 0.9682085860505599, "grad_norm": 0.40598711371421814, "learning_rate": 7.93647840137035e-08, "loss": 0.0733, "step": 43939 }, { "epoch": 0.9682306213400761, "grad_norm": 0.9658471345901489, "learning_rate": 7.925484320109977e-08, "loss": 0.0654, "step": 43940 }, { "epoch": 0.9682526566295923, "grad_norm": 0.7455447316169739, "learning_rate": 7.914497838785117e-08, "loss": 0.0652, "step": 43941 }, { "epoch": 0.9682746919191084, "grad_norm": 0.6563845276832581, "learning_rate": 7.90351895745206e-08, "loss": 0.0473, "step": 43942 }, { "epoch": 0.9682967272086246, "grad_norm": 0.3837457597255707, "learning_rate": 7.892547676166262e-08, "loss": 0.0516, "step": 43943 }, { "epoch": 0.9683187624981408, "grad_norm": 0.6555128693580627, "learning_rate": 7.88158399498401e-08, "loss": 0.0594, "step": 43944 }, { "epoch": 0.9683407977876569, "grad_norm": 0.3759706914424896, "learning_rate": 7.870627913961093e-08, "loss": 0.036, "step": 43945 }, { "epoch": 0.9683628330771731, "grad_norm": 0.7219678163528442, "learning_rate": 7.859679433152967e-08, "loss": 0.077, "step": 43946 }, { "epoch": 0.9683848683666892, "grad_norm": 0.5367239117622375, "learning_rate": 7.848738552615753e-08, "loss": 0.0578, "step": 43947 }, { "epoch": 0.9684069036562054, "grad_norm": 1.055911898612976, "learning_rate": 7.837805272405074e-08, "loss": 0.084, "step": 43948 }, { "epoch": 0.9684289389457216, "grad_norm": 0.5545215606689453, "learning_rate": 7.826879592576553e-08, "loss": 0.0604, "step": 43949 }, { "epoch": 0.9684509742352377, "grad_norm": 0.8658493161201477, "learning_rate": 7.815961513185976e-08, "loss": 0.0985, "step": 43950 }, { "epoch": 0.9684730095247539, "grad_norm": 0.7739928364753723, "learning_rate": 7.805051034288968e-08, "loss": 0.0562, "step": 43951 }, { "epoch": 0.9684950448142701, "grad_norm": 0.610661506652832, "learning_rate": 7.794148155940817e-08, "loss": 0.0487, "step": 43952 }, { "epoch": 0.9685170801037862, "grad_norm": 0.5467648506164551, "learning_rate": 7.783252878197312e-08, "loss": 0.0408, "step": 43953 }, { "epoch": 0.9685391153933024, "grad_norm": 0.5318362712860107, "learning_rate": 7.772365201114073e-08, "loss": 0.0438, "step": 43954 }, { "epoch": 0.9685611506828186, "grad_norm": 0.8870701789855957, "learning_rate": 7.76148512474606e-08, "loss": 0.0907, "step": 43955 }, { "epoch": 0.9685831859723347, "grad_norm": 0.36352407932281494, "learning_rate": 7.750612649149224e-08, "loss": 0.0538, "step": 43956 }, { "epoch": 0.9686052212618509, "grad_norm": 1.0286599397659302, "learning_rate": 7.739747774378692e-08, "loss": 0.0496, "step": 43957 }, { "epoch": 0.968627256551367, "grad_norm": 0.5270718932151794, "learning_rate": 7.728890500489749e-08, "loss": 0.0537, "step": 43958 }, { "epoch": 0.9686492918408832, "grad_norm": 1.1502668857574463, "learning_rate": 7.718040827537853e-08, "loss": 0.0678, "step": 43959 }, { "epoch": 0.9686713271303994, "grad_norm": 0.7070714235305786, "learning_rate": 7.707198755578293e-08, "loss": 0.0804, "step": 43960 }, { "epoch": 0.9686933624199155, "grad_norm": 0.3990960121154785, "learning_rate": 7.696364284666024e-08, "loss": 0.0712, "step": 43961 }, { "epoch": 0.9687153977094316, "grad_norm": 0.40078675746917725, "learning_rate": 7.685537414856503e-08, "loss": 0.0542, "step": 43962 }, { "epoch": 0.9687374329989478, "grad_norm": 0.6710761785507202, "learning_rate": 7.67471814620485e-08, "loss": 0.059, "step": 43963 }, { "epoch": 0.9687594682884639, "grad_norm": 0.7555690407752991, "learning_rate": 7.663906478766025e-08, "loss": 0.0557, "step": 43964 }, { "epoch": 0.9687815035779801, "grad_norm": 0.7721138596534729, "learning_rate": 7.653102412595148e-08, "loss": 0.0646, "step": 43965 }, { "epoch": 0.9688035388674963, "grad_norm": 0.5549560785293579, "learning_rate": 7.642305947747341e-08, "loss": 0.0675, "step": 43966 }, { "epoch": 0.9688255741570124, "grad_norm": 0.42937004566192627, "learning_rate": 7.631517084277395e-08, "loss": 0.0628, "step": 43967 }, { "epoch": 0.9688476094465286, "grad_norm": 0.8301091194152832, "learning_rate": 7.620735822240599e-08, "loss": 0.0565, "step": 43968 }, { "epoch": 0.9688696447360448, "grad_norm": 0.4591525197029114, "learning_rate": 7.609962161691575e-08, "loss": 0.0458, "step": 43969 }, { "epoch": 0.9688916800255609, "grad_norm": 0.5281389951705933, "learning_rate": 7.59919610268528e-08, "loss": 0.0764, "step": 43970 }, { "epoch": 0.9689137153150771, "grad_norm": 0.7894513607025146, "learning_rate": 7.588437645276503e-08, "loss": 0.0898, "step": 43971 }, { "epoch": 0.9689357506045932, "grad_norm": 0.36102643609046936, "learning_rate": 7.577686789520366e-08, "loss": 0.0588, "step": 43972 }, { "epoch": 0.9689577858941094, "grad_norm": 0.3691122233867645, "learning_rate": 7.566943535471161e-08, "loss": 0.0602, "step": 43973 }, { "epoch": 0.9689798211836256, "grad_norm": 0.42143768072128296, "learning_rate": 7.556207883183841e-08, "loss": 0.0405, "step": 43974 }, { "epoch": 0.9690018564731417, "grad_norm": 0.6870593428611755, "learning_rate": 7.545479832713031e-08, "loss": 0.0462, "step": 43975 }, { "epoch": 0.9690238917626579, "grad_norm": 0.9435010552406311, "learning_rate": 7.534759384113522e-08, "loss": 0.0466, "step": 43976 }, { "epoch": 0.9690459270521741, "grad_norm": 0.5822954177856445, "learning_rate": 7.524046537439767e-08, "loss": 0.0644, "step": 43977 }, { "epoch": 0.9690679623416902, "grad_norm": 0.729871392250061, "learning_rate": 7.51334129274639e-08, "loss": 0.0669, "step": 43978 }, { "epoch": 0.9690899976312064, "grad_norm": 0.598471462726593, "learning_rate": 7.502643650087849e-08, "loss": 0.0683, "step": 43979 }, { "epoch": 0.9691120329207226, "grad_norm": 1.0204991102218628, "learning_rate": 7.491953609518764e-08, "loss": 0.0759, "step": 43980 }, { "epoch": 0.9691340682102387, "grad_norm": 0.765484631061554, "learning_rate": 7.48127117109343e-08, "loss": 0.0529, "step": 43981 }, { "epoch": 0.9691561034997549, "grad_norm": 0.4611685872077942, "learning_rate": 7.470596334866297e-08, "loss": 0.0823, "step": 43982 }, { "epoch": 0.9691781387892711, "grad_norm": 0.6046254634857178, "learning_rate": 7.459929100891827e-08, "loss": 0.0526, "step": 43983 }, { "epoch": 0.9692001740787872, "grad_norm": 0.4043257236480713, "learning_rate": 7.449269469224307e-08, "loss": 0.0491, "step": 43984 }, { "epoch": 0.9692222093683034, "grad_norm": 0.603736400604248, "learning_rate": 7.438617439918028e-08, "loss": 0.0585, "step": 43985 }, { "epoch": 0.9692442446578194, "grad_norm": 0.43085891008377075, "learning_rate": 7.427973013027278e-08, "loss": 0.0665, "step": 43986 }, { "epoch": 0.9692662799473356, "grad_norm": 0.30243659019470215, "learning_rate": 7.417336188606183e-08, "loss": 0.0543, "step": 43987 }, { "epoch": 0.9692883152368518, "grad_norm": 0.5208554267883301, "learning_rate": 7.406706966709031e-08, "loss": 0.0544, "step": 43988 }, { "epoch": 0.9693103505263679, "grad_norm": 0.6599829196929932, "learning_rate": 7.39608534738978e-08, "loss": 0.0581, "step": 43989 }, { "epoch": 0.9693323858158841, "grad_norm": 0.4104597568511963, "learning_rate": 7.385471330702886e-08, "loss": 0.0331, "step": 43990 }, { "epoch": 0.9693544211054003, "grad_norm": 0.6529275178909302, "learning_rate": 7.374864916701973e-08, "loss": 0.0741, "step": 43991 }, { "epoch": 0.9693764563949164, "grad_norm": 0.325065553188324, "learning_rate": 7.364266105441498e-08, "loss": 0.0744, "step": 43992 }, { "epoch": 0.9693984916844326, "grad_norm": 0.47006964683532715, "learning_rate": 7.353674896975082e-08, "loss": 0.0436, "step": 43993 }, { "epoch": 0.9694205269739488, "grad_norm": 0.477708637714386, "learning_rate": 7.343091291356851e-08, "loss": 0.0651, "step": 43994 }, { "epoch": 0.9694425622634649, "grad_norm": 0.5928648710250854, "learning_rate": 7.332515288640595e-08, "loss": 0.0809, "step": 43995 }, { "epoch": 0.9694645975529811, "grad_norm": 0.776924192905426, "learning_rate": 7.321946888880437e-08, "loss": 0.0652, "step": 43996 }, { "epoch": 0.9694866328424973, "grad_norm": 0.5306821465492249, "learning_rate": 7.311386092130001e-08, "loss": 0.0645, "step": 43997 }, { "epoch": 0.9695086681320134, "grad_norm": 0.5670568943023682, "learning_rate": 7.30083289844291e-08, "loss": 0.0936, "step": 43998 }, { "epoch": 0.9695307034215296, "grad_norm": 0.5310912728309631, "learning_rate": 7.290287307873456e-08, "loss": 0.0522, "step": 43999 }, { "epoch": 0.9695527387110457, "grad_norm": 0.32567235827445984, "learning_rate": 7.27974932047476e-08, "loss": 0.0654, "step": 44000 }, { "epoch": 0.9695747740005619, "grad_norm": 0.46498674154281616, "learning_rate": 7.269218936300781e-08, "loss": 0.0388, "step": 44001 }, { "epoch": 0.9695968092900781, "grad_norm": 0.464186429977417, "learning_rate": 7.258696155405308e-08, "loss": 0.0529, "step": 44002 }, { "epoch": 0.9696188445795942, "grad_norm": 0.5856293439865112, "learning_rate": 7.248180977841634e-08, "loss": 0.0579, "step": 44003 }, { "epoch": 0.9696408798691104, "grad_norm": 0.542574942111969, "learning_rate": 7.237673403663547e-08, "loss": 0.0453, "step": 44004 }, { "epoch": 0.9696629151586266, "grad_norm": 0.4373714029788971, "learning_rate": 7.227173432924339e-08, "loss": 0.0546, "step": 44005 }, { "epoch": 0.9696849504481427, "grad_norm": 0.5556004047393799, "learning_rate": 7.216681065677799e-08, "loss": 0.0468, "step": 44006 }, { "epoch": 0.9697069857376589, "grad_norm": 0.9137578010559082, "learning_rate": 7.206196301977053e-08, "loss": 0.0621, "step": 44007 }, { "epoch": 0.9697290210271751, "grad_norm": 0.7886691689491272, "learning_rate": 7.195719141875723e-08, "loss": 0.0507, "step": 44008 }, { "epoch": 0.9697510563166912, "grad_norm": 1.0625207424163818, "learning_rate": 7.1852495854271e-08, "loss": 0.0559, "step": 44009 }, { "epoch": 0.9697730916062074, "grad_norm": 1.0707380771636963, "learning_rate": 7.174787632684476e-08, "loss": 0.079, "step": 44010 }, { "epoch": 0.9697951268957234, "grad_norm": 0.6097400784492493, "learning_rate": 7.164333283701308e-08, "loss": 0.0675, "step": 44011 }, { "epoch": 0.9698171621852396, "grad_norm": 0.6174811720848083, "learning_rate": 7.153886538530552e-08, "loss": 0.0599, "step": 44012 }, { "epoch": 0.9698391974747558, "grad_norm": 0.42631369829177856, "learning_rate": 7.1434473972255e-08, "loss": 0.0479, "step": 44013 }, { "epoch": 0.9698612327642719, "grad_norm": 0.3753650486469269, "learning_rate": 7.13301585983961e-08, "loss": 0.0494, "step": 44014 }, { "epoch": 0.9698832680537881, "grad_norm": 0.7421358823776245, "learning_rate": 7.12259192642567e-08, "loss": 0.078, "step": 44015 }, { "epoch": 0.9699053033433043, "grad_norm": 0.5078839063644409, "learning_rate": 7.112175597036974e-08, "loss": 0.0542, "step": 44016 }, { "epoch": 0.9699273386328204, "grad_norm": 0.5132625699043274, "learning_rate": 7.101766871726478e-08, "loss": 0.0824, "step": 44017 }, { "epoch": 0.9699493739223366, "grad_norm": 0.5894996523857117, "learning_rate": 7.091365750547307e-08, "loss": 0.037, "step": 44018 }, { "epoch": 0.9699714092118528, "grad_norm": 0.6514600515365601, "learning_rate": 7.080972233552252e-08, "loss": 0.0661, "step": 44019 }, { "epoch": 0.9699934445013689, "grad_norm": 0.3843649625778198, "learning_rate": 7.070586320794604e-08, "loss": 0.0413, "step": 44020 }, { "epoch": 0.9700154797908851, "grad_norm": 0.6502156853675842, "learning_rate": 7.060208012326819e-08, "loss": 0.0395, "step": 44021 }, { "epoch": 0.9700375150804013, "grad_norm": 1.0795217752456665, "learning_rate": 7.049837308202023e-08, "loss": 0.0638, "step": 44022 }, { "epoch": 0.9700595503699174, "grad_norm": 0.6022152900695801, "learning_rate": 7.039474208473006e-08, "loss": 0.0522, "step": 44023 }, { "epoch": 0.9700815856594336, "grad_norm": 0.5703026652336121, "learning_rate": 7.029118713192561e-08, "loss": 0.055, "step": 44024 }, { "epoch": 0.9701036209489498, "grad_norm": 0.8074268102645874, "learning_rate": 7.018770822413312e-08, "loss": 0.0747, "step": 44025 }, { "epoch": 0.9701256562384659, "grad_norm": 0.719603955745697, "learning_rate": 7.008430536188215e-08, "loss": 0.0611, "step": 44026 }, { "epoch": 0.9701476915279821, "grad_norm": 0.5529537796974182, "learning_rate": 6.998097854569729e-08, "loss": 0.0595, "step": 44027 }, { "epoch": 0.9701697268174982, "grad_norm": 0.7859922647476196, "learning_rate": 6.987772777610312e-08, "loss": 0.0397, "step": 44028 }, { "epoch": 0.9701917621070144, "grad_norm": 0.6363009810447693, "learning_rate": 6.977455305363089e-08, "loss": 0.0539, "step": 44029 }, { "epoch": 0.9702137973965306, "grad_norm": 0.35122841596603394, "learning_rate": 6.967145437880185e-08, "loss": 0.0359, "step": 44030 }, { "epoch": 0.9702358326860467, "grad_norm": 0.3996029198169708, "learning_rate": 6.956843175214389e-08, "loss": 0.033, "step": 44031 }, { "epoch": 0.9702578679755629, "grad_norm": 0.7125418782234192, "learning_rate": 6.946548517417828e-08, "loss": 0.0643, "step": 44032 }, { "epoch": 0.9702799032650791, "grad_norm": 0.7107393741607666, "learning_rate": 6.936261464543126e-08, "loss": 0.0622, "step": 44033 }, { "epoch": 0.9703019385545952, "grad_norm": 0.8590237498283386, "learning_rate": 6.925982016642907e-08, "loss": 0.0723, "step": 44034 }, { "epoch": 0.9703239738441114, "grad_norm": 0.5807195901870728, "learning_rate": 6.91571017376913e-08, "loss": 0.0613, "step": 44035 }, { "epoch": 0.9703460091336275, "grad_norm": 0.5381678342819214, "learning_rate": 6.905445935974253e-08, "loss": 0.0586, "step": 44036 }, { "epoch": 0.9703680444231436, "grad_norm": 0.6559697985649109, "learning_rate": 6.895189303310734e-08, "loss": 0.0652, "step": 44037 }, { "epoch": 0.9703900797126598, "grad_norm": 0.6767536997795105, "learning_rate": 6.884940275830532e-08, "loss": 0.0566, "step": 44038 }, { "epoch": 0.970412115002176, "grad_norm": 0.7176371216773987, "learning_rate": 6.874698853586103e-08, "loss": 0.0523, "step": 44039 }, { "epoch": 0.9704341502916921, "grad_norm": 0.9636743664741516, "learning_rate": 6.864465036629575e-08, "loss": 0.0832, "step": 44040 }, { "epoch": 0.9704561855812083, "grad_norm": 0.7749168276786804, "learning_rate": 6.854238825012904e-08, "loss": 0.0674, "step": 44041 }, { "epoch": 0.9704782208707244, "grad_norm": 0.9756665825843811, "learning_rate": 6.844020218788383e-08, "loss": 0.083, "step": 44042 }, { "epoch": 0.9705002561602406, "grad_norm": 0.414793998003006, "learning_rate": 6.833809218007803e-08, "loss": 0.041, "step": 44043 }, { "epoch": 0.9705222914497568, "grad_norm": 0.7084452509880066, "learning_rate": 6.823605822723455e-08, "loss": 0.0724, "step": 44044 }, { "epoch": 0.9705443267392729, "grad_norm": 0.32476016879081726, "learning_rate": 6.813410032987133e-08, "loss": 0.039, "step": 44045 }, { "epoch": 0.9705663620287891, "grad_norm": 0.42193105816841125, "learning_rate": 6.803221848850794e-08, "loss": 0.0613, "step": 44046 }, { "epoch": 0.9705883973183053, "grad_norm": 0.4649655818939209, "learning_rate": 6.793041270366395e-08, "loss": 0.0572, "step": 44047 }, { "epoch": 0.9706104326078214, "grad_norm": 0.6189481019973755, "learning_rate": 6.782868297585898e-08, "loss": 0.0481, "step": 44048 }, { "epoch": 0.9706324678973376, "grad_norm": 0.4796481430530548, "learning_rate": 6.772702930560759e-08, "loss": 0.0613, "step": 44049 }, { "epoch": 0.9706545031868538, "grad_norm": 0.49935269355773926, "learning_rate": 6.76254516934327e-08, "loss": 0.0712, "step": 44050 }, { "epoch": 0.9706765384763699, "grad_norm": 0.8416607975959778, "learning_rate": 6.752395013984558e-08, "loss": 0.0562, "step": 44051 }, { "epoch": 0.9706985737658861, "grad_norm": 0.579172670841217, "learning_rate": 6.742252464536747e-08, "loss": 0.0815, "step": 44052 }, { "epoch": 0.9707206090554023, "grad_norm": 0.6994837522506714, "learning_rate": 6.732117521051462e-08, "loss": 0.07, "step": 44053 }, { "epoch": 0.9707426443449184, "grad_norm": 0.4075351655483246, "learning_rate": 6.721990183580162e-08, "loss": 0.0518, "step": 44054 }, { "epoch": 0.9707646796344346, "grad_norm": 0.8011091351509094, "learning_rate": 6.71187045217464e-08, "loss": 0.0681, "step": 44055 }, { "epoch": 0.9707867149239507, "grad_norm": 0.45923906564712524, "learning_rate": 6.701758326886187e-08, "loss": 0.0748, "step": 44056 }, { "epoch": 0.9708087502134669, "grad_norm": 0.4682022035121918, "learning_rate": 6.691653807766595e-08, "loss": 0.0561, "step": 44057 }, { "epoch": 0.9708307855029831, "grad_norm": 0.8489364981651306, "learning_rate": 6.681556894866992e-08, "loss": 0.0537, "step": 44058 }, { "epoch": 0.9708528207924992, "grad_norm": 0.6367228031158447, "learning_rate": 6.671467588239166e-08, "loss": 0.0465, "step": 44059 }, { "epoch": 0.9708748560820154, "grad_norm": 0.7487889528274536, "learning_rate": 6.661385887934245e-08, "loss": 0.0455, "step": 44060 }, { "epoch": 0.9708968913715315, "grad_norm": 0.5721076130867004, "learning_rate": 6.651311794003856e-08, "loss": 0.0562, "step": 44061 }, { "epoch": 0.9709189266610476, "grad_norm": 1.619004249572754, "learning_rate": 6.641245306498956e-08, "loss": 0.0566, "step": 44062 }, { "epoch": 0.9709409619505638, "grad_norm": 0.5239325761795044, "learning_rate": 6.631186425471003e-08, "loss": 0.0529, "step": 44063 }, { "epoch": 0.97096299724008, "grad_norm": 0.5554527640342712, "learning_rate": 6.621135150971291e-08, "loss": 0.0804, "step": 44064 }, { "epoch": 0.9709850325295961, "grad_norm": 0.3232521116733551, "learning_rate": 6.611091483050946e-08, "loss": 0.068, "step": 44065 }, { "epoch": 0.9710070678191123, "grad_norm": 0.5751768350601196, "learning_rate": 6.601055421761093e-08, "loss": 0.0433, "step": 44066 }, { "epoch": 0.9710291031086284, "grad_norm": 0.618806004524231, "learning_rate": 6.591026967152858e-08, "loss": 0.0471, "step": 44067 }, { "epoch": 0.9710511383981446, "grad_norm": 0.7004998326301575, "learning_rate": 6.581006119277366e-08, "loss": 0.0763, "step": 44068 }, { "epoch": 0.9710731736876608, "grad_norm": 0.7156381607055664, "learning_rate": 6.570992878185744e-08, "loss": 0.0589, "step": 44069 }, { "epoch": 0.9710952089771769, "grad_norm": 0.5703045129776001, "learning_rate": 6.560987243928785e-08, "loss": 0.0692, "step": 44070 }, { "epoch": 0.9711172442666931, "grad_norm": 0.6089501976966858, "learning_rate": 6.550989216557612e-08, "loss": 0.0712, "step": 44071 }, { "epoch": 0.9711392795562093, "grad_norm": 0.5965805053710938, "learning_rate": 6.540998796123188e-08, "loss": 0.034, "step": 44072 }, { "epoch": 0.9711613148457254, "grad_norm": 0.45237797498703003, "learning_rate": 6.531015982676136e-08, "loss": 0.0825, "step": 44073 }, { "epoch": 0.9711833501352416, "grad_norm": 0.7372072339057922, "learning_rate": 6.521040776267584e-08, "loss": 0.0553, "step": 44074 }, { "epoch": 0.9712053854247578, "grad_norm": 0.6018056273460388, "learning_rate": 6.511073176948157e-08, "loss": 0.0432, "step": 44075 }, { "epoch": 0.9712274207142739, "grad_norm": 0.32566124200820923, "learning_rate": 6.501113184768814e-08, "loss": 0.0404, "step": 44076 }, { "epoch": 0.9712494560037901, "grad_norm": 0.42228564620018005, "learning_rate": 6.491160799780183e-08, "loss": 0.0762, "step": 44077 }, { "epoch": 0.9712714912933063, "grad_norm": 0.5703142881393433, "learning_rate": 6.481216022032887e-08, "loss": 0.0474, "step": 44078 }, { "epoch": 0.9712935265828224, "grad_norm": 0.6703436374664307, "learning_rate": 6.471278851577723e-08, "loss": 0.072, "step": 44079 }, { "epoch": 0.9713155618723386, "grad_norm": 0.7914642095565796, "learning_rate": 6.461349288465146e-08, "loss": 0.0568, "step": 44080 }, { "epoch": 0.9713375971618547, "grad_norm": 0.7764705419540405, "learning_rate": 6.451427332745786e-08, "loss": 0.0717, "step": 44081 }, { "epoch": 0.9713596324513709, "grad_norm": 0.49260517954826355, "learning_rate": 6.441512984470266e-08, "loss": 0.054, "step": 44082 }, { "epoch": 0.9713816677408871, "grad_norm": 0.5137966871261597, "learning_rate": 6.431606243689048e-08, "loss": 0.0819, "step": 44083 }, { "epoch": 0.9714037030304032, "grad_norm": 0.6169172525405884, "learning_rate": 6.421707110452591e-08, "loss": 0.0557, "step": 44084 }, { "epoch": 0.9714257383199193, "grad_norm": 0.8388156890869141, "learning_rate": 6.411815584811187e-08, "loss": 0.0659, "step": 44085 }, { "epoch": 0.9714477736094355, "grad_norm": 0.3730807602405548, "learning_rate": 6.401931666815463e-08, "loss": 0.0497, "step": 44086 }, { "epoch": 0.9714698088989516, "grad_norm": 0.7009467482566833, "learning_rate": 6.392055356515547e-08, "loss": 0.0658, "step": 44087 }, { "epoch": 0.9714918441884678, "grad_norm": 0.6262678503990173, "learning_rate": 6.382186653961897e-08, "loss": 0.0773, "step": 44088 }, { "epoch": 0.971513879477984, "grad_norm": 0.3704719841480255, "learning_rate": 6.372325559204639e-08, "loss": 0.0505, "step": 44089 }, { "epoch": 0.9715359147675001, "grad_norm": 0.41013839840888977, "learning_rate": 6.362472072294068e-08, "loss": 0.072, "step": 44090 }, { "epoch": 0.9715579500570163, "grad_norm": 0.44278576970100403, "learning_rate": 6.352626193280475e-08, "loss": 0.0619, "step": 44091 }, { "epoch": 0.9715799853465324, "grad_norm": 0.424316942691803, "learning_rate": 6.342787922213822e-08, "loss": 0.0505, "step": 44092 }, { "epoch": 0.9716020206360486, "grad_norm": 0.4918075203895569, "learning_rate": 6.3329572591444e-08, "loss": 0.0443, "step": 44093 }, { "epoch": 0.9716240559255648, "grad_norm": 0.34187787771224976, "learning_rate": 6.323134204122172e-08, "loss": 0.0528, "step": 44094 }, { "epoch": 0.9716460912150809, "grad_norm": 0.5573070645332336, "learning_rate": 6.313318757197095e-08, "loss": 0.0629, "step": 44095 }, { "epoch": 0.9716681265045971, "grad_norm": 0.3643419146537781, "learning_rate": 6.303510918419297e-08, "loss": 0.0603, "step": 44096 }, { "epoch": 0.9716901617941133, "grad_norm": 0.6206493377685547, "learning_rate": 6.293710687838738e-08, "loss": 0.0728, "step": 44097 }, { "epoch": 0.9717121970836294, "grad_norm": 0.25655117630958557, "learning_rate": 6.283918065505378e-08, "loss": 0.0566, "step": 44098 }, { "epoch": 0.9717342323731456, "grad_norm": 0.42714494466781616, "learning_rate": 6.27413305146901e-08, "loss": 0.0375, "step": 44099 }, { "epoch": 0.9717562676626618, "grad_norm": 0.7543814778327942, "learning_rate": 6.264355645779429e-08, "loss": 0.0744, "step": 44100 }, { "epoch": 0.9717783029521779, "grad_norm": 0.7117205262184143, "learning_rate": 6.254585848486427e-08, "loss": 0.0409, "step": 44101 }, { "epoch": 0.9718003382416941, "grad_norm": 0.40743017196655273, "learning_rate": 6.244823659639798e-08, "loss": 0.0524, "step": 44102 }, { "epoch": 0.9718223735312103, "grad_norm": 0.4232633411884308, "learning_rate": 6.235069079289335e-08, "loss": 0.0406, "step": 44103 }, { "epoch": 0.9718444088207264, "grad_norm": 0.7606911659240723, "learning_rate": 6.225322107484832e-08, "loss": 0.0488, "step": 44104 }, { "epoch": 0.9718664441102426, "grad_norm": 0.7102726101875305, "learning_rate": 6.21558274427575e-08, "loss": 0.0598, "step": 44105 }, { "epoch": 0.9718884793997588, "grad_norm": 0.48282960057258606, "learning_rate": 6.205850989711548e-08, "loss": 0.0464, "step": 44106 }, { "epoch": 0.9719105146892749, "grad_norm": 0.5294744968414307, "learning_rate": 6.196126843842187e-08, "loss": 0.0785, "step": 44107 }, { "epoch": 0.9719325499787911, "grad_norm": 0.7184324860572815, "learning_rate": 6.186410306716795e-08, "loss": 0.0441, "step": 44108 }, { "epoch": 0.9719545852683072, "grad_norm": 0.41297003626823425, "learning_rate": 6.176701378385163e-08, "loss": 0.0575, "step": 44109 }, { "epoch": 0.9719766205578233, "grad_norm": 0.4418140947818756, "learning_rate": 6.167000058896755e-08, "loss": 0.0641, "step": 44110 }, { "epoch": 0.9719986558473395, "grad_norm": 1.0362926721572876, "learning_rate": 6.157306348300695e-08, "loss": 0.0637, "step": 44111 }, { "epoch": 0.9720206911368556, "grad_norm": 0.7561558485031128, "learning_rate": 6.147620246646612e-08, "loss": 0.0455, "step": 44112 }, { "epoch": 0.9720427264263718, "grad_norm": 0.6970177292823792, "learning_rate": 6.1379417539838e-08, "loss": 0.0674, "step": 44113 }, { "epoch": 0.972064761715888, "grad_norm": 0.42038601636886597, "learning_rate": 6.128270870361552e-08, "loss": 0.0631, "step": 44114 }, { "epoch": 0.9720867970054041, "grad_norm": 0.2367781400680542, "learning_rate": 6.118607595828995e-08, "loss": 0.064, "step": 44115 }, { "epoch": 0.9721088322949203, "grad_norm": 0.5066543817520142, "learning_rate": 6.108951930435425e-08, "loss": 0.065, "step": 44116 }, { "epoch": 0.9721308675844365, "grad_norm": 0.40143826603889465, "learning_rate": 6.099303874230133e-08, "loss": 0.0447, "step": 44117 }, { "epoch": 0.9721529028739526, "grad_norm": 0.6518502235412598, "learning_rate": 6.089663427262249e-08, "loss": 0.0975, "step": 44118 }, { "epoch": 0.9721749381634688, "grad_norm": 0.5270894765853882, "learning_rate": 6.080030589580732e-08, "loss": 0.0554, "step": 44119 }, { "epoch": 0.972196973452985, "grad_norm": 0.694082498550415, "learning_rate": 6.07040536123471e-08, "loss": 0.0826, "step": 44120 }, { "epoch": 0.9722190087425011, "grad_norm": 0.5176955461502075, "learning_rate": 6.060787742273143e-08, "loss": 0.0655, "step": 44121 }, { "epoch": 0.9722410440320173, "grad_norm": 0.6631248593330383, "learning_rate": 6.051177732745328e-08, "loss": 0.0416, "step": 44122 }, { "epoch": 0.9722630793215334, "grad_norm": 0.32042017579078674, "learning_rate": 6.041575332699722e-08, "loss": 0.0468, "step": 44123 }, { "epoch": 0.9722851146110496, "grad_norm": 0.6865326762199402, "learning_rate": 6.031980542185622e-08, "loss": 0.0431, "step": 44124 }, { "epoch": 0.9723071499005658, "grad_norm": 0.5857694745063782, "learning_rate": 6.022393361251821e-08, "loss": 0.0688, "step": 44125 }, { "epoch": 0.9723291851900819, "grad_norm": 0.6970723867416382, "learning_rate": 6.012813789946948e-08, "loss": 0.0397, "step": 44126 }, { "epoch": 0.9723512204795981, "grad_norm": 0.5970755219459534, "learning_rate": 6.003241828320127e-08, "loss": 0.0438, "step": 44127 }, { "epoch": 0.9723732557691143, "grad_norm": 0.3774442970752716, "learning_rate": 5.993677476419824e-08, "loss": 0.0569, "step": 44128 }, { "epoch": 0.9723952910586304, "grad_norm": 0.42145001888275146, "learning_rate": 5.984120734294995e-08, "loss": 0.0723, "step": 44129 }, { "epoch": 0.9724173263481466, "grad_norm": 0.3336315155029297, "learning_rate": 5.974571601994105e-08, "loss": 0.0673, "step": 44130 }, { "epoch": 0.9724393616376628, "grad_norm": 0.5788185000419617, "learning_rate": 5.965030079565947e-08, "loss": 0.0547, "step": 44131 }, { "epoch": 0.9724613969271789, "grad_norm": 0.46344757080078125, "learning_rate": 5.955496167058983e-08, "loss": 0.0546, "step": 44132 }, { "epoch": 0.9724834322166951, "grad_norm": 0.6611850261688232, "learning_rate": 5.945969864521839e-08, "loss": 0.0512, "step": 44133 }, { "epoch": 0.9725054675062113, "grad_norm": 0.6836349964141846, "learning_rate": 5.936451172002977e-08, "loss": 0.0555, "step": 44134 }, { "epoch": 0.9725275027957273, "grad_norm": 0.19102546572685242, "learning_rate": 5.926940089551025e-08, "loss": 0.0874, "step": 44135 }, { "epoch": 0.9725495380852435, "grad_norm": 0.5202997922897339, "learning_rate": 5.9174366172144447e-08, "loss": 0.0473, "step": 44136 }, { "epoch": 0.9725715733747596, "grad_norm": 0.6756501197814941, "learning_rate": 5.90794075504153e-08, "loss": 0.0549, "step": 44137 }, { "epoch": 0.9725936086642758, "grad_norm": 0.6712403297424316, "learning_rate": 5.898452503080576e-08, "loss": 0.0601, "step": 44138 }, { "epoch": 0.972615643953792, "grad_norm": 0.4624677896499634, "learning_rate": 5.8889718613800436e-08, "loss": 0.0413, "step": 44139 }, { "epoch": 0.9726376792433081, "grad_norm": 0.5006142854690552, "learning_rate": 5.8794988299880614e-08, "loss": 0.0472, "step": 44140 }, { "epoch": 0.9726597145328243, "grad_norm": 0.8637269735336304, "learning_rate": 5.870033408953257e-08, "loss": 0.069, "step": 44141 }, { "epoch": 0.9726817498223405, "grad_norm": 0.42753005027770996, "learning_rate": 5.860575598323426e-08, "loss": 0.0549, "step": 44142 }, { "epoch": 0.9727037851118566, "grad_norm": 0.4038030207157135, "learning_rate": 5.8511253981468615e-08, "loss": 0.0691, "step": 44143 }, { "epoch": 0.9727258204013728, "grad_norm": 0.7313804626464844, "learning_rate": 5.84168280847186e-08, "loss": 0.0442, "step": 44144 }, { "epoch": 0.972747855690889, "grad_norm": 0.4100455045700073, "learning_rate": 5.832247829346382e-08, "loss": 0.0428, "step": 44145 }, { "epoch": 0.9727698909804051, "grad_norm": 0.6247594952583313, "learning_rate": 5.822820460818557e-08, "loss": 0.0576, "step": 44146 }, { "epoch": 0.9727919262699213, "grad_norm": 0.4582178592681885, "learning_rate": 5.813400702936178e-08, "loss": 0.0686, "step": 44147 }, { "epoch": 0.9728139615594374, "grad_norm": 0.6063542366027832, "learning_rate": 5.803988555747541e-08, "loss": 0.048, "step": 44148 }, { "epoch": 0.9728359968489536, "grad_norm": 0.6326886415481567, "learning_rate": 5.7945840193002754e-08, "loss": 0.0823, "step": 44149 }, { "epoch": 0.9728580321384698, "grad_norm": 0.684634268283844, "learning_rate": 5.7851870936426746e-08, "loss": 0.0532, "step": 44150 }, { "epoch": 0.9728800674279859, "grad_norm": 0.9613879919052124, "learning_rate": 5.7757977788222e-08, "loss": 0.1026, "step": 44151 }, { "epoch": 0.9729021027175021, "grad_norm": 0.6631494164466858, "learning_rate": 5.766416074886982e-08, "loss": 0.0598, "step": 44152 }, { "epoch": 0.9729241380070183, "grad_norm": 0.4653136134147644, "learning_rate": 5.757041981884647e-08, "loss": 0.0406, "step": 44153 }, { "epoch": 0.9729461732965344, "grad_norm": 1.1257797479629517, "learning_rate": 5.7476754998628254e-08, "loss": 0.087, "step": 44154 }, { "epoch": 0.9729682085860506, "grad_norm": 0.5176656246185303, "learning_rate": 5.738316628869478e-08, "loss": 0.0943, "step": 44155 }, { "epoch": 0.9729902438755668, "grad_norm": 0.2333773970603943, "learning_rate": 5.7289653689522324e-08, "loss": 0.0197, "step": 44156 }, { "epoch": 0.9730122791650829, "grad_norm": 0.6981124877929688, "learning_rate": 5.719621720158552e-08, "loss": 0.0656, "step": 44157 }, { "epoch": 0.9730343144545991, "grad_norm": 0.5399723649024963, "learning_rate": 5.7102856825362316e-08, "loss": 0.043, "step": 44158 }, { "epoch": 0.9730563497441151, "grad_norm": 0.6656662821769714, "learning_rate": 5.7009572561327326e-08, "loss": 0.0561, "step": 44159 }, { "epoch": 0.9730783850336313, "grad_norm": 0.6882217526435852, "learning_rate": 5.691636440995518e-08, "loss": 0.0676, "step": 44160 }, { "epoch": 0.9731004203231475, "grad_norm": 0.6238945722579956, "learning_rate": 5.682323237172049e-08, "loss": 0.068, "step": 44161 }, { "epoch": 0.9731224556126636, "grad_norm": 0.5670387148857117, "learning_rate": 5.673017644709955e-08, "loss": 0.0777, "step": 44162 }, { "epoch": 0.9731444909021798, "grad_norm": 0.38484716415405273, "learning_rate": 5.663719663656364e-08, "loss": 0.0558, "step": 44163 }, { "epoch": 0.973166526191696, "grad_norm": 0.4317936599254608, "learning_rate": 5.654429294058905e-08, "loss": 0.0485, "step": 44164 }, { "epoch": 0.9731885614812121, "grad_norm": 0.8107624650001526, "learning_rate": 5.645146535964707e-08, "loss": 0.1023, "step": 44165 }, { "epoch": 0.9732105967707283, "grad_norm": 0.5257708430290222, "learning_rate": 5.635871389421066e-08, "loss": 0.0398, "step": 44166 }, { "epoch": 0.9732326320602445, "grad_norm": 0.38836339116096497, "learning_rate": 5.6266038544752764e-08, "loss": 0.0466, "step": 44167 }, { "epoch": 0.9732546673497606, "grad_norm": 0.44307318329811096, "learning_rate": 5.617343931174634e-08, "loss": 0.0735, "step": 44168 }, { "epoch": 0.9732767026392768, "grad_norm": 0.3539164364337921, "learning_rate": 5.608091619566269e-08, "loss": 0.0322, "step": 44169 }, { "epoch": 0.973298737928793, "grad_norm": 0.62302166223526, "learning_rate": 5.598846919696976e-08, "loss": 0.0412, "step": 44170 }, { "epoch": 0.9733207732183091, "grad_norm": 0.3900655210018158, "learning_rate": 5.589609831614384e-08, "loss": 0.0816, "step": 44171 }, { "epoch": 0.9733428085078253, "grad_norm": 0.8216397166252136, "learning_rate": 5.580380355365122e-08, "loss": 0.0602, "step": 44172 }, { "epoch": 0.9733648437973415, "grad_norm": 0.27986499667167664, "learning_rate": 5.5711584909964864e-08, "loss": 0.0428, "step": 44173 }, { "epoch": 0.9733868790868576, "grad_norm": 0.3482133448123932, "learning_rate": 5.561944238555272e-08, "loss": 0.0357, "step": 44174 }, { "epoch": 0.9734089143763738, "grad_norm": 0.26826125383377075, "learning_rate": 5.5527375980882756e-08, "loss": 0.0512, "step": 44175 }, { "epoch": 0.9734309496658899, "grad_norm": 0.6888330578804016, "learning_rate": 5.543538569642792e-08, "loss": 0.045, "step": 44176 }, { "epoch": 0.9734529849554061, "grad_norm": 0.7252991199493408, "learning_rate": 5.5343471532652847e-08, "loss": 0.0411, "step": 44177 }, { "epoch": 0.9734750202449223, "grad_norm": 0.42587989568710327, "learning_rate": 5.525163349002882e-08, "loss": 0.057, "step": 44178 }, { "epoch": 0.9734970555344384, "grad_norm": 0.28804001212120056, "learning_rate": 5.515987156902214e-08, "loss": 0.0597, "step": 44179 }, { "epoch": 0.9735190908239546, "grad_norm": 0.7141028642654419, "learning_rate": 5.506818577010075e-08, "loss": 0.0523, "step": 44180 }, { "epoch": 0.9735411261134708, "grad_norm": 0.6217389106750488, "learning_rate": 5.4976576093729305e-08, "loss": 0.0352, "step": 44181 }, { "epoch": 0.9735631614029869, "grad_norm": 0.5177487730979919, "learning_rate": 5.488504254037907e-08, "loss": 0.0884, "step": 44182 }, { "epoch": 0.9735851966925031, "grad_norm": 0.5471062660217285, "learning_rate": 5.479358511051136e-08, "loss": 0.0699, "step": 44183 }, { "epoch": 0.9736072319820192, "grad_norm": 0.5506877899169922, "learning_rate": 5.470220380459412e-08, "loss": 0.0513, "step": 44184 }, { "epoch": 0.9736292672715353, "grad_norm": 0.40980568528175354, "learning_rate": 5.4610898623093654e-08, "loss": 0.0434, "step": 44185 }, { "epoch": 0.9736513025610515, "grad_norm": 0.6782172918319702, "learning_rate": 5.451966956647458e-08, "loss": 0.0624, "step": 44186 }, { "epoch": 0.9736733378505676, "grad_norm": 0.4235711693763733, "learning_rate": 5.442851663520154e-08, "loss": 0.0619, "step": 44187 }, { "epoch": 0.9736953731400838, "grad_norm": 0.6540285348892212, "learning_rate": 5.433743982973749e-08, "loss": 0.0453, "step": 44188 }, { "epoch": 0.9737174084296, "grad_norm": 0.8552116751670837, "learning_rate": 5.4246439150547056e-08, "loss": 0.08, "step": 44189 }, { "epoch": 0.9737394437191161, "grad_norm": 0.22125068306922913, "learning_rate": 5.415551459809487e-08, "loss": 0.0509, "step": 44190 }, { "epoch": 0.9737614790086323, "grad_norm": 0.6436063647270203, "learning_rate": 5.406466617284222e-08, "loss": 0.0667, "step": 44191 }, { "epoch": 0.9737835142981485, "grad_norm": 0.7883407473564148, "learning_rate": 5.3973893875253755e-08, "loss": 0.0811, "step": 44192 }, { "epoch": 0.9738055495876646, "grad_norm": 0.7749539017677307, "learning_rate": 5.388319770579075e-08, "loss": 0.0741, "step": 44193 }, { "epoch": 0.9738275848771808, "grad_norm": 0.8883065581321716, "learning_rate": 5.379257766491619e-08, "loss": 0.0584, "step": 44194 }, { "epoch": 0.973849620166697, "grad_norm": 0.45925167202949524, "learning_rate": 5.370203375308969e-08, "loss": 0.04, "step": 44195 }, { "epoch": 0.9738716554562131, "grad_norm": 0.5108252167701721, "learning_rate": 5.361156597077421e-08, "loss": 0.0601, "step": 44196 }, { "epoch": 0.9738936907457293, "grad_norm": 0.5339645743370056, "learning_rate": 5.35211743184294e-08, "loss": 0.0561, "step": 44197 }, { "epoch": 0.9739157260352455, "grad_norm": 0.744791567325592, "learning_rate": 5.343085879651655e-08, "loss": 0.0541, "step": 44198 }, { "epoch": 0.9739377613247616, "grad_norm": 0.795741617679596, "learning_rate": 5.334061940549528e-08, "loss": 0.0544, "step": 44199 }, { "epoch": 0.9739597966142778, "grad_norm": 0.6383783221244812, "learning_rate": 5.325045614582524e-08, "loss": 0.0608, "step": 44200 }, { "epoch": 0.973981831903794, "grad_norm": 0.6157715916633606, "learning_rate": 5.316036901796606e-08, "loss": 0.0526, "step": 44201 }, { "epoch": 0.9740038671933101, "grad_norm": 0.502806544303894, "learning_rate": 5.307035802237403e-08, "loss": 0.069, "step": 44202 }, { "epoch": 0.9740259024828263, "grad_norm": 0.6848402619361877, "learning_rate": 5.298042315951213e-08, "loss": 0.0612, "step": 44203 }, { "epoch": 0.9740479377723424, "grad_norm": 0.887516975402832, "learning_rate": 5.2890564429834976e-08, "loss": 0.0868, "step": 44204 }, { "epoch": 0.9740699730618586, "grad_norm": 0.6829060912132263, "learning_rate": 5.280078183380388e-08, "loss": 0.076, "step": 44205 }, { "epoch": 0.9740920083513748, "grad_norm": 0.6436133980751038, "learning_rate": 5.271107537187181e-08, "loss": 0.0658, "step": 44206 }, { "epoch": 0.9741140436408909, "grad_norm": 0.7936487793922424, "learning_rate": 5.26214450444984e-08, "loss": 0.064, "step": 44207 }, { "epoch": 0.9741360789304071, "grad_norm": 0.7856183052062988, "learning_rate": 5.2531890852138274e-08, "loss": 0.0655, "step": 44208 }, { "epoch": 0.9741581142199232, "grad_norm": 0.9184677004814148, "learning_rate": 5.2442412795249416e-08, "loss": 0.0793, "step": 44209 }, { "epoch": 0.9741801495094393, "grad_norm": 0.6699188351631165, "learning_rate": 5.235301087428645e-08, "loss": 0.0455, "step": 44210 }, { "epoch": 0.9742021847989555, "grad_norm": 0.7360812425613403, "learning_rate": 5.226368508970569e-08, "loss": 0.0366, "step": 44211 }, { "epoch": 0.9742242200884716, "grad_norm": 0.6599453091621399, "learning_rate": 5.21744354419601e-08, "loss": 0.0719, "step": 44212 }, { "epoch": 0.9742462553779878, "grad_norm": 0.36141687631607056, "learning_rate": 5.208526193150764e-08, "loss": 0.0323, "step": 44213 }, { "epoch": 0.974268290667504, "grad_norm": 0.9396336674690247, "learning_rate": 5.199616455879963e-08, "loss": 0.0705, "step": 44214 }, { "epoch": 0.9742903259570201, "grad_norm": 0.6052732467651367, "learning_rate": 5.19071433242907e-08, "loss": 0.0605, "step": 44215 }, { "epoch": 0.9743123612465363, "grad_norm": 0.9746801257133484, "learning_rate": 5.181819822843547e-08, "loss": 0.0652, "step": 44216 }, { "epoch": 0.9743343965360525, "grad_norm": 0.661151111125946, "learning_rate": 5.172932927168528e-08, "loss": 0.0477, "step": 44217 }, { "epoch": 0.9743564318255686, "grad_norm": 0.5834764242172241, "learning_rate": 5.1640536454494735e-08, "loss": 0.0601, "step": 44218 }, { "epoch": 0.9743784671150848, "grad_norm": 0.6772932410240173, "learning_rate": 5.155181977731349e-08, "loss": 0.0908, "step": 44219 }, { "epoch": 0.974400502404601, "grad_norm": 0.7286058068275452, "learning_rate": 5.146317924059452e-08, "loss": 0.0734, "step": 44220 }, { "epoch": 0.9744225376941171, "grad_norm": 0.2837086617946625, "learning_rate": 5.1374614844790776e-08, "loss": 0.0495, "step": 44221 }, { "epoch": 0.9744445729836333, "grad_norm": 1.079702615737915, "learning_rate": 5.128612659035192e-08, "loss": 0.0773, "step": 44222 }, { "epoch": 0.9744666082731495, "grad_norm": 0.5390703082084656, "learning_rate": 5.119771447772925e-08, "loss": 0.0429, "step": 44223 }, { "epoch": 0.9744886435626656, "grad_norm": 0.5466018915176392, "learning_rate": 5.1109378507372404e-08, "loss": 0.0677, "step": 44224 }, { "epoch": 0.9745106788521818, "grad_norm": 0.4879586398601532, "learning_rate": 5.1021118679731025e-08, "loss": 0.0592, "step": 44225 }, { "epoch": 0.974532714141698, "grad_norm": 0.3710026443004608, "learning_rate": 5.0932934995256416e-08, "loss": 0.0421, "step": 44226 }, { "epoch": 0.9745547494312141, "grad_norm": 0.4546979069709778, "learning_rate": 5.084482745439656e-08, "loss": 0.0575, "step": 44227 }, { "epoch": 0.9745767847207303, "grad_norm": 0.7128874659538269, "learning_rate": 5.075679605759942e-08, "loss": 0.051, "step": 44228 }, { "epoch": 0.9745988200102464, "grad_norm": 0.3115275502204895, "learning_rate": 5.066884080531631e-08, "loss": 0.0794, "step": 44229 }, { "epoch": 0.9746208552997626, "grad_norm": 0.7976033687591553, "learning_rate": 5.058096169799187e-08, "loss": 0.0662, "step": 44230 }, { "epoch": 0.9746428905892788, "grad_norm": 0.6072923541069031, "learning_rate": 5.049315873607574e-08, "loss": 0.0569, "step": 44231 }, { "epoch": 0.9746649258787949, "grad_norm": 0.5987225770950317, "learning_rate": 5.040543192001257e-08, "loss": 0.0705, "step": 44232 }, { "epoch": 0.9746869611683111, "grad_norm": 0.8115957975387573, "learning_rate": 5.031778125025366e-08, "loss": 0.0625, "step": 44233 }, { "epoch": 0.9747089964578272, "grad_norm": 0.613296389579773, "learning_rate": 5.023020672724199e-08, "loss": 0.0645, "step": 44234 }, { "epoch": 0.9747310317473433, "grad_norm": 0.398902028799057, "learning_rate": 5.014270835142387e-08, "loss": 0.048, "step": 44235 }, { "epoch": 0.9747530670368595, "grad_norm": 0.6812797784805298, "learning_rate": 5.0055286123245616e-08, "loss": 0.0393, "step": 44236 }, { "epoch": 0.9747751023263757, "grad_norm": 0.1033753827214241, "learning_rate": 4.9967940043153525e-08, "loss": 0.0325, "step": 44237 }, { "epoch": 0.9747971376158918, "grad_norm": 0.6651204228401184, "learning_rate": 4.988067011159225e-08, "loss": 0.0435, "step": 44238 }, { "epoch": 0.974819172905408, "grad_norm": 0.8831846117973328, "learning_rate": 4.979347632900477e-08, "loss": 0.0681, "step": 44239 }, { "epoch": 0.9748412081949241, "grad_norm": 0.7152920961380005, "learning_rate": 4.970635869583573e-08, "loss": 0.0568, "step": 44240 }, { "epoch": 0.9748632434844403, "grad_norm": 0.6250630021095276, "learning_rate": 4.9619317212529766e-08, "loss": 0.0521, "step": 44241 }, { "epoch": 0.9748852787739565, "grad_norm": 0.5986153483390808, "learning_rate": 4.953235187952987e-08, "loss": 0.0647, "step": 44242 }, { "epoch": 0.9749073140634726, "grad_norm": 0.5396701097488403, "learning_rate": 4.944546269727734e-08, "loss": 0.0415, "step": 44243 }, { "epoch": 0.9749293493529888, "grad_norm": 0.5439321994781494, "learning_rate": 4.935864966621684e-08, "loss": 0.0502, "step": 44244 }, { "epoch": 0.974951384642505, "grad_norm": 0.6236768960952759, "learning_rate": 4.9271912786789666e-08, "loss": 0.0454, "step": 44245 }, { "epoch": 0.9749734199320211, "grad_norm": 0.5342437028884888, "learning_rate": 4.9185252059438804e-08, "loss": 0.0702, "step": 44246 }, { "epoch": 0.9749954552215373, "grad_norm": 0.5349184274673462, "learning_rate": 4.90986674846039e-08, "loss": 0.0821, "step": 44247 }, { "epoch": 0.9750174905110535, "grad_norm": 0.8778205513954163, "learning_rate": 4.9012159062727935e-08, "loss": 0.0666, "step": 44248 }, { "epoch": 0.9750395258005696, "grad_norm": 0.43104544281959534, "learning_rate": 4.8925726794248894e-08, "loss": 0.052, "step": 44249 }, { "epoch": 0.9750615610900858, "grad_norm": 0.4339170455932617, "learning_rate": 4.883937067960975e-08, "loss": 0.0468, "step": 44250 }, { "epoch": 0.975083596379602, "grad_norm": 0.6232615113258362, "learning_rate": 4.8753090719248494e-08, "loss": 0.0503, "step": 44251 }, { "epoch": 0.9751056316691181, "grad_norm": 0.5273544192314148, "learning_rate": 4.866688691360477e-08, "loss": 0.0518, "step": 44252 }, { "epoch": 0.9751276669586343, "grad_norm": 0.7386264204978943, "learning_rate": 4.858075926311989e-08, "loss": 0.0756, "step": 44253 }, { "epoch": 0.9751497022481505, "grad_norm": 0.5777132511138916, "learning_rate": 4.849470776822851e-08, "loss": 0.0393, "step": 44254 }, { "epoch": 0.9751717375376666, "grad_norm": 0.5027744770050049, "learning_rate": 4.840873242937194e-08, "loss": 0.0633, "step": 44255 }, { "epoch": 0.9751937728271828, "grad_norm": 0.7602314352989197, "learning_rate": 4.832283324698816e-08, "loss": 0.0533, "step": 44256 }, { "epoch": 0.975215808116699, "grad_norm": 0.4696696102619171, "learning_rate": 4.82370102215135e-08, "loss": 0.0875, "step": 44257 }, { "epoch": 0.975237843406215, "grad_norm": 0.6912431716918945, "learning_rate": 4.815126335338593e-08, "loss": 0.0786, "step": 44258 }, { "epoch": 0.9752598786957312, "grad_norm": 0.44914379715919495, "learning_rate": 4.8065592643041775e-08, "loss": 0.0574, "step": 44259 }, { "epoch": 0.9752819139852473, "grad_norm": 0.6888296604156494, "learning_rate": 4.7979998090917356e-08, "loss": 0.0557, "step": 44260 }, { "epoch": 0.9753039492747635, "grad_norm": 0.14331598579883575, "learning_rate": 4.7894479697447315e-08, "loss": 0.0758, "step": 44261 }, { "epoch": 0.9753259845642797, "grad_norm": 0.4603363871574402, "learning_rate": 4.780903746306964e-08, "loss": 0.0664, "step": 44262 }, { "epoch": 0.9753480198537958, "grad_norm": 0.798376202583313, "learning_rate": 4.772367138821898e-08, "loss": 0.0855, "step": 44263 }, { "epoch": 0.975370055143312, "grad_norm": 0.6857113838195801, "learning_rate": 4.763838147333e-08, "loss": 0.0637, "step": 44264 }, { "epoch": 0.9753920904328282, "grad_norm": 0.5009844899177551, "learning_rate": 4.755316771883567e-08, "loss": 0.0474, "step": 44265 }, { "epoch": 0.9754141257223443, "grad_norm": 0.461734801530838, "learning_rate": 4.7468030125170646e-08, "loss": 0.0486, "step": 44266 }, { "epoch": 0.9754361610118605, "grad_norm": 0.22941066324710846, "learning_rate": 4.738296869277126e-08, "loss": 0.0419, "step": 44267 }, { "epoch": 0.9754581963013766, "grad_norm": 0.32737478613853455, "learning_rate": 4.729798342206715e-08, "loss": 0.0488, "step": 44268 }, { "epoch": 0.9754802315908928, "grad_norm": 0.33558496832847595, "learning_rate": 4.721307431349298e-08, "loss": 0.0415, "step": 44269 }, { "epoch": 0.975502266880409, "grad_norm": 0.27587902545928955, "learning_rate": 4.712824136748173e-08, "loss": 0.0499, "step": 44270 }, { "epoch": 0.9755243021699251, "grad_norm": 0.6692456603050232, "learning_rate": 4.704348458446306e-08, "loss": 0.0544, "step": 44271 }, { "epoch": 0.9755463374594413, "grad_norm": 0.7198635339736938, "learning_rate": 4.695880396487162e-08, "loss": 0.0422, "step": 44272 }, { "epoch": 0.9755683727489575, "grad_norm": 0.5616822242736816, "learning_rate": 4.687419950913707e-08, "loss": 0.0756, "step": 44273 }, { "epoch": 0.9755904080384736, "grad_norm": 1.0743902921676636, "learning_rate": 4.678967121769073e-08, "loss": 0.0696, "step": 44274 }, { "epoch": 0.9756124433279898, "grad_norm": 0.4098697006702423, "learning_rate": 4.670521909096392e-08, "loss": 0.0557, "step": 44275 }, { "epoch": 0.975634478617506, "grad_norm": 0.6275115013122559, "learning_rate": 4.662084312938464e-08, "loss": 0.0816, "step": 44276 }, { "epoch": 0.9756565139070221, "grad_norm": 0.37626057863235474, "learning_rate": 4.653654333338586e-08, "loss": 0.0707, "step": 44277 }, { "epoch": 0.9756785491965383, "grad_norm": 0.799541175365448, "learning_rate": 4.645231970339392e-08, "loss": 0.0545, "step": 44278 }, { "epoch": 0.9757005844860545, "grad_norm": 0.7438635230064392, "learning_rate": 4.636817223983847e-08, "loss": 0.0943, "step": 44279 }, { "epoch": 0.9757226197755706, "grad_norm": 0.5850703120231628, "learning_rate": 4.628410094314917e-08, "loss": 0.0783, "step": 44280 }, { "epoch": 0.9757446550650868, "grad_norm": 0.6816368699073792, "learning_rate": 4.6200105813754e-08, "loss": 0.0856, "step": 44281 }, { "epoch": 0.975766690354603, "grad_norm": 0.8377243876457214, "learning_rate": 4.6116186852080966e-08, "loss": 0.0788, "step": 44282 }, { "epoch": 0.975788725644119, "grad_norm": 0.6119392514228821, "learning_rate": 4.603234405855639e-08, "loss": 0.066, "step": 44283 }, { "epoch": 0.9758107609336352, "grad_norm": 0.9490970969200134, "learning_rate": 4.5948577433608253e-08, "loss": 0.0596, "step": 44284 }, { "epoch": 0.9758327962231513, "grad_norm": 0.4425273537635803, "learning_rate": 4.586488697766289e-08, "loss": 0.0437, "step": 44285 }, { "epoch": 0.9758548315126675, "grad_norm": 0.8231571316719055, "learning_rate": 4.578127269114829e-08, "loss": 0.0552, "step": 44286 }, { "epoch": 0.9758768668021837, "grad_norm": 0.806129515171051, "learning_rate": 4.569773457448578e-08, "loss": 0.0715, "step": 44287 }, { "epoch": 0.9758989020916998, "grad_norm": 0.5269977450370789, "learning_rate": 4.561427262810503e-08, "loss": 0.0536, "step": 44288 }, { "epoch": 0.975920937381216, "grad_norm": 0.7694517970085144, "learning_rate": 4.5530886852430674e-08, "loss": 0.0681, "step": 44289 }, { "epoch": 0.9759429726707322, "grad_norm": 0.41289424896240234, "learning_rate": 4.5447577247887393e-08, "loss": 0.0562, "step": 44290 }, { "epoch": 0.9759650079602483, "grad_norm": 0.8627319931983948, "learning_rate": 4.536434381489651e-08, "loss": 0.0847, "step": 44291 }, { "epoch": 0.9759870432497645, "grad_norm": 0.6799917221069336, "learning_rate": 4.528118655388602e-08, "loss": 0.0736, "step": 44292 }, { "epoch": 0.9760090785392807, "grad_norm": 0.4565489888191223, "learning_rate": 4.519810546527725e-08, "loss": 0.0545, "step": 44293 }, { "epoch": 0.9760311138287968, "grad_norm": 0.6506641507148743, "learning_rate": 4.511510054949486e-08, "loss": 0.0546, "step": 44294 }, { "epoch": 0.976053149118313, "grad_norm": 0.6182729601860046, "learning_rate": 4.5032171806960177e-08, "loss": 0.0533, "step": 44295 }, { "epoch": 0.9760751844078291, "grad_norm": 0.7869056463241577, "learning_rate": 4.49493192380962e-08, "loss": 0.0616, "step": 44296 }, { "epoch": 0.9760972196973453, "grad_norm": 0.7484053373336792, "learning_rate": 4.4866542843324253e-08, "loss": 0.1018, "step": 44297 }, { "epoch": 0.9761192549868615, "grad_norm": 0.3418925404548645, "learning_rate": 4.478384262306734e-08, "loss": 0.0544, "step": 44298 }, { "epoch": 0.9761412902763776, "grad_norm": 1.0160620212554932, "learning_rate": 4.470121857774512e-08, "loss": 0.0775, "step": 44299 }, { "epoch": 0.9761633255658938, "grad_norm": 0.7586274743080139, "learning_rate": 4.461867070777892e-08, "loss": 0.0699, "step": 44300 }, { "epoch": 0.97618536085541, "grad_norm": 0.7795848250389099, "learning_rate": 4.453619901359174e-08, "loss": 0.0603, "step": 44301 }, { "epoch": 0.9762073961449261, "grad_norm": 0.6149128675460815, "learning_rate": 4.445380349559991e-08, "loss": 0.0643, "step": 44302 }, { "epoch": 0.9762294314344423, "grad_norm": 0.5412268042564392, "learning_rate": 4.4371484154224764e-08, "loss": 0.0682, "step": 44303 }, { "epoch": 0.9762514667239585, "grad_norm": 0.49623987078666687, "learning_rate": 4.42892409898843e-08, "loss": 0.0537, "step": 44304 }, { "epoch": 0.9762735020134746, "grad_norm": 0.9708071351051331, "learning_rate": 4.420707400299984e-08, "loss": 0.0797, "step": 44305 }, { "epoch": 0.9762955373029908, "grad_norm": 0.6290338635444641, "learning_rate": 4.412498319398939e-08, "loss": 0.0597, "step": 44306 }, { "epoch": 0.976317572592507, "grad_norm": 0.7119228839874268, "learning_rate": 4.404296856326928e-08, "loss": 0.0641, "step": 44307 }, { "epoch": 0.976339607882023, "grad_norm": 0.246980682015419, "learning_rate": 4.396103011125918e-08, "loss": 0.0554, "step": 44308 }, { "epoch": 0.9763616431715392, "grad_norm": 0.4327002465724945, "learning_rate": 4.387916783837542e-08, "loss": 0.0446, "step": 44309 }, { "epoch": 0.9763836784610553, "grad_norm": 0.5865088701248169, "learning_rate": 4.3797381745034336e-08, "loss": 0.0584, "step": 44310 }, { "epoch": 0.9764057137505715, "grad_norm": 1.0190109014511108, "learning_rate": 4.371567183165392e-08, "loss": 0.0698, "step": 44311 }, { "epoch": 0.9764277490400877, "grad_norm": 0.5388625860214233, "learning_rate": 4.363403809865052e-08, "loss": 0.0526, "step": 44312 }, { "epoch": 0.9764497843296038, "grad_norm": 0.35514068603515625, "learning_rate": 4.355248054643879e-08, "loss": 0.0434, "step": 44313 }, { "epoch": 0.97647181961912, "grad_norm": 0.3482966721057892, "learning_rate": 4.34709991754334e-08, "loss": 0.0473, "step": 44314 }, { "epoch": 0.9764938549086362, "grad_norm": 0.4506208896636963, "learning_rate": 4.3389593986050694e-08, "loss": 0.0511, "step": 44315 }, { "epoch": 0.9765158901981523, "grad_norm": 0.4883241057395935, "learning_rate": 4.330826497870699e-08, "loss": 0.0717, "step": 44316 }, { "epoch": 0.9765379254876685, "grad_norm": 0.5606423020362854, "learning_rate": 4.322701215381364e-08, "loss": 0.0479, "step": 44317 }, { "epoch": 0.9765599607771847, "grad_norm": 0.6020956039428711, "learning_rate": 4.3145835511785306e-08, "loss": 0.064, "step": 44318 }, { "epoch": 0.9765819960667008, "grad_norm": 0.7816165685653687, "learning_rate": 4.306473505303499e-08, "loss": 0.0635, "step": 44319 }, { "epoch": 0.976604031356217, "grad_norm": 0.4949060380458832, "learning_rate": 4.298371077797736e-08, "loss": 0.0492, "step": 44320 }, { "epoch": 0.9766260666457331, "grad_norm": 0.7943324446678162, "learning_rate": 4.290276268702542e-08, "loss": 0.0931, "step": 44321 }, { "epoch": 0.9766481019352493, "grad_norm": 0.5458852052688599, "learning_rate": 4.2821890780588847e-08, "loss": 0.054, "step": 44322 }, { "epoch": 0.9766701372247655, "grad_norm": 0.5294508934020996, "learning_rate": 4.274109505908064e-08, "loss": 0.0596, "step": 44323 }, { "epoch": 0.9766921725142816, "grad_norm": 0.35920771956443787, "learning_rate": 4.2660375522913795e-08, "loss": 0.0406, "step": 44324 }, { "epoch": 0.9767142078037978, "grad_norm": 0.4461928904056549, "learning_rate": 4.2579732172499665e-08, "loss": 0.0404, "step": 44325 }, { "epoch": 0.976736243093314, "grad_norm": 0.3997170329093933, "learning_rate": 4.249916500824624e-08, "loss": 0.0706, "step": 44326 }, { "epoch": 0.9767582783828301, "grad_norm": 0.45749494433403015, "learning_rate": 4.241867403056487e-08, "loss": 0.0478, "step": 44327 }, { "epoch": 0.9767803136723463, "grad_norm": 0.45168206095695496, "learning_rate": 4.23382592398669e-08, "loss": 0.0535, "step": 44328 }, { "epoch": 0.9768023489618625, "grad_norm": 0.27704787254333496, "learning_rate": 4.225792063656031e-08, "loss": 0.0456, "step": 44329 }, { "epoch": 0.9768243842513786, "grad_norm": 0.7445074915885925, "learning_rate": 4.217765822105646e-08, "loss": 0.0566, "step": 44330 }, { "epoch": 0.9768464195408948, "grad_norm": 0.8979204297065735, "learning_rate": 4.209747199376168e-08, "loss": 0.0856, "step": 44331 }, { "epoch": 0.9768684548304108, "grad_norm": 0.4367302656173706, "learning_rate": 4.2017361955085656e-08, "loss": 0.0487, "step": 44332 }, { "epoch": 0.976890490119927, "grad_norm": 0.3809003233909607, "learning_rate": 4.1937328105438045e-08, "loss": 0.0473, "step": 44333 }, { "epoch": 0.9769125254094432, "grad_norm": 0.35191673040390015, "learning_rate": 4.1857370445221866e-08, "loss": 0.0678, "step": 44334 }, { "epoch": 0.9769345606989593, "grad_norm": 0.9543867111206055, "learning_rate": 4.177748897484845e-08, "loss": 0.0802, "step": 44335 }, { "epoch": 0.9769565959884755, "grad_norm": 0.3955410420894623, "learning_rate": 4.1697683694724157e-08, "loss": 0.0527, "step": 44336 }, { "epoch": 0.9769786312779917, "grad_norm": 0.47649285197257996, "learning_rate": 4.16179546052553e-08, "loss": 0.0436, "step": 44337 }, { "epoch": 0.9770006665675078, "grad_norm": 0.46156319975852966, "learning_rate": 4.1538301706846584e-08, "loss": 0.0578, "step": 44338 }, { "epoch": 0.977022701857024, "grad_norm": 0.7544113397598267, "learning_rate": 4.145872499990433e-08, "loss": 0.048, "step": 44339 }, { "epoch": 0.9770447371465402, "grad_norm": 1.2554914951324463, "learning_rate": 4.137922448483489e-08, "loss": 0.0634, "step": 44340 }, { "epoch": 0.9770667724360563, "grad_norm": 0.646197497844696, "learning_rate": 4.129980016204293e-08, "loss": 0.0776, "step": 44341 }, { "epoch": 0.9770888077255725, "grad_norm": 0.3283206820487976, "learning_rate": 4.1220452031931474e-08, "loss": 0.0728, "step": 44342 }, { "epoch": 0.9771108430150887, "grad_norm": 0.6704394817352295, "learning_rate": 4.114118009490686e-08, "loss": 0.0633, "step": 44343 }, { "epoch": 0.9771328783046048, "grad_norm": 0.6607410907745361, "learning_rate": 4.106198435137043e-08, "loss": 0.054, "step": 44344 }, { "epoch": 0.977154913594121, "grad_norm": 0.5358055233955383, "learning_rate": 4.098286480172852e-08, "loss": 0.0495, "step": 44345 }, { "epoch": 0.9771769488836372, "grad_norm": 0.24650949239730835, "learning_rate": 4.090382144638083e-08, "loss": 0.0416, "step": 44346 }, { "epoch": 0.9771989841731533, "grad_norm": 0.3327401876449585, "learning_rate": 4.0824854285732016e-08, "loss": 0.0395, "step": 44347 }, { "epoch": 0.9772210194626695, "grad_norm": 0.37770748138427734, "learning_rate": 4.0745963320185096e-08, "loss": 0.058, "step": 44348 }, { "epoch": 0.9772430547521856, "grad_norm": 1.034437894821167, "learning_rate": 4.066714855014142e-08, "loss": 0.0735, "step": 44349 }, { "epoch": 0.9772650900417018, "grad_norm": 0.6306313872337341, "learning_rate": 4.0588409975998995e-08, "loss": 0.061, "step": 44350 }, { "epoch": 0.977287125331218, "grad_norm": 0.6966075897216797, "learning_rate": 4.050974759816417e-08, "loss": 0.0714, "step": 44351 }, { "epoch": 0.9773091606207341, "grad_norm": 0.7782886624336243, "learning_rate": 4.043116141703329e-08, "loss": 0.0789, "step": 44352 }, { "epoch": 0.9773311959102503, "grad_norm": 0.5735145211219788, "learning_rate": 4.0352651433009366e-08, "loss": 0.0649, "step": 44353 }, { "epoch": 0.9773532311997665, "grad_norm": 0.9848915338516235, "learning_rate": 4.027421764649209e-08, "loss": 0.0663, "step": 44354 }, { "epoch": 0.9773752664892826, "grad_norm": 0.6773905158042908, "learning_rate": 4.0195860057879455e-08, "loss": 0.052, "step": 44355 }, { "epoch": 0.9773973017787988, "grad_norm": 0.6566388607025146, "learning_rate": 4.011757866757115e-08, "loss": 0.0494, "step": 44356 }, { "epoch": 0.9774193370683149, "grad_norm": 0.5585538148880005, "learning_rate": 4.0039373475966867e-08, "loss": 0.057, "step": 44357 }, { "epoch": 0.977441372357831, "grad_norm": 0.6921099424362183, "learning_rate": 3.996124448346294e-08, "loss": 0.0506, "step": 44358 }, { "epoch": 0.9774634076473472, "grad_norm": 0.5787161588668823, "learning_rate": 3.988319169046073e-08, "loss": 0.0302, "step": 44359 }, { "epoch": 0.9774854429368633, "grad_norm": 0.38435304164886475, "learning_rate": 3.980521509735324e-08, "loss": 0.0249, "step": 44360 }, { "epoch": 0.9775074782263795, "grad_norm": 0.5693891048431396, "learning_rate": 3.9727314704541825e-08, "loss": 0.0473, "step": 44361 }, { "epoch": 0.9775295135158957, "grad_norm": 0.5468655824661255, "learning_rate": 3.9649490512421175e-08, "loss": 0.0976, "step": 44362 }, { "epoch": 0.9775515488054118, "grad_norm": 0.6194678544998169, "learning_rate": 3.957174252138762e-08, "loss": 0.0585, "step": 44363 }, { "epoch": 0.977573584094928, "grad_norm": 0.451083242893219, "learning_rate": 3.949407073183753e-08, "loss": 0.0392, "step": 44364 }, { "epoch": 0.9775956193844442, "grad_norm": 0.7501959204673767, "learning_rate": 3.941647514416724e-08, "loss": 0.084, "step": 44365 }, { "epoch": 0.9776176546739603, "grad_norm": 0.44990643858909607, "learning_rate": 3.933895575877145e-08, "loss": 0.052, "step": 44366 }, { "epoch": 0.9776396899634765, "grad_norm": 0.5816921591758728, "learning_rate": 3.926151257604482e-08, "loss": 0.0467, "step": 44367 }, { "epoch": 0.9776617252529927, "grad_norm": 0.6007224321365356, "learning_rate": 3.918414559638039e-08, "loss": 0.0656, "step": 44368 }, { "epoch": 0.9776837605425088, "grad_norm": 0.36587029695510864, "learning_rate": 3.910685482017451e-08, "loss": 0.0488, "step": 44369 }, { "epoch": 0.977705795832025, "grad_norm": 0.862366795539856, "learning_rate": 3.902964024782185e-08, "loss": 0.0745, "step": 44370 }, { "epoch": 0.9777278311215412, "grad_norm": 0.5591808557510376, "learning_rate": 3.895250187971211e-08, "loss": 0.0341, "step": 44371 }, { "epoch": 0.9777498664110573, "grad_norm": 0.5284568071365356, "learning_rate": 3.887543971623997e-08, "loss": 0.0625, "step": 44372 }, { "epoch": 0.9777719017005735, "grad_norm": 0.31740376353263855, "learning_rate": 3.8798453757798446e-08, "loss": 0.0583, "step": 44373 }, { "epoch": 0.9777939369900897, "grad_norm": 0.5692294836044312, "learning_rate": 3.8721544004778895e-08, "loss": 0.0714, "step": 44374 }, { "epoch": 0.9778159722796058, "grad_norm": 0.6685490012168884, "learning_rate": 3.864471045757434e-08, "loss": 0.0543, "step": 44375 }, { "epoch": 0.977838007569122, "grad_norm": 0.3249836564064026, "learning_rate": 3.8567953116574464e-08, "loss": 0.0444, "step": 44376 }, { "epoch": 0.9778600428586381, "grad_norm": 0.40318015217781067, "learning_rate": 3.8491271982170616e-08, "loss": 0.0484, "step": 44377 }, { "epoch": 0.9778820781481543, "grad_norm": 0.9301005601882935, "learning_rate": 3.841466705475416e-08, "loss": 0.0484, "step": 44378 }, { "epoch": 0.9779041134376705, "grad_norm": 0.6505926251411438, "learning_rate": 3.8338138334716444e-08, "loss": 0.0566, "step": 44379 }, { "epoch": 0.9779261487271866, "grad_norm": 1.0245964527130127, "learning_rate": 3.826168582244382e-08, "loss": 0.0927, "step": 44380 }, { "epoch": 0.9779481840167028, "grad_norm": 0.8000964522361755, "learning_rate": 3.818530951832766e-08, "loss": 0.0598, "step": 44381 }, { "epoch": 0.9779702193062189, "grad_norm": 0.4265505075454712, "learning_rate": 3.810900942275763e-08, "loss": 0.0321, "step": 44382 }, { "epoch": 0.977992254595735, "grad_norm": 0.4537050426006317, "learning_rate": 3.803278553612011e-08, "loss": 0.0665, "step": 44383 }, { "epoch": 0.9780142898852512, "grad_norm": 0.46503573656082153, "learning_rate": 3.7956637858806434e-08, "loss": 0.0474, "step": 44384 }, { "epoch": 0.9780363251747674, "grad_norm": 0.7654802203178406, "learning_rate": 3.78805663912013e-08, "loss": 0.0496, "step": 44385 }, { "epoch": 0.9780583604642835, "grad_norm": 0.4658239483833313, "learning_rate": 3.780457113369606e-08, "loss": 0.0466, "step": 44386 }, { "epoch": 0.9780803957537997, "grad_norm": 0.2837076485157013, "learning_rate": 3.772865208667375e-08, "loss": 0.0402, "step": 44387 }, { "epoch": 0.9781024310433158, "grad_norm": 0.583540141582489, "learning_rate": 3.765280925052239e-08, "loss": 0.074, "step": 44388 }, { "epoch": 0.978124466332832, "grad_norm": 0.6061684489250183, "learning_rate": 3.7577042625629996e-08, "loss": 0.0708, "step": 44389 }, { "epoch": 0.9781465016223482, "grad_norm": 0.6286197304725647, "learning_rate": 3.750135221238127e-08, "loss": 0.0505, "step": 44390 }, { "epoch": 0.9781685369118643, "grad_norm": 0.647616446018219, "learning_rate": 3.742573801116089e-08, "loss": 0.0655, "step": 44391 }, { "epoch": 0.9781905722013805, "grad_norm": 0.4676743447780609, "learning_rate": 3.735020002235523e-08, "loss": 0.0494, "step": 44392 }, { "epoch": 0.9782126074908967, "grad_norm": 0.37893810868263245, "learning_rate": 3.727473824634897e-08, "loss": 0.0439, "step": 44393 }, { "epoch": 0.9782346427804128, "grad_norm": 0.5059399604797363, "learning_rate": 3.719935268352514e-08, "loss": 0.0625, "step": 44394 }, { "epoch": 0.978256678069929, "grad_norm": 0.3551913797855377, "learning_rate": 3.712404333426844e-08, "loss": 0.0574, "step": 44395 }, { "epoch": 0.9782787133594452, "grad_norm": 0.2523536682128906, "learning_rate": 3.7048810198963555e-08, "loss": 0.0469, "step": 44396 }, { "epoch": 0.9783007486489613, "grad_norm": 0.7985095381736755, "learning_rate": 3.6973653277993516e-08, "loss": 0.0701, "step": 44397 }, { "epoch": 0.9783227839384775, "grad_norm": 0.6496794819831848, "learning_rate": 3.6898572571739677e-08, "loss": 0.0512, "step": 44398 }, { "epoch": 0.9783448192279937, "grad_norm": 0.8551012873649597, "learning_rate": 3.682356808058507e-08, "loss": 0.0735, "step": 44399 }, { "epoch": 0.9783668545175098, "grad_norm": 0.780368447303772, "learning_rate": 3.674863980491272e-08, "loss": 0.0733, "step": 44400 }, { "epoch": 0.978388889807026, "grad_norm": 0.7693361043930054, "learning_rate": 3.667378774510233e-08, "loss": 0.06, "step": 44401 }, { "epoch": 0.9784109250965422, "grad_norm": 0.7107320427894592, "learning_rate": 3.659901190153692e-08, "loss": 0.0603, "step": 44402 }, { "epoch": 0.9784329603860583, "grad_norm": 0.5648505091667175, "learning_rate": 3.652431227459618e-08, "loss": 0.0531, "step": 44403 }, { "epoch": 0.9784549956755745, "grad_norm": 0.45539799332618713, "learning_rate": 3.644968886466149e-08, "loss": 0.0453, "step": 44404 }, { "epoch": 0.9784770309650906, "grad_norm": 0.5926907062530518, "learning_rate": 3.637514167211253e-08, "loss": 0.0483, "step": 44405 }, { "epoch": 0.9784990662546067, "grad_norm": 0.4764293432235718, "learning_rate": 3.6300670697330676e-08, "loss": 0.0572, "step": 44406 }, { "epoch": 0.9785211015441229, "grad_norm": 0.754054844379425, "learning_rate": 3.622627594069228e-08, "loss": 0.0391, "step": 44407 }, { "epoch": 0.978543136833639, "grad_norm": 0.917457640171051, "learning_rate": 3.615195740257704e-08, "loss": 0.0881, "step": 44408 }, { "epoch": 0.9785651721231552, "grad_norm": 0.8619628548622131, "learning_rate": 3.6077715083366325e-08, "loss": 0.0628, "step": 44409 }, { "epoch": 0.9785872074126714, "grad_norm": 0.4623620808124542, "learning_rate": 3.600354898343483e-08, "loss": 0.0444, "step": 44410 }, { "epoch": 0.9786092427021875, "grad_norm": 0.5024548768997192, "learning_rate": 3.592945910316059e-08, "loss": 0.0468, "step": 44411 }, { "epoch": 0.9786312779917037, "grad_norm": 0.6679134368896484, "learning_rate": 3.5855445442923294e-08, "loss": 0.0543, "step": 44412 }, { "epoch": 0.9786533132812198, "grad_norm": 0.483346164226532, "learning_rate": 3.578150800309765e-08, "loss": 0.0464, "step": 44413 }, { "epoch": 0.978675348570736, "grad_norm": 0.4180762469768524, "learning_rate": 3.570764678406169e-08, "loss": 0.046, "step": 44414 }, { "epoch": 0.9786973838602522, "grad_norm": 0.5674881339073181, "learning_rate": 3.563386178619177e-08, "loss": 0.0629, "step": 44415 }, { "epoch": 0.9787194191497683, "grad_norm": 0.23776958882808685, "learning_rate": 3.5560153009862595e-08, "loss": 0.0562, "step": 44416 }, { "epoch": 0.9787414544392845, "grad_norm": 0.46799394488334656, "learning_rate": 3.5486520455450533e-08, "loss": 0.0674, "step": 44417 }, { "epoch": 0.9787634897288007, "grad_norm": 0.6854785680770874, "learning_rate": 3.5412964123328615e-08, "loss": 0.063, "step": 44418 }, { "epoch": 0.9787855250183168, "grad_norm": 0.8275446891784668, "learning_rate": 3.533948401387488e-08, "loss": 0.0735, "step": 44419 }, { "epoch": 0.978807560307833, "grad_norm": 0.510185718536377, "learning_rate": 3.526608012746069e-08, "loss": 0.0597, "step": 44420 }, { "epoch": 0.9788295955973492, "grad_norm": 0.7069968581199646, "learning_rate": 3.519275246446241e-08, "loss": 0.0681, "step": 44421 }, { "epoch": 0.9788516308868653, "grad_norm": 0.43651455640792847, "learning_rate": 3.511950102525141e-08, "loss": 0.0449, "step": 44422 }, { "epoch": 0.9788736661763815, "grad_norm": 0.548829197883606, "learning_rate": 3.5046325810202396e-08, "loss": 0.0623, "step": 44423 }, { "epoch": 0.9788957014658977, "grad_norm": 0.6012488007545471, "learning_rate": 3.497322681968507e-08, "loss": 0.0676, "step": 44424 }, { "epoch": 0.9789177367554138, "grad_norm": 0.6560308933258057, "learning_rate": 3.490020405407579e-08, "loss": 0.0385, "step": 44425 }, { "epoch": 0.97893977204493, "grad_norm": 0.5060573220252991, "learning_rate": 3.4827257513744273e-08, "loss": 0.0608, "step": 44426 }, { "epoch": 0.9789618073344462, "grad_norm": 0.5279355645179749, "learning_rate": 3.4754387199063545e-08, "loss": 0.0728, "step": 44427 }, { "epoch": 0.9789838426239623, "grad_norm": 0.45535334944725037, "learning_rate": 3.4681593110401644e-08, "loss": 0.0679, "step": 44428 }, { "epoch": 0.9790058779134785, "grad_norm": 0.6132148504257202, "learning_rate": 3.460887524813328e-08, "loss": 0.0816, "step": 44429 }, { "epoch": 0.9790279132029946, "grad_norm": 0.5378783345222473, "learning_rate": 3.4536233612626476e-08, "loss": 0.0528, "step": 44430 }, { "epoch": 0.9790499484925107, "grad_norm": 0.4879997670650482, "learning_rate": 3.4463668204250954e-08, "loss": 0.0715, "step": 44431 }, { "epoch": 0.9790719837820269, "grad_norm": 0.5090328454971313, "learning_rate": 3.439117902337807e-08, "loss": 0.0741, "step": 44432 }, { "epoch": 0.979094019071543, "grad_norm": 0.494080513715744, "learning_rate": 3.4318766070375874e-08, "loss": 0.0706, "step": 44433 }, { "epoch": 0.9791160543610592, "grad_norm": 0.39125436544418335, "learning_rate": 3.424642934561406e-08, "loss": 0.0685, "step": 44434 }, { "epoch": 0.9791380896505754, "grad_norm": 0.4965154230594635, "learning_rate": 3.417416884945901e-08, "loss": 0.0602, "step": 44435 }, { "epoch": 0.9791601249400915, "grad_norm": 0.4733569324016571, "learning_rate": 3.410198458228042e-08, "loss": 0.0564, "step": 44436 }, { "epoch": 0.9791821602296077, "grad_norm": 0.7594168186187744, "learning_rate": 3.4029876544446336e-08, "loss": 0.0473, "step": 44437 }, { "epoch": 0.9792041955191239, "grad_norm": 0.7768568992614746, "learning_rate": 3.395784473632313e-08, "loss": 0.0699, "step": 44438 }, { "epoch": 0.97922623080864, "grad_norm": 0.7711856365203857, "learning_rate": 3.388588915827884e-08, "loss": 0.0552, "step": 44439 }, { "epoch": 0.9792482660981562, "grad_norm": 0.6143009066581726, "learning_rate": 3.3814009810679835e-08, "loss": 0.0575, "step": 44440 }, { "epoch": 0.9792703013876723, "grad_norm": 0.459821492433548, "learning_rate": 3.374220669389083e-08, "loss": 0.0376, "step": 44441 }, { "epoch": 0.9792923366771885, "grad_norm": 0.632763683795929, "learning_rate": 3.367047980827653e-08, "loss": 0.0478, "step": 44442 }, { "epoch": 0.9793143719667047, "grad_norm": 0.5190796852111816, "learning_rate": 3.3598829154206646e-08, "loss": 0.0852, "step": 44443 }, { "epoch": 0.9793364072562208, "grad_norm": 0.5477760434150696, "learning_rate": 3.352725473204088e-08, "loss": 0.0655, "step": 44444 }, { "epoch": 0.979358442545737, "grad_norm": 0.5008255243301392, "learning_rate": 3.3455756542148944e-08, "loss": 0.0561, "step": 44445 }, { "epoch": 0.9793804778352532, "grad_norm": 1.00883948802948, "learning_rate": 3.338433458489054e-08, "loss": 0.0978, "step": 44446 }, { "epoch": 0.9794025131247693, "grad_norm": 0.45729970932006836, "learning_rate": 3.331298886063372e-08, "loss": 0.0478, "step": 44447 }, { "epoch": 0.9794245484142855, "grad_norm": 0.3469051122665405, "learning_rate": 3.324171936973819e-08, "loss": 0.0567, "step": 44448 }, { "epoch": 0.9794465837038017, "grad_norm": 0.6444092988967896, "learning_rate": 3.317052611256699e-08, "loss": 0.0822, "step": 44449 }, { "epoch": 0.9794686189933178, "grad_norm": 0.491859495639801, "learning_rate": 3.30994090894865e-08, "loss": 0.0491, "step": 44450 }, { "epoch": 0.979490654282834, "grad_norm": 0.27152952551841736, "learning_rate": 3.302836830085643e-08, "loss": 0.0441, "step": 44451 }, { "epoch": 0.9795126895723502, "grad_norm": 0.5152187347412109, "learning_rate": 3.295740374703815e-08, "loss": 0.0573, "step": 44452 }, { "epoch": 0.9795347248618663, "grad_norm": 0.6058986783027649, "learning_rate": 3.288651542839305e-08, "loss": 0.0644, "step": 44453 }, { "epoch": 0.9795567601513825, "grad_norm": 0.9140679240226746, "learning_rate": 3.281570334528416e-08, "loss": 0.0858, "step": 44454 }, { "epoch": 0.9795787954408987, "grad_norm": 0.7322643995285034, "learning_rate": 3.274496749807121e-08, "loss": 0.057, "step": 44455 }, { "epoch": 0.9796008307304147, "grad_norm": 0.6563279032707214, "learning_rate": 3.26743078871139e-08, "loss": 0.0571, "step": 44456 }, { "epoch": 0.9796228660199309, "grad_norm": 0.1600242406129837, "learning_rate": 3.260372451277194e-08, "loss": 0.0417, "step": 44457 }, { "epoch": 0.979644901309447, "grad_norm": 0.6621809601783752, "learning_rate": 3.2533217375405046e-08, "loss": 0.0564, "step": 44458 }, { "epoch": 0.9796669365989632, "grad_norm": 0.8644923567771912, "learning_rate": 3.246278647537293e-08, "loss": 0.0675, "step": 44459 }, { "epoch": 0.9796889718884794, "grad_norm": 0.5100225210189819, "learning_rate": 3.239243181303364e-08, "loss": 0.0531, "step": 44460 }, { "epoch": 0.9797110071779955, "grad_norm": 0.5877820253372192, "learning_rate": 3.232215338874689e-08, "loss": 0.0519, "step": 44461 }, { "epoch": 0.9797330424675117, "grad_norm": 0.30576765537261963, "learning_rate": 3.2251951202869054e-08, "loss": 0.0562, "step": 44462 }, { "epoch": 0.9797550777570279, "grad_norm": 0.5871654748916626, "learning_rate": 3.2181825255759855e-08, "loss": 0.0553, "step": 44463 }, { "epoch": 0.979777113046544, "grad_norm": 0.5636034607887268, "learning_rate": 3.2111775547774004e-08, "loss": 0.0599, "step": 44464 }, { "epoch": 0.9797991483360602, "grad_norm": 0.33326810598373413, "learning_rate": 3.204180207926788e-08, "loss": 0.0525, "step": 44465 }, { "epoch": 0.9798211836255764, "grad_norm": 0.3736763596534729, "learning_rate": 3.19719048506012e-08, "loss": 0.0686, "step": 44466 }, { "epoch": 0.9798432189150925, "grad_norm": 0.5708817839622498, "learning_rate": 3.190208386212701e-08, "loss": 0.0601, "step": 44467 }, { "epoch": 0.9798652542046087, "grad_norm": 0.6745582818984985, "learning_rate": 3.183233911420336e-08, "loss": 0.0746, "step": 44468 }, { "epoch": 0.9798872894941248, "grad_norm": 0.5981245636940002, "learning_rate": 3.17626706071833e-08, "loss": 0.0474, "step": 44469 }, { "epoch": 0.979909324783641, "grad_norm": 1.058167576789856, "learning_rate": 3.169307834142155e-08, "loss": 0.0892, "step": 44470 }, { "epoch": 0.9799313600731572, "grad_norm": 0.5156674385070801, "learning_rate": 3.162356231727615e-08, "loss": 0.0351, "step": 44471 }, { "epoch": 0.9799533953626733, "grad_norm": 0.24468295276165009, "learning_rate": 3.155412253509682e-08, "loss": 0.0303, "step": 44472 }, { "epoch": 0.9799754306521895, "grad_norm": 0.40180620551109314, "learning_rate": 3.1484758995238285e-08, "loss": 0.0468, "step": 44473 }, { "epoch": 0.9799974659417057, "grad_norm": 0.8064547777175903, "learning_rate": 3.141547169805692e-08, "loss": 0.0467, "step": 44474 }, { "epoch": 0.9800195012312218, "grad_norm": 0.3581303358078003, "learning_rate": 3.134626064390245e-08, "loss": 0.0357, "step": 44475 }, { "epoch": 0.980041536520738, "grad_norm": 0.6098303198814392, "learning_rate": 3.1277125833127915e-08, "loss": 0.0648, "step": 44476 }, { "epoch": 0.9800635718102542, "grad_norm": 0.6527007818222046, "learning_rate": 3.1208067266084715e-08, "loss": 0.0671, "step": 44477 }, { "epoch": 0.9800856070997703, "grad_norm": 0.5257961750030518, "learning_rate": 3.1139084943127563e-08, "loss": 0.0657, "step": 44478 }, { "epoch": 0.9801076423892865, "grad_norm": 0.5007513761520386, "learning_rate": 3.1070178864604504e-08, "loss": 0.0509, "step": 44479 }, { "epoch": 0.9801296776788027, "grad_norm": 0.7631573677062988, "learning_rate": 3.1001349030868597e-08, "loss": 0.0743, "step": 44480 }, { "epoch": 0.9801517129683187, "grad_norm": 0.5244839787483215, "learning_rate": 3.093259544227123e-08, "loss": 0.0308, "step": 44481 }, { "epoch": 0.9801737482578349, "grad_norm": 0.7410730719566345, "learning_rate": 3.086391809916045e-08, "loss": 0.0348, "step": 44482 }, { "epoch": 0.980195783547351, "grad_norm": 0.5217990279197693, "learning_rate": 3.079531700188598e-08, "loss": 0.0622, "step": 44483 }, { "epoch": 0.9802178188368672, "grad_norm": 0.6854845285415649, "learning_rate": 3.072679215079755e-08, "loss": 0.0551, "step": 44484 }, { "epoch": 0.9802398541263834, "grad_norm": 0.31619659066200256, "learning_rate": 3.065834354624653e-08, "loss": 0.0407, "step": 44485 }, { "epoch": 0.9802618894158995, "grad_norm": 0.6791803240776062, "learning_rate": 3.058997118857931e-08, "loss": 0.0556, "step": 44486 }, { "epoch": 0.9802839247054157, "grad_norm": 0.5755109190940857, "learning_rate": 3.0521675078143963e-08, "loss": 0.0658, "step": 44487 }, { "epoch": 0.9803059599949319, "grad_norm": 0.6693764925003052, "learning_rate": 3.045345521528853e-08, "loss": 0.0648, "step": 44488 }, { "epoch": 0.980327995284448, "grad_norm": 0.5760151147842407, "learning_rate": 3.038531160036273e-08, "loss": 0.0693, "step": 44489 }, { "epoch": 0.9803500305739642, "grad_norm": 0.8478508591651917, "learning_rate": 3.03172442337113e-08, "loss": 0.0653, "step": 44490 }, { "epoch": 0.9803720658634804, "grad_norm": 0.3162963092327118, "learning_rate": 3.0249253115680606e-08, "loss": 0.0376, "step": 44491 }, { "epoch": 0.9803941011529965, "grad_norm": 0.5720295310020447, "learning_rate": 3.018133824661873e-08, "loss": 0.0679, "step": 44492 }, { "epoch": 0.9804161364425127, "grad_norm": 0.5540990233421326, "learning_rate": 3.011349962687038e-08, "loss": 0.0387, "step": 44493 }, { "epoch": 0.9804381717320289, "grad_norm": 0.2846137285232544, "learning_rate": 3.0045737256781944e-08, "loss": 0.0505, "step": 44494 }, { "epoch": 0.980460207021545, "grad_norm": 0.5296010375022888, "learning_rate": 2.997805113669649e-08, "loss": 0.0562, "step": 44495 }, { "epoch": 0.9804822423110612, "grad_norm": 0.45917436480522156, "learning_rate": 2.9910441266962073e-08, "loss": 0.0501, "step": 44496 }, { "epoch": 0.9805042776005773, "grad_norm": 0.6785997748374939, "learning_rate": 2.984290764792008e-08, "loss": 0.0476, "step": 44497 }, { "epoch": 0.9805263128900935, "grad_norm": 0.49822917580604553, "learning_rate": 2.9775450279916905e-08, "loss": 0.0477, "step": 44498 }, { "epoch": 0.9805483481796097, "grad_norm": 0.7582333087921143, "learning_rate": 2.970806916329394e-08, "loss": 0.0684, "step": 44499 }, { "epoch": 0.9805703834691258, "grad_norm": 0.5890107750892639, "learning_rate": 2.9640764298397572e-08, "loss": 0.0818, "step": 44500 }, { "epoch": 0.980592418758642, "grad_norm": 0.6887327432632446, "learning_rate": 2.957353568556753e-08, "loss": 0.0758, "step": 44501 }, { "epoch": 0.9806144540481582, "grad_norm": 0.8595513105392456, "learning_rate": 2.9506383325145213e-08, "loss": 0.0722, "step": 44502 }, { "epoch": 0.9806364893376743, "grad_norm": 0.8423827886581421, "learning_rate": 2.9439307217477008e-08, "loss": 0.045, "step": 44503 }, { "epoch": 0.9806585246271905, "grad_norm": 0.3090240955352783, "learning_rate": 2.9372307362900975e-08, "loss": 0.0451, "step": 44504 }, { "epoch": 0.9806805599167066, "grad_norm": 2.113252878189087, "learning_rate": 2.930538376176184e-08, "loss": 0.0452, "step": 44505 }, { "epoch": 0.9807025952062227, "grad_norm": 0.6634348034858704, "learning_rate": 2.9238536414396e-08, "loss": 0.0458, "step": 44506 }, { "epoch": 0.9807246304957389, "grad_norm": 0.6060686111450195, "learning_rate": 2.9171765321146516e-08, "loss": 0.0718, "step": 44507 }, { "epoch": 0.980746665785255, "grad_norm": 0.8328192234039307, "learning_rate": 2.9105070482354778e-08, "loss": 0.0835, "step": 44508 }, { "epoch": 0.9807687010747712, "grad_norm": 0.4065963327884674, "learning_rate": 2.9038451898357188e-08, "loss": 0.1039, "step": 44509 }, { "epoch": 0.9807907363642874, "grad_norm": 0.45897191762924194, "learning_rate": 2.8971909569496802e-08, "loss": 0.0526, "step": 44510 }, { "epoch": 0.9808127716538035, "grad_norm": 1.1260396242141724, "learning_rate": 2.8905443496108354e-08, "loss": 0.0672, "step": 44511 }, { "epoch": 0.9808348069433197, "grad_norm": 0.5322643518447876, "learning_rate": 2.8839053678533234e-08, "loss": 0.0478, "step": 44512 }, { "epoch": 0.9808568422328359, "grad_norm": 0.5554535388946533, "learning_rate": 2.8772740117109507e-08, "loss": 0.0794, "step": 44513 }, { "epoch": 0.980878877522352, "grad_norm": 0.5909533500671387, "learning_rate": 2.8706502812173575e-08, "loss": 0.042, "step": 44514 }, { "epoch": 0.9809009128118682, "grad_norm": 0.6799724698066711, "learning_rate": 2.864034176406516e-08, "loss": 0.0485, "step": 44515 }, { "epoch": 0.9809229481013844, "grad_norm": 0.5211462378501892, "learning_rate": 2.8574256973118997e-08, "loss": 0.0522, "step": 44516 }, { "epoch": 0.9809449833909005, "grad_norm": 0.5138431787490845, "learning_rate": 2.8508248439673147e-08, "loss": 0.0674, "step": 44517 }, { "epoch": 0.9809670186804167, "grad_norm": 0.6416913270950317, "learning_rate": 2.8442316164062343e-08, "loss": 0.0717, "step": 44518 }, { "epoch": 0.9809890539699329, "grad_norm": 0.43813571333885193, "learning_rate": 2.8376460146622985e-08, "loss": 0.0604, "step": 44519 }, { "epoch": 0.981011089259449, "grad_norm": 0.7842591404914856, "learning_rate": 2.8310680387691468e-08, "loss": 0.0604, "step": 44520 }, { "epoch": 0.9810331245489652, "grad_norm": 0.6820411682128906, "learning_rate": 2.824497688760086e-08, "loss": 0.0719, "step": 44521 }, { "epoch": 0.9810551598384813, "grad_norm": 0.37888461351394653, "learning_rate": 2.817934964668756e-08, "loss": 0.0623, "step": 44522 }, { "epoch": 0.9810771951279975, "grad_norm": 0.5617382526397705, "learning_rate": 2.8113798665286293e-08, "loss": 0.0593, "step": 44523 }, { "epoch": 0.9810992304175137, "grad_norm": 0.5179214477539062, "learning_rate": 2.8048323943730137e-08, "loss": 0.0524, "step": 44524 }, { "epoch": 0.9811212657070298, "grad_norm": 0.6211768984794617, "learning_rate": 2.7982925482350485e-08, "loss": 0.0666, "step": 44525 }, { "epoch": 0.981143300996546, "grad_norm": 0.4055761396884918, "learning_rate": 2.791760328148374e-08, "loss": 0.0461, "step": 44526 }, { "epoch": 0.9811653362860622, "grad_norm": 0.45555219054222107, "learning_rate": 2.78523573414613e-08, "loss": 0.0623, "step": 44527 }, { "epoch": 0.9811873715755783, "grad_norm": 0.7125439047813416, "learning_rate": 2.7787187662616232e-08, "loss": 0.0563, "step": 44528 }, { "epoch": 0.9812094068650945, "grad_norm": 0.4519553482532501, "learning_rate": 2.7722094245278274e-08, "loss": 0.0745, "step": 44529 }, { "epoch": 0.9812314421546106, "grad_norm": 0.6403204202651978, "learning_rate": 2.7657077089782156e-08, "loss": 0.0795, "step": 44530 }, { "epoch": 0.9812534774441267, "grad_norm": 0.5622657537460327, "learning_rate": 2.759213619645595e-08, "loss": 0.0726, "step": 44531 }, { "epoch": 0.9812755127336429, "grad_norm": 0.6125984787940979, "learning_rate": 2.7527271565632726e-08, "loss": 0.0461, "step": 44532 }, { "epoch": 0.981297548023159, "grad_norm": 0.7429602742195129, "learning_rate": 2.7462483197640553e-08, "loss": 0.058, "step": 44533 }, { "epoch": 0.9813195833126752, "grad_norm": 0.5920881628990173, "learning_rate": 2.7397771092812495e-08, "loss": 0.0497, "step": 44534 }, { "epoch": 0.9813416186021914, "grad_norm": 0.6471173763275146, "learning_rate": 2.7333135251476625e-08, "loss": 0.0482, "step": 44535 }, { "epoch": 0.9813636538917075, "grad_norm": 0.4533952474594116, "learning_rate": 2.7268575673961016e-08, "loss": 0.0537, "step": 44536 }, { "epoch": 0.9813856891812237, "grad_norm": 0.5516517162322998, "learning_rate": 2.72040923605954e-08, "loss": 0.0632, "step": 44537 }, { "epoch": 0.9814077244707399, "grad_norm": 0.5556795597076416, "learning_rate": 2.7139685311707852e-08, "loss": 0.0396, "step": 44538 }, { "epoch": 0.981429759760256, "grad_norm": 0.6030574440956116, "learning_rate": 2.707535452762977e-08, "loss": 0.0716, "step": 44539 }, { "epoch": 0.9814517950497722, "grad_norm": 0.6067163348197937, "learning_rate": 2.7011100008682566e-08, "loss": 0.0935, "step": 44540 }, { "epoch": 0.9814738303392884, "grad_norm": 0.5490534901618958, "learning_rate": 2.6946921755199304e-08, "loss": 0.0593, "step": 44541 }, { "epoch": 0.9814958656288045, "grad_norm": 0.3567920923233032, "learning_rate": 2.6882819767504728e-08, "loss": 0.0332, "step": 44542 }, { "epoch": 0.9815179009183207, "grad_norm": 0.7146982550621033, "learning_rate": 2.681879404592358e-08, "loss": 0.0549, "step": 44543 }, { "epoch": 0.9815399362078369, "grad_norm": 1.0915049314498901, "learning_rate": 2.675484459078559e-08, "loss": 0.0981, "step": 44544 }, { "epoch": 0.981561971497353, "grad_norm": 0.5835626125335693, "learning_rate": 2.669097140241217e-08, "loss": 0.0602, "step": 44545 }, { "epoch": 0.9815840067868692, "grad_norm": 0.5674527883529663, "learning_rate": 2.662717448113139e-08, "loss": 0.0536, "step": 44546 }, { "epoch": 0.9816060420763854, "grad_norm": 0.5596547722816467, "learning_rate": 2.6563453827269657e-08, "loss": 0.0501, "step": 44547 }, { "epoch": 0.9816280773659015, "grad_norm": 0.39667707681655884, "learning_rate": 2.6499809441148382e-08, "loss": 0.0324, "step": 44548 }, { "epoch": 0.9816501126554177, "grad_norm": 0.4422895014286041, "learning_rate": 2.6436241323092304e-08, "loss": 0.0465, "step": 44549 }, { "epoch": 0.9816721479449338, "grad_norm": 0.9184733629226685, "learning_rate": 2.637274947342616e-08, "loss": 0.0751, "step": 44550 }, { "epoch": 0.98169418323445, "grad_norm": 0.6304145455360413, "learning_rate": 2.6309333892474698e-08, "loss": 0.064, "step": 44551 }, { "epoch": 0.9817162185239662, "grad_norm": 0.6143516302108765, "learning_rate": 2.6245994580557653e-08, "loss": 0.0511, "step": 44552 }, { "epoch": 0.9817382538134823, "grad_norm": 0.36666423082351685, "learning_rate": 2.618273153799977e-08, "loss": 0.0474, "step": 44553 }, { "epoch": 0.9817602891029985, "grad_norm": 0.86503005027771, "learning_rate": 2.611954476512246e-08, "loss": 0.0527, "step": 44554 }, { "epoch": 0.9817823243925146, "grad_norm": 0.45601898431777954, "learning_rate": 2.6056434262248797e-08, "loss": 0.0313, "step": 44555 }, { "epoch": 0.9818043596820307, "grad_norm": 0.5698192119598389, "learning_rate": 2.5993400029698522e-08, "loss": 0.0718, "step": 44556 }, { "epoch": 0.9818263949715469, "grad_norm": 0.8052424788475037, "learning_rate": 2.5930442067794714e-08, "loss": 0.0608, "step": 44557 }, { "epoch": 0.981848430261063, "grad_norm": 0.4827882945537567, "learning_rate": 2.5867560376857114e-08, "loss": 0.0587, "step": 44558 }, { "epoch": 0.9818704655505792, "grad_norm": 0.5460424423217773, "learning_rate": 2.5804754957203802e-08, "loss": 0.0388, "step": 44559 }, { "epoch": 0.9818925008400954, "grad_norm": 0.6758180260658264, "learning_rate": 2.5742025809157854e-08, "loss": 0.072, "step": 44560 }, { "epoch": 0.9819145361296115, "grad_norm": 0.5727227926254272, "learning_rate": 2.5679372933037348e-08, "loss": 0.0558, "step": 44561 }, { "epoch": 0.9819365714191277, "grad_norm": 0.4139557480812073, "learning_rate": 2.5616796329162027e-08, "loss": 0.034, "step": 44562 }, { "epoch": 0.9819586067086439, "grad_norm": 0.46750974655151367, "learning_rate": 2.5554295997849976e-08, "loss": 0.0606, "step": 44563 }, { "epoch": 0.98198064199816, "grad_norm": 0.7780500054359436, "learning_rate": 2.5491871939419264e-08, "loss": 0.0616, "step": 44564 }, { "epoch": 0.9820026772876762, "grad_norm": 0.39379259943962097, "learning_rate": 2.5429524154189643e-08, "loss": 0.0354, "step": 44565 }, { "epoch": 0.9820247125771924, "grad_norm": 0.7109723687171936, "learning_rate": 2.5367252642477524e-08, "loss": 0.0764, "step": 44566 }, { "epoch": 0.9820467478667085, "grad_norm": 0.5448946952819824, "learning_rate": 2.530505740459932e-08, "loss": 0.054, "step": 44567 }, { "epoch": 0.9820687831562247, "grad_norm": 0.6705011129379272, "learning_rate": 2.5242938440871444e-08, "loss": 0.0533, "step": 44568 }, { "epoch": 0.9820908184457409, "grad_norm": 0.8711276054382324, "learning_rate": 2.5180895751613642e-08, "loss": 0.0836, "step": 44569 }, { "epoch": 0.982112853735257, "grad_norm": 0.7294828295707703, "learning_rate": 2.511892933713733e-08, "loss": 0.0891, "step": 44570 }, { "epoch": 0.9821348890247732, "grad_norm": 0.5534096360206604, "learning_rate": 2.5057039197762255e-08, "loss": 0.0811, "step": 44571 }, { "epoch": 0.9821569243142894, "grad_norm": 0.35619446635246277, "learning_rate": 2.4995225333801495e-08, "loss": 0.0503, "step": 44572 }, { "epoch": 0.9821789596038055, "grad_norm": 0.5625457167625427, "learning_rate": 2.4933487745569804e-08, "loss": 0.0679, "step": 44573 }, { "epoch": 0.9822009948933217, "grad_norm": 0.6088635325431824, "learning_rate": 2.4871826433383594e-08, "loss": 0.0782, "step": 44574 }, { "epoch": 0.9822230301828379, "grad_norm": 0.5329889059066772, "learning_rate": 2.4810241397554278e-08, "loss": 0.0479, "step": 44575 }, { "epoch": 0.982245065472354, "grad_norm": 0.5028389692306519, "learning_rate": 2.474873263839661e-08, "loss": 0.0524, "step": 44576 }, { "epoch": 0.9822671007618702, "grad_norm": 0.6608624458312988, "learning_rate": 2.4687300156223667e-08, "loss": 0.0721, "step": 44577 }, { "epoch": 0.9822891360513863, "grad_norm": 0.7272796630859375, "learning_rate": 2.462594395134854e-08, "loss": 0.0666, "step": 44578 }, { "epoch": 0.9823111713409024, "grad_norm": 0.5477401614189148, "learning_rate": 2.456466402408597e-08, "loss": 0.0469, "step": 44579 }, { "epoch": 0.9823332066304186, "grad_norm": 0.6865639686584473, "learning_rate": 2.4503460374744047e-08, "loss": 0.0535, "step": 44580 }, { "epoch": 0.9823552419199347, "grad_norm": 0.397964209318161, "learning_rate": 2.444233300363752e-08, "loss": 0.0415, "step": 44581 }, { "epoch": 0.9823772772094509, "grad_norm": 0.4285789728164673, "learning_rate": 2.4381281911076138e-08, "loss": 0.0817, "step": 44582 }, { "epoch": 0.9823993124989671, "grad_norm": 0.5267122387886047, "learning_rate": 2.4320307097371318e-08, "loss": 0.0661, "step": 44583 }, { "epoch": 0.9824213477884832, "grad_norm": 0.4660438001155853, "learning_rate": 2.4259408562834484e-08, "loss": 0.0302, "step": 44584 }, { "epoch": 0.9824433830779994, "grad_norm": 0.9216672778129578, "learning_rate": 2.4198586307773717e-08, "loss": 0.091, "step": 44585 }, { "epoch": 0.9824654183675156, "grad_norm": 0.7439815998077393, "learning_rate": 2.4137840332502105e-08, "loss": 0.0607, "step": 44586 }, { "epoch": 0.9824874536570317, "grad_norm": 0.8415820598602295, "learning_rate": 2.407717063732606e-08, "loss": 0.0415, "step": 44587 }, { "epoch": 0.9825094889465479, "grad_norm": 0.4737301766872406, "learning_rate": 2.4016577222555346e-08, "loss": 0.0582, "step": 44588 }, { "epoch": 0.982531524236064, "grad_norm": 0.3329610824584961, "learning_rate": 2.395606008849971e-08, "loss": 0.055, "step": 44589 }, { "epoch": 0.9825535595255802, "grad_norm": 0.43411752581596375, "learning_rate": 2.3895619235467237e-08, "loss": 0.0576, "step": 44590 }, { "epoch": 0.9825755948150964, "grad_norm": 0.5784937739372253, "learning_rate": 2.383525466376435e-08, "loss": 0.0598, "step": 44591 }, { "epoch": 0.9825976301046125, "grad_norm": 0.7762179374694824, "learning_rate": 2.3774966373699136e-08, "loss": 0.0658, "step": 44592 }, { "epoch": 0.9826196653941287, "grad_norm": 0.36059537529945374, "learning_rate": 2.3714754365579683e-08, "loss": 0.0591, "step": 44593 }, { "epoch": 0.9826417006836449, "grad_norm": 0.6312477588653564, "learning_rate": 2.3654618639710744e-08, "loss": 0.0577, "step": 44594 }, { "epoch": 0.982663735973161, "grad_norm": 0.41466471552848816, "learning_rate": 2.359455919640041e-08, "loss": 0.0736, "step": 44595 }, { "epoch": 0.9826857712626772, "grad_norm": 0.5675804018974304, "learning_rate": 2.3534576035953438e-08, "loss": 0.0513, "step": 44596 }, { "epoch": 0.9827078065521934, "grad_norm": 0.4023328423500061, "learning_rate": 2.347466915867791e-08, "loss": 0.0594, "step": 44597 }, { "epoch": 0.9827298418417095, "grad_norm": 0.5940790772438049, "learning_rate": 2.341483856487525e-08, "loss": 0.0615, "step": 44598 }, { "epoch": 0.9827518771312257, "grad_norm": 0.9265324473381042, "learning_rate": 2.3355084254851888e-08, "loss": 0.0646, "step": 44599 }, { "epoch": 0.9827739124207419, "grad_norm": 0.3925766944885254, "learning_rate": 2.3295406228912575e-08, "loss": 0.052, "step": 44600 }, { "epoch": 0.982795947710258, "grad_norm": 0.7181832790374756, "learning_rate": 2.32358044873604e-08, "loss": 0.0694, "step": 44601 }, { "epoch": 0.9828179829997742, "grad_norm": 0.4017598628997803, "learning_rate": 2.3176279030500126e-08, "loss": 0.0415, "step": 44602 }, { "epoch": 0.9828400182892904, "grad_norm": 0.4351016879081726, "learning_rate": 2.311682985863317e-08, "loss": 0.056, "step": 44603 }, { "epoch": 0.9828620535788064, "grad_norm": 0.23612624406814575, "learning_rate": 2.305745697206596e-08, "loss": 0.0675, "step": 44604 }, { "epoch": 0.9828840888683226, "grad_norm": 0.6811280250549316, "learning_rate": 2.299816037109659e-08, "loss": 0.0457, "step": 44605 }, { "epoch": 0.9829061241578387, "grad_norm": 0.5488759279251099, "learning_rate": 2.2938940056028144e-08, "loss": 0.0618, "step": 44606 }, { "epoch": 0.9829281594473549, "grad_norm": 0.3361002206802368, "learning_rate": 2.2879796027163723e-08, "loss": 0.0343, "step": 44607 }, { "epoch": 0.9829501947368711, "grad_norm": 0.6859575510025024, "learning_rate": 2.282072828480475e-08, "loss": 0.0531, "step": 44608 }, { "epoch": 0.9829722300263872, "grad_norm": 0.5357553958892822, "learning_rate": 2.2761736829249313e-08, "loss": 0.0394, "step": 44609 }, { "epoch": 0.9829942653159034, "grad_norm": 0.8904128670692444, "learning_rate": 2.2702821660802177e-08, "loss": 0.0594, "step": 44610 }, { "epoch": 0.9830163006054196, "grad_norm": 0.6038166284561157, "learning_rate": 2.2643982779759766e-08, "loss": 0.0747, "step": 44611 }, { "epoch": 0.9830383358949357, "grad_norm": 0.6349204182624817, "learning_rate": 2.2585220186423505e-08, "loss": 0.0484, "step": 44612 }, { "epoch": 0.9830603711844519, "grad_norm": 0.7578513026237488, "learning_rate": 2.2526533881091494e-08, "loss": 0.0857, "step": 44613 }, { "epoch": 0.983082406473968, "grad_norm": 0.42375317215919495, "learning_rate": 2.246792386406349e-08, "loss": 0.0509, "step": 44614 }, { "epoch": 0.9831044417634842, "grad_norm": 0.4704645872116089, "learning_rate": 2.2409390135637587e-08, "loss": 0.0495, "step": 44615 }, { "epoch": 0.9831264770530004, "grad_norm": 0.470379501581192, "learning_rate": 2.2350932696113548e-08, "loss": 0.0887, "step": 44616 }, { "epoch": 0.9831485123425165, "grad_norm": 0.44622206687927246, "learning_rate": 2.22925515457878e-08, "loss": 0.0627, "step": 44617 }, { "epoch": 0.9831705476320327, "grad_norm": 0.22794613242149353, "learning_rate": 2.223424668495677e-08, "loss": 0.0529, "step": 44618 }, { "epoch": 0.9831925829215489, "grad_norm": 0.8013867735862732, "learning_rate": 2.217601811391856e-08, "loss": 0.0677, "step": 44619 }, { "epoch": 0.983214618211065, "grad_norm": 0.45388033986091614, "learning_rate": 2.2117865832969596e-08, "loss": 0.0683, "step": 44620 }, { "epoch": 0.9832366535005812, "grad_norm": 0.7410857081413269, "learning_rate": 2.2059789842406307e-08, "loss": 0.0793, "step": 44621 }, { "epoch": 0.9832586887900974, "grad_norm": 0.9158787131309509, "learning_rate": 2.2001790142525125e-08, "loss": 0.0799, "step": 44622 }, { "epoch": 0.9832807240796135, "grad_norm": 0.8450411558151245, "learning_rate": 2.1943866733619145e-08, "loss": 0.0868, "step": 44623 }, { "epoch": 0.9833027593691297, "grad_norm": 0.6494102478027344, "learning_rate": 2.1886019615986464e-08, "loss": 0.0504, "step": 44624 }, { "epoch": 0.9833247946586459, "grad_norm": 0.6880136132240295, "learning_rate": 2.1828248789918513e-08, "loss": 0.0609, "step": 44625 }, { "epoch": 0.983346829948162, "grad_norm": 0.5204823613166809, "learning_rate": 2.1770554255713394e-08, "loss": 0.0598, "step": 44626 }, { "epoch": 0.9833688652376782, "grad_norm": 1.1188889741897583, "learning_rate": 2.1712936013660868e-08, "loss": 0.0838, "step": 44627 }, { "epoch": 0.9833909005271944, "grad_norm": 0.7541407346725464, "learning_rate": 2.1655394064057367e-08, "loss": 0.0584, "step": 44628 }, { "epoch": 0.9834129358167104, "grad_norm": 0.7999773025512695, "learning_rate": 2.1597928407194324e-08, "loss": 0.0611, "step": 44629 }, { "epoch": 0.9834349711062266, "grad_norm": 0.5649659037590027, "learning_rate": 2.15405390433665e-08, "loss": 0.076, "step": 44630 }, { "epoch": 0.9834570063957427, "grad_norm": 1.001521110534668, "learning_rate": 2.1483225972863672e-08, "loss": 0.056, "step": 44631 }, { "epoch": 0.9834790416852589, "grad_norm": 0.4834403693675995, "learning_rate": 2.1425989195978935e-08, "loss": 0.0518, "step": 44632 }, { "epoch": 0.9835010769747751, "grad_norm": 0.5850731134414673, "learning_rate": 2.1368828713003717e-08, "loss": 0.0611, "step": 44633 }, { "epoch": 0.9835231122642912, "grad_norm": 0.5855774879455566, "learning_rate": 2.131174452422946e-08, "loss": 0.0498, "step": 44634 }, { "epoch": 0.9835451475538074, "grad_norm": 0.48158028721809387, "learning_rate": 2.1254736629945926e-08, "loss": 0.0578, "step": 44635 }, { "epoch": 0.9835671828433236, "grad_norm": 0.5231695771217346, "learning_rate": 2.119780503044455e-08, "loss": 0.0413, "step": 44636 }, { "epoch": 0.9835892181328397, "grad_norm": 0.7687872052192688, "learning_rate": 2.114094972601677e-08, "loss": 0.0931, "step": 44637 }, { "epoch": 0.9836112534223559, "grad_norm": 0.926304042339325, "learning_rate": 2.1084170716949015e-08, "loss": 0.066, "step": 44638 }, { "epoch": 0.9836332887118721, "grad_norm": 1.0518770217895508, "learning_rate": 2.102746800353106e-08, "loss": 0.0787, "step": 44639 }, { "epoch": 0.9836553240013882, "grad_norm": 0.7949641346931458, "learning_rate": 2.0970841586054336e-08, "loss": 0.0859, "step": 44640 }, { "epoch": 0.9836773592909044, "grad_norm": 0.6682931184768677, "learning_rate": 2.0914291464803614e-08, "loss": 0.0439, "step": 44641 }, { "epoch": 0.9836993945804205, "grad_norm": 0.49972257018089294, "learning_rate": 2.085781764007033e-08, "loss": 0.0559, "step": 44642 }, { "epoch": 0.9837214298699367, "grad_norm": 0.8548154234886169, "learning_rate": 2.0801420112140922e-08, "loss": 0.0699, "step": 44643 }, { "epoch": 0.9837434651594529, "grad_norm": 0.32009226083755493, "learning_rate": 2.074509888130183e-08, "loss": 0.0382, "step": 44644 }, { "epoch": 0.983765500448969, "grad_norm": 0.510819673538208, "learning_rate": 2.068885394783948e-08, "loss": 0.0411, "step": 44645 }, { "epoch": 0.9837875357384852, "grad_norm": 0.6944751143455505, "learning_rate": 2.0632685312041987e-08, "loss": 0.0689, "step": 44646 }, { "epoch": 0.9838095710280014, "grad_norm": 0.8714041709899902, "learning_rate": 2.0576592974195785e-08, "loss": 0.0663, "step": 44647 }, { "epoch": 0.9838316063175175, "grad_norm": 0.5312824249267578, "learning_rate": 2.052057693458398e-08, "loss": 0.0288, "step": 44648 }, { "epoch": 0.9838536416070337, "grad_norm": 0.36331528425216675, "learning_rate": 2.0464637193494673e-08, "loss": 0.0472, "step": 44649 }, { "epoch": 0.9838756768965499, "grad_norm": 0.5790613889694214, "learning_rate": 2.0408773751210973e-08, "loss": 0.0581, "step": 44650 }, { "epoch": 0.983897712186066, "grad_norm": 0.574964702129364, "learning_rate": 2.0352986608017653e-08, "loss": 0.0487, "step": 44651 }, { "epoch": 0.9839197474755822, "grad_norm": 0.5212172269821167, "learning_rate": 2.0297275764199486e-08, "loss": 0.0445, "step": 44652 }, { "epoch": 0.9839417827650982, "grad_norm": 0.44765713810920715, "learning_rate": 2.024164122004124e-08, "loss": 0.0901, "step": 44653 }, { "epoch": 0.9839638180546144, "grad_norm": 0.6706082820892334, "learning_rate": 2.018608297582436e-08, "loss": 0.0552, "step": 44654 }, { "epoch": 0.9839858533441306, "grad_norm": 0.38569772243499756, "learning_rate": 2.0130601031831953e-08, "loss": 0.0471, "step": 44655 }, { "epoch": 0.9840078886336467, "grad_norm": 0.5256693959236145, "learning_rate": 2.007519538834879e-08, "loss": 0.0613, "step": 44656 }, { "epoch": 0.9840299239231629, "grad_norm": 0.6081460118293762, "learning_rate": 2.0019866045654646e-08, "loss": 0.0815, "step": 44657 }, { "epoch": 0.9840519592126791, "grad_norm": 0.6385204195976257, "learning_rate": 1.9964613004032627e-08, "loss": 0.0649, "step": 44658 }, { "epoch": 0.9840739945021952, "grad_norm": 0.5071874856948853, "learning_rate": 1.9909436263762516e-08, "loss": 0.059, "step": 44659 }, { "epoch": 0.9840960297917114, "grad_norm": 0.3810344934463501, "learning_rate": 1.9854335825127413e-08, "loss": 0.0466, "step": 44660 }, { "epoch": 0.9841180650812276, "grad_norm": 0.5860251784324646, "learning_rate": 1.9799311688407096e-08, "loss": 0.041, "step": 44661 }, { "epoch": 0.9841401003707437, "grad_norm": 0.62703537940979, "learning_rate": 1.9744363853883006e-08, "loss": 0.0461, "step": 44662 }, { "epoch": 0.9841621356602599, "grad_norm": 0.5874946713447571, "learning_rate": 1.968949232183326e-08, "loss": 0.0558, "step": 44663 }, { "epoch": 0.9841841709497761, "grad_norm": 0.6143221259117126, "learning_rate": 1.9634697092535957e-08, "loss": 0.0719, "step": 44664 }, { "epoch": 0.9842062062392922, "grad_norm": 0.4705578088760376, "learning_rate": 1.9579978166274216e-08, "loss": 0.0437, "step": 44665 }, { "epoch": 0.9842282415288084, "grad_norm": 0.9197514653205872, "learning_rate": 1.9525335543324475e-08, "loss": 0.0469, "step": 44666 }, { "epoch": 0.9842502768183246, "grad_norm": 0.6487930417060852, "learning_rate": 1.947076922396651e-08, "loss": 0.0839, "step": 44667 }, { "epoch": 0.9842723121078407, "grad_norm": 0.5535683035850525, "learning_rate": 1.9416279208475107e-08, "loss": 0.0528, "step": 44668 }, { "epoch": 0.9842943473973569, "grad_norm": 0.458539217710495, "learning_rate": 1.9361865497131702e-08, "loss": 0.0596, "step": 44669 }, { "epoch": 0.984316382686873, "grad_norm": 0.4329580068588257, "learning_rate": 1.9307528090209414e-08, "loss": 0.0553, "step": 44670 }, { "epoch": 0.9843384179763892, "grad_norm": 0.4112444221973419, "learning_rate": 1.9253266987988017e-08, "loss": 0.0594, "step": 44671 }, { "epoch": 0.9843604532659054, "grad_norm": 1.092495083808899, "learning_rate": 1.9199082190743957e-08, "loss": 0.0526, "step": 44672 }, { "epoch": 0.9843824885554215, "grad_norm": 0.577523946762085, "learning_rate": 1.914497369875201e-08, "loss": 0.0595, "step": 44673 }, { "epoch": 0.9844045238449377, "grad_norm": 0.49949193000793457, "learning_rate": 1.909094151228863e-08, "loss": 0.0427, "step": 44674 }, { "epoch": 0.9844265591344539, "grad_norm": 0.506915271282196, "learning_rate": 1.9036985631626926e-08, "loss": 0.0521, "step": 44675 }, { "epoch": 0.98444859442397, "grad_norm": 0.46521177887916565, "learning_rate": 1.8983106057043344e-08, "loss": 0.0477, "step": 44676 }, { "epoch": 0.9844706297134862, "grad_norm": 0.5344268083572388, "learning_rate": 1.892930278881433e-08, "loss": 0.0524, "step": 44677 }, { "epoch": 0.9844926650030023, "grad_norm": 0.6863265633583069, "learning_rate": 1.8875575827209666e-08, "loss": 0.0667, "step": 44678 }, { "epoch": 0.9845147002925184, "grad_norm": 0.40904128551483154, "learning_rate": 1.8821925172505804e-08, "loss": 0.0337, "step": 44679 }, { "epoch": 0.9845367355820346, "grad_norm": 0.5918290615081787, "learning_rate": 1.8768350824975854e-08, "loss": 0.0389, "step": 44680 }, { "epoch": 0.9845587708715507, "grad_norm": 0.8003235459327698, "learning_rate": 1.8714852784892934e-08, "loss": 0.0551, "step": 44681 }, { "epoch": 0.9845808061610669, "grad_norm": 0.5894453525543213, "learning_rate": 1.8661431052526822e-08, "loss": 0.0542, "step": 44682 }, { "epoch": 0.9846028414505831, "grad_norm": 0.6902034878730774, "learning_rate": 1.860808562815397e-08, "loss": 0.0598, "step": 44683 }, { "epoch": 0.9846248767400992, "grad_norm": 0.34404265880584717, "learning_rate": 1.8554816512042495e-08, "loss": 0.0349, "step": 44684 }, { "epoch": 0.9846469120296154, "grad_norm": 0.8679744601249695, "learning_rate": 1.8501623704465508e-08, "loss": 0.0656, "step": 44685 }, { "epoch": 0.9846689473191316, "grad_norm": 0.20039623975753784, "learning_rate": 1.84485072056928e-08, "loss": 0.0457, "step": 44686 }, { "epoch": 0.9846909826086477, "grad_norm": 0.7440140843391418, "learning_rate": 1.8395467015995814e-08, "loss": 0.0499, "step": 44687 }, { "epoch": 0.9847130178981639, "grad_norm": 0.4575813114643097, "learning_rate": 1.834250313564434e-08, "loss": 0.0622, "step": 44688 }, { "epoch": 0.9847350531876801, "grad_norm": 0.11123207211494446, "learning_rate": 1.8289615564908156e-08, "loss": 0.0443, "step": 44689 }, { "epoch": 0.9847570884771962, "grad_norm": 0.8379142880439758, "learning_rate": 1.823680430405705e-08, "loss": 0.067, "step": 44690 }, { "epoch": 0.9847791237667124, "grad_norm": 0.3771555423736572, "learning_rate": 1.818406935336081e-08, "loss": 0.0644, "step": 44691 }, { "epoch": 0.9848011590562286, "grad_norm": 0.4423554539680481, "learning_rate": 1.8131410713085884e-08, "loss": 0.0582, "step": 44692 }, { "epoch": 0.9848231943457447, "grad_norm": 0.28084617853164673, "learning_rate": 1.8078828383502056e-08, "loss": 0.0397, "step": 44693 }, { "epoch": 0.9848452296352609, "grad_norm": 0.8862844705581665, "learning_rate": 1.802632236487578e-08, "loss": 0.0797, "step": 44694 }, { "epoch": 0.984867264924777, "grad_norm": 0.6541591286659241, "learning_rate": 1.797389265747684e-08, "loss": 0.0396, "step": 44695 }, { "epoch": 0.9848893002142932, "grad_norm": 1.0617464780807495, "learning_rate": 1.792153926157003e-08, "loss": 0.0617, "step": 44696 }, { "epoch": 0.9849113355038094, "grad_norm": 0.7934185862541199, "learning_rate": 1.7869262177423462e-08, "loss": 0.062, "step": 44697 }, { "epoch": 0.9849333707933255, "grad_norm": 0.43537887930870056, "learning_rate": 1.7817061405301926e-08, "loss": 0.043, "step": 44698 }, { "epoch": 0.9849554060828417, "grad_norm": 0.6929253935813904, "learning_rate": 1.7764936945471877e-08, "loss": 0.0539, "step": 44699 }, { "epoch": 0.9849774413723579, "grad_norm": 0.6205780506134033, "learning_rate": 1.7712888798199766e-08, "loss": 0.0391, "step": 44700 }, { "epoch": 0.984999476661874, "grad_norm": 0.4852784276008606, "learning_rate": 1.7660916963750383e-08, "loss": 0.0896, "step": 44701 }, { "epoch": 0.9850215119513902, "grad_norm": 0.7115336656570435, "learning_rate": 1.760902144238852e-08, "loss": 0.0577, "step": 44702 }, { "epoch": 0.9850435472409063, "grad_norm": 0.8401269316673279, "learning_rate": 1.755720223437729e-08, "loss": 0.06, "step": 44703 }, { "epoch": 0.9850655825304224, "grad_norm": 0.19163724780082703, "learning_rate": 1.750545933998149e-08, "loss": 0.0449, "step": 44704 }, { "epoch": 0.9850876178199386, "grad_norm": 0.5657796263694763, "learning_rate": 1.7453792759464238e-08, "loss": 0.0498, "step": 44705 }, { "epoch": 0.9851096531094548, "grad_norm": 0.41138604283332825, "learning_rate": 1.740220249308866e-08, "loss": 0.0597, "step": 44706 }, { "epoch": 0.9851316883989709, "grad_norm": 0.5533481240272522, "learning_rate": 1.735068854111954e-08, "loss": 0.0523, "step": 44707 }, { "epoch": 0.9851537236884871, "grad_norm": 0.6280024647712708, "learning_rate": 1.7299250903816678e-08, "loss": 0.0602, "step": 44708 }, { "epoch": 0.9851757589780032, "grad_norm": 0.5742759704589844, "learning_rate": 1.7247889581444854e-08, "loss": 0.0429, "step": 44709 }, { "epoch": 0.9851977942675194, "grad_norm": 0.512703001499176, "learning_rate": 1.7196604574262197e-08, "loss": 0.0417, "step": 44710 }, { "epoch": 0.9852198295570356, "grad_norm": 1.0179609060287476, "learning_rate": 1.7145395882531833e-08, "loss": 0.0706, "step": 44711 }, { "epoch": 0.9852418648465517, "grad_norm": 0.563080370426178, "learning_rate": 1.709426350651355e-08, "loss": 0.0546, "step": 44712 }, { "epoch": 0.9852639001360679, "grad_norm": 0.40111905336380005, "learning_rate": 1.7043207446470477e-08, "loss": 0.0659, "step": 44713 }, { "epoch": 0.9852859354255841, "grad_norm": 1.2190498113632202, "learning_rate": 1.699222770265907e-08, "loss": 0.0669, "step": 44714 }, { "epoch": 0.9853079707151002, "grad_norm": 0.9724299311637878, "learning_rate": 1.6941324275342452e-08, "loss": 0.0908, "step": 44715 }, { "epoch": 0.9853300060046164, "grad_norm": 0.7427197098731995, "learning_rate": 1.6890497164777085e-08, "loss": 0.05, "step": 44716 }, { "epoch": 0.9853520412941326, "grad_norm": 0.3598284125328064, "learning_rate": 1.6839746371222763e-08, "loss": 0.0523, "step": 44717 }, { "epoch": 0.9853740765836487, "grad_norm": 0.6619433164596558, "learning_rate": 1.678907189493928e-08, "loss": 0.0798, "step": 44718 }, { "epoch": 0.9853961118731649, "grad_norm": 0.7648217082023621, "learning_rate": 1.673847373618309e-08, "loss": 0.0664, "step": 44719 }, { "epoch": 0.9854181471626811, "grad_norm": 0.708713173866272, "learning_rate": 1.6687951895213992e-08, "loss": 0.0522, "step": 44720 }, { "epoch": 0.9854401824521972, "grad_norm": 0.772266685962677, "learning_rate": 1.6637506372285116e-08, "loss": 0.0587, "step": 44721 }, { "epoch": 0.9854622177417134, "grad_norm": 0.4178193211555481, "learning_rate": 1.658713716765792e-08, "loss": 0.0658, "step": 44722 }, { "epoch": 0.9854842530312296, "grad_norm": 0.6904637217521667, "learning_rate": 1.6536844281587194e-08, "loss": 0.0621, "step": 44723 }, { "epoch": 0.9855062883207457, "grad_norm": 0.585679292678833, "learning_rate": 1.6486627714329405e-08, "loss": 0.0691, "step": 44724 }, { "epoch": 0.9855283236102619, "grad_norm": 0.5802313089370728, "learning_rate": 1.6436487466137685e-08, "loss": 0.0592, "step": 44725 }, { "epoch": 0.985550358899778, "grad_norm": 0.47642359137535095, "learning_rate": 1.6386423537271822e-08, "loss": 0.0528, "step": 44726 }, { "epoch": 0.9855723941892942, "grad_norm": 0.44966092705726624, "learning_rate": 1.633643592798495e-08, "loss": 0.0534, "step": 44727 }, { "epoch": 0.9855944294788103, "grad_norm": 0.8052456378936768, "learning_rate": 1.6286524638530198e-08, "loss": 0.0437, "step": 44728 }, { "epoch": 0.9856164647683264, "grad_norm": 0.6498317122459412, "learning_rate": 1.6236689669164027e-08, "loss": 0.0376, "step": 44729 }, { "epoch": 0.9856385000578426, "grad_norm": 0.8259749412536621, "learning_rate": 1.6186931020139574e-08, "loss": 0.0605, "step": 44730 }, { "epoch": 0.9856605353473588, "grad_norm": 0.6266998052597046, "learning_rate": 1.6137248691709962e-08, "loss": 0.057, "step": 44731 }, { "epoch": 0.9856825706368749, "grad_norm": 0.8153507709503174, "learning_rate": 1.6087642684126657e-08, "loss": 0.0633, "step": 44732 }, { "epoch": 0.9857046059263911, "grad_norm": 0.313229501247406, "learning_rate": 1.6038112997646126e-08, "loss": 0.0547, "step": 44733 }, { "epoch": 0.9857266412159073, "grad_norm": 0.85482257604599, "learning_rate": 1.59886596325165e-08, "loss": 0.0584, "step": 44734 }, { "epoch": 0.9857486765054234, "grad_norm": 0.6814959049224854, "learning_rate": 1.5939282588992577e-08, "loss": 0.0403, "step": 44735 }, { "epoch": 0.9857707117949396, "grad_norm": 0.3700375556945801, "learning_rate": 1.588998186732582e-08, "loss": 0.0439, "step": 44736 }, { "epoch": 0.9857927470844557, "grad_norm": 0.4411483407020569, "learning_rate": 1.5840757467766032e-08, "loss": 0.0464, "step": 44737 }, { "epoch": 0.9858147823739719, "grad_norm": 0.6652941107749939, "learning_rate": 1.5791609390564676e-08, "loss": 0.0593, "step": 44738 }, { "epoch": 0.9858368176634881, "grad_norm": 0.658002495765686, "learning_rate": 1.5742537635971556e-08, "loss": 0.0506, "step": 44739 }, { "epoch": 0.9858588529530042, "grad_norm": 0.5701978802680969, "learning_rate": 1.5693542204236466e-08, "loss": 0.0527, "step": 44740 }, { "epoch": 0.9858808882425204, "grad_norm": 0.22790393233299255, "learning_rate": 1.5644623095609212e-08, "loss": 0.0445, "step": 44741 }, { "epoch": 0.9859029235320366, "grad_norm": 0.40260711312294006, "learning_rate": 1.5595780310339593e-08, "loss": 0.0332, "step": 44742 }, { "epoch": 0.9859249588215527, "grad_norm": 0.26613715291023254, "learning_rate": 1.554701384867574e-08, "loss": 0.0399, "step": 44743 }, { "epoch": 0.9859469941110689, "grad_norm": 0.5210495591163635, "learning_rate": 1.5498323710867456e-08, "loss": 0.0515, "step": 44744 }, { "epoch": 0.9859690294005851, "grad_norm": 0.5041109323501587, "learning_rate": 1.544970989715955e-08, "loss": 0.0613, "step": 44745 }, { "epoch": 0.9859910646901012, "grad_norm": 0.7040785551071167, "learning_rate": 1.5401172407803475e-08, "loss": 0.0559, "step": 44746 }, { "epoch": 0.9860130999796174, "grad_norm": 0.38483309745788574, "learning_rate": 1.535271124304405e-08, "loss": 0.0353, "step": 44747 }, { "epoch": 0.9860351352691336, "grad_norm": 0.5164645314216614, "learning_rate": 1.5304326403127733e-08, "loss": 0.0506, "step": 44748 }, { "epoch": 0.9860571705586497, "grad_norm": 0.4712772071361542, "learning_rate": 1.5256017888302665e-08, "loss": 0.0532, "step": 44749 }, { "epoch": 0.9860792058481659, "grad_norm": 0.9308212995529175, "learning_rate": 1.5207785698813647e-08, "loss": 0.0649, "step": 44750 }, { "epoch": 0.986101241137682, "grad_norm": 0.45291298627853394, "learning_rate": 1.515962983490715e-08, "loss": 0.0613, "step": 44751 }, { "epoch": 0.9861232764271981, "grad_norm": 0.23032990097999573, "learning_rate": 1.511155029682798e-08, "loss": 0.0359, "step": 44752 }, { "epoch": 0.9861453117167143, "grad_norm": 0.83753901720047, "learning_rate": 1.5063547084822604e-08, "loss": 0.0599, "step": 44753 }, { "epoch": 0.9861673470062304, "grad_norm": 1.0120503902435303, "learning_rate": 1.5015620199132498e-08, "loss": 0.048, "step": 44754 }, { "epoch": 0.9861893822957466, "grad_norm": 0.715390145778656, "learning_rate": 1.4967769640004127e-08, "loss": 0.0826, "step": 44755 }, { "epoch": 0.9862114175852628, "grad_norm": 0.5877007842063904, "learning_rate": 1.4919995407680633e-08, "loss": 0.0589, "step": 44756 }, { "epoch": 0.9862334528747789, "grad_norm": 0.47239068150520325, "learning_rate": 1.4872297502405153e-08, "loss": 0.0519, "step": 44757 }, { "epoch": 0.9862554881642951, "grad_norm": 0.45871657133102417, "learning_rate": 1.4824675924419162e-08, "loss": 0.0533, "step": 44758 }, { "epoch": 0.9862775234538113, "grad_norm": 0.6431015133857727, "learning_rate": 1.477713067396913e-08, "loss": 0.0695, "step": 44759 }, { "epoch": 0.9862995587433274, "grad_norm": 0.8121421337127686, "learning_rate": 1.47296617512932e-08, "loss": 0.0471, "step": 44760 }, { "epoch": 0.9863215940328436, "grad_norm": 0.4881010949611664, "learning_rate": 1.4682269156636175e-08, "loss": 0.04, "step": 44761 }, { "epoch": 0.9863436293223597, "grad_norm": 0.806616485118866, "learning_rate": 1.4634952890237862e-08, "loss": 0.066, "step": 44762 }, { "epoch": 0.9863656646118759, "grad_norm": 0.8313677906990051, "learning_rate": 1.4587712952338072e-08, "loss": 0.0583, "step": 44763 }, { "epoch": 0.9863876999013921, "grad_norm": 0.5313889980316162, "learning_rate": 1.4540549343179943e-08, "loss": 0.0485, "step": 44764 }, { "epoch": 0.9864097351909082, "grad_norm": 0.4195447266101837, "learning_rate": 1.4493462063003283e-08, "loss": 0.0388, "step": 44765 }, { "epoch": 0.9864317704804244, "grad_norm": 0.4159957468509674, "learning_rate": 1.4446451112046234e-08, "loss": 0.0746, "step": 44766 }, { "epoch": 0.9864538057699406, "grad_norm": 0.6355001330375671, "learning_rate": 1.4399516490550268e-08, "loss": 0.0709, "step": 44767 }, { "epoch": 0.9864758410594567, "grad_norm": 0.45283985137939453, "learning_rate": 1.4352658198753533e-08, "loss": 0.0554, "step": 44768 }, { "epoch": 0.9864978763489729, "grad_norm": 0.393546462059021, "learning_rate": 1.4305876236892501e-08, "loss": 0.0485, "step": 44769 }, { "epoch": 0.9865199116384891, "grad_norm": 0.8261423110961914, "learning_rate": 1.4259170605210315e-08, "loss": 0.0552, "step": 44770 }, { "epoch": 0.9865419469280052, "grad_norm": 0.44715291261672974, "learning_rate": 1.4212541303940118e-08, "loss": 0.0719, "step": 44771 }, { "epoch": 0.9865639822175214, "grad_norm": 0.5627028942108154, "learning_rate": 1.4165988333323387e-08, "loss": 0.04, "step": 44772 }, { "epoch": 0.9865860175070376, "grad_norm": 0.557774543762207, "learning_rate": 1.4119511693593268e-08, "loss": 0.0778, "step": 44773 }, { "epoch": 0.9866080527965537, "grad_norm": 0.4756895899772644, "learning_rate": 1.4073111384989567e-08, "loss": 0.0477, "step": 44774 }, { "epoch": 0.9866300880860699, "grad_norm": 0.8152390122413635, "learning_rate": 1.4026787407748764e-08, "loss": 0.0711, "step": 44775 }, { "epoch": 0.986652123375586, "grad_norm": 0.5471885204315186, "learning_rate": 1.3980539762104006e-08, "loss": 0.0939, "step": 44776 }, { "epoch": 0.9866741586651021, "grad_norm": 0.40938422083854675, "learning_rate": 1.3934368448293434e-08, "loss": 0.0398, "step": 44777 }, { "epoch": 0.9866961939546183, "grad_norm": 0.5986961722373962, "learning_rate": 1.3888273466550194e-08, "loss": 0.1005, "step": 44778 }, { "epoch": 0.9867182292441344, "grad_norm": 0.5482457876205444, "learning_rate": 1.3842254817110766e-08, "loss": 0.0638, "step": 44779 }, { "epoch": 0.9867402645336506, "grad_norm": 0.3451172411441803, "learning_rate": 1.3796312500209962e-08, "loss": 0.0465, "step": 44780 }, { "epoch": 0.9867622998231668, "grad_norm": 0.7453520894050598, "learning_rate": 1.3750446516079262e-08, "loss": 0.0695, "step": 44781 }, { "epoch": 0.9867843351126829, "grad_norm": 0.5404511094093323, "learning_rate": 1.370465686495348e-08, "loss": 0.046, "step": 44782 }, { "epoch": 0.9868063704021991, "grad_norm": 0.8960053324699402, "learning_rate": 1.3658943547067426e-08, "loss": 0.0893, "step": 44783 }, { "epoch": 0.9868284056917153, "grad_norm": 0.6858781576156616, "learning_rate": 1.3613306562652583e-08, "loss": 0.0564, "step": 44784 }, { "epoch": 0.9868504409812314, "grad_norm": 0.6645774841308594, "learning_rate": 1.3567745911940433e-08, "loss": 0.063, "step": 44785 }, { "epoch": 0.9868724762707476, "grad_norm": 0.8004060387611389, "learning_rate": 1.3522261595165785e-08, "loss": 0.0702, "step": 44786 }, { "epoch": 0.9868945115602638, "grad_norm": 0.5818349123001099, "learning_rate": 1.347685361255846e-08, "loss": 0.0563, "step": 44787 }, { "epoch": 0.9869165468497799, "grad_norm": 1.0110982656478882, "learning_rate": 1.343152196434827e-08, "loss": 0.0725, "step": 44788 }, { "epoch": 0.9869385821392961, "grad_norm": 0.8639557361602783, "learning_rate": 1.3386266650768364e-08, "loss": 0.0667, "step": 44789 }, { "epoch": 0.9869606174288122, "grad_norm": 0.41258537769317627, "learning_rate": 1.3341087672048558e-08, "loss": 0.0459, "step": 44790 }, { "epoch": 0.9869826527183284, "grad_norm": 0.9148531556129456, "learning_rate": 1.3295985028420332e-08, "loss": 0.0661, "step": 44791 }, { "epoch": 0.9870046880078446, "grad_norm": 0.5244197249412537, "learning_rate": 1.3250958720111839e-08, "loss": 0.0563, "step": 44792 }, { "epoch": 0.9870267232973607, "grad_norm": 0.5044287443161011, "learning_rate": 1.3206008747352894e-08, "loss": 0.0806, "step": 44793 }, { "epoch": 0.9870487585868769, "grad_norm": 0.7179906964302063, "learning_rate": 1.3161135110371647e-08, "loss": 0.0446, "step": 44794 }, { "epoch": 0.9870707938763931, "grad_norm": 0.6525829434394836, "learning_rate": 1.311633780939625e-08, "loss": 0.0595, "step": 44795 }, { "epoch": 0.9870928291659092, "grad_norm": 1.0137803554534912, "learning_rate": 1.3071616844658186e-08, "loss": 0.0842, "step": 44796 }, { "epoch": 0.9871148644554254, "grad_norm": 0.7372540831565857, "learning_rate": 1.3026972216380606e-08, "loss": 0.0539, "step": 44797 }, { "epoch": 0.9871368997449416, "grad_norm": 0.32589319348335266, "learning_rate": 1.2982403924794995e-08, "loss": 0.0611, "step": 44798 }, { "epoch": 0.9871589350344577, "grad_norm": 0.49751919507980347, "learning_rate": 1.293791197012617e-08, "loss": 0.0585, "step": 44799 }, { "epoch": 0.9871809703239739, "grad_norm": 0.5891963839530945, "learning_rate": 1.2893496352602286e-08, "loss": 0.0473, "step": 44800 }, { "epoch": 0.9872030056134901, "grad_norm": 0.39000943303108215, "learning_rate": 1.2849157072446493e-08, "loss": 0.0667, "step": 44801 }, { "epoch": 0.9872250409030061, "grad_norm": 0.32782360911369324, "learning_rate": 1.2804894129888612e-08, "loss": 0.0491, "step": 44802 }, { "epoch": 0.9872470761925223, "grad_norm": 0.8020473122596741, "learning_rate": 1.2760707525151793e-08, "loss": 0.0656, "step": 44803 }, { "epoch": 0.9872691114820384, "grad_norm": 0.7056152820587158, "learning_rate": 1.271659725846086e-08, "loss": 0.0585, "step": 44804 }, { "epoch": 0.9872911467715546, "grad_norm": 0.7817484736442566, "learning_rate": 1.2672563330040632e-08, "loss": 0.0695, "step": 44805 }, { "epoch": 0.9873131820610708, "grad_norm": 0.7141628265380859, "learning_rate": 1.2628605740115929e-08, "loss": 0.072, "step": 44806 }, { "epoch": 0.9873352173505869, "grad_norm": 0.9001194834709167, "learning_rate": 1.2584724488911569e-08, "loss": 0.0617, "step": 44807 }, { "epoch": 0.9873572526401031, "grad_norm": 0.6735802292823792, "learning_rate": 1.2540919576650712e-08, "loss": 0.0873, "step": 44808 }, { "epoch": 0.9873792879296193, "grad_norm": 0.42260441184043884, "learning_rate": 1.2497191003554842e-08, "loss": 0.0524, "step": 44809 }, { "epoch": 0.9874013232191354, "grad_norm": 0.6310812830924988, "learning_rate": 1.2453538769847116e-08, "loss": 0.0651, "step": 44810 }, { "epoch": 0.9874233585086516, "grad_norm": 0.35532552003860474, "learning_rate": 1.2409962875752356e-08, "loss": 0.0507, "step": 44811 }, { "epoch": 0.9874453937981678, "grad_norm": 0.5810613632202148, "learning_rate": 1.236646332148872e-08, "loss": 0.0445, "step": 44812 }, { "epoch": 0.9874674290876839, "grad_norm": 0.9827192425727844, "learning_rate": 1.2323040107281026e-08, "loss": 0.0701, "step": 44813 }, { "epoch": 0.9874894643772001, "grad_norm": 0.9925821423530579, "learning_rate": 1.2279693233347434e-08, "loss": 0.0713, "step": 44814 }, { "epoch": 0.9875114996667163, "grad_norm": 0.41927972435951233, "learning_rate": 1.2236422699912764e-08, "loss": 0.0602, "step": 44815 }, { "epoch": 0.9875335349562324, "grad_norm": 0.6584209203720093, "learning_rate": 1.219322850719351e-08, "loss": 0.0518, "step": 44816 }, { "epoch": 0.9875555702457486, "grad_norm": 0.6836596727371216, "learning_rate": 1.2150110655411162e-08, "loss": 0.0901, "step": 44817 }, { "epoch": 0.9875776055352647, "grad_norm": 0.1640763282775879, "learning_rate": 1.2107069144785543e-08, "loss": 0.0485, "step": 44818 }, { "epoch": 0.9875996408247809, "grad_norm": 0.7110376954078674, "learning_rate": 1.2064103975536477e-08, "loss": 0.0716, "step": 44819 }, { "epoch": 0.9876216761142971, "grad_norm": 0.20250475406646729, "learning_rate": 1.2021215147882125e-08, "loss": 0.0378, "step": 44820 }, { "epoch": 0.9876437114038132, "grad_norm": 0.3951801657676697, "learning_rate": 1.1978402662038978e-08, "loss": 0.0575, "step": 44821 }, { "epoch": 0.9876657466933294, "grad_norm": 0.7720677256584167, "learning_rate": 1.1935666518228528e-08, "loss": 0.0652, "step": 44822 }, { "epoch": 0.9876877819828456, "grad_norm": 0.7574318051338196, "learning_rate": 1.1893006716667266e-08, "loss": 0.0576, "step": 44823 }, { "epoch": 0.9877098172723617, "grad_norm": 0.8314443826675415, "learning_rate": 1.1850423257571686e-08, "loss": 0.0712, "step": 44824 }, { "epoch": 0.9877318525618779, "grad_norm": 0.41830068826675415, "learning_rate": 1.1807916141159947e-08, "loss": 0.0543, "step": 44825 }, { "epoch": 0.987753887851394, "grad_norm": 0.6769007444381714, "learning_rate": 1.1765485367646877e-08, "loss": 0.0563, "step": 44826 }, { "epoch": 0.9877759231409101, "grad_norm": 0.6870677471160889, "learning_rate": 1.1723130937250637e-08, "loss": 0.0712, "step": 44827 }, { "epoch": 0.9877979584304263, "grad_norm": 0.5522448420524597, "learning_rate": 1.1680852850184388e-08, "loss": 0.0487, "step": 44828 }, { "epoch": 0.9878199937199424, "grad_norm": 0.7849227786064148, "learning_rate": 1.1638651106666287e-08, "loss": 0.0482, "step": 44829 }, { "epoch": 0.9878420290094586, "grad_norm": 0.6195634007453918, "learning_rate": 1.15965257069095e-08, "loss": 0.0422, "step": 44830 }, { "epoch": 0.9878640642989748, "grad_norm": 0.5287322402000427, "learning_rate": 1.1554476651127188e-08, "loss": 0.0735, "step": 44831 }, { "epoch": 0.9878860995884909, "grad_norm": 1.0218768119812012, "learning_rate": 1.151250393953751e-08, "loss": 0.0793, "step": 44832 }, { "epoch": 0.9879081348780071, "grad_norm": 0.4941418468952179, "learning_rate": 1.1470607572351966e-08, "loss": 0.0787, "step": 44833 }, { "epoch": 0.9879301701675233, "grad_norm": 0.45240727066993713, "learning_rate": 1.1428787549783714e-08, "loss": 0.0353, "step": 44834 }, { "epoch": 0.9879522054570394, "grad_norm": 0.6731019616127014, "learning_rate": 1.1387043872045921e-08, "loss": 0.0453, "step": 44835 }, { "epoch": 0.9879742407465556, "grad_norm": 0.20600570738315582, "learning_rate": 1.1345376539351749e-08, "loss": 0.0647, "step": 44836 }, { "epoch": 0.9879962760360718, "grad_norm": 0.6987761855125427, "learning_rate": 1.1303785551912693e-08, "loss": 0.0636, "step": 44837 }, { "epoch": 0.9880183113255879, "grad_norm": 0.4722706079483032, "learning_rate": 1.1262270909941919e-08, "loss": 0.0572, "step": 44838 }, { "epoch": 0.9880403466151041, "grad_norm": 0.5614620447158813, "learning_rate": 1.1220832613649257e-08, "loss": 0.058, "step": 44839 }, { "epoch": 0.9880623819046203, "grad_norm": 0.2589024603366852, "learning_rate": 1.1179470663246205e-08, "loss": 0.0361, "step": 44840 }, { "epoch": 0.9880844171941364, "grad_norm": 0.4717286229133606, "learning_rate": 1.1138185058944261e-08, "loss": 0.0359, "step": 44841 }, { "epoch": 0.9881064524836526, "grad_norm": 0.4227568209171295, "learning_rate": 1.1096975800953257e-08, "loss": 0.0373, "step": 44842 }, { "epoch": 0.9881284877731688, "grad_norm": 0.4610550105571747, "learning_rate": 1.1055842889483025e-08, "loss": 0.0398, "step": 44843 }, { "epoch": 0.9881505230626849, "grad_norm": 0.561805009841919, "learning_rate": 1.1014786324743398e-08, "loss": 0.0757, "step": 44844 }, { "epoch": 0.9881725583522011, "grad_norm": 0.727712094783783, "learning_rate": 1.097380610694254e-08, "loss": 0.0708, "step": 44845 }, { "epoch": 0.9881945936417172, "grad_norm": 0.3314283490180969, "learning_rate": 1.0932902236290287e-08, "loss": 0.0481, "step": 44846 }, { "epoch": 0.9882166289312334, "grad_norm": 0.5566482543945312, "learning_rate": 1.0892074712994804e-08, "loss": 0.0514, "step": 44847 }, { "epoch": 0.9882386642207496, "grad_norm": 0.5192036628723145, "learning_rate": 1.0851323537262592e-08, "loss": 0.0658, "step": 44848 }, { "epoch": 0.9882606995102657, "grad_norm": 0.9439800381660461, "learning_rate": 1.0810648709303483e-08, "loss": 0.066, "step": 44849 }, { "epoch": 0.9882827347997819, "grad_norm": 0.5130444169044495, "learning_rate": 1.077005022932398e-08, "loss": 0.0379, "step": 44850 }, { "epoch": 0.988304770089298, "grad_norm": 0.5053499341011047, "learning_rate": 1.0729528097528918e-08, "loss": 0.031, "step": 44851 }, { "epoch": 0.9883268053788141, "grad_norm": 0.8543380498886108, "learning_rate": 1.068908231412813e-08, "loss": 0.0728, "step": 44852 }, { "epoch": 0.9883488406683303, "grad_norm": 0.6102583408355713, "learning_rate": 1.0648712879324784e-08, "loss": 0.0573, "step": 44853 }, { "epoch": 0.9883708759578465, "grad_norm": 0.7908697128295898, "learning_rate": 1.0608419793325385e-08, "loss": 0.0596, "step": 44854 }, { "epoch": 0.9883929112473626, "grad_norm": 0.5840082168579102, "learning_rate": 1.056820305633477e-08, "loss": 0.0527, "step": 44855 }, { "epoch": 0.9884149465368788, "grad_norm": 0.5390774607658386, "learning_rate": 1.0528062668559434e-08, "loss": 0.037, "step": 44856 }, { "epoch": 0.988436981826395, "grad_norm": 0.4502476751804352, "learning_rate": 1.048799863020089e-08, "loss": 0.067, "step": 44857 }, { "epoch": 0.9884590171159111, "grad_norm": 0.8399543762207031, "learning_rate": 1.04480109414673e-08, "loss": 0.0455, "step": 44858 }, { "epoch": 0.9884810524054273, "grad_norm": 0.36317354440689087, "learning_rate": 1.0408099602558508e-08, "loss": 0.0415, "step": 44859 }, { "epoch": 0.9885030876949434, "grad_norm": 0.445522278547287, "learning_rate": 1.0368264613679346e-08, "loss": 0.0457, "step": 44860 }, { "epoch": 0.9885251229844596, "grad_norm": 0.40400612354278564, "learning_rate": 1.0328505975032987e-08, "loss": 0.0597, "step": 44861 }, { "epoch": 0.9885471582739758, "grad_norm": 0.7739598751068115, "learning_rate": 1.0288823686822601e-08, "loss": 0.0684, "step": 44862 }, { "epoch": 0.9885691935634919, "grad_norm": 0.23717468976974487, "learning_rate": 1.024921774924803e-08, "loss": 0.048, "step": 44863 }, { "epoch": 0.9885912288530081, "grad_norm": 0.591993510723114, "learning_rate": 1.0209688162512443e-08, "loss": 0.0463, "step": 44864 }, { "epoch": 0.9886132641425243, "grad_norm": 0.5464179515838623, "learning_rate": 1.0170234926817346e-08, "loss": 0.0629, "step": 44865 }, { "epoch": 0.9886352994320404, "grad_norm": 0.3822467029094696, "learning_rate": 1.0130858042364245e-08, "loss": 0.0741, "step": 44866 }, { "epoch": 0.9886573347215566, "grad_norm": 0.36067479848861694, "learning_rate": 1.0091557509351313e-08, "loss": 0.0385, "step": 44867 }, { "epoch": 0.9886793700110728, "grad_norm": 0.5808311700820923, "learning_rate": 1.0052333327981722e-08, "loss": 0.0619, "step": 44868 }, { "epoch": 0.9887014053005889, "grad_norm": 0.824734628200531, "learning_rate": 1.0013185498451982e-08, "loss": 0.0561, "step": 44869 }, { "epoch": 0.9887234405901051, "grad_norm": 0.287433922290802, "learning_rate": 9.974114020965264e-09, "loss": 0.0566, "step": 44870 }, { "epoch": 0.9887454758796212, "grad_norm": 0.8454905152320862, "learning_rate": 9.935118895718076e-09, "loss": 0.0462, "step": 44871 }, { "epoch": 0.9887675111691374, "grad_norm": 0.5006709694862366, "learning_rate": 9.896200122908594e-09, "loss": 0.0345, "step": 44872 }, { "epoch": 0.9887895464586536, "grad_norm": 0.8064197301864624, "learning_rate": 9.857357702738324e-09, "loss": 0.0933, "step": 44873 }, { "epoch": 0.9888115817481697, "grad_norm": 1.35782790184021, "learning_rate": 9.818591635400442e-09, "loss": 0.073, "step": 44874 }, { "epoch": 0.9888336170376859, "grad_norm": 0.31977424025535583, "learning_rate": 9.779901921096456e-09, "loss": 0.0572, "step": 44875 }, { "epoch": 0.988855652327202, "grad_norm": 0.47596660256385803, "learning_rate": 9.741288560021212e-09, "loss": 0.0628, "step": 44876 }, { "epoch": 0.9888776876167181, "grad_norm": 0.29772621393203735, "learning_rate": 9.702751552372879e-09, "loss": 0.0337, "step": 44877 }, { "epoch": 0.9888997229062343, "grad_norm": 0.4632447361946106, "learning_rate": 9.664290898346307e-09, "loss": 0.0638, "step": 44878 }, { "epoch": 0.9889217581957505, "grad_norm": 0.3631281852722168, "learning_rate": 9.625906598138001e-09, "loss": 0.0404, "step": 44879 }, { "epoch": 0.9889437934852666, "grad_norm": 0.4264569580554962, "learning_rate": 9.587598651942809e-09, "loss": 0.0654, "step": 44880 }, { "epoch": 0.9889658287747828, "grad_norm": 0.3641528785228729, "learning_rate": 9.549367059957237e-09, "loss": 0.0336, "step": 44881 }, { "epoch": 0.988987864064299, "grad_norm": 0.5406758189201355, "learning_rate": 9.511211822374466e-09, "loss": 0.0415, "step": 44882 }, { "epoch": 0.9890098993538151, "grad_norm": 0.7542532682418823, "learning_rate": 9.473132939389339e-09, "loss": 0.0889, "step": 44883 }, { "epoch": 0.9890319346433313, "grad_norm": 0.7028613090515137, "learning_rate": 9.435130411198368e-09, "loss": 0.0629, "step": 44884 }, { "epoch": 0.9890539699328474, "grad_norm": 0.7691887021064758, "learning_rate": 9.397204237991397e-09, "loss": 0.0647, "step": 44885 }, { "epoch": 0.9890760052223636, "grad_norm": 0.6226260662078857, "learning_rate": 9.359354419961607e-09, "loss": 0.0648, "step": 44886 }, { "epoch": 0.9890980405118798, "grad_norm": 0.7271358966827393, "learning_rate": 9.321580957305509e-09, "loss": 0.0808, "step": 44887 }, { "epoch": 0.9891200758013959, "grad_norm": 0.3741295635700226, "learning_rate": 9.283883850211283e-09, "loss": 0.0458, "step": 44888 }, { "epoch": 0.9891421110909121, "grad_norm": 0.8690945506095886, "learning_rate": 9.24626309887544e-09, "loss": 0.0614, "step": 44889 }, { "epoch": 0.9891641463804283, "grad_norm": 0.5962587594985962, "learning_rate": 9.208718703484498e-09, "loss": 0.0767, "step": 44890 }, { "epoch": 0.9891861816699444, "grad_norm": 0.7246081233024597, "learning_rate": 9.1712506642333e-09, "loss": 0.0735, "step": 44891 }, { "epoch": 0.9892082169594606, "grad_norm": 0.7404770851135254, "learning_rate": 9.133858981311693e-09, "loss": 0.0604, "step": 44892 }, { "epoch": 0.9892302522489768, "grad_norm": 0.5302582383155823, "learning_rate": 9.096543654911194e-09, "loss": 0.0371, "step": 44893 }, { "epoch": 0.9892522875384929, "grad_norm": 0.6650840640068054, "learning_rate": 9.059304685218317e-09, "loss": 0.0632, "step": 44894 }, { "epoch": 0.9892743228280091, "grad_norm": 0.7711208462715149, "learning_rate": 9.022142072427908e-09, "loss": 0.0471, "step": 44895 }, { "epoch": 0.9892963581175253, "grad_norm": 0.40552380681037903, "learning_rate": 8.985055816724818e-09, "loss": 0.0578, "step": 44896 }, { "epoch": 0.9893183934070414, "grad_norm": 0.4639047086238861, "learning_rate": 8.948045918298898e-09, "loss": 0.0665, "step": 44897 }, { "epoch": 0.9893404286965576, "grad_norm": 0.39061033725738525, "learning_rate": 8.911112377341658e-09, "loss": 0.0639, "step": 44898 }, { "epoch": 0.9893624639860737, "grad_norm": 0.39754053950309753, "learning_rate": 8.87425519403795e-09, "loss": 0.0504, "step": 44899 }, { "epoch": 0.9893844992755899, "grad_norm": 0.6272909045219421, "learning_rate": 8.83747436857596e-09, "loss": 0.0445, "step": 44900 }, { "epoch": 0.989406534565106, "grad_norm": 0.5659599900245667, "learning_rate": 8.800769901143868e-09, "loss": 0.0469, "step": 44901 }, { "epoch": 0.9894285698546221, "grad_norm": 0.6156412959098816, "learning_rate": 8.764141791928193e-09, "loss": 0.0554, "step": 44902 }, { "epoch": 0.9894506051441383, "grad_norm": 0.9662308692932129, "learning_rate": 8.727590041117117e-09, "loss": 0.0418, "step": 44903 }, { "epoch": 0.9894726404336545, "grad_norm": 0.4261859059333801, "learning_rate": 8.691114648893827e-09, "loss": 0.0434, "step": 44904 }, { "epoch": 0.9894946757231706, "grad_norm": 0.8419355154037476, "learning_rate": 8.654715615446507e-09, "loss": 0.0583, "step": 44905 }, { "epoch": 0.9895167110126868, "grad_norm": 0.5957054495811462, "learning_rate": 8.618392940958342e-09, "loss": 0.0448, "step": 44906 }, { "epoch": 0.989538746302203, "grad_norm": 0.4170134961605072, "learning_rate": 8.582146625617515e-09, "loss": 0.0757, "step": 44907 }, { "epoch": 0.9895607815917191, "grad_norm": 0.39691418409347534, "learning_rate": 8.545976669605548e-09, "loss": 0.054, "step": 44908 }, { "epoch": 0.9895828168812353, "grad_norm": 0.30764666199684143, "learning_rate": 8.50988307310896e-09, "loss": 0.0442, "step": 44909 }, { "epoch": 0.9896048521707514, "grad_norm": 0.4076341986656189, "learning_rate": 8.473865836310935e-09, "loss": 0.0356, "step": 44910 }, { "epoch": 0.9896268874602676, "grad_norm": 0.5390751361846924, "learning_rate": 8.437924959392996e-09, "loss": 0.0448, "step": 44911 }, { "epoch": 0.9896489227497838, "grad_norm": 0.5498886704444885, "learning_rate": 8.40206044254166e-09, "loss": 0.065, "step": 44912 }, { "epoch": 0.9896709580392999, "grad_norm": 0.5478169918060303, "learning_rate": 8.366272285936783e-09, "loss": 0.053, "step": 44913 }, { "epoch": 0.9896929933288161, "grad_norm": 0.6175640821456909, "learning_rate": 8.330560489761551e-09, "loss": 0.0761, "step": 44914 }, { "epoch": 0.9897150286183323, "grad_norm": 0.4688977599143982, "learning_rate": 8.29492505419749e-09, "loss": 0.0615, "step": 44915 }, { "epoch": 0.9897370639078484, "grad_norm": 0.31035417318344116, "learning_rate": 8.259365979426115e-09, "loss": 0.0414, "step": 44916 }, { "epoch": 0.9897590991973646, "grad_norm": 0.40738657116889954, "learning_rate": 8.223883265630616e-09, "loss": 0.0681, "step": 44917 }, { "epoch": 0.9897811344868808, "grad_norm": 0.5018673539161682, "learning_rate": 8.188476912989184e-09, "loss": 0.0588, "step": 44918 }, { "epoch": 0.9898031697763969, "grad_norm": 0.46597474813461304, "learning_rate": 8.153146921683341e-09, "loss": 0.0687, "step": 44919 }, { "epoch": 0.9898252050659131, "grad_norm": 0.789088249206543, "learning_rate": 8.117893291892941e-09, "loss": 0.0764, "step": 44920 }, { "epoch": 0.9898472403554293, "grad_norm": 0.5003616213798523, "learning_rate": 8.082716023797843e-09, "loss": 0.066, "step": 44921 }, { "epoch": 0.9898692756449454, "grad_norm": 0.6219638586044312, "learning_rate": 8.047615117576235e-09, "loss": 0.0807, "step": 44922 }, { "epoch": 0.9898913109344616, "grad_norm": 0.47436603903770447, "learning_rate": 8.01259057340631e-09, "loss": 0.0376, "step": 44923 }, { "epoch": 0.9899133462239778, "grad_norm": 0.6942325234413147, "learning_rate": 7.977642391469586e-09, "loss": 0.0447, "step": 44924 }, { "epoch": 0.9899353815134938, "grad_norm": 0.6311774849891663, "learning_rate": 7.942770571940927e-09, "loss": 0.0722, "step": 44925 }, { "epoch": 0.98995741680301, "grad_norm": 0.6294880509376526, "learning_rate": 7.907975115000188e-09, "loss": 0.0617, "step": 44926 }, { "epoch": 0.9899794520925261, "grad_norm": 0.5861527323722839, "learning_rate": 7.873256020822228e-09, "loss": 0.0437, "step": 44927 }, { "epoch": 0.9900014873820423, "grad_norm": 0.5803743600845337, "learning_rate": 7.83861328958524e-09, "loss": 0.0694, "step": 44928 }, { "epoch": 0.9900235226715585, "grad_norm": 0.3702535927295685, "learning_rate": 7.804046921465746e-09, "loss": 0.0559, "step": 44929 }, { "epoch": 0.9900455579610746, "grad_norm": 0.4716896712779999, "learning_rate": 7.769556916640275e-09, "loss": 0.063, "step": 44930 }, { "epoch": 0.9900675932505908, "grad_norm": 0.5098820924758911, "learning_rate": 7.735143275283685e-09, "loss": 0.0652, "step": 44931 }, { "epoch": 0.990089628540107, "grad_norm": 0.8100286722183228, "learning_rate": 7.700805997572503e-09, "loss": 0.0554, "step": 44932 }, { "epoch": 0.9901116638296231, "grad_norm": 0.3652501106262207, "learning_rate": 7.666545083678256e-09, "loss": 0.0576, "step": 44933 }, { "epoch": 0.9901336991191393, "grad_norm": 0.7233731150627136, "learning_rate": 7.632360533779137e-09, "loss": 0.0537, "step": 44934 }, { "epoch": 0.9901557344086555, "grad_norm": 0.43761223554611206, "learning_rate": 7.59825234804834e-09, "loss": 0.0498, "step": 44935 }, { "epoch": 0.9901777696981716, "grad_norm": 0.9230408072471619, "learning_rate": 7.56422052665906e-09, "loss": 0.0701, "step": 44936 }, { "epoch": 0.9901998049876878, "grad_norm": 0.6862479448318481, "learning_rate": 7.530265069784492e-09, "loss": 0.052, "step": 44937 }, { "epoch": 0.990221840277204, "grad_norm": 0.3611799478530884, "learning_rate": 7.49638597759783e-09, "loss": 0.0426, "step": 44938 }, { "epoch": 0.9902438755667201, "grad_norm": 0.6696261763572693, "learning_rate": 7.46258325027227e-09, "loss": 0.0684, "step": 44939 }, { "epoch": 0.9902659108562363, "grad_norm": 0.5951566100120544, "learning_rate": 7.428856887979341e-09, "loss": 0.0677, "step": 44940 }, { "epoch": 0.9902879461457524, "grad_norm": 0.6266543865203857, "learning_rate": 7.395206890890571e-09, "loss": 0.0554, "step": 44941 }, { "epoch": 0.9903099814352686, "grad_norm": 0.655661404132843, "learning_rate": 7.361633259177492e-09, "loss": 0.0656, "step": 44942 }, { "epoch": 0.9903320167247848, "grad_norm": 0.3458922803401947, "learning_rate": 7.328135993011631e-09, "loss": 0.039, "step": 44943 }, { "epoch": 0.9903540520143009, "grad_norm": 0.5299853086471558, "learning_rate": 7.294715092562853e-09, "loss": 0.0616, "step": 44944 }, { "epoch": 0.9903760873038171, "grad_norm": 0.6882059574127197, "learning_rate": 7.261370558002689e-09, "loss": 0.0817, "step": 44945 }, { "epoch": 0.9903981225933333, "grad_norm": 0.8249604105949402, "learning_rate": 7.228102389499336e-09, "loss": 0.0893, "step": 44946 }, { "epoch": 0.9904201578828494, "grad_norm": 0.47272631525993347, "learning_rate": 7.194910587222658e-09, "loss": 0.038, "step": 44947 }, { "epoch": 0.9904421931723656, "grad_norm": 0.5862483382225037, "learning_rate": 7.161795151342521e-09, "loss": 0.0406, "step": 44948 }, { "epoch": 0.9904642284618818, "grad_norm": 0.631865918636322, "learning_rate": 7.128756082027121e-09, "loss": 0.0485, "step": 44949 }, { "epoch": 0.9904862637513978, "grad_norm": 0.5014623999595642, "learning_rate": 7.095793379444659e-09, "loss": 0.0592, "step": 44950 }, { "epoch": 0.990508299040914, "grad_norm": 0.6170703172683716, "learning_rate": 7.062907043763334e-09, "loss": 0.0546, "step": 44951 }, { "epoch": 0.9905303343304301, "grad_norm": 0.4283185303211212, "learning_rate": 7.0300970751513425e-09, "loss": 0.0688, "step": 44952 }, { "epoch": 0.9905523696199463, "grad_norm": 0.5312243700027466, "learning_rate": 6.997363473773554e-09, "loss": 0.0654, "step": 44953 }, { "epoch": 0.9905744049094625, "grad_norm": 0.6892764568328857, "learning_rate": 6.964706239799834e-09, "loss": 0.064, "step": 44954 }, { "epoch": 0.9905964401989786, "grad_norm": 0.8185665607452393, "learning_rate": 6.932125373393383e-09, "loss": 0.0534, "step": 44955 }, { "epoch": 0.9906184754884948, "grad_norm": 0.4976256787776947, "learning_rate": 6.899620874720736e-09, "loss": 0.055, "step": 44956 }, { "epoch": 0.990640510778011, "grad_norm": 0.540573239326477, "learning_rate": 6.86719274395009e-09, "loss": 0.0507, "step": 44957 }, { "epoch": 0.9906625460675271, "grad_norm": 0.8023850321769714, "learning_rate": 6.834840981242984e-09, "loss": 0.0514, "step": 44958 }, { "epoch": 0.9906845813570433, "grad_norm": 0.7783190011978149, "learning_rate": 6.802565586767618e-09, "loss": 0.0811, "step": 44959 }, { "epoch": 0.9907066166465595, "grad_norm": 0.4437897205352783, "learning_rate": 6.7703665606855256e-09, "loss": 0.0421, "step": 44960 }, { "epoch": 0.9907286519360756, "grad_norm": 0.702627956867218, "learning_rate": 6.738243903163243e-09, "loss": 0.053, "step": 44961 }, { "epoch": 0.9907506872255918, "grad_norm": 0.6358205676078796, "learning_rate": 6.706197614362308e-09, "loss": 0.0463, "step": 44962 }, { "epoch": 0.990772722515108, "grad_norm": 0.6866046786308289, "learning_rate": 6.674227694447588e-09, "loss": 0.0941, "step": 44963 }, { "epoch": 0.9907947578046241, "grad_norm": 0.5927221775054932, "learning_rate": 6.642334143578954e-09, "loss": 0.0575, "step": 44964 }, { "epoch": 0.9908167930941403, "grad_norm": 0.5285972952842712, "learning_rate": 6.610516961922941e-09, "loss": 0.0511, "step": 44965 }, { "epoch": 0.9908388283836564, "grad_norm": 0.5090565085411072, "learning_rate": 6.578776149639421e-09, "loss": 0.0365, "step": 44966 }, { "epoch": 0.9908608636731726, "grad_norm": 0.8169907927513123, "learning_rate": 6.54711170688993e-09, "loss": 0.0774, "step": 44967 }, { "epoch": 0.9908828989626888, "grad_norm": 0.8019059896469116, "learning_rate": 6.515523633836007e-09, "loss": 0.06, "step": 44968 }, { "epoch": 0.9909049342522049, "grad_norm": 0.4781486392021179, "learning_rate": 6.4840119306391885e-09, "loss": 0.0607, "step": 44969 }, { "epoch": 0.9909269695417211, "grad_norm": 0.6079665422439575, "learning_rate": 6.452576597459348e-09, "loss": 0.0538, "step": 44970 }, { "epoch": 0.9909490048312373, "grad_norm": 0.7807337641716003, "learning_rate": 6.421217634456356e-09, "loss": 0.0333, "step": 44971 }, { "epoch": 0.9909710401207534, "grad_norm": 0.6973407864570618, "learning_rate": 6.389935041790085e-09, "loss": 0.0357, "step": 44972 }, { "epoch": 0.9909930754102696, "grad_norm": 0.3053952157497406, "learning_rate": 6.358728819620408e-09, "loss": 0.0469, "step": 44973 }, { "epoch": 0.9910151106997858, "grad_norm": 0.46976765990257263, "learning_rate": 6.327598968107195e-09, "loss": 0.0378, "step": 44974 }, { "epoch": 0.9910371459893018, "grad_norm": 1.2407357692718506, "learning_rate": 6.296545487405326e-09, "loss": 0.0611, "step": 44975 }, { "epoch": 0.991059181278818, "grad_norm": 0.7947053909301758, "learning_rate": 6.2655683776779995e-09, "loss": 0.0584, "step": 44976 }, { "epoch": 0.9910812165683341, "grad_norm": 0.6767487525939941, "learning_rate": 6.234667639078428e-09, "loss": 0.0833, "step": 44977 }, { "epoch": 0.9911032518578503, "grad_norm": 0.5235669612884521, "learning_rate": 6.203843271766485e-09, "loss": 0.066, "step": 44978 }, { "epoch": 0.9911252871473665, "grad_norm": 0.7083268761634827, "learning_rate": 6.1730952758987105e-09, "loss": 0.0678, "step": 44979 }, { "epoch": 0.9911473224368826, "grad_norm": 0.5319219827651978, "learning_rate": 6.142423651631646e-09, "loss": 0.0335, "step": 44980 }, { "epoch": 0.9911693577263988, "grad_norm": 0.7022209167480469, "learning_rate": 6.111828399121832e-09, "loss": 0.0574, "step": 44981 }, { "epoch": 0.991191393015915, "grad_norm": 0.37244367599487305, "learning_rate": 6.0813095185241476e-09, "loss": 0.0598, "step": 44982 }, { "epoch": 0.9912134283054311, "grad_norm": 0.5203097462654114, "learning_rate": 6.050867009996797e-09, "loss": 0.0572, "step": 44983 }, { "epoch": 0.9912354635949473, "grad_norm": 0.47258085012435913, "learning_rate": 6.020500873689661e-09, "loss": 0.0472, "step": 44984 }, { "epoch": 0.9912574988844635, "grad_norm": 0.4884504973888397, "learning_rate": 5.990211109762611e-09, "loss": 0.0467, "step": 44985 }, { "epoch": 0.9912795341739796, "grad_norm": 0.5483270883560181, "learning_rate": 5.959997718367194e-09, "loss": 0.0598, "step": 44986 }, { "epoch": 0.9913015694634958, "grad_norm": 0.6806747913360596, "learning_rate": 5.929860699658285e-09, "loss": 0.0647, "step": 44987 }, { "epoch": 0.991323604753012, "grad_norm": 0.7834813594818115, "learning_rate": 5.899800053789095e-09, "loss": 0.0688, "step": 44988 }, { "epoch": 0.9913456400425281, "grad_norm": 0.7619009613990784, "learning_rate": 5.869815780912835e-09, "loss": 0.0709, "step": 44989 }, { "epoch": 0.9913676753320443, "grad_norm": 0.41356363892555237, "learning_rate": 5.8399078811810506e-09, "loss": 0.0824, "step": 44990 }, { "epoch": 0.9913897106215604, "grad_norm": 0.45265260338783264, "learning_rate": 5.810076354748617e-09, "loss": 0.0442, "step": 44991 }, { "epoch": 0.9914117459110766, "grad_norm": 0.731151282787323, "learning_rate": 5.780321201765415e-09, "loss": 0.0528, "step": 44992 }, { "epoch": 0.9914337812005928, "grad_norm": 0.5095150470733643, "learning_rate": 5.75064242238299e-09, "loss": 0.051, "step": 44993 }, { "epoch": 0.9914558164901089, "grad_norm": 0.48165711760520935, "learning_rate": 5.721040016752887e-09, "loss": 0.0489, "step": 44994 }, { "epoch": 0.9914778517796251, "grad_norm": 0.5148470997810364, "learning_rate": 5.691513985026653e-09, "loss": 0.044, "step": 44995 }, { "epoch": 0.9914998870691413, "grad_norm": 0.5594759583473206, "learning_rate": 5.662064327354166e-09, "loss": 0.0432, "step": 44996 }, { "epoch": 0.9915219223586574, "grad_norm": 0.5019558668136597, "learning_rate": 5.632691043885307e-09, "loss": 0.0471, "step": 44997 }, { "epoch": 0.9915439576481736, "grad_norm": 0.8375201225280762, "learning_rate": 5.603394134769957e-09, "loss": 0.0594, "step": 44998 }, { "epoch": 0.9915659929376897, "grad_norm": 0.6415765881538391, "learning_rate": 5.574173600157995e-09, "loss": 0.049, "step": 44999 }, { "epoch": 0.9915880282272058, "grad_norm": 0.5684521794319153, "learning_rate": 5.54502944019597e-09, "loss": 0.0557, "step": 45000 }, { "epoch": 0.991610063516722, "grad_norm": 0.5925724506378174, "learning_rate": 5.5159616550337635e-09, "loss": 0.0608, "step": 45001 }, { "epoch": 0.9916320988062381, "grad_norm": 0.7016161680221558, "learning_rate": 5.486970244821255e-09, "loss": 0.0617, "step": 45002 }, { "epoch": 0.9916541340957543, "grad_norm": 0.529464602470398, "learning_rate": 5.4580552097016625e-09, "loss": 0.0923, "step": 45003 }, { "epoch": 0.9916761693852705, "grad_norm": 0.16110488772392273, "learning_rate": 5.4292165498265325e-09, "loss": 0.0435, "step": 45004 }, { "epoch": 0.9916982046747866, "grad_norm": 0.44759243726730347, "learning_rate": 5.400454265340748e-09, "loss": 0.0491, "step": 45005 }, { "epoch": 0.9917202399643028, "grad_norm": 0.40398457646369934, "learning_rate": 5.371768356392526e-09, "loss": 0.0509, "step": 45006 }, { "epoch": 0.991742275253819, "grad_norm": 0.6306987404823303, "learning_rate": 5.343158823125083e-09, "loss": 0.0498, "step": 45007 }, { "epoch": 0.9917643105433351, "grad_norm": 0.44956445693969727, "learning_rate": 5.314625665684969e-09, "loss": 0.0364, "step": 45008 }, { "epoch": 0.9917863458328513, "grad_norm": 0.38111284375190735, "learning_rate": 5.286168884220399e-09, "loss": 0.0459, "step": 45009 }, { "epoch": 0.9918083811223675, "grad_norm": 0.733862042427063, "learning_rate": 5.257788478872927e-09, "loss": 0.0675, "step": 45010 }, { "epoch": 0.9918304164118836, "grad_norm": 0.32853835821151733, "learning_rate": 5.229484449787436e-09, "loss": 0.0391, "step": 45011 }, { "epoch": 0.9918524517013998, "grad_norm": 0.3104645907878876, "learning_rate": 5.201256797110477e-09, "loss": 0.026, "step": 45012 }, { "epoch": 0.991874486990916, "grad_norm": 0.3559321165084839, "learning_rate": 5.173105520983601e-09, "loss": 0.0454, "step": 45013 }, { "epoch": 0.9918965222804321, "grad_norm": 0.39215168356895447, "learning_rate": 5.145030621551694e-09, "loss": 0.043, "step": 45014 }, { "epoch": 0.9919185575699483, "grad_norm": 0.5849476456642151, "learning_rate": 5.117032098956309e-09, "loss": 0.0456, "step": 45015 }, { "epoch": 0.9919405928594645, "grad_norm": 1.0335474014282227, "learning_rate": 5.08910995334233e-09, "loss": 0.0503, "step": 45016 }, { "epoch": 0.9919626281489806, "grad_norm": 0.7628824710845947, "learning_rate": 5.06126418484798e-09, "loss": 0.0718, "step": 45017 }, { "epoch": 0.9919846634384968, "grad_norm": 0.8652254343032837, "learning_rate": 5.033494793619809e-09, "loss": 0.0669, "step": 45018 }, { "epoch": 0.992006698728013, "grad_norm": 0.431761771440506, "learning_rate": 5.0058017797960374e-09, "loss": 0.0589, "step": 45019 }, { "epoch": 0.9920287340175291, "grad_norm": 0.5692242383956909, "learning_rate": 4.978185143518221e-09, "loss": 0.0681, "step": 45020 }, { "epoch": 0.9920507693070453, "grad_norm": 0.4607338607311249, "learning_rate": 4.950644884927913e-09, "loss": 0.0715, "step": 45021 }, { "epoch": 0.9920728045965614, "grad_norm": 0.8735466003417969, "learning_rate": 4.923181004165e-09, "loss": 0.0755, "step": 45022 }, { "epoch": 0.9920948398860776, "grad_norm": 0.7796624302864075, "learning_rate": 4.895793501369372e-09, "loss": 0.0631, "step": 45023 }, { "epoch": 0.9921168751755937, "grad_norm": 0.4833861291408539, "learning_rate": 4.868482376679251e-09, "loss": 0.0541, "step": 45024 }, { "epoch": 0.9921389104651098, "grad_norm": 0.5505911707878113, "learning_rate": 4.84124763023619e-09, "loss": 0.03, "step": 45025 }, { "epoch": 0.992160945754626, "grad_norm": 0.6911675930023193, "learning_rate": 4.814089262178412e-09, "loss": 0.0407, "step": 45026 }, { "epoch": 0.9921829810441422, "grad_norm": 0.6138103008270264, "learning_rate": 4.787007272642474e-09, "loss": 0.044, "step": 45027 }, { "epoch": 0.9922050163336583, "grad_norm": 0.5472707152366638, "learning_rate": 4.7600016617666e-09, "loss": 0.0507, "step": 45028 }, { "epoch": 0.9922270516231745, "grad_norm": 0.7783006429672241, "learning_rate": 4.733072429690677e-09, "loss": 0.0514, "step": 45029 }, { "epoch": 0.9922490869126906, "grad_norm": 0.959805428981781, "learning_rate": 4.706219576549597e-09, "loss": 0.0719, "step": 45030 }, { "epoch": 0.9922711222022068, "grad_norm": 0.6680281758308411, "learning_rate": 4.679443102481584e-09, "loss": 0.0384, "step": 45031 }, { "epoch": 0.992293157491723, "grad_norm": 0.8240622878074646, "learning_rate": 4.652743007621529e-09, "loss": 0.0757, "step": 45032 }, { "epoch": 0.9923151927812391, "grad_norm": 0.7226822376251221, "learning_rate": 4.62611929210599e-09, "loss": 0.0651, "step": 45033 }, { "epoch": 0.9923372280707553, "grad_norm": 0.8624171614646912, "learning_rate": 4.599571956071525e-09, "loss": 0.0579, "step": 45034 }, { "epoch": 0.9923592633602715, "grad_norm": 0.6795855164527893, "learning_rate": 4.573100999651358e-09, "loss": 0.0576, "step": 45035 }, { "epoch": 0.9923812986497876, "grad_norm": 0.16127130389213562, "learning_rate": 4.5467064229837154e-09, "loss": 0.0477, "step": 45036 }, { "epoch": 0.9924033339393038, "grad_norm": 0.462501585483551, "learning_rate": 4.520388226198491e-09, "loss": 0.0538, "step": 45037 }, { "epoch": 0.99242536922882, "grad_norm": 0.8671377897262573, "learning_rate": 4.494146409433908e-09, "loss": 0.0953, "step": 45038 }, { "epoch": 0.9924474045183361, "grad_norm": 0.6851953864097595, "learning_rate": 4.467980972821528e-09, "loss": 0.0588, "step": 45039 }, { "epoch": 0.9924694398078523, "grad_norm": 0.5737457275390625, "learning_rate": 4.441891916494578e-09, "loss": 0.0608, "step": 45040 }, { "epoch": 0.9924914750973685, "grad_norm": 0.37258172035217285, "learning_rate": 4.415879240586284e-09, "loss": 0.0456, "step": 45041 }, { "epoch": 0.9925135103868846, "grad_norm": 0.3475186824798584, "learning_rate": 4.389942945229874e-09, "loss": 0.0468, "step": 45042 }, { "epoch": 0.9925355456764008, "grad_norm": 0.7798328399658203, "learning_rate": 4.364083030555244e-09, "loss": 0.0615, "step": 45043 }, { "epoch": 0.992557580965917, "grad_norm": 0.9505696892738342, "learning_rate": 4.3382994966972846e-09, "loss": 0.053, "step": 45044 }, { "epoch": 0.9925796162554331, "grad_norm": 0.4683724641799927, "learning_rate": 4.312592343785893e-09, "loss": 0.067, "step": 45045 }, { "epoch": 0.9926016515449493, "grad_norm": 0.9311237335205078, "learning_rate": 4.286961571950965e-09, "loss": 0.0743, "step": 45046 }, { "epoch": 0.9926236868344654, "grad_norm": 0.8150525093078613, "learning_rate": 4.261407181322397e-09, "loss": 0.0525, "step": 45047 }, { "epoch": 0.9926457221239816, "grad_norm": 0.7000532150268555, "learning_rate": 4.235929172033415e-09, "loss": 0.0572, "step": 45048 }, { "epoch": 0.9926677574134977, "grad_norm": 0.44131383299827576, "learning_rate": 4.210527544212251e-09, "loss": 0.0338, "step": 45049 }, { "epoch": 0.9926897927030138, "grad_norm": 1.0050792694091797, "learning_rate": 4.1852022979887996e-09, "loss": 0.0678, "step": 45050 }, { "epoch": 0.99271182799253, "grad_norm": 0.8750869035720825, "learning_rate": 4.159953433489627e-09, "loss": 0.0348, "step": 45051 }, { "epoch": 0.9927338632820462, "grad_norm": 0.6410208344459534, "learning_rate": 4.134780950847961e-09, "loss": 0.0536, "step": 45052 }, { "epoch": 0.9927558985715623, "grad_norm": 0.7335688471794128, "learning_rate": 4.109684850187034e-09, "loss": 0.0656, "step": 45053 }, { "epoch": 0.9927779338610785, "grad_norm": 0.6160100698471069, "learning_rate": 4.084665131636744e-09, "loss": 0.0486, "step": 45054 }, { "epoch": 0.9927999691505947, "grad_norm": 0.8247015476226807, "learning_rate": 4.059721795325322e-09, "loss": 0.0694, "step": 45055 }, { "epoch": 0.9928220044401108, "grad_norm": 0.5669237971305847, "learning_rate": 4.0348548413809975e-09, "loss": 0.0464, "step": 45056 }, { "epoch": 0.992844039729627, "grad_norm": 0.33130937814712524, "learning_rate": 4.010064269927005e-09, "loss": 0.039, "step": 45057 }, { "epoch": 0.9928660750191431, "grad_norm": 0.40597134828567505, "learning_rate": 3.9853500810899114e-09, "loss": 0.0595, "step": 45058 }, { "epoch": 0.9928881103086593, "grad_norm": 0.5704472661018372, "learning_rate": 3.960712274999612e-09, "loss": 0.0614, "step": 45059 }, { "epoch": 0.9929101455981755, "grad_norm": 0.7330963015556335, "learning_rate": 3.936150851777675e-09, "loss": 0.0603, "step": 45060 }, { "epoch": 0.9929321808876916, "grad_norm": 0.5667505264282227, "learning_rate": 3.911665811550669e-09, "loss": 0.0467, "step": 45061 }, { "epoch": 0.9929542161772078, "grad_norm": 0.6814613938331604, "learning_rate": 3.8872571544418255e-09, "loss": 0.0507, "step": 45062 }, { "epoch": 0.992976251466724, "grad_norm": 0.3081428110599518, "learning_rate": 3.862924880577712e-09, "loss": 0.0498, "step": 45063 }, { "epoch": 0.9929982867562401, "grad_norm": 0.7389532327651978, "learning_rate": 3.838668990081562e-09, "loss": 0.0739, "step": 45064 }, { "epoch": 0.9930203220457563, "grad_norm": 0.5997339487075806, "learning_rate": 3.814489483076611e-09, "loss": 0.0563, "step": 45065 }, { "epoch": 0.9930423573352725, "grad_norm": 0.6597935557365417, "learning_rate": 3.790386359686093e-09, "loss": 0.064, "step": 45066 }, { "epoch": 0.9930643926247886, "grad_norm": 0.5287886261940002, "learning_rate": 3.766359620031579e-09, "loss": 0.054, "step": 45067 }, { "epoch": 0.9930864279143048, "grad_norm": 0.3912377655506134, "learning_rate": 3.742409264237967e-09, "loss": 0.0547, "step": 45068 }, { "epoch": 0.993108463203821, "grad_norm": 0.5944440960884094, "learning_rate": 3.7185352924251626e-09, "loss": 0.0475, "step": 45069 }, { "epoch": 0.9931304984933371, "grad_norm": 0.6250649094581604, "learning_rate": 3.6947377047164e-09, "loss": 0.0586, "step": 45070 }, { "epoch": 0.9931525337828533, "grad_norm": 0.5107494592666626, "learning_rate": 3.671016501229918e-09, "loss": 0.0802, "step": 45071 }, { "epoch": 0.9931745690723695, "grad_norm": 0.6121299266815186, "learning_rate": 3.6473716820906167e-09, "loss": 0.061, "step": 45072 }, { "epoch": 0.9931966043618855, "grad_norm": 0.5181767344474792, "learning_rate": 3.623803247416735e-09, "loss": 0.0577, "step": 45073 }, { "epoch": 0.9932186396514017, "grad_norm": 0.49966469407081604, "learning_rate": 3.6003111973281768e-09, "loss": 0.0514, "step": 45074 }, { "epoch": 0.9932406749409178, "grad_norm": 0.4490801692008972, "learning_rate": 3.576895531944846e-09, "loss": 0.0564, "step": 45075 }, { "epoch": 0.993262710230434, "grad_norm": 0.43047693371772766, "learning_rate": 3.553556251386647e-09, "loss": 0.0552, "step": 45076 }, { "epoch": 0.9932847455199502, "grad_norm": 0.6729393005371094, "learning_rate": 3.5302933557718186e-09, "loss": 0.0807, "step": 45077 }, { "epoch": 0.9933067808094663, "grad_norm": 0.8836110234260559, "learning_rate": 3.5071068452185996e-09, "loss": 0.0755, "step": 45078 }, { "epoch": 0.9933288160989825, "grad_norm": 0.4948033392429352, "learning_rate": 3.483996719845228e-09, "loss": 0.0443, "step": 45079 }, { "epoch": 0.9933508513884987, "grad_norm": 2.4363582134246826, "learning_rate": 3.4609629797699438e-09, "loss": 0.0633, "step": 45080 }, { "epoch": 0.9933728866780148, "grad_norm": 0.6072437763214111, "learning_rate": 3.4380056251093194e-09, "loss": 0.0787, "step": 45081 }, { "epoch": 0.993394921967531, "grad_norm": 0.552324652671814, "learning_rate": 3.415124655981594e-09, "loss": 0.0444, "step": 45082 }, { "epoch": 0.9934169572570472, "grad_norm": 0.6307885646820068, "learning_rate": 3.3923200725016756e-09, "loss": 0.064, "step": 45083 }, { "epoch": 0.9934389925465633, "grad_norm": 0.6225097179412842, "learning_rate": 3.3695918747861377e-09, "loss": 0.0351, "step": 45084 }, { "epoch": 0.9934610278360795, "grad_norm": 0.5721547603607178, "learning_rate": 3.3469400629515536e-09, "loss": 0.0669, "step": 45085 }, { "epoch": 0.9934830631255956, "grad_norm": 0.7385168075561523, "learning_rate": 3.324364637112831e-09, "loss": 0.0465, "step": 45086 }, { "epoch": 0.9935050984151118, "grad_norm": 0.44376102089881897, "learning_rate": 3.301865597384879e-09, "loss": 0.0568, "step": 45087 }, { "epoch": 0.993527133704628, "grad_norm": 0.6611155867576599, "learning_rate": 3.2794429438826045e-09, "loss": 0.0538, "step": 45088 }, { "epoch": 0.9935491689941441, "grad_norm": 0.5219758749008179, "learning_rate": 3.2570966767209166e-09, "loss": 0.0412, "step": 45089 }, { "epoch": 0.9935712042836603, "grad_norm": 0.6623722314834595, "learning_rate": 3.234826796011392e-09, "loss": 0.0647, "step": 45090 }, { "epoch": 0.9935932395731765, "grad_norm": 0.7807127237319946, "learning_rate": 3.212633301868939e-09, "loss": 0.0599, "step": 45091 }, { "epoch": 0.9936152748626926, "grad_norm": 0.4427219033241272, "learning_rate": 3.1905161944068006e-09, "loss": 0.0523, "step": 45092 }, { "epoch": 0.9936373101522088, "grad_norm": 1.0882022380828857, "learning_rate": 3.1684754737382195e-09, "loss": 0.0627, "step": 45093 }, { "epoch": 0.993659345441725, "grad_norm": 0.697951078414917, "learning_rate": 3.1465111399731074e-09, "loss": 0.0785, "step": 45094 }, { "epoch": 0.9936813807312411, "grad_norm": 0.7387694716453552, "learning_rate": 3.1246231932247072e-09, "loss": 0.0779, "step": 45095 }, { "epoch": 0.9937034160207573, "grad_norm": 0.7551199793815613, "learning_rate": 3.1028116336062618e-09, "loss": 0.0648, "step": 45096 }, { "epoch": 0.9937254513102735, "grad_norm": 1.1621121168136597, "learning_rate": 3.081076461226018e-09, "loss": 0.1165, "step": 45097 }, { "epoch": 0.9937474865997895, "grad_norm": 0.8269281983375549, "learning_rate": 3.059417676195553e-09, "loss": 0.0665, "step": 45098 }, { "epoch": 0.9937695218893057, "grad_norm": 0.5229749083518982, "learning_rate": 3.0378352786247787e-09, "loss": 0.0503, "step": 45099 }, { "epoch": 0.9937915571788218, "grad_norm": 0.43440353870391846, "learning_rate": 3.016329268625273e-09, "loss": 0.0562, "step": 45100 }, { "epoch": 0.993813592468338, "grad_norm": 0.5998855829238892, "learning_rate": 2.994899646306948e-09, "loss": 0.0611, "step": 45101 }, { "epoch": 0.9938356277578542, "grad_norm": 0.5789843797683716, "learning_rate": 2.9735464117763843e-09, "loss": 0.0459, "step": 45102 }, { "epoch": 0.9938576630473703, "grad_norm": 0.5644580721855164, "learning_rate": 2.952269565143495e-09, "loss": 0.0514, "step": 45103 }, { "epoch": 0.9938796983368865, "grad_norm": 0.6767379641532898, "learning_rate": 2.9310691065165264e-09, "loss": 0.0666, "step": 45104 }, { "epoch": 0.9939017336264027, "grad_norm": 0.7975236177444458, "learning_rate": 2.909945036003725e-09, "loss": 0.046, "step": 45105 }, { "epoch": 0.9939237689159188, "grad_norm": 0.6720994114875793, "learning_rate": 2.888897353713338e-09, "loss": 0.0624, "step": 45106 }, { "epoch": 0.993945804205435, "grad_norm": 0.522133469581604, "learning_rate": 2.867926059750281e-09, "loss": 0.0309, "step": 45107 }, { "epoch": 0.9939678394949512, "grad_norm": 0.5728567838668823, "learning_rate": 2.8470311542244666e-09, "loss": 0.0445, "step": 45108 }, { "epoch": 0.9939898747844673, "grad_norm": 0.9675857424736023, "learning_rate": 2.8262126372408105e-09, "loss": 0.0688, "step": 45109 }, { "epoch": 0.9940119100739835, "grad_norm": 0.5812669992446899, "learning_rate": 2.8054705089042287e-09, "loss": 0.0563, "step": 45110 }, { "epoch": 0.9940339453634996, "grad_norm": 0.5954068303108215, "learning_rate": 2.7848047693229685e-09, "loss": 0.033, "step": 45111 }, { "epoch": 0.9940559806530158, "grad_norm": 0.4755314290523529, "learning_rate": 2.7642154186002798e-09, "loss": 0.0773, "step": 45112 }, { "epoch": 0.994078015942532, "grad_norm": 0.5759387612342834, "learning_rate": 2.7437024568410797e-09, "loss": 0.0488, "step": 45113 }, { "epoch": 0.9941000512320481, "grad_norm": 0.7415509223937988, "learning_rate": 2.723265884151949e-09, "loss": 0.0696, "step": 45114 }, { "epoch": 0.9941220865215643, "grad_norm": 0.4776339530944824, "learning_rate": 2.7029057006328072e-09, "loss": 0.0689, "step": 45115 }, { "epoch": 0.9941441218110805, "grad_norm": 0.4237089157104492, "learning_rate": 2.6826219063919023e-09, "loss": 0.0566, "step": 45116 }, { "epoch": 0.9941661571005966, "grad_norm": 0.6951907873153687, "learning_rate": 2.662414501530819e-09, "loss": 0.0614, "step": 45117 }, { "epoch": 0.9941881923901128, "grad_norm": 0.6454085111618042, "learning_rate": 2.6422834861511424e-09, "loss": 0.0779, "step": 45118 }, { "epoch": 0.994210227679629, "grad_norm": 0.5537161827087402, "learning_rate": 2.622228860356124e-09, "loss": 0.0744, "step": 45119 }, { "epoch": 0.9942322629691451, "grad_norm": 0.7107375264167786, "learning_rate": 2.6022506242506794e-09, "loss": 0.0514, "step": 45120 }, { "epoch": 0.9942542982586613, "grad_norm": 0.48004409670829773, "learning_rate": 2.582348777931398e-09, "loss": 0.0383, "step": 45121 }, { "epoch": 0.9942763335481775, "grad_norm": 0.6082025170326233, "learning_rate": 2.5625233215048614e-09, "loss": 0.0434, "step": 45122 }, { "epoch": 0.9942983688376935, "grad_norm": 0.6841900944709778, "learning_rate": 2.542774255067659e-09, "loss": 0.0948, "step": 45123 }, { "epoch": 0.9943204041272097, "grad_norm": 0.5350099205970764, "learning_rate": 2.5231015787230417e-09, "loss": 0.0506, "step": 45124 }, { "epoch": 0.9943424394167258, "grad_norm": 0.5526641607284546, "learning_rate": 2.503505292570929e-09, "loss": 0.064, "step": 45125 }, { "epoch": 0.994364474706242, "grad_norm": 0.9611636996269226, "learning_rate": 2.4839853967112416e-09, "loss": 0.0388, "step": 45126 }, { "epoch": 0.9943865099957582, "grad_norm": 1.03121817111969, "learning_rate": 2.4645418912422336e-09, "loss": 0.0704, "step": 45127 }, { "epoch": 0.9944085452852743, "grad_norm": 0.34743550419807434, "learning_rate": 2.4451747762638253e-09, "loss": 0.0609, "step": 45128 }, { "epoch": 0.9944305805747905, "grad_norm": 0.37143078446388245, "learning_rate": 2.425884051874272e-09, "loss": 0.0476, "step": 45129 }, { "epoch": 0.9944526158643067, "grad_norm": 0.40046459436416626, "learning_rate": 2.4066697181718278e-09, "loss": 0.0467, "step": 45130 }, { "epoch": 0.9944746511538228, "grad_norm": 0.6591762900352478, "learning_rate": 2.3875317752547477e-09, "loss": 0.0593, "step": 45131 }, { "epoch": 0.994496686443339, "grad_norm": 0.23148441314697266, "learning_rate": 2.368470223221286e-09, "loss": 0.0531, "step": 45132 }, { "epoch": 0.9945187217328552, "grad_norm": 0.4820151627063751, "learning_rate": 2.3494850621663677e-09, "loss": 0.0329, "step": 45133 }, { "epoch": 0.9945407570223713, "grad_norm": 1.068617820739746, "learning_rate": 2.3305762921882466e-09, "loss": 0.0769, "step": 45134 }, { "epoch": 0.9945627923118875, "grad_norm": 0.2903565764427185, "learning_rate": 2.3117439133835128e-09, "loss": 0.0503, "step": 45135 }, { "epoch": 0.9945848276014037, "grad_norm": 0.6550456285476685, "learning_rate": 2.2929879258454246e-09, "loss": 0.0567, "step": 45136 }, { "epoch": 0.9946068628909198, "grad_norm": 0.6586815714836121, "learning_rate": 2.274308329673902e-09, "loss": 0.0423, "step": 45137 }, { "epoch": 0.994628898180436, "grad_norm": 0.5494547486305237, "learning_rate": 2.255705124960539e-09, "loss": 0.0586, "step": 45138 }, { "epoch": 0.9946509334699521, "grad_norm": 0.7617752552032471, "learning_rate": 2.237178311801924e-09, "loss": 0.0604, "step": 45139 }, { "epoch": 0.9946729687594683, "grad_norm": 0.23325106501579285, "learning_rate": 2.2187278902913166e-09, "loss": 0.0407, "step": 45140 }, { "epoch": 0.9946950040489845, "grad_norm": 0.3933684825897217, "learning_rate": 2.2003538605236405e-09, "loss": 0.0517, "step": 45141 }, { "epoch": 0.9947170393385006, "grad_norm": 0.5844690203666687, "learning_rate": 2.1820562225904893e-09, "loss": 0.0541, "step": 45142 }, { "epoch": 0.9947390746280168, "grad_norm": 0.8863264322280884, "learning_rate": 2.1638349765884526e-09, "loss": 0.0901, "step": 45143 }, { "epoch": 0.994761109917533, "grad_norm": 0.48905619978904724, "learning_rate": 2.1456901226074577e-09, "loss": 0.0439, "step": 45144 }, { "epoch": 0.9947831452070491, "grad_norm": 0.4085858464241028, "learning_rate": 2.1276216607424294e-09, "loss": 0.0434, "step": 45145 }, { "epoch": 0.9948051804965653, "grad_norm": 0.4660601317882538, "learning_rate": 2.10962959108163e-09, "loss": 0.0305, "step": 45146 }, { "epoch": 0.9948272157860815, "grad_norm": 0.49254798889160156, "learning_rate": 2.0917139137199837e-09, "loss": 0.0542, "step": 45147 }, { "epoch": 0.9948492510755975, "grad_norm": 0.38490769267082214, "learning_rate": 2.073874628749084e-09, "loss": 0.0477, "step": 45148 }, { "epoch": 0.9948712863651137, "grad_norm": 0.60969477891922, "learning_rate": 2.0561117362571936e-09, "loss": 0.0609, "step": 45149 }, { "epoch": 0.9948933216546298, "grad_norm": 0.5266021490097046, "learning_rate": 2.0384252363359057e-09, "loss": 0.0439, "step": 45150 }, { "epoch": 0.994915356944146, "grad_norm": 0.5841307044029236, "learning_rate": 2.0208151290751485e-09, "loss": 0.0784, "step": 45151 }, { "epoch": 0.9949373922336622, "grad_norm": 0.7014485001564026, "learning_rate": 2.0032814145665155e-09, "loss": 0.0586, "step": 45152 }, { "epoch": 0.9949594275231783, "grad_norm": 0.6569543480873108, "learning_rate": 1.9858240928966044e-09, "loss": 0.0565, "step": 45153 }, { "epoch": 0.9949814628126945, "grad_norm": 0.7699933052062988, "learning_rate": 1.9684431641553425e-09, "loss": 0.0563, "step": 45154 }, { "epoch": 0.9950034981022107, "grad_norm": 0.838249146938324, "learning_rate": 1.951138628430993e-09, "loss": 0.077, "step": 45155 }, { "epoch": 0.9950255333917268, "grad_norm": 0.5685204267501831, "learning_rate": 1.9339104858134837e-09, "loss": 0.0408, "step": 45156 }, { "epoch": 0.995047568681243, "grad_norm": 0.7130295038223267, "learning_rate": 1.916758736387747e-09, "loss": 0.0818, "step": 45157 }, { "epoch": 0.9950696039707592, "grad_norm": 0.3748666048049927, "learning_rate": 1.899683380243711e-09, "loss": 0.0757, "step": 45158 }, { "epoch": 0.9950916392602753, "grad_norm": 0.45037928223609924, "learning_rate": 1.882684417466307e-09, "loss": 0.0592, "step": 45159 }, { "epoch": 0.9951136745497915, "grad_norm": 0.5178185105323792, "learning_rate": 1.865761848143799e-09, "loss": 0.0446, "step": 45160 }, { "epoch": 0.9951357098393077, "grad_norm": 0.5550104975700378, "learning_rate": 1.8489156723611178e-09, "loss": 0.0675, "step": 45161 }, { "epoch": 0.9951577451288238, "grad_norm": 0.6030501127243042, "learning_rate": 1.8321458902048616e-09, "loss": 0.0441, "step": 45162 }, { "epoch": 0.99517978041834, "grad_norm": 0.9607412219047546, "learning_rate": 1.8154525017599621e-09, "loss": 0.0708, "step": 45163 }, { "epoch": 0.9952018157078562, "grad_norm": 0.40686672925949097, "learning_rate": 1.7988355071113515e-09, "loss": 0.0531, "step": 45164 }, { "epoch": 0.9952238509973723, "grad_norm": 0.4083010256290436, "learning_rate": 1.782294906343962e-09, "loss": 0.0646, "step": 45165 }, { "epoch": 0.9952458862868885, "grad_norm": 0.6546378135681152, "learning_rate": 1.7658306995427254e-09, "loss": 0.0557, "step": 45166 }, { "epoch": 0.9952679215764046, "grad_norm": 0.9558343291282654, "learning_rate": 1.7494428867909085e-09, "loss": 0.0589, "step": 45167 }, { "epoch": 0.9952899568659208, "grad_norm": 0.7390790581703186, "learning_rate": 1.733131468171778e-09, "loss": 0.0587, "step": 45168 }, { "epoch": 0.995311992155437, "grad_norm": 0.3597433865070343, "learning_rate": 1.7168964437669354e-09, "loss": 0.0388, "step": 45169 }, { "epoch": 0.9953340274449531, "grad_norm": 0.38088223338127136, "learning_rate": 1.700737813662978e-09, "loss": 0.0318, "step": 45170 }, { "epoch": 0.9953560627344693, "grad_norm": 0.40901702642440796, "learning_rate": 1.6846555779381766e-09, "loss": 0.0388, "step": 45171 }, { "epoch": 0.9953780980239854, "grad_norm": 0.5516040325164795, "learning_rate": 1.668649736675798e-09, "loss": 0.0642, "step": 45172 }, { "epoch": 0.9954001333135015, "grad_norm": 0.6144880056381226, "learning_rate": 1.6527202899591086e-09, "loss": 0.0626, "step": 45173 }, { "epoch": 0.9954221686030177, "grad_norm": 0.4552844762802124, "learning_rate": 1.636867237868045e-09, "loss": 0.0233, "step": 45174 }, { "epoch": 0.9954442038925339, "grad_norm": 0.582804799079895, "learning_rate": 1.6210905804825426e-09, "loss": 0.0577, "step": 45175 }, { "epoch": 0.99546623918205, "grad_norm": 0.5834397077560425, "learning_rate": 1.605390317882538e-09, "loss": 0.0517, "step": 45176 }, { "epoch": 0.9954882744715662, "grad_norm": 0.2713630497455597, "learning_rate": 1.5897664501512976e-09, "loss": 0.0229, "step": 45177 }, { "epoch": 0.9955103097610823, "grad_norm": 0.7652875781059265, "learning_rate": 1.5742189773637617e-09, "loss": 0.0791, "step": 45178 }, { "epoch": 0.9955323450505985, "grad_norm": 0.6174317002296448, "learning_rate": 1.5587478996031967e-09, "loss": 0.045, "step": 45179 }, { "epoch": 0.9955543803401147, "grad_norm": 0.5657432079315186, "learning_rate": 1.5433532169462083e-09, "loss": 0.0632, "step": 45180 }, { "epoch": 0.9955764156296308, "grad_norm": 1.0287429094314575, "learning_rate": 1.5280349294727325e-09, "loss": 0.06, "step": 45181 }, { "epoch": 0.995598450919147, "grad_norm": 0.7367843985557556, "learning_rate": 1.5127930372577093e-09, "loss": 0.0484, "step": 45182 }, { "epoch": 0.9956204862086632, "grad_norm": 0.47480452060699463, "learning_rate": 1.49762754038274e-09, "loss": 0.0455, "step": 45183 }, { "epoch": 0.9956425214981793, "grad_norm": 0.7141116857528687, "learning_rate": 1.4825384389227648e-09, "loss": 0.0511, "step": 45184 }, { "epoch": 0.9956645567876955, "grad_norm": 0.4444514811038971, "learning_rate": 1.4675257329543889e-09, "loss": 0.0569, "step": 45185 }, { "epoch": 0.9956865920772117, "grad_norm": 0.3865783214569092, "learning_rate": 1.452589422554218e-09, "loss": 0.0539, "step": 45186 }, { "epoch": 0.9957086273667278, "grad_norm": 0.2635217308998108, "learning_rate": 1.4377295078005227e-09, "loss": 0.0528, "step": 45187 }, { "epoch": 0.995730662656244, "grad_norm": 0.5931330919265747, "learning_rate": 1.4229459887665774e-09, "loss": 0.0545, "step": 45188 }, { "epoch": 0.9957526979457602, "grad_norm": 0.3382537066936493, "learning_rate": 1.4082388655289879e-09, "loss": 0.0511, "step": 45189 }, { "epoch": 0.9957747332352763, "grad_norm": 0.5901804566383362, "learning_rate": 1.393608138161029e-09, "loss": 0.0383, "step": 45190 }, { "epoch": 0.9957967685247925, "grad_norm": 0.40224242210388184, "learning_rate": 1.3790538067393054e-09, "loss": 0.0614, "step": 45191 }, { "epoch": 0.9958188038143087, "grad_norm": 0.756619393825531, "learning_rate": 1.3645758713354273e-09, "loss": 0.0607, "step": 45192 }, { "epoch": 0.9958408391038248, "grad_norm": 0.6372296214103699, "learning_rate": 1.3501743320259996e-09, "loss": 0.0313, "step": 45193 }, { "epoch": 0.995862874393341, "grad_norm": 0.7435776591300964, "learning_rate": 1.3358491888826318e-09, "loss": 0.044, "step": 45194 }, { "epoch": 0.9958849096828571, "grad_norm": 0.48020946979522705, "learning_rate": 1.3216004419785988e-09, "loss": 0.0366, "step": 45195 }, { "epoch": 0.9959069449723733, "grad_norm": 0.2900446951389313, "learning_rate": 1.307428091387175e-09, "loss": 0.0923, "step": 45196 }, { "epoch": 0.9959289802618894, "grad_norm": 0.7840682864189148, "learning_rate": 1.29333213717997e-09, "loss": 0.0579, "step": 45197 }, { "epoch": 0.9959510155514055, "grad_norm": 0.5934045910835266, "learning_rate": 1.279312579428593e-09, "loss": 0.053, "step": 45198 }, { "epoch": 0.9959730508409217, "grad_norm": 0.5673169493675232, "learning_rate": 1.2653694182046537e-09, "loss": 0.0585, "step": 45199 }, { "epoch": 0.9959950861304379, "grad_norm": 0.5464188456535339, "learning_rate": 1.251502653578096e-09, "loss": 0.0676, "step": 45200 }, { "epoch": 0.996017121419954, "grad_norm": 0.8202965259552002, "learning_rate": 1.2377122856221945e-09, "loss": 0.0637, "step": 45201 }, { "epoch": 0.9960391567094702, "grad_norm": 0.5075218081474304, "learning_rate": 1.223998314405228e-09, "loss": 0.0464, "step": 45202 }, { "epoch": 0.9960611919989864, "grad_norm": 0.8606501817703247, "learning_rate": 1.2103607399971406e-09, "loss": 0.0417, "step": 45203 }, { "epoch": 0.9960832272885025, "grad_norm": 0.40708279609680176, "learning_rate": 1.1967995624662108e-09, "loss": 0.0713, "step": 45204 }, { "epoch": 0.9961052625780187, "grad_norm": 0.33290207386016846, "learning_rate": 1.1833147818857138e-09, "loss": 0.0477, "step": 45205 }, { "epoch": 0.9961272978675348, "grad_norm": 0.2845739722251892, "learning_rate": 1.1699063983189318e-09, "loss": 0.0687, "step": 45206 }, { "epoch": 0.996149333157051, "grad_norm": 0.6206848621368408, "learning_rate": 1.1565744118391398e-09, "loss": 0.0503, "step": 45207 }, { "epoch": 0.9961713684465672, "grad_norm": 0.2622070014476776, "learning_rate": 1.1433188225112857e-09, "loss": 0.0576, "step": 45208 }, { "epoch": 0.9961934037360833, "grad_norm": 0.443240225315094, "learning_rate": 1.1301396304019829e-09, "loss": 0.0493, "step": 45209 }, { "epoch": 0.9962154390255995, "grad_norm": 1.028342843055725, "learning_rate": 1.1170368355811756e-09, "loss": 0.0702, "step": 45210 }, { "epoch": 0.9962374743151157, "grad_norm": 0.9335147738456726, "learning_rate": 1.104010438115477e-09, "loss": 0.057, "step": 45211 }, { "epoch": 0.9962595096046318, "grad_norm": 0.6220351457595825, "learning_rate": 1.09106043806817e-09, "loss": 0.0377, "step": 45212 }, { "epoch": 0.996281544894148, "grad_norm": 0.5874268412590027, "learning_rate": 1.078186835507533e-09, "loss": 0.0518, "step": 45213 }, { "epoch": 0.9963035801836642, "grad_norm": 0.5595577359199524, "learning_rate": 1.0653896305001798e-09, "loss": 0.045, "step": 45214 }, { "epoch": 0.9963256154731803, "grad_norm": 0.35160523653030396, "learning_rate": 1.0526688231077274e-09, "loss": 0.0294, "step": 45215 }, { "epoch": 0.9963476507626965, "grad_norm": 0.6892489194869995, "learning_rate": 1.0400244133984549e-09, "loss": 0.0586, "step": 45216 }, { "epoch": 0.9963696860522127, "grad_norm": 0.6612722277641296, "learning_rate": 1.0274564014356447e-09, "loss": 0.0559, "step": 45217 }, { "epoch": 0.9963917213417288, "grad_norm": 0.30102550983428955, "learning_rate": 1.0149647872825796e-09, "loss": 0.0557, "step": 45218 }, { "epoch": 0.996413756631245, "grad_norm": 0.42620041966438293, "learning_rate": 1.0025495710025422e-09, "loss": 0.0582, "step": 45219 }, { "epoch": 0.9964357919207611, "grad_norm": 0.6855955123901367, "learning_rate": 9.90210752660481e-10, "loss": 0.0712, "step": 45220 }, { "epoch": 0.9964578272102773, "grad_norm": 0.36885419487953186, "learning_rate": 9.779483323180127e-10, "loss": 0.0531, "step": 45221 }, { "epoch": 0.9964798624997934, "grad_norm": 0.4203348755836487, "learning_rate": 9.657623100384206e-10, "loss": 0.0635, "step": 45222 }, { "epoch": 0.9965018977893095, "grad_norm": 0.726220965385437, "learning_rate": 9.536526858833218e-10, "loss": 0.0843, "step": 45223 }, { "epoch": 0.9965239330788257, "grad_norm": 0.5741472244262695, "learning_rate": 9.416194599143334e-10, "loss": 0.0558, "step": 45224 }, { "epoch": 0.9965459683683419, "grad_norm": 0.20848791301250458, "learning_rate": 9.296626321930735e-10, "loss": 0.0353, "step": 45225 }, { "epoch": 0.996568003657858, "grad_norm": 0.6828774213790894, "learning_rate": 9.177822027794935e-10, "loss": 0.0683, "step": 45226 }, { "epoch": 0.9965900389473742, "grad_norm": 0.5289086699485779, "learning_rate": 9.059781717352111e-10, "loss": 0.0592, "step": 45227 }, { "epoch": 0.9966120742368904, "grad_norm": 0.3700433671474457, "learning_rate": 8.942505391201782e-10, "loss": 0.0669, "step": 45228 }, { "epoch": 0.9966341095264065, "grad_norm": 0.15376399457454681, "learning_rate": 8.825993049943471e-10, "loss": 0.047, "step": 45229 }, { "epoch": 0.9966561448159227, "grad_norm": 0.4284101128578186, "learning_rate": 8.710244694160041e-10, "loss": 0.0443, "step": 45230 }, { "epoch": 0.9966781801054388, "grad_norm": 0.8280767202377319, "learning_rate": 8.595260324434362e-10, "loss": 0.0694, "step": 45231 }, { "epoch": 0.996700215394955, "grad_norm": 0.5217053890228271, "learning_rate": 8.481039941382607e-10, "loss": 0.054, "step": 45232 }, { "epoch": 0.9967222506844712, "grad_norm": 0.5685285329818726, "learning_rate": 8.367583545554336e-10, "loss": 0.0427, "step": 45233 }, { "epoch": 0.9967442859739873, "grad_norm": 0.5045018792152405, "learning_rate": 8.25489113754907e-10, "loss": 0.0562, "step": 45234 }, { "epoch": 0.9967663212635035, "grad_norm": 0.9329564571380615, "learning_rate": 8.142962717933022e-10, "loss": 0.0838, "step": 45235 }, { "epoch": 0.9967883565530197, "grad_norm": 0.6704280376434326, "learning_rate": 8.031798287289061e-10, "loss": 0.0597, "step": 45236 }, { "epoch": 0.9968103918425358, "grad_norm": 0.38404661417007446, "learning_rate": 7.921397846150091e-10, "loss": 0.0818, "step": 45237 }, { "epoch": 0.996832427132052, "grad_norm": 0.7966626882553101, "learning_rate": 7.811761395115636e-10, "loss": 0.0549, "step": 45238 }, { "epoch": 0.9968544624215682, "grad_norm": 0.6149798631668091, "learning_rate": 7.702888934718599e-10, "loss": 0.0571, "step": 45239 }, { "epoch": 0.9968764977110843, "grad_norm": 0.5806102156639099, "learning_rate": 7.594780465525197e-10, "loss": 0.0541, "step": 45240 }, { "epoch": 0.9968985330006005, "grad_norm": 0.760833203792572, "learning_rate": 7.48743598808499e-10, "loss": 0.0624, "step": 45241 }, { "epoch": 0.9969205682901167, "grad_norm": 0.6888878345489502, "learning_rate": 7.380855502947536e-10, "loss": 0.0692, "step": 45242 }, { "epoch": 0.9969426035796328, "grad_norm": 0.6227827072143555, "learning_rate": 7.275039010645745e-10, "loss": 0.0593, "step": 45243 }, { "epoch": 0.996964638869149, "grad_norm": 1.1380054950714111, "learning_rate": 7.169986511729176e-10, "loss": 0.0836, "step": 45244 }, { "epoch": 0.9969866741586652, "grad_norm": 0.5107117891311646, "learning_rate": 7.065698006730737e-10, "loss": 0.0466, "step": 45245 }, { "epoch": 0.9970087094481812, "grad_norm": 0.37203678488731384, "learning_rate": 6.962173496166679e-10, "loss": 0.0561, "step": 45246 }, { "epoch": 0.9970307447376974, "grad_norm": 0.43761858344078064, "learning_rate": 6.859412980586565e-10, "loss": 0.0634, "step": 45247 }, { "epoch": 0.9970527800272135, "grad_norm": 0.5792484879493713, "learning_rate": 6.757416460506649e-10, "loss": 0.0526, "step": 45248 }, { "epoch": 0.9970748153167297, "grad_norm": 0.43825000524520874, "learning_rate": 6.656183936443183e-10, "loss": 0.0512, "step": 45249 }, { "epoch": 0.9970968506062459, "grad_norm": 0.5261538624763489, "learning_rate": 6.555715408912422e-10, "loss": 0.0502, "step": 45250 }, { "epoch": 0.997118885895762, "grad_norm": 0.7453839778900146, "learning_rate": 6.456010878413965e-10, "loss": 0.0692, "step": 45251 }, { "epoch": 0.9971409211852782, "grad_norm": 0.7240975499153137, "learning_rate": 6.35707034548072e-10, "loss": 0.0808, "step": 45252 }, { "epoch": 0.9971629564747944, "grad_norm": 0.7595250010490417, "learning_rate": 6.258893810595634e-10, "loss": 0.0678, "step": 45253 }, { "epoch": 0.9971849917643105, "grad_norm": 0.5178663730621338, "learning_rate": 6.16148127427496e-10, "loss": 0.0546, "step": 45254 }, { "epoch": 0.9972070270538267, "grad_norm": 0.33466657996177673, "learning_rate": 6.064832737001646e-10, "loss": 0.0445, "step": 45255 }, { "epoch": 0.9972290623433429, "grad_norm": 0.5605576634407043, "learning_rate": 5.968948199275292e-10, "loss": 0.0595, "step": 45256 }, { "epoch": 0.997251097632859, "grad_norm": 0.9431316256523132, "learning_rate": 5.873827661578845e-10, "loss": 0.0867, "step": 45257 }, { "epoch": 0.9972731329223752, "grad_norm": 0.4691615104675293, "learning_rate": 5.779471124411906e-10, "loss": 0.0743, "step": 45258 }, { "epoch": 0.9972951682118913, "grad_norm": 0.36641669273376465, "learning_rate": 5.685878588240767e-10, "loss": 0.0411, "step": 45259 }, { "epoch": 0.9973172035014075, "grad_norm": 0.8747754096984863, "learning_rate": 5.593050053531723e-10, "loss": 0.0898, "step": 45260 }, { "epoch": 0.9973392387909237, "grad_norm": 0.6352241039276123, "learning_rate": 5.500985520784374e-10, "loss": 0.0506, "step": 45261 }, { "epoch": 0.9973612740804398, "grad_norm": 0.7125222086906433, "learning_rate": 5.40968499044836e-10, "loss": 0.0592, "step": 45262 }, { "epoch": 0.997383309369956, "grad_norm": 0.6090961694717407, "learning_rate": 5.319148462989976e-10, "loss": 0.0496, "step": 45263 }, { "epoch": 0.9974053446594722, "grad_norm": 0.7911490201950073, "learning_rate": 5.229375938875514e-10, "loss": 0.0636, "step": 45264 }, { "epoch": 0.9974273799489883, "grad_norm": 0.45681601762771606, "learning_rate": 5.140367418571268e-10, "loss": 0.0425, "step": 45265 }, { "epoch": 0.9974494152385045, "grad_norm": 0.4156160354614258, "learning_rate": 5.052122902510226e-10, "loss": 0.0464, "step": 45266 }, { "epoch": 0.9974714505280207, "grad_norm": 0.5839214324951172, "learning_rate": 4.96464239115868e-10, "loss": 0.073, "step": 45267 }, { "epoch": 0.9974934858175368, "grad_norm": 0.5414983630180359, "learning_rate": 4.877925884949619e-10, "loss": 0.0485, "step": 45268 }, { "epoch": 0.997515521107053, "grad_norm": 0.36410507559776306, "learning_rate": 4.791973384332682e-10, "loss": 0.0606, "step": 45269 }, { "epoch": 0.9975375563965692, "grad_norm": 0.7010732889175415, "learning_rate": 4.706784889740856e-10, "loss": 0.0774, "step": 45270 }, { "epoch": 0.9975595916860852, "grad_norm": 0.7123214602470398, "learning_rate": 4.622360401623782e-10, "loss": 0.0963, "step": 45271 }, { "epoch": 0.9975816269756014, "grad_norm": 0.8538576364517212, "learning_rate": 4.538699920397793e-10, "loss": 0.0759, "step": 45272 }, { "epoch": 0.9976036622651175, "grad_norm": 0.42473188042640686, "learning_rate": 4.455803446479223e-10, "loss": 0.0613, "step": 45273 }, { "epoch": 0.9976256975546337, "grad_norm": 0.5812676548957825, "learning_rate": 4.373670980301059e-10, "loss": 0.0346, "step": 45274 }, { "epoch": 0.9976477328441499, "grad_norm": 0.5160127878189087, "learning_rate": 4.2923025222962874e-10, "loss": 0.0723, "step": 45275 }, { "epoch": 0.997669768133666, "grad_norm": 0.4095136523246765, "learning_rate": 4.211698072847936e-10, "loss": 0.0546, "step": 45276 }, { "epoch": 0.9976918034231822, "grad_norm": 0.42124268412590027, "learning_rate": 4.1318576323889913e-10, "loss": 0.0545, "step": 45277 }, { "epoch": 0.9977138387126984, "grad_norm": 0.6967232823371887, "learning_rate": 4.0527812013191336e-10, "loss": 0.0678, "step": 45278 }, { "epoch": 0.9977358740022145, "grad_norm": 0.7289870381355286, "learning_rate": 3.9744687800546966e-10, "loss": 0.0696, "step": 45279 }, { "epoch": 0.9977579092917307, "grad_norm": 0.4770593047142029, "learning_rate": 3.8969203689620536e-10, "loss": 0.0652, "step": 45280 }, { "epoch": 0.9977799445812469, "grad_norm": 0.6410573124885559, "learning_rate": 3.8201359684741923e-10, "loss": 0.0409, "step": 45281 }, { "epoch": 0.997801979870763, "grad_norm": 0.7231320142745972, "learning_rate": 3.7441155789574855e-10, "loss": 0.063, "step": 45282 }, { "epoch": 0.9978240151602792, "grad_norm": 0.4483514726161957, "learning_rate": 3.668859200811614e-10, "loss": 0.06, "step": 45283 }, { "epoch": 0.9978460504497954, "grad_norm": 0.5708009004592896, "learning_rate": 3.5943668344029514e-10, "loss": 0.0633, "step": 45284 }, { "epoch": 0.9978680857393115, "grad_norm": 0.39424726366996765, "learning_rate": 3.5206384801311776e-10, "loss": 0.0586, "step": 45285 }, { "epoch": 0.9978901210288277, "grad_norm": 0.6871352791786194, "learning_rate": 3.4476741383626664e-10, "loss": 0.064, "step": 45286 }, { "epoch": 0.9979121563183438, "grad_norm": 0.6620792150497437, "learning_rate": 3.3754738094804447e-10, "loss": 0.0533, "step": 45287 }, { "epoch": 0.99793419160786, "grad_norm": 0.44641461968421936, "learning_rate": 3.304037493834233e-10, "loss": 0.0567, "step": 45288 }, { "epoch": 0.9979562268973762, "grad_norm": 0.2901171147823334, "learning_rate": 3.233365191790405e-10, "loss": 0.0508, "step": 45289 }, { "epoch": 0.9979782621868923, "grad_norm": 0.5974534749984741, "learning_rate": 3.1634569037153337e-10, "loss": 0.0527, "step": 45290 }, { "epoch": 0.9980002974764085, "grad_norm": 0.6291481256484985, "learning_rate": 3.0943126299587397e-10, "loss": 0.0602, "step": 45291 }, { "epoch": 0.9980223327659247, "grad_norm": 1.0101115703582764, "learning_rate": 3.025932370886997e-10, "loss": 0.071, "step": 45292 }, { "epoch": 0.9980443680554408, "grad_norm": 0.5052899122238159, "learning_rate": 2.9583161268331713e-10, "loss": 0.0528, "step": 45293 }, { "epoch": 0.998066403344957, "grad_norm": 0.824095606803894, "learning_rate": 2.8914638981469844e-10, "loss": 0.0464, "step": 45294 }, { "epoch": 0.9980884386344732, "grad_norm": 0.9037744402885437, "learning_rate": 2.825375685161502e-10, "loss": 0.0805, "step": 45295 }, { "epoch": 0.9981104739239892, "grad_norm": 0.48838546872138977, "learning_rate": 2.760051488226445e-10, "loss": 0.0718, "step": 45296 }, { "epoch": 0.9981325092135054, "grad_norm": 0.8738489151000977, "learning_rate": 2.69549130767488e-10, "loss": 0.0582, "step": 45297 }, { "epoch": 0.9981545445030215, "grad_norm": 0.7755367159843445, "learning_rate": 2.631695143839874e-10, "loss": 0.0481, "step": 45298 }, { "epoch": 0.9981765797925377, "grad_norm": 1.088563084602356, "learning_rate": 2.5686629970211874e-10, "loss": 0.1023, "step": 45299 }, { "epoch": 0.9981986150820539, "grad_norm": 0.7380250692367554, "learning_rate": 2.5063948675518865e-10, "loss": 0.0679, "step": 45300 }, { "epoch": 0.99822065037157, "grad_norm": 0.34795787930488586, "learning_rate": 2.444890755765039e-10, "loss": 0.0509, "step": 45301 }, { "epoch": 0.9982426856610862, "grad_norm": 0.62235426902771, "learning_rate": 2.384150661960405e-10, "loss": 0.0587, "step": 45302 }, { "epoch": 0.9982647209506024, "grad_norm": 0.6581190228462219, "learning_rate": 2.3241745864377438e-10, "loss": 0.076, "step": 45303 }, { "epoch": 0.9982867562401185, "grad_norm": 0.36861810088157654, "learning_rate": 2.2649625295301236e-10, "loss": 0.0436, "step": 45304 }, { "epoch": 0.9983087915296347, "grad_norm": 0.5317443609237671, "learning_rate": 2.206514491503997e-10, "loss": 0.0412, "step": 45305 }, { "epoch": 0.9983308268191509, "grad_norm": 0.657224178314209, "learning_rate": 2.1488304726924312e-10, "loss": 0.0539, "step": 45306 }, { "epoch": 0.998352862108667, "grad_norm": 0.6359458565711975, "learning_rate": 2.0919104733618799e-10, "loss": 0.0591, "step": 45307 }, { "epoch": 0.9983748973981832, "grad_norm": 0.6823457479476929, "learning_rate": 2.035754493812103e-10, "loss": 0.0397, "step": 45308 }, { "epoch": 0.9983969326876994, "grad_norm": 0.4821685552597046, "learning_rate": 1.9803625343262078e-10, "loss": 0.0735, "step": 45309 }, { "epoch": 0.9984189679772155, "grad_norm": 0.6801928877830505, "learning_rate": 1.9257345951873005e-10, "loss": 0.0599, "step": 45310 }, { "epoch": 0.9984410032667317, "grad_norm": 0.759040117263794, "learning_rate": 1.8718706766951422e-10, "loss": 0.095, "step": 45311 }, { "epoch": 0.9984630385562479, "grad_norm": 0.5860573053359985, "learning_rate": 1.818770779082879e-10, "loss": 0.0825, "step": 45312 }, { "epoch": 0.998485073845764, "grad_norm": 0.593207597732544, "learning_rate": 1.7664349026502713e-10, "loss": 0.0821, "step": 45313 }, { "epoch": 0.9985071091352802, "grad_norm": 0.7425416707992554, "learning_rate": 1.7148630476637729e-10, "loss": 0.081, "step": 45314 }, { "epoch": 0.9985291444247963, "grad_norm": 0.6547090411186218, "learning_rate": 1.6640552143731836e-10, "loss": 0.0506, "step": 45315 }, { "epoch": 0.9985511797143125, "grad_norm": 0.39631932973861694, "learning_rate": 1.614011403044957e-10, "loss": 0.0573, "step": 45316 }, { "epoch": 0.9985732150038287, "grad_norm": 0.9284515976905823, "learning_rate": 1.5647316139288937e-10, "loss": 0.0682, "step": 45317 }, { "epoch": 0.9985952502933448, "grad_norm": 0.33006057143211365, "learning_rate": 1.5162158472914466e-10, "loss": 0.0676, "step": 45318 }, { "epoch": 0.998617285582861, "grad_norm": 0.44889241456985474, "learning_rate": 1.4684641033491098e-10, "loss": 0.0483, "step": 45319 }, { "epoch": 0.9986393208723771, "grad_norm": 0.7229723930358887, "learning_rate": 1.4214763823683364e-10, "loss": 0.0872, "step": 45320 }, { "epoch": 0.9986613561618932, "grad_norm": 0.6482763290405273, "learning_rate": 1.3752526845989267e-10, "loss": 0.0488, "step": 45321 }, { "epoch": 0.9986833914514094, "grad_norm": 0.6975202560424805, "learning_rate": 1.3297930102407209e-10, "loss": 0.0817, "step": 45322 }, { "epoch": 0.9987054267409256, "grad_norm": 1.3075110912322998, "learning_rate": 1.2850973595601724e-10, "loss": 0.0789, "step": 45323 }, { "epoch": 0.9987274620304417, "grad_norm": 0.5520686507225037, "learning_rate": 1.2411657327571213e-10, "loss": 0.0534, "step": 45324 }, { "epoch": 0.9987494973199579, "grad_norm": 1.0971651077270508, "learning_rate": 1.197998130081368e-10, "loss": 0.0653, "step": 45325 }, { "epoch": 0.998771532609474, "grad_norm": 0.4603707194328308, "learning_rate": 1.1555945517327526e-10, "loss": 0.051, "step": 45326 }, { "epoch": 0.9987935678989902, "grad_norm": 0.5515086054801941, "learning_rate": 1.1139549979444219e-10, "loss": 0.0438, "step": 45327 }, { "epoch": 0.9988156031885064, "grad_norm": 1.1145751476287842, "learning_rate": 1.0730794688995626e-10, "loss": 0.0621, "step": 45328 }, { "epoch": 0.9988376384780225, "grad_norm": 0.6182867288589478, "learning_rate": 1.0329679648479751e-10, "loss": 0.0441, "step": 45329 }, { "epoch": 0.9988596737675387, "grad_norm": 0.8775564432144165, "learning_rate": 9.936204859561926e-11, "loss": 0.073, "step": 45330 }, { "epoch": 0.9988817090570549, "grad_norm": 0.7775940895080566, "learning_rate": 9.550370324407088e-11, "loss": 0.068, "step": 45331 }, { "epoch": 0.998903744346571, "grad_norm": 1.0146751403808594, "learning_rate": 9.172176045013636e-11, "loss": 0.058, "step": 45332 }, { "epoch": 0.9989257796360872, "grad_norm": 0.5914894938468933, "learning_rate": 8.801622023213441e-11, "loss": 0.0769, "step": 45333 }, { "epoch": 0.9989478149256034, "grad_norm": 0.6948854923248291, "learning_rate": 8.438708261004901e-11, "loss": 0.0605, "step": 45334 }, { "epoch": 0.9989698502151195, "grad_norm": 0.5896570086479187, "learning_rate": 8.083434760053354e-11, "loss": 0.0544, "step": 45335 }, { "epoch": 0.9989918855046357, "grad_norm": 0.6256506443023682, "learning_rate": 7.7358015223572e-11, "loss": 0.0436, "step": 45336 }, { "epoch": 0.9990139207941519, "grad_norm": 0.7314718961715698, "learning_rate": 7.395808549748306e-11, "loss": 0.0597, "step": 45337 }, { "epoch": 0.999035956083668, "grad_norm": 0.5938366055488586, "learning_rate": 7.063455843725474e-11, "loss": 0.0557, "step": 45338 }, { "epoch": 0.9990579913731842, "grad_norm": 0.4871271252632141, "learning_rate": 6.738743406120573e-11, "loss": 0.0688, "step": 45339 }, { "epoch": 0.9990800266627003, "grad_norm": 0.5213212370872498, "learning_rate": 6.421671238432402e-11, "loss": 0.0539, "step": 45340 }, { "epoch": 0.9991020619522165, "grad_norm": 0.8632364869117737, "learning_rate": 6.112239342492831e-11, "loss": 0.0472, "step": 45341 }, { "epoch": 0.9991240972417327, "grad_norm": 0.6661907434463501, "learning_rate": 5.81044771980066e-11, "loss": 0.0474, "step": 45342 }, { "epoch": 0.9991461325312488, "grad_norm": 0.14568914473056793, "learning_rate": 5.516296372021223e-11, "loss": 0.0387, "step": 45343 }, { "epoch": 0.999168167820765, "grad_norm": 0.5258042812347412, "learning_rate": 5.229785300320256e-11, "loss": 0.0382, "step": 45344 }, { "epoch": 0.9991902031102811, "grad_norm": 0.4452161490917206, "learning_rate": 4.950914506363091e-11, "loss": 0.0435, "step": 45345 }, { "epoch": 0.9992122383997972, "grad_norm": 0.5321437120437622, "learning_rate": 4.679683991481998e-11, "loss": 0.0588, "step": 45346 }, { "epoch": 0.9992342736893134, "grad_norm": 0.5005951523780823, "learning_rate": 4.41609375734231e-11, "loss": 0.053, "step": 45347 }, { "epoch": 0.9992563089788296, "grad_norm": 0.3952036499977112, "learning_rate": 4.160143804943228e-11, "loss": 0.0445, "step": 45348 }, { "epoch": 0.9992783442683457, "grad_norm": 0.5352487564086914, "learning_rate": 3.911834135617021e-11, "loss": 0.0732, "step": 45349 }, { "epoch": 0.9993003795578619, "grad_norm": 0.6321737170219421, "learning_rate": 3.6711647508624876e-11, "loss": 0.0423, "step": 45350 }, { "epoch": 0.999322414847378, "grad_norm": 0.39111098647117615, "learning_rate": 3.438135651678831e-11, "loss": 0.0302, "step": 45351 }, { "epoch": 0.9993444501368942, "grad_norm": 0.4580343961715698, "learning_rate": 3.2127468393983174e-11, "loss": 0.0817, "step": 45352 }, { "epoch": 0.9993664854264104, "grad_norm": 0.7455168962478638, "learning_rate": 2.994998315186681e-11, "loss": 0.0407, "step": 45353 }, { "epoch": 0.9993885207159265, "grad_norm": 1.2520869970321655, "learning_rate": 2.784890080043123e-11, "loss": 0.1179, "step": 45354 }, { "epoch": 0.9994105560054427, "grad_norm": 0.790302574634552, "learning_rate": 2.5824221351333777e-11, "loss": 0.0706, "step": 45355 }, { "epoch": 0.9994325912949589, "grad_norm": 0.5934630036354065, "learning_rate": 2.3875944812901117e-11, "loss": 0.0625, "step": 45356 }, { "epoch": 0.999454626584475, "grad_norm": 0.6291429400444031, "learning_rate": 2.2004071196790597e-11, "loss": 0.0627, "step": 45357 }, { "epoch": 0.9994766618739912, "grad_norm": 0.7242529988288879, "learning_rate": 2.0208600512994223e-11, "loss": 0.0741, "step": 45358 }, { "epoch": 0.9994986971635074, "grad_norm": 0.671126127243042, "learning_rate": 1.848953276983867e-11, "loss": 0.0755, "step": 45359 }, { "epoch": 0.9995207324530235, "grad_norm": 0.5823556780815125, "learning_rate": 1.6846867975650604e-11, "loss": 0.0534, "step": 45360 }, { "epoch": 0.9995427677425397, "grad_norm": 0.5879034399986267, "learning_rate": 1.5280606140422037e-11, "loss": 0.0461, "step": 45361 }, { "epoch": 0.9995648030320559, "grad_norm": 0.47701573371887207, "learning_rate": 1.3790747269148973e-11, "loss": 0.057, "step": 45362 }, { "epoch": 0.999586838321572, "grad_norm": 0.4354237914085388, "learning_rate": 1.2377291371823418e-11, "loss": 0.0495, "step": 45363 }, { "epoch": 0.9996088736110882, "grad_norm": 0.4710164964199066, "learning_rate": 1.1040238456772045e-11, "loss": 0.059, "step": 45364 }, { "epoch": 0.9996309089006044, "grad_norm": 0.59955233335495, "learning_rate": 9.779588527325523e-12, "loss": 0.0545, "step": 45365 }, { "epoch": 0.9996529441901205, "grad_norm": 0.8230961561203003, "learning_rate": 8.595341591810523e-12, "loss": 0.0781, "step": 45366 }, { "epoch": 0.9996749794796367, "grad_norm": 0.6290262341499329, "learning_rate": 7.487497656888387e-12, "loss": 0.0609, "step": 45367 }, { "epoch": 0.9996970147691528, "grad_norm": 0.6448853015899658, "learning_rate": 6.456056727555115e-12, "loss": 0.0525, "step": 45368 }, { "epoch": 0.999719050058669, "grad_norm": 0.3059569001197815, "learning_rate": 5.501018808806713e-12, "loss": 0.0583, "step": 45369 }, { "epoch": 0.9997410853481851, "grad_norm": 0.58753502368927, "learning_rate": 4.622383903973848e-12, "loss": 0.0591, "step": 45370 }, { "epoch": 0.9997631206377012, "grad_norm": 0.5174386501312256, "learning_rate": 3.820152021383194e-12, "loss": 0.0593, "step": 45371 }, { "epoch": 0.9997851559272174, "grad_norm": 0.58122318983078, "learning_rate": 3.0943231627000855e-12, "loss": 0.0589, "step": 45372 }, { "epoch": 0.9998071912167336, "grad_norm": 0.437858521938324, "learning_rate": 2.4448973312551914e-12, "loss": 0.0533, "step": 45373 }, { "epoch": 0.9998292265062497, "grad_norm": 0.23778659105300903, "learning_rate": 1.8718745303791805e-12, "loss": 0.0449, "step": 45374 }, { "epoch": 0.9998512617957659, "grad_norm": 0.5401943325996399, "learning_rate": 1.3752547650680569e-12, "loss": 0.0772, "step": 45375 }, { "epoch": 0.999873297085282, "grad_norm": 0.4772971570491791, "learning_rate": 9.550380353218202e-13, "loss": 0.037, "step": 45376 }, { "epoch": 0.9998953323747982, "grad_norm": 0.8817121982574463, "learning_rate": 6.112243444711396e-13, "loss": 0.0738, "step": 45377 }, { "epoch": 0.9999173676643144, "grad_norm": 0.8314969539642334, "learning_rate": 3.4381369584668423e-13, "loss": 0.0796, "step": 45378 }, { "epoch": 0.9999394029538305, "grad_norm": 0.19463972747325897, "learning_rate": 1.5280608778311945e-13, "loss": 0.0448, "step": 45379 }, { "epoch": 0.9999614382433467, "grad_norm": 0.11651211977005005, "learning_rate": 3.820152194577986e-14, "loss": 0.0351, "step": 45380 }, { "epoch": 0.9999834735328629, "grad_norm": 0.8018556237220764, "learning_rate": 0.0, "loss": 0.0419, "step": 45381 }, { "epoch": 0.9999834735328629, "step": 45381, "total_flos": 4.486078735425995e+18, "train_loss": 0.09635423166789694, "train_runtime": 258358.9167, "train_samples_per_second": 16.863, "train_steps_per_second": 0.176 } ], "logging_steps": 1.0, "max_steps": 45381, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.486078735425995e+18, "train_batch_size": 12, "trial_name": null, "trial_params": null }